Coverage Report

Created: 2025-07-11 06:47

/src/tinysparql/subprojects/libxml2-2.13.1/parser.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3
 *            implemented on top of the SAX interfaces
4
 *
5
 * References:
6
 *   The XML specification:
7
 *     http://www.w3.org/TR/REC-xml
8
 *   Original 1.0 version:
9
 *     http://www.w3.org/TR/1998/REC-xml-19980210
10
 *   XML second edition working draft
11
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
12
 *
13
 * Okay this is a big file, the parser core is around 7000 lines, then it
14
 * is followed by the progressive parser top routines, then the various
15
 * high level APIs to call the parser and a few miscellaneous functions.
16
 * A number of helper functions and deprecated ones have been moved to
17
 * parserInternals.c to reduce this file size.
18
 * As much as possible the functions are associated with their relative
19
 * production in the XML specification. A few productions defining the
20
 * different ranges of character are actually implanted either in
21
 * parserInternals.h or parserInternals.c
22
 * The DOM tree build is realized from the default SAX callbacks in
23
 * the module SAX.c.
24
 * The routines doing the validation checks are in valid.c and called either
25
 * from the SAX callbacks or as standalone functions using a preparsed
26
 * document.
27
 *
28
 * See Copyright for the status of this software.
29
 *
30
 * daniel@veillard.com
31
 */
32
33
/* To avoid EBCDIC trouble when parsing on zOS */
34
#if defined(__MVS__)
35
#pragma convert("ISO8859-1")
36
#endif
37
38
#define IN_LIBXML
39
#include "libxml.h"
40
41
#if defined(_WIN32)
42
#define XML_DIR_SEP '\\'
43
#else
44
#define XML_DIR_SEP '/'
45
#endif
46
47
#include <stdlib.h>
48
#include <limits.h>
49
#include <string.h>
50
#include <stdarg.h>
51
#include <stddef.h>
52
#include <ctype.h>
53
#include <stdlib.h>
54
#include <libxml/parser.h>
55
#include <libxml/xmlmemory.h>
56
#include <libxml/tree.h>
57
#include <libxml/parserInternals.h>
58
#include <libxml/valid.h>
59
#include <libxml/entities.h>
60
#include <libxml/xmlerror.h>
61
#include <libxml/encoding.h>
62
#include <libxml/xmlIO.h>
63
#include <libxml/uri.h>
64
#include <libxml/SAX2.h>
65
#ifdef LIBXML_CATALOG_ENABLED
66
#include <libxml/catalog.h>
67
#endif
68
69
#include "private/buf.h"
70
#include "private/dict.h"
71
#include "private/entities.h"
72
#include "private/error.h"
73
#include "private/html.h"
74
#include "private/io.h"
75
#include "private/parser.h"
76
77
0
#define NS_INDEX_EMPTY  INT_MAX
78
0
#define NS_INDEX_XML    (INT_MAX - 1)
79
0
#define URI_HASH_EMPTY  0xD943A04E
80
0
#define URI_HASH_XML    0xF0451F02
81
82
#ifndef STDIN_FILENO
83
0
  #define STDIN_FILENO 0
84
#endif
85
86
struct _xmlStartTag {
87
    const xmlChar *prefix;
88
    const xmlChar *URI;
89
    int line;
90
    int nsNr;
91
};
92
93
/*
 * Extra per-namespace bookkeeping, referenced from struct _xmlParserNsData.
 * NOTE(review): member roles are inferred from their names; confirm against
 * the namespace table code.
 */
typedef struct {
    void *saxData;              /* opaque pointer, presumably owned by SAX user */
    unsigned prefixHashValue;   /* presumably the hash of the prefix */
    unsigned uriHashValue;      /* presumably the hash of the namespace URI */
    unsigned elementId;         /* presumably the id of the declaring element */
    int oldIndex;               /* presumably a previous index for the prefix */
} xmlParserNsExtra;
100
101
/*
 * One slot of the hash table referenced from struct _xmlParserNsData.
 * NOTE(review): member roles are inferred from their names; confirm.
 */
typedef struct {
    unsigned hashValue;     /* presumably the hash stored in this slot */
    int index;              /* presumably an index into the namespace table */
} xmlParserNsBucket;
105
106
struct _xmlParserNsData {
107
    xmlParserNsExtra *extra;
108
109
    unsigned hashSize;
110
    unsigned hashElems;
111
    xmlParserNsBucket *hash;
112
113
    unsigned elementId;
114
    int defaultNsIndex;
115
    int minNsIndex;
116
};
117
118
/*
 * One slot of an attribute hash table; it stores a single integer index.
 * NOTE(review): what the index refers to is not visible here; confirm.
 */
struct _xmlAttrHashBucket {
    int index;
};
121
122
static int
123
xmlParseElementStart(xmlParserCtxtPtr ctxt);
124
125
static void
126
xmlParseElementEnd(xmlParserCtxtPtr ctxt);
127
128
static xmlEntityPtr
129
xmlLookupGeneralEntity(xmlParserCtxtPtr ctxt, const xmlChar *name, int inAttr);
130
131
static const xmlChar *
132
xmlParseEntityRefInternal(xmlParserCtxtPtr ctxt);
133
134
/************************************************************************
135
 *                  *
136
 *  Arbitrary limits set in the parser. See XML_PARSE_HUGE    *
137
 *                  *
138
 ************************************************************************/
139
140
#define XML_PARSER_BIG_ENTITY 1000
141
#define XML_PARSER_LOT_ENTITY 5000
142
143
/*
144
 * Constants for protection against abusive entity expansion
145
 * ("billion laughs").
146
 */
147
148
/*
149
 * A certain amount of entity expansion which is always allowed.
150
 */
151
0
#define XML_PARSER_ALLOWED_EXPANSION 1000000
152
153
/*
154
 * Fixed cost for each entity reference. This crudely models processing time
155
 * as well to protect, for example, against exponential expansion of empty
156
 * or very short entities.
157
 */
158
0
#define XML_ENT_FIXED_COST 20
159
160
/**
161
 * xmlParserMaxDepth:
162
 *
163
 * arbitrary depth limit for the XML documents that we allow to
164
 * process. This is not a limitation of the parser but a safety
165
 * boundary feature. It can be disabled with the XML_PARSE_HUGE
166
 * parser option.
167
 */
168
const unsigned int xmlParserMaxDepth = 256;
169
170
171
172
0
#define XML_PARSER_BIG_BUFFER_SIZE 300
173
0
#define XML_PARSER_BUFFER_SIZE 100
174
0
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
175
176
/**
177
 * XML_PARSER_CHUNK_SIZE
178
 *
179
 * When calling GROW that's the minimal amount of data
180
 * the parser expected to have received. It is not a hard
181
 * limit but an optimization when reading strings like Names
182
 * It is not strictly needed as long as inputs available characters
183
 * are followed by 0, which should be provided by the I/O level
184
 */
185
#define XML_PARSER_CHUNK_SIZE 100
186
187
/**
188
 * xmlParserVersion:
189
 *
190
 * Constant string describing the internal version of the library
191
 */
192
const char *const
193
xmlParserVersion = LIBXML_VERSION_STRING LIBXML_VERSION_EXTRA;
194
195
/*
196
 * List of XML prefixed PI allowed by W3C specs
197
 */
198
199
static const char* const xmlW3CPIs[] = {
200
    "xml-stylesheet",
201
    "xml-model",
202
    NULL
203
};
204
205
206
/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
207
static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
208
                                              const xmlChar **str);
209
210
static void
211
xmlCtxtParseEntity(xmlParserCtxtPtr ctxt, xmlEntityPtr ent);
212
213
static int
214
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
215
216
/************************************************************************
217
 *                  *
218
 *    Some factorized error routines        *
219
 *                  *
220
 ************************************************************************/
221
222
static void
223
0
xmlErrMemory(xmlParserCtxtPtr ctxt) {
224
0
    xmlCtxtErrMemory(ctxt);
225
0
}
226
227
/**
228
 * xmlErrAttributeDup:
229
 * @ctxt:  an XML parser context
230
 * @prefix:  the attribute prefix
231
 * @localname:  the attribute localname
232
 *
233
 * Handle a redefinition of attribute error
234
 */
235
static void
236
xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
237
                   const xmlChar * localname)
238
0
{
239
0
    if (prefix == NULL)
240
0
        xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, XML_ERR_ATTRIBUTE_REDEFINED,
241
0
                   XML_ERR_FATAL, localname, NULL, NULL, 0,
242
0
                   "Attribute %s redefined\n", localname);
243
0
    else
244
0
        xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, XML_ERR_ATTRIBUTE_REDEFINED,
245
0
                   XML_ERR_FATAL, prefix, localname, NULL, 0,
246
0
                   "Attribute %s:%s redefined\n", prefix, localname);
247
0
}
248
249
/**
250
 * xmlFatalErrMsg:
251
 * @ctxt:  an XML parser context
252
 * @error:  the error number
253
 * @msg:  the error message
254
 *
255
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
256
 */
257
static void LIBXML_ATTR_FORMAT(3,0)
xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
               const char *msg)
{
    /*
     * The message is passed as an argument to a fixed "%s" format, so
     * it is never interpreted as a format string itself.
     */
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
               NULL, NULL, NULL, 0,
               "%s", msg);
}
264
265
/**
266
 * xmlWarningMsg:
267
 * @ctxt:  an XML parser context
268
 * @error:  the error number
269
 * @msg:  the error message
270
 * @str1:  extra data
271
 * @str2:  extra data
272
 *
273
 * Handle a warning.
274
 */
275
void LIBXML_ATTR_FORMAT(3,0)
xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
              const char *msg, const xmlChar *str1, const xmlChar *str2)
{
    /*
     * str1 and str2 are handed over twice: once in the fixed argument
     * list of xmlCtxtErr and once as the arguments consumed by msg.
     */
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_WARNING,
               str1, str2, NULL, 0,
               msg, str1, str2);
}
282
283
/**
284
 * xmlValidityError:
285
 * @ctxt:  an XML parser context
286
 * @error:  the error number
287
 * @msg:  the error message
288
 * @str1:  extra data
289
 *
290
 * Handle a validity error.
291
 */
292
static void LIBXML_ATTR_FORMAT(3,0)
xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
              const char *msg, const xmlChar *str1, const xmlChar *str2)
{
    /* Clear the context's valid flag before reporting. */
    ctxt->valid = 0;

    /* Reported in the DTD domain at plain error level. */
    xmlCtxtErr(ctxt, NULL, XML_FROM_DTD, error, XML_ERR_ERROR,
               str1, str2, NULL, 0,
               msg, str1, str2);
}
301
302
/**
303
 * xmlFatalErrMsgInt:
304
 * @ctxt:  an XML parser context
305
 * @error:  the error number
306
 * @msg:  the error message
307
 * @val:  an integer value
308
 *
309
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
310
 */
311
static void LIBXML_ATTR_FORMAT(3,0)
xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
                  const char *msg, int val)
{
    /*
     * val is handed over twice: in the fixed integer slot of xmlCtxtErr
     * and as the single argument consumed by msg.
     */
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
               NULL, NULL, NULL, val,
               msg, val);
}
318
319
/**
320
 * xmlFatalErrMsgStrIntStr:
321
 * @ctxt:  an XML parser context
322
 * @error:  the error number
323
 * @msg:  the error message
324
 * @str1:  a string info
325
 * @val:  an integer value
326
 * @str2:  a string info
327
 *
328
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
329
 */
330
static void LIBXML_ATTR_FORMAT(3,0)
xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
                  const char *msg, const xmlChar *str1, int val,
                  const xmlChar *str2)
{
    /* msg consumes its arguments in the order: str1, val, str2. */
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
               str1, str2, NULL, val,
               msg, str1, val, str2);
}
338
339
/**
340
 * xmlFatalErrMsgStr:
341
 * @ctxt:  an XML parser context
342
 * @error:  the error number
343
 * @msg:  the error message
344
 * @val:  a string value
345
 *
346
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
347
 */
348
static void LIBXML_ATTR_FORMAT(3,0)
xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
                  const char *msg, const xmlChar * val)
{
    /* Fatal level; val is the single argument consumed by msg. */
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
               val, NULL, NULL, 0,
               msg, val);
}
355
356
/**
357
 * xmlErrMsgStr:
358
 * @ctxt:  an XML parser context
359
 * @error:  the error number
360
 * @msg:  the error message
361
 * @val:  a string value
362
 *
363
 * Handle a non fatal parser error
364
 */
365
static void LIBXML_ATTR_FORMAT(3,0)
xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
                  const char *msg, const xmlChar * val)
{
    /* Same as the fatal variant except for the XML_ERR_ERROR level. */
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_ERROR,
               val, NULL, NULL, 0,
               msg, val);
}
372
373
/**
374
 * xmlNsErr:
375
 * @ctxt:  an XML parser context
376
 * @error:  the error number
377
 * @msg:  the message
378
 * @info1:  extra information string
379
 * @info2:  extra information string
380
 *
381
 * Handle a namespace error, i.e. violating Namespace constraints
382
 */
383
static void LIBXML_ATTR_FORMAT(3,0)
xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
         const char *msg,
         const xmlChar * info1, const xmlChar * info2,
         const xmlChar * info3)
{
    /* Clear the context's nsWellFormed flag before reporting. */
    ctxt->nsWellFormed = 0;

    /* Reported in the namespace domain at XML_ERR_ERROR level. */
    xmlCtxtErr(ctxt, NULL, XML_FROM_NAMESPACE, error, XML_ERR_ERROR,
               info1, info2, info3, 0,
               msg, info1, info2, info3);
}
394
395
/**
396
 * xmlNsWarn
397
 * @ctxt:  an XML parser context
398
 * @error:  the error number
399
 * @msg:  the message
400
 * @info1:  extra information string
401
 * @info2:  extra information string
402
 *
403
 * Handle a namespace warning error
404
 */
405
static void LIBXML_ATTR_FORMAT(3,0)
xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
         const char *msg,
         const xmlChar * info1, const xmlChar * info2,
         const xmlChar * info3)
{
    /*
     * Warning counterpart of xmlNsErr: same domain and arguments, but
     * the nsWellFormed flag is left untouched.
     */
    xmlCtxtErr(ctxt, NULL, XML_FROM_NAMESPACE, error, XML_ERR_WARNING,
               info1, info2, info3, 0,
               msg, info1, info2, info3);
}
414
415
/*
 * Add val to *dst, clamping the result at ULONG_MAX instead of wrapping.
 */
static void
xmlSaturatedAdd(unsigned long *dst, unsigned long val)
{
    /* Largest amount that can still be added without wrapping. */
    unsigned long room = ULONG_MAX - *dst;

    *dst = (val > room) ? ULONG_MAX : (*dst + val);
}
422
423
/*
 * Add a size_t quantity to *dst, clamping the result at ULONG_MAX.
 *
 * val is taken as size_t so that a value wider than unsigned long (for
 * example a pointer difference on platforms where long is 32 bits and
 * pointers are 64 bits) is compared at full width and saturates, instead
 * of being truncated at the call site before the check.
 */
static void
xmlSaturatedAddSizeT(unsigned long *dst, size_t val) {
    /* The comparison is carried out in the wider of the two types. */
    if (val > ULONG_MAX - *dst)
        *dst = ULONG_MAX;
    else
        *dst += (unsigned long) val; /* fits: val <= ULONG_MAX - *dst */
}
430
431
/**
432
 * xmlParserEntityCheck:
433
 * @ctxt:  parser context
434
 * @extra:  sum of unexpanded entity sizes
435
 *
436
 * Check for non-linear entity expansion behaviour.
437
 *
438
 * In some cases like xmlExpandEntityInAttValue, this function is called
439
 * for each, possibly nested entity and its unexpanded content length.
440
 *
441
 * In other cases like xmlParseReference, it's only called for each
442
 * top-level entity with its unexpanded content length plus the sum of
443
 * the unexpanded content lengths (plus fixed cost) of all nested
444
 * entities.
445
 *
446
 * Summing the unexpanded lengths also adds the length of the reference.
447
 * This is by design. Taking the length of the entity name into account
448
 * discourages attacks that try to waste CPU time with abusively long
449
 * entity names. See test/recurse/lol6.xml for example. Each call also
450
 * adds some fixed cost XML_ENT_FIXED_COST to discourage attacks with
451
 * short entities.
452
 *
453
 * Returns 1 on error, 0 on success.
454
 */
455
static int
456
xmlParserEntityCheck(xmlParserCtxtPtr ctxt, unsigned long extra)
457
0
{
458
0
    unsigned long consumed;
459
0
    unsigned long *expandedSize;
460
0
    xmlParserInputPtr input = ctxt->input;
461
0
    xmlEntityPtr entity = input->entity;
462
463
0
    if ((entity) && (entity->flags & XML_ENT_CHECKED))
464
0
        return(0);
465
466
    /*
467
     * Compute total consumed bytes so far, including input streams of
468
     * external entities.
469
     */
470
0
    consumed = input->consumed;
471
0
    xmlSaturatedAddSizeT(&consumed, input->cur - input->base);
472
0
    xmlSaturatedAdd(&consumed, ctxt->sizeentities);
473
474
0
    if (entity)
475
0
        expandedSize = &entity->expandedSize;
476
0
    else
477
0
        expandedSize = &ctxt->sizeentcopy;
478
479
    /*
480
     * Add extra cost and some fixed cost.
481
     */
482
0
    xmlSaturatedAdd(expandedSize, extra);
483
0
    xmlSaturatedAdd(expandedSize, XML_ENT_FIXED_COST);
484
485
    /*
486
     * It's important to always use saturation arithmetic when tracking
487
     * entity sizes to make the size checks reliable. If "sizeentcopy"
488
     * overflows, we have to abort.
489
     */
490
0
    if ((*expandedSize > XML_PARSER_ALLOWED_EXPANSION) &&
491
0
        ((*expandedSize >= ULONG_MAX) ||
492
0
         (*expandedSize / ctxt->maxAmpl > consumed))) {
493
0
        xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
494
0
                       "Maximum entity amplification factor exceeded, see "
495
0
                       "xmlCtxtSetMaxAmplification.\n");
496
0
        xmlHaltParser(ctxt);
497
0
        return(1);
498
0
    }
499
500
0
    return(0);
501
0
}
502
503
/************************************************************************
504
 *                  *
505
 *    Library wide options          *
506
 *                  *
507
 ************************************************************************/
508
509
/**
510
  * xmlHasFeature:
511
  * @feature: the feature to be examined
512
  *
513
  * Examines if the library has been compiled with a given feature.
514
  *
515
  * Returns a non-zero value if the feature exists, otherwise zero.
516
  * Returns zero (0) if the feature does not exist or an unknown
517
  * feature is requested, non-zero otherwise.
518
  */
519
int
520
xmlHasFeature(xmlFeature feature)
521
0
{
522
0
    switch (feature) {
523
0
  case XML_WITH_THREAD:
524
0
#ifdef LIBXML_THREAD_ENABLED
525
0
      return(1);
526
#else
527
      return(0);
528
#endif
529
0
        case XML_WITH_TREE:
530
0
#ifdef LIBXML_TREE_ENABLED
531
0
            return(1);
532
#else
533
            return(0);
534
#endif
535
0
        case XML_WITH_OUTPUT:
536
0
#ifdef LIBXML_OUTPUT_ENABLED
537
0
            return(1);
538
#else
539
            return(0);
540
#endif
541
0
        case XML_WITH_PUSH:
542
0
#ifdef LIBXML_PUSH_ENABLED
543
0
            return(1);
544
#else
545
            return(0);
546
#endif
547
0
        case XML_WITH_READER:
548
0
#ifdef LIBXML_READER_ENABLED
549
0
            return(1);
550
#else
551
            return(0);
552
#endif
553
0
        case XML_WITH_PATTERN:
554
0
#ifdef LIBXML_PATTERN_ENABLED
555
0
            return(1);
556
#else
557
            return(0);
558
#endif
559
0
        case XML_WITH_WRITER:
560
0
#ifdef LIBXML_WRITER_ENABLED
561
0
            return(1);
562
#else
563
            return(0);
564
#endif
565
0
        case XML_WITH_SAX1:
566
0
#ifdef LIBXML_SAX1_ENABLED
567
0
            return(1);
568
#else
569
            return(0);
570
#endif
571
0
        case XML_WITH_FTP:
572
#ifdef LIBXML_FTP_ENABLED
573
            return(1);
574
#else
575
0
            return(0);
576
0
#endif
577
0
        case XML_WITH_HTTP:
578
#ifdef LIBXML_HTTP_ENABLED
579
            return(1);
580
#else
581
0
            return(0);
582
0
#endif
583
0
        case XML_WITH_VALID:
584
0
#ifdef LIBXML_VALID_ENABLED
585
0
            return(1);
586
#else
587
            return(0);
588
#endif
589
0
        case XML_WITH_HTML:
590
0
#ifdef LIBXML_HTML_ENABLED
591
0
            return(1);
592
#else
593
            return(0);
594
#endif
595
0
        case XML_WITH_LEGACY:
596
#ifdef LIBXML_LEGACY_ENABLED
597
            return(1);
598
#else
599
0
            return(0);
600
0
#endif
601
0
        case XML_WITH_C14N:
602
0
#ifdef LIBXML_C14N_ENABLED
603
0
            return(1);
604
#else
605
            return(0);
606
#endif
607
0
        case XML_WITH_CATALOG:
608
0
#ifdef LIBXML_CATALOG_ENABLED
609
0
            return(1);
610
#else
611
            return(0);
612
#endif
613
0
        case XML_WITH_XPATH:
614
0
#ifdef LIBXML_XPATH_ENABLED
615
0
            return(1);
616
#else
617
            return(0);
618
#endif
619
0
        case XML_WITH_XPTR:
620
0
#ifdef LIBXML_XPTR_ENABLED
621
0
            return(1);
622
#else
623
            return(0);
624
#endif
625
0
        case XML_WITH_XINCLUDE:
626
0
#ifdef LIBXML_XINCLUDE_ENABLED
627
0
            return(1);
628
#else
629
            return(0);
630
#endif
631
0
        case XML_WITH_ICONV:
632
0
#ifdef LIBXML_ICONV_ENABLED
633
0
            return(1);
634
#else
635
            return(0);
636
#endif
637
0
        case XML_WITH_ISO8859X:
638
0
#ifdef LIBXML_ISO8859X_ENABLED
639
0
            return(1);
640
#else
641
            return(0);
642
#endif
643
0
        case XML_WITH_UNICODE:
644
0
#ifdef LIBXML_UNICODE_ENABLED
645
0
            return(1);
646
#else
647
            return(0);
648
#endif
649
0
        case XML_WITH_REGEXP:
650
0
#ifdef LIBXML_REGEXP_ENABLED
651
0
            return(1);
652
#else
653
            return(0);
654
#endif
655
0
        case XML_WITH_AUTOMATA:
656
0
#ifdef LIBXML_AUTOMATA_ENABLED
657
0
            return(1);
658
#else
659
            return(0);
660
#endif
661
0
        case XML_WITH_EXPR:
662
#ifdef LIBXML_EXPR_ENABLED
663
            return(1);
664
#else
665
0
            return(0);
666
0
#endif
667
0
        case XML_WITH_SCHEMAS:
668
0
#ifdef LIBXML_SCHEMAS_ENABLED
669
0
            return(1);
670
#else
671
            return(0);
672
#endif
673
0
        case XML_WITH_SCHEMATRON:
674
0
#ifdef LIBXML_SCHEMATRON_ENABLED
675
0
            return(1);
676
#else
677
            return(0);
678
#endif
679
0
        case XML_WITH_MODULES:
680
0
#ifdef LIBXML_MODULES_ENABLED
681
0
            return(1);
682
#else
683
            return(0);
684
#endif
685
0
        case XML_WITH_DEBUG:
686
0
#ifdef LIBXML_DEBUG_ENABLED
687
0
            return(1);
688
#else
689
            return(0);
690
#endif
691
0
        case XML_WITH_DEBUG_MEM:
692
0
            return(0);
693
0
        case XML_WITH_ZLIB:
694
0
#ifdef LIBXML_ZLIB_ENABLED
695
0
            return(1);
696
#else
697
            return(0);
698
#endif
699
0
        case XML_WITH_LZMA:
700
#ifdef LIBXML_LZMA_ENABLED
701
            return(1);
702
#else
703
0
            return(0);
704
0
#endif
705
0
        case XML_WITH_ICU:
706
#ifdef LIBXML_ICU_ENABLED
707
            return(1);
708
#else
709
0
            return(0);
710
0
#endif
711
0
        default:
712
0
      break;
713
0
     }
714
0
     return(0);
715
0
}
716
717
/************************************************************************
718
 *                  *
719
 *      Simple string buffer        *
720
 *                  *
721
 ************************************************************************/
722
723
typedef struct {
724
    xmlChar *mem;
725
    unsigned size;
726
    unsigned cap; /* size < cap */
727
    unsigned max; /* size <= max */
728
    xmlParserErrors code;
729
} xmlSBuf;
730
731
static void
732
0
xmlSBufInit(xmlSBuf *buf, unsigned max) {
733
0
    buf->mem = NULL;
734
0
    buf->size = 0;
735
0
    buf->cap = 0;
736
0
    buf->max = max;
737
0
    buf->code = XML_ERR_OK;
738
0
}
739
740
static int
741
0
xmlSBufGrow(xmlSBuf *buf, unsigned len) {
742
0
    xmlChar *mem;
743
0
    unsigned cap;
744
745
0
    if (len >= UINT_MAX / 2 - buf->size) {
746
0
        if (buf->code == XML_ERR_OK)
747
0
            buf->code = XML_ERR_RESOURCE_LIMIT;
748
0
        return(-1);
749
0
    }
750
751
0
    cap = (buf->size + len) * 2;
752
0
    if (cap < 240)
753
0
        cap = 240;
754
755
0
    mem = xmlRealloc(buf->mem, cap);
756
0
    if (mem == NULL) {
757
0
        buf->code = XML_ERR_NO_MEMORY;
758
0
        return(-1);
759
0
    }
760
761
0
    buf->mem = mem;
762
0
    buf->cap = cap;
763
764
0
    return(0);
765
0
}
766
767
static void
768
0
xmlSBufAddString(xmlSBuf *buf, const xmlChar *str, unsigned len) {
769
0
    if (buf->max - buf->size < len) {
770
0
        if (buf->code == XML_ERR_OK)
771
0
            buf->code = XML_ERR_RESOURCE_LIMIT;
772
0
        return;
773
0
    }
774
775
0
    if (buf->cap - buf->size <= len) {
776
0
        if (xmlSBufGrow(buf, len) < 0)
777
0
            return;
778
0
    }
779
780
0
    if (len > 0)
781
0
        memcpy(buf->mem + buf->size, str, len);
782
0
    buf->size += len;
783
0
}
784
785
static void
786
0
xmlSBufAddCString(xmlSBuf *buf, const char *str, unsigned len) {
787
0
    xmlSBufAddString(buf, (const xmlChar *) str, len);
788
0
}
789
790
static void
791
0
xmlSBufAddChar(xmlSBuf *buf, int c) {
792
0
    xmlChar *end;
793
794
0
    if (buf->max - buf->size < 4) {
795
0
        if (buf->code == XML_ERR_OK)
796
0
            buf->code = XML_ERR_RESOURCE_LIMIT;
797
0
        return;
798
0
    }
799
800
0
    if (buf->cap - buf->size <= 4) {
801
0
        if (xmlSBufGrow(buf, 4) < 0)
802
0
            return;
803
0
    }
804
805
0
    end = buf->mem + buf->size;
806
807
0
    if (c < 0x80) {
808
0
        *end = (xmlChar) c;
809
0
        buf->size += 1;
810
0
    } else {
811
0
        buf->size += xmlCopyCharMultiByte(end, c);
812
0
    }
813
0
}
814
815
static void
816
0
xmlSBufAddReplChar(xmlSBuf *buf) {
817
0
    xmlSBufAddCString(buf, "\xEF\xBF\xBD", 3);
818
0
}
819
820
static void
821
0
xmlSBufReportError(xmlSBuf *buf, xmlParserCtxtPtr ctxt, const char *errMsg) {
822
0
    if (buf->code == XML_ERR_NO_MEMORY)
823
0
        xmlCtxtErrMemory(ctxt);
824
0
    else
825
0
        xmlFatalErr(ctxt, buf->code, errMsg);
826
0
}
827
828
static xmlChar *
829
xmlSBufFinish(xmlSBuf *buf, int *sizeOut, xmlParserCtxtPtr ctxt,
830
0
              const char *errMsg) {
831
0
    if (buf->mem == NULL) {
832
0
        buf->mem = xmlMalloc(1);
833
0
        if (buf->mem == NULL) {
834
0
            buf->code = XML_ERR_NO_MEMORY;
835
0
        } else {
836
0
            buf->mem[0] = 0;
837
0
        }
838
0
    } else {
839
0
        buf->mem[buf->size] = 0;
840
0
    }
841
842
0
    if (buf->code == XML_ERR_OK) {
843
0
        if (sizeOut != NULL)
844
0
            *sizeOut = buf->size;
845
0
        return(buf->mem);
846
0
    }
847
848
0
    xmlSBufReportError(buf, ctxt, errMsg);
849
850
0
    xmlFree(buf->mem);
851
852
0
    if (sizeOut != NULL)
853
0
        *sizeOut = 0;
854
0
    return(NULL);
855
0
}
856
857
static void
858
0
xmlSBufCleanup(xmlSBuf *buf, xmlParserCtxtPtr ctxt, const char *errMsg) {
859
0
    if (buf->code != XML_ERR_OK)
860
0
        xmlSBufReportError(buf, ctxt, errMsg);
861
862
0
    xmlFree(buf->mem);
863
0
}
864
865
static int
866
xmlUTF8MultibyteLen(xmlParserCtxtPtr ctxt, const xmlChar *str,
867
0
                    const char *errMsg) {
868
0
    int c = str[0];
869
0
    int c1 = str[1];
870
871
0
    if ((c1 & 0xC0) != 0x80)
872
0
        goto encoding_error;
873
874
0
    if (c < 0xE0) {
875
        /* 2-byte sequence */
876
0
        if (c < 0xC2)
877
0
            goto encoding_error;
878
879
0
        return(2);
880
0
    } else {
881
0
        int c2 = str[2];
882
883
0
        if ((c2 & 0xC0) != 0x80)
884
0
            goto encoding_error;
885
886
0
        if (c < 0xF0) {
887
            /* 3-byte sequence */
888
0
            if (c == 0xE0) {
889
                /* overlong */
890
0
                if (c1 < 0xA0)
891
0
                    goto encoding_error;
892
0
            } else if (c == 0xED) {
893
                /* surrogate */
894
0
                if (c1 >= 0xA0)
895
0
                    goto encoding_error;
896
0
            } else if (c == 0xEF) {
897
                /* U+FFFE and U+FFFF are invalid Chars */
898
0
                if ((c1 == 0xBF) && (c2 >= 0xBE))
899
0
                    xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR, errMsg);
900
0
            }
901
902
0
            return(3);
903
0
        } else {
904
            /* 4-byte sequence */
905
0
            if ((str[3] & 0xC0) != 0x80)
906
0
                goto encoding_error;
907
0
            if (c == 0xF0) {
908
                /* overlong */
909
0
                if (c1 < 0x90)
910
0
                    goto encoding_error;
911
0
            } else if (c >= 0xF4) {
912
                /* greater than 0x10FFFF */
913
0
                if ((c > 0xF4) || (c1 >= 0x90))
914
0
                    goto encoding_error;
915
0
            }
916
917
0
            return(4);
918
0
        }
919
0
    }
920
921
0
encoding_error:
922
    /* Only report the first error */
923
0
    if ((ctxt->input->flags & XML_INPUT_ENCODING_ERROR) == 0) {
924
0
        xmlCtxtErrIO(ctxt, XML_ERR_INVALID_ENCODING, NULL);
925
0
        ctxt->input->flags |= XML_INPUT_ENCODING_ERROR;
926
0
    }
927
928
0
    return(0);
929
0
}
930
931
/************************************************************************
932
 *                  *
933
 *    SAX2 defaulted attributes handling      *
934
 *                  *
935
 ************************************************************************/
936
937
/**
938
 * xmlCtxtInitializeLate:
939
 * @ctxt:  an XML parser context
940
 *
941
 * Final initialization of the parser context before starting to parse.
942
 *
943
 * This accounts for users modifying struct members of parser context
944
 * directly.
945
 */
946
static void
947
0
xmlCtxtInitializeLate(xmlParserCtxtPtr ctxt) {
948
0
    xmlSAXHandlerPtr sax;
949
950
    /* Avoid unused variable warning if features are disabled. */
951
0
    (void) sax;
952
953
    /*
954
     * Changing the SAX struct directly is still widespread practice
955
     * in internal and external code.
956
     */
957
0
    if (ctxt == NULL) return;
958
0
    sax = ctxt->sax;
959
0
#ifdef LIBXML_SAX1_ENABLED
960
    /*
961
     * Only enable SAX2 if there SAX2 element handlers, except when there
962
     * are no element handlers at all.
963
     */
964
0
    if (((ctxt->options & XML_PARSE_SAX1) == 0) &&
965
0
        (sax) &&
966
0
        (sax->initialized == XML_SAX2_MAGIC) &&
967
0
        ((sax->startElementNs != NULL) ||
968
0
         (sax->endElementNs != NULL) ||
969
0
         ((sax->startElement == NULL) && (sax->endElement == NULL))))
970
0
        ctxt->sax2 = 1;
971
#else
972
    ctxt->sax2 = 1;
973
#endif /* LIBXML_SAX1_ENABLED */
974
975
    /*
976
     * Some users replace the dictionary directly in the context struct.
977
     * We really need an API function to do that cleanly.
978
     */
979
0
    ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
980
0
    ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
981
0
    ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
982
0
    if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
983
0
    (ctxt->str_xml_ns == NULL)) {
984
0
        xmlErrMemory(ctxt);
985
0
    }
986
0
}
987
988
typedef struct {
989
    xmlHashedString prefix;
990
    xmlHashedString name;
991
    xmlHashedString value;
992
    const xmlChar *valueEnd;
993
    int external;
994
    int expandedSize;
995
} xmlDefAttr;
996
997
typedef struct _xmlDefAttrs xmlDefAttrs;
998
typedef xmlDefAttrs *xmlDefAttrsPtr;
999
struct _xmlDefAttrs {
1000
    int nbAttrs;  /* number of defaulted attributes on that element */
1001
    int maxAttrs;       /* the size of the array */
1002
#if __STDC_VERSION__ >= 199901L
1003
    /* Using a C99 flexible array member avoids UBSan errors. */
1004
    xmlDefAttr attrs[]; /* array of localname/prefix/values/external */
1005
#else
1006
    xmlDefAttr attrs[1];
1007
#endif
1008
};
1009
1010
/**
1011
 * xmlAttrNormalizeSpace:
1012
 * @src: the source string
1013
 * @dst: the target string
1014
 *
1015
 * Normalize the space in non CDATA attribute values:
1016
 * If the attribute type is not CDATA, then the XML processor MUST further
1017
 * process the normalized attribute value by discarding any leading and
1018
 * trailing space (#x20) characters, and by replacing sequences of space
1019
 * (#x20) characters by a single space (#x20) character.
1020
 * Note that the size of dst need to be at least src, and if one doesn't need
1021
 * to preserve dst (and it doesn't come from a dictionary or read-only) then
1022
 * passing src as dst is just fine.
1023
 *
1024
 * Returns a pointer to the end of the normalized value (the terminating
 *         0 written to dst) or NULL if no conversion
1025
 *         is needed.
1026
 */
1027
static xmlChar *
xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
{
    const xmlChar *in;
    xmlChar *out;

    if ((src == NULL) || (dst == NULL))
        return(NULL);

    in = src;
    out = dst;

    /* Drop leading spaces. */
    while (*in == 0x20)
        in++;

    while (*in != 0) {
        if (*in != 0x20) {
            *out++ = *in++;
            continue;
        }

        /*
         * Collapse a run of spaces into one; a run that reaches the end
         * of the string is dropped entirely.
         */
        do {
            in++;
        } while (*in == 0x20);
        if (*in != 0)
            *out++ = 0x20;
    }
    *out = 0;

    /*
     * Both cursors now point at their terminators. They coincide only
     * for an in-place call that removed nothing, which is reported as
     * NULL. Otherwise the write cursor (the end of the normalized
     * string, not its start) is returned.
     */
    if (out == in)
        return(NULL);
    return(out);
}
1048
1049
/**
1050
 * xmlAddDefAttrs:
1051
 * @ctxt:  an XML parser context
1052
 * @fullname:  the element fullname
1053
 * @fullattr:  the attribute fullname
1054
 * @value:  the attribute value
1055
 *
1056
 * Add a defaulted attribute for an element
1057
 */
1058
static void
1059
xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1060
               const xmlChar *fullname,
1061
               const xmlChar *fullattr,
1062
0
               const xmlChar *value) {
1063
0
    xmlDefAttrsPtr defaults;
1064
0
    xmlDefAttr *attr;
1065
0
    int len, expandedSize;
1066
0
    xmlHashedString name;
1067
0
    xmlHashedString prefix;
1068
0
    xmlHashedString hvalue;
1069
0
    const xmlChar *localname;
1070
1071
    /*
1072
     * Allows to detect attribute redefinitions
1073
     */
1074
0
    if (ctxt->attsSpecial != NULL) {
1075
0
        if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1076
0
      return;
1077
0
    }
1078
1079
0
    if (ctxt->attsDefault == NULL) {
1080
0
        ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
1081
0
  if (ctxt->attsDefault == NULL)
1082
0
      goto mem_error;
1083
0
    }
1084
1085
    /*
1086
     * split the element name into prefix:localname , the string found
1087
     * are within the DTD and then not associated to namespace names.
1088
     */
1089
0
    localname = xmlSplitQName3(fullname, &len);
1090
0
    if (localname == NULL) {
1091
0
        name = xmlDictLookupHashed(ctxt->dict, fullname, -1);
1092
0
  prefix.name = NULL;
1093
0
    } else {
1094
0
        name = xmlDictLookupHashed(ctxt->dict, localname, -1);
1095
0
  prefix = xmlDictLookupHashed(ctxt->dict, fullname, len);
1096
0
        if (prefix.name == NULL)
1097
0
            goto mem_error;
1098
0
    }
1099
0
    if (name.name == NULL)
1100
0
        goto mem_error;
1101
1102
    /*
1103
     * make sure there is some storage
1104
     */
1105
0
    defaults = xmlHashLookup2(ctxt->attsDefault, name.name, prefix.name);
1106
0
    if ((defaults == NULL) ||
1107
0
        (defaults->nbAttrs >= defaults->maxAttrs)) {
1108
0
        xmlDefAttrsPtr temp;
1109
0
        int newSize;
1110
1111
0
        newSize = (defaults != NULL) ? 2 * defaults->maxAttrs : 4;
1112
0
        temp = xmlRealloc(defaults,
1113
0
                          sizeof(*defaults) + newSize * sizeof(xmlDefAttr));
1114
0
  if (temp == NULL)
1115
0
      goto mem_error;
1116
0
        if (defaults == NULL)
1117
0
            temp->nbAttrs = 0;
1118
0
  temp->maxAttrs = newSize;
1119
0
        defaults = temp;
1120
0
  if (xmlHashUpdateEntry2(ctxt->attsDefault, name.name, prefix.name,
1121
0
                          defaults, NULL) < 0) {
1122
0
      xmlFree(defaults);
1123
0
      goto mem_error;
1124
0
  }
1125
0
    }
1126
1127
    /*
1128
     * Split the attribute name into prefix:localname , the string found
1129
     * are within the DTD and hen not associated to namespace names.
1130
     */
1131
0
    localname = xmlSplitQName3(fullattr, &len);
1132
0
    if (localname == NULL) {
1133
0
        name = xmlDictLookupHashed(ctxt->dict, fullattr, -1);
1134
0
  prefix.name = NULL;
1135
0
    } else {
1136
0
        name = xmlDictLookupHashed(ctxt->dict, localname, -1);
1137
0
  prefix = xmlDictLookupHashed(ctxt->dict, fullattr, len);
1138
0
        if (prefix.name == NULL)
1139
0
            goto mem_error;
1140
0
    }
1141
0
    if (name.name == NULL)
1142
0
        goto mem_error;
1143
1144
    /* intern the string and precompute the end */
1145
0
    len = strlen((const char *) value);
1146
0
    hvalue = xmlDictLookupHashed(ctxt->dict, value, len);
1147
0
    if (hvalue.name == NULL)
1148
0
        goto mem_error;
1149
1150
0
    expandedSize = strlen((const char *) name.name);
1151
0
    if (prefix.name != NULL)
1152
0
        expandedSize += strlen((const char *) prefix.name);
1153
0
    expandedSize += len;
1154
1155
0
    attr = &defaults->attrs[defaults->nbAttrs++];
1156
0
    attr->name = name;
1157
0
    attr->prefix = prefix;
1158
0
    attr->value = hvalue;
1159
0
    attr->valueEnd = hvalue.name + len;
1160
0
    attr->external = PARSER_EXTERNAL(ctxt);
1161
0
    attr->expandedSize = expandedSize;
1162
1163
0
    return;
1164
1165
0
mem_error:
1166
0
    xmlErrMemory(ctxt);
1167
0
    return;
1168
0
}
1169
1170
/**
1171
 * xmlAddSpecialAttr:
1172
 * @ctxt:  an XML parser context
1173
 * @fullname:  the element fullname
1174
 * @fullattr:  the attribute fullname
1175
 * @type:  the attribute type
1176
 *
1177
 * Register this attribute type
1178
 */
1179
static void
1180
xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1181
      const xmlChar *fullname,
1182
      const xmlChar *fullattr,
1183
      int type)
1184
0
{
1185
0
    if (ctxt->attsSpecial == NULL) {
1186
0
        ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
1187
0
  if (ctxt->attsSpecial == NULL)
1188
0
      goto mem_error;
1189
0
    }
1190
1191
0
    if (xmlHashAdd2(ctxt->attsSpecial, fullname, fullattr,
1192
0
                    (void *) (ptrdiff_t) type) < 0)
1193
0
        goto mem_error;
1194
0
    return;
1195
1196
0
mem_error:
1197
0
    xmlErrMemory(ctxt);
1198
0
    return;
1199
0
}
1200
1201
/**
1202
 * xmlCleanSpecialAttrCallback:
1203
 *
1204
 * Removes CDATA attributes from the special attribute table
1205
 */
1206
static void
1207
xmlCleanSpecialAttrCallback(void *payload, void *data,
1208
                            const xmlChar *fullname, const xmlChar *fullattr,
1209
0
                            const xmlChar *unused ATTRIBUTE_UNUSED) {
1210
0
    xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1211
1212
0
    if (((ptrdiff_t) payload) == XML_ATTRIBUTE_CDATA) {
1213
0
        xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1214
0
    }
1215
0
}
1216
1217
/**
1218
 * xmlCleanSpecialAttr:
1219
 * @ctxt:  an XML parser context
1220
 *
1221
 * Trim the list of attributes defined to remove all those of type
1222
 * CDATA as they are not special. This call should be done when finishing
1223
 * to parse the DTD and before starting to parse the document root.
1224
 */
1225
static void
1226
xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1227
0
{
1228
0
    if (ctxt->attsSpecial == NULL)
1229
0
        return;
1230
1231
0
    xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1232
1233
0
    if (xmlHashSize(ctxt->attsSpecial) == 0) {
1234
0
        xmlHashFree(ctxt->attsSpecial, NULL);
1235
0
        ctxt->attsSpecial = NULL;
1236
0
    }
1237
0
    return;
1238
0
}
1239
1240
/**
1241
 * xmlCheckLanguageID:
1242
 * @lang:  pointer to the string value
1243
 *
1244
 * DEPRECATED: Internal function, do not use.
1245
 *
1246
 * Checks that the value conforms to the LanguageID production:
1247
 *
1248
 * NOTE: this is somewhat deprecated, those productions were removed from
1249
 *       the XML Second edition.
1250
 *
1251
 * [33] LanguageID ::= Langcode ('-' Subcode)*
1252
 * [34] Langcode ::= ISO639Code |  IanaCode |  UserCode
1253
 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1254
 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1255
 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1256
 * [38] Subcode ::= ([a-z] | [A-Z])+
1257
 *
1258
 * The current REC reference the successors of RFC 1766, currently 5646
1259
 *
1260
 * http://www.rfc-editor.org/rfc/rfc5646.txt
1261
 * langtag       = language
1262
 *                 ["-" script]
1263
 *                 ["-" region]
1264
 *                 *("-" variant)
1265
 *                 *("-" extension)
1266
 *                 ["-" privateuse]
1267
 * language      = 2*3ALPHA            ; shortest ISO 639 code
1268
 *                 ["-" extlang]       ; sometimes followed by
1269
 *                                     ; extended language subtags
1270
 *               / 4ALPHA              ; or reserved for future use
1271
 *               / 5*8ALPHA            ; or registered language subtag
1272
 *
1273
 * extlang       = 3ALPHA              ; selected ISO 639 codes
1274
 *                 *2("-" 3ALPHA)      ; permanently reserved
1275
 *
1276
 * script        = 4ALPHA              ; ISO 15924 code
1277
 *
1278
 * region        = 2ALPHA              ; ISO 3166-1 code
1279
 *               / 3DIGIT              ; UN M.49 code
1280
 *
1281
 * variant       = 5*8alphanum         ; registered variants
1282
 *               / (DIGIT 3alphanum)
1283
 *
1284
 * extension     = singleton 1*("-" (2*8alphanum))
1285
 *
1286
 *                                     ; Single alphanumerics
1287
 *                                     ; "x" reserved for private use
1288
 * singleton     = DIGIT               ; 0 - 9
1289
 *               / %x41-57             ; A - W
1290
 *               / %x59-5A             ; Y - Z
1291
 *               / %x61-77             ; a - w
1292
 *               / %x79-7A             ; y - z
1293
 *
1294
 * it sounds right to still allow Irregular i-xxx IANA and user codes too
1295
 * The parser below doesn't try to cope with extension or privateuse
1296
 * that could be added but that's not interoperable anyway
1297
 *
1298
 * Returns 1 if correct 0 otherwise
1299
 **/
1300
int
1301
xmlCheckLanguageID(const xmlChar * lang)
1302
0
{
1303
0
    const xmlChar *cur = lang, *nxt;
1304
1305
0
    if (cur == NULL)
1306
0
        return (0);
1307
0
    if (((cur[0] == 'i') && (cur[1] == '-')) ||
1308
0
        ((cur[0] == 'I') && (cur[1] == '-')) ||
1309
0
        ((cur[0] == 'x') && (cur[1] == '-')) ||
1310
0
        ((cur[0] == 'X') && (cur[1] == '-'))) {
1311
        /*
1312
         * Still allow IANA code and user code which were coming
1313
         * from the previous version of the XML-1.0 specification
1314
         * it's deprecated but we should not fail
1315
         */
1316
0
        cur += 2;
1317
0
        while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1318
0
               ((cur[0] >= 'a') && (cur[0] <= 'z')))
1319
0
            cur++;
1320
0
        return(cur[0] == 0);
1321
0
    }
1322
0
    nxt = cur;
1323
0
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1324
0
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1325
0
           nxt++;
1326
0
    if (nxt - cur >= 4) {
1327
        /*
1328
         * Reserved
1329
         */
1330
0
        if ((nxt - cur > 8) || (nxt[0] != 0))
1331
0
            return(0);
1332
0
        return(1);
1333
0
    }
1334
0
    if (nxt - cur < 2)
1335
0
        return(0);
1336
    /* we got an ISO 639 code */
1337
0
    if (nxt[0] == 0)
1338
0
        return(1);
1339
0
    if (nxt[0] != '-')
1340
0
        return(0);
1341
1342
0
    nxt++;
1343
0
    cur = nxt;
1344
    /* now we can have extlang or script or region or variant */
1345
0
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1346
0
        goto region_m49;
1347
1348
0
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1349
0
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1350
0
           nxt++;
1351
0
    if (nxt - cur == 4)
1352
0
        goto script;
1353
0
    if (nxt - cur == 2)
1354
0
        goto region;
1355
0
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1356
0
        goto variant;
1357
0
    if (nxt - cur != 3)
1358
0
        return(0);
1359
    /* we parsed an extlang */
1360
0
    if (nxt[0] == 0)
1361
0
        return(1);
1362
0
    if (nxt[0] != '-')
1363
0
        return(0);
1364
1365
0
    nxt++;
1366
0
    cur = nxt;
1367
    /* now we can have script or region or variant */
1368
0
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1369
0
        goto region_m49;
1370
1371
0
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1372
0
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1373
0
           nxt++;
1374
0
    if (nxt - cur == 2)
1375
0
        goto region;
1376
0
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1377
0
        goto variant;
1378
0
    if (nxt - cur != 4)
1379
0
        return(0);
1380
    /* we parsed a script */
1381
0
script:
1382
0
    if (nxt[0] == 0)
1383
0
        return(1);
1384
0
    if (nxt[0] != '-')
1385
0
        return(0);
1386
1387
0
    nxt++;
1388
0
    cur = nxt;
1389
    /* now we can have region or variant */
1390
0
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1391
0
        goto region_m49;
1392
1393
0
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1394
0
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1395
0
           nxt++;
1396
1397
0
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1398
0
        goto variant;
1399
0
    if (nxt - cur != 2)
1400
0
        return(0);
1401
    /* we parsed a region */
1402
0
region:
1403
0
    if (nxt[0] == 0)
1404
0
        return(1);
1405
0
    if (nxt[0] != '-')
1406
0
        return(0);
1407
1408
0
    nxt++;
1409
0
    cur = nxt;
1410
    /* now we can just have a variant */
1411
0
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1412
0
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1413
0
           nxt++;
1414
1415
0
    if ((nxt - cur < 5) || (nxt - cur > 8))
1416
0
        return(0);
1417
1418
    /* we parsed a variant */
1419
0
variant:
1420
0
    if (nxt[0] == 0)
1421
0
        return(1);
1422
0
    if (nxt[0] != '-')
1423
0
        return(0);
1424
    /* extensions and private use subtags not checked */
1425
0
    return (1);
1426
1427
0
region_m49:
1428
0
    if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
1429
0
        ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
1430
0
        nxt += 3;
1431
0
        goto region;
1432
0
    }
1433
0
    return(0);
1434
0
}
1435
1436
/************************************************************************
1437
 *                  *
1438
 *    Parser stacks related functions and macros    *
1439
 *                  *
1440
 ************************************************************************/
1441
1442
/* Forward declaration only; the function body is not defined at this point of the file. */
static xmlChar *
1443
xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar **str);
1444
1445
/**
1446
 * xmlParserNsCreate:
1447
 *
1448
 * Create a new namespace database.
1449
 *
1450
 * Returns the new obejct.
1451
 */
1452
xmlParserNsData *
1453
0
xmlParserNsCreate(void) {
1454
0
    xmlParserNsData *nsdb = xmlMalloc(sizeof(*nsdb));
1455
1456
0
    if (nsdb == NULL)
1457
0
        return(NULL);
1458
0
    memset(nsdb, 0, sizeof(*nsdb));
1459
0
    nsdb->defaultNsIndex = INT_MAX;
1460
1461
0
    return(nsdb);
1462
0
}
1463
1464
/**
1465
 * xmlParserNsFree:
1466
 * @nsdb: namespace database
1467
 *
1468
 * Free a namespace database.
1469
 */
1470
void
1471
0
xmlParserNsFree(xmlParserNsData *nsdb) {
1472
0
    if (nsdb == NULL)
1473
0
        return;
1474
1475
0
    xmlFree(nsdb->extra);
1476
0
    xmlFree(nsdb->hash);
1477
0
    xmlFree(nsdb);
1478
0
}
1479
1480
/**
1481
 * xmlParserNsReset:
1482
 * @nsdb: namespace database
1483
 *
1484
 * Reset a namespace database.
1485
 */
1486
static void
1487
0
xmlParserNsReset(xmlParserNsData *nsdb) {
1488
0
    if (nsdb == NULL)
1489
0
        return;
1490
1491
0
    nsdb->hashElems = 0;
1492
0
    nsdb->elementId = 0;
1493
0
    nsdb->defaultNsIndex = INT_MAX;
1494
1495
0
    if (nsdb->hash)
1496
0
        memset(nsdb->hash, 0, nsdb->hashSize * sizeof(nsdb->hash[0]));
1497
0
}
1498
1499
/**
1500
 * xmlParserStartElement:
1501
 * @nsdb: namespace database
1502
 *
1503
 * Signal that a new element has started.
1504
 *
1505
 * Returns 0 on success, -1 if the element counter overflowed.
1506
 */
1507
static int
1508
0
xmlParserNsStartElement(xmlParserNsData *nsdb) {
1509
0
    if (nsdb->elementId == UINT_MAX)
1510
0
        return(-1);
1511
0
    nsdb->elementId++;
1512
1513
0
    return(0);
1514
0
}
1515
1516
/**
1517
 * xmlParserNsLookup:
1518
 * @ctxt: parser context
1519
 * @prefix: namespace prefix
1520
 * @bucketPtr: optional bucket (return value)
1521
 *
1522
 * Lookup namespace with given prefix. If @bucketPtr is non-NULL, it will
1523
 * be set to the matching bucket, or the first empty bucket if no match
1524
 * was found.
1525
 *
1526
 * Returns the namespace index on success, INT_MAX if no namespace was
1527
 * found.
1528
 */
1529
static int
1530
xmlParserNsLookup(xmlParserCtxtPtr ctxt, const xmlHashedString *prefix,
1531
0
                  xmlParserNsBucket **bucketPtr) {
1532
0
    xmlParserNsBucket *bucket, *tombstone;
1533
0
    unsigned index, hashValue;
1534
1535
0
    if (prefix->name == NULL)
1536
0
        return(ctxt->nsdb->defaultNsIndex);
1537
1538
0
    if (ctxt->nsdb->hashSize == 0)
1539
0
        return(INT_MAX);
1540
1541
0
    hashValue = prefix->hashValue;
1542
0
    index = hashValue & (ctxt->nsdb->hashSize - 1);
1543
0
    bucket = &ctxt->nsdb->hash[index];
1544
0
    tombstone = NULL;
1545
1546
0
    while (bucket->hashValue) {
1547
0
        if (bucket->index == INT_MAX) {
1548
0
            if (tombstone == NULL)
1549
0
                tombstone = bucket;
1550
0
        } else if (bucket->hashValue == hashValue) {
1551
0
            if (ctxt->nsTab[bucket->index * 2] == prefix->name) {
1552
0
                if (bucketPtr != NULL)
1553
0
                    *bucketPtr = bucket;
1554
0
                return(bucket->index);
1555
0
            }
1556
0
        }
1557
1558
0
        index++;
1559
0
        bucket++;
1560
0
        if (index == ctxt->nsdb->hashSize) {
1561
0
            index = 0;
1562
0
            bucket = ctxt->nsdb->hash;
1563
0
        }
1564
0
    }
1565
1566
0
    if (bucketPtr != NULL)
1567
0
        *bucketPtr = tombstone ? tombstone : bucket;
1568
0
    return(INT_MAX);
1569
0
}
1570
1571
/**
1572
 * xmlParserNsLookupUri:
1573
 * @ctxt: parser context
1574
 * @prefix: namespace prefix
1575
 *
1576
 * Lookup namespace URI with given prefix.
1577
 *
1578
 * Returns the namespace URI on success, NULL if no namespace was found.
1579
 */
1580
static const xmlChar *
1581
0
xmlParserNsLookupUri(xmlParserCtxtPtr ctxt, const xmlHashedString *prefix) {
1582
0
    const xmlChar *ret;
1583
0
    int nsIndex;
1584
1585
0
    if (prefix->name == ctxt->str_xml)
1586
0
        return(ctxt->str_xml_ns);
1587
1588
    /*
1589
     * minNsIndex is used when building an entity tree. We must
1590
     * ignore namespaces declared outside the entity.
1591
     */
1592
0
    nsIndex = xmlParserNsLookup(ctxt, prefix, NULL);
1593
0
    if ((nsIndex == INT_MAX) || (nsIndex < ctxt->nsdb->minNsIndex))
1594
0
        return(NULL);
1595
1596
0
    ret = ctxt->nsTab[nsIndex * 2 + 1];
1597
0
    if (ret[0] == 0)
1598
0
        ret = NULL;
1599
0
    return(ret);
1600
0
}
1601
1602
/**
1603
 * xmlParserNsLookupSax:
1604
 * @ctxt: parser context
1605
 * @prefix: namespace prefix
1606
 *
1607
 * Lookup extra data for the given prefix. This returns data stored
1608
 * with xmlParserNsUdpateSax.
1609
 *
1610
 * Returns the data on success, NULL if no namespace was found.
1611
 */
1612
void *
1613
0
xmlParserNsLookupSax(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
1614
0
    xmlHashedString hprefix;
1615
0
    int nsIndex;
1616
1617
0
    if (prefix == ctxt->str_xml)
1618
0
        return(NULL);
1619
1620
0
    hprefix.name = prefix;
1621
0
    if (prefix != NULL)
1622
0
        hprefix.hashValue = xmlDictComputeHash(ctxt->dict, prefix);
1623
0
    else
1624
0
        hprefix.hashValue = 0;
1625
0
    nsIndex = xmlParserNsLookup(ctxt, &hprefix, NULL);
1626
0
    if ((nsIndex == INT_MAX) || (nsIndex < ctxt->nsdb->minNsIndex))
1627
0
        return(NULL);
1628
1629
0
    return(ctxt->nsdb->extra[nsIndex].saxData);
1630
0
}
1631
1632
/**
1633
 * xmlParserNsUpdateSax:
1634
 * @ctxt: parser context
1635
 * @prefix: namespace prefix
1636
 * @saxData: extra data for SAX handler
1637
 *
1638
 * Sets or updates extra data for the given prefix. This value will be
1639
 * returned by xmlParserNsLookupSax as long as the namespace with the
1640
 * given prefix is in scope.
1641
 *
1642
 * Returns the data on success, NULL if no namespace was found.
1643
 */
1644
int
1645
xmlParserNsUpdateSax(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
1646
0
                     void *saxData) {
1647
0
    xmlHashedString hprefix;
1648
0
    int nsIndex;
1649
1650
0
    if (prefix == ctxt->str_xml)
1651
0
        return(-1);
1652
1653
0
    hprefix.name = prefix;
1654
0
    if (prefix != NULL)
1655
0
        hprefix.hashValue = xmlDictComputeHash(ctxt->dict, prefix);
1656
0
    else
1657
0
        hprefix.hashValue = 0;
1658
0
    nsIndex = xmlParserNsLookup(ctxt, &hprefix, NULL);
1659
0
    if ((nsIndex == INT_MAX) || (nsIndex < ctxt->nsdb->minNsIndex))
1660
0
        return(-1);
1661
1662
0
    ctxt->nsdb->extra[nsIndex].saxData = saxData;
1663
0
    return(0);
1664
0
}
1665
1666
/**
1667
 * xmlParserNsGrow:
1668
 * @ctxt: parser context
1669
 *
1670
 * Grows the namespace tables.
1671
 *
1672
 * Returns 0 on success, -1 if a memory allocation failed.
1673
 */
1674
static int
1675
0
xmlParserNsGrow(xmlParserCtxtPtr ctxt) {
1676
0
    const xmlChar **table;
1677
0
    xmlParserNsExtra *extra;
1678
0
    int newSize;
1679
1680
0
    if (ctxt->nsMax > INT_MAX / 2)
1681
0
        goto error;
1682
0
    newSize = ctxt->nsMax ? ctxt->nsMax * 2 : 16;
1683
1684
0
    table = xmlRealloc(ctxt->nsTab, 2 * newSize * sizeof(table[0]));
1685
0
    if (table == NULL)
1686
0
        goto error;
1687
0
    ctxt->nsTab = table;
1688
1689
0
    extra = xmlRealloc(ctxt->nsdb->extra, newSize * sizeof(extra[0]));
1690
0
    if (extra == NULL)
1691
0
        goto error;
1692
0
    ctxt->nsdb->extra = extra;
1693
1694
0
    ctxt->nsMax = newSize;
1695
0
    return(0);
1696
1697
0
error:
1698
0
    xmlErrMemory(ctxt);
1699
0
    return(-1);
1700
0
}
1701
1702
/**
1703
 * xmlParserNsPush:
1704
 * @ctxt: parser context
1705
 * @prefix: prefix with hash value
1706
 * @uri: uri with hash value
1707
 * @saxData: extra data for SAX handler
1708
 * @defAttr: whether the namespace comes from a default attribute
1709
 *
1710
 * Push a new namespace on the table.
1711
 *
1712
 * Returns 1 if the namespace was pushed, 0 if the namespace was ignored,
1713
 * -1 if a memory allocation failed.
1714
 */
1715
static int
1716
xmlParserNsPush(xmlParserCtxtPtr ctxt, const xmlHashedString *prefix,
1717
0
                const xmlHashedString *uri, void *saxData, int defAttr) {
1718
0
    xmlParserNsBucket *bucket = NULL;
1719
0
    xmlParserNsExtra *extra;
1720
0
    const xmlChar **ns;
1721
0
    unsigned hashValue, nsIndex, oldIndex;
1722
1723
0
    if ((prefix != NULL) && (prefix->name == ctxt->str_xml))
1724
0
        return(0);
1725
1726
0
    if ((ctxt->nsNr >= ctxt->nsMax) && (xmlParserNsGrow(ctxt) < 0)) {
1727
0
        xmlErrMemory(ctxt);
1728
0
        return(-1);
1729
0
    }
1730
1731
    /*
1732
     * Default namespace and 'xml' namespace
1733
     */
1734
0
    if ((prefix == NULL) || (prefix->name == NULL)) {
1735
0
        oldIndex = ctxt->nsdb->defaultNsIndex;
1736
1737
0
        if (oldIndex != INT_MAX) {
1738
0
            extra = &ctxt->nsdb->extra[oldIndex];
1739
1740
0
            if (extra->elementId == ctxt->nsdb->elementId) {
1741
0
                if (defAttr == 0)
1742
0
                    xmlErrAttributeDup(ctxt, NULL, BAD_CAST "xmlns");
1743
0
                return(0);
1744
0
            }
1745
1746
0
            if ((ctxt->options & XML_PARSE_NSCLEAN) &&
1747
0
                (uri->name == ctxt->nsTab[oldIndex * 2 + 1]))
1748
0
                return(0);
1749
0
        }
1750
1751
0
        ctxt->nsdb->defaultNsIndex = ctxt->nsNr;
1752
0
        goto populate_entry;
1753
0
    }
1754
1755
    /*
1756
     * Hash table lookup
1757
     */
1758
0
    oldIndex = xmlParserNsLookup(ctxt, prefix, &bucket);
1759
0
    if (oldIndex != INT_MAX) {
1760
0
        extra = &ctxt->nsdb->extra[oldIndex];
1761
1762
        /*
1763
         * Check for duplicate definitions on the same element.
1764
         */
1765
0
        if (extra->elementId == ctxt->nsdb->elementId) {
1766
0
            if (defAttr == 0)
1767
0
                xmlErrAttributeDup(ctxt, BAD_CAST "xmlns", prefix->name);
1768
0
            return(0);
1769
0
        }
1770
1771
0
        if ((ctxt->options & XML_PARSE_NSCLEAN) &&
1772
0
            (uri->name == ctxt->nsTab[bucket->index * 2 + 1]))
1773
0
            return(0);
1774
1775
0
        bucket->index = ctxt->nsNr;
1776
0
        goto populate_entry;
1777
0
    }
1778
1779
    /*
1780
     * Insert new bucket
1781
     */
1782
1783
0
    hashValue = prefix->hashValue;
1784
1785
    /*
1786
     * Grow hash table, 50% fill factor
1787
     */
1788
0
    if (ctxt->nsdb->hashElems + 1 > ctxt->nsdb->hashSize / 2) {
1789
0
        xmlParserNsBucket *newHash;
1790
0
        unsigned newSize, i, index;
1791
1792
0
        if (ctxt->nsdb->hashSize > UINT_MAX / 2) {
1793
0
            xmlErrMemory(ctxt);
1794
0
            return(-1);
1795
0
        }
1796
0
        newSize = ctxt->nsdb->hashSize ? ctxt->nsdb->hashSize * 2 : 16;
1797
0
        newHash = xmlMalloc(newSize * sizeof(newHash[0]));
1798
0
        if (newHash == NULL) {
1799
0
            xmlErrMemory(ctxt);
1800
0
            return(-1);
1801
0
        }
1802
0
        memset(newHash, 0, newSize * sizeof(newHash[0]));
1803
1804
0
        for (i = 0; i < ctxt->nsdb->hashSize; i++) {
1805
0
            unsigned hv = ctxt->nsdb->hash[i].hashValue;
1806
0
            unsigned newIndex;
1807
1808
0
            if ((hv == 0) || (ctxt->nsdb->hash[i].index == INT_MAX))
1809
0
                continue;
1810
0
            newIndex = hv & (newSize - 1);
1811
1812
0
            while (newHash[newIndex].hashValue != 0) {
1813
0
                newIndex++;
1814
0
                if (newIndex == newSize)
1815
0
                    newIndex = 0;
1816
0
            }
1817
1818
0
            newHash[newIndex] = ctxt->nsdb->hash[i];
1819
0
        }
1820
1821
0
        xmlFree(ctxt->nsdb->hash);
1822
0
        ctxt->nsdb->hash = newHash;
1823
0
        ctxt->nsdb->hashSize = newSize;
1824
1825
        /*
1826
         * Relookup
1827
         */
1828
0
        index = hashValue & (newSize - 1);
1829
1830
0
        while (newHash[index].hashValue != 0) {
1831
0
            index++;
1832
0
            if (index == newSize)
1833
0
                index = 0;
1834
0
        }
1835
1836
0
        bucket = &newHash[index];
1837
0
    }
1838
1839
0
    bucket->hashValue = hashValue;
1840
0
    bucket->index = ctxt->nsNr;
1841
0
    ctxt->nsdb->hashElems++;
1842
0
    oldIndex = INT_MAX;
1843
1844
0
populate_entry:
1845
0
    nsIndex = ctxt->nsNr;
1846
1847
0
    ns = &ctxt->nsTab[nsIndex * 2];
1848
0
    ns[0] = prefix ? prefix->name : NULL;
1849
0
    ns[1] = uri->name;
1850
1851
0
    extra = &ctxt->nsdb->extra[nsIndex];
1852
0
    extra->saxData = saxData;
1853
0
    extra->prefixHashValue = prefix ? prefix->hashValue : 0;
1854
0
    extra->uriHashValue = uri->hashValue;
1855
0
    extra->elementId = ctxt->nsdb->elementId;
1856
0
    extra->oldIndex = oldIndex;
1857
1858
0
    ctxt->nsNr++;
1859
1860
0
    return(1);
1861
0
}
1862
1863
/**
1864
 * xmlParserNsPop:
1865
 * @ctxt: an XML parser context
1866
 * @nr:  the number to pop
1867
 *
1868
 * Pops the top @nr namespaces and restores the hash table.
1869
 *
1870
 * Returns the number of namespaces popped.
1871
 */
1872
static int
1873
xmlParserNsPop(xmlParserCtxtPtr ctxt, int nr)
1874
0
{
1875
0
    int i;
1876
1877
    /* assert(nr <= ctxt->nsNr); */
1878
1879
0
    for (i = ctxt->nsNr - 1; i >= ctxt->nsNr - nr; i--) {
1880
0
        const xmlChar *prefix = ctxt->nsTab[i * 2];
1881
0
        xmlParserNsExtra *extra = &ctxt->nsdb->extra[i];
1882
1883
0
        if (prefix == NULL) {
1884
0
            ctxt->nsdb->defaultNsIndex = extra->oldIndex;
1885
0
        } else {
1886
0
            xmlHashedString hprefix;
1887
0
            xmlParserNsBucket *bucket = NULL;
1888
1889
0
            hprefix.name = prefix;
1890
0
            hprefix.hashValue = extra->prefixHashValue;
1891
0
            xmlParserNsLookup(ctxt, &hprefix, &bucket);
1892
            /* assert(bucket && bucket->hashValue); */
1893
0
            bucket->index = extra->oldIndex;
1894
0
        }
1895
0
    }
1896
1897
0
    ctxt->nsNr -= nr;
1898
0
    return(nr);
1899
0
}
1900
1901
static int
1902
0
xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1903
0
    const xmlChar **atts;
1904
0
    unsigned *attallocs;
1905
0
    int maxatts;
1906
1907
0
    if (nr + 5 > ctxt->maxatts) {
1908
0
  maxatts = ctxt->maxatts == 0 ? 55 : (nr + 5) * 2;
1909
0
  atts = (const xmlChar **) xmlMalloc(
1910
0
             maxatts * sizeof(const xmlChar *));
1911
0
  if (atts == NULL) goto mem_error;
1912
0
  attallocs = xmlRealloc(ctxt->attallocs,
1913
0
                               (maxatts / 5) * sizeof(attallocs[0]));
1914
0
  if (attallocs == NULL) {
1915
0
            xmlFree(atts);
1916
0
            goto mem_error;
1917
0
        }
1918
0
        if (ctxt->maxatts > 0)
1919
0
            memcpy(atts, ctxt->atts, ctxt->maxatts * sizeof(const xmlChar *));
1920
0
        xmlFree(ctxt->atts);
1921
0
  ctxt->atts = atts;
1922
0
  ctxt->attallocs = attallocs;
1923
0
  ctxt->maxatts = maxatts;
1924
0
    }
1925
0
    return(ctxt->maxatts);
1926
0
mem_error:
1927
0
    xmlErrMemory(ctxt);
1928
0
    return(-1);
1929
0
}
1930
1931
/**
1932
 * inputPush:
1933
 * @ctxt:  an XML parser context
1934
 * @value:  the parser input
1935
 *
1936
 * Pushes a new parser input on top of the input stack
1937
 *
1938
 * Returns -1 in case of error, the index in the stack otherwise
1939
 */
1940
int
1941
inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1942
0
{
1943
0
    if ((ctxt == NULL) || (value == NULL))
1944
0
        return(-1);
1945
0
    if (ctxt->inputNr >= ctxt->inputMax) {
1946
0
        size_t newSize = ctxt->inputMax * 2;
1947
0
        xmlParserInputPtr *tmp;
1948
1949
0
        tmp = (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1950
0
                                               newSize * sizeof(*tmp));
1951
0
        if (tmp == NULL) {
1952
0
            xmlErrMemory(ctxt);
1953
0
            return (-1);
1954
0
        }
1955
0
        ctxt->inputTab = tmp;
1956
0
        ctxt->inputMax = newSize;
1957
0
    }
1958
0
    ctxt->inputTab[ctxt->inputNr] = value;
1959
0
    ctxt->input = value;
1960
0
    return (ctxt->inputNr++);
1961
0
}
1962
/**
1963
 * inputPop:
1964
 * @ctxt: an XML parser context
1965
 *
1966
 * Pops the top parser input from the input stack
1967
 *
1968
 * Returns the input just removed
1969
 */
1970
xmlParserInputPtr
1971
inputPop(xmlParserCtxtPtr ctxt)
1972
0
{
1973
0
    xmlParserInputPtr ret;
1974
1975
0
    if (ctxt == NULL)
1976
0
        return(NULL);
1977
0
    if (ctxt->inputNr <= 0)
1978
0
        return (NULL);
1979
0
    ctxt->inputNr--;
1980
0
    if (ctxt->inputNr > 0)
1981
0
        ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1982
0
    else
1983
0
        ctxt->input = NULL;
1984
0
    ret = ctxt->inputTab[ctxt->inputNr];
1985
0
    ctxt->inputTab[ctxt->inputNr] = NULL;
1986
0
    return (ret);
1987
0
}
1988
/**
1989
 * nodePush:
1990
 * @ctxt:  an XML parser context
1991
 * @value:  the element node
1992
 *
1993
 * DEPRECATED: Internal function, do not use.
1994
 *
1995
 * Pushes a new element node on top of the node stack
1996
 *
1997
 * Returns -1 in case of error, the index in the stack otherwise
1998
 */
1999
int
2000
nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
2001
0
{
2002
0
    int maxDepth;
2003
2004
0
    if (ctxt == NULL)
2005
0
        return(0);
2006
2007
0
    maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 2048 : 256;
2008
0
    if (ctxt->nodeNr > maxDepth) {
2009
0
        xmlFatalErrMsgInt(ctxt, XML_ERR_RESOURCE_LIMIT,
2010
0
                "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
2011
0
                ctxt->nodeNr);
2012
0
        xmlHaltParser(ctxt);
2013
0
        return(-1);
2014
0
    }
2015
0
    if (ctxt->nodeNr >= ctxt->nodeMax) {
2016
0
        xmlNodePtr *tmp;
2017
2018
0
  tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
2019
0
                                      ctxt->nodeMax * 2 *
2020
0
                                      sizeof(ctxt->nodeTab[0]));
2021
0
        if (tmp == NULL) {
2022
0
            xmlErrMemory(ctxt);
2023
0
            return (-1);
2024
0
        }
2025
0
        ctxt->nodeTab = tmp;
2026
0
  ctxt->nodeMax *= 2;
2027
0
    }
2028
0
    ctxt->nodeTab[ctxt->nodeNr] = value;
2029
0
    ctxt->node = value;
2030
0
    return (ctxt->nodeNr++);
2031
0
}
2032
2033
/**
2034
 * nodePop:
2035
 * @ctxt: an XML parser context
2036
 *
2037
 * DEPRECATED: Internal function, do not use.
2038
 *
2039
 * Pops the top element node from the node stack
2040
 *
2041
 * Returns the node just removed
2042
 */
2043
xmlNodePtr
2044
nodePop(xmlParserCtxtPtr ctxt)
2045
0
{
2046
0
    xmlNodePtr ret;
2047
2048
0
    if (ctxt == NULL) return(NULL);
2049
0
    if (ctxt->nodeNr <= 0)
2050
0
        return (NULL);
2051
0
    ctxt->nodeNr--;
2052
0
    if (ctxt->nodeNr > 0)
2053
0
        ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
2054
0
    else
2055
0
        ctxt->node = NULL;
2056
0
    ret = ctxt->nodeTab[ctxt->nodeNr];
2057
0
    ctxt->nodeTab[ctxt->nodeNr] = NULL;
2058
0
    return (ret);
2059
0
}
2060
2061
/**
2062
 * nameNsPush:
2063
 * @ctxt:  an XML parser context
2064
 * @value:  the element name
2065
 * @prefix:  the element prefix
2066
 * @URI:  the element namespace name
2067
 * @line:  the current line number for error messages
2068
 * @nsNr:  the number of namespaces pushed on the namespace table
2069
 *
2070
 * Pushes a new element name/prefix/URL on top of the name stack
2071
 *
2072
 * Returns -1 in case of error, the index in the stack otherwise
2073
 */
2074
static int
2075
nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
2076
           const xmlChar *prefix, const xmlChar *URI, int line, int nsNr)
2077
0
{
2078
0
    xmlStartTag *tag;
2079
2080
0
    if (ctxt->nameNr >= ctxt->nameMax) {
2081
0
        const xmlChar * *tmp;
2082
0
        xmlStartTag *tmp2;
2083
0
        ctxt->nameMax *= 2;
2084
0
        tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
2085
0
                                    ctxt->nameMax *
2086
0
                                    sizeof(ctxt->nameTab[0]));
2087
0
        if (tmp == NULL) {
2088
0
      ctxt->nameMax /= 2;
2089
0
      goto mem_error;
2090
0
        }
2091
0
  ctxt->nameTab = tmp;
2092
0
        tmp2 = (xmlStartTag *) xmlRealloc((void * *)ctxt->pushTab,
2093
0
                                    ctxt->nameMax *
2094
0
                                    sizeof(ctxt->pushTab[0]));
2095
0
        if (tmp2 == NULL) {
2096
0
      ctxt->nameMax /= 2;
2097
0
      goto mem_error;
2098
0
        }
2099
0
  ctxt->pushTab = tmp2;
2100
0
    } else if (ctxt->pushTab == NULL) {
2101
0
        ctxt->pushTab = (xmlStartTag *) xmlMalloc(ctxt->nameMax *
2102
0
                                            sizeof(ctxt->pushTab[0]));
2103
0
        if (ctxt->pushTab == NULL)
2104
0
            goto mem_error;
2105
0
    }
2106
0
    ctxt->nameTab[ctxt->nameNr] = value;
2107
0
    ctxt->name = value;
2108
0
    tag = &ctxt->pushTab[ctxt->nameNr];
2109
0
    tag->prefix = prefix;
2110
0
    tag->URI = URI;
2111
0
    tag->line = line;
2112
0
    tag->nsNr = nsNr;
2113
0
    return (ctxt->nameNr++);
2114
0
mem_error:
2115
0
    xmlErrMemory(ctxt);
2116
0
    return (-1);
2117
0
}
2118
#ifdef LIBXML_PUSH_ENABLED
/**
 * nameNsPop:
 * @ctxt: an XML parser context
 *
 * Removes the topmost element name from the name stack and makes the
 * name below it, if there is one, the current name.
 *
 * Returns the name just removed, or NULL when the stack is empty
 */
static const xmlChar *
nameNsPop(xmlParserCtxtPtr ctxt)
{
    const xmlChar *popped;
    int top;

    if (ctxt->nameNr <= 0)
        return(NULL);

    top = --ctxt->nameNr;
    popped = ctxt->nameTab[top];
    /* Clear the vacated slot so the table keeps no stale pointer. */
    ctxt->nameTab[top] = NULL;

    ctxt->name = (top > 0) ? ctxt->nameTab[top - 1] : NULL;

    return(popped);
}
#endif /* LIBXML_PUSH_ENABLED */
2144
2145
/**
2146
 * namePush:
2147
 * @ctxt:  an XML parser context
2148
 * @value:  the element name
2149
 *
2150
 * DEPRECATED: Internal function, do not use.
2151
 *
2152
 * Pushes a new element name on top of the name stack
2153
 *
2154
 * Returns -1 in case of error, the index in the stack otherwise
2155
 */
2156
int
2157
namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
2158
0
{
2159
0
    if (ctxt == NULL) return (-1);
2160
2161
0
    if (ctxt->nameNr >= ctxt->nameMax) {
2162
0
        const xmlChar * *tmp;
2163
0
        tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
2164
0
                                    ctxt->nameMax * 2 *
2165
0
                                    sizeof(ctxt->nameTab[0]));
2166
0
        if (tmp == NULL) {
2167
0
      goto mem_error;
2168
0
        }
2169
0
  ctxt->nameTab = tmp;
2170
0
        ctxt->nameMax *= 2;
2171
0
    }
2172
0
    ctxt->nameTab[ctxt->nameNr] = value;
2173
0
    ctxt->name = value;
2174
0
    return (ctxt->nameNr++);
2175
0
mem_error:
2176
0
    xmlErrMemory(ctxt);
2177
0
    return (-1);
2178
0
}
2179
2180
/**
2181
 * namePop:
2182
 * @ctxt: an XML parser context
2183
 *
2184
 * DEPRECATED: Internal function, do not use.
2185
 *
2186
 * Pops the top element name from the name stack
2187
 *
2188
 * Returns the name just removed
2189
 */
2190
const xmlChar *
2191
namePop(xmlParserCtxtPtr ctxt)
2192
0
{
2193
0
    const xmlChar *ret;
2194
2195
0
    if ((ctxt == NULL) || (ctxt->nameNr <= 0))
2196
0
        return (NULL);
2197
0
    ctxt->nameNr--;
2198
0
    if (ctxt->nameNr > 0)
2199
0
        ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
2200
0
    else
2201
0
        ctxt->name = NULL;
2202
0
    ret = ctxt->nameTab[ctxt->nameNr];
2203
0
    ctxt->nameTab[ctxt->nameNr] = NULL;
2204
0
    return (ret);
2205
0
}
2206
2207
0
/*
 * spacePush:
 * Pushes @val on the stack of int values kept in ctxt->spaceTab and
 * points ctxt->space at the new top entry. The table is doubled in
 * size when it is full.
 *
 * Returns -1 in case of allocation failure, the index in the stack
 * otherwise
 */
static int spacePush(xmlParserCtxtPtr ctxt, int val) {
    int idx;

    if (ctxt->spaceNr >= ctxt->spaceMax) {
        int newMax = ctxt->spaceMax * 2;
        int *grown;

        grown = (int *) xmlRealloc(ctxt->spaceTab,
                                   newMax * sizeof(ctxt->spaceTab[0]));
        if (grown == NULL) {
            /* The old table and its capacity are left untouched. */
            xmlErrMemory(ctxt);
            return(-1);
        }
        ctxt->spaceTab = grown;
        ctxt->spaceMax = newMax;
    }

    idx = ctxt->spaceNr;
    ctxt->spaceTab[idx] = val;
    ctxt->space = &ctxt->spaceTab[idx];
    ctxt->spaceNr = idx + 1;

    return(idx);
}
2225
2226
0
/*
 * spacePop:
 * Pops the top value from the stack kept in ctxt->spaceTab. The vacated
 * slot is overwritten with -1 and ctxt->space is pointed at the new top
 * entry, or at slot 0 when the stack becomes empty.
 *
 * Returns the value removed, or 0 when the stack was already empty
 */
static int spacePop(xmlParserCtxtPtr ctxt) {
    int top;
    int popped;

    if (ctxt->spaceNr <= 0)
        return(0);

    top = --ctxt->spaceNr;
    popped = ctxt->spaceTab[top];
    ctxt->spaceTab[top] = -1;

    /* ctxt->space always points into the table, even when it is empty. */
    ctxt->space = &ctxt->spaceTab[(top > 0) ? (top - 1) : 0];

    return(popped);
}
2238
2239
/*
2240
 * Macros for accessing the content. Those should be used only by the parser,
2241
 * and not exported.
2242
 *
2243
 * Dirty macros, i.e. one often need to make assumption on the context to
2244
 * use them
2245
 *
2246
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
2247
 *           To be used with extreme caution since operations consuming
2248
 *           characters may move the input buffer to a different location !
2249
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
2250
 *           This should be used internally by the parser
2251
 *           only to compare to ASCII values otherwise it would break when
2252
 *           running with UTF-8 encoding.
2253
 *   RAW     same as CUR but in the input buffer, bypass any token
2254
 *           extraction that may have been done
2255
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used only
2256
 *           to compare on ASCII based substring.
2257
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
2258
 *           strings without newlines within the parser.
2259
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII
2260
 *           defined char within the parser.
2261
 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
2262
 *
2263
 *   NEXT    Skip to the next character, this does the proper decoding
2264
 *           in UTF-8 mode. It also pop-up unfinished entities on the fly.
2265
 *   NEXTL(l) Skip the current unicode character of l xmlChars long.
2266
 *   CUR_CHAR(l) returns the current unicode character (int), set l
2267
 *           to the number of xmlChars used for the encoding [0-5].
2268
 *   CUR_SCHAR  same but operate on a string instead of the context
2269
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
2270
 *            the index
2271
 *   GROW, SHRINK  handling of input buffers
2272
 */
2273
2274
0
/*
 * Direct accessors for the read position of the current input. They all
 * expect a variable named ctxt to be in scope at the point of use.
 */
#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur
#define BASE_PTR ctxt->input->base

/*
 * CMPn(s, c1, ..., cn): true when the first n bytes at s are equal to
 * c1..cn, compared as unsigned char values.
 *
 * The comparison short-circuits on the first mismatch, so bytes past a
 * mismatching position are never read. Every argument is parenthesized
 * in the expansion so that expressions such as "p + 1" can be passed
 * safely, and the cast keeps the pointer const-qualified. Note that s
 * is still expanded once per compared byte and therefore must not have
 * side effects.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((const unsigned char *) (s))[ 0 ] == (c1) && \
    ((const unsigned char *) (s))[ 1 ] == (c2) && \
    ((const unsigned char *) (s))[ 2 ] == (c3) && \
    ((const unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && \
    ((const unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && \
    ((const unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && \
    ((const unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && \
    ((const unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((const unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((const unsigned char *) (s))[ 9 ] == (c10) )
2297
2298
0
/*
 * SKIP(val): advance the read position and the column counter by val
 * bytes, then try to grow the input when the new position holds a zero
 * byte. val is expanded twice and must not have side effects.
 */
#define SKIP(val) do {                                                  \
    ctxt->input->cur += (val),ctxt->input->col+=(val);                  \
    if (*ctxt->input->cur == 0)                                         \
        xmlParserGrow(ctxt);                                            \
  } while (0)

/*
 * SKIPL(val): like SKIP, but walks byte by byte so that newlines update
 * the line counter and reset the column. val is re-evaluated on every
 * loop iteration and must not have side effects.
 */
#define SKIPL(val) do {                                                 \
    int skipl;                                                          \
    for(skipl=0; skipl<(val); skipl++) {                                \
        if (*(ctxt->input->cur) == '\n') {                              \
        ctxt->input->line++; ctxt->input->col = 1;                      \
        } else ctxt->input->col++;                                      \
        ctxt->input->cur++;                                             \
    }                                                                   \
    if (*ctxt->input->cur == 0)                                         \
        xmlParserGrow(ctxt);                                            \
  } while (0)

/*
 * SHRINK and GROW expand to a bare "if" statement, not to a
 * do { } while (0) block: they must be used as a statement of their
 * own and never directly in front of an "else". Both do nothing when
 * PARSER_PROGRESSIVE(ctxt) is true.
 */
#define SHRINK \
    if ((!PARSER_PROGRESSIVE(ctxt)) && \
        (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
        (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
        xmlParserShrink(ctxt);

#define GROW \
    if ((!PARSER_PROGRESSIVE(ctxt)) && \
        (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
        xmlParserGrow(ctxt);

#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

#define SKIP_BLANKS_PE xmlSkipBlankCharsPE(ctxt)

#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: skip exactly one byte without any newline bookkeeping. It
 * expands to a bare brace block and must not be placed directly in
 * front of an "else".
 */
#define NEXT1 {                                                         \
        ctxt->input->col++;                                             \
        ctxt->input->cur++;                                             \
        if (*ctxt->input->cur == 0)                                     \
            xmlParserGrow(ctxt);                                        \
    }

/*
 * NEXTL(l): skip the current character, which is l bytes long. A
 * newline at the current position bumps the line counter and resets the
 * column; anything else advances the column by one.
 */
#define NEXTL(l) do {                                                   \
    if (*(ctxt->input->cur) == '\n') {                                  \
        ctxt->input->line++; ctxt->input->col = 1;                      \
    } else ctxt->input->col++;                                          \
    ctxt->input->cur += (l);                                            \
  } while (0)

#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * COPY_BUF(b, i, v): append character v to buffer b at index i and
 * advance i; values below 0x80 are stored as a single byte, the others
 * go through xmlCopyCharMultiByte. It expands to a bare if/else, so it
 * must be used as a statement of its own. b, i and v may be expanded
 * more than once.
 */
#define COPY_BUF(b, i, v)                                               \
    if ((v) < 0x80) (b)[(i)++] = (v);                                   \
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v))
2353
2354
/**
2355
 * xmlSkipBlankChars:
2356
 * @ctxt:  the XML parser context
2357
 *
2358
 * DEPRECATED: Internal function, do not use.
2359
 *
2360
 * Skip whitespace in the input stream.
2361
 *
2362
 * Returns the number of space chars skipped
2363
 */
2364
int
2365
0
xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
2366
0
    const xmlChar *cur;
2367
0
    int res = 0;
2368
2369
    /*
2370
     * It's Okay to use CUR/NEXT here since all the blanks are on
2371
     * the ASCII range.
2372
     */
2373
0
    cur = ctxt->input->cur;
2374
0
    while (IS_BLANK_CH(*cur)) {
2375
0
        if (*cur == '\n') {
2376
0
            ctxt->input->line++; ctxt->input->col = 1;
2377
0
        } else {
2378
0
            ctxt->input->col++;
2379
0
        }
2380
0
        cur++;
2381
0
        if (res < INT_MAX)
2382
0
            res++;
2383
0
        if (*cur == 0) {
2384
0
            ctxt->input->cur = cur;
2385
0
            xmlParserGrow(ctxt);
2386
0
            cur = ctxt->input->cur;
2387
0
        }
2388
0
    }
2389
0
    ctxt->input->cur = cur;
2390
2391
0
    return(res);
2392
0
}
2393
2394
static void
2395
0
xmlPopPE(xmlParserCtxtPtr ctxt) {
2396
0
    unsigned long consumed;
2397
0
    xmlEntityPtr ent;
2398
2399
0
    ent = ctxt->input->entity;
2400
2401
0
    ent->flags &= ~XML_ENT_EXPANDING;
2402
2403
0
    if ((ent->flags & XML_ENT_CHECKED) == 0) {
2404
0
        int result;
2405
2406
        /*
2407
         * Read the rest of the stream in case of errors. We want
2408
         * to account for the whole entity size.
2409
         */
2410
0
        do {
2411
0
            ctxt->input->cur = ctxt->input->end;
2412
0
            xmlParserShrink(ctxt);
2413
0
            result = xmlParserGrow(ctxt);
2414
0
        } while (result > 0);
2415
2416
0
        consumed = ctxt->input->consumed;
2417
0
        xmlSaturatedAddSizeT(&consumed,
2418
0
                             ctxt->input->end - ctxt->input->base);
2419
2420
0
        xmlSaturatedAdd(&ent->expandedSize, consumed);
2421
2422
        /*
2423
         * Add to sizeentities when parsing an external entity
2424
         * for the first time.
2425
         */
2426
0
        if (ent->etype == XML_EXTERNAL_PARAMETER_ENTITY) {
2427
0
            xmlSaturatedAdd(&ctxt->sizeentities, consumed);
2428
0
        }
2429
2430
0
        ent->flags |= XML_ENT_CHECKED;
2431
0
    }
2432
2433
0
    xmlPopInput(ctxt);
2434
2435
0
    xmlParserEntityCheck(ctxt, ent->expandedSize);
2436
0
}
2437
2438
/**
2439
 * xmlSkipBlankCharsPE:
2440
 * @ctxt:  the XML parser context
2441
 *
2442
 * Skip whitespace in the input stream, also handling parameter
2443
 * entities.
2444
 *
2445
 * Returns the number of space chars skipped
2446
 */
2447
static int
2448
0
xmlSkipBlankCharsPE(xmlParserCtxtPtr ctxt) {
2449
0
    int res = 0;
2450
0
    int inParam;
2451
0
    int expandParam;
2452
2453
0
    inParam = PARSER_IN_PE(ctxt);
2454
0
    expandParam = PARSER_EXTERNAL(ctxt);
2455
2456
0
    if (!inParam && !expandParam)
2457
0
        return(xmlSkipBlankChars(ctxt));
2458
2459
0
    while (PARSER_STOPPED(ctxt) == 0) {
2460
0
        if (IS_BLANK_CH(CUR)) { /* CHECKED tstblanks.xml */
2461
0
            NEXT;
2462
0
        } else if (CUR == '%') {
2463
0
            if ((expandParam == 0) ||
2464
0
                (IS_BLANK_CH(NXT(1))) || (NXT(1) == 0))
2465
0
                break;
2466
2467
            /*
2468
             * Expand parameter entity. We continue to consume
2469
             * whitespace at the start of the entity and possible
2470
             * even consume the whole entity and pop it. We might
2471
             * even pop multiple PEs in this loop.
2472
             */
2473
0
            xmlParsePEReference(ctxt);
2474
2475
0
            inParam = PARSER_IN_PE(ctxt);
2476
0
            expandParam = PARSER_EXTERNAL(ctxt);
2477
0
        } else if (CUR == 0) {
2478
0
            if (inParam == 0)
2479
0
                break;
2480
2481
0
            xmlPopPE(ctxt);
2482
2483
0
            inParam = PARSER_IN_PE(ctxt);
2484
0
            expandParam = PARSER_EXTERNAL(ctxt);
2485
0
        } else {
2486
0
            break;
2487
0
        }
2488
2489
        /*
2490
         * Also increase the counter when entering or exiting a PERef.
2491
         * The spec says: "When a parameter-entity reference is recognized
2492
         * in the DTD and included, its replacement text MUST be enlarged
2493
         * by the attachment of one leading and one following space (#x20)
2494
         * character."
2495
         */
2496
0
        if (res < INT_MAX)
2497
0
            res++;
2498
0
    }
2499
2500
0
    return(res);
2501
0
}
2502
2503
/************************************************************************
2504
 *                  *
2505
 *    Commodity functions to handle entities      *
2506
 *                  *
2507
 ************************************************************************/
2508
2509
/**
2510
 * xmlPopInput:
2511
 * @ctxt:  an XML parser context
2512
 *
2513
 * xmlPopInput: the current input pointed by ctxt->input came to an end
2514
 *          pop it and return the next char.
2515
 *
2516
 * Returns the current xmlChar in the parser context
2517
 */
2518
xmlChar
2519
0
xmlPopInput(xmlParserCtxtPtr ctxt) {
2520
0
    xmlParserInputPtr input;
2521
2522
0
    if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
2523
0
    input = inputPop(ctxt);
2524
0
    xmlFreeInputStream(input);
2525
0
    if (*ctxt->input->cur == 0)
2526
0
        xmlParserGrow(ctxt);
2527
0
    return(CUR);
2528
0
}
2529
2530
/**
2531
 * xmlPushInput:
2532
 * @ctxt:  an XML parser context
2533
 * @input:  an XML parser input fragment (entity, XML fragment ...).
2534
 *
2535
 * Push an input stream onto the stack.
2536
 *
2537
 * Returns -1 in case of error or the index in the input stack
2538
 */
2539
int
2540
0
xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
2541
0
    int maxDepth;
2542
0
    int ret;
2543
2544
0
    if ((ctxt == NULL) || (input == NULL))
2545
0
        return(-1);
2546
2547
0
    maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 40 : 20;
2548
0
    if (ctxt->inputNr > maxDepth) {
2549
0
        xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
2550
0
                       "Maximum entity nesting depth exceeded");
2551
0
        xmlHaltParser(ctxt);
2552
0
  return(-1);
2553
0
    }
2554
0
    ret = inputPush(ctxt, input);
2555
0
    GROW;
2556
0
    return(ret);
2557
0
}
2558
2559
/**
2560
 * xmlParseCharRef:
2561
 * @ctxt:  an XML parser context
2562
 *
2563
 * DEPRECATED: Internal function, don't use.
2564
 *
2565
 * Parse a numeric character reference. Always consumes '&'.
2566
 *
2567
 * [66] CharRef ::= '&#' [0-9]+ ';' |
2568
 *                  '&#x' [0-9a-fA-F]+ ';'
2569
 *
2570
 * [ WFC: Legal Character ]
2571
 * Characters referred to using character references must match the
2572
 * production for Char.
2573
 *
2574
 * Returns the value parsed (as an int), 0 in case of error
2575
 */
2576
int
2577
0
xmlParseCharRef(xmlParserCtxtPtr ctxt) {
2578
0
    int val = 0;
2579
0
    int count = 0;
2580
2581
    /*
2582
     * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2583
     */
2584
0
    if ((RAW == '&') && (NXT(1) == '#') &&
2585
0
        (NXT(2) == 'x')) {
2586
0
  SKIP(3);
2587
0
  GROW;
2588
0
  while ((RAW != ';') && (PARSER_STOPPED(ctxt) == 0)) {
2589
0
      if (count++ > 20) {
2590
0
    count = 0;
2591
0
    GROW;
2592
0
      }
2593
0
      if ((RAW >= '0') && (RAW <= '9'))
2594
0
          val = val * 16 + (CUR - '0');
2595
0
      else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2596
0
          val = val * 16 + (CUR - 'a') + 10;
2597
0
      else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2598
0
          val = val * 16 + (CUR - 'A') + 10;
2599
0
      else {
2600
0
    xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2601
0
    val = 0;
2602
0
    break;
2603
0
      }
2604
0
      if (val > 0x110000)
2605
0
          val = 0x110000;
2606
2607
0
      NEXT;
2608
0
      count++;
2609
0
  }
2610
0
  if (RAW == ';') {
2611
      /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2612
0
      ctxt->input->col++;
2613
0
      ctxt->input->cur++;
2614
0
  }
2615
0
    } else if  ((RAW == '&') && (NXT(1) == '#')) {
2616
0
  SKIP(2);
2617
0
  GROW;
2618
0
  while (RAW != ';') { /* loop blocked by count */
2619
0
      if (count++ > 20) {
2620
0
    count = 0;
2621
0
    GROW;
2622
0
      }
2623
0
      if ((RAW >= '0') && (RAW <= '9'))
2624
0
          val = val * 10 + (CUR - '0');
2625
0
      else {
2626
0
    xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2627
0
    val = 0;
2628
0
    break;
2629
0
      }
2630
0
      if (val > 0x110000)
2631
0
          val = 0x110000;
2632
2633
0
      NEXT;
2634
0
      count++;
2635
0
  }
2636
0
  if (RAW == ';') {
2637
      /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2638
0
      ctxt->input->col++;
2639
0
      ctxt->input->cur++;
2640
0
  }
2641
0
    } else {
2642
0
        if (RAW == '&')
2643
0
            SKIP(1);
2644
0
        xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2645
0
    }
2646
2647
    /*
2648
     * [ WFC: Legal Character ]
2649
     * Characters referred to using character references must match the
2650
     * production for Char.
2651
     */
2652
0
    if (val >= 0x110000) {
2653
0
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2654
0
                "xmlParseCharRef: character reference out of bounds\n",
2655
0
          val);
2656
0
    } else if (IS_CHAR(val)) {
2657
0
        return(val);
2658
0
    } else {
2659
0
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2660
0
                          "xmlParseCharRef: invalid xmlChar value %d\n",
2661
0
                    val);
2662
0
    }
2663
0
    return(0);
2664
0
}
2665
2666
/**
2667
 * xmlParseStringCharRef:
2668
 * @ctxt:  an XML parser context
2669
 * @str:  a pointer to an index in the string
2670
 *
2671
 * parse Reference declarations, variant parsing from a string rather
2672
 * than an an input flow.
2673
 *
2674
 * [66] CharRef ::= '&#' [0-9]+ ';' |
2675
 *                  '&#x' [0-9a-fA-F]+ ';'
2676
 *
2677
 * [ WFC: Legal Character ]
2678
 * Characters referred to using character references must match the
2679
 * production for Char.
2680
 *
2681
 * Returns the value parsed (as an int), 0 in case of error, str will be
2682
 *         updated to the current value of the index
2683
 */
2684
static int
2685
0
xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2686
0
    const xmlChar *ptr;
2687
0
    xmlChar cur;
2688
0
    int val = 0;
2689
2690
0
    if ((str == NULL) || (*str == NULL)) return(0);
2691
0
    ptr = *str;
2692
0
    cur = *ptr;
2693
0
    if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2694
0
  ptr += 3;
2695
0
  cur = *ptr;
2696
0
  while (cur != ';') { /* Non input consuming loop */
2697
0
      if ((cur >= '0') && (cur <= '9'))
2698
0
          val = val * 16 + (cur - '0');
2699
0
      else if ((cur >= 'a') && (cur <= 'f'))
2700
0
          val = val * 16 + (cur - 'a') + 10;
2701
0
      else if ((cur >= 'A') && (cur <= 'F'))
2702
0
          val = val * 16 + (cur - 'A') + 10;
2703
0
      else {
2704
0
    xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2705
0
    val = 0;
2706
0
    break;
2707
0
      }
2708
0
      if (val > 0x110000)
2709
0
          val = 0x110000;
2710
2711
0
      ptr++;
2712
0
      cur = *ptr;
2713
0
  }
2714
0
  if (cur == ';')
2715
0
      ptr++;
2716
0
    } else if  ((cur == '&') && (ptr[1] == '#')){
2717
0
  ptr += 2;
2718
0
  cur = *ptr;
2719
0
  while (cur != ';') { /* Non input consuming loops */
2720
0
      if ((cur >= '0') && (cur <= '9'))
2721
0
          val = val * 10 + (cur - '0');
2722
0
      else {
2723
0
    xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2724
0
    val = 0;
2725
0
    break;
2726
0
      }
2727
0
      if (val > 0x110000)
2728
0
          val = 0x110000;
2729
2730
0
      ptr++;
2731
0
      cur = *ptr;
2732
0
  }
2733
0
  if (cur == ';')
2734
0
      ptr++;
2735
0
    } else {
2736
0
  xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2737
0
  return(0);
2738
0
    }
2739
0
    *str = ptr;
2740
2741
    /*
2742
     * [ WFC: Legal Character ]
2743
     * Characters referred to using character references must match the
2744
     * production for Char.
2745
     */
2746
0
    if (val >= 0x110000) {
2747
0
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2748
0
                "xmlParseStringCharRef: character reference out of bounds\n",
2749
0
                val);
2750
0
    } else if (IS_CHAR(val)) {
2751
0
        return(val);
2752
0
    } else {
2753
0
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2754
0
        "xmlParseStringCharRef: invalid xmlChar value %d\n",
2755
0
        val);
2756
0
    }
2757
0
    return(0);
2758
0
}
2759
2760
/**
2761
 * xmlParserHandlePEReference:
2762
 * @ctxt:  the parser context
2763
 *
2764
 * DEPRECATED: Internal function, do not use.
2765
 *
2766
 * [69] PEReference ::= '%' Name ';'
2767
 *
2768
 * [ WFC: No Recursion ]
2769
 * A parsed entity must not contain a recursive
2770
 * reference to itself, either directly or indirectly.
2771
 *
2772
 * [ WFC: Entity Declared ]
2773
 * In a document without any DTD, a document with only an internal DTD
2774
 * subset which contains no parameter entity references, or a document
2775
 * with "standalone='yes'", ...  ... The declaration of a parameter
2776
 * entity must precede any reference to it...
2777
 *
2778
 * [ VC: Entity Declared ]
2779
 * In a document with an external subset or external parameter entities
2780
 * with "standalone='no'", ...  ... The declaration of a parameter entity
2781
 * must precede any reference to it...
2782
 *
2783
 * [ WFC: In DTD ]
2784
 * Parameter-entity references may only appear in the DTD.
2785
 * NOTE: misleading but this is handled.
2786
 *
2787
 * A PEReference may have been detected in the current input stream
2788
 * the handling is done accordingly to
2789
 *      http://www.w3.org/TR/REC-xml#entproc
2790
 * i.e.
2791
 *   - Included in literal in entity values
2792
 *   - Included as Parameter Entity reference within DTDs
2793
 */
2794
void
2795
0
xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
2796
0
    xmlParsePEReference(ctxt);
2797
0
}
2798
2799
/**
2800
 * xmlStringLenDecodeEntities:
2801
 * @ctxt:  the parser context
2802
 * @str:  the input string
2803
 * @len: the string length
2804
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2805
 * @end:  an end marker xmlChar, 0 if none
2806
 * @end2:  an end marker xmlChar, 0 if none
2807
 * @end3:  an end marker xmlChar, 0 if none
2808
 *
2809
 * DEPRECATED: Internal function, don't use.
2810
 *
2811
 * Returns A newly allocated string with the substitution done. The caller
2812
 *      must deallocate it !
2813
 */
2814
xmlChar *
2815
xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2816
                           int what ATTRIBUTE_UNUSED,
2817
0
                           xmlChar end, xmlChar end2, xmlChar end3) {
2818
0
    if ((ctxt == NULL) || (str == NULL) || (len < 0))
2819
0
        return(NULL);
2820
2821
0
    if ((str[len] != 0) ||
2822
0
        (end != 0) || (end2 != 0) || (end3 != 0))
2823
0
        return(NULL);
2824
2825
0
    return(xmlExpandEntitiesInAttValue(ctxt, str, 0));
2826
0
}
2827
2828
/**
2829
 * xmlStringDecodeEntities:
2830
 * @ctxt:  the parser context
2831
 * @str:  the input string
2832
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2833
 * @end:  an end marker xmlChar, 0 if none
2834
 * @end2:  an end marker xmlChar, 0 if none
2835
 * @end3:  an end marker xmlChar, 0 if none
2836
 *
2837
 * DEPRECATED: Internal function, don't use.
2838
 *
2839
 * Returns A newly allocated string with the substitution done. The caller
2840
 *      must deallocate it !
2841
 */
2842
xmlChar *
2843
xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str,
2844
                        int what ATTRIBUTE_UNUSED,
2845
0
            xmlChar end, xmlChar  end2, xmlChar end3) {
2846
0
    if ((ctxt == NULL) || (str == NULL))
2847
0
        return(NULL);
2848
2849
0
    if ((end != 0) || (end2 != 0) || (end3 != 0))
2850
0
        return(NULL);
2851
2852
0
    return(xmlExpandEntitiesInAttValue(ctxt, str, 0));
2853
0
}
2854
2855
/************************************************************************
2856
 *                  *
2857
 *    Commodity functions, cleanup needed ?     *
2858
 *                  *
2859
 ************************************************************************/
2860
2861
/**
2862
 * areBlanks:
2863
 * @ctxt:  an XML parser context
2864
 * @str:  a xmlChar *
2865
 * @len:  the size of @str
2866
 * @blank_chars: we know the chars are blanks
2867
 *
2868
 * Is this a sequence of blank chars that one can ignore ?
2869
 *
2870
 * Returns 1 if ignorable 0 otherwise.
2871
 */
2872
2873
static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2874
0
                     int blank_chars) {
2875
0
    int i;
2876
0
    xmlNodePtr lastChild;
2877
2878
    /*
2879
     * Don't spend time trying to differentiate them, the same callback is
2880
     * used !
2881
     */
2882
0
    if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
2883
0
  return(0);
2884
2885
    /*
2886
     * Check for xml:space value.
2887
     */
2888
0
    if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2889
0
        (*(ctxt->space) == -2))
2890
0
  return(0);
2891
2892
    /*
2893
     * Check that the string is made of blanks
2894
     */
2895
0
    if (blank_chars == 0) {
2896
0
  for (i = 0;i < len;i++)
2897
0
      if (!(IS_BLANK_CH(str[i]))) return(0);
2898
0
    }
2899
2900
    /*
2901
     * Look if the element is mixed content in the DTD if available
2902
     */
2903
0
    if (ctxt->node == NULL) return(0);
2904
0
    if (ctxt->myDoc != NULL) {
2905
0
        xmlElementPtr elemDecl = NULL;
2906
0
        xmlDocPtr doc = ctxt->myDoc;
2907
0
        const xmlChar *prefix = NULL;
2908
2909
0
        if (ctxt->node->ns)
2910
0
            prefix = ctxt->node->ns->prefix;
2911
0
        if (doc->intSubset != NULL)
2912
0
            elemDecl = xmlHashLookup2(doc->intSubset->elements, ctxt->node->name,
2913
0
                                      prefix);
2914
0
        if ((elemDecl == NULL) && (doc->extSubset != NULL))
2915
0
            elemDecl = xmlHashLookup2(doc->extSubset->elements, ctxt->node->name,
2916
0
                                      prefix);
2917
0
        if (elemDecl != NULL) {
2918
0
            if (elemDecl->etype == XML_ELEMENT_TYPE_ELEMENT)
2919
0
                return(1);
2920
0
            if ((elemDecl->etype == XML_ELEMENT_TYPE_ANY) ||
2921
0
                (elemDecl->etype == XML_ELEMENT_TYPE_MIXED))
2922
0
                return(0);
2923
0
        }
2924
0
    }
2925
2926
    /*
2927
     * Otherwise, heuristic :-\
2928
     */
2929
0
    if ((RAW != '<') && (RAW != 0xD)) return(0);
2930
0
    if ((ctxt->node->children == NULL) &&
2931
0
  (RAW == '<') && (NXT(1) == '/')) return(0);
2932
2933
0
    lastChild = xmlGetLastChild(ctxt->node);
2934
0
    if (lastChild == NULL) {
2935
0
        if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2936
0
            (ctxt->node->content != NULL)) return(0);
2937
0
    } else if (xmlNodeIsText(lastChild))
2938
0
        return(0);
2939
0
    else if ((ctxt->node->children != NULL) &&
2940
0
             (xmlNodeIsText(ctxt->node->children)))
2941
0
        return(0);
2942
0
    return(1);
2943
0
}
2944
2945
/************************************************************************
2946
 *                  *
2947
 *    Extra stuff for namespace support     *
2948
 *  Relates to http://www.w3.org/TR/WD-xml-names      *
2949
 *                  *
2950
 ************************************************************************/
2951
2952
/**
2953
 * xmlSplitQName:
2954
 * @ctxt:  an XML parser context
2955
 * @name:  an XML parser context
2956
 * @prefixOut:  a xmlChar **
2957
 *
2958
 * parse an UTF8 encoded XML qualified name string
2959
 *
2960
 * [NS 5] QName ::= (Prefix ':')? LocalPart
2961
 *
2962
 * [NS 6] Prefix ::= NCName
2963
 *
2964
 * [NS 7] LocalPart ::= NCName
2965
 *
2966
 * Returns the local part, and prefix is updated
2967
 *   to get the Prefix if any.
2968
 */
2969
2970
xmlChar *
2971
0
xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefixOut) {
2972
0
    xmlChar buf[XML_MAX_NAMELEN + 5];
2973
0
    xmlChar *buffer = NULL;
2974
0
    int len = 0;
2975
0
    int max = XML_MAX_NAMELEN;
2976
0
    xmlChar *ret = NULL;
2977
0
    xmlChar *prefix;
2978
0
    const xmlChar *cur = name;
2979
0
    int c;
2980
2981
0
    if (prefixOut == NULL) return(NULL);
2982
0
    *prefixOut = NULL;
2983
2984
0
    if (cur == NULL) return(NULL);
2985
2986
    /* nasty but well=formed */
2987
0
    if (cur[0] == ':')
2988
0
  return(xmlStrdup(name));
2989
2990
0
    c = *cur++;
2991
0
    while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2992
0
  buf[len++] = c;
2993
0
  c = *cur++;
2994
0
    }
2995
0
    if (len >= max) {
2996
  /*
2997
   * Okay someone managed to make a huge name, so he's ready to pay
2998
   * for the processing speed.
2999
   */
3000
0
  max = len * 2;
3001
3002
0
  buffer = (xmlChar *) xmlMallocAtomic(max);
3003
0
  if (buffer == NULL) {
3004
0
      xmlErrMemory(ctxt);
3005
0
      return(NULL);
3006
0
  }
3007
0
  memcpy(buffer, buf, len);
3008
0
  while ((c != 0) && (c != ':')) { /* tested bigname.xml */
3009
0
      if (len + 10 > max) {
3010
0
          xmlChar *tmp;
3011
3012
0
    max *= 2;
3013
0
    tmp = (xmlChar *) xmlRealloc(buffer, max);
3014
0
    if (tmp == NULL) {
3015
0
        xmlFree(buffer);
3016
0
        xmlErrMemory(ctxt);
3017
0
        return(NULL);
3018
0
    }
3019
0
    buffer = tmp;
3020
0
      }
3021
0
      buffer[len++] = c;
3022
0
      c = *cur++;
3023
0
  }
3024
0
  buffer[len] = 0;
3025
0
    }
3026
3027
0
    if ((c == ':') && (*cur == 0)) {
3028
0
        if (buffer != NULL)
3029
0
      xmlFree(buffer);
3030
0
  return(xmlStrdup(name));
3031
0
    }
3032
3033
0
    if (buffer == NULL) {
3034
0
  ret = xmlStrndup(buf, len);
3035
0
        if (ret == NULL) {
3036
0
      xmlErrMemory(ctxt);
3037
0
      return(NULL);
3038
0
        }
3039
0
    } else {
3040
0
  ret = buffer;
3041
0
  buffer = NULL;
3042
0
  max = XML_MAX_NAMELEN;
3043
0
    }
3044
3045
3046
0
    if (c == ':') {
3047
0
  c = *cur;
3048
0
        prefix = ret;
3049
0
  if (c == 0) {
3050
0
      ret = xmlStrndup(BAD_CAST "", 0);
3051
0
            if (ret == NULL) {
3052
0
                xmlFree(prefix);
3053
0
                return(NULL);
3054
0
            }
3055
0
            *prefixOut = prefix;
3056
0
            return(ret);
3057
0
  }
3058
0
  len = 0;
3059
3060
  /*
3061
   * Check that the first character is proper to start
3062
   * a new name
3063
   */
3064
0
  if (!(((c >= 0x61) && (c <= 0x7A)) ||
3065
0
        ((c >= 0x41) && (c <= 0x5A)) ||
3066
0
        (c == '_') || (c == ':'))) {
3067
0
      int l;
3068
0
      int first = CUR_SCHAR(cur, l);
3069
3070
0
      if (!IS_LETTER(first) && (first != '_')) {
3071
0
    xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
3072
0
          "Name %s is not XML Namespace compliant\n",
3073
0
          name);
3074
0
      }
3075
0
  }
3076
0
  cur++;
3077
3078
0
  while ((c != 0) && (len < max)) { /* tested bigname2.xml */
3079
0
      buf[len++] = c;
3080
0
      c = *cur++;
3081
0
  }
3082
0
  if (len >= max) {
3083
      /*
3084
       * Okay someone managed to make a huge name, so he's ready to pay
3085
       * for the processing speed.
3086
       */
3087
0
      max = len * 2;
3088
3089
0
      buffer = (xmlChar *) xmlMallocAtomic(max);
3090
0
      if (buffer == NULL) {
3091
0
          xmlErrMemory(ctxt);
3092
0
                xmlFree(prefix);
3093
0
    return(NULL);
3094
0
      }
3095
0
      memcpy(buffer, buf, len);
3096
0
      while (c != 0) { /* tested bigname2.xml */
3097
0
    if (len + 10 > max) {
3098
0
        xmlChar *tmp;
3099
3100
0
        max *= 2;
3101
0
        tmp = (xmlChar *) xmlRealloc(buffer, max);
3102
0
        if (tmp == NULL) {
3103
0
      xmlErrMemory(ctxt);
3104
0
                        xmlFree(prefix);
3105
0
      xmlFree(buffer);
3106
0
      return(NULL);
3107
0
        }
3108
0
        buffer = tmp;
3109
0
    }
3110
0
    buffer[len++] = c;
3111
0
    c = *cur++;
3112
0
      }
3113
0
      buffer[len] = 0;
3114
0
  }
3115
3116
0
  if (buffer == NULL) {
3117
0
      ret = xmlStrndup(buf, len);
3118
0
            if (ret == NULL) {
3119
0
                xmlFree(prefix);
3120
0
                return(NULL);
3121
0
            }
3122
0
  } else {
3123
0
      ret = buffer;
3124
0
  }
3125
3126
0
        *prefixOut = prefix;
3127
0
    }
3128
3129
0
    return(ret);
3130
0
}
3131
3132
/************************************************************************
3133
 *                  *
3134
 *      The parser itself       *
3135
 *  Relates to http://www.w3.org/TR/REC-xml       *
3136
 *                  *
3137
 ************************************************************************/
3138
3139
/************************************************************************
3140
 *                  *
3141
 *  Routines to parse Name, NCName and NmToken      *
3142
 *                  *
3143
 ************************************************************************/
3144
3145
/*
3146
 * The two following functions are related to the change of accepted
3147
 * characters for Name and NmToken in the Revision 5 of XML-1.0
3148
 * They correspond to the modified production [4] and the new production [4a]
3149
 * changes in that revision. Also note that the macros used for the
3150
 * productions Letter, Digit, CombiningChar and Extender are not needed
3151
 * anymore.
3152
 * We still keep compatibility to pre-revision5 parsing semantic if the
3153
 * new XML_PARSE_OLD10 option is given to the parser.
3154
 */
3155
static int
3156
0
xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
3157
0
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3158
        /*
3159
   * Use the new checks of production [4] [4a] amd [5] of the
3160
   * Update 5 of XML-1.0
3161
   */
3162
0
  if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3163
0
      (((c >= 'a') && (c <= 'z')) ||
3164
0
       ((c >= 'A') && (c <= 'Z')) ||
3165
0
       (c == '_') || (c == ':') ||
3166
0
       ((c >= 0xC0) && (c <= 0xD6)) ||
3167
0
       ((c >= 0xD8) && (c <= 0xF6)) ||
3168
0
       ((c >= 0xF8) && (c <= 0x2FF)) ||
3169
0
       ((c >= 0x370) && (c <= 0x37D)) ||
3170
0
       ((c >= 0x37F) && (c <= 0x1FFF)) ||
3171
0
       ((c >= 0x200C) && (c <= 0x200D)) ||
3172
0
       ((c >= 0x2070) && (c <= 0x218F)) ||
3173
0
       ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3174
0
       ((c >= 0x3001) && (c <= 0xD7FF)) ||
3175
0
       ((c >= 0xF900) && (c <= 0xFDCF)) ||
3176
0
       ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3177
0
       ((c >= 0x10000) && (c <= 0xEFFFF))))
3178
0
      return(1);
3179
0
    } else {
3180
0
        if (IS_LETTER(c) || (c == '_') || (c == ':'))
3181
0
      return(1);
3182
0
    }
3183
0
    return(0);
3184
0
}
3185
3186
static int
3187
0
xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
3188
0
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3189
        /*
3190
   * Use the new checks of production [4] [4a] amd [5] of the
3191
   * Update 5 of XML-1.0
3192
   */
3193
0
  if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3194
0
      (((c >= 'a') && (c <= 'z')) ||
3195
0
       ((c >= 'A') && (c <= 'Z')) ||
3196
0
       ((c >= '0') && (c <= '9')) || /* !start */
3197
0
       (c == '_') || (c == ':') ||
3198
0
       (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3199
0
       ((c >= 0xC0) && (c <= 0xD6)) ||
3200
0
       ((c >= 0xD8) && (c <= 0xF6)) ||
3201
0
       ((c >= 0xF8) && (c <= 0x2FF)) ||
3202
0
       ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3203
0
       ((c >= 0x370) && (c <= 0x37D)) ||
3204
0
       ((c >= 0x37F) && (c <= 0x1FFF)) ||
3205
0
       ((c >= 0x200C) && (c <= 0x200D)) ||
3206
0
       ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3207
0
       ((c >= 0x2070) && (c <= 0x218F)) ||
3208
0
       ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3209
0
       ((c >= 0x3001) && (c <= 0xD7FF)) ||
3210
0
       ((c >= 0xF900) && (c <= 0xFDCF)) ||
3211
0
       ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3212
0
       ((c >= 0x10000) && (c <= 0xEFFFF))))
3213
0
       return(1);
3214
0
    } else {
3215
0
        if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3216
0
            (c == '.') || (c == '-') ||
3217
0
      (c == '_') || (c == ':') ||
3218
0
      (IS_COMBINING(c)) ||
3219
0
      (IS_EXTENDER(c)))
3220
0
      return(1);
3221
0
    }
3222
0
    return(0);
3223
0
}
3224
3225
static const xmlChar *
3226
0
xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3227
0
    const xmlChar *ret;
3228
0
    int len = 0, l;
3229
0
    int c;
3230
0
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3231
0
                    XML_MAX_TEXT_LENGTH :
3232
0
                    XML_MAX_NAME_LENGTH;
3233
3234
    /*
3235
     * Handler for more complex cases
3236
     */
3237
0
    c = CUR_CHAR(l);
3238
0
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3239
        /*
3240
   * Use the new checks of production [4] [4a] amd [5] of the
3241
   * Update 5 of XML-1.0
3242
   */
3243
0
  if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3244
0
      (!(((c >= 'a') && (c <= 'z')) ||
3245
0
         ((c >= 'A') && (c <= 'Z')) ||
3246
0
         (c == '_') || (c == ':') ||
3247
0
         ((c >= 0xC0) && (c <= 0xD6)) ||
3248
0
         ((c >= 0xD8) && (c <= 0xF6)) ||
3249
0
         ((c >= 0xF8) && (c <= 0x2FF)) ||
3250
0
         ((c >= 0x370) && (c <= 0x37D)) ||
3251
0
         ((c >= 0x37F) && (c <= 0x1FFF)) ||
3252
0
         ((c >= 0x200C) && (c <= 0x200D)) ||
3253
0
         ((c >= 0x2070) && (c <= 0x218F)) ||
3254
0
         ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3255
0
         ((c >= 0x3001) && (c <= 0xD7FF)) ||
3256
0
         ((c >= 0xF900) && (c <= 0xFDCF)) ||
3257
0
         ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3258
0
         ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3259
0
      return(NULL);
3260
0
  }
3261
0
  len += l;
3262
0
  NEXTL(l);
3263
0
  c = CUR_CHAR(l);
3264
0
  while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3265
0
         (((c >= 'a') && (c <= 'z')) ||
3266
0
          ((c >= 'A') && (c <= 'Z')) ||
3267
0
          ((c >= '0') && (c <= '9')) || /* !start */
3268
0
          (c == '_') || (c == ':') ||
3269
0
          (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3270
0
          ((c >= 0xC0) && (c <= 0xD6)) ||
3271
0
          ((c >= 0xD8) && (c <= 0xF6)) ||
3272
0
          ((c >= 0xF8) && (c <= 0x2FF)) ||
3273
0
          ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3274
0
          ((c >= 0x370) && (c <= 0x37D)) ||
3275
0
          ((c >= 0x37F) && (c <= 0x1FFF)) ||
3276
0
          ((c >= 0x200C) && (c <= 0x200D)) ||
3277
0
          ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3278
0
          ((c >= 0x2070) && (c <= 0x218F)) ||
3279
0
          ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3280
0
          ((c >= 0x3001) && (c <= 0xD7FF)) ||
3281
0
          ((c >= 0xF900) && (c <= 0xFDCF)) ||
3282
0
          ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3283
0
          ((c >= 0x10000) && (c <= 0xEFFFF))
3284
0
    )) {
3285
0
            if (len <= INT_MAX - l)
3286
0
          len += l;
3287
0
      NEXTL(l);
3288
0
      c = CUR_CHAR(l);
3289
0
  }
3290
0
    } else {
3291
0
  if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3292
0
      (!IS_LETTER(c) && (c != '_') &&
3293
0
       (c != ':'))) {
3294
0
      return(NULL);
3295
0
  }
3296
0
  len += l;
3297
0
  NEXTL(l);
3298
0
  c = CUR_CHAR(l);
3299
3300
0
  while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3301
0
         ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3302
0
    (c == '.') || (c == '-') ||
3303
0
    (c == '_') || (c == ':') ||
3304
0
    (IS_COMBINING(c)) ||
3305
0
    (IS_EXTENDER(c)))) {
3306
0
            if (len <= INT_MAX - l)
3307
0
          len += l;
3308
0
      NEXTL(l);
3309
0
      c = CUR_CHAR(l);
3310
0
  }
3311
0
    }
3312
0
    if (len > maxLength) {
3313
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3314
0
        return(NULL);
3315
0
    }
3316
0
    if (ctxt->input->cur - ctxt->input->base < len) {
3317
        /*
3318
         * There were a couple of bugs where PERefs lead to to a change
3319
         * of the buffer. Check the buffer size to avoid passing an invalid
3320
         * pointer to xmlDictLookup.
3321
         */
3322
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
3323
0
                    "unexpected change of input buffer");
3324
0
        return (NULL);
3325
0
    }
3326
0
    if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3327
0
        ret = xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len);
3328
0
    else
3329
0
        ret = xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len);
3330
0
    if (ret == NULL)
3331
0
        xmlErrMemory(ctxt);
3332
0
    return(ret);
3333
0
}
3334
3335
/**
3336
 * xmlParseName:
3337
 * @ctxt:  an XML parser context
3338
 *
3339
 * DEPRECATED: Internal function, don't use.
3340
 *
3341
 * parse an XML name.
3342
 *
3343
 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3344
 *                  CombiningChar | Extender
3345
 *
3346
 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3347
 *
3348
 * [6] Names ::= Name (#x20 Name)*
3349
 *
3350
 * Returns the Name parsed or NULL
3351
 */
3352
3353
const xmlChar *
3354
0
xmlParseName(xmlParserCtxtPtr ctxt) {
3355
0
    const xmlChar *in;
3356
0
    const xmlChar *ret;
3357
0
    size_t count = 0;
3358
0
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3359
0
                       XML_MAX_TEXT_LENGTH :
3360
0
                       XML_MAX_NAME_LENGTH;
3361
3362
0
    GROW;
3363
3364
    /*
3365
     * Accelerator for simple ASCII names
3366
     */
3367
0
    in = ctxt->input->cur;
3368
0
    if (((*in >= 0x61) && (*in <= 0x7A)) ||
3369
0
  ((*in >= 0x41) && (*in <= 0x5A)) ||
3370
0
  (*in == '_') || (*in == ':')) {
3371
0
  in++;
3372
0
  while (((*in >= 0x61) && (*in <= 0x7A)) ||
3373
0
         ((*in >= 0x41) && (*in <= 0x5A)) ||
3374
0
         ((*in >= 0x30) && (*in <= 0x39)) ||
3375
0
         (*in == '_') || (*in == '-') ||
3376
0
         (*in == ':') || (*in == '.'))
3377
0
      in++;
3378
0
  if ((*in > 0) && (*in < 0x80)) {
3379
0
      count = in - ctxt->input->cur;
3380
0
            if (count > maxLength) {
3381
0
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3382
0
                return(NULL);
3383
0
            }
3384
0
      ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3385
0
      ctxt->input->cur = in;
3386
0
      ctxt->input->col += count;
3387
0
      if (ret == NULL)
3388
0
          xmlErrMemory(ctxt);
3389
0
      return(ret);
3390
0
  }
3391
0
    }
3392
    /* accelerator for special cases */
3393
0
    return(xmlParseNameComplex(ctxt));
3394
0
}
3395
3396
static xmlHashedString
3397
0
xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3398
0
    xmlHashedString ret;
3399
0
    int len = 0, l;
3400
0
    int c;
3401
0
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3402
0
                    XML_MAX_TEXT_LENGTH :
3403
0
                    XML_MAX_NAME_LENGTH;
3404
0
    size_t startPosition = 0;
3405
3406
0
    ret.name = NULL;
3407
0
    ret.hashValue = 0;
3408
3409
    /*
3410
     * Handler for more complex cases
3411
     */
3412
0
    startPosition = CUR_PTR - BASE_PTR;
3413
0
    c = CUR_CHAR(l);
3414
0
    if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3415
0
  (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3416
0
  return(ret);
3417
0
    }
3418
3419
0
    while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3420
0
     (xmlIsNameChar(ctxt, c) && (c != ':'))) {
3421
0
        if (len <= INT_MAX - l)
3422
0
      len += l;
3423
0
  NEXTL(l);
3424
0
  c = CUR_CHAR(l);
3425
0
    }
3426
0
    if (len > maxLength) {
3427
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3428
0
        return(ret);
3429
0
    }
3430
0
    ret = xmlDictLookupHashed(ctxt->dict, (BASE_PTR + startPosition), len);
3431
0
    if (ret.name == NULL)
3432
0
        xmlErrMemory(ctxt);
3433
0
    return(ret);
3434
0
}
3435
3436
/**
3437
 * xmlParseNCName:
3438
 * @ctxt:  an XML parser context
3439
 * @len:  length of the string parsed
3440
 *
3441
 * parse an XML name.
3442
 *
3443
 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3444
 *                      CombiningChar | Extender
3445
 *
3446
 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3447
 *
3448
 * Returns the Name parsed or NULL
3449
 */
3450
3451
static xmlHashedString
3452
0
xmlParseNCName(xmlParserCtxtPtr ctxt) {
3453
0
    const xmlChar *in, *e;
3454
0
    xmlHashedString ret;
3455
0
    size_t count = 0;
3456
0
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3457
0
                       XML_MAX_TEXT_LENGTH :
3458
0
                       XML_MAX_NAME_LENGTH;
3459
3460
0
    ret.name = NULL;
3461
3462
    /*
3463
     * Accelerator for simple ASCII names
3464
     */
3465
0
    in = ctxt->input->cur;
3466
0
    e = ctxt->input->end;
3467
0
    if ((((*in >= 0x61) && (*in <= 0x7A)) ||
3468
0
   ((*in >= 0x41) && (*in <= 0x5A)) ||
3469
0
   (*in == '_')) && (in < e)) {
3470
0
  in++;
3471
0
  while ((((*in >= 0x61) && (*in <= 0x7A)) ||
3472
0
          ((*in >= 0x41) && (*in <= 0x5A)) ||
3473
0
          ((*in >= 0x30) && (*in <= 0x39)) ||
3474
0
          (*in == '_') || (*in == '-') ||
3475
0
          (*in == '.')) && (in < e))
3476
0
      in++;
3477
0
  if (in >= e)
3478
0
      goto complex;
3479
0
  if ((*in > 0) && (*in < 0x80)) {
3480
0
      count = in - ctxt->input->cur;
3481
0
            if (count > maxLength) {
3482
0
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3483
0
                return(ret);
3484
0
            }
3485
0
      ret = xmlDictLookupHashed(ctxt->dict, ctxt->input->cur, count);
3486
0
      ctxt->input->cur = in;
3487
0
      ctxt->input->col += count;
3488
0
      if (ret.name == NULL) {
3489
0
          xmlErrMemory(ctxt);
3490
0
      }
3491
0
      return(ret);
3492
0
  }
3493
0
    }
3494
0
complex:
3495
0
    return(xmlParseNCNameComplex(ctxt));
3496
0
}
3497
3498
/**
3499
 * xmlParseNameAndCompare:
3500
 * @ctxt:  an XML parser context
3501
 *
3502
 * parse an XML name and compares for match
3503
 * (specialized for endtag parsing)
3504
 *
3505
 * Returns NULL for an illegal name, (xmlChar*) 1 for success
3506
 * and the name for mismatch
3507
 */
3508
3509
static const xmlChar *
3510
0
xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
3511
0
    register const xmlChar *cmp = other;
3512
0
    register const xmlChar *in;
3513
0
    const xmlChar *ret;
3514
3515
0
    GROW;
3516
3517
0
    in = ctxt->input->cur;
3518
0
    while (*in != 0 && *in == *cmp) {
3519
0
  ++in;
3520
0
  ++cmp;
3521
0
    }
3522
0
    if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
3523
  /* success */
3524
0
  ctxt->input->col += in - ctxt->input->cur;
3525
0
  ctxt->input->cur = in;
3526
0
  return (const xmlChar*) 1;
3527
0
    }
3528
    /* failure (or end of input buffer), check with full function */
3529
0
    ret = xmlParseName (ctxt);
3530
    /* strings coming from the dictionary direct compare possible */
3531
0
    if (ret == other) {
3532
0
  return (const xmlChar*) 1;
3533
0
    }
3534
0
    return ret;
3535
0
}
3536
3537
/**
3538
 * xmlParseStringName:
3539
 * @ctxt:  an XML parser context
3540
 * @str:  a pointer to the string pointer (IN/OUT)
3541
 *
3542
 * parse an XML name.
3543
 *
3544
 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3545
 *                  CombiningChar | Extender
3546
 *
3547
 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3548
 *
3549
 * [6] Names ::= Name (#x20 Name)*
3550
 *
3551
 * Returns the Name parsed or NULL. The @str pointer
3552
 * is updated to the current location in the string.
3553
 */
3554
3555
static xmlChar *
3556
0
xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3557
0
    xmlChar buf[XML_MAX_NAMELEN + 5];
3558
0
    xmlChar *ret;
3559
0
    const xmlChar *cur = *str;
3560
0
    int len = 0, l;
3561
0
    int c;
3562
0
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3563
0
                    XML_MAX_TEXT_LENGTH :
3564
0
                    XML_MAX_NAME_LENGTH;
3565
3566
0
    c = CUR_SCHAR(cur, l);
3567
0
    if (!xmlIsNameStartChar(ctxt, c)) {
3568
0
  return(NULL);
3569
0
    }
3570
3571
0
    COPY_BUF(buf, len, c);
3572
0
    cur += l;
3573
0
    c = CUR_SCHAR(cur, l);
3574
0
    while (xmlIsNameChar(ctxt, c)) {
3575
0
  COPY_BUF(buf, len, c);
3576
0
  cur += l;
3577
0
  c = CUR_SCHAR(cur, l);
3578
0
  if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3579
      /*
3580
       * Okay someone managed to make a huge name, so he's ready to pay
3581
       * for the processing speed.
3582
       */
3583
0
      xmlChar *buffer;
3584
0
      int max = len * 2;
3585
3586
0
      buffer = (xmlChar *) xmlMallocAtomic(max);
3587
0
      if (buffer == NULL) {
3588
0
          xmlErrMemory(ctxt);
3589
0
    return(NULL);
3590
0
      }
3591
0
      memcpy(buffer, buf, len);
3592
0
      while (xmlIsNameChar(ctxt, c)) {
3593
0
    if (len + 10 > max) {
3594
0
        xmlChar *tmp;
3595
3596
0
        max *= 2;
3597
0
        tmp = (xmlChar *) xmlRealloc(buffer, max);
3598
0
        if (tmp == NULL) {
3599
0
      xmlErrMemory(ctxt);
3600
0
      xmlFree(buffer);
3601
0
      return(NULL);
3602
0
        }
3603
0
        buffer = tmp;
3604
0
    }
3605
0
    COPY_BUF(buffer, len, c);
3606
0
    cur += l;
3607
0
    c = CUR_SCHAR(cur, l);
3608
0
                if (len > maxLength) {
3609
0
                    xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3610
0
                    xmlFree(buffer);
3611
0
                    return(NULL);
3612
0
                }
3613
0
      }
3614
0
      buffer[len] = 0;
3615
0
      *str = cur;
3616
0
      return(buffer);
3617
0
  }
3618
0
    }
3619
0
    if (len > maxLength) {
3620
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3621
0
        return(NULL);
3622
0
    }
3623
0
    *str = cur;
3624
0
    ret = xmlStrndup(buf, len);
3625
0
    if (ret == NULL)
3626
0
        xmlErrMemory(ctxt);
3627
0
    return(ret);
3628
0
}
3629
3630
/**
3631
 * xmlParseNmtoken:
3632
 * @ctxt:  an XML parser context
3633
 *
3634
 * DEPRECATED: Internal function, don't use.
3635
 *
3636
 * parse an XML Nmtoken.
3637
 *
3638
 * [7] Nmtoken ::= (NameChar)+
3639
 *
3640
 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
3641
 *
3642
 * Returns the Nmtoken parsed or NULL
3643
 */
3644
3645
xmlChar *
3646
0
xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3647
0
    xmlChar buf[XML_MAX_NAMELEN + 5];
3648
0
    xmlChar *ret;
3649
0
    int len = 0, l;
3650
0
    int c;
3651
0
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3652
0
                    XML_MAX_TEXT_LENGTH :
3653
0
                    XML_MAX_NAME_LENGTH;
3654
3655
0
    c = CUR_CHAR(l);
3656
3657
0
    while (xmlIsNameChar(ctxt, c)) {
3658
0
  COPY_BUF(buf, len, c);
3659
0
  NEXTL(l);
3660
0
  c = CUR_CHAR(l);
3661
0
  if (len >= XML_MAX_NAMELEN) {
3662
      /*
3663
       * Okay someone managed to make a huge token, so he's ready to pay
3664
       * for the processing speed.
3665
       */
3666
0
      xmlChar *buffer;
3667
0
      int max = len * 2;
3668
3669
0
      buffer = (xmlChar *) xmlMallocAtomic(max);
3670
0
      if (buffer == NULL) {
3671
0
          xmlErrMemory(ctxt);
3672
0
    return(NULL);
3673
0
      }
3674
0
      memcpy(buffer, buf, len);
3675
0
      while (xmlIsNameChar(ctxt, c)) {
3676
0
    if (len + 10 > max) {
3677
0
        xmlChar *tmp;
3678
3679
0
        max *= 2;
3680
0
        tmp = (xmlChar *) xmlRealloc(buffer, max);
3681
0
        if (tmp == NULL) {
3682
0
      xmlErrMemory(ctxt);
3683
0
      xmlFree(buffer);
3684
0
      return(NULL);
3685
0
        }
3686
0
        buffer = tmp;
3687
0
    }
3688
0
    COPY_BUF(buffer, len, c);
3689
0
                if (len > maxLength) {
3690
0
                    xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3691
0
                    xmlFree(buffer);
3692
0
                    return(NULL);
3693
0
                }
3694
0
    NEXTL(l);
3695
0
    c = CUR_CHAR(l);
3696
0
      }
3697
0
      buffer[len] = 0;
3698
0
      return(buffer);
3699
0
  }
3700
0
    }
3701
0
    if (len == 0)
3702
0
        return(NULL);
3703
0
    if (len > maxLength) {
3704
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3705
0
        return(NULL);
3706
0
    }
3707
0
    ret = xmlStrndup(buf, len);
3708
0
    if (ret == NULL)
3709
0
        xmlErrMemory(ctxt);
3710
0
    return(ret);
3711
0
}
3712
3713
/**
3714
 * xmlExpandPEsInEntityValue:
3715
 * @ctxt:  parser context
3716
 * @buf:  string buffer
3717
 * @str:  entity value
3718
 * @length:  size of entity value
3719
 * @depth:  nesting depth
3720
 *
3721
 * Validate an entity value and expand parameter entities.
3722
 */
3723
static void
3724
xmlExpandPEsInEntityValue(xmlParserCtxtPtr ctxt, xmlSBuf *buf,
3725
0
                          const xmlChar *str, int length, int depth) {
3726
0
    int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 40 : 20;
3727
0
    const xmlChar *end, *chunk;
3728
0
    int c, l;
3729
3730
0
    if (str == NULL)
3731
0
        return;
3732
3733
0
    depth += 1;
3734
0
    if (depth > maxDepth) {
3735
0
  xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
3736
0
                       "Maximum entity nesting depth exceeded");
3737
0
  return;
3738
0
    }
3739
3740
0
    end = str + length;
3741
0
    chunk = str;
3742
3743
0
    while ((str < end) && (!PARSER_STOPPED(ctxt))) {
3744
0
        c = *str;
3745
3746
0
        if (c >= 0x80) {
3747
0
            l = xmlUTF8MultibyteLen(ctxt, str,
3748
0
                    "invalid character in entity value\n");
3749
0
            if (l == 0) {
3750
0
                if (chunk < str)
3751
0
                    xmlSBufAddString(buf, chunk, str - chunk);
3752
0
                xmlSBufAddReplChar(buf);
3753
0
                str += 1;
3754
0
                chunk = str;
3755
0
            } else {
3756
0
                str += l;
3757
0
            }
3758
0
        } else if (c == '&') {
3759
0
            if (str[1] == '#') {
3760
0
                if (chunk < str)
3761
0
                    xmlSBufAddString(buf, chunk, str - chunk);
3762
3763
0
                c = xmlParseStringCharRef(ctxt, &str);
3764
0
                if (c == 0)
3765
0
                    return;
3766
3767
0
                xmlSBufAddChar(buf, c);
3768
3769
0
                chunk = str;
3770
0
            } else {
3771
0
                xmlChar *name;
3772
3773
                /*
3774
                 * General entity references are checked for
3775
                 * syntactic validity.
3776
                 */
3777
0
                str++;
3778
0
                name = xmlParseStringName(ctxt, &str);
3779
3780
0
                if ((name == NULL) || (*str++ != ';')) {
3781
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
3782
0
                            "EntityValue: '&' forbidden except for entities "
3783
0
                            "references\n");
3784
0
                    xmlFree(name);
3785
0
                    return;
3786
0
                }
3787
3788
0
                xmlFree(name);
3789
0
            }
3790
0
        } else if (c == '%') {
3791
0
            xmlEntityPtr ent;
3792
3793
0
            if (chunk < str)
3794
0
                xmlSBufAddString(buf, chunk, str - chunk);
3795
3796
0
            ent = xmlParseStringPEReference(ctxt, &str);
3797
0
            if (ent == NULL)
3798
0
                return;
3799
3800
0
            if (!PARSER_EXTERNAL(ctxt)) {
3801
0
                xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
3802
0
                return;
3803
0
            }
3804
3805
0
            if (ent->content == NULL) {
3806
                /*
3807
                 * Note: external parsed entities will not be loaded,
3808
                 * it is not required for a non-validating parser to
3809
                 * complete external PEReferences coming from the
3810
                 * internal subset
3811
                 */
3812
0
                if (((ctxt->options & XML_PARSE_NO_XXE) == 0) &&
3813
0
                    ((ctxt->replaceEntities) ||
3814
0
                     (ctxt->validate))) {
3815
0
                    xmlLoadEntityContent(ctxt, ent);
3816
0
                } else {
3817
0
                    xmlWarningMsg(ctxt, XML_ERR_ENTITY_PROCESSING,
3818
0
                                  "not validating will not read content for "
3819
0
                                  "PE entity %s\n", ent->name, NULL);
3820
0
                }
3821
0
            }
3822
3823
            /*
3824
             * TODO: Skip if ent->content is still NULL.
3825
             */
3826
3827
0
            if (xmlParserEntityCheck(ctxt, ent->length))
3828
0
                return;
3829
3830
0
            if (ent->flags & XML_ENT_EXPANDING) {
3831
0
                xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
3832
0
                xmlHaltParser(ctxt);
3833
0
                return;
3834
0
            }
3835
3836
0
            ent->flags |= XML_ENT_EXPANDING;
3837
0
            xmlExpandPEsInEntityValue(ctxt, buf, ent->content, ent->length,
3838
0
                                      depth);
3839
0
            ent->flags &= ~XML_ENT_EXPANDING;
3840
3841
0
            chunk = str;
3842
0
        } else {
3843
            /* Normal ASCII char */
3844
0
            if (!IS_BYTE_CHAR(c)) {
3845
0
                xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
3846
0
                        "invalid character in entity value\n");
3847
0
                if (chunk < str)
3848
0
                    xmlSBufAddString(buf, chunk, str - chunk);
3849
0
                xmlSBufAddReplChar(buf);
3850
0
                str += 1;
3851
0
                chunk = str;
3852
0
            } else {
3853
0
                str += 1;
3854
0
            }
3855
0
        }
3856
0
    }
3857
3858
0
    if (chunk < str)
3859
0
        xmlSBufAddString(buf, chunk, str - chunk);
3860
3861
0
    return;
3862
0
}
3863
3864
/**
3865
 * xmlParseEntityValue:
3866
 * @ctxt:  an XML parser context
3867
 * @orig:  if non-NULL store a copy of the original entity value
3868
 *
3869
 * DEPRECATED: Internal function, don't use.
3870
 *
3871
 * parse a value for ENTITY declarations
3872
 *
3873
 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3874
 *                 "'" ([^%&'] | PEReference | Reference)* "'"
3875
 *
3876
 * Returns the EntityValue parsed with reference substituted or NULL
3877
 */
3878
xmlChar *
3879
0
xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3880
0
    unsigned maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3881
0
                         XML_MAX_HUGE_LENGTH :
3882
0
                         XML_MAX_TEXT_LENGTH;
3883
0
    xmlSBuf buf;
3884
0
    const xmlChar *start;
3885
0
    int quote, length;
3886
3887
0
    xmlSBufInit(&buf, maxLength);
3888
3889
0
    GROW;
3890
3891
0
    quote = CUR;
3892
0
    if ((quote != '"') && (quote != '\'')) {
3893
0
  xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
3894
0
  return(NULL);
3895
0
    }
3896
0
    CUR_PTR++;
3897
3898
0
    length = 0;
3899
3900
    /*
3901
     * Copy raw content of the entity into a buffer
3902
     */
3903
0
    while (1) {
3904
0
        int c;
3905
3906
0
        if (PARSER_STOPPED(ctxt))
3907
0
            goto error;
3908
3909
0
        if (CUR_PTR >= ctxt->input->end) {
3910
0
            xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
3911
0
            goto error;
3912
0
        }
3913
3914
0
        c = CUR;
3915
3916
0
        if (c == 0) {
3917
0
            xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
3918
0
                    "invalid character in entity value\n");
3919
0
            goto error;
3920
0
        }
3921
0
        if (c == quote)
3922
0
            break;
3923
0
        NEXTL(1);
3924
0
        length += 1;
3925
3926
        /*
3927
         * TODO: Check growth threshold
3928
         */
3929
0
        if (ctxt->input->end - CUR_PTR < 10)
3930
0
            GROW;
3931
0
    }
3932
3933
0
    start = CUR_PTR - length;
3934
3935
0
    if (orig != NULL) {
3936
0
        *orig = xmlStrndup(start, length);
3937
0
        if (*orig == NULL)
3938
0
            xmlErrMemory(ctxt);
3939
0
    }
3940
3941
0
    xmlExpandPEsInEntityValue(ctxt, &buf, start, length, ctxt->inputNr);
3942
3943
0
    NEXTL(1);
3944
3945
0
    return(xmlSBufFinish(&buf, NULL, ctxt, "entity length too long"));
3946
3947
0
error:
3948
0
    xmlSBufCleanup(&buf, ctxt, "entity length too long");
3949
0
    return(NULL);
3950
0
}
3951
3952
/**
3953
 * xmlCheckEntityInAttValue:
3954
 * @ctxt:  parser context
3955
 * @pent:  entity
3956
 * @depth:  nesting depth
3957
 *
3958
 * Check an entity reference in an attribute value for validity
3959
 * without expanding it.
3960
 */
3961
static void
3962
0
xmlCheckEntityInAttValue(xmlParserCtxtPtr ctxt, xmlEntityPtr pent, int depth) {
3963
0
    int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 40 : 20;
3964
0
    const xmlChar *str;
3965
0
    unsigned long expandedSize = pent->length;
3966
0
    int c, flags;
3967
3968
0
    depth += 1;
3969
0
    if (depth > maxDepth) {
3970
0
  xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
3971
0
                       "Maximum entity nesting depth exceeded");
3972
0
  return;
3973
0
    }
3974
3975
0
    if (pent->flags & XML_ENT_EXPANDING) {
3976
0
        xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
3977
0
        xmlHaltParser(ctxt);
3978
0
        return;
3979
0
    }
3980
3981
    /*
3982
     * If we're parsing a default attribute value in DTD content,
3983
     * the entity might reference other entities which weren't
3984
     * defined yet, so the check isn't reliable.
3985
     */
3986
0
    if (ctxt->inSubset == 0)
3987
0
        flags = XML_ENT_CHECKED | XML_ENT_VALIDATED;
3988
0
    else
3989
0
        flags = XML_ENT_VALIDATED;
3990
3991
0
    str = pent->content;
3992
0
    if (str == NULL)
3993
0
        goto done;
3994
3995
    /*
3996
     * Note that entity values are already validated. We only check
3997
     * for illegal less-than signs and compute the expanded size
3998
     * of the entity. No special handling for multi-byte characters
3999
     * is needed.
4000
     */
4001
0
    while (!PARSER_STOPPED(ctxt)) {
4002
0
        c = *str;
4003
4004
0
  if (c != '&') {
4005
0
            if (c == 0)
4006
0
                break;
4007
4008
0
            if (c == '<')
4009
0
                xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
4010
0
                        "'<' in entity '%s' is not allowed in attributes "
4011
0
                        "values\n", pent->name);
4012
4013
0
            str += 1;
4014
0
        } else if (str[1] == '#') {
4015
0
            int val;
4016
4017
0
      val = xmlParseStringCharRef(ctxt, &str);
4018
0
      if (val == 0) {
4019
0
                pent->content[0] = 0;
4020
0
                break;
4021
0
            }
4022
0
  } else {
4023
0
            xmlChar *name;
4024
0
            xmlEntityPtr ent;
4025
4026
0
      name = xmlParseStringEntityRef(ctxt, &str);
4027
0
      if (name == NULL) {
4028
0
                pent->content[0] = 0;
4029
0
                break;
4030
0
            }
4031
4032
0
            ent = xmlLookupGeneralEntity(ctxt, name, /* inAttr */ 1);
4033
0
            xmlFree(name);
4034
4035
0
            if ((ent != NULL) &&
4036
0
                (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
4037
0
                if ((ent->flags & flags) != flags) {
4038
0
                    pent->flags |= XML_ENT_EXPANDING;
4039
0
                    xmlCheckEntityInAttValue(ctxt, ent, depth);
4040
0
                    pent->flags &= ~XML_ENT_EXPANDING;
4041
0
                }
4042
4043
0
                xmlSaturatedAdd(&expandedSize, ent->expandedSize);
4044
0
                xmlSaturatedAdd(&expandedSize, XML_ENT_FIXED_COST);
4045
0
            }
4046
0
        }
4047
0
    }
4048
4049
0
done:
4050
0
    if (ctxt->inSubset == 0)
4051
0
        pent->expandedSize = expandedSize;
4052
4053
0
    pent->flags |= flags;
4054
0
}
4055
4056
/**
4057
 * xmlExpandEntityInAttValue:
4058
 * @ctxt:  parser context
4059
 * @buf:  string buffer
4060
 * @str:  entity or attribute value
4061
 * @pent:  entity for entity value, NULL for attribute values
4062
 * @normalize:  whether to collapse whitespace
4063
 * @inSpace:  whitespace state
4064
 * @depth:  nesting depth
4065
 * @check:  whether to check for amplification
4066
 *
4067
 * Expand general entity references in an entity or attribute value.
4068
 * Perform attribute value normalization.
4069
 */
4070
static void
4071
xmlExpandEntityInAttValue(xmlParserCtxtPtr ctxt, xmlSBuf *buf,
4072
                          const xmlChar *str, xmlEntityPtr pent, int normalize,
4073
0
                          int *inSpace, int depth, int check) {
4074
0
    int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 40 : 20;
4075
0
    int c, chunkSize;
4076
4077
0
    if (str == NULL)
4078
0
        return;
4079
4080
0
    depth += 1;
4081
0
    if (depth > maxDepth) {
4082
0
  xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
4083
0
                       "Maximum entity nesting depth exceeded");
4084
0
  return;
4085
0
    }
4086
4087
0
    if (pent != NULL) {
4088
0
        if (pent->flags & XML_ENT_EXPANDING) {
4089
0
            xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
4090
0
            xmlHaltParser(ctxt);
4091
0
            return;
4092
0
        }
4093
4094
0
        if (check) {
4095
0
            if (xmlParserEntityCheck(ctxt, pent->length))
4096
0
                return;
4097
0
        }
4098
0
    }
4099
4100
0
    chunkSize = 0;
4101
4102
    /*
4103
     * Note that entity values are already validated. No special
4104
     * handling for multi-byte characters is needed.
4105
     */
4106
0
    while (!PARSER_STOPPED(ctxt)) {
4107
0
        c = *str;
4108
4109
0
  if (c != '&') {
4110
0
            if (c == 0)
4111
0
                break;
4112
4113
            /*
4114
             * If this function is called without an entity, it is used to
4115
             * expand entities in an attribute content where less-than was
4116
             * already unscaped and is allowed.
4117
             */
4118
0
            if ((pent != NULL) && (c == '<')) {
4119
0
                xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
4120
0
                        "'<' in entity '%s' is not allowed in attributes "
4121
0
                        "values\n", pent->name);
4122
0
                break;
4123
0
            }
4124
4125
0
            if (c <= 0x20) {
4126
0
                if ((normalize) && (*inSpace)) {
4127
                    /* Skip char */
4128
0
                    if (chunkSize > 0) {
4129
0
                        xmlSBufAddString(buf, str - chunkSize, chunkSize);
4130
0
                        chunkSize = 0;
4131
0
                    }
4132
0
                } else if (c < 0x20) {
4133
0
                    if (chunkSize > 0) {
4134
0
                        xmlSBufAddString(buf, str - chunkSize, chunkSize);
4135
0
                        chunkSize = 0;
4136
0
                    }
4137
4138
0
                    xmlSBufAddCString(buf, " ", 1);
4139
0
                } else {
4140
0
                    chunkSize += 1;
4141
0
                }
4142
4143
0
                *inSpace = 1;
4144
0
            } else {
4145
0
                chunkSize += 1;
4146
0
                *inSpace = 0;
4147
0
            }
4148
4149
0
            str += 1;
4150
0
        } else if (str[1] == '#') {
4151
0
            int val;
4152
4153
0
            if (chunkSize > 0) {
4154
0
                xmlSBufAddString(buf, str - chunkSize, chunkSize);
4155
0
                chunkSize = 0;
4156
0
            }
4157
4158
0
      val = xmlParseStringCharRef(ctxt, &str);
4159
0
      if (val == 0) {
4160
0
                if (pent != NULL)
4161
0
                    pent->content[0] = 0;
4162
0
                break;
4163
0
            }
4164
4165
0
            if (val == ' ') {
4166
0
                if ((!normalize) || (!*inSpace))
4167
0
                    xmlSBufAddCString(buf, " ", 1);
4168
0
                *inSpace = 1;
4169
0
            } else {
4170
0
                xmlSBufAddChar(buf, val);
4171
0
                *inSpace = 0;
4172
0
            }
4173
0
  } else {
4174
0
            xmlChar *name;
4175
0
            xmlEntityPtr ent;
4176
4177
0
            if (chunkSize > 0) {
4178
0
                xmlSBufAddString(buf, str - chunkSize, chunkSize);
4179
0
                chunkSize = 0;
4180
0
            }
4181
4182
0
      name = xmlParseStringEntityRef(ctxt, &str);
4183
0
            if (name == NULL) {
4184
0
                if (pent != NULL)
4185
0
                    pent->content[0] = 0;
4186
0
                break;
4187
0
            }
4188
4189
0
            ent = xmlLookupGeneralEntity(ctxt, name, /* inAttr */ 1);
4190
0
            xmlFree(name);
4191
4192
0
      if ((ent != NULL) &&
4193
0
    (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
4194
0
    if (ent->content == NULL) {
4195
0
        xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
4196
0
          "predefined entity has no content\n");
4197
0
                    break;
4198
0
                }
4199
4200
0
                xmlSBufAddString(buf, ent->content, ent->length);
4201
4202
0
                *inSpace = 0;
4203
0
      } else if ((ent != NULL) && (ent->content != NULL)) {
4204
0
                if (pent != NULL)
4205
0
                    pent->flags |= XML_ENT_EXPANDING;
4206
0
    xmlExpandEntityInAttValue(ctxt, buf, ent->content, ent,
4207
0
                                          normalize, inSpace, depth, check);
4208
0
                if (pent != NULL)
4209
0
                    pent->flags &= ~XML_ENT_EXPANDING;
4210
0
      }
4211
0
        }
4212
0
    }
4213
4214
0
    if (chunkSize > 0)
4215
0
        xmlSBufAddString(buf, str - chunkSize, chunkSize);
4216
4217
0
    return;
4218
0
}
4219
4220
/**
4221
 * xmlExpandEntitiesInAttValue:
4222
 * @ctxt:  parser context
4223
 * @str:  entity or attribute value
4224
 * @normalize:  whether to collapse whitespace
4225
 *
4226
 * Expand general entity references in an entity or attribute value.
4227
 * Perform attribute value normalization.
4228
 *
4229
 * Returns the expanded attribtue value.
4230
 */
4231
xmlChar *
4232
xmlExpandEntitiesInAttValue(xmlParserCtxtPtr ctxt, const xmlChar *str,
4233
0
                            int normalize) {
4234
0
    unsigned maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4235
0
                         XML_MAX_HUGE_LENGTH :
4236
0
                         XML_MAX_TEXT_LENGTH;
4237
0
    xmlSBuf buf;
4238
0
    int inSpace = 1;
4239
4240
0
    xmlSBufInit(&buf, maxLength);
4241
4242
0
    xmlExpandEntityInAttValue(ctxt, &buf, str, NULL, normalize, &inSpace,
4243
0
                              ctxt->inputNr, /* check */ 0);
4244
4245
0
    if ((normalize) && (inSpace) && (buf.size > 0))
4246
0
        buf.size--;
4247
4248
0
    return(xmlSBufFinish(&buf, NULL, ctxt, "AttValue length too long"));
4249
0
}
4250
4251
/**
4252
 * xmlParseAttValueInternal:
4253
 * @ctxt:  an XML parser context
4254
 * @len:  attribute len result
4255
 * @alloc:  whether the attribute was reallocated as a new string
4256
 * @normalize:  if 1 then further non-CDATA normalization must be done
4257
 *
4258
 * parse a value for an attribute.
4259
 * NOTE: if no normalization is needed, the routine will return pointers
4260
 *       directly from the data buffer.
4261
 *
4262
 * 3.3.3 Attribute-Value Normalization:
4263
 * Before the value of an attribute is passed to the application or
4264
 * checked for validity, the XML processor must normalize it as follows:
4265
 * - a character reference is processed by appending the referenced
4266
 *   character to the attribute value
4267
 * - an entity reference is processed by recursively processing the
4268
 *   replacement text of the entity
4269
 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4270
 *   appending #x20 to the normalized value, except that only a single
4271
 *   #x20 is appended for a "#xD#xA" sequence that is part of an external
4272
 *   parsed entity or the literal entity value of an internal parsed entity
4273
 * - other characters are processed by appending them to the normalized value
4274
 * If the declared value is not CDATA, then the XML processor must further
4275
 * process the normalized attribute value by discarding any leading and
4276
 * trailing space (#x20) characters, and by replacing sequences of space
4277
 * (#x20) characters by a single space (#x20) character.
4278
 * All attributes for which no declaration has been read should be treated
4279
 * by a non-validating parser as if declared CDATA.
4280
 *
4281
 * Returns the AttValue parsed or NULL. The value has to be freed by the
4282
 *     caller if it was copied, this can be detected by val[*len] == 0.
4283
 */
4284
static xmlChar *
4285
xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *attlen, int *alloc,
4286
0
                         int normalize, int isNamespace) {
4287
0
    unsigned maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4288
0
                         XML_MAX_HUGE_LENGTH :
4289
0
                         XML_MAX_TEXT_LENGTH;
4290
0
    xmlSBuf buf;
4291
0
    xmlChar *ret;
4292
0
    int c, l, quote, flags, chunkSize;
4293
0
    int inSpace = 1;
4294
0
    int replaceEntities;
4295
4296
    /* Always expand namespace URIs */
4297
0
    replaceEntities = (ctxt->replaceEntities) || (isNamespace);
4298
4299
0
    xmlSBufInit(&buf, maxLength);
4300
4301
0
    GROW;
4302
4303
0
    quote = CUR;
4304
0
    if ((quote != '"') && (quote != '\'')) {
4305
0
  xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
4306
0
  return(NULL);
4307
0
    }
4308
0
    NEXTL(1);
4309
4310
0
    if (ctxt->inSubset == 0)
4311
0
        flags = XML_ENT_CHECKED | XML_ENT_VALIDATED;
4312
0
    else
4313
0
        flags = XML_ENT_VALIDATED;
4314
4315
0
    inSpace = 1;
4316
0
    chunkSize = 0;
4317
4318
0
    while (1) {
4319
0
        if (PARSER_STOPPED(ctxt))
4320
0
            goto error;
4321
4322
0
        if (CUR_PTR >= ctxt->input->end) {
4323
0
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4324
0
                           "AttValue: ' expected\n");
4325
0
            goto error;
4326
0
        }
4327
4328
        /*
4329
         * TODO: Check growth threshold
4330
         */
4331
0
        if (ctxt->input->end - CUR_PTR < 10)
4332
0
            GROW;
4333
4334
0
        c = CUR;
4335
4336
0
        if (c >= 0x80) {
4337
0
            l = xmlUTF8MultibyteLen(ctxt, CUR_PTR,
4338
0
                    "invalid character in attribute value\n");
4339
0
            if (l == 0) {
4340
0
                if (chunkSize > 0) {
4341
0
                    xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4342
0
                    chunkSize = 0;
4343
0
                }
4344
0
                xmlSBufAddReplChar(&buf);
4345
0
                NEXTL(1);
4346
0
            } else {
4347
0
                chunkSize += l;
4348
0
                NEXTL(l);
4349
0
            }
4350
4351
0
            inSpace = 0;
4352
0
        } else if (c != '&') {
4353
0
            if (c > 0x20) {
4354
0
                if (c == quote)
4355
0
                    break;
4356
4357
0
                if (c == '<')
4358
0
                    xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
4359
4360
0
                chunkSize += 1;
4361
0
                inSpace = 0;
4362
0
            } else if (!IS_BYTE_CHAR(c)) {
4363
0
                xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
4364
0
                        "invalid character in attribute value\n");
4365
0
                if (chunkSize > 0) {
4366
0
                    xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4367
0
                    chunkSize = 0;
4368
0
                }
4369
0
                xmlSBufAddReplChar(&buf);
4370
0
                inSpace = 0;
4371
0
            } else {
4372
                /* Whitespace */
4373
0
                if ((normalize) && (inSpace)) {
4374
                    /* Skip char */
4375
0
                    if (chunkSize > 0) {
4376
0
                        xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4377
0
                        chunkSize = 0;
4378
0
                    }
4379
0
                } else if (c < 0x20) {
4380
                    /* Convert to space */
4381
0
                    if (chunkSize > 0) {
4382
0
                        xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4383
0
                        chunkSize = 0;
4384
0
                    }
4385
4386
0
                    xmlSBufAddCString(&buf, " ", 1);
4387
0
                } else {
4388
0
                    chunkSize += 1;
4389
0
                }
4390
4391
0
                inSpace = 1;
4392
4393
0
                if ((c == 0xD) && (NXT(1) == 0xA))
4394
0
                    CUR_PTR++;
4395
0
            }
4396
4397
0
            NEXTL(1);
4398
0
        } else if (NXT(1) == '#') {
4399
0
            int val;
4400
4401
0
            if (chunkSize > 0) {
4402
0
                xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4403
0
                chunkSize = 0;
4404
0
            }
4405
4406
0
            val = xmlParseCharRef(ctxt);
4407
0
            if (val == 0)
4408
0
                goto error;
4409
4410
0
            if ((val == '&') && (!replaceEntities)) {
4411
                /*
4412
                 * The reparsing will be done in xmlStringGetNodeList()
4413
                 * called by the attribute() function in SAX.c
4414
                 */
4415
0
                xmlSBufAddCString(&buf, "&#38;", 5);
4416
0
                inSpace = 0;
4417
0
            } else if (val == ' ') {
4418
0
                if ((!normalize) || (!inSpace))
4419
0
                    xmlSBufAddCString(&buf, " ", 1);
4420
0
                inSpace = 1;
4421
0
            } else {
4422
0
                xmlSBufAddChar(&buf, val);
4423
0
                inSpace = 0;
4424
0
            }
4425
0
        } else {
4426
0
            const xmlChar *name;
4427
0
            xmlEntityPtr ent;
4428
4429
0
            if (chunkSize > 0) {
4430
0
                xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4431
0
                chunkSize = 0;
4432
0
            }
4433
4434
0
            name = xmlParseEntityRefInternal(ctxt);
4435
0
            if (name == NULL) {
4436
                /*
4437
                 * Probably a literal '&' which wasn't escaped.
4438
                 * TODO: Handle gracefully in recovery mode.
4439
                 */
4440
0
                continue;
4441
0
            }
4442
4443
0
            ent = xmlLookupGeneralEntity(ctxt, name, /* isAttr */ 1);
4444
0
            if (ent == NULL)
4445
0
                continue;
4446
4447
0
            if (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY) {
4448
0
                if ((ent->content[0] == '&') && (!replaceEntities))
4449
0
                    xmlSBufAddCString(&buf, "&#38;", 5);
4450
0
                else
4451
0
                    xmlSBufAddString(&buf, ent->content, ent->length);
4452
0
                inSpace = 0;
4453
0
            } else if (replaceEntities) {
4454
0
                xmlExpandEntityInAttValue(ctxt, &buf, ent->content, ent,
4455
0
                                          normalize, &inSpace, ctxt->inputNr,
4456
0
                                          /* check */ 1);
4457
0
            } else {
4458
0
                if ((ent->flags & flags) != flags)
4459
0
                    xmlCheckEntityInAttValue(ctxt, ent, ctxt->inputNr);
4460
4461
0
                if (xmlParserEntityCheck(ctxt, ent->expandedSize)) {
4462
0
                    ent->content[0] = 0;
4463
0
                    goto error;
4464
0
                }
4465
4466
                /*
4467
                 * Just output the reference
4468
                 */
4469
0
                xmlSBufAddCString(&buf, "&", 1);
4470
0
                xmlSBufAddString(&buf, ent->name, xmlStrlen(ent->name));
4471
0
                xmlSBufAddCString(&buf, ";", 1);
4472
4473
0
                inSpace = 0;
4474
0
            }
4475
0
  }
4476
0
    }
4477
4478
0
    if ((buf.mem == NULL) && (alloc != NULL)) {
4479
0
        ret = (xmlChar *) CUR_PTR - chunkSize;
4480
4481
0
        if (attlen != NULL)
4482
0
            *attlen = chunkSize;
4483
0
        if ((normalize) && (inSpace) && (chunkSize > 0))
4484
0
            *attlen -= 1;
4485
0
        *alloc = 0;
4486
4487
        /* Report potential error */
4488
0
        xmlSBufCleanup(&buf, ctxt, "AttValue length too long");
4489
0
    } else {
4490
0
        if (chunkSize > 0)
4491
0
            xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4492
4493
0
        if ((normalize) && (inSpace) && (buf.size > 0))
4494
0
            buf.size--;
4495
4496
0
        ret = xmlSBufFinish(&buf, attlen, ctxt, "AttValue length too long");
4497
4498
0
        if (ret != NULL) {
4499
0
            if (attlen != NULL)
4500
0
                *attlen = buf.size;
4501
0
            if (alloc != NULL)
4502
0
                *alloc = 1;
4503
0
        }
4504
0
    }
4505
4506
0
    NEXTL(1);
4507
4508
0
    return(ret);
4509
4510
0
error:
4511
0
    xmlSBufCleanup(&buf, ctxt, "AttValue length too long");
4512
0
    return(NULL);
4513
0
}
4514
4515
/**
4516
 * xmlParseAttValue:
4517
 * @ctxt:  an XML parser context
4518
 *
4519
 * DEPRECATED: Internal function, don't use.
4520
 *
4521
 * parse a value for an attribute
4522
 * Note: the parser won't do substitution of entities here, this
4523
 * will be handled later in xmlStringGetNodeList
4524
 *
4525
 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
4526
 *                   "'" ([^<&'] | Reference)* "'"
4527
 *
4528
 * 3.3.3 Attribute-Value Normalization:
4529
 * Before the value of an attribute is passed to the application or
4530
 * checked for validity, the XML processor must normalize it as follows:
4531
 * - a character reference is processed by appending the referenced
4532
 *   character to the attribute value
4533
 * - an entity reference is processed by recursively processing the
4534
 *   replacement text of the entity
4535
 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4536
 *   appending #x20 to the normalized value, except that only a single
4537
 *   #x20 is appended for a "#xD#xA" sequence that is part of an external
4538
 *   parsed entity or the literal entity value of an internal parsed entity
4539
 * - other characters are processed by appending them to the normalized value
4540
 * If the declared value is not CDATA, then the XML processor must further
4541
 * process the normalized attribute value by discarding any leading and
4542
 * trailing space (#x20) characters, and by replacing sequences of space
4543
 * (#x20) characters by a single space (#x20) character.
4544
 * All attributes for which no declaration has been read should be treated
4545
 * by a non-validating parser as if declared CDATA.
4546
 *
4547
 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
4548
 */
4549
4550
4551
xmlChar *
4552
0
xmlParseAttValue(xmlParserCtxtPtr ctxt) {
4553
0
    if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
4554
0
    return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0, 0));
4555
0
}
4556
4557
/**
4558
 * xmlParseSystemLiteral:
4559
 * @ctxt:  an XML parser context
4560
 *
4561
 * DEPRECATED: Internal function, don't use.
4562
 *
4563
 * parse an XML Literal
4564
 *
4565
 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4566
 *
4567
 * Returns the SystemLiteral parsed or NULL
4568
 */
4569
4570
xmlChar *
4571
0
xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
4572
0
    xmlChar *buf = NULL;
4573
0
    int len = 0;
4574
0
    int size = XML_PARSER_BUFFER_SIZE;
4575
0
    int cur, l;
4576
0
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4577
0
                    XML_MAX_TEXT_LENGTH :
4578
0
                    XML_MAX_NAME_LENGTH;
4579
0
    xmlChar stop;
4580
4581
0
    if (RAW == '"') {
4582
0
        NEXT;
4583
0
  stop = '"';
4584
0
    } else if (RAW == '\'') {
4585
0
        NEXT;
4586
0
  stop = '\'';
4587
0
    } else {
4588
0
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4589
0
  return(NULL);
4590
0
    }
4591
4592
0
    buf = (xmlChar *) xmlMallocAtomic(size);
4593
0
    if (buf == NULL) {
4594
0
        xmlErrMemory(ctxt);
4595
0
  return(NULL);
4596
0
    }
4597
0
    cur = CUR_CHAR(l);
4598
0
    while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
4599
0
  if (len + 5 >= size) {
4600
0
      xmlChar *tmp;
4601
4602
0
      size *= 2;
4603
0
      tmp = (xmlChar *) xmlRealloc(buf, size);
4604
0
      if (tmp == NULL) {
4605
0
          xmlFree(buf);
4606
0
    xmlErrMemory(ctxt);
4607
0
    return(NULL);
4608
0
      }
4609
0
      buf = tmp;
4610
0
  }
4611
0
  COPY_BUF(buf, len, cur);
4612
0
        if (len > maxLength) {
4613
0
            xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
4614
0
            xmlFree(buf);
4615
0
            return(NULL);
4616
0
        }
4617
0
  NEXTL(l);
4618
0
  cur = CUR_CHAR(l);
4619
0
    }
4620
0
    buf[len] = 0;
4621
0
    if (!IS_CHAR(cur)) {
4622
0
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4623
0
    } else {
4624
0
  NEXT;
4625
0
    }
4626
0
    return(buf);
4627
0
}
4628
4629
/**
4630
 * xmlParsePubidLiteral:
4631
 * @ctxt:  an XML parser context
4632
 *
4633
 * DEPRECATED: Internal function, don't use.
4634
 *
4635
 * parse an XML public literal
4636
 *
4637
 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4638
 *
4639
 * Returns the PubidLiteral parsed or NULL.
4640
 */
4641
4642
xmlChar *
4643
0
xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
4644
0
    xmlChar *buf = NULL;
4645
0
    int len = 0;
4646
0
    int size = XML_PARSER_BUFFER_SIZE;
4647
0
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4648
0
                    XML_MAX_TEXT_LENGTH :
4649
0
                    XML_MAX_NAME_LENGTH;
4650
0
    xmlChar cur;
4651
0
    xmlChar stop;
4652
4653
0
    if (RAW == '"') {
4654
0
        NEXT;
4655
0
  stop = '"';
4656
0
    } else if (RAW == '\'') {
4657
0
        NEXT;
4658
0
  stop = '\'';
4659
0
    } else {
4660
0
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4661
0
  return(NULL);
4662
0
    }
4663
0
    buf = (xmlChar *) xmlMallocAtomic(size);
4664
0
    if (buf == NULL) {
4665
0
  xmlErrMemory(ctxt);
4666
0
  return(NULL);
4667
0
    }
4668
0
    cur = CUR;
4669
0
    while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop) &&
4670
0
           (PARSER_STOPPED(ctxt) == 0)) { /* checked */
4671
0
  if (len + 1 >= size) {
4672
0
      xmlChar *tmp;
4673
4674
0
      size *= 2;
4675
0
      tmp = (xmlChar *) xmlRealloc(buf, size);
4676
0
      if (tmp == NULL) {
4677
0
    xmlErrMemory(ctxt);
4678
0
    xmlFree(buf);
4679
0
    return(NULL);
4680
0
      }
4681
0
      buf = tmp;
4682
0
  }
4683
0
  buf[len++] = cur;
4684
0
        if (len > maxLength) {
4685
0
            xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
4686
0
            xmlFree(buf);
4687
0
            return(NULL);
4688
0
        }
4689
0
  NEXT;
4690
0
  cur = CUR;
4691
0
    }
4692
0
    buf[len] = 0;
4693
0
    if (cur != stop) {
4694
0
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4695
0
    } else {
4696
0
  NEXTL(1);
4697
0
    }
4698
0
    return(buf);
4699
0
}
4700
4701
static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int partial);
4702
4703
/*
 * Lookup table for the inner loop of the char data scanner. An entry is
 * non-zero (equal to its own index) for the bytes that loop may step
 * over: TAB and 0x20..0x7F except '&', '<' and ']'. Every other byte,
 * including LF, CR and all non-ASCII values, maps to zero.
 */
static const unsigned char test_char_data[256] = {
    /* 0x00 - 0x0F: only 0x9 (TAB); CR/LF are handled separately */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x10 - 0x1F */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x20 - 0x2F: '&' (0x26) excluded */
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27,
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    /* 0x30 - 0x3F: '<' (0x3C) excluded */
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F,
    /* 0x40 - 0x4F */
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    /* 0x50 - 0x5F: ']' (0x5D) excluded */
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F,
    /* 0x60 - 0x6F */
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    /* 0x70 - 0x7F */
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
    /* 0x80 - 0xFF: non-ascii */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
4740
4741
/**
4742
 * xmlParseCharDataInternal:
4743
 * @ctxt:  an XML parser context
4744
 * @partial:  buffer may contain partial UTF-8 sequences
4745
 *
4746
 * Parse character data. Always makes progress if the first char isn't
4747
 * '<' or '&'.
4748
 *
4749
 * The right angle bracket (>) may be represented using the string "&gt;",
4750
 * and must, for compatibility, be escaped using "&gt;" or a character
4751
 * reference when it appears in the string "]]>" in content, when that
4752
 * string is not marking the end of a CDATA section.
4753
 *
4754
 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4755
 */
4756
static void
4757
0
xmlParseCharDataInternal(xmlParserCtxtPtr ctxt, int partial) {
4758
0
    const xmlChar *in;
4759
0
    int nbchar = 0;
4760
0
    int line = ctxt->input->line;
4761
0
    int col = ctxt->input->col;
4762
0
    int ccol;
4763
4764
0
    GROW;
4765
    /*
4766
     * Accelerated common case where input don't need to be
4767
     * modified before passing it to the handler.
4768
     */
4769
0
    in = ctxt->input->cur;
4770
0
    do {
4771
0
get_more_space:
4772
0
        while (*in == 0x20) { in++; ctxt->input->col++; }
4773
0
        if (*in == 0xA) {
4774
0
            do {
4775
0
                ctxt->input->line++; ctxt->input->col = 1;
4776
0
                in++;
4777
0
            } while (*in == 0xA);
4778
0
            goto get_more_space;
4779
0
        }
4780
0
        if (*in == '<') {
4781
0
            nbchar = in - ctxt->input->cur;
4782
0
            if (nbchar > 0) {
4783
0
                const xmlChar *tmp = ctxt->input->cur;
4784
0
                ctxt->input->cur = in;
4785
4786
0
                if ((ctxt->sax != NULL) &&
4787
0
                    (ctxt->disableSAX == 0) &&
4788
0
                    (ctxt->sax->ignorableWhitespace !=
4789
0
                     ctxt->sax->characters)) {
4790
0
                    if (areBlanks(ctxt, tmp, nbchar, 1)) {
4791
0
                        if (ctxt->sax->ignorableWhitespace != NULL)
4792
0
                            ctxt->sax->ignorableWhitespace(ctxt->userData,
4793
0
                                                   tmp, nbchar);
4794
0
                    } else {
4795
0
                        if (ctxt->sax->characters != NULL)
4796
0
                            ctxt->sax->characters(ctxt->userData,
4797
0
                                                  tmp, nbchar);
4798
0
                        if (*ctxt->space == -1)
4799
0
                            *ctxt->space = -2;
4800
0
                    }
4801
0
                } else if ((ctxt->sax != NULL) &&
4802
0
                           (ctxt->disableSAX == 0) &&
4803
0
                           (ctxt->sax->characters != NULL)) {
4804
0
                    ctxt->sax->characters(ctxt->userData,
4805
0
                                          tmp, nbchar);
4806
0
                }
4807
0
            }
4808
0
            return;
4809
0
        }
4810
4811
0
get_more:
4812
0
        ccol = ctxt->input->col;
4813
0
        while (test_char_data[*in]) {
4814
0
            in++;
4815
0
            ccol++;
4816
0
        }
4817
0
        ctxt->input->col = ccol;
4818
0
        if (*in == 0xA) {
4819
0
            do {
4820
0
                ctxt->input->line++; ctxt->input->col = 1;
4821
0
                in++;
4822
0
            } while (*in == 0xA);
4823
0
            goto get_more;
4824
0
        }
4825
0
        if (*in == ']') {
4826
0
            if ((in[1] == ']') && (in[2] == '>')) {
4827
0
                xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4828
0
                ctxt->input->cur = in + 1;
4829
0
                return;
4830
0
            }
4831
0
            in++;
4832
0
            ctxt->input->col++;
4833
0
            goto get_more;
4834
0
        }
4835
0
        nbchar = in - ctxt->input->cur;
4836
0
        if (nbchar > 0) {
4837
0
            if ((ctxt->sax != NULL) &&
4838
0
                (ctxt->disableSAX == 0) &&
4839
0
                (ctxt->sax->ignorableWhitespace !=
4840
0
                 ctxt->sax->characters) &&
4841
0
                (IS_BLANK_CH(*ctxt->input->cur))) {
4842
0
                const xmlChar *tmp = ctxt->input->cur;
4843
0
                ctxt->input->cur = in;
4844
4845
0
                if (areBlanks(ctxt, tmp, nbchar, 0)) {
4846
0
                    if (ctxt->sax->ignorableWhitespace != NULL)
4847
0
                        ctxt->sax->ignorableWhitespace(ctxt->userData,
4848
0
                                                       tmp, nbchar);
4849
0
                } else {
4850
0
                    if (ctxt->sax->characters != NULL)
4851
0
                        ctxt->sax->characters(ctxt->userData,
4852
0
                                              tmp, nbchar);
4853
0
                    if (*ctxt->space == -1)
4854
0
                        *ctxt->space = -2;
4855
0
                }
4856
0
                line = ctxt->input->line;
4857
0
                col = ctxt->input->col;
4858
0
            } else if ((ctxt->sax != NULL) &&
4859
0
                       (ctxt->disableSAX == 0)) {
4860
0
                if (ctxt->sax->characters != NULL)
4861
0
                    ctxt->sax->characters(ctxt->userData,
4862
0
                                          ctxt->input->cur, nbchar);
4863
0
                line = ctxt->input->line;
4864
0
                col = ctxt->input->col;
4865
0
            }
4866
0
        }
4867
0
        ctxt->input->cur = in;
4868
0
        if (*in == 0xD) {
4869
0
            in++;
4870
0
            if (*in == 0xA) {
4871
0
                ctxt->input->cur = in;
4872
0
                in++;
4873
0
                ctxt->input->line++; ctxt->input->col = 1;
4874
0
                continue; /* while */
4875
0
            }
4876
0
            in--;
4877
0
        }
4878
0
        if (*in == '<') {
4879
0
            return;
4880
0
        }
4881
0
        if (*in == '&') {
4882
0
            return;
4883
0
        }
4884
0
        SHRINK;
4885
0
        GROW;
4886
0
        in = ctxt->input->cur;
4887
0
    } while (((*in >= 0x20) && (*in <= 0x7F)) ||
4888
0
             (*in == 0x09) || (*in == 0x0a));
4889
0
    ctxt->input->line = line;
4890
0
    ctxt->input->col = col;
4891
0
    xmlParseCharDataComplex(ctxt, partial);
4892
0
}
4893
4894
/**
4895
 * xmlParseCharDataComplex:
4896
 * @ctxt:  an XML parser context
4897
 * @cdata:  int indicating whether we are within a CDATA section
4898
 *
4899
 * Always makes progress if the first char isn't '<' or '&'.
4900
 *
4901
 * parse a CharData section.this is the fallback function
4902
 * of xmlParseCharData() when the parsing requires handling
4903
 * of non-ASCII characters.
4904
 */
4905
static void
4906
0
xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int partial) {
4907
0
    xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4908
0
    int nbchar = 0;
4909
0
    int cur, l;
4910
4911
0
    cur = CUR_CHAR(l);
4912
0
    while ((cur != '<') && /* checked */
4913
0
           (cur != '&') &&
4914
0
     (IS_CHAR(cur))) {
4915
0
  if ((cur == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
4916
0
      xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4917
0
  }
4918
0
  COPY_BUF(buf, nbchar, cur);
4919
  /* move current position before possible calling of ctxt->sax->characters */
4920
0
  NEXTL(l);
4921
0
  if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
4922
0
      buf[nbchar] = 0;
4923
4924
      /*
4925
       * OK the segment is to be consumed as chars.
4926
       */
4927
0
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4928
0
    if (areBlanks(ctxt, buf, nbchar, 0)) {
4929
0
        if (ctxt->sax->ignorableWhitespace != NULL)
4930
0
      ctxt->sax->ignorableWhitespace(ctxt->userData,
4931
0
                                     buf, nbchar);
4932
0
    } else {
4933
0
        if (ctxt->sax->characters != NULL)
4934
0
      ctxt->sax->characters(ctxt->userData, buf, nbchar);
4935
0
        if ((ctxt->sax->characters !=
4936
0
             ctxt->sax->ignorableWhitespace) &&
4937
0
      (*ctxt->space == -1))
4938
0
      *ctxt->space = -2;
4939
0
    }
4940
0
      }
4941
0
      nbchar = 0;
4942
0
            SHRINK;
4943
0
  }
4944
0
  cur = CUR_CHAR(l);
4945
0
    }
4946
0
    if (nbchar != 0) {
4947
0
        buf[nbchar] = 0;
4948
  /*
4949
   * OK the segment is to be consumed as chars.
4950
   */
4951
0
  if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4952
0
      if (areBlanks(ctxt, buf, nbchar, 0)) {
4953
0
    if (ctxt->sax->ignorableWhitespace != NULL)
4954
0
        ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4955
0
      } else {
4956
0
    if (ctxt->sax->characters != NULL)
4957
0
        ctxt->sax->characters(ctxt->userData, buf, nbchar);
4958
0
    if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4959
0
        (*ctxt->space == -1))
4960
0
        *ctxt->space = -2;
4961
0
      }
4962
0
  }
4963
0
    }
4964
    /*
4965
     * cur == 0 can mean
4966
     *
4967
     * - End of buffer.
4968
     * - An actual 0 character.
4969
     * - An incomplete UTF-8 sequence. This is allowed if partial is set.
4970
     */
4971
0
    if (ctxt->input->cur < ctxt->input->end) {
4972
0
        if ((cur == 0) && (CUR != 0)) {
4973
0
            if (partial == 0) {
4974
0
                xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4975
0
                        "Incomplete UTF-8 sequence starting with %02X\n", CUR);
4976
0
                NEXTL(1);
4977
0
            }
4978
0
        } else if ((cur != '<') && (cur != '&')) {
4979
            /* Generate the error and skip the offending character */
4980
0
            xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4981
0
                              "PCDATA invalid Char value %d\n", cur);
4982
0
            NEXTL(l);
4983
0
        }
4984
0
    }
4985
0
}
4986
4987
/**
4988
 * xmlParseCharData:
4989
 * @ctxt:  an XML parser context
4990
 * @cdata:  unused
4991
 *
4992
 * DEPRECATED: Internal function, don't use.
4993
 */
4994
void
4995
0
xmlParseCharData(xmlParserCtxtPtr ctxt, ATTRIBUTE_UNUSED int cdata) {
4996
0
    xmlParseCharDataInternal(ctxt, 0);
4997
0
}
4998
4999
/**
5000
 * xmlParseExternalID:
5001
 * @ctxt:  an XML parser context
5002
 * @publicID:  a xmlChar** receiving PubidLiteral
5003
 * @strict: indicate whether we should restrict parsing to only
5004
 *          production [75], see NOTE below
5005
 *
5006
 * DEPRECATED: Internal function, don't use.
5007
 *
5008
 * Parse an External ID or a Public ID
5009
 *
5010
 * NOTE: Productions [75] and [83] interact badly since [75] can generate
5011
 *       'PUBLIC' S PubidLiteral S SystemLiteral
5012
 *
5013
 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
5014
 *                   | 'PUBLIC' S PubidLiteral S SystemLiteral
5015
 *
5016
 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
5017
 *
5018
 * Returns the function returns SystemLiteral and in the second
5019
 *                case publicID receives PubidLiteral, is strict is off
5020
 *                it is possible to return NULL and have publicID set.
5021
 */
5022
5023
xmlChar *
5024
0
xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
5025
0
    xmlChar *URI = NULL;
5026
5027
0
    *publicID = NULL;
5028
0
    if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
5029
0
        SKIP(6);
5030
0
  if (SKIP_BLANKS == 0) {
5031
0
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5032
0
                     "Space required after 'SYSTEM'\n");
5033
0
  }
5034
0
  URI = xmlParseSystemLiteral(ctxt);
5035
0
  if (URI == NULL) {
5036
0
      xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
5037
0
        }
5038
0
    } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
5039
0
        SKIP(6);
5040
0
  if (SKIP_BLANKS == 0) {
5041
0
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5042
0
        "Space required after 'PUBLIC'\n");
5043
0
  }
5044
0
  *publicID = xmlParsePubidLiteral(ctxt);
5045
0
  if (*publicID == NULL) {
5046
0
      xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
5047
0
  }
5048
0
  if (strict) {
5049
      /*
5050
       * We don't handle [83] so "S SystemLiteral" is required.
5051
       */
5052
0
      if (SKIP_BLANKS == 0) {
5053
0
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5054
0
      "Space required after the Public Identifier\n");
5055
0
      }
5056
0
  } else {
5057
      /*
5058
       * We handle [83] so we return immediately, if
5059
       * "S SystemLiteral" is not detected. We skip blanks if no
5060
             * system literal was found, but this is harmless since we must
5061
             * be at the end of a NotationDecl.
5062
       */
5063
0
      if (SKIP_BLANKS == 0) return(NULL);
5064
0
      if ((CUR != '\'') && (CUR != '"')) return(NULL);
5065
0
  }
5066
0
  URI = xmlParseSystemLiteral(ctxt);
5067
0
  if (URI == NULL) {
5068
0
      xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
5069
0
        }
5070
0
    }
5071
0
    return(URI);
5072
0
}
5073
5074
/**
5075
 * xmlParseCommentComplex:
5076
 * @ctxt:  an XML parser context
5077
 * @buf:  the already parsed part of the buffer
5078
 * @len:  number of bytes in the buffer
5079
 * @size:  allocated size of the buffer
5080
 *
5081
 * Skip an XML (SGML) comment <!-- .... -->
5082
 *  The spec says that "For compatibility, the string "--" (double-hyphen)
5083
 *  must not occur within comments. "
5084
 * This is the slow routine in case the accelerator for ascii didn't work
5085
 *
5086
 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
5087
 */
5088
static void
5089
xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf,
5090
0
                       size_t len, size_t size) {
5091
0
    int q, ql;
5092
0
    int r, rl;
5093
0
    int cur, l;
5094
0
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
5095
0
                       XML_MAX_HUGE_LENGTH :
5096
0
                       XML_MAX_TEXT_LENGTH;
5097
5098
0
    if (buf == NULL) {
5099
0
        len = 0;
5100
0
  size = XML_PARSER_BUFFER_SIZE;
5101
0
  buf = (xmlChar *) xmlMallocAtomic(size);
5102
0
  if (buf == NULL) {
5103
0
      xmlErrMemory(ctxt);
5104
0
      return;
5105
0
  }
5106
0
    }
5107
0
    q = CUR_CHAR(ql);
5108
0
    if (q == 0)
5109
0
        goto not_terminated;
5110
0
    if (!IS_CHAR(q)) {
5111
0
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
5112
0
                          "xmlParseComment: invalid xmlChar value %d\n",
5113
0
                    q);
5114
0
  xmlFree (buf);
5115
0
  return;
5116
0
    }
5117
0
    NEXTL(ql);
5118
0
    r = CUR_CHAR(rl);
5119
0
    if (r == 0)
5120
0
        goto not_terminated;
5121
0
    if (!IS_CHAR(r)) {
5122
0
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
5123
0
                          "xmlParseComment: invalid xmlChar value %d\n",
5124
0
                    r);
5125
0
  xmlFree (buf);
5126
0
  return;
5127
0
    }
5128
0
    NEXTL(rl);
5129
0
    cur = CUR_CHAR(l);
5130
0
    if (cur == 0)
5131
0
        goto not_terminated;
5132
0
    while (IS_CHAR(cur) && /* checked */
5133
0
           ((cur != '>') ||
5134
0
      (r != '-') || (q != '-'))) {
5135
0
  if ((r == '-') && (q == '-')) {
5136
0
      xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
5137
0
  }
5138
0
  if (len + 5 >= size) {
5139
0
      xmlChar *new_buf;
5140
0
            size_t new_size;
5141
5142
0
      new_size = size * 2;
5143
0
      new_buf = (xmlChar *) xmlRealloc(buf, new_size);
5144
0
      if (new_buf == NULL) {
5145
0
    xmlFree (buf);
5146
0
    xmlErrMemory(ctxt);
5147
0
    return;
5148
0
      }
5149
0
      buf = new_buf;
5150
0
            size = new_size;
5151
0
  }
5152
0
  COPY_BUF(buf, len, q);
5153
0
        if (len > maxLength) {
5154
0
            xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
5155
0
                         "Comment too big found", NULL);
5156
0
            xmlFree (buf);
5157
0
            return;
5158
0
        }
5159
5160
0
  q = r;
5161
0
  ql = rl;
5162
0
  r = cur;
5163
0
  rl = l;
5164
5165
0
  NEXTL(l);
5166
0
  cur = CUR_CHAR(l);
5167
5168
0
    }
5169
0
    buf[len] = 0;
5170
0
    if (cur == 0) {
5171
0
  xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
5172
0
                       "Comment not terminated \n<!--%.50s\n", buf);
5173
0
    } else if (!IS_CHAR(cur)) {
5174
0
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
5175
0
                          "xmlParseComment: invalid xmlChar value %d\n",
5176
0
                    cur);
5177
0
    } else {
5178
0
        NEXT;
5179
0
  if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
5180
0
      (!ctxt->disableSAX))
5181
0
      ctxt->sax->comment(ctxt->userData, buf);
5182
0
    }
5183
0
    xmlFree(buf);
5184
0
    return;
5185
0
not_terminated:
5186
0
    xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
5187
0
       "Comment not terminated\n", NULL);
5188
0
    xmlFree(buf);
5189
0
    return;
5190
0
}
5191
5192
/**
5193
 * xmlParseComment:
5194
 * @ctxt:  an XML parser context
5195
 *
5196
 * DEPRECATED: Internal function, don't use.
5197
 *
5198
 * Parse an XML (SGML) comment. Always consumes '<!'.
5199
 *
5200
 *  The spec says that "For compatibility, the string "--" (double-hyphen)
5201
 *  must not occur within comments. "
5202
 *
5203
 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
5204
 */
5205
void
5206
0
xmlParseComment(xmlParserCtxtPtr ctxt) {
5207
0
    xmlChar *buf = NULL;
5208
0
    size_t size = XML_PARSER_BUFFER_SIZE;
5209
0
    size_t len = 0;
5210
0
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
5211
0
                       XML_MAX_HUGE_LENGTH :
5212
0
                       XML_MAX_TEXT_LENGTH;
5213
0
    const xmlChar *in;
5214
0
    size_t nbchar = 0;
5215
0
    int ccol;
5216
5217
    /*
5218
     * Check that there is a comment right here.
5219
     */
5220
0
    if ((RAW != '<') || (NXT(1) != '!'))
5221
0
        return;
5222
0
    SKIP(2);
5223
0
    if ((RAW != '-') || (NXT(1) != '-'))
5224
0
        return;
5225
0
    SKIP(2);
5226
0
    GROW;
5227
5228
    /*
5229
     * Accelerated common case where input don't need to be
5230
     * modified before passing it to the handler.
5231
     */
5232
0
    in = ctxt->input->cur;
5233
0
    do {
5234
0
  if (*in == 0xA) {
5235
0
      do {
5236
0
    ctxt->input->line++; ctxt->input->col = 1;
5237
0
    in++;
5238
0
      } while (*in == 0xA);
5239
0
  }
5240
0
get_more:
5241
0
        ccol = ctxt->input->col;
5242
0
  while (((*in > '-') && (*in <= 0x7F)) ||
5243
0
         ((*in >= 0x20) && (*in < '-')) ||
5244
0
         (*in == 0x09)) {
5245
0
        in++;
5246
0
        ccol++;
5247
0
  }
5248
0
  ctxt->input->col = ccol;
5249
0
  if (*in == 0xA) {
5250
0
      do {
5251
0
    ctxt->input->line++; ctxt->input->col = 1;
5252
0
    in++;
5253
0
      } while (*in == 0xA);
5254
0
      goto get_more;
5255
0
  }
5256
0
  nbchar = in - ctxt->input->cur;
5257
  /*
5258
   * save current set of data
5259
   */
5260
0
  if (nbchar > 0) {
5261
0
            if (buf == NULL) {
5262
0
                if ((*in == '-') && (in[1] == '-'))
5263
0
                    size = nbchar + 1;
5264
0
                else
5265
0
                    size = XML_PARSER_BUFFER_SIZE + nbchar;
5266
0
                buf = (xmlChar *) xmlMallocAtomic(size);
5267
0
                if (buf == NULL) {
5268
0
                    xmlErrMemory(ctxt);
5269
0
                    return;
5270
0
                }
5271
0
                len = 0;
5272
0
            } else if (len + nbchar + 1 >= size) {
5273
0
                xmlChar *new_buf;
5274
0
                size  += len + nbchar + XML_PARSER_BUFFER_SIZE;
5275
0
                new_buf = (xmlChar *) xmlRealloc(buf, size);
5276
0
                if (new_buf == NULL) {
5277
0
                    xmlFree (buf);
5278
0
                    xmlErrMemory(ctxt);
5279
0
                    return;
5280
0
                }
5281
0
                buf = new_buf;
5282
0
            }
5283
0
            memcpy(&buf[len], ctxt->input->cur, nbchar);
5284
0
            len += nbchar;
5285
0
            buf[len] = 0;
5286
0
  }
5287
0
        if (len > maxLength) {
5288
0
            xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
5289
0
                         "Comment too big found", NULL);
5290
0
            xmlFree (buf);
5291
0
            return;
5292
0
        }
5293
0
  ctxt->input->cur = in;
5294
0
  if (*in == 0xA) {
5295
0
      in++;
5296
0
      ctxt->input->line++; ctxt->input->col = 1;
5297
0
  }
5298
0
  if (*in == 0xD) {
5299
0
      in++;
5300
0
      if (*in == 0xA) {
5301
0
    ctxt->input->cur = in;
5302
0
    in++;
5303
0
    ctxt->input->line++; ctxt->input->col = 1;
5304
0
    goto get_more;
5305
0
      }
5306
0
      in--;
5307
0
  }
5308
0
  SHRINK;
5309
0
  GROW;
5310
0
  in = ctxt->input->cur;
5311
0
  if (*in == '-') {
5312
0
      if (in[1] == '-') {
5313
0
          if (in[2] == '>') {
5314
0
        SKIP(3);
5315
0
        if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
5316
0
            (!ctxt->disableSAX)) {
5317
0
      if (buf != NULL)
5318
0
          ctxt->sax->comment(ctxt->userData, buf);
5319
0
      else
5320
0
          ctxt->sax->comment(ctxt->userData, BAD_CAST "");
5321
0
        }
5322
0
        if (buf != NULL)
5323
0
            xmlFree(buf);
5324
0
        return;
5325
0
    }
5326
0
    if (buf != NULL) {
5327
0
        xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5328
0
                          "Double hyphen within comment: "
5329
0
                                      "<!--%.50s\n",
5330
0
              buf);
5331
0
    } else
5332
0
        xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5333
0
                          "Double hyphen within comment\n", NULL);
5334
0
    in++;
5335
0
    ctxt->input->col++;
5336
0
      }
5337
0
      in++;
5338
0
      ctxt->input->col++;
5339
0
      goto get_more;
5340
0
  }
5341
0
    } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09) || (*in == 0x0a));
5342
0
    xmlParseCommentComplex(ctxt, buf, len, size);
5343
0
    return;
5344
0
}
5345
5346
5347
/**
5348
 * xmlParsePITarget:
5349
 * @ctxt:  an XML parser context
5350
 *
5351
 * DEPRECATED: Internal function, don't use.
5352
 *
5353
 * parse the name of a PI
5354
 *
5355
 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
5356
 *
5357
 * Returns the PITarget name or NULL
5358
 */
5359
5360
const xmlChar *
5361
0
xmlParsePITarget(xmlParserCtxtPtr ctxt) {
5362
0
    const xmlChar *name;
5363
5364
0
    name = xmlParseName(ctxt);
5365
0
    if ((name != NULL) &&
5366
0
        ((name[0] == 'x') || (name[0] == 'X')) &&
5367
0
        ((name[1] == 'm') || (name[1] == 'M')) &&
5368
0
        ((name[2] == 'l') || (name[2] == 'L'))) {
5369
0
  int i;
5370
0
  if ((name[0] == 'x') && (name[1] == 'm') &&
5371
0
      (name[2] == 'l') && (name[3] == 0)) {
5372
0
      xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5373
0
     "XML declaration allowed only at the start of the document\n");
5374
0
      return(name);
5375
0
  } else if (name[3] == 0) {
5376
0
      xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
5377
0
      return(name);
5378
0
  }
5379
0
  for (i = 0;;i++) {
5380
0
      if (xmlW3CPIs[i] == NULL) break;
5381
0
      if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
5382
0
          return(name);
5383
0
  }
5384
0
  xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5385
0
          "xmlParsePITarget: invalid name prefix 'xml'\n",
5386
0
          NULL, NULL);
5387
0
    }
5388
0
    if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
5389
0
  xmlNsErr(ctxt, XML_NS_ERR_COLON,
5390
0
     "colons are forbidden from PI names '%s'\n", name, NULL, NULL);
5391
0
    }
5392
0
    return(name);
5393
0
}
5394
5395
#ifdef LIBXML_CATALOG_ENABLED
5396
/**
5397
 * xmlParseCatalogPI:
5398
 * @ctxt:  an XML parser context
5399
 * @catalog:  the PI value string
5400
 *
5401
 * parse an XML Catalog Processing Instruction.
5402
 *
5403
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
5404
 *
5405
 * Occurs only if allowed by the user and if happening in the Misc
5406
 * part of the document before any doctype information
5407
 * This will add the given catalog to the parsing context in order
5408
 * to be used if there is a resolution need further down in the document
5409
 */
5410
5411
static void
5412
0
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
5413
0
    xmlChar *URL = NULL;
5414
0
    const xmlChar *tmp, *base;
5415
0
    xmlChar marker;
5416
5417
0
    tmp = catalog;
5418
0
    while (IS_BLANK_CH(*tmp)) tmp++;
5419
0
    if (xmlStrncmp(tmp, BAD_CAST"catalog", 7))
5420
0
  goto error;
5421
0
    tmp += 7;
5422
0
    while (IS_BLANK_CH(*tmp)) tmp++;
5423
0
    if (*tmp != '=') {
5424
0
  return;
5425
0
    }
5426
0
    tmp++;
5427
0
    while (IS_BLANK_CH(*tmp)) tmp++;
5428
0
    marker = *tmp;
5429
0
    if ((marker != '\'') && (marker != '"'))
5430
0
  goto error;
5431
0
    tmp++;
5432
0
    base = tmp;
5433
0
    while ((*tmp != 0) && (*tmp != marker)) tmp++;
5434
0
    if (*tmp == 0)
5435
0
  goto error;
5436
0
    URL = xmlStrndup(base, tmp - base);
5437
0
    tmp++;
5438
0
    while (IS_BLANK_CH(*tmp)) tmp++;
5439
0
    if (*tmp != 0)
5440
0
  goto error;
5441
5442
0
    if (URL != NULL) {
5443
        /*
5444
         * Unfortunately, the catalog API doesn't report OOM errors.
5445
         * xmlGetLastError isn't very helpful since we don't know
5446
         * where the last error came from. We'd have to reset it
5447
         * before this call and restore it afterwards.
5448
         */
5449
0
  ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
5450
0
  xmlFree(URL);
5451
0
    }
5452
0
    return;
5453
5454
0
error:
5455
0
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
5456
0
            "Catalog PI syntax error: %s\n",
5457
0
      catalog, NULL);
5458
0
    if (URL != NULL)
5459
0
  xmlFree(URL);
5460
0
}
5461
#endif
5462
5463
/**
5464
 * xmlParsePI:
5465
 * @ctxt:  an XML parser context
5466
 *
5467
 * DEPRECATED: Internal function, don't use.
5468
 *
5469
 * parse an XML Processing Instruction.
5470
 *
5471
 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
5472
 *
5473
 * The processing is transferred to SAX once parsed.
5474
 */
5475
5476
void
5477
0
xmlParsePI(xmlParserCtxtPtr ctxt) {
5478
0
    xmlChar *buf = NULL;
5479
0
    size_t len = 0;
5480
0
    size_t size = XML_PARSER_BUFFER_SIZE;
5481
0
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
5482
0
                       XML_MAX_HUGE_LENGTH :
5483
0
                       XML_MAX_TEXT_LENGTH;
5484
0
    int cur, l;
5485
0
    const xmlChar *target;
5486
5487
0
    if ((RAW == '<') && (NXT(1) == '?')) {
5488
  /*
5489
   * this is a Processing Instruction.
5490
   */
5491
0
  SKIP(2);
5492
5493
  /*
5494
   * Parse the target name and check for special support like
5495
   * namespace.
5496
   */
5497
0
        target = xmlParsePITarget(ctxt);
5498
0
  if (target != NULL) {
5499
0
      if ((RAW == '?') && (NXT(1) == '>')) {
5500
0
    SKIP(2);
5501
5502
    /*
5503
     * SAX: PI detected.
5504
     */
5505
0
    if ((ctxt->sax) && (!ctxt->disableSAX) &&
5506
0
        (ctxt->sax->processingInstruction != NULL))
5507
0
        ctxt->sax->processingInstruction(ctxt->userData,
5508
0
                                         target, NULL);
5509
0
    return;
5510
0
      }
5511
0
      buf = (xmlChar *) xmlMallocAtomic(size);
5512
0
      if (buf == NULL) {
5513
0
    xmlErrMemory(ctxt);
5514
0
    return;
5515
0
      }
5516
0
      if (SKIP_BLANKS == 0) {
5517
0
    xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
5518
0
        "ParsePI: PI %s space expected\n", target);
5519
0
      }
5520
0
      cur = CUR_CHAR(l);
5521
0
      while (IS_CHAR(cur) && /* checked */
5522
0
       ((cur != '?') || (NXT(1) != '>'))) {
5523
0
    if (len + 5 >= size) {
5524
0
        xmlChar *tmp;
5525
0
                    size_t new_size = size * 2;
5526
0
        tmp = (xmlChar *) xmlRealloc(buf, new_size);
5527
0
        if (tmp == NULL) {
5528
0
      xmlErrMemory(ctxt);
5529
0
      xmlFree(buf);
5530
0
      return;
5531
0
        }
5532
0
        buf = tmp;
5533
0
                    size = new_size;
5534
0
    }
5535
0
    COPY_BUF(buf, len, cur);
5536
0
                if (len > maxLength) {
5537
0
                    xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5538
0
                                      "PI %s too big found", target);
5539
0
                    xmlFree(buf);
5540
0
                    return;
5541
0
                }
5542
0
    NEXTL(l);
5543
0
    cur = CUR_CHAR(l);
5544
0
      }
5545
0
      buf[len] = 0;
5546
0
      if (cur != '?') {
5547
0
    xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5548
0
          "ParsePI: PI %s never end ...\n", target);
5549
0
      } else {
5550
0
    SKIP(2);
5551
5552
0
#ifdef LIBXML_CATALOG_ENABLED
5553
0
    if ((ctxt->inSubset == 0) &&
5554
0
        (xmlStrEqual(target, XML_CATALOG_PI))) {
5555
0
        xmlCatalogAllow allow = xmlCatalogGetDefaults();
5556
0
        if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5557
0
      (allow == XML_CATA_ALLOW_ALL))
5558
0
      xmlParseCatalogPI(ctxt, buf);
5559
0
    }
5560
0
#endif
5561
5562
5563
    /*
5564
     * SAX: PI detected.
5565
     */
5566
0
    if ((ctxt->sax) && (!ctxt->disableSAX) &&
5567
0
        (ctxt->sax->processingInstruction != NULL))
5568
0
        ctxt->sax->processingInstruction(ctxt->userData,
5569
0
                                         target, buf);
5570
0
      }
5571
0
      xmlFree(buf);
5572
0
  } else {
5573
0
      xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
5574
0
  }
5575
0
    }
5576
0
}
5577
5578
/**
5579
 * xmlParseNotationDecl:
5580
 * @ctxt:  an XML parser context
5581
 *
5582
 * DEPRECATED: Internal function, don't use.
5583
 *
5584
 * Parse a notation declaration. Always consumes '<!'.
5585
 *
5586
 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID |  PublicID) S? '>'
5587
 *
5588
 * Hence there are actually 3 choices:
5589
 *     'PUBLIC' S PubidLiteral
5590
 *     'PUBLIC' S PubidLiteral S SystemLiteral
5591
 * and 'SYSTEM' S SystemLiteral
5592
 *
5593
 * See the NOTE on xmlParseExternalID().
5594
 */
5595
5596
void
5597
0
xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
5598
0
    const xmlChar *name;
5599
0
    xmlChar *Pubid;
5600
0
    xmlChar *Systemid;
5601
5602
0
    if ((CUR != '<') || (NXT(1) != '!'))
5603
0
        return;
5604
0
    SKIP(2);
5605
5606
0
    if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5607
0
  int inputid = ctxt->input->id;
5608
0
  SKIP(8);
5609
0
  if (SKIP_BLANKS_PE == 0) {
5610
0
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5611
0
         "Space required after '<!NOTATION'\n");
5612
0
      return;
5613
0
  }
5614
5615
0
        name = xmlParseName(ctxt);
5616
0
  if (name == NULL) {
5617
0
      xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5618
0
      return;
5619
0
  }
5620
0
  if (xmlStrchr(name, ':') != NULL) {
5621
0
      xmlNsErr(ctxt, XML_NS_ERR_COLON,
5622
0
         "colons are forbidden from notation names '%s'\n",
5623
0
         name, NULL, NULL);
5624
0
  }
5625
0
  if (SKIP_BLANKS_PE == 0) {
5626
0
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5627
0
         "Space required after the NOTATION name'\n");
5628
0
      return;
5629
0
  }
5630
5631
  /*
5632
   * Parse the IDs.
5633
   */
5634
0
  Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
5635
0
  SKIP_BLANKS_PE;
5636
5637
0
  if (RAW == '>') {
5638
0
      if (inputid != ctxt->input->id) {
5639
0
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5640
0
                         "Notation declaration doesn't start and stop"
5641
0
                               " in the same entity\n");
5642
0
      }
5643
0
      NEXT;
5644
0
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5645
0
    (ctxt->sax->notationDecl != NULL))
5646
0
    ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5647
0
  } else {
5648
0
      xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5649
0
  }
5650
0
  if (Systemid != NULL) xmlFree(Systemid);
5651
0
  if (Pubid != NULL) xmlFree(Pubid);
5652
0
    }
5653
0
}
5654
5655
/**
5656
 * xmlParseEntityDecl:
5657
 * @ctxt:  an XML parser context
5658
 *
5659
 * DEPRECATED: Internal function, don't use.
5660
 *
5661
 * Parse an entity declaration. Always consumes '<!'.
5662
 *
5663
 * [70] EntityDecl ::= GEDecl | PEDecl
5664
 *
5665
 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5666
 *
5667
 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5668
 *
5669
 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5670
 *
5671
 * [74] PEDef ::= EntityValue | ExternalID
5672
 *
5673
 * [76] NDataDecl ::= S 'NDATA' S Name
5674
 *
5675
 * [ VC: Notation Declared ]
5676
 * The Name must match the declared name of a notation.
5677
 */
5678
5679
void
5680
0
xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
5681
0
    const xmlChar *name = NULL;
5682
0
    xmlChar *value = NULL;
5683
0
    xmlChar *URI = NULL, *literal = NULL;
5684
0
    const xmlChar *ndata = NULL;
5685
0
    int isParameter = 0;
5686
0
    xmlChar *orig = NULL;
5687
5688
0
    if ((CUR != '<') || (NXT(1) != '!'))
5689
0
        return;
5690
0
    SKIP(2);
5691
5692
    /* GROW; done in the caller */
5693
0
    if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
5694
0
  int inputid = ctxt->input->id;
5695
0
  SKIP(6);
5696
0
  if (SKIP_BLANKS_PE == 0) {
5697
0
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5698
0
         "Space required after '<!ENTITY'\n");
5699
0
  }
5700
5701
0
  if (RAW == '%') {
5702
0
      NEXT;
5703
0
      if (SKIP_BLANKS_PE == 0) {
5704
0
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5705
0
             "Space required after '%%'\n");
5706
0
      }
5707
0
      isParameter = 1;
5708
0
  }
5709
5710
0
        name = xmlParseName(ctxt);
5711
0
  if (name == NULL) {
5712
0
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5713
0
                     "xmlParseEntityDecl: no name\n");
5714
0
            return;
5715
0
  }
5716
0
  if (xmlStrchr(name, ':') != NULL) {
5717
0
      xmlNsErr(ctxt, XML_NS_ERR_COLON,
5718
0
         "colons are forbidden from entities names '%s'\n",
5719
0
         name, NULL, NULL);
5720
0
  }
5721
0
  if (SKIP_BLANKS_PE == 0) {
5722
0
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5723
0
         "Space required after the entity name\n");
5724
0
  }
5725
5726
  /*
5727
   * handle the various case of definitions...
5728
   */
5729
0
  if (isParameter) {
5730
0
      if ((RAW == '"') || (RAW == '\'')) {
5731
0
          value = xmlParseEntityValue(ctxt, &orig);
5732
0
    if (value) {
5733
0
        if ((ctxt->sax != NULL) &&
5734
0
      (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5735
0
      ctxt->sax->entityDecl(ctxt->userData, name,
5736
0
                        XML_INTERNAL_PARAMETER_ENTITY,
5737
0
            NULL, NULL, value);
5738
0
    }
5739
0
      } else {
5740
0
          URI = xmlParseExternalID(ctxt, &literal, 1);
5741
0
    if ((URI == NULL) && (literal == NULL)) {
5742
0
        xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5743
0
    }
5744
0
    if (URI) {
5745
0
                    if (xmlStrchr(URI, '#')) {
5746
0
                        xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5747
0
                    } else {
5748
0
                        if ((ctxt->sax != NULL) &&
5749
0
                            (!ctxt->disableSAX) &&
5750
0
                            (ctxt->sax->entityDecl != NULL))
5751
0
                            ctxt->sax->entityDecl(ctxt->userData, name,
5752
0
                                        XML_EXTERNAL_PARAMETER_ENTITY,
5753
0
                                        literal, URI, NULL);
5754
0
                    }
5755
0
    }
5756
0
      }
5757
0
  } else {
5758
0
      if ((RAW == '"') || (RAW == '\'')) {
5759
0
          value = xmlParseEntityValue(ctxt, &orig);
5760
0
    if ((ctxt->sax != NULL) &&
5761
0
        (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5762
0
        ctxt->sax->entityDecl(ctxt->userData, name,
5763
0
        XML_INTERNAL_GENERAL_ENTITY,
5764
0
        NULL, NULL, value);
5765
    /*
5766
     * For expat compatibility in SAX mode.
5767
     */
5768
0
    if ((ctxt->myDoc == NULL) ||
5769
0
        (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5770
0
        if (ctxt->myDoc == NULL) {
5771
0
      ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5772
0
      if (ctxt->myDoc == NULL) {
5773
0
          xmlErrMemory(ctxt);
5774
0
          goto done;
5775
0
      }
5776
0
      ctxt->myDoc->properties = XML_DOC_INTERNAL;
5777
0
        }
5778
0
        if (ctxt->myDoc->intSubset == NULL) {
5779
0
      ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5780
0
              BAD_CAST "fake", NULL, NULL);
5781
0
                        if (ctxt->myDoc->intSubset == NULL) {
5782
0
                            xmlErrMemory(ctxt);
5783
0
                            goto done;
5784
0
                        }
5785
0
                    }
5786
5787
0
        xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5788
0
                    NULL, NULL, value);
5789
0
    }
5790
0
      } else {
5791
0
          URI = xmlParseExternalID(ctxt, &literal, 1);
5792
0
    if ((URI == NULL) && (literal == NULL)) {
5793
0
        xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5794
0
    }
5795
0
    if (URI) {
5796
0
                    if (xmlStrchr(URI, '#')) {
5797
0
                        xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5798
0
                    }
5799
0
    }
5800
0
    if ((RAW != '>') && (SKIP_BLANKS_PE == 0)) {
5801
0
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5802
0
           "Space required before 'NDATA'\n");
5803
0
    }
5804
0
    if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
5805
0
        SKIP(5);
5806
0
        if (SKIP_BLANKS_PE == 0) {
5807
0
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5808
0
               "Space required after 'NDATA'\n");
5809
0
        }
5810
0
        ndata = xmlParseName(ctxt);
5811
0
        if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5812
0
            (ctxt->sax->unparsedEntityDecl != NULL))
5813
0
      ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5814
0
            literal, URI, ndata);
5815
0
    } else {
5816
0
        if ((ctxt->sax != NULL) &&
5817
0
            (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5818
0
      ctxt->sax->entityDecl(ctxt->userData, name,
5819
0
            XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5820
0
            literal, URI, NULL);
5821
        /*
5822
         * For expat compatibility in SAX mode.
5823
         * assuming the entity replacement was asked for
5824
         */
5825
0
        if ((ctxt->replaceEntities != 0) &&
5826
0
      ((ctxt->myDoc == NULL) ||
5827
0
      (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5828
0
      if (ctxt->myDoc == NULL) {
5829
0
          ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5830
0
          if (ctxt->myDoc == NULL) {
5831
0
              xmlErrMemory(ctxt);
5832
0
        goto done;
5833
0
          }
5834
0
          ctxt->myDoc->properties = XML_DOC_INTERNAL;
5835
0
      }
5836
5837
0
      if (ctxt->myDoc->intSubset == NULL) {
5838
0
          ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5839
0
            BAD_CAST "fake", NULL, NULL);
5840
0
                            if (ctxt->myDoc->intSubset == NULL) {
5841
0
                                xmlErrMemory(ctxt);
5842
0
                                goto done;
5843
0
                            }
5844
0
                        }
5845
0
      xmlSAX2EntityDecl(ctxt, name,
5846
0
                  XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5847
0
                  literal, URI, NULL);
5848
0
        }
5849
0
    }
5850
0
      }
5851
0
  }
5852
0
  SKIP_BLANKS_PE;
5853
0
  if (RAW != '>') {
5854
0
      xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
5855
0
              "xmlParseEntityDecl: entity %s not terminated\n", name);
5856
0
      xmlHaltParser(ctxt);
5857
0
  } else {
5858
0
      if (inputid != ctxt->input->id) {
5859
0
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5860
0
                         "Entity declaration doesn't start and stop in"
5861
0
                               " the same entity\n");
5862
0
      }
5863
0
      NEXT;
5864
0
  }
5865
0
  if (orig != NULL) {
5866
      /*
5867
       * Ugly mechanism to save the raw entity value.
5868
       */
5869
0
      xmlEntityPtr cur = NULL;
5870
5871
0
      if (isParameter) {
5872
0
          if ((ctxt->sax != NULL) &&
5873
0
        (ctxt->sax->getParameterEntity != NULL))
5874
0
        cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5875
0
      } else {
5876
0
          if ((ctxt->sax != NULL) &&
5877
0
        (ctxt->sax->getEntity != NULL))
5878
0
        cur = ctxt->sax->getEntity(ctxt->userData, name);
5879
0
    if ((cur == NULL) && (ctxt->userData==ctxt)) {
5880
0
        cur = xmlSAX2GetEntity(ctxt, name);
5881
0
    }
5882
0
      }
5883
0
            if ((cur != NULL) && (cur->orig == NULL)) {
5884
0
    cur->orig = orig;
5885
0
                orig = NULL;
5886
0
      }
5887
0
  }
5888
5889
0
done:
5890
0
  if (value != NULL) xmlFree(value);
5891
0
  if (URI != NULL) xmlFree(URI);
5892
0
  if (literal != NULL) xmlFree(literal);
5893
0
        if (orig != NULL) xmlFree(orig);
5894
0
    }
5895
0
}
5896
5897
/**
5898
 * xmlParseDefaultDecl:
5899
 * @ctxt:  an XML parser context
5900
 * @value:  Receive a possible fixed default value for the attribute
5901
 *
5902
 * DEPRECATED: Internal function, don't use.
5903
 *
5904
 * Parse an attribute default declaration
5905
 *
5906
 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5907
 *
5908
 * [ VC: Required Attribute ]
5909
 * if the default declaration is the keyword #REQUIRED, then the
5910
 * attribute must be specified for all elements of the type in the
5911
 * attribute-list declaration.
5912
 *
5913
 * [ VC: Attribute Default Legal ]
5914
 * The declared default value must meet the lexical constraints of
5915
 * the declared attribute type c.f. xmlValidateAttributeDecl()
5916
 *
5917
 * [ VC: Fixed Attribute Default ]
5918
 * if an attribute has a default value declared with the #FIXED
5919
 * keyword, instances of that attribute must match the default value.
5920
 *
5921
 * [ WFC: No < in Attribute Values ]
5922
 * handled in xmlParseAttValue()
5923
 *
5924
 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5925
 *          or XML_ATTRIBUTE_FIXED.
5926
 */
5927
5928
int
5929
0
xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5930
0
    int val;
5931
0
    xmlChar *ret;
5932
5933
0
    *value = NULL;
5934
0
    if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
5935
0
  SKIP(9);
5936
0
  return(XML_ATTRIBUTE_REQUIRED);
5937
0
    }
5938
0
    if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
5939
0
  SKIP(8);
5940
0
  return(XML_ATTRIBUTE_IMPLIED);
5941
0
    }
5942
0
    val = XML_ATTRIBUTE_NONE;
5943
0
    if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
5944
0
  SKIP(6);
5945
0
  val = XML_ATTRIBUTE_FIXED;
5946
0
  if (SKIP_BLANKS_PE == 0) {
5947
0
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5948
0
         "Space required after '#FIXED'\n");
5949
0
  }
5950
0
    }
5951
0
    ret = xmlParseAttValue(ctxt);
5952
0
    if (ret == NULL) {
5953
0
  xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
5954
0
           "Attribute default value declaration error\n");
5955
0
    } else
5956
0
        *value = ret;
5957
0
    return(val);
5958
0
}
5959
5960
/**
5961
 * xmlParseNotationType:
5962
 * @ctxt:  an XML parser context
5963
 *
5964
 * DEPRECATED: Internal function, don't use.
5965
 *
5966
 * Parse a Notation attribute type.
5967
 *
5968
 * Note: the leading 'NOTATION' S part has already been parsed...
5969
 *
5970
 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5971
 *
5972
 * [ VC: Notation Attributes ]
5973
 * Values of this type must match one of the notation names included
5974
 * in the declaration; all notation names in the declaration must be declared.
5975
 *
5976
 * Returns: the notation attribute tree built while parsing
5977
 */
5978
5979
xmlEnumerationPtr
5980
0
xmlParseNotationType(xmlParserCtxtPtr ctxt) {
5981
0
    const xmlChar *name;
5982
0
    xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5983
5984
0
    if (RAW != '(') {
5985
0
  xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5986
0
  return(NULL);
5987
0
    }
5988
0
    do {
5989
0
        NEXT;
5990
0
  SKIP_BLANKS_PE;
5991
0
        name = xmlParseName(ctxt);
5992
0
  if (name == NULL) {
5993
0
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5994
0
         "Name expected in NOTATION declaration\n");
5995
0
            xmlFreeEnumeration(ret);
5996
0
      return(NULL);
5997
0
  }
5998
0
  tmp = ret;
5999
0
  while (tmp != NULL) {
6000
0
      if (xmlStrEqual(name, tmp->name)) {
6001
0
    xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
6002
0
    "standalone: attribute notation value token %s duplicated\n",
6003
0
         name, NULL);
6004
0
    if (!xmlDictOwns(ctxt->dict, name))
6005
0
        xmlFree((xmlChar *) name);
6006
0
    break;
6007
0
      }
6008
0
      tmp = tmp->next;
6009
0
  }
6010
0
  if (tmp == NULL) {
6011
0
      cur = xmlCreateEnumeration(name);
6012
0
      if (cur == NULL) {
6013
0
                xmlErrMemory(ctxt);
6014
0
                xmlFreeEnumeration(ret);
6015
0
                return(NULL);
6016
0
            }
6017
0
      if (last == NULL) ret = last = cur;
6018
0
      else {
6019
0
    last->next = cur;
6020
0
    last = cur;
6021
0
      }
6022
0
  }
6023
0
  SKIP_BLANKS_PE;
6024
0
    } while (RAW == '|');
6025
0
    if (RAW != ')') {
6026
0
  xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
6027
0
        xmlFreeEnumeration(ret);
6028
0
  return(NULL);
6029
0
    }
6030
0
    NEXT;
6031
0
    return(ret);
6032
0
}
6033
6034
/**
6035
 * xmlParseEnumerationType:
6036
 * @ctxt:  an XML parser context
6037
 *
6038
 * DEPRECATED: Internal function, don't use.
6039
 *
6040
 * parse an Enumeration attribute type.
6041
 *
6042
 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
6043
 *
6044
 * [ VC: Enumeration ]
6045
 * Values of this type must match one of the Nmtoken tokens in
6046
 * the declaration
6047
 *
6048
 * Returns: the enumeration attribute tree built while parsing
6049
 */
6050
6051
xmlEnumerationPtr
6052
0
xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
6053
0
    xmlChar *name;
6054
0
    xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
6055
6056
0
    if (RAW != '(') {
6057
0
  xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
6058
0
  return(NULL);
6059
0
    }
6060
0
    do {
6061
0
        NEXT;
6062
0
  SKIP_BLANKS_PE;
6063
0
        name = xmlParseNmtoken(ctxt);
6064
0
  if (name == NULL) {
6065
0
      xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
6066
0
      return(ret);
6067
0
  }
6068
0
  tmp = ret;
6069
0
  while (tmp != NULL) {
6070
0
      if (xmlStrEqual(name, tmp->name)) {
6071
0
    xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
6072
0
    "standalone: attribute enumeration value token %s duplicated\n",
6073
0
         name, NULL);
6074
0
    if (!xmlDictOwns(ctxt->dict, name))
6075
0
        xmlFree(name);
6076
0
    break;
6077
0
      }
6078
0
      tmp = tmp->next;
6079
0
  }
6080
0
  if (tmp == NULL) {
6081
0
      cur = xmlCreateEnumeration(name);
6082
0
      if (!xmlDictOwns(ctxt->dict, name))
6083
0
    xmlFree(name);
6084
0
      if (cur == NULL) {
6085
0
                xmlErrMemory(ctxt);
6086
0
                xmlFreeEnumeration(ret);
6087
0
                return(NULL);
6088
0
            }
6089
0
      if (last == NULL) ret = last = cur;
6090
0
      else {
6091
0
    last->next = cur;
6092
0
    last = cur;
6093
0
      }
6094
0
  }
6095
0
  SKIP_BLANKS_PE;
6096
0
    } while (RAW == '|');
6097
0
    if (RAW != ')') {
6098
0
  xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
6099
0
  return(ret);
6100
0
    }
6101
0
    NEXT;
6102
0
    return(ret);
6103
0
}
6104
6105
/**
6106
 * xmlParseEnumeratedType:
6107
 * @ctxt:  an XML parser context
6108
 * @tree:  the enumeration tree built while parsing
6109
 *
6110
 * DEPRECATED: Internal function, don't use.
6111
 *
6112
 * parse an Enumerated attribute type.
6113
 *
6114
 * [57] EnumeratedType ::= NotationType | Enumeration
6115
 *
6116
 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
6117
 *
6118
 *
6119
 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
6120
 */
6121
6122
int
6123
0
xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
6124
0
    if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
6125
0
  SKIP(8);
6126
0
  if (SKIP_BLANKS_PE == 0) {
6127
0
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6128
0
         "Space required after 'NOTATION'\n");
6129
0
      return(0);
6130
0
  }
6131
0
  *tree = xmlParseNotationType(ctxt);
6132
0
  if (*tree == NULL) return(0);
6133
0
  return(XML_ATTRIBUTE_NOTATION);
6134
0
    }
6135
0
    *tree = xmlParseEnumerationType(ctxt);
6136
0
    if (*tree == NULL) return(0);
6137
0
    return(XML_ATTRIBUTE_ENUMERATION);
6138
0
}
6139
6140
/**
6141
 * xmlParseAttributeType:
6142
 * @ctxt:  an XML parser context
6143
 * @tree:  the enumeration tree built while parsing
6144
 *
6145
 * DEPRECATED: Internal function, don't use.
6146
 *
6147
 * parse the Attribute list def for an element
6148
 *
6149
 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
6150
 *
6151
 * [55] StringType ::= 'CDATA'
6152
 *
6153
 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
6154
 *                        'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
6155
 *
6156
 * Validity constraints for attribute values syntax are checked in
6157
 * xmlValidateAttributeValue()
6158
 *
6159
 * [ VC: ID ]
6160
 * Values of type ID must match the Name production. A name must not
6161
 * appear more than once in an XML document as a value of this type;
6162
 * i.e., ID values must uniquely identify the elements which bear them.
6163
 *
6164
 * [ VC: One ID per Element Type ]
6165
 * No element type may have more than one ID attribute specified.
6166
 *
6167
 * [ VC: ID Attribute Default ]
6168
 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
6169
 *
6170
 * [ VC: IDREF ]
6171
 * Values of type IDREF must match the Name production, and values
6172
 * of type IDREFS must match Names; each IDREF Name must match the value
6173
 * of an ID attribute on some element in the XML document; i.e. IDREF
6174
 * values must match the value of some ID attribute.
6175
 *
6176
 * [ VC: Entity Name ]
6177
 * Values of type ENTITY must match the Name production, values
6178
 * of type ENTITIES must match Names; each Entity Name must match the
6179
 * name of an unparsed entity declared in the DTD.
6180
 *
6181
 * [ VC: Name Token ]
6182
 * Values of type NMTOKEN must match the Nmtoken production; values
6183
 * of type NMTOKENS must match Nmtokens.
6184
 *
6185
 * Returns the attribute type
6186
 */
6187
int
6188
0
xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
6189
0
    if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
6190
0
  SKIP(5);
6191
0
  return(XML_ATTRIBUTE_CDATA);
6192
0
     } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
6193
0
  SKIP(6);
6194
0
  return(XML_ATTRIBUTE_IDREFS);
6195
0
     } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
6196
0
  SKIP(5);
6197
0
  return(XML_ATTRIBUTE_IDREF);
6198
0
     } else if ((RAW == 'I') && (NXT(1) == 'D')) {
6199
0
        SKIP(2);
6200
0
  return(XML_ATTRIBUTE_ID);
6201
0
     } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
6202
0
  SKIP(6);
6203
0
  return(XML_ATTRIBUTE_ENTITY);
6204
0
     } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
6205
0
  SKIP(8);
6206
0
  return(XML_ATTRIBUTE_ENTITIES);
6207
0
     } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
6208
0
  SKIP(8);
6209
0
  return(XML_ATTRIBUTE_NMTOKENS);
6210
0
     } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
6211
0
  SKIP(7);
6212
0
  return(XML_ATTRIBUTE_NMTOKEN);
6213
0
     }
6214
0
     return(xmlParseEnumeratedType(ctxt, tree));
6215
0
}
6216
6217
/**
6218
 * xmlParseAttributeListDecl:
6219
 * @ctxt:  an XML parser context
6220
 *
6221
 * DEPRECATED: Internal function, don't use.
6222
 *
6223
 * Parse an attribute list declaration for an element. Always consumes '<!'.
6224
 *
6225
 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
6226
 *
6227
 * [53] AttDef ::= S Name S AttType S DefaultDecl
6228
 *
6229
 */
6230
void
6231
0
xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
6232
0
    const xmlChar *elemName;
6233
0
    const xmlChar *attrName;
6234
0
    xmlEnumerationPtr tree;
6235
6236
0
    if ((CUR != '<') || (NXT(1) != '!'))
6237
0
        return;
6238
0
    SKIP(2);
6239
6240
0
    if (CMP7(CUR_PTR, 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
6241
0
  int inputid = ctxt->input->id;
6242
6243
0
  SKIP(7);
6244
0
  if (SKIP_BLANKS_PE == 0) {
6245
0
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6246
0
                     "Space required after '<!ATTLIST'\n");
6247
0
  }
6248
0
        elemName = xmlParseName(ctxt);
6249
0
  if (elemName == NULL) {
6250
0
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6251
0
         "ATTLIST: no name for Element\n");
6252
0
      return;
6253
0
  }
6254
0
  SKIP_BLANKS_PE;
6255
0
  GROW;
6256
0
  while ((RAW != '>') && (PARSER_STOPPED(ctxt) == 0)) {
6257
0
      int type;
6258
0
      int def;
6259
0
      xmlChar *defaultValue = NULL;
6260
6261
0
      GROW;
6262
0
            tree = NULL;
6263
0
      attrName = xmlParseName(ctxt);
6264
0
      if (attrName == NULL) {
6265
0
    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6266
0
             "ATTLIST: no name for Attribute\n");
6267
0
    break;
6268
0
      }
6269
0
      GROW;
6270
0
      if (SKIP_BLANKS_PE == 0) {
6271
0
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6272
0
            "Space required after the attribute name\n");
6273
0
    break;
6274
0
      }
6275
6276
0
      type = xmlParseAttributeType(ctxt, &tree);
6277
0
      if (type <= 0) {
6278
0
          break;
6279
0
      }
6280
6281
0
      GROW;
6282
0
      if (SKIP_BLANKS_PE == 0) {
6283
0
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6284
0
             "Space required after the attribute type\n");
6285
0
          if (tree != NULL)
6286
0
        xmlFreeEnumeration(tree);
6287
0
    break;
6288
0
      }
6289
6290
0
      def = xmlParseDefaultDecl(ctxt, &defaultValue);
6291
0
      if (def <= 0) {
6292
0
                if (defaultValue != NULL)
6293
0
        xmlFree(defaultValue);
6294
0
          if (tree != NULL)
6295
0
        xmlFreeEnumeration(tree);
6296
0
          break;
6297
0
      }
6298
0
      if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
6299
0
          xmlAttrNormalizeSpace(defaultValue, defaultValue);
6300
6301
0
      GROW;
6302
0
            if (RAW != '>') {
6303
0
    if (SKIP_BLANKS_PE == 0) {
6304
0
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6305
0
      "Space required after the attribute default value\n");
6306
0
        if (defaultValue != NULL)
6307
0
      xmlFree(defaultValue);
6308
0
        if (tree != NULL)
6309
0
      xmlFreeEnumeration(tree);
6310
0
        break;
6311
0
    }
6312
0
      }
6313
0
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6314
0
    (ctxt->sax->attributeDecl != NULL))
6315
0
    ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
6316
0
                          type, def, defaultValue, tree);
6317
0
      else if (tree != NULL)
6318
0
    xmlFreeEnumeration(tree);
6319
6320
0
      if ((ctxt->sax2) && (defaultValue != NULL) &&
6321
0
          (def != XML_ATTRIBUTE_IMPLIED) &&
6322
0
    (def != XML_ATTRIBUTE_REQUIRED)) {
6323
0
    xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
6324
0
      }
6325
0
      if (ctxt->sax2) {
6326
0
    xmlAddSpecialAttr(ctxt, elemName, attrName, type);
6327
0
      }
6328
0
      if (defaultValue != NULL)
6329
0
          xmlFree(defaultValue);
6330
0
      GROW;
6331
0
  }
6332
0
  if (RAW == '>') {
6333
0
      if (inputid != ctxt->input->id) {
6334
0
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6335
0
                               "Attribute list declaration doesn't start and"
6336
0
                               " stop in the same entity\n");
6337
0
      }
6338
0
      NEXT;
6339
0
  }
6340
0
    }
6341
0
}
6342
6343
/**
6344
 * xmlParseElementMixedContentDecl:
6345
 * @ctxt:  an XML parser context
6346
 * @inputchk:  the input used for the current entity, needed for boundary checks
6347
 *
6348
 * DEPRECATED: Internal function, don't use.
6349
 *
6350
 * parse the declaration for a Mixed Element content
6351
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6352
 *
6353
 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
6354
 *                '(' S? '#PCDATA' S? ')'
6355
 *
6356
 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
6357
 *
6358
 * [ VC: No Duplicate Types ]
6359
 * The same name must not appear more than once in a single
6360
 * mixed-content declaration.
6361
 *
6362
 * returns: the list of the xmlElementContentPtr describing the element choices
6363
 */
6364
xmlElementContentPtr
6365
0
xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6366
0
    xmlElementContentPtr ret = NULL, cur = NULL, n;
6367
0
    const xmlChar *elem = NULL;
6368
6369
0
    GROW;
6370
0
    if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6371
0
  SKIP(7);
6372
0
  SKIP_BLANKS_PE;
6373
0
  if (RAW == ')') {
6374
0
      if (ctxt->input->id != inputchk) {
6375
0
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6376
0
                               "Element content declaration doesn't start and"
6377
0
                               " stop in the same entity\n");
6378
0
      }
6379
0
      NEXT;
6380
0
      ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6381
0
      if (ret == NULL)
6382
0
                goto mem_error;
6383
0
      if (RAW == '*') {
6384
0
    ret->ocur = XML_ELEMENT_CONTENT_MULT;
6385
0
    NEXT;
6386
0
      }
6387
0
      return(ret);
6388
0
  }
6389
0
  if ((RAW == '(') || (RAW == '|')) {
6390
0
      ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6391
0
      if (ret == NULL)
6392
0
                goto mem_error;
6393
0
  }
6394
0
  while ((RAW == '|') && (PARSER_STOPPED(ctxt) == 0)) {
6395
0
      NEXT;
6396
0
            n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6397
0
            if (n == NULL)
6398
0
                goto mem_error;
6399
0
      if (elem == NULL) {
6400
0
    n->c1 = cur;
6401
0
    if (cur != NULL)
6402
0
        cur->parent = n;
6403
0
    ret = cur = n;
6404
0
      } else {
6405
0
          cur->c2 = n;
6406
0
    n->parent = cur;
6407
0
    n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6408
0
                if (n->c1 == NULL)
6409
0
                    goto mem_error;
6410
0
    n->c1->parent = n;
6411
0
    cur = n;
6412
0
      }
6413
0
      SKIP_BLANKS_PE;
6414
0
      elem = xmlParseName(ctxt);
6415
0
      if (elem == NULL) {
6416
0
    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6417
0
      "xmlParseElementMixedContentDecl : Name expected\n");
6418
0
    xmlFreeDocElementContent(ctxt->myDoc, ret);
6419
0
    return(NULL);
6420
0
      }
6421
0
      SKIP_BLANKS_PE;
6422
0
      GROW;
6423
0
  }
6424
0
  if ((RAW == ')') && (NXT(1) == '*')) {
6425
0
      if (elem != NULL) {
6426
0
    cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
6427
0
                                   XML_ELEMENT_CONTENT_ELEMENT);
6428
0
    if (cur->c2 == NULL)
6429
0
                    goto mem_error;
6430
0
    cur->c2->parent = cur;
6431
0
            }
6432
0
            if (ret != NULL)
6433
0
                ret->ocur = XML_ELEMENT_CONTENT_MULT;
6434
0
      if (ctxt->input->id != inputchk) {
6435
0
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6436
0
                               "Element content declaration doesn't start and"
6437
0
                               " stop in the same entity\n");
6438
0
      }
6439
0
      SKIP(2);
6440
0
  } else {
6441
0
      xmlFreeDocElementContent(ctxt->myDoc, ret);
6442
0
      xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
6443
0
      return(NULL);
6444
0
  }
6445
6446
0
    } else {
6447
0
  xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
6448
0
    }
6449
0
    return(ret);
6450
6451
0
mem_error:
6452
0
    xmlErrMemory(ctxt);
6453
0
    xmlFreeDocElementContent(ctxt->myDoc, ret);
6454
0
    return(NULL);
6455
0
}
6456
6457
/**
6458
 * xmlParseElementChildrenContentDeclPriv:
6459
 * @ctxt:  an XML parser context
6460
 * @inputchk:  the input used for the current entity, needed for boundary checks
6461
 * @depth: the level of recursion
6462
 *
6463
 * Parse the declaration for an element's children content.
6464
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6465
 *
6466
 *
6467
 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6468
 *
6469
 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6470
 *
6471
 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6472
 *
6473
 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6474
 *
6475
 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6476
 * TODO Parameter-entity replacement text must be properly nested
6477
 *  with parenthesized groups. That is to say, if either of the
6478
 *  opening or closing parentheses in a choice, seq, or Mixed
6479
 *  construct is contained in the replacement text for a parameter
6480
 *  entity, both must be contained in the same replacement text. For
6481
 *  interoperability, if a parameter-entity reference appears in a
6482
 *  choice, seq, or Mixed construct, its replacement text should not
6483
 *  be empty, and neither the first nor last non-blank character of
6484
 *  the replacement text should be a connector (| or ,).
6485
 *
6486
 * Returns the tree of xmlElementContentPtr describing the element
6487
 *          hierarchy.
6488
 */
6489
static xmlElementContentPtr
6490
xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
6491
0
                                       int depth) {
6492
0
    int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 2048 : 256;
6493
0
    xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
6494
0
    const xmlChar *elem;
6495
0
    xmlChar type = 0;
6496
6497
0
    if (depth > maxDepth) {
6498
0
        xmlFatalErrMsgInt(ctxt, XML_ERR_RESOURCE_LIMIT,
6499
0
                "xmlParseElementChildrenContentDecl : depth %d too deep, "
6500
0
                "use XML_PARSE_HUGE\n", depth);
6501
0
  return(NULL);
6502
0
    }
6503
0
    SKIP_BLANKS_PE;
6504
0
    GROW;
6505
0
    if (RAW == '(') {
6506
0
  int inputid = ctxt->input->id;
6507
6508
        /* Recurse on first child */
6509
0
  NEXT;
6510
0
  SKIP_BLANKS_PE;
6511
0
        cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6512
0
                                                           depth + 1);
6513
0
        if (cur == NULL)
6514
0
            return(NULL);
6515
0
  SKIP_BLANKS_PE;
6516
0
  GROW;
6517
0
    } else {
6518
0
  elem = xmlParseName(ctxt);
6519
0
  if (elem == NULL) {
6520
0
      xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6521
0
      return(NULL);
6522
0
  }
6523
0
        cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6524
0
  if (cur == NULL) {
6525
0
      xmlErrMemory(ctxt);
6526
0
      return(NULL);
6527
0
  }
6528
0
  GROW;
6529
0
  if (RAW == '?') {
6530
0
      cur->ocur = XML_ELEMENT_CONTENT_OPT;
6531
0
      NEXT;
6532
0
  } else if (RAW == '*') {
6533
0
      cur->ocur = XML_ELEMENT_CONTENT_MULT;
6534
0
      NEXT;
6535
0
  } else if (RAW == '+') {
6536
0
      cur->ocur = XML_ELEMENT_CONTENT_PLUS;
6537
0
      NEXT;
6538
0
  } else {
6539
0
      cur->ocur = XML_ELEMENT_CONTENT_ONCE;
6540
0
  }
6541
0
  GROW;
6542
0
    }
6543
0
    SKIP_BLANKS_PE;
6544
0
    while ((RAW != ')') && (PARSER_STOPPED(ctxt) == 0)) {
6545
        /*
6546
   * Each loop we parse one separator and one element.
6547
   */
6548
0
        if (RAW == ',') {
6549
0
      if (type == 0) type = CUR;
6550
6551
      /*
6552
       * Detect "Name | Name , Name" error
6553
       */
6554
0
      else if (type != CUR) {
6555
0
    xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6556
0
        "xmlParseElementChildrenContentDecl : '%c' expected\n",
6557
0
                      type);
6558
0
    if ((last != NULL) && (last != ret))
6559
0
        xmlFreeDocElementContent(ctxt->myDoc, last);
6560
0
    if (ret != NULL)
6561
0
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6562
0
    return(NULL);
6563
0
      }
6564
0
      NEXT;
6565
6566
0
      op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
6567
0
      if (op == NULL) {
6568
0
                xmlErrMemory(ctxt);
6569
0
    if ((last != NULL) && (last != ret))
6570
0
        xmlFreeDocElementContent(ctxt->myDoc, last);
6571
0
          xmlFreeDocElementContent(ctxt->myDoc, ret);
6572
0
    return(NULL);
6573
0
      }
6574
0
      if (last == NULL) {
6575
0
    op->c1 = ret;
6576
0
    if (ret != NULL)
6577
0
        ret->parent = op;
6578
0
    ret = cur = op;
6579
0
      } else {
6580
0
          cur->c2 = op;
6581
0
    if (op != NULL)
6582
0
        op->parent = cur;
6583
0
    op->c1 = last;
6584
0
    if (last != NULL)
6585
0
        last->parent = op;
6586
0
    cur =op;
6587
0
    last = NULL;
6588
0
      }
6589
0
  } else if (RAW == '|') {
6590
0
      if (type == 0) type = CUR;
6591
6592
      /*
6593
       * Detect "Name , Name | Name" error
6594
       */
6595
0
      else if (type != CUR) {
6596
0
    xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6597
0
        "xmlParseElementChildrenContentDecl : '%c' expected\n",
6598
0
          type);
6599
0
    if ((last != NULL) && (last != ret))
6600
0
        xmlFreeDocElementContent(ctxt->myDoc, last);
6601
0
    if (ret != NULL)
6602
0
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6603
0
    return(NULL);
6604
0
      }
6605
0
      NEXT;
6606
6607
0
      op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6608
0
      if (op == NULL) {
6609
0
                xmlErrMemory(ctxt);
6610
0
    if ((last != NULL) && (last != ret))
6611
0
        xmlFreeDocElementContent(ctxt->myDoc, last);
6612
0
    if (ret != NULL)
6613
0
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6614
0
    return(NULL);
6615
0
      }
6616
0
      if (last == NULL) {
6617
0
    op->c1 = ret;
6618
0
    if (ret != NULL)
6619
0
        ret->parent = op;
6620
0
    ret = cur = op;
6621
0
      } else {
6622
0
          cur->c2 = op;
6623
0
    if (op != NULL)
6624
0
        op->parent = cur;
6625
0
    op->c1 = last;
6626
0
    if (last != NULL)
6627
0
        last->parent = op;
6628
0
    cur =op;
6629
0
    last = NULL;
6630
0
      }
6631
0
  } else {
6632
0
      xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
6633
0
      if ((last != NULL) && (last != ret))
6634
0
          xmlFreeDocElementContent(ctxt->myDoc, last);
6635
0
      if (ret != NULL)
6636
0
    xmlFreeDocElementContent(ctxt->myDoc, ret);
6637
0
      return(NULL);
6638
0
  }
6639
0
  GROW;
6640
0
  SKIP_BLANKS_PE;
6641
0
  GROW;
6642
0
  if (RAW == '(') {
6643
0
      int inputid = ctxt->input->id;
6644
      /* Recurse on second child */
6645
0
      NEXT;
6646
0
      SKIP_BLANKS_PE;
6647
0
      last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6648
0
                                                          depth + 1);
6649
0
            if (last == NULL) {
6650
0
    if (ret != NULL)
6651
0
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6652
0
    return(NULL);
6653
0
            }
6654
0
      SKIP_BLANKS_PE;
6655
0
  } else {
6656
0
      elem = xmlParseName(ctxt);
6657
0
      if (elem == NULL) {
6658
0
    xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6659
0
    if (ret != NULL)
6660
0
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6661
0
    return(NULL);
6662
0
      }
6663
0
      last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6664
0
      if (last == NULL) {
6665
0
                xmlErrMemory(ctxt);
6666
0
    if (ret != NULL)
6667
0
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6668
0
    return(NULL);
6669
0
      }
6670
0
      if (RAW == '?') {
6671
0
    last->ocur = XML_ELEMENT_CONTENT_OPT;
6672
0
    NEXT;
6673
0
      } else if (RAW == '*') {
6674
0
    last->ocur = XML_ELEMENT_CONTENT_MULT;
6675
0
    NEXT;
6676
0
      } else if (RAW == '+') {
6677
0
    last->ocur = XML_ELEMENT_CONTENT_PLUS;
6678
0
    NEXT;
6679
0
      } else {
6680
0
    last->ocur = XML_ELEMENT_CONTENT_ONCE;
6681
0
      }
6682
0
  }
6683
0
  SKIP_BLANKS_PE;
6684
0
  GROW;
6685
0
    }
6686
0
    if ((cur != NULL) && (last != NULL)) {
6687
0
        cur->c2 = last;
6688
0
  if (last != NULL)
6689
0
      last->parent = cur;
6690
0
    }
6691
0
    if (ctxt->input->id != inputchk) {
6692
0
  xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6693
0
                       "Element content declaration doesn't start and stop in"
6694
0
                       " the same entity\n");
6695
0
    }
6696
0
    NEXT;
6697
0
    if (RAW == '?') {
6698
0
  if (ret != NULL) {
6699
0
      if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6700
0
          (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6701
0
          ret->ocur = XML_ELEMENT_CONTENT_MULT;
6702
0
      else
6703
0
          ret->ocur = XML_ELEMENT_CONTENT_OPT;
6704
0
  }
6705
0
  NEXT;
6706
0
    } else if (RAW == '*') {
6707
0
  if (ret != NULL) {
6708
0
      ret->ocur = XML_ELEMENT_CONTENT_MULT;
6709
0
      cur = ret;
6710
      /*
6711
       * Some normalization:
6712
       * (a | b* | c?)* == (a | b | c)*
6713
       */
6714
0
      while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6715
0
    if ((cur->c1 != NULL) &&
6716
0
              ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6717
0
         (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6718
0
        cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6719
0
    if ((cur->c2 != NULL) &&
6720
0
              ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6721
0
         (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6722
0
        cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6723
0
    cur = cur->c2;
6724
0
      }
6725
0
  }
6726
0
  NEXT;
6727
0
    } else if (RAW == '+') {
6728
0
  if (ret != NULL) {
6729
0
      int found = 0;
6730
6731
0
      if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6732
0
          (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6733
0
          ret->ocur = XML_ELEMENT_CONTENT_MULT;
6734
0
      else
6735
0
          ret->ocur = XML_ELEMENT_CONTENT_PLUS;
6736
      /*
6737
       * Some normalization:
6738
       * (a | b*)+ == (a | b)*
6739
       * (a | b?)+ == (a | b)*
6740
       */
6741
0
      while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6742
0
    if ((cur->c1 != NULL) &&
6743
0
              ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6744
0
         (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6745
0
        cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6746
0
        found = 1;
6747
0
    }
6748
0
    if ((cur->c2 != NULL) &&
6749
0
              ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6750
0
         (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6751
0
        cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6752
0
        found = 1;
6753
0
    }
6754
0
    cur = cur->c2;
6755
0
      }
6756
0
      if (found)
6757
0
    ret->ocur = XML_ELEMENT_CONTENT_MULT;
6758
0
  }
6759
0
  NEXT;
6760
0
    }
6761
0
    return(ret);
6762
0
}
6763
6764
/**
6765
 * xmlParseElementChildrenContentDecl:
6766
 * @ctxt:  an XML parser context
6767
 * @inputchk:  the input used for the current entity, needed for boundary checks
6768
 *
6769
 * DEPRECATED: Internal function, don't use.
6770
 *
6771
 * parse the declaration for a Mixed Element content
6772
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6773
 *
6774
 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6775
 *
6776
 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6777
 *
6778
 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6779
 *
6780
 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6781
 *
6782
 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6783
 * TODO Parameter-entity replacement text must be properly nested
6784
 *  with parenthesized groups. That is to say, if either of the
6785
 *  opening or closing parentheses in a choice, seq, or Mixed
6786
 *  construct is contained in the replacement text for a parameter
6787
 *  entity, both must be contained in the same replacement text. For
6788
 *  interoperability, if a parameter-entity reference appears in a
6789
 *  choice, seq, or Mixed construct, its replacement text should not
6790
 *  be empty, and neither the first nor last non-blank character of
6791
 *  the replacement text should be a connector (| or ,).
6792
 *
6793
 * Returns the tree of xmlElementContentPtr describing the element
6794
 *          hierarchy.
6795
 */
6796
xmlElementContentPtr
6797
0
xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6798
    /* stub left for API/ABI compat */
6799
0
    return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6800
0
}
6801
6802
/**
6803
 * xmlParseElementContentDecl:
6804
 * @ctxt:  an XML parser context
6805
 * @name:  the name of the element being defined.
6806
 * @result:  the Element Content pointer will be stored here if any
6807
 *
6808
 * DEPRECATED: Internal function, don't use.
6809
 *
6810
 * parse the declaration for an Element content either Mixed or Children,
6811
 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
6812
 *
6813
 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6814
 *
6815
 * returns: the type of element content XML_ELEMENT_TYPE_xxx
6816
 */
6817
6818
int
6819
xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
6820
0
                           xmlElementContentPtr *result) {
6821
6822
0
    xmlElementContentPtr tree = NULL;
6823
0
    int inputid = ctxt->input->id;
6824
0
    int res;
6825
6826
0
    *result = NULL;
6827
6828
0
    if (RAW != '(') {
6829
0
  xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6830
0
    "xmlParseElementContentDecl : %s '(' expected\n", name);
6831
0
  return(-1);
6832
0
    }
6833
0
    NEXT;
6834
0
    GROW;
6835
0
    SKIP_BLANKS_PE;
6836
0
    if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6837
0
        tree = xmlParseElementMixedContentDecl(ctxt, inputid);
6838
0
  res = XML_ELEMENT_TYPE_MIXED;
6839
0
    } else {
6840
0
        tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
6841
0
  res = XML_ELEMENT_TYPE_ELEMENT;
6842
0
    }
6843
0
    SKIP_BLANKS_PE;
6844
0
    *result = tree;
6845
0
    return(res);
6846
0
}
6847
6848
/**
6849
 * xmlParseElementDecl:
6850
 * @ctxt:  an XML parser context
6851
 *
6852
 * DEPRECATED: Internal function, don't use.
6853
 *
6854
 * Parse an element declaration. Always consumes '<!'.
6855
 *
6856
 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6857
 *
6858
 * [ VC: Unique Element Type Declaration ]
6859
 * No element type may be declared more than once
6860
 *
6861
 * Returns the type of the element, or -1 in case of error
6862
 */
6863
int
6864
0
xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
6865
0
    const xmlChar *name;
6866
0
    int ret = -1;
6867
0
    xmlElementContentPtr content  = NULL;
6868
6869
0
    if ((CUR != '<') || (NXT(1) != '!'))
6870
0
        return(ret);
6871
0
    SKIP(2);
6872
6873
    /* GROW; done in the caller */
6874
0
    if (CMP7(CUR_PTR, 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
6875
0
  int inputid = ctxt->input->id;
6876
6877
0
  SKIP(7);
6878
0
  if (SKIP_BLANKS_PE == 0) {
6879
0
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6880
0
               "Space required after 'ELEMENT'\n");
6881
0
      return(-1);
6882
0
  }
6883
0
        name = xmlParseName(ctxt);
6884
0
  if (name == NULL) {
6885
0
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6886
0
         "xmlParseElementDecl: no name for Element\n");
6887
0
      return(-1);
6888
0
  }
6889
0
  if (SKIP_BLANKS_PE == 0) {
6890
0
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6891
0
         "Space required after the element name\n");
6892
0
  }
6893
0
  if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
6894
0
      SKIP(5);
6895
      /*
6896
       * Element must always be empty.
6897
       */
6898
0
      ret = XML_ELEMENT_TYPE_EMPTY;
6899
0
  } else if ((RAW == 'A') && (NXT(1) == 'N') &&
6900
0
             (NXT(2) == 'Y')) {
6901
0
      SKIP(3);
6902
      /*
6903
       * Element is a generic container.
6904
       */
6905
0
      ret = XML_ELEMENT_TYPE_ANY;
6906
0
  } else if (RAW == '(') {
6907
0
      ret = xmlParseElementContentDecl(ctxt, name, &content);
6908
0
  } else {
6909
      /*
6910
       * [ WFC: PEs in Internal Subset ] error handling.
6911
       */
6912
0
            xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6913
0
                  "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6914
0
      return(-1);
6915
0
  }
6916
6917
0
  SKIP_BLANKS_PE;
6918
6919
0
  if (RAW != '>') {
6920
0
      xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
6921
0
      if (content != NULL) {
6922
0
    xmlFreeDocElementContent(ctxt->myDoc, content);
6923
0
      }
6924
0
  } else {
6925
0
      if (inputid != ctxt->input->id) {
6926
0
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6927
0
                               "Element declaration doesn't start and stop in"
6928
0
                               " the same entity\n");
6929
0
      }
6930
6931
0
      NEXT;
6932
0
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6933
0
    (ctxt->sax->elementDecl != NULL)) {
6934
0
    if (content != NULL)
6935
0
        content->parent = NULL;
6936
0
          ctxt->sax->elementDecl(ctxt->userData, name, ret,
6937
0
                           content);
6938
0
    if ((content != NULL) && (content->parent == NULL)) {
6939
        /*
6940
         * this is a trick: if xmlAddElementDecl is called,
6941
         * instead of copying the full tree it is plugged directly
6942
         * if called from the parser. Avoid duplicating the
6943
         * interfaces or change the API/ABI
6944
         */
6945
0
        xmlFreeDocElementContent(ctxt->myDoc, content);
6946
0
    }
6947
0
      } else if (content != NULL) {
6948
0
    xmlFreeDocElementContent(ctxt->myDoc, content);
6949
0
      }
6950
0
  }
6951
0
    }
6952
0
    return(ret);
6953
0
}
6954
6955
/**
6956
 * xmlParseConditionalSections
6957
 * @ctxt:  an XML parser context
6958
 *
6959
 * Parse a conditional section. Always consumes '<!['.
6960
 *
6961
 * [61] conditionalSect ::= includeSect | ignoreSect
6962
 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6963
 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6964
 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6965
 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6966
 */
6967
6968
static void
6969
0
xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
6970
0
    int *inputIds = NULL;
6971
0
    size_t inputIdsSize = 0;
6972
0
    size_t depth = 0;
6973
6974
0
    while (PARSER_STOPPED(ctxt) == 0) {
6975
0
        if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6976
0
            int id = ctxt->input->id;
6977
6978
0
            SKIP(3);
6979
0
            SKIP_BLANKS_PE;
6980
6981
0
            if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
6982
0
                SKIP(7);
6983
0
                SKIP_BLANKS_PE;
6984
0
                if (RAW != '[') {
6985
0
                    xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6986
0
                    xmlHaltParser(ctxt);
6987
0
                    goto error;
6988
0
                }
6989
0
                if (ctxt->input->id != id) {
6990
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6991
0
                                   "All markup of the conditional section is"
6992
0
                                   " not in the same entity\n");
6993
0
                }
6994
0
                NEXT;
6995
6996
0
                if (inputIdsSize <= depth) {
6997
0
                    int *tmp;
6998
6999
0
                    inputIdsSize = (inputIdsSize == 0 ? 4 : inputIdsSize * 2);
7000
0
                    tmp = (int *) xmlRealloc(inputIds,
7001
0
                            inputIdsSize * sizeof(int));
7002
0
                    if (tmp == NULL) {
7003
0
                        xmlErrMemory(ctxt);
7004
0
                        goto error;
7005
0
                    }
7006
0
                    inputIds = tmp;
7007
0
                }
7008
0
                inputIds[depth] = id;
7009
0
                depth++;
7010
0
            } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
7011
0
                size_t ignoreDepth = 0;
7012
7013
0
                SKIP(6);
7014
0
                SKIP_BLANKS_PE;
7015
0
                if (RAW != '[') {
7016
0
                    xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
7017
0
                    xmlHaltParser(ctxt);
7018
0
                    goto error;
7019
0
                }
7020
0
                if (ctxt->input->id != id) {
7021
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
7022
0
                                   "All markup of the conditional section is"
7023
0
                                   " not in the same entity\n");
7024
0
                }
7025
0
                NEXT;
7026
7027
0
                while (PARSER_STOPPED(ctxt) == 0) {
7028
0
                    if (RAW == 0) {
7029
0
                        xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
7030
0
                        goto error;
7031
0
                    }
7032
0
                    if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7033
0
                        SKIP(3);
7034
0
                        ignoreDepth++;
7035
                        /* Check for integer overflow */
7036
0
                        if (ignoreDepth == 0) {
7037
0
                            xmlErrMemory(ctxt);
7038
0
                            goto error;
7039
0
                        }
7040
0
                    } else if ((RAW == ']') && (NXT(1) == ']') &&
7041
0
                               (NXT(2) == '>')) {
7042
0
                        SKIP(3);
7043
0
                        if (ignoreDepth == 0)
7044
0
                            break;
7045
0
                        ignoreDepth--;
7046
0
                    } else {
7047
0
                        NEXT;
7048
0
                    }
7049
0
                }
7050
7051
0
                if (ctxt->input->id != id) {
7052
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
7053
0
                                   "All markup of the conditional section is"
7054
0
                                   " not in the same entity\n");
7055
0
                }
7056
0
            } else {
7057
0
                xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
7058
0
                xmlHaltParser(ctxt);
7059
0
                goto error;
7060
0
            }
7061
0
        } else if ((depth > 0) &&
7062
0
                   (RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
7063
0
            depth--;
7064
0
            if (ctxt->input->id != inputIds[depth]) {
7065
0
                xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
7066
0
                               "All markup of the conditional section is not"
7067
0
                               " in the same entity\n");
7068
0
            }
7069
0
            SKIP(3);
7070
0
        } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
7071
0
            xmlParseMarkupDecl(ctxt);
7072
0
        } else {
7073
0
            xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7074
0
            xmlHaltParser(ctxt);
7075
0
            goto error;
7076
0
        }
7077
7078
0
        if (depth == 0)
7079
0
            break;
7080
7081
0
        SKIP_BLANKS_PE;
7082
0
        SHRINK;
7083
0
        GROW;
7084
0
    }
7085
7086
0
error:
7087
0
    xmlFree(inputIds);
7088
0
}
7089
7090
/**
7091
 * xmlParseMarkupDecl:
7092
 * @ctxt:  an XML parser context
7093
 *
7094
 * DEPRECATED: Internal function, don't use.
7095
 *
7096
 * Parse markup declarations. Always consumes '<!' or '<?'.
7097
 *
7098
 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
7099
 *                     NotationDecl | PI | Comment
7100
 *
7101
 * [ VC: Proper Declaration/PE Nesting ]
7102
 * Parameter-entity replacement text must be properly nested with
7103
 * markup declarations. That is to say, if either the first character
7104
 * or the last character of a markup declaration (markupdecl above) is
7105
 * contained in the replacement text for a parameter-entity reference,
7106
 * both must be contained in the same replacement text.
7107
 *
7108
 * [ WFC: PEs in Internal Subset ]
7109
 * In the internal DTD subset, parameter-entity references can occur
7110
 * only where markup declarations can occur, not within markup declarations.
7111
 * (This does not apply to references that occur in external parameter
7112
 * entities or to the external subset.)
7113
 */
7114
void
7115
0
xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
7116
0
    GROW;
7117
0
    if (CUR == '<') {
7118
0
        if (NXT(1) == '!') {
7119
0
      switch (NXT(2)) {
7120
0
          case 'E':
7121
0
        if (NXT(3) == 'L')
7122
0
      xmlParseElementDecl(ctxt);
7123
0
        else if (NXT(3) == 'N')
7124
0
      xmlParseEntityDecl(ctxt);
7125
0
                    else
7126
0
                        SKIP(2);
7127
0
        break;
7128
0
          case 'A':
7129
0
        xmlParseAttributeListDecl(ctxt);
7130
0
        break;
7131
0
          case 'N':
7132
0
        xmlParseNotationDecl(ctxt);
7133
0
        break;
7134
0
          case '-':
7135
0
        xmlParseComment(ctxt);
7136
0
        break;
7137
0
    default:
7138
        /* there is an error but it will be detected later */
7139
0
                    SKIP(2);
7140
0
        break;
7141
0
      }
7142
0
  } else if (NXT(1) == '?') {
7143
0
      xmlParsePI(ctxt);
7144
0
  }
7145
0
    }
7146
0
}
7147
7148
/**
7149
 * xmlParseTextDecl:
7150
 * @ctxt:  an XML parser context
7151
 *
7152
 * DEPRECATED: Internal function, don't use.
7153
 *
7154
 * parse an XML declaration header for external entities
7155
 *
7156
 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
7157
 */
7158
7159
void
7160
0
xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
7161
0
    xmlChar *version;
7162
7163
    /*
7164
     * We know that '<?xml' is here.
7165
     */
7166
0
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
7167
0
  SKIP(5);
7168
0
    } else {
7169
0
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
7170
0
  return;
7171
0
    }
7172
7173
0
    if (SKIP_BLANKS == 0) {
7174
0
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7175
0
           "Space needed after '<?xml'\n");
7176
0
    }
7177
7178
    /*
7179
     * We may have the VersionInfo here.
7180
     */
7181
0
    version = xmlParseVersionInfo(ctxt);
7182
0
    if (version == NULL) {
7183
0
  version = xmlCharStrdup(XML_DEFAULT_VERSION);
7184
0
        if (version == NULL) {
7185
0
            xmlErrMemory(ctxt);
7186
0
            return;
7187
0
        }
7188
0
    } else {
7189
0
  if (SKIP_BLANKS == 0) {
7190
0
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7191
0
               "Space needed here\n");
7192
0
  }
7193
0
    }
7194
0
    ctxt->input->version = version;
7195
7196
    /*
7197
     * We must have the encoding declaration
7198
     */
7199
0
    xmlParseEncodingDecl(ctxt);
7200
7201
0
    SKIP_BLANKS;
7202
0
    if ((RAW == '?') && (NXT(1) == '>')) {
7203
0
        SKIP(2);
7204
0
    } else if (RAW == '>') {
7205
        /* Deprecated old WD ... */
7206
0
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
7207
0
  NEXT;
7208
0
    } else {
7209
0
        int c;
7210
7211
0
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
7212
0
        while ((PARSER_STOPPED(ctxt) == 0) && ((c = CUR) != 0)) {
7213
0
            NEXT;
7214
0
            if (c == '>')
7215
0
                break;
7216
0
        }
7217
0
    }
7218
0
}
7219
7220
/**
7221
 * xmlParseExternalSubset:
7222
 * @ctxt:  an XML parser context
7223
 * @ExternalID: the external identifier
7224
 * @SystemID: the system identifier (or URL)
7225
 *
7226
 * parse Markup declarations from an external subset
7227
 *
7228
 * [30] extSubset ::= textDecl? extSubsetDecl
7229
 *
7230
 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
7231
 */
7232
void
7233
xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
7234
0
                       const xmlChar *SystemID) {
7235
0
    int oldInputNr;
7236
7237
0
    xmlCtxtInitializeLate(ctxt);
7238
7239
0
    xmlDetectEncoding(ctxt);
7240
7241
0
    if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
7242
0
  xmlParseTextDecl(ctxt);
7243
0
    }
7244
0
    if (ctxt->myDoc == NULL) {
7245
0
        ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
7246
0
  if (ctxt->myDoc == NULL) {
7247
0
      xmlErrMemory(ctxt);
7248
0
      return;
7249
0
  }
7250
0
  ctxt->myDoc->properties = XML_DOC_INTERNAL;
7251
0
    }
7252
0
    if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL) &&
7253
0
        (xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID) == NULL)) {
7254
0
        xmlErrMemory(ctxt);
7255
0
    }
7256
7257
0
    ctxt->inSubset = 2;
7258
0
    oldInputNr = ctxt->inputNr;
7259
7260
0
    SKIP_BLANKS_PE;
7261
0
    while (((RAW != 0) || (ctxt->inputNr > oldInputNr)) &&
7262
0
           (!PARSER_STOPPED(ctxt))) {
7263
0
  GROW;
7264
0
        if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7265
0
            xmlParseConditionalSections(ctxt);
7266
0
        } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
7267
0
            xmlParseMarkupDecl(ctxt);
7268
0
        } else {
7269
0
            xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7270
0
            xmlHaltParser(ctxt);
7271
0
            return;
7272
0
        }
7273
0
        SKIP_BLANKS_PE;
7274
0
        SHRINK;
7275
0
    }
7276
7277
0
    while (ctxt->inputNr > oldInputNr)
7278
0
        xmlPopPE(ctxt);
7279
7280
0
    if (RAW != 0) {
7281
0
  xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7282
0
    }
7283
0
}
7284
7285
/**
7286
 * xmlParseReference:
7287
 * @ctxt:  an XML parser context
7288
 *
7289
 * DEPRECATED: Internal function, don't use.
7290
 *
7291
 * parse and handle entity references in content, depending on the SAX
7292
 * interface, this may end-up in a call to character() if this is a
7293
 * CharRef, a predefined entity, if there is no reference() callback.
7294
 * or if the parser was asked to switch to that mode.
7295
 *
7296
 * Always consumes '&'.
7297
 *
7298
 * [67] Reference ::= EntityRef | CharRef
7299
 */
7300
void
7301
0
xmlParseReference(xmlParserCtxtPtr ctxt) {
7302
0
    xmlEntityPtr ent = NULL;
7303
0
    const xmlChar *name;
7304
0
    xmlChar *val;
7305
7306
0
    if (RAW != '&')
7307
0
        return;
7308
7309
    /*
7310
     * Simple case of a CharRef
7311
     */
7312
0
    if (NXT(1) == '#') {
7313
0
  int i = 0;
7314
0
  xmlChar out[16];
7315
0
  int value = xmlParseCharRef(ctxt);
7316
7317
0
  if (value == 0)
7318
0
      return;
7319
7320
        /*
7321
         * Just encode the value in UTF-8
7322
         */
7323
0
        COPY_BUF(out, i, value);
7324
0
        out[i] = 0;
7325
0
        if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7326
0
            (!ctxt->disableSAX))
7327
0
            ctxt->sax->characters(ctxt->userData, out, i);
7328
0
  return;
7329
0
    }
7330
7331
    /*
7332
     * We are seeing an entity reference
7333
     */
7334
0
    name = xmlParseEntityRefInternal(ctxt);
7335
0
    if (name == NULL)
7336
0
        return;
7337
0
    ent = xmlLookupGeneralEntity(ctxt, name, /* isAttr */ 0);
7338
0
    if (ent == NULL) {
7339
        /*
7340
         * Create a reference for undeclared entities.
7341
         */
7342
0
        if ((ctxt->replaceEntities == 0) &&
7343
0
            (ctxt->sax != NULL) &&
7344
0
            (ctxt->disableSAX == 0) &&
7345
0
            (ctxt->sax->reference != NULL)) {
7346
0
            ctxt->sax->reference(ctxt->userData, name);
7347
0
        }
7348
0
        return;
7349
0
    }
7350
0
    if (!ctxt->wellFormed)
7351
0
  return;
7352
7353
    /* special case of predefined entities */
7354
0
    if ((ent->name == NULL) ||
7355
0
        (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
7356
0
  val = ent->content;
7357
0
  if (val == NULL) return;
7358
  /*
7359
   * inline the entity.
7360
   */
7361
0
  if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7362
0
      (!ctxt->disableSAX))
7363
0
      ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
7364
0
  return;
7365
0
    }
7366
7367
    /*
7368
     * The first reference to the entity triggers a parsing phase
7369
     * where the ent->children is filled with the result from
7370
     * the parsing.
7371
     * Note: external parsed entities will not be loaded, it is not
7372
     * required for a non-validating parser, unless the parsing option
7373
     * of validating, or substituting entities were given. Doing so is
7374
     * far more secure as the parser will only process data coming from
7375
     * the document entity by default.
7376
     *
7377
     * FIXME: This doesn't work correctly since entities can be
7378
     * expanded with different namespace declarations in scope.
7379
     * For example:
7380
     *
7381
     * <!DOCTYPE doc [
7382
     *   <!ENTITY ent "<ns:elem/>">
7383
     * ]>
7384
     * <doc>
7385
     *   <decl1 xmlns:ns="urn:ns1">
7386
     *     &ent;
7387
     *   </decl1>
7388
     *   <decl2 xmlns:ns="urn:ns2">
7389
     *     &ent;
7390
     *   </decl2>
7391
     * </doc>
7392
     *
7393
     * Proposed fix:
7394
     *
7395
     * - Ignore current namespace declarations when parsing the
7396
     *   entity. If a prefix can't be resolved, don't report an error
7397
     *   but mark it as unresolved.
7398
     * - Try to resolve these prefixes when expanding the entity.
7399
     *   This will require a specialized version of xmlStaticCopyNode
7400
     *   which can also make use of the namespace hash table to avoid
7401
     *   quadratic behavior.
7402
     *
7403
     * Alternatively, we could simply reparse the entity on each
7404
     * expansion like we already do with custom SAX callbacks.
7405
     * External entity content should be cached in this case.
7406
     */
7407
0
    if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
7408
0
        (((ctxt->options & XML_PARSE_NO_XXE) == 0) &&
7409
0
         ((ctxt->replaceEntities) ||
7410
0
          (ctxt->validate)))) {
7411
0
        if ((ent->flags & XML_ENT_PARSED) == 0) {
7412
0
            xmlCtxtParseEntity(ctxt, ent);
7413
0
        } else if (ent->children == NULL) {
7414
            /*
7415
             * Probably running in SAX mode and the callbacks don't
7416
             * build the entity content. Parse the entity again.
7417
             *
7418
             * This will also be triggered in normal tree builder mode
7419
             * if an entity happens to be empty, causing unnecessary
7420
             * reloads. It's hard to come up with a reliable check in
7421
             * which mode we're running.
7422
             */
7423
0
            xmlCtxtParseEntity(ctxt, ent);
7424
0
        }
7425
0
    }
7426
7427
    /*
7428
     * We also check for amplification if entities aren't substituted.
7429
     * They might be expanded later.
7430
     */
7431
0
    if (xmlParserEntityCheck(ctxt, ent->expandedSize))
7432
0
        return;
7433
7434
0
    if ((ctxt->sax == NULL) || (ctxt->disableSAX))
7435
0
        return;
7436
7437
0
    if (ctxt->replaceEntities == 0) {
7438
  /*
7439
   * Create a reference
7440
   */
7441
0
        if (ctxt->sax->reference != NULL)
7442
0
      ctxt->sax->reference(ctxt->userData, ent->name);
7443
0
    } else if ((ent->children != NULL) && (ctxt->node != NULL)) {
7444
0
        xmlNodePtr copy, cur;
7445
7446
        /*
7447
         * Seems we are generating the DOM content, copy the tree
7448
   */
7449
0
        cur = ent->children;
7450
7451
        /*
7452
         * Handle first text node with SAX to coalesce text efficiently
7453
         */
7454
0
        if ((cur->type == XML_TEXT_NODE) ||
7455
0
            (cur->type == XML_CDATA_SECTION_NODE)) {
7456
0
            int len = xmlStrlen(cur->content);
7457
7458
0
            if ((cur->type == XML_TEXT_NODE) ||
7459
0
                (ctxt->sax->cdataBlock == NULL)) {
7460
0
                if (ctxt->sax->characters != NULL)
7461
0
                    ctxt->sax->characters(ctxt, cur->content, len);
7462
0
            } else {
7463
0
                if (ctxt->sax->cdataBlock != NULL)
7464
0
                    ctxt->sax->cdataBlock(ctxt, cur->content, len);
7465
0
            }
7466
7467
0
            cur = cur->next;
7468
0
        }
7469
7470
0
        while (cur != NULL) {
7471
0
            xmlNodePtr last;
7472
7473
            /*
7474
             * Handle last text node with SAX to coalesce text efficiently
7475
             */
7476
0
            if ((cur->next == NULL) &&
7477
0
                ((cur->type == XML_TEXT_NODE) ||
7478
0
                 (cur->type == XML_CDATA_SECTION_NODE))) {
7479
0
                int len = xmlStrlen(cur->content);
7480
7481
0
                if ((cur->type == XML_TEXT_NODE) ||
7482
0
                    (ctxt->sax->cdataBlock == NULL)) {
7483
0
                    if (ctxt->sax->characters != NULL)
7484
0
                        ctxt->sax->characters(ctxt, cur->content, len);
7485
0
                } else {
7486
0
                    if (ctxt->sax->cdataBlock != NULL)
7487
0
                        ctxt->sax->cdataBlock(ctxt, cur->content, len);
7488
0
                }
7489
7490
0
                break;
7491
0
            }
7492
7493
            /*
7494
             * Reset coalesce buffer stats only for non-text nodes.
7495
             */
7496
0
            ctxt->nodemem = 0;
7497
0
            ctxt->nodelen = 0;
7498
7499
0
            copy = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7500
7501
0
            if (copy == NULL) {
7502
0
                xmlErrMemory(ctxt);
7503
0
                break;
7504
0
            }
7505
7506
0
            if (ctxt->parseMode == XML_PARSE_READER) {
7507
                /* Needed for reader */
7508
0
                copy->extra = cur->extra;
7509
                /* Maybe needed for reader */
7510
0
                copy->_private = cur->_private;
7511
0
            }
7512
7513
0
            copy->parent = ctxt->node;
7514
0
            last = ctxt->node->last;
7515
0
            if (last == NULL) {
7516
0
                ctxt->node->children = copy;
7517
0
            } else {
7518
0
                last->next = copy;
7519
0
                copy->prev = last;
7520
0
            }
7521
0
            ctxt->node->last = copy;
7522
7523
0
            cur = cur->next;
7524
0
        }
7525
0
    }
7526
0
}
7527
7528
static void
7529
0
xmlHandleUndeclaredEntity(xmlParserCtxtPtr ctxt, const xmlChar *name) {
7530
    /*
7531
     * [ WFC: Entity Declared ]
7532
     * In a document without any DTD, a document with only an
7533
     * internal DTD subset which contains no parameter entity
7534
     * references, or a document with "standalone='yes'", the
7535
     * Name given in the entity reference must match that in an
7536
     * entity declaration, except that well-formed documents
7537
     * need not declare any of the following entities: amp, lt,
7538
     * gt, apos, quot.
7539
     * The declaration of a parameter entity must precede any
7540
     * reference to it.
7541
     * Similarly, the declaration of a general entity must
7542
     * precede any reference to it which appears in a default
7543
     * value in an attribute-list declaration. Note that if
7544
     * entities are declared in the external subset or in
7545
     * external parameter entities, a non-validating processor
7546
     * is not obligated to read and process their declarations;
7547
     * for such documents, the rule that an entity must be
7548
     * declared is a well-formedness constraint only if
7549
     * standalone='yes'.
7550
     */
7551
0
    if ((ctxt->standalone == 1) ||
7552
0
        ((ctxt->hasExternalSubset == 0) &&
7553
0
         (ctxt->hasPErefs == 0))) {
7554
0
        xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7555
0
                          "Entity '%s' not defined\n", name);
7556
0
    } else if (ctxt->validate) {
7557
        /*
7558
         * [ VC: Entity Declared ]
7559
         * In a document with an external subset or external
7560
         * parameter entities with "standalone='no'", ...
7561
         * ... The declaration of a parameter entity must
7562
         * precede any reference to it...
7563
         */
7564
0
        xmlValidityError(ctxt, XML_ERR_UNDECLARED_ENTITY,
7565
0
                         "Entity '%s' not defined\n", name, NULL);
7566
0
    } else if ((ctxt->loadsubset) ||
7567
0
               ((ctxt->replaceEntities) &&
7568
0
                ((ctxt->options & XML_PARSE_NO_XXE) == 0))) {
7569
        /*
7570
         * Also raise a non-fatal error
7571
         *
7572
         * - if the external subset is loaded and all entity declarations
7573
         *   should be available, or
7574
         * - entity substition was requested without restricting
7575
         *   external entity access.
7576
         */
7577
0
        xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7578
0
                     "Entity '%s' not defined\n", name);
7579
0
    } else {
7580
0
        xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7581
0
                      "Entity '%s' not defined\n", name, NULL);
7582
0
    }
7583
7584
0
    ctxt->valid = 0;
7585
0
}
7586
7587
static xmlEntityPtr
7588
0
xmlLookupGeneralEntity(xmlParserCtxtPtr ctxt, const xmlChar *name, int inAttr) {
7589
0
    xmlEntityPtr ent;
7590
7591
    /*
7592
     * Predefined entities override any extra definition
7593
     */
7594
0
    if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7595
0
        ent = xmlGetPredefinedEntity(name);
7596
0
        if (ent != NULL)
7597
0
            return(ent);
7598
0
    }
7599
7600
    /*
7601
     * Ask first SAX for entity resolution, otherwise try the
7602
     * entities which may have stored in the parser context.
7603
     */
7604
0
    if (ctxt->sax != NULL) {
7605
0
  if (ctxt->sax->getEntity != NULL)
7606
0
      ent = ctxt->sax->getEntity(ctxt->userData, name);
7607
0
  if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7608
0
      (ctxt->options & XML_PARSE_OLDSAX))
7609
0
      ent = xmlGetPredefinedEntity(name);
7610
0
  if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7611
0
      (ctxt->userData==ctxt)) {
7612
0
      ent = xmlSAX2GetEntity(ctxt, name);
7613
0
  }
7614
0
    }
7615
7616
0
    if (ent == NULL) {
7617
0
        xmlHandleUndeclaredEntity(ctxt, name);
7618
0
    }
7619
7620
    /*
7621
     * [ WFC: Parsed Entity ]
7622
     * An entity reference must not contain the name of an
7623
     * unparsed entity
7624
     */
7625
0
    else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7626
0
  xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7627
0
     "Entity reference to unparsed entity %s\n", name);
7628
0
        ent = NULL;
7629
0
    }
7630
7631
    /*
7632
     * [ WFC: No External Entity References ]
7633
     * Attribute values cannot contain direct or indirect
7634
     * entity references to external entities.
7635
     */
7636
0
    else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7637
0
        if (inAttr) {
7638
0
            xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7639
0
                 "Attribute references external entity '%s'\n", name);
7640
0
            ent = NULL;
7641
0
        }
7642
0
    }
7643
7644
0
    return(ent);
7645
0
}
7646
7647
/**
7648
 * xmlParseEntityRefInternal:
7649
 * @ctxt:  an XML parser context
7650
 * @inAttr:  whether we are in an attribute value
7651
 *
7652
 * Parse an entity reference. Always consumes '&'.
7653
 *
7654
 * [68] EntityRef ::= '&' Name ';'
7655
 *
7656
 * Returns the name, or NULL in case of error.
7657
 */
7658
static const xmlChar *
7659
0
xmlParseEntityRefInternal(xmlParserCtxtPtr ctxt) {
7660
0
    const xmlChar *name;
7661
7662
0
    GROW;
7663
7664
0
    if (RAW != '&')
7665
0
        return(NULL);
7666
0
    NEXT;
7667
0
    name = xmlParseName(ctxt);
7668
0
    if (name == NULL) {
7669
0
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7670
0
           "xmlParseEntityRef: no name\n");
7671
0
        return(NULL);
7672
0
    }
7673
0
    if (RAW != ';') {
7674
0
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7675
0
  return(NULL);
7676
0
    }
7677
0
    NEXT;
7678
7679
0
    return(name);
7680
0
}
7681
7682
/**
7683
 * xmlParseEntityRef:
7684
 * @ctxt:  an XML parser context
7685
 *
7686
 * DEPRECATED: Internal function, don't use.
7687
 *
7688
 * Returns the xmlEntityPtr if found, or NULL otherwise.
7689
 */
7690
xmlEntityPtr
7691
0
xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
7692
0
    const xmlChar *name;
7693
7694
0
    if (ctxt == NULL)
7695
0
        return(NULL);
7696
7697
0
    name = xmlParseEntityRefInternal(ctxt);
7698
0
    if (name == NULL)
7699
0
        return(NULL);
7700
7701
0
    return(xmlLookupGeneralEntity(ctxt, name, /* inAttr */ 0));
7702
0
}
7703
7704
/**
7705
 * xmlParseStringEntityRef:
7706
 * @ctxt:  an XML parser context
7707
 * @str:  a pointer to an index in the string
7708
 *
7709
 * parse ENTITY references declarations, but this version parses it from
7710
 * a string value.
7711
 *
7712
 * [68] EntityRef ::= '&' Name ';'
7713
 *
7714
 * [ WFC: Entity Declared ]
7715
 * In a document without any DTD, a document with only an internal DTD
7716
 * subset which contains no parameter entity references, or a document
7717
 * with "standalone='yes'", the Name given in the entity reference
7718
 * must match that in an entity declaration, except that well-formed
7719
 * documents need not declare any of the following entities: amp, lt,
7720
 * gt, apos, quot.  The declaration of a parameter entity must precede
7721
 * any reference to it.  Similarly, the declaration of a general entity
7722
 * must precede any reference to it which appears in a default value in an
7723
 * attribute-list declaration. Note that if entities are declared in the
7724
 * external subset or in external parameter entities, a non-validating
7725
 * processor is not obligated to read and process their declarations;
7726
 * for such documents, the rule that an entity must be declared is a
7727
 * well-formedness constraint only if standalone='yes'.
7728
 *
7729
 * [ WFC: Parsed Entity ]
7730
 * An entity reference must not contain the name of an unparsed entity
7731
 *
7732
 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7733
 * is updated to the current location in the string.
7734
 */
7735
static xmlChar *
7736
0
xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7737
0
    xmlChar *name;
7738
0
    const xmlChar *ptr;
7739
0
    xmlChar cur;
7740
7741
0
    if ((str == NULL) || (*str == NULL))
7742
0
        return(NULL);
7743
0
    ptr = *str;
7744
0
    cur = *ptr;
7745
0
    if (cur != '&')
7746
0
  return(NULL);
7747
7748
0
    ptr++;
7749
0
    name = xmlParseStringName(ctxt, &ptr);
7750
0
    if (name == NULL) {
7751
0
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7752
0
           "xmlParseStringEntityRef: no name\n");
7753
0
  *str = ptr;
7754
0
  return(NULL);
7755
0
    }
7756
0
    if (*ptr != ';') {
7757
0
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7758
0
        xmlFree(name);
7759
0
  *str = ptr;
7760
0
  return(NULL);
7761
0
    }
7762
0
    ptr++;
7763
7764
0
    *str = ptr;
7765
0
    return(name);
7766
0
}
7767
7768
/**
7769
 * xmlParsePEReference:
7770
 * @ctxt:  an XML parser context
7771
 *
7772
 * DEPRECATED: Internal function, don't use.
7773
 *
7774
 * Parse a parameter entity reference. Always consumes '%'.
7775
 *
7776
 * The entity content is handled directly by pushing it's content as
7777
 * a new input stream.
7778
 *
7779
 * [69] PEReference ::= '%' Name ';'
7780
 *
7781
 * [ WFC: No Recursion ]
7782
 * A parsed entity must not contain a recursive
7783
 * reference to itself, either directly or indirectly.
7784
 *
7785
 * [ WFC: Entity Declared ]
7786
 * In a document without any DTD, a document with only an internal DTD
7787
 * subset which contains no parameter entity references, or a document
7788
 * with "standalone='yes'", ...  ... The declaration of a parameter
7789
 * entity must precede any reference to it...
7790
 *
7791
 * [ VC: Entity Declared ]
7792
 * In a document with an external subset or external parameter entities
7793
 * with "standalone='no'", ...  ... The declaration of a parameter entity
7794
 * must precede any reference to it...
7795
 *
7796
 * [ WFC: In DTD ]
7797
 * Parameter-entity references may only appear in the DTD.
7798
 * NOTE: misleading but this is handled.
7799
 */
7800
void
7801
xmlParsePEReference(xmlParserCtxtPtr ctxt)
7802
0
{
7803
0
    const xmlChar *name;
7804
0
    xmlEntityPtr entity = NULL;
7805
0
    xmlParserInputPtr input;
7806
7807
0
    if (RAW != '%')
7808
0
        return;
7809
0
    NEXT;
7810
0
    name = xmlParseName(ctxt);
7811
0
    if (name == NULL) {
7812
0
  xmlFatalErrMsg(ctxt, XML_ERR_PEREF_NO_NAME, "PEReference: no name\n");
7813
0
  return;
7814
0
    }
7815
0
    if (RAW != ';') {
7816
0
  xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
7817
0
        return;
7818
0
    }
7819
7820
0
    NEXT;
7821
7822
    /* Must be set before xmlHandleUndeclaredEntity */
7823
0
    ctxt->hasPErefs = 1;
7824
7825
    /*
7826
     * Request the entity from SAX
7827
     */
7828
0
    if ((ctxt->sax != NULL) &&
7829
0
  (ctxt->sax->getParameterEntity != NULL))
7830
0
  entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
7831
7832
0
    if (entity == NULL) {
7833
0
        xmlHandleUndeclaredEntity(ctxt, name);
7834
0
    } else {
7835
  /*
7836
   * Internal checking in case the entity quest barfed
7837
   */
7838
0
  if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7839
0
      (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7840
0
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7841
0
      "Internal: %%%s; is not a parameter entity\n",
7842
0
        name, NULL);
7843
0
  } else {
7844
0
      if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
7845
0
                ((ctxt->options & XML_PARSE_NO_XXE) ||
7846
0
     ((ctxt->loadsubset == 0) &&
7847
0
      (ctxt->replaceEntities == 0) &&
7848
0
      (ctxt->validate == 0))))
7849
0
    return;
7850
7851
0
            if (entity->flags & XML_ENT_EXPANDING) {
7852
0
                xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7853
0
                xmlHaltParser(ctxt);
7854
0
                return;
7855
0
            }
7856
7857
0
      input = xmlNewEntityInputStream(ctxt, entity);
7858
0
      if (xmlPushInput(ctxt, input) < 0) {
7859
0
                xmlFreeInputStream(input);
7860
0
    return;
7861
0
            }
7862
7863
0
            entity->flags |= XML_ENT_EXPANDING;
7864
7865
0
      if (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) {
7866
0
                xmlDetectEncoding(ctxt);
7867
7868
0
                if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
7869
0
                    (IS_BLANK_CH(NXT(5)))) {
7870
0
                    xmlParseTextDecl(ctxt);
7871
0
                }
7872
0
            }
7873
0
  }
7874
0
    }
7875
0
}
7876
7877
/**
7878
 * xmlLoadEntityContent:
7879
 * @ctxt:  an XML parser context
7880
 * @entity: an unloaded system entity
7881
 *
7882
 * Load the original content of the given system entity from the
7883
 * ExternalID/SystemID given. This is to be used for Included in Literal
7884
 * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
7885
 *
7886
 * Returns 0 in case of success and -1 in case of failure
7887
 */
7888
static int
7889
0
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
7890
0
    xmlParserInputPtr oldinput, input = NULL;
7891
0
    xmlParserInputPtr *oldinputTab;
7892
0
    const xmlChar *oldencoding;
7893
0
    xmlChar *content = NULL;
7894
0
    size_t length, i;
7895
0
    int oldinputNr, oldinputMax;
7896
0
    int ret = -1;
7897
0
    int res;
7898
7899
0
    if ((ctxt == NULL) || (entity == NULL) ||
7900
0
        ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
7901
0
   (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
7902
0
  (entity->content != NULL)) {
7903
0
  xmlFatalErr(ctxt, XML_ERR_ARGUMENT,
7904
0
              "xmlLoadEntityContent parameter error");
7905
0
        return(-1);
7906
0
    }
7907
7908
0
    input = xmlLoadExternalEntity((char *) entity->URI,
7909
0
           (char *) entity->ExternalID, ctxt);
7910
0
    if (input == NULL)
7911
0
        return(-1);
7912
7913
0
    oldinput = ctxt->input;
7914
0
    oldinputNr = ctxt->inputNr;
7915
0
    oldinputMax = ctxt->inputMax;
7916
0
    oldinputTab = ctxt->inputTab;
7917
0
    oldencoding = ctxt->encoding;
7918
7919
0
    ctxt->input = NULL;
7920
0
    ctxt->inputNr = 0;
7921
0
    ctxt->inputMax = 1;
7922
0
    ctxt->encoding = NULL;
7923
0
    ctxt->inputTab = xmlMalloc(sizeof(xmlParserInputPtr));
7924
0
    if (ctxt->inputTab == NULL) {
7925
0
        xmlErrMemory(ctxt);
7926
0
        xmlFreeInputStream(input);
7927
0
        goto error;
7928
0
    }
7929
7930
0
    xmlBufResetInput(input->buf->buffer, input);
7931
7932
0
    inputPush(ctxt, input);
7933
7934
0
    xmlDetectEncoding(ctxt);
7935
7936
    /*
7937
     * Parse a possible text declaration first
7938
     */
7939
0
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
7940
0
  xmlParseTextDecl(ctxt);
7941
        /*
7942
         * An XML-1.0 document can't reference an entity not XML-1.0
7943
         */
7944
0
        if ((xmlStrEqual(ctxt->version, BAD_CAST "1.0")) &&
7945
0
            (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
7946
0
            xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
7947
0
                           "Version mismatch between document and entity\n");
7948
0
        }
7949
0
    }
7950
7951
0
    length = input->cur - input->base;
7952
0
    xmlBufShrink(input->buf->buffer, length);
7953
0
    xmlSaturatedAdd(&ctxt->sizeentities, length);
7954
7955
0
    while ((res = xmlParserInputBufferGrow(input->buf, 4096)) > 0)
7956
0
        ;
7957
7958
0
    xmlBufResetInput(input->buf->buffer, input);
7959
7960
0
    if (res < 0) {
7961
0
        xmlCtxtErrIO(ctxt, input->buf->error, NULL);
7962
0
        goto error;
7963
0
    }
7964
7965
0
    length = xmlBufUse(input->buf->buffer);
7966
0
    content = xmlBufDetach(input->buf->buffer);
7967
7968
0
    if (length > INT_MAX) {
7969
0
        xmlErrMemory(ctxt);
7970
0
        goto error;
7971
0
    }
7972
7973
0
    for (i = 0; i < length; ) {
7974
0
        int clen = length - i;
7975
0
        int c = xmlGetUTF8Char(content + i, &clen);
7976
7977
0
        if ((c < 0) || (!IS_CHAR(c))) {
7978
0
            xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
7979
0
                              "xmlLoadEntityContent: invalid char value %d\n",
7980
0
                              content[i]);
7981
0
            goto error;
7982
0
        }
7983
0
        i += clen;
7984
0
    }
7985
7986
0
    xmlSaturatedAdd(&ctxt->sizeentities, length);
7987
0
    entity->content = content;
7988
0
    entity->length = length;
7989
0
    content = NULL;
7990
0
    ret = 0;
7991
7992
0
error:
7993
0
    while (ctxt->inputNr > 0)
7994
0
        xmlFreeInputStream(inputPop(ctxt));
7995
0
    xmlFree(ctxt->inputTab);
7996
0
    xmlFree((xmlChar *) ctxt->encoding);
7997
7998
0
    ctxt->input = oldinput;
7999
0
    ctxt->inputNr = oldinputNr;
8000
0
    ctxt->inputMax = oldinputMax;
8001
0
    ctxt->inputTab = oldinputTab;
8002
0
    ctxt->encoding = oldencoding;
8003
8004
0
    xmlFree(content);
8005
8006
0
    return(ret);
8007
0
}
8008
8009
/**
8010
 * xmlParseStringPEReference:
8011
 * @ctxt:  an XML parser context
8012
 * @str:  a pointer to an index in the string
8013
 *
8014
 * parse PEReference declarations
8015
 *
8016
 * [69] PEReference ::= '%' Name ';'
8017
 *
8018
 * [ WFC: No Recursion ]
8019
 * A parsed entity must not contain a recursive
8020
 * reference to itself, either directly or indirectly.
8021
 *
8022
 * [ WFC: Entity Declared ]
8023
 * In a document without any DTD, a document with only an internal DTD
8024
 * subset which contains no parameter entity references, or a document
8025
 * with "standalone='yes'", ...  ... The declaration of a parameter
8026
 * entity must precede any reference to it...
8027
 *
8028
 * [ VC: Entity Declared ]
8029
 * In a document with an external subset or external parameter entities
8030
 * with "standalone='no'", ...  ... The declaration of a parameter entity
8031
 * must precede any reference to it...
8032
 *
8033
 * [ WFC: In DTD ]
8034
 * Parameter-entity references may only appear in the DTD.
8035
 * NOTE: misleading but this is handled.
8036
 *
8037
 * Returns the string of the entity content.
8038
 *         str is updated to the current value of the index
8039
 */
8040
static xmlEntityPtr
8041
0
xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
8042
0
    const xmlChar *ptr;
8043
0
    xmlChar cur;
8044
0
    xmlChar *name;
8045
0
    xmlEntityPtr entity = NULL;
8046
8047
0
    if ((str == NULL) || (*str == NULL)) return(NULL);
8048
0
    ptr = *str;
8049
0
    cur = *ptr;
8050
0
    if (cur != '%')
8051
0
        return(NULL);
8052
0
    ptr++;
8053
0
    name = xmlParseStringName(ctxt, &ptr);
8054
0
    if (name == NULL) {
8055
0
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8056
0
           "xmlParseStringPEReference: no name\n");
8057
0
  *str = ptr;
8058
0
  return(NULL);
8059
0
    }
8060
0
    cur = *ptr;
8061
0
    if (cur != ';') {
8062
0
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
8063
0
  xmlFree(name);
8064
0
  *str = ptr;
8065
0
  return(NULL);
8066
0
    }
8067
0
    ptr++;
8068
8069
    /* Must be set before xmlHandleUndeclaredEntity */
8070
0
    ctxt->hasPErefs = 1;
8071
8072
    /*
8073
     * Request the entity from SAX
8074
     */
8075
0
    if ((ctxt->sax != NULL) &&
8076
0
  (ctxt->sax->getParameterEntity != NULL))
8077
0
  entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8078
8079
0
    if (entity == NULL) {
8080
0
        xmlHandleUndeclaredEntity(ctxt, name);
8081
0
    } else {
8082
  /*
8083
   * Internal checking in case the entity quest barfed
8084
   */
8085
0
  if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8086
0
      (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8087
0
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8088
0
        "%%%s; is not a parameter entity\n",
8089
0
        name, NULL);
8090
0
  }
8091
0
    }
8092
8093
0
    xmlFree(name);
8094
0
    *str = ptr;
8095
0
    return(entity);
8096
0
}
8097
8098
/**
8099
 * xmlParseDocTypeDecl:
8100
 * @ctxt:  an XML parser context
8101
 *
8102
 * DEPRECATED: Internal function, don't use.
8103
 *
8104
 * parse a DOCTYPE declaration
8105
 *
8106
 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
8107
 *                      ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8108
 *
8109
 * [ VC: Root Element Type ]
8110
 * The Name in the document type declaration must match the element
8111
 * type of the root element.
8112
 */
8113
8114
void
8115
0
xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
8116
0
    const xmlChar *name = NULL;
8117
0
    xmlChar *ExternalID = NULL;
8118
0
    xmlChar *URI = NULL;
8119
8120
    /*
8121
     * We know that '<!DOCTYPE' has been detected.
8122
     */
8123
0
    SKIP(9);
8124
8125
0
    SKIP_BLANKS;
8126
8127
    /*
8128
     * Parse the DOCTYPE name.
8129
     */
8130
0
    name = xmlParseName(ctxt);
8131
0
    if (name == NULL) {
8132
0
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8133
0
           "xmlParseDocTypeDecl : no DOCTYPE name !\n");
8134
0
    }
8135
0
    ctxt->intSubName = name;
8136
8137
0
    SKIP_BLANKS;
8138
8139
    /*
8140
     * Check for SystemID and ExternalID
8141
     */
8142
0
    URI = xmlParseExternalID(ctxt, &ExternalID, 1);
8143
8144
0
    if ((URI != NULL) || (ExternalID != NULL)) {
8145
0
        ctxt->hasExternalSubset = 1;
8146
0
    }
8147
0
    ctxt->extSubURI = URI;
8148
0
    ctxt->extSubSystem = ExternalID;
8149
8150
0
    SKIP_BLANKS;
8151
8152
    /*
8153
     * Create and update the internal subset.
8154
     */
8155
0
    if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
8156
0
  (!ctxt->disableSAX))
8157
0
  ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
8158
8159
    /*
8160
     * Is there any internal subset declarations ?
8161
     * they are handled separately in xmlParseInternalSubset()
8162
     */
8163
0
    if (RAW == '[')
8164
0
  return;
8165
8166
    /*
8167
     * We should be at the end of the DOCTYPE declaration.
8168
     */
8169
0
    if (RAW != '>') {
8170
0
  xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8171
0
    }
8172
0
    NEXT;
8173
0
}
8174
8175
/**
8176
 * xmlParseInternalSubset:
8177
 * @ctxt:  an XML parser context
8178
 *
8179
 * parse the internal subset declaration
8180
 *
8181
 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8182
 */
8183
8184
static void
8185
0
xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
8186
    /*
8187
     * Is there any DTD definition ?
8188
     */
8189
0
    if (RAW == '[') {
8190
0
        int oldInputNr = ctxt->inputNr;
8191
8192
0
        NEXT;
8193
  /*
8194
   * Parse the succession of Markup declarations and
8195
   * PEReferences.
8196
   * Subsequence (markupdecl | PEReference | S)*
8197
   */
8198
0
  SKIP_BLANKS;
8199
0
  while (((RAW != ']') || (ctxt->inputNr > oldInputNr)) &&
8200
0
               (PARSER_STOPPED(ctxt) == 0)) {
8201
8202
            /*
8203
             * Conditional sections are allowed from external entities included
8204
             * by PE References in the internal subset.
8205
             */
8206
0
            if ((PARSER_EXTERNAL(ctxt)) &&
8207
0
                (RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
8208
0
                xmlParseConditionalSections(ctxt);
8209
0
            } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
8210
0
          xmlParseMarkupDecl(ctxt);
8211
0
            } else if (RAW == '%') {
8212
0
          xmlParsePEReference(ctxt);
8213
0
            } else {
8214
0
    xmlFatalErr(ctxt, XML_ERR_INT_SUBSET_NOT_FINISHED, NULL);
8215
0
                break;
8216
0
            }
8217
0
      SKIP_BLANKS_PE;
8218
0
            SHRINK;
8219
0
            GROW;
8220
0
  }
8221
8222
0
        while (ctxt->inputNr > oldInputNr)
8223
0
            xmlPopPE(ctxt);
8224
8225
0
  if (RAW == ']') {
8226
0
      NEXT;
8227
0
      SKIP_BLANKS;
8228
0
  }
8229
0
    }
8230
8231
    /*
8232
     * We should be at the end of the DOCTYPE declaration.
8233
     */
8234
0
    if ((ctxt->wellFormed) && (RAW != '>')) {
8235
0
  xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8236
0
  return;
8237
0
    }
8238
0
    NEXT;
8239
0
}
8240
8241
#ifdef LIBXML_SAX1_ENABLED
8242
/**
8243
 * xmlParseAttribute:
8244
 * @ctxt:  an XML parser context
8245
 * @value:  a xmlChar ** used to store the value of the attribute
8246
 *
8247
 * DEPRECATED: Internal function, don't use.
8248
 *
8249
 * parse an attribute
8250
 *
8251
 * [41] Attribute ::= Name Eq AttValue
8252
 *
8253
 * [ WFC: No External Entity References ]
8254
 * Attribute values cannot contain direct or indirect entity references
8255
 * to external entities.
8256
 *
8257
 * [ WFC: No < in Attribute Values ]
8258
 * The replacement text of any entity referred to directly or indirectly in
8259
 * an attribute value (other than "&lt;") must not contain a <.
8260
 *
8261
 * [ VC: Attribute Value Type ]
8262
 * The attribute must have been declared; the value must be of the type
8263
 * declared for it.
8264
 *
8265
 * [25] Eq ::= S? '=' S?
8266
 *
8267
 * With namespace:
8268
 *
8269
 * [NS 11] Attribute ::= QName Eq AttValue
8270
 *
8271
 * Also the case QName == xmlns:??? is handled independently as a namespace
8272
 * definition.
8273
 *
8274
 * Returns the attribute name, and the value in *value.
8275
 */
8276
8277
const xmlChar *
8278
0
xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
8279
0
    const xmlChar *name;
8280
0
    xmlChar *val;
8281
8282
0
    *value = NULL;
8283
0
    GROW;
8284
0
    name = xmlParseName(ctxt);
8285
0
    if (name == NULL) {
8286
0
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8287
0
                 "error parsing attribute name\n");
8288
0
        return(NULL);
8289
0
    }
8290
8291
    /*
8292
     * read the value
8293
     */
8294
0
    SKIP_BLANKS;
8295
0
    if (RAW == '=') {
8296
0
        NEXT;
8297
0
  SKIP_BLANKS;
8298
0
  val = xmlParseAttValue(ctxt);
8299
0
    } else {
8300
0
  xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8301
0
         "Specification mandates value for attribute %s\n", name);
8302
0
  return(name);
8303
0
    }
8304
8305
    /*
8306
     * Check that xml:lang conforms to the specification
8307
     * No more registered as an error, just generate a warning now
8308
     * since this was deprecated in XML second edition
8309
     */
8310
0
    if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8311
0
  if (!xmlCheckLanguageID(val)) {
8312
0
      xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8313
0
              "Malformed value for xml:lang : %s\n",
8314
0
        val, NULL);
8315
0
  }
8316
0
    }
8317
8318
    /*
8319
     * Check that xml:space conforms to the specification
8320
     */
8321
0
    if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8322
0
  if (xmlStrEqual(val, BAD_CAST "default"))
8323
0
      *(ctxt->space) = 0;
8324
0
  else if (xmlStrEqual(val, BAD_CAST "preserve"))
8325
0
      *(ctxt->space) = 1;
8326
0
  else {
8327
0
    xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8328
0
"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8329
0
                                 val, NULL);
8330
0
  }
8331
0
    }
8332
8333
0
    *value = val;
8334
0
    return(name);
8335
0
}
8336
8337
/**
8338
 * xmlParseStartTag:
8339
 * @ctxt:  an XML parser context
8340
 *
8341
 * DEPRECATED: Internal function, don't use.
8342
 *
8343
 * Parse a start tag. Always consumes '<'.
8344
 *
8345
 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8346
 *
8347
 * [ WFC: Unique Att Spec ]
8348
 * No attribute name may appear more than once in the same start-tag or
8349
 * empty-element tag.
8350
 *
8351
 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8352
 *
8353
 * [ WFC: Unique Att Spec ]
8354
 * No attribute name may appear more than once in the same start-tag or
8355
 * empty-element tag.
8356
 *
8357
 * With namespace:
8358
 *
8359
 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8360
 *
8361
 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8362
 *
8363
 * Returns the element name parsed
8364
 */
8365
8366
const xmlChar *
8367
0
xmlParseStartTag(xmlParserCtxtPtr ctxt) {
8368
0
    const xmlChar *name;
8369
0
    const xmlChar *attname;
8370
0
    xmlChar *attvalue;
8371
0
    const xmlChar **atts = ctxt->atts;
8372
0
    int nbatts = 0;
8373
0
    int maxatts = ctxt->maxatts;
8374
0
    int i;
8375
8376
0
    if (RAW != '<') return(NULL);
8377
0
    NEXT1;
8378
8379
0
    name = xmlParseName(ctxt);
8380
0
    if (name == NULL) {
8381
0
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8382
0
       "xmlParseStartTag: invalid element name\n");
8383
0
        return(NULL);
8384
0
    }
8385
8386
    /*
8387
     * Now parse the attributes, it ends up with the ending
8388
     *
8389
     * (S Attribute)* S?
8390
     */
8391
0
    SKIP_BLANKS;
8392
0
    GROW;
8393
8394
0
    while (((RAW != '>') &&
8395
0
     ((RAW != '/') || (NXT(1) != '>')) &&
8396
0
     (IS_BYTE_CHAR(RAW))) && (PARSER_STOPPED(ctxt) == 0)) {
8397
0
  attname = xmlParseAttribute(ctxt, &attvalue);
8398
0
        if (attname == NULL)
8399
0
      break;
8400
0
        if (attvalue != NULL) {
8401
      /*
8402
       * [ WFC: Unique Att Spec ]
8403
       * No attribute name may appear more than once in the same
8404
       * start-tag or empty-element tag.
8405
       */
8406
0
      for (i = 0; i < nbatts;i += 2) {
8407
0
          if (xmlStrEqual(atts[i], attname)) {
8408
0
        xmlErrAttributeDup(ctxt, NULL, attname);
8409
0
        xmlFree(attvalue);
8410
0
        goto failed;
8411
0
    }
8412
0
      }
8413
      /*
8414
       * Add the pair to atts
8415
       */
8416
0
      if (atts == NULL) {
8417
0
          maxatts = 22; /* allow for 10 attrs by default */
8418
0
          atts = (const xmlChar **)
8419
0
           xmlMalloc(maxatts * sizeof(xmlChar *));
8420
0
    if (atts == NULL) {
8421
0
        xmlErrMemory(ctxt);
8422
0
        if (attvalue != NULL)
8423
0
      xmlFree(attvalue);
8424
0
        goto failed;
8425
0
    }
8426
0
    ctxt->atts = atts;
8427
0
    ctxt->maxatts = maxatts;
8428
0
      } else if (nbatts + 4 > maxatts) {
8429
0
          const xmlChar **n;
8430
8431
0
          maxatts *= 2;
8432
0
          n = (const xmlChar **) xmlRealloc((void *) atts,
8433
0
               maxatts * sizeof(const xmlChar *));
8434
0
    if (n == NULL) {
8435
0
        xmlErrMemory(ctxt);
8436
0
        if (attvalue != NULL)
8437
0
      xmlFree(attvalue);
8438
0
        goto failed;
8439
0
    }
8440
0
    atts = n;
8441
0
    ctxt->atts = atts;
8442
0
    ctxt->maxatts = maxatts;
8443
0
      }
8444
0
      atts[nbatts++] = attname;
8445
0
      atts[nbatts++] = attvalue;
8446
0
      atts[nbatts] = NULL;
8447
0
      atts[nbatts + 1] = NULL;
8448
0
  } else {
8449
0
      if (attvalue != NULL)
8450
0
    xmlFree(attvalue);
8451
0
  }
8452
8453
0
failed:
8454
8455
0
  GROW
8456
0
  if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8457
0
      break;
8458
0
  if (SKIP_BLANKS == 0) {
8459
0
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8460
0
         "attributes construct error\n");
8461
0
  }
8462
0
  SHRINK;
8463
0
        GROW;
8464
0
    }
8465
8466
    /*
8467
     * SAX: Start of Element !
8468
     */
8469
0
    if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
8470
0
  (!ctxt->disableSAX)) {
8471
0
  if (nbatts > 0)
8472
0
      ctxt->sax->startElement(ctxt->userData, name, atts);
8473
0
  else
8474
0
      ctxt->sax->startElement(ctxt->userData, name, NULL);
8475
0
    }
8476
8477
0
    if (atts != NULL) {
8478
        /* Free only the content strings */
8479
0
        for (i = 1;i < nbatts;i+=2)
8480
0
      if (atts[i] != NULL)
8481
0
         xmlFree((xmlChar *) atts[i]);
8482
0
    }
8483
0
    return(name);
8484
0
}
8485
8486
/**
8487
 * xmlParseEndTag1:
8488
 * @ctxt:  an XML parser context
8489
 * @line:  line of the start tag
8490
 * @nsNr:  number of namespaces on the start tag
8491
 *
8492
 * Parse an end tag. Always consumes '</'.
8493
 *
8494
 * [42] ETag ::= '</' Name S? '>'
8495
 *
8496
 * With namespace
8497
 *
8498
 * [NS 9] ETag ::= '</' QName S? '>'
8499
 */
8500
8501
static void
8502
0
xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
8503
0
    const xmlChar *name;
8504
8505
0
    GROW;
8506
0
    if ((RAW != '<') || (NXT(1) != '/')) {
8507
0
  xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
8508
0
           "xmlParseEndTag: '</' not found\n");
8509
0
  return;
8510
0
    }
8511
0
    SKIP(2);
8512
8513
0
    name = xmlParseNameAndCompare(ctxt,ctxt->name);
8514
8515
    /*
8516
     * We should definitely be at the ending "S? '>'" part
8517
     */
8518
0
    GROW;
8519
0
    SKIP_BLANKS;
8520
0
    if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
8521
0
  xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
8522
0
    } else
8523
0
  NEXT1;
8524
8525
    /*
8526
     * [ WFC: Element Type Match ]
8527
     * The Name in an element's end-tag must match the element type in the
8528
     * start-tag.
8529
     *
8530
     */
8531
0
    if (name != (xmlChar*)1) {
8532
0
        if (name == NULL) name = BAD_CAST "unparsable";
8533
0
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
8534
0
         "Opening and ending tag mismatch: %s line %d and %s\n",
8535
0
                    ctxt->name, line, name);
8536
0
    }
8537
8538
    /*
8539
     * SAX: End of Tag
8540
     */
8541
0
    if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8542
0
  (!ctxt->disableSAX))
8543
0
        ctxt->sax->endElement(ctxt->userData, ctxt->name);
8544
8545
0
    namePop(ctxt);
8546
0
    spacePop(ctxt);
8547
0
    return;
8548
0
}
8549
8550
/**
8551
 * xmlParseEndTag:
8552
 * @ctxt:  an XML parser context
8553
 *
8554
 * DEPRECATED: Internal function, don't use.
8555
 *
8556
 * parse an end of tag
8557
 *
8558
 * [42] ETag ::= '</' Name S? '>'
8559
 *
8560
 * With namespace
8561
 *
8562
 * [NS 9] ETag ::= '</' QName S? '>'
8563
 */
8564
8565
void
8566
0
xmlParseEndTag(xmlParserCtxtPtr ctxt) {
8567
0
    xmlParseEndTag1(ctxt, 0);
8568
0
}
8569
#endif /* LIBXML_SAX1_ENABLED */
8570
8571
/************************************************************************
8572
 *                  *
8573
 *          SAX 2 specific operations       *
8574
 *                  *
8575
 ************************************************************************/
8576
8577
/**
8578
 * xmlParseQNameHashed:
8579
 * @ctxt:  an XML parser context
8580
 * @prefix:  pointer to store the prefix part
8581
 *
8582
 * parse an XML Namespace QName
8583
 *
8584
 * [6]  QName  ::= (Prefix ':')? LocalPart
8585
 * [7]  Prefix  ::= NCName
8586
 * [8]  LocalPart  ::= NCName
8587
 *
8588
 * Returns the Name parsed or NULL
8589
 */
8590
8591
static xmlHashedString
8592
0
xmlParseQNameHashed(xmlParserCtxtPtr ctxt, xmlHashedString *prefix) {
8593
0
    xmlHashedString l, p;
8594
0
    int start, isNCName = 0;
8595
8596
0
    l.name = NULL;
8597
0
    p.name = NULL;
8598
8599
0
    GROW;
8600
0
    start = CUR_PTR - BASE_PTR;
8601
8602
0
    l = xmlParseNCName(ctxt);
8603
0
    if (l.name != NULL) {
8604
0
        isNCName = 1;
8605
0
        if (CUR == ':') {
8606
0
            NEXT;
8607
0
            p = l;
8608
0
            l = xmlParseNCName(ctxt);
8609
0
        }
8610
0
    }
8611
0
    if ((l.name == NULL) || (CUR == ':')) {
8612
0
        xmlChar *tmp;
8613
8614
0
        l.name = NULL;
8615
0
        p.name = NULL;
8616
0
        if ((isNCName == 0) && (CUR != ':'))
8617
0
            return(l);
8618
0
        tmp = xmlParseNmtoken(ctxt);
8619
0
        if (tmp != NULL)
8620
0
            xmlFree(tmp);
8621
0
        l = xmlDictLookupHashed(ctxt->dict, BASE_PTR + start,
8622
0
                                CUR_PTR - (BASE_PTR + start));
8623
0
        if (l.name == NULL) {
8624
0
            xmlErrMemory(ctxt);
8625
0
            return(l);
8626
0
        }
8627
0
        xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8628
0
                 "Failed to parse QName '%s'\n", l.name, NULL, NULL);
8629
0
    }
8630
8631
0
    *prefix = p;
8632
0
    return(l);
8633
0
}
8634
8635
/**
8636
 * xmlParseQName:
8637
 * @ctxt:  an XML parser context
8638
 * @prefix:  pointer to store the prefix part
8639
 *
8640
 * parse an XML Namespace QName
8641
 *
8642
 * [6]  QName  ::= (Prefix ':')? LocalPart
8643
 * [7]  Prefix  ::= NCName
8644
 * [8]  LocalPart  ::= NCName
8645
 *
8646
 * Returns the Name parsed or NULL
8647
 */
8648
8649
static const xmlChar *
8650
0
xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8651
0
    xmlHashedString n, p;
8652
8653
0
    n = xmlParseQNameHashed(ctxt, &p);
8654
0
    if (n.name == NULL)
8655
0
        return(NULL);
8656
0
    *prefix = p.name;
8657
0
    return(n.name);
8658
0
}
8659
8660
/**
8661
 * xmlParseQNameAndCompare:
8662
 * @ctxt:  an XML parser context
8663
 * @name:  the localname
8664
 * @prefix:  the prefix, if any.
8665
 *
8666
 * parse an XML name and compares for match
8667
 * (specialized for endtag parsing)
8668
 *
8669
 * Returns NULL for an illegal name, (xmlChar*) 1 for success
8670
 * and the name for mismatch
8671
 */
8672
8673
static const xmlChar *
8674
xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8675
0
                        xmlChar const *prefix) {
8676
0
    const xmlChar *cmp;
8677
0
    const xmlChar *in;
8678
0
    const xmlChar *ret;
8679
0
    const xmlChar *prefix2;
8680
8681
0
    if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8682
8683
0
    GROW;
8684
0
    in = ctxt->input->cur;
8685
8686
0
    cmp = prefix;
8687
0
    while (*in != 0 && *in == *cmp) {
8688
0
  ++in;
8689
0
  ++cmp;
8690
0
    }
8691
0
    if ((*cmp == 0) && (*in == ':')) {
8692
0
        in++;
8693
0
  cmp = name;
8694
0
  while (*in != 0 && *in == *cmp) {
8695
0
      ++in;
8696
0
      ++cmp;
8697
0
  }
8698
0
  if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
8699
      /* success */
8700
0
            ctxt->input->col += in - ctxt->input->cur;
8701
0
      ctxt->input->cur = in;
8702
0
      return((const xmlChar*) 1);
8703
0
  }
8704
0
    }
8705
    /*
8706
     * all strings coms from the dictionary, equality can be done directly
8707
     */
8708
0
    ret = xmlParseQName (ctxt, &prefix2);
8709
0
    if (ret == NULL)
8710
0
        return(NULL);
8711
0
    if ((ret == name) && (prefix == prefix2))
8712
0
  return((const xmlChar*) 1);
8713
0
    return ret;
8714
0
}
8715
8716
/**
8717
 * xmlParseAttribute2:
8718
 * @ctxt:  an XML parser context
8719
 * @pref:  the element prefix
8720
 * @elem:  the element name
8721
 * @prefix:  a xmlChar ** used to store the value of the attribute prefix
8722
 * @value:  a xmlChar ** used to store the value of the attribute
8723
 * @len:  an int * to save the length of the attribute
8724
 * @alloc:  an int * to indicate if the attribute was allocated
8725
 *
8726
 * parse an attribute in the new SAX2 framework.
8727
 *
8728
 * Returns the attribute name, and the value in *value, .
8729
 */
8730
8731
static xmlHashedString
8732
xmlParseAttribute2(xmlParserCtxtPtr ctxt,
8733
                   const xmlChar * pref, const xmlChar * elem,
8734
                   xmlHashedString * hprefix, xmlChar ** value,
8735
                   int *len, int *alloc)
8736
0
{
8737
0
    xmlHashedString hname;
8738
0
    const xmlChar *prefix, *name;
8739
0
    xmlChar *val = NULL, *internal_val = NULL;
8740
0
    int normalize = 0;
8741
0
    int isNamespace;
8742
8743
0
    *value = NULL;
8744
0
    GROW;
8745
0
    hname = xmlParseQNameHashed(ctxt, hprefix);
8746
0
    if (hname.name == NULL) {
8747
0
        xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8748
0
                       "error parsing attribute name\n");
8749
0
        return(hname);
8750
0
    }
8751
0
    name = hname.name;
8752
0
    if (hprefix->name != NULL)
8753
0
        prefix = hprefix->name;
8754
0
    else
8755
0
        prefix = NULL;
8756
8757
    /*
8758
     * get the type if needed
8759
     */
8760
0
    if (ctxt->attsSpecial != NULL) {
8761
0
        int type;
8762
8763
0
        type = (int) (ptrdiff_t) xmlHashQLookup2(ctxt->attsSpecial,
8764
0
                                                 pref, elem,
8765
0
                                                 prefix, name);
8766
0
        if (type != 0)
8767
0
            normalize = 1;
8768
0
    }
8769
8770
    /*
8771
     * read the value
8772
     */
8773
0
    SKIP_BLANKS;
8774
0
    if (RAW == '=') {
8775
0
        NEXT;
8776
0
        SKIP_BLANKS;
8777
0
        isNamespace = (((prefix == NULL) && (name == ctxt->str_xmlns)) ||
8778
0
                       (prefix == ctxt->str_xmlns));
8779
0
        val = xmlParseAttValueInternal(ctxt, len, alloc, normalize,
8780
0
                                       isNamespace);
8781
0
        if (val == NULL)
8782
0
            goto error;
8783
0
    } else {
8784
0
        xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8785
0
                          "Specification mandates value for attribute %s\n",
8786
0
                          name);
8787
0
        goto error;
8788
0
    }
8789
8790
0
    if (prefix == ctxt->str_xml) {
8791
        /*
8792
         * Check that xml:lang conforms to the specification
8793
         * No more registered as an error, just generate a warning now
8794
         * since this was deprecated in XML second edition
8795
         */
8796
0
        if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
8797
0
            internal_val = xmlStrndup(val, *len);
8798
0
            if (internal_val == NULL)
8799
0
                goto mem_error;
8800
0
            if (!xmlCheckLanguageID(internal_val)) {
8801
0
                xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8802
0
                              "Malformed value for xml:lang : %s\n",
8803
0
                              internal_val, NULL);
8804
0
            }
8805
0
        }
8806
8807
        /*
8808
         * Check that xml:space conforms to the specification
8809
         */
8810
0
        if (xmlStrEqual(name, BAD_CAST "space")) {
8811
0
            internal_val = xmlStrndup(val, *len);
8812
0
            if (internal_val == NULL)
8813
0
                goto mem_error;
8814
0
            if (xmlStrEqual(internal_val, BAD_CAST "default"))
8815
0
                *(ctxt->space) = 0;
8816
0
            else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
8817
0
                *(ctxt->space) = 1;
8818
0
            else {
8819
0
                xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8820
0
                              "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8821
0
                              internal_val, NULL);
8822
0
            }
8823
0
        }
8824
0
        if (internal_val) {
8825
0
            xmlFree(internal_val);
8826
0
        }
8827
0
    }
8828
8829
0
    *value = val;
8830
0
    return (hname);
8831
8832
0
mem_error:
8833
0
    xmlErrMemory(ctxt);
8834
0
error:
8835
0
    if ((val != NULL) && (*alloc != 0))
8836
0
        xmlFree(val);
8837
0
    return(hname);
8838
0
}
8839
8840
/**
8841
 * xmlAttrHashInsert:
8842
 * @ctxt: parser context
8843
 * @size: size of the hash table
8844
 * @name: attribute name
8845
 * @uri: namespace uri
8846
 * @hashValue: combined hash value of name and uri
8847
 * @aindex: attribute index (this is a multiple of 5)
8848
 *
8849
 * Inserts a new attribute into the hash table.
8850
 *
8851
 * Returns INT_MAX if no existing attribute was found, the attribute
8852
 * index if an attribute was found, -1 if a memory allocation failed.
8853
 */
8854
static int
8855
xmlAttrHashInsert(xmlParserCtxtPtr ctxt, unsigned size, const xmlChar *name,
8856
0
                  const xmlChar *uri, unsigned hashValue, int aindex) {
8857
0
    xmlAttrHashBucket *table = ctxt->attrHash;
8858
0
    xmlAttrHashBucket *bucket;
8859
0
    unsigned hindex;
8860
8861
0
    hindex = hashValue & (size - 1);
8862
0
    bucket = &table[hindex];
8863
8864
0
    while (bucket->index >= 0) {
8865
0
        const xmlChar **atts = &ctxt->atts[bucket->index];
8866
8867
0
        if (name == atts[0]) {
8868
0
            int nsIndex = (int) (ptrdiff_t) atts[2];
8869
8870
0
            if ((nsIndex == NS_INDEX_EMPTY) ? (uri == NULL) :
8871
0
                (nsIndex == NS_INDEX_XML) ? (uri == ctxt->str_xml_ns) :
8872
0
                (uri == ctxt->nsTab[nsIndex * 2 + 1]))
8873
0
                return(bucket->index);
8874
0
        }
8875
8876
0
        hindex++;
8877
0
        bucket++;
8878
0
        if (hindex >= size) {
8879
0
            hindex = 0;
8880
0
            bucket = table;
8881
0
        }
8882
0
    }
8883
8884
0
    bucket->index = aindex;
8885
8886
0
    return(INT_MAX);
8887
0
}
8888
8889
/**
8890
 * xmlParseStartTag2:
8891
 * @ctxt:  an XML parser context
8892
 *
8893
 * Parse a start tag. Always consumes '<'.
8894
 *
8895
 * This routine is called when running SAX2 parsing
8896
 *
8897
 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8898
 *
8899
 * [ WFC: Unique Att Spec ]
8900
 * No attribute name may appear more than once in the same start-tag or
8901
 * empty-element tag.
8902
 *
8903
 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8904
 *
8905
 * [ WFC: Unique Att Spec ]
8906
 * No attribute name may appear more than once in the same start-tag or
8907
 * empty-element tag.
8908
 *
8909
 * With namespace:
8910
 *
8911
 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8912
 *
8913
 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8914
 *
8915
 * Returns the element name parsed
8916
 */
8917
8918
static const xmlChar *
8919
xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
8920
0
                  const xmlChar **URI, int *nbNsPtr) {
8921
0
    xmlHashedString hlocalname;
8922
0
    xmlHashedString hprefix;
8923
0
    xmlHashedString hattname;
8924
0
    xmlHashedString haprefix;
8925
0
    const xmlChar *localname;
8926
0
    const xmlChar *prefix;
8927
0
    const xmlChar *attname;
8928
0
    const xmlChar *aprefix;
8929
0
    const xmlChar *uri;
8930
0
    xmlChar *attvalue = NULL;
8931
0
    const xmlChar **atts = ctxt->atts;
8932
0
    unsigned attrHashSize = 0;
8933
0
    int maxatts = ctxt->maxatts;
8934
0
    int nratts, nbatts, nbdef;
8935
0
    int i, j, nbNs, nbTotalDef, attval, nsIndex, maxAtts;
8936
0
    int alloc = 0;
8937
8938
0
    if (RAW != '<') return(NULL);
8939
0
    NEXT1;
8940
8941
0
    nbatts = 0;
8942
0
    nratts = 0;
8943
0
    nbdef = 0;
8944
0
    nbNs = 0;
8945
0
    nbTotalDef = 0;
8946
0
    attval = 0;
8947
8948
0
    if (xmlParserNsStartElement(ctxt->nsdb) < 0) {
8949
0
        xmlErrMemory(ctxt);
8950
0
        return(NULL);
8951
0
    }
8952
8953
0
    hlocalname = xmlParseQNameHashed(ctxt, &hprefix);
8954
0
    if (hlocalname.name == NULL) {
8955
0
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8956
0
           "StartTag: invalid element name\n");
8957
0
        return(NULL);
8958
0
    }
8959
0
    localname = hlocalname.name;
8960
0
    prefix = hprefix.name;
8961
8962
    /*
8963
     * Now parse the attributes, it ends up with the ending
8964
     *
8965
     * (S Attribute)* S?
8966
     */
8967
0
    SKIP_BLANKS;
8968
0
    GROW;
8969
8970
    /*
8971
     * The ctxt->atts array will be ultimately passed to the SAX callback
8972
     * containing five xmlChar pointers for each attribute:
8973
     *
8974
     * [0] attribute name
8975
     * [1] attribute prefix
8976
     * [2] namespace URI
8977
     * [3] attribute value
8978
     * [4] end of attribute value
8979
     *
8980
     * To save memory, we reuse this array temporarily and store integers
8981
     * in these pointer variables.
8982
     *
8983
     * [0] attribute name
8984
     * [1] attribute prefix
8985
     * [2] hash value of attribute prefix, and later namespace index
8986
     * [3] for non-allocated values: ptrdiff_t offset into input buffer
8987
     * [4] for non-allocated values: ptrdiff_t offset into input buffer
8988
     *
8989
     * The ctxt->attallocs array contains an additional unsigned int for
8990
     * each attribute, containing the hash value of the attribute name
8991
     * and the alloc flag in bit 31.
8992
     */
8993
8994
0
    while (((RAW != '>') &&
8995
0
     ((RAW != '/') || (NXT(1) != '>')) &&
8996
0
     (IS_BYTE_CHAR(RAW))) && (PARSER_STOPPED(ctxt) == 0)) {
8997
0
  int len = -1;
8998
8999
0
  hattname = xmlParseAttribute2(ctxt, prefix, localname,
9000
0
                                          &haprefix, &attvalue, &len,
9001
0
                                          &alloc);
9002
0
        if (hattname.name == NULL)
9003
0
      break;
9004
0
        if (attvalue == NULL)
9005
0
            goto next_attr;
9006
0
        attname = hattname.name;
9007
0
        aprefix = haprefix.name;
9008
0
  if (len < 0) len = xmlStrlen(attvalue);
9009
9010
0
        if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9011
0
            xmlHashedString huri;
9012
0
            xmlURIPtr parsedUri;
9013
9014
0
            huri = xmlDictLookupHashed(ctxt->dict, attvalue, len);
9015
0
            uri = huri.name;
9016
0
            if (uri == NULL) {
9017
0
                xmlErrMemory(ctxt);
9018
0
                goto next_attr;
9019
0
            }
9020
0
            if (*uri != 0) {
9021
0
                if (xmlParseURISafe((const char *) uri, &parsedUri) < 0) {
9022
0
                    xmlErrMemory(ctxt);
9023
0
                    goto next_attr;
9024
0
                }
9025
0
                if (parsedUri == NULL) {
9026
0
                    xmlNsErr(ctxt, XML_WAR_NS_URI,
9027
0
                             "xmlns: '%s' is not a valid URI\n",
9028
0
                                       uri, NULL, NULL);
9029
0
                } else {
9030
0
                    if (parsedUri->scheme == NULL) {
9031
0
                        xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9032
0
                                  "xmlns: URI %s is not absolute\n",
9033
0
                                  uri, NULL, NULL);
9034
0
                    }
9035
0
                    xmlFreeURI(parsedUri);
9036
0
                }
9037
0
                if (uri == ctxt->str_xml_ns) {
9038
0
                    if (attname != ctxt->str_xml) {
9039
0
                        xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9040
0
                     "xml namespace URI cannot be the default namespace\n",
9041
0
                                 NULL, NULL, NULL);
9042
0
                    }
9043
0
                    goto next_attr;
9044
0
                }
9045
0
                if ((len == 29) &&
9046
0
                    (xmlStrEqual(uri,
9047
0
                             BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9048
0
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9049
0
                         "reuse of the xmlns namespace name is forbidden\n",
9050
0
                             NULL, NULL, NULL);
9051
0
                    goto next_attr;
9052
0
                }
9053
0
            }
9054
9055
0
            if (xmlParserNsPush(ctxt, NULL, &huri, NULL, 0) > 0)
9056
0
                nbNs++;
9057
0
        } else if (aprefix == ctxt->str_xmlns) {
9058
0
            xmlHashedString huri;
9059
0
            xmlURIPtr parsedUri;
9060
9061
0
            huri = xmlDictLookupHashed(ctxt->dict, attvalue, len);
9062
0
            uri = huri.name;
9063
0
            if (uri == NULL) {
9064
0
                xmlErrMemory(ctxt);
9065
0
                goto next_attr;
9066
0
            }
9067
9068
0
            if (attname == ctxt->str_xml) {
9069
0
                if (uri != ctxt->str_xml_ns) {
9070
0
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9071
0
                             "xml namespace prefix mapped to wrong URI\n",
9072
0
                             NULL, NULL, NULL);
9073
0
                }
9074
                /*
9075
                 * Do not keep a namespace definition node
9076
                 */
9077
0
                goto next_attr;
9078
0
            }
9079
0
            if (uri == ctxt->str_xml_ns) {
9080
0
                if (attname != ctxt->str_xml) {
9081
0
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9082
0
                             "xml namespace URI mapped to wrong prefix\n",
9083
0
                             NULL, NULL, NULL);
9084
0
                }
9085
0
                goto next_attr;
9086
0
            }
9087
0
            if (attname == ctxt->str_xmlns) {
9088
0
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9089
0
                         "redefinition of the xmlns prefix is forbidden\n",
9090
0
                         NULL, NULL, NULL);
9091
0
                goto next_attr;
9092
0
            }
9093
0
            if ((len == 29) &&
9094
0
                (xmlStrEqual(uri,
9095
0
                             BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9096
0
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9097
0
                         "reuse of the xmlns namespace name is forbidden\n",
9098
0
                         NULL, NULL, NULL);
9099
0
                goto next_attr;
9100
0
            }
9101
0
            if ((uri == NULL) || (uri[0] == 0)) {
9102
0
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9103
0
                         "xmlns:%s: Empty XML namespace is not allowed\n",
9104
0
                              attname, NULL, NULL);
9105
0
                goto next_attr;
9106
0
            } else {
9107
0
                if (xmlParseURISafe((const char *) uri, &parsedUri) < 0) {
9108
0
                    xmlErrMemory(ctxt);
9109
0
                    goto next_attr;
9110
0
                }
9111
0
                if (parsedUri == NULL) {
9112
0
                    xmlNsErr(ctxt, XML_WAR_NS_URI,
9113
0
                         "xmlns:%s: '%s' is not a valid URI\n",
9114
0
                                       attname, uri, NULL);
9115
0
                } else {
9116
0
                    if ((ctxt->pedantic) && (parsedUri->scheme == NULL)) {
9117
0
                        xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9118
0
                                  "xmlns:%s: URI %s is not absolute\n",
9119
0
                                  attname, uri, NULL);
9120
0
                    }
9121
0
                    xmlFreeURI(parsedUri);
9122
0
                }
9123
0
            }
9124
9125
0
            if (xmlParserNsPush(ctxt, &hattname, &huri, NULL, 0) > 0)
9126
0
                nbNs++;
9127
0
        } else {
9128
            /*
9129
             * Populate attributes array, see above for repurposing
9130
             * of xmlChar pointers.
9131
             */
9132
0
            if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9133
0
                if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9134
0
                    goto next_attr;
9135
0
                }
9136
0
                maxatts = ctxt->maxatts;
9137
0
                atts = ctxt->atts;
9138
0
            }
9139
0
            ctxt->attallocs[nratts++] = (hattname.hashValue & 0x7FFFFFFF) |
9140
0
                                        ((unsigned) alloc << 31);
9141
0
            atts[nbatts++] = attname;
9142
0
            atts[nbatts++] = aprefix;
9143
0
            atts[nbatts++] = (const xmlChar *) (size_t) haprefix.hashValue;
9144
0
            if (alloc) {
9145
0
                atts[nbatts++] = attvalue;
9146
0
                attvalue += len;
9147
0
                atts[nbatts++] = attvalue;
9148
0
            } else {
9149
                /*
9150
                 * attvalue points into the input buffer which can be
9151
                 * reallocated. Store differences to input->base instead.
9152
                 * The pointers will be reconstructed later.
9153
                 */
9154
0
                atts[nbatts++] = (void *) (attvalue - BASE_PTR);
9155
0
                attvalue += len;
9156
0
                atts[nbatts++] = (void *) (attvalue - BASE_PTR);
9157
0
            }
9158
            /*
9159
             * tag if some deallocation is needed
9160
             */
9161
0
            if (alloc != 0) attval = 1;
9162
0
            attvalue = NULL; /* moved into atts */
9163
0
        }
9164
9165
0
next_attr:
9166
0
        if ((attvalue != NULL) && (alloc != 0)) {
9167
0
            xmlFree(attvalue);
9168
0
            attvalue = NULL;
9169
0
        }
9170
9171
0
  GROW
9172
0
  if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9173
0
      break;
9174
0
  if (SKIP_BLANKS == 0) {
9175
0
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9176
0
         "attributes construct error\n");
9177
0
      break;
9178
0
  }
9179
0
        GROW;
9180
0
    }
9181
9182
    /*
9183
     * Namespaces from default attributes
9184
     */
9185
0
    if (ctxt->attsDefault != NULL) {
9186
0
        xmlDefAttrsPtr defaults;
9187
9188
0
  defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9189
0
  if (defaults != NULL) {
9190
0
      for (i = 0; i < defaults->nbAttrs; i++) {
9191
0
                xmlDefAttr *attr = &defaults->attrs[i];
9192
9193
0
          attname = attr->name.name;
9194
0
    aprefix = attr->prefix.name;
9195
9196
0
    if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9197
0
                    xmlParserEntityCheck(ctxt, attr->expandedSize);
9198
9199
0
                    if (xmlParserNsPush(ctxt, NULL, &attr->value, NULL, 1) > 0)
9200
0
                        nbNs++;
9201
0
    } else if (aprefix == ctxt->str_xmlns) {
9202
0
                    xmlParserEntityCheck(ctxt, attr->expandedSize);
9203
9204
0
                    if (xmlParserNsPush(ctxt, &attr->name, &attr->value,
9205
0
                                      NULL, 1) > 0)
9206
0
                        nbNs++;
9207
0
    } else {
9208
0
                    nbTotalDef += 1;
9209
0
                }
9210
0
      }
9211
0
  }
9212
0
    }
9213
9214
    /*
9215
     * Resolve attribute namespaces
9216
     */
9217
0
    for (i = 0; i < nbatts; i += 5) {
9218
0
        attname = atts[i];
9219
0
        aprefix = atts[i+1];
9220
9221
        /*
9222
  * The default namespace does not apply to attribute names.
9223
  */
9224
0
  if (aprefix == NULL) {
9225
0
            nsIndex = NS_INDEX_EMPTY;
9226
0
        } else if (aprefix == ctxt->str_xml) {
9227
0
            nsIndex = NS_INDEX_XML;
9228
0
        } else {
9229
0
            haprefix.name = aprefix;
9230
0
            haprefix.hashValue = (size_t) atts[i+2];
9231
0
            nsIndex = xmlParserNsLookup(ctxt, &haprefix, NULL);
9232
9233
0
      if ((nsIndex == INT_MAX) || (nsIndex < ctxt->nsdb->minNsIndex)) {
9234
0
                xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9235
0
        "Namespace prefix %s for %s on %s is not defined\n",
9236
0
        aprefix, attname, localname);
9237
0
                nsIndex = NS_INDEX_EMPTY;
9238
0
            }
9239
0
        }
9240
9241
0
        atts[i+2] = (const xmlChar *) (ptrdiff_t) nsIndex;
9242
0
    }
9243
9244
    /*
9245
     * Maximum number of attributes including default attributes.
9246
     */
9247
0
    maxAtts = nratts + nbTotalDef;
9248
9249
    /*
9250
     * Verify that attribute names are unique.
9251
     */
9252
0
    if (maxAtts > 1) {
9253
0
        attrHashSize = 4;
9254
0
        while (attrHashSize / 2 < (unsigned) maxAtts)
9255
0
            attrHashSize *= 2;
9256
9257
0
        if (attrHashSize > ctxt->attrHashMax) {
9258
0
            xmlAttrHashBucket *tmp;
9259
9260
0
            tmp = xmlRealloc(ctxt->attrHash, attrHashSize * sizeof(tmp[0]));
9261
0
            if (tmp == NULL) {
9262
0
                xmlErrMemory(ctxt);
9263
0
                goto done;
9264
0
            }
9265
9266
0
            ctxt->attrHash = tmp;
9267
0
            ctxt->attrHashMax = attrHashSize;
9268
0
        }
9269
9270
0
        memset(ctxt->attrHash, -1, attrHashSize * sizeof(ctxt->attrHash[0]));
9271
9272
0
        for (i = 0, j = 0; j < nratts; i += 5, j++) {
9273
0
            const xmlChar *nsuri;
9274
0
            unsigned hashValue, nameHashValue, uriHashValue;
9275
0
            int res;
9276
9277
0
            attname = atts[i];
9278
0
            aprefix = atts[i+1];
9279
0
            nsIndex = (ptrdiff_t) atts[i+2];
9280
            /* Hash values always have bit 31 set, see dict.c */
9281
0
            nameHashValue = ctxt->attallocs[j] | 0x80000000;
9282
9283
0
            if (nsIndex == NS_INDEX_EMPTY) {
9284
                /*
9285
                 * Prefix with empty namespace means an undeclared
9286
                 * prefix which was already reported above.
9287
                 */
9288
0
                if (aprefix != NULL)
9289
0
                    continue;
9290
0
                nsuri = NULL;
9291
0
                uriHashValue = URI_HASH_EMPTY;
9292
0
            } else if (nsIndex == NS_INDEX_XML) {
9293
0
                nsuri = ctxt->str_xml_ns;
9294
0
                uriHashValue = URI_HASH_XML;
9295
0
            } else {
9296
0
                nsuri = ctxt->nsTab[nsIndex * 2 + 1];
9297
0
                uriHashValue = ctxt->nsdb->extra[nsIndex].uriHashValue;
9298
0
            }
9299
9300
0
            hashValue = xmlDictCombineHash(nameHashValue, uriHashValue);
9301
0
            res = xmlAttrHashInsert(ctxt, attrHashSize, attname, nsuri,
9302
0
                                    hashValue, i);
9303
0
            if (res < 0)
9304
0
                continue;
9305
9306
            /*
9307
             * [ WFC: Unique Att Spec ]
9308
             * No attribute name may appear more than once in the same
9309
             * start-tag or empty-element tag.
9310
             * As extended by the Namespace in XML REC.
9311
             */
9312
0
            if (res < INT_MAX) {
9313
0
                if (aprefix == atts[res+1]) {
9314
0
                    xmlErrAttributeDup(ctxt, aprefix, attname);
9315
0
                } else {
9316
0
                    xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9317
0
                             "Namespaced Attribute %s in '%s' redefined\n",
9318
0
                             attname, nsuri, NULL);
9319
0
                }
9320
0
            }
9321
0
        }
9322
0
    }
9323
9324
    /*
9325
     * Default attributes
9326
     */
9327
0
    if (ctxt->attsDefault != NULL) {
9328
0
        xmlDefAttrsPtr defaults;
9329
9330
0
  defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9331
0
  if (defaults != NULL) {
9332
0
      for (i = 0; i < defaults->nbAttrs; i++) {
9333
0
                xmlDefAttr *attr = &defaults->attrs[i];
9334
0
                const xmlChar *nsuri;
9335
0
                unsigned hashValue, uriHashValue;
9336
0
                int res;
9337
9338
0
          attname = attr->name.name;
9339
0
    aprefix = attr->prefix.name;
9340
9341
0
    if ((attname == ctxt->str_xmlns) && (aprefix == NULL))
9342
0
                    continue;
9343
0
    if (aprefix == ctxt->str_xmlns)
9344
0
                    continue;
9345
9346
0
                if (aprefix == NULL) {
9347
0
                    nsIndex = NS_INDEX_EMPTY;
9348
0
                    nsuri = NULL;
9349
0
                    uriHashValue = URI_HASH_EMPTY;
9350
0
                } if (aprefix == ctxt->str_xml) {
9351
0
                    nsIndex = NS_INDEX_XML;
9352
0
                    nsuri = ctxt->str_xml_ns;
9353
0
                    uriHashValue = URI_HASH_XML;
9354
0
                } else if (aprefix != NULL) {
9355
0
                    nsIndex = xmlParserNsLookup(ctxt, &attr->prefix, NULL);
9356
0
                    if ((nsIndex == INT_MAX) ||
9357
0
                        (nsIndex < ctxt->nsdb->minNsIndex)) {
9358
0
                        xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9359
0
                                 "Namespace prefix %s for %s on %s is not "
9360
0
                                 "defined\n",
9361
0
                                 aprefix, attname, localname);
9362
0
                        nsIndex = NS_INDEX_EMPTY;
9363
0
                        nsuri = NULL;
9364
0
                        uriHashValue = URI_HASH_EMPTY;
9365
0
                    } else {
9366
0
                        nsuri = ctxt->nsTab[nsIndex * 2 + 1];
9367
0
                        uriHashValue = ctxt->nsdb->extra[nsIndex].uriHashValue;
9368
0
                    }
9369
0
                }
9370
9371
                /*
9372
                 * Check whether the attribute exists
9373
                 */
9374
0
                if (maxAtts > 1) {
9375
0
                    hashValue = xmlDictCombineHash(attr->name.hashValue,
9376
0
                                                   uriHashValue);
9377
0
                    res = xmlAttrHashInsert(ctxt, attrHashSize, attname, nsuri,
9378
0
                                            hashValue, nbatts);
9379
0
                    if (res < 0)
9380
0
                        continue;
9381
0
                    if (res < INT_MAX) {
9382
0
                        if (aprefix == atts[res+1])
9383
0
                            continue;
9384
0
                        xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9385
0
                                 "Namespaced Attribute %s in '%s' redefined\n",
9386
0
                                 attname, nsuri, NULL);
9387
0
                    }
9388
0
                }
9389
9390
0
                xmlParserEntityCheck(ctxt, attr->expandedSize);
9391
9392
0
                if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9393
0
                    if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9394
0
                        localname = NULL;
9395
0
                        goto done;
9396
0
                    }
9397
0
                    maxatts = ctxt->maxatts;
9398
0
                    atts = ctxt->atts;
9399
0
                }
9400
9401
0
                atts[nbatts++] = attname;
9402
0
                atts[nbatts++] = aprefix;
9403
0
                atts[nbatts++] = (const xmlChar *) (ptrdiff_t) nsIndex;
9404
0
                atts[nbatts++] = attr->value.name;
9405
0
                atts[nbatts++] = attr->valueEnd;
9406
0
                if ((ctxt->standalone == 1) && (attr->external != 0)) {
9407
0
                    xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
9408
0
                            "standalone: attribute %s on %s defaulted "
9409
0
                            "from external subset\n",
9410
0
                            attname, localname);
9411
0
                }
9412
0
                nbdef++;
9413
0
      }
9414
0
  }
9415
0
    }
9416
9417
    /*
9418
     * Reconstruct attribute pointers
9419
     */
9420
0
    for (i = 0, j = 0; i < nbatts; i += 5, j++) {
9421
        /* namespace URI */
9422
0
        nsIndex = (ptrdiff_t) atts[i+2];
9423
0
        if (nsIndex == INT_MAX)
9424
0
            atts[i+2] = NULL;
9425
0
        else if (nsIndex == INT_MAX - 1)
9426
0
            atts[i+2] = ctxt->str_xml_ns;
9427
0
        else
9428
0
            atts[i+2] = ctxt->nsTab[nsIndex * 2 + 1];
9429
9430
0
        if ((j < nratts) && (ctxt->attallocs[j] & 0x80000000) == 0) {
9431
0
            atts[i+3] = BASE_PTR + (ptrdiff_t) atts[i+3];  /* value */
9432
0
            atts[i+4] = BASE_PTR + (ptrdiff_t) atts[i+4];  /* valuend */
9433
0
        }
9434
0
    }
9435
9436
0
    uri = xmlParserNsLookupUri(ctxt, &hprefix);
9437
0
    if ((prefix != NULL) && (uri == NULL)) {
9438
0
  xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9439
0
           "Namespace prefix %s on %s is not defined\n",
9440
0
     prefix, localname, NULL);
9441
0
    }
9442
0
    *pref = prefix;
9443
0
    *URI = uri;
9444
9445
    /*
9446
     * SAX callback
9447
     */
9448
0
    if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9449
0
  (!ctxt->disableSAX)) {
9450
0
  if (nbNs > 0)
9451
0
      ctxt->sax->startElementNs(ctxt->userData, localname, prefix, uri,
9452
0
                          nbNs, ctxt->nsTab + 2 * (ctxt->nsNr - nbNs),
9453
0
        nbatts / 5, nbdef, atts);
9454
0
  else
9455
0
      ctxt->sax->startElementNs(ctxt->userData, localname, prefix, uri,
9456
0
                          0, NULL, nbatts / 5, nbdef, atts);
9457
0
    }
9458
9459
0
done:
9460
    /*
9461
     * Free allocated attribute values
9462
     */
9463
0
    if (attval != 0) {
9464
0
  for (i = 0, j = 0; j < nratts; i += 5, j++)
9465
0
      if (ctxt->attallocs[j] & 0x80000000)
9466
0
          xmlFree((xmlChar *) atts[i+3]);
9467
0
    }
9468
9469
0
    *nbNsPtr = nbNs;
9470
0
    return(localname);
9471
0
}
9472
9473
/**
9474
 * xmlParseEndTag2:
9475
 * @ctxt:  an XML parser context
9476
 * @line:  line of the start tag
9477
 * @nsNr:  number of namespaces on the start tag
9478
 *
9479
 * Parse an end tag. Always consumes '</'.
9480
 *
9481
 * [42] ETag ::= '</' Name S? '>'
9482
 *
9483
 * With namespace
9484
 *
9485
 * [NS 9] ETag ::= '</' QName S? '>'
9486
 */
9487
9488
static void
9489
0
xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlStartTag *tag) {
9490
0
    const xmlChar *name;
9491
9492
0
    GROW;
9493
0
    if ((RAW != '<') || (NXT(1) != '/')) {
9494
0
  xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
9495
0
  return;
9496
0
    }
9497
0
    SKIP(2);
9498
9499
0
    if (tag->prefix == NULL)
9500
0
        name = xmlParseNameAndCompare(ctxt, ctxt->name);
9501
0
    else
9502
0
        name = xmlParseQNameAndCompare(ctxt, ctxt->name, tag->prefix);
9503
9504
    /*
9505
     * We should definitely be at the ending "S? '>'" part
9506
     */
9507
0
    GROW;
9508
0
    SKIP_BLANKS;
9509
0
    if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
9510
0
  xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
9511
0
    } else
9512
0
  NEXT1;
9513
9514
    /*
9515
     * [ WFC: Element Type Match ]
9516
     * The Name in an element's end-tag must match the element type in the
9517
     * start-tag.
9518
     *
9519
     */
9520
0
    if (name != (xmlChar*)1) {
9521
0
        if (name == NULL) name = BAD_CAST "unparsable";
9522
0
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
9523
0
         "Opening and ending tag mismatch: %s line %d and %s\n",
9524
0
                    ctxt->name, tag->line, name);
9525
0
    }
9526
9527
    /*
9528
     * SAX: End of Tag
9529
     */
9530
0
    if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9531
0
  (!ctxt->disableSAX))
9532
0
  ctxt->sax->endElementNs(ctxt->userData, ctxt->name, tag->prefix,
9533
0
                                tag->URI);
9534
9535
0
    spacePop(ctxt);
9536
0
    if (tag->nsNr != 0)
9537
0
  xmlParserNsPop(ctxt, tag->nsNr);
9538
0
}
9539
9540
/**
9541
 * xmlParseCDSect:
9542
 * @ctxt:  an XML parser context
9543
 *
9544
 * DEPRECATED: Internal function, don't use.
9545
 *
9546
 * Parse escaped pure raw content. Always consumes '<!['.
9547
 *
9548
 * [18] CDSect ::= CDStart CData CDEnd
9549
 *
9550
 * [19] CDStart ::= '<![CDATA['
9551
 *
9552
 * [20] Data ::= (Char* - (Char* ']]>' Char*))
9553
 *
9554
 * [21] CDEnd ::= ']]>'
9555
 */
9556
void
9557
0
xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9558
0
    xmlChar *buf = NULL;
9559
0
    int len = 0;
9560
0
    int size = XML_PARSER_BUFFER_SIZE;
9561
0
    int r, rl;
9562
0
    int s, sl;
9563
0
    int cur, l;
9564
0
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
9565
0
                    XML_MAX_HUGE_LENGTH :
9566
0
                    XML_MAX_TEXT_LENGTH;
9567
9568
0
    if ((CUR != '<') || (NXT(1) != '!') || (NXT(2) != '['))
9569
0
        return;
9570
0
    SKIP(3);
9571
9572
0
    if (!CMP6(CUR_PTR, 'C', 'D', 'A', 'T', 'A', '['))
9573
0
        return;
9574
0
    SKIP(6);
9575
9576
0
    r = CUR_CHAR(rl);
9577
0
    if (!IS_CHAR(r)) {
9578
0
  xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9579
0
        goto out;
9580
0
    }
9581
0
    NEXTL(rl);
9582
0
    s = CUR_CHAR(sl);
9583
0
    if (!IS_CHAR(s)) {
9584
0
  xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9585
0
        goto out;
9586
0
    }
9587
0
    NEXTL(sl);
9588
0
    cur = CUR_CHAR(l);
9589
0
    buf = (xmlChar *) xmlMallocAtomic(size);
9590
0
    if (buf == NULL) {
9591
0
  xmlErrMemory(ctxt);
9592
0
        goto out;
9593
0
    }
9594
0
    while (IS_CHAR(cur) &&
9595
0
           ((r != ']') || (s != ']') || (cur != '>'))) {
9596
0
  if (len + 5 >= size) {
9597
0
      xmlChar *tmp;
9598
9599
0
      tmp = (xmlChar *) xmlRealloc(buf, size * 2);
9600
0
      if (tmp == NULL) {
9601
0
    xmlErrMemory(ctxt);
9602
0
                goto out;
9603
0
      }
9604
0
      buf = tmp;
9605
0
      size *= 2;
9606
0
  }
9607
0
  COPY_BUF(buf, len, r);
9608
0
        if (len > maxLength) {
9609
0
            xmlFatalErrMsg(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9610
0
                           "CData section too big found\n");
9611
0
            goto out;
9612
0
        }
9613
0
  r = s;
9614
0
  rl = sl;
9615
0
  s = cur;
9616
0
  sl = l;
9617
0
  NEXTL(l);
9618
0
  cur = CUR_CHAR(l);
9619
0
    }
9620
0
    buf[len] = 0;
9621
0
    if (cur != '>') {
9622
0
  xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9623
0
                       "CData section not finished\n%.50s\n", buf);
9624
0
        goto out;
9625
0
    }
9626
0
    NEXTL(l);
9627
9628
    /*
9629
     * OK the buffer is to be consumed as cdata.
9630
     */
9631
0
    if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9632
0
  if (ctxt->sax->cdataBlock != NULL)
9633
0
      ctxt->sax->cdataBlock(ctxt->userData, buf, len);
9634
0
  else if (ctxt->sax->characters != NULL)
9635
0
      ctxt->sax->characters(ctxt->userData, buf, len);
9636
0
    }
9637
9638
0
out:
9639
0
    xmlFree(buf);
9640
0
}
9641
9642
/**
9643
 * xmlParseContentInternal:
9644
 * @ctxt:  an XML parser context
9645
 *
9646
 * Parse a content sequence. Stops at EOF or '</'. Leaves checking of
9647
 * unexpected EOF to the caller.
9648
 */
9649
9650
static void
9651
0
xmlParseContentInternal(xmlParserCtxtPtr ctxt) {
9652
0
    int oldNameNr = ctxt->nameNr;
9653
0
    int oldSpaceNr = ctxt->spaceNr;
9654
0
    int oldNodeNr = ctxt->nodeNr;
9655
9656
0
    GROW;
9657
0
    while ((ctxt->input->cur < ctxt->input->end) &&
9658
0
     (PARSER_STOPPED(ctxt) == 0)) {
9659
0
  const xmlChar *cur = ctxt->input->cur;
9660
9661
  /*
9662
   * First case : a Processing Instruction.
9663
   */
9664
0
  if ((*cur == '<') && (cur[1] == '?')) {
9665
0
      xmlParsePI(ctxt);
9666
0
  }
9667
9668
  /*
9669
   * Second case : a CDSection
9670
   */
9671
  /* 2.6.0 test was *cur not RAW */
9672
0
  else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9673
0
      xmlParseCDSect(ctxt);
9674
0
  }
9675
9676
  /*
9677
   * Third case :  a comment
9678
   */
9679
0
  else if ((*cur == '<') && (NXT(1) == '!') &&
9680
0
     (NXT(2) == '-') && (NXT(3) == '-')) {
9681
0
      xmlParseComment(ctxt);
9682
0
  }
9683
9684
  /*
9685
   * Fourth case :  a sub-element.
9686
   */
9687
0
  else if (*cur == '<') {
9688
0
            if (NXT(1) == '/') {
9689
0
                if (ctxt->nameNr <= oldNameNr)
9690
0
                    break;
9691
0
          xmlParseElementEnd(ctxt);
9692
0
            } else {
9693
0
          xmlParseElementStart(ctxt);
9694
0
            }
9695
0
  }
9696
9697
  /*
9698
   * Fifth case : a reference. If if has not been resolved,
9699
   *    parsing returns it's Name, create the node
9700
   */
9701
9702
0
  else if (*cur == '&') {
9703
0
      xmlParseReference(ctxt);
9704
0
  }
9705
9706
  /*
9707
   * Last case, text. Note that References are handled directly.
9708
   */
9709
0
  else {
9710
0
      xmlParseCharDataInternal(ctxt, 0);
9711
0
  }
9712
9713
0
  SHRINK;
9714
0
  GROW;
9715
0
    }
9716
9717
0
    if ((ctxt->nameNr > oldNameNr) &&
9718
0
        (ctxt->input->cur >= ctxt->input->end) &&
9719
0
        (ctxt->wellFormed)) {
9720
0
        const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
9721
0
        int line = ctxt->pushTab[ctxt->nameNr - 1].line;
9722
0
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
9723
0
                "Premature end of data in tag %s line %d\n",
9724
0
                name, line, NULL);
9725
0
    }
9726
9727
    /*
9728
     * Clean up in error case
9729
     */
9730
9731
0
    while (ctxt->nodeNr > oldNodeNr)
9732
0
        nodePop(ctxt);
9733
9734
0
    while (ctxt->nameNr > oldNameNr) {
9735
0
        xmlStartTag *tag = &ctxt->pushTab[ctxt->nameNr - 1];
9736
9737
0
        if (tag->nsNr != 0)
9738
0
            xmlParserNsPop(ctxt, tag->nsNr);
9739
9740
0
        namePop(ctxt);
9741
0
    }
9742
9743
0
    while (ctxt->spaceNr > oldSpaceNr)
9744
0
        spacePop(ctxt);
9745
0
}
9746
9747
/**
9748
 * xmlParseContent:
9749
 * @ctxt:  an XML parser context
9750
 *
9751
 * Parse XML element content. This is useful if you're only interested
9752
 * in custom SAX callbacks. If you want a node list, use
9753
 * xmlParseInNodeContext.
9754
 */
9755
void
9756
0
xmlParseContent(xmlParserCtxtPtr ctxt) {
9757
0
    if ((ctxt == NULL) || (ctxt->input == NULL))
9758
0
        return;
9759
9760
0
    xmlCtxtInitializeLate(ctxt);
9761
9762
0
    xmlParseContentInternal(ctxt);
9763
9764
0
    if (ctxt->input->cur < ctxt->input->end)
9765
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
9766
0
}
9767
9768
/**
9769
 * xmlParseElement:
9770
 * @ctxt:  an XML parser context
9771
 *
9772
 * DEPRECATED: Internal function, don't use.
9773
 *
9774
 * parse an XML element
9775
 *
9776
 * [39] element ::= EmptyElemTag | STag content ETag
9777
 *
9778
 * [ WFC: Element Type Match ]
9779
 * The Name in an element's end-tag must match the element type in the
9780
 * start-tag.
9781
 *
9782
 */
9783
9784
void
9785
0
xmlParseElement(xmlParserCtxtPtr ctxt) {
9786
0
    if (xmlParseElementStart(ctxt) != 0)
9787
0
        return;
9788
9789
0
    xmlParseContentInternal(ctxt);
9790
9791
0
    if (ctxt->input->cur >= ctxt->input->end) {
9792
0
        if (ctxt->wellFormed) {
9793
0
            const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
9794
0
            int line = ctxt->pushTab[ctxt->nameNr - 1].line;
9795
0
            xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
9796
0
                    "Premature end of data in tag %s line %d\n",
9797
0
                    name, line, NULL);
9798
0
        }
9799
0
        return;
9800
0
    }
9801
9802
0
    xmlParseElementEnd(ctxt);
9803
0
}
9804
9805
/**
9806
 * xmlParseElementStart:
9807
 * @ctxt:  an XML parser context
9808
 *
9809
 * Parse the start of an XML element. Returns -1 in case of error, 0 if an
9810
 * opening tag was parsed, 1 if an empty element was parsed.
9811
 *
9812
 * Always consumes '<'.
9813
 */
9814
static int
9815
0
xmlParseElementStart(xmlParserCtxtPtr ctxt) {
9816
0
    int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 2048 : 256;
9817
0
    const xmlChar *name;
9818
0
    const xmlChar *prefix = NULL;
9819
0
    const xmlChar *URI = NULL;
9820
0
    xmlParserNodeInfo node_info;
9821
0
    int line;
9822
0
    xmlNodePtr cur;
9823
0
    int nbNs = 0;
9824
9825
0
    if (ctxt->nameNr > maxDepth) {
9826
0
        xmlFatalErrMsgInt(ctxt, XML_ERR_RESOURCE_LIMIT,
9827
0
                "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
9828
0
                ctxt->nameNr);
9829
0
  xmlHaltParser(ctxt);
9830
0
  return(-1);
9831
0
    }
9832
9833
    /* Capture start position */
9834
0
    if (ctxt->record_info) {
9835
0
        node_info.begin_pos = ctxt->input->consumed +
9836
0
                          (CUR_PTR - ctxt->input->base);
9837
0
  node_info.begin_line = ctxt->input->line;
9838
0
    }
9839
9840
0
    if (ctxt->spaceNr == 0)
9841
0
  spacePush(ctxt, -1);
9842
0
    else if (*ctxt->space == -2)
9843
0
  spacePush(ctxt, -1);
9844
0
    else
9845
0
  spacePush(ctxt, *ctxt->space);
9846
9847
0
    line = ctxt->input->line;
9848
0
#ifdef LIBXML_SAX1_ENABLED
9849
0
    if (ctxt->sax2)
9850
0
#endif /* LIBXML_SAX1_ENABLED */
9851
0
        name = xmlParseStartTag2(ctxt, &prefix, &URI, &nbNs);
9852
0
#ifdef LIBXML_SAX1_ENABLED
9853
0
    else
9854
0
  name = xmlParseStartTag(ctxt);
9855
0
#endif /* LIBXML_SAX1_ENABLED */
9856
0
    if (name == NULL) {
9857
0
  spacePop(ctxt);
9858
0
        return(-1);
9859
0
    }
9860
0
    nameNsPush(ctxt, name, prefix, URI, line, nbNs);
9861
0
    cur = ctxt->node;
9862
9863
0
#ifdef LIBXML_VALID_ENABLED
9864
    /*
9865
     * [ VC: Root Element Type ]
9866
     * The Name in the document type declaration must match the element
9867
     * type of the root element.
9868
     */
9869
0
    if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9870
0
        ctxt->node && (ctxt->node == ctxt->myDoc->children))
9871
0
        ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
9872
0
#endif /* LIBXML_VALID_ENABLED */
9873
9874
    /*
9875
     * Check for an Empty Element.
9876
     */
9877
0
    if ((RAW == '/') && (NXT(1) == '>')) {
9878
0
        SKIP(2);
9879
0
  if (ctxt->sax2) {
9880
0
      if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9881
0
    (!ctxt->disableSAX))
9882
0
    ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
9883
0
#ifdef LIBXML_SAX1_ENABLED
9884
0
  } else {
9885
0
      if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
9886
0
    (!ctxt->disableSAX))
9887
0
    ctxt->sax->endElement(ctxt->userData, name);
9888
0
#endif /* LIBXML_SAX1_ENABLED */
9889
0
  }
9890
0
  namePop(ctxt);
9891
0
  spacePop(ctxt);
9892
0
  if (nbNs > 0)
9893
0
      xmlParserNsPop(ctxt, nbNs);
9894
0
  if (cur != NULL && ctxt->record_info) {
9895
0
            node_info.node = cur;
9896
0
            node_info.end_pos = ctxt->input->consumed +
9897
0
                                (CUR_PTR - ctxt->input->base);
9898
0
            node_info.end_line = ctxt->input->line;
9899
0
            xmlParserAddNodeInfo(ctxt, &node_info);
9900
0
  }
9901
0
  return(1);
9902
0
    }
9903
0
    if (RAW == '>') {
9904
0
        NEXT1;
9905
0
        if (cur != NULL && ctxt->record_info) {
9906
0
            node_info.node = cur;
9907
0
            node_info.end_pos = 0;
9908
0
            node_info.end_line = 0;
9909
0
            xmlParserAddNodeInfo(ctxt, &node_info);
9910
0
        }
9911
0
    } else {
9912
0
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
9913
0
         "Couldn't find end of Start Tag %s line %d\n",
9914
0
                    name, line, NULL);
9915
9916
  /*
9917
   * end of parsing of this node.
9918
   */
9919
0
  nodePop(ctxt);
9920
0
  namePop(ctxt);
9921
0
  spacePop(ctxt);
9922
0
  if (nbNs > 0)
9923
0
      xmlParserNsPop(ctxt, nbNs);
9924
0
  return(-1);
9925
0
    }
9926
9927
0
    return(0);
9928
0
}
9929
9930
/**
9931
 * xmlParseElementEnd:
9932
 * @ctxt:  an XML parser context
9933
 *
9934
 * Parse the end of an XML element. Always consumes '</'.
9935
 */
9936
static void
9937
0
xmlParseElementEnd(xmlParserCtxtPtr ctxt) {
9938
0
    xmlNodePtr cur = ctxt->node;
9939
9940
0
    if (ctxt->nameNr <= 0) {
9941
0
        if ((RAW == '<') && (NXT(1) == '/'))
9942
0
            SKIP(2);
9943
0
        return;
9944
0
    }
9945
9946
    /*
9947
     * parse the end of tag: '</' should be here.
9948
     */
9949
0
    if (ctxt->sax2) {
9950
0
  xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
9951
0
  namePop(ctxt);
9952
0
    }
9953
0
#ifdef LIBXML_SAX1_ENABLED
9954
0
    else
9955
0
  xmlParseEndTag1(ctxt, 0);
9956
0
#endif /* LIBXML_SAX1_ENABLED */
9957
9958
    /*
9959
     * Capture end position
9960
     */
9961
0
    if (cur != NULL && ctxt->record_info) {
9962
0
        xmlParserNodeInfoPtr node_info;
9963
9964
0
        node_info = (xmlParserNodeInfoPtr) xmlParserFindNodeInfo(ctxt, cur);
9965
0
        if (node_info != NULL) {
9966
0
            node_info->end_pos = ctxt->input->consumed +
9967
0
                                 (CUR_PTR - ctxt->input->base);
9968
0
            node_info->end_line = ctxt->input->line;
9969
0
        }
9970
0
    }
9971
0
}
9972
9973
/**
9974
 * xmlParseVersionNum:
9975
 * @ctxt:  an XML parser context
9976
 *
9977
 * DEPRECATED: Internal function, don't use.
9978
 *
9979
 * parse the XML version value.
9980
 *
9981
 * [26] VersionNum ::= '1.' [0-9]+
9982
 *
9983
 * In practice allow [0-9].[0-9]+ at that level
9984
 *
9985
 * Returns the string giving the XML version number, or NULL
9986
 */
9987
xmlChar *
9988
0
xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
9989
0
    xmlChar *buf = NULL;
9990
0
    int len = 0;
9991
0
    int size = 10;
9992
0
    xmlChar cur;
9993
9994
0
    buf = (xmlChar *) xmlMallocAtomic(size);
9995
0
    if (buf == NULL) {
9996
0
  xmlErrMemory(ctxt);
9997
0
  return(NULL);
9998
0
    }
9999
0
    cur = CUR;
10000
0
    if (!((cur >= '0') && (cur <= '9'))) {
10001
0
  xmlFree(buf);
10002
0
  return(NULL);
10003
0
    }
10004
0
    buf[len++] = cur;
10005
0
    NEXT;
10006
0
    cur=CUR;
10007
0
    if (cur != '.') {
10008
0
  xmlFree(buf);
10009
0
  return(NULL);
10010
0
    }
10011
0
    buf[len++] = cur;
10012
0
    NEXT;
10013
0
    cur=CUR;
10014
0
    while ((cur >= '0') && (cur <= '9')) {
10015
0
  if (len + 1 >= size) {
10016
0
      xmlChar *tmp;
10017
10018
0
      size *= 2;
10019
0
      tmp = (xmlChar *) xmlRealloc(buf, size);
10020
0
      if (tmp == NULL) {
10021
0
          xmlFree(buf);
10022
0
    xmlErrMemory(ctxt);
10023
0
    return(NULL);
10024
0
      }
10025
0
      buf = tmp;
10026
0
  }
10027
0
  buf[len++] = cur;
10028
0
  NEXT;
10029
0
  cur=CUR;
10030
0
    }
10031
0
    buf[len] = 0;
10032
0
    return(buf);
10033
0
}
10034
10035
/**
10036
 * xmlParseVersionInfo:
10037
 * @ctxt:  an XML parser context
10038
 *
10039
 * DEPRECATED: Internal function, don't use.
10040
 *
10041
 * parse the XML version.
10042
 *
10043
 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
10044
 *
10045
 * [25] Eq ::= S? '=' S?
10046
 *
10047
 * Returns the version string, e.g. "1.0"
10048
 */
10049
10050
xmlChar *
10051
0
xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
10052
0
    xmlChar *version = NULL;
10053
10054
0
    if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
10055
0
  SKIP(7);
10056
0
  SKIP_BLANKS;
10057
0
  if (RAW != '=') {
10058
0
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10059
0
      return(NULL);
10060
0
        }
10061
0
  NEXT;
10062
0
  SKIP_BLANKS;
10063
0
  if (RAW == '"') {
10064
0
      NEXT;
10065
0
      version = xmlParseVersionNum(ctxt);
10066
0
      if (RAW != '"') {
10067
0
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10068
0
      } else
10069
0
          NEXT;
10070
0
  } else if (RAW == '\''){
10071
0
      NEXT;
10072
0
      version = xmlParseVersionNum(ctxt);
10073
0
      if (RAW != '\'') {
10074
0
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10075
0
      } else
10076
0
          NEXT;
10077
0
  } else {
10078
0
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10079
0
  }
10080
0
    }
10081
0
    return(version);
10082
0
}
10083
10084
/**
10085
 * xmlParseEncName:
10086
 * @ctxt:  an XML parser context
10087
 *
10088
 * DEPRECATED: Internal function, don't use.
10089
 *
10090
 * parse the XML encoding name
10091
 *
10092
 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
10093
 *
10094
 * Returns the encoding name value or NULL
10095
 */
10096
xmlChar *
10097
0
xmlParseEncName(xmlParserCtxtPtr ctxt) {
10098
0
    xmlChar *buf = NULL;
10099
0
    int len = 0;
10100
0
    int size = 10;
10101
0
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
10102
0
                    XML_MAX_TEXT_LENGTH :
10103
0
                    XML_MAX_NAME_LENGTH;
10104
0
    xmlChar cur;
10105
10106
0
    cur = CUR;
10107
0
    if (((cur >= 'a') && (cur <= 'z')) ||
10108
0
        ((cur >= 'A') && (cur <= 'Z'))) {
10109
0
  buf = (xmlChar *) xmlMallocAtomic(size);
10110
0
  if (buf == NULL) {
10111
0
      xmlErrMemory(ctxt);
10112
0
      return(NULL);
10113
0
  }
10114
10115
0
  buf[len++] = cur;
10116
0
  NEXT;
10117
0
  cur = CUR;
10118
0
  while (((cur >= 'a') && (cur <= 'z')) ||
10119
0
         ((cur >= 'A') && (cur <= 'Z')) ||
10120
0
         ((cur >= '0') && (cur <= '9')) ||
10121
0
         (cur == '.') || (cur == '_') ||
10122
0
         (cur == '-')) {
10123
0
      if (len + 1 >= size) {
10124
0
          xmlChar *tmp;
10125
10126
0
    size *= 2;
10127
0
    tmp = (xmlChar *) xmlRealloc(buf, size);
10128
0
    if (tmp == NULL) {
10129
0
        xmlErrMemory(ctxt);
10130
0
        xmlFree(buf);
10131
0
        return(NULL);
10132
0
    }
10133
0
    buf = tmp;
10134
0
      }
10135
0
      buf[len++] = cur;
10136
0
            if (len > maxLength) {
10137
0
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "EncName");
10138
0
                xmlFree(buf);
10139
0
                return(NULL);
10140
0
            }
10141
0
      NEXT;
10142
0
      cur = CUR;
10143
0
        }
10144
0
  buf[len] = 0;
10145
0
    } else {
10146
0
  xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
10147
0
    }
10148
0
    return(buf);
10149
0
}
10150
10151
/**
10152
 * xmlParseEncodingDecl:
10153
 * @ctxt:  an XML parser context
10154
 *
10155
 * DEPRECATED: Internal function, don't use.
10156
 *
10157
 * parse the XML encoding declaration
10158
 *
10159
 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' |  "'" EncName "'")
10160
 *
10161
 * this setups the conversion filters.
10162
 *
10163
 * Returns the encoding value or NULL
10164
 */
10165
10166
const xmlChar *
10167
0
xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
10168
0
    xmlChar *encoding = NULL;
10169
10170
0
    SKIP_BLANKS;
10171
0
    if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g') == 0)
10172
0
        return(NULL);
10173
10174
0
    SKIP(8);
10175
0
    SKIP_BLANKS;
10176
0
    if (RAW != '=') {
10177
0
        xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10178
0
        return(NULL);
10179
0
    }
10180
0
    NEXT;
10181
0
    SKIP_BLANKS;
10182
0
    if (RAW == '"') {
10183
0
        NEXT;
10184
0
        encoding = xmlParseEncName(ctxt);
10185
0
        if (RAW != '"') {
10186
0
            xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10187
0
            xmlFree((xmlChar *) encoding);
10188
0
            return(NULL);
10189
0
        } else
10190
0
            NEXT;
10191
0
    } else if (RAW == '\''){
10192
0
        NEXT;
10193
0
        encoding = xmlParseEncName(ctxt);
10194
0
        if (RAW != '\'') {
10195
0
            xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10196
0
            xmlFree((xmlChar *) encoding);
10197
0
            return(NULL);
10198
0
        } else
10199
0
            NEXT;
10200
0
    } else {
10201
0
        xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10202
0
    }
10203
10204
0
    if (encoding == NULL)
10205
0
        return(NULL);
10206
10207
0
    xmlSetDeclaredEncoding(ctxt, encoding);
10208
10209
0
    return(ctxt->encoding);
10210
0
}
10211
10212
/**
10213
 * xmlParseSDDecl:
10214
 * @ctxt:  an XML parser context
10215
 *
10216
 * DEPRECATED: Internal function, don't use.
10217
 *
10218
 * parse the XML standalone declaration
10219
 *
10220
 * [32] SDDecl ::= S 'standalone' Eq
10221
 *                 (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
10222
 *
10223
 * [ VC: Standalone Document Declaration ]
10224
 * TODO The standalone document declaration must have the value "no"
10225
 * if any external markup declarations contain declarations of:
10226
 *  - attributes with default values, if elements to which these
10227
 *    attributes apply appear in the document without specifications
10228
 *    of values for these attributes, or
10229
 *  - entities (other than amp, lt, gt, apos, quot), if references
10230
 *    to those entities appear in the document, or
10231
 *  - attributes with values subject to normalization, where the
10232
 *    attribute appears in the document with a value which will change
10233
 *    as a result of normalization, or
10234
 *  - element types with element content, if white space occurs directly
10235
 *    within any instance of those types.
10236
 *
10237
 * Returns:
10238
 *   1 if standalone="yes"
10239
 *   0 if standalone="no"
10240
 *  -2 if standalone attribute is missing or invalid
10241
 *    (A standalone value of -2 means that the XML declaration was found,
10242
 *     but no value was specified for the standalone attribute).
10243
 */
10244
10245
int
10246
0
xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
10247
0
    int standalone = -2;
10248
10249
0
    SKIP_BLANKS;
10250
0
    if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
10251
0
  SKIP(10);
10252
0
        SKIP_BLANKS;
10253
0
  if (RAW != '=') {
10254
0
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10255
0
      return(standalone);
10256
0
        }
10257
0
  NEXT;
10258
0
  SKIP_BLANKS;
10259
0
        if (RAW == '\''){
10260
0
      NEXT;
10261
0
      if ((RAW == 'n') && (NXT(1) == 'o')) {
10262
0
          standalone = 0;
10263
0
                SKIP(2);
10264
0
      } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10265
0
                 (NXT(2) == 's')) {
10266
0
          standalone = 1;
10267
0
    SKIP(3);
10268
0
            } else {
10269
0
    xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10270
0
      }
10271
0
      if (RAW != '\'') {
10272
0
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10273
0
      } else
10274
0
          NEXT;
10275
0
  } else if (RAW == '"'){
10276
0
      NEXT;
10277
0
      if ((RAW == 'n') && (NXT(1) == 'o')) {
10278
0
          standalone = 0;
10279
0
    SKIP(2);
10280
0
      } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10281
0
                 (NXT(2) == 's')) {
10282
0
          standalone = 1;
10283
0
                SKIP(3);
10284
0
            } else {
10285
0
    xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10286
0
      }
10287
0
      if (RAW != '"') {
10288
0
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10289
0
      } else
10290
0
          NEXT;
10291
0
  } else {
10292
0
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10293
0
        }
10294
0
    }
10295
0
    return(standalone);
10296
0
}
10297
10298
/**
10299
 * xmlParseXMLDecl:
10300
 * @ctxt:  an XML parser context
10301
 *
10302
 * DEPRECATED: Internal function, don't use.
10303
 *
10304
 * parse an XML declaration header
10305
 *
10306
 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10307
 */
10308
10309
void
10310
0
xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
10311
0
    xmlChar *version;
10312
10313
    /*
10314
     * This value for standalone indicates that the document has an
10315
     * XML declaration but it does not have a standalone attribute.
10316
     * It will be overwritten later if a standalone attribute is found.
10317
     */
10318
10319
0
    ctxt->standalone = -2;
10320
10321
    /*
10322
     * We know that '<?xml' is here.
10323
     */
10324
0
    SKIP(5);
10325
10326
0
    if (!IS_BLANK_CH(RAW)) {
10327
0
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10328
0
                 "Blank needed after '<?xml'\n");
10329
0
    }
10330
0
    SKIP_BLANKS;
10331
10332
    /*
10333
     * We must have the VersionInfo here.
10334
     */
10335
0
    version = xmlParseVersionInfo(ctxt);
10336
0
    if (version == NULL) {
10337
0
  xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
10338
0
    } else {
10339
0
  if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10340
      /*
10341
       * Changed here for XML-1.0 5th edition
10342
       */
10343
0
      if (ctxt->options & XML_PARSE_OLD10) {
10344
0
    xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10345
0
                "Unsupported version '%s'\n",
10346
0
                version);
10347
0
      } else {
10348
0
          if ((version[0] == '1') && ((version[1] == '.'))) {
10349
0
        xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10350
0
                      "Unsupported version '%s'\n",
10351
0
          version, NULL);
10352
0
    } else {
10353
0
        xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10354
0
              "Unsupported version '%s'\n",
10355
0
              version);
10356
0
    }
10357
0
      }
10358
0
  }
10359
0
  if (ctxt->version != NULL)
10360
0
      xmlFree((void *) ctxt->version);
10361
0
  ctxt->version = version;
10362
0
    }
10363
10364
    /*
10365
     * We may have the encoding declaration
10366
     */
10367
0
    if (!IS_BLANK_CH(RAW)) {
10368
0
        if ((RAW == '?') && (NXT(1) == '>')) {
10369
0
      SKIP(2);
10370
0
      return;
10371
0
  }
10372
0
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10373
0
    }
10374
0
    xmlParseEncodingDecl(ctxt);
10375
10376
    /*
10377
     * We may have the standalone status.
10378
     */
10379
0
    if ((ctxt->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
10380
0
        if ((RAW == '?') && (NXT(1) == '>')) {
10381
0
      SKIP(2);
10382
0
      return;
10383
0
  }
10384
0
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10385
0
    }
10386
10387
    /*
10388
     * We can grow the input buffer freely at that point
10389
     */
10390
0
    GROW;
10391
10392
0
    SKIP_BLANKS;
10393
0
    ctxt->standalone = xmlParseSDDecl(ctxt);
10394
10395
0
    SKIP_BLANKS;
10396
0
    if ((RAW == '?') && (NXT(1) == '>')) {
10397
0
        SKIP(2);
10398
0
    } else if (RAW == '>') {
10399
        /* Deprecated old WD ... */
10400
0
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10401
0
  NEXT;
10402
0
    } else {
10403
0
        int c;
10404
10405
0
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10406
0
        while ((PARSER_STOPPED(ctxt) == 0) &&
10407
0
               ((c = CUR) != 0)) {
10408
0
            NEXT;
10409
0
            if (c == '>')
10410
0
                break;
10411
0
        }
10412
0
    }
10413
0
}
10414
10415
/**
10416
 * xmlParseMisc:
10417
 * @ctxt:  an XML parser context
10418
 *
10419
 * DEPRECATED: Internal function, don't use.
10420
 *
10421
 * parse an XML Misc* optional field.
10422
 *
10423
 * [27] Misc ::= Comment | PI |  S
10424
 */
10425
10426
void
10427
0
xmlParseMisc(xmlParserCtxtPtr ctxt) {
10428
0
    while (PARSER_STOPPED(ctxt) == 0) {
10429
0
        SKIP_BLANKS;
10430
0
        GROW;
10431
0
        if ((RAW == '<') && (NXT(1) == '?')) {
10432
0
      xmlParsePI(ctxt);
10433
0
        } else if (CMP4(CUR_PTR, '<', '!', '-', '-')) {
10434
0
      xmlParseComment(ctxt);
10435
0
        } else {
10436
0
            break;
10437
0
        }
10438
0
    }
10439
0
}
10440
10441
static void
10442
0
xmlFinishDocument(xmlParserCtxtPtr ctxt) {
10443
0
    xmlDocPtr doc;
10444
10445
    /*
10446
     * SAX: end of the document processing.
10447
     */
10448
0
    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10449
0
        ctxt->sax->endDocument(ctxt->userData);
10450
10451
0
    doc = ctxt->myDoc;
10452
0
    if (doc != NULL) {
10453
0
        if (ctxt->wellFormed) {
10454
0
            doc->properties |= XML_DOC_WELLFORMED;
10455
0
            if (ctxt->valid)
10456
0
                doc->properties |= XML_DOC_DTDVALID;
10457
0
            if (ctxt->nsWellFormed)
10458
0
                doc->properties |= XML_DOC_NSVALID;
10459
0
        }
10460
10461
0
        if (ctxt->options & XML_PARSE_OLD10)
10462
0
            doc->properties |= XML_DOC_OLD10;
10463
10464
        /*
10465
         * Remove locally kept entity definitions if the tree was not built
10466
         */
10467
0
  if (xmlStrEqual(doc->version, SAX_COMPAT_MODE)) {
10468
0
            xmlFreeDoc(doc);
10469
0
            ctxt->myDoc = NULL;
10470
0
        }
10471
0
    }
10472
0
}
10473
10474
/**
10475
 * xmlParseDocument:
10476
 * @ctxt:  an XML parser context
10477
 *
10478
 * Parse an XML document and invoke the SAX handlers. This is useful
10479
 * if you're only interested in custom SAX callbacks. If you want a
10480
 * document tree, use xmlCtxtParseDocument.
10481
 *
10482
 * Returns 0, -1 in case of error.
10483
 */
10484
10485
int
10486
0
xmlParseDocument(xmlParserCtxtPtr ctxt) {
10487
0
    if ((ctxt == NULL) || (ctxt->input == NULL))
10488
0
        return(-1);
10489
10490
0
    GROW;
10491
10492
    /*
10493
     * SAX: detecting the level.
10494
     */
10495
0
    xmlCtxtInitializeLate(ctxt);
10496
10497
0
    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) {
10498
0
        ctxt->sax->setDocumentLocator(ctxt->userData,
10499
0
                (xmlSAXLocator *) &xmlDefaultSAXLocator);
10500
0
    }
10501
10502
0
    xmlDetectEncoding(ctxt);
10503
10504
0
    if (CUR == 0) {
10505
0
  xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10506
0
  return(-1);
10507
0
    }
10508
10509
0
    GROW;
10510
0
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10511
10512
  /*
10513
   * Note that we will switch encoding on the fly.
10514
   */
10515
0
  xmlParseXMLDecl(ctxt);
10516
0
  SKIP_BLANKS;
10517
0
    } else {
10518
0
  ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10519
0
        if (ctxt->version == NULL) {
10520
0
            xmlErrMemory(ctxt);
10521
0
            return(-1);
10522
0
        }
10523
0
    }
10524
0
    if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10525
0
        ctxt->sax->startDocument(ctxt->userData);
10526
0
    if ((ctxt->myDoc != NULL) && (ctxt->input != NULL) &&
10527
0
        (ctxt->input->buf != NULL) && (ctxt->input->buf->compressed >= 0)) {
10528
0
  ctxt->myDoc->compression = ctxt->input->buf->compressed;
10529
0
    }
10530
10531
    /*
10532
     * The Misc part of the Prolog
10533
     */
10534
0
    xmlParseMisc(ctxt);
10535
10536
    /*
10537
     * Then possibly doc type declaration(s) and more Misc
10538
     * (doctypedecl Misc*)?
10539
     */
10540
0
    GROW;
10541
0
    if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
10542
10543
0
  ctxt->inSubset = 1;
10544
0
  xmlParseDocTypeDecl(ctxt);
10545
0
  if (RAW == '[') {
10546
0
      xmlParseInternalSubset(ctxt);
10547
0
  }
10548
10549
  /*
10550
   * Create and update the external subset.
10551
   */
10552
0
  ctxt->inSubset = 2;
10553
0
  if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10554
0
      (!ctxt->disableSAX))
10555
0
      ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10556
0
                                ctxt->extSubSystem, ctxt->extSubURI);
10557
0
  ctxt->inSubset = 0;
10558
10559
0
        xmlCleanSpecialAttr(ctxt);
10560
10561
0
  xmlParseMisc(ctxt);
10562
0
    }
10563
10564
    /*
10565
     * Time to start parsing the tree itself
10566
     */
10567
0
    GROW;
10568
0
    if (RAW != '<') {
10569
0
        if (ctxt->wellFormed)
10570
0
            xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10571
0
                           "Start tag expected, '<' not found\n");
10572
0
    } else {
10573
0
  xmlParseElement(ctxt);
10574
10575
  /*
10576
   * The Misc part at the end
10577
   */
10578
0
  xmlParseMisc(ctxt);
10579
10580
0
        if (ctxt->input->cur < ctxt->input->end) {
10581
0
            if (ctxt->wellFormed)
10582
0
          xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
10583
0
        } else if ((ctxt->input->buf != NULL) &&
10584
0
                   (ctxt->input->buf->encoder != NULL) &&
10585
0
                   (ctxt->input->buf->error == 0) &&
10586
0
                   (!xmlBufIsEmpty(ctxt->input->buf->raw))) {
10587
0
            xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
10588
0
                           "Truncated multi-byte sequence at EOF\n");
10589
0
        }
10590
0
    }
10591
10592
0
    ctxt->instate = XML_PARSER_EOF;
10593
0
    xmlFinishDocument(ctxt);
10594
10595
0
    if (! ctxt->wellFormed) {
10596
0
  ctxt->valid = 0;
10597
0
  return(-1);
10598
0
    }
10599
10600
0
    return(0);
10601
0
}
10602
10603
/**
10604
 * xmlParseExtParsedEnt:
10605
 * @ctxt:  an XML parser context
10606
 *
10607
 * parse a general parsed entity
10608
 * An external general parsed entity is well-formed if it matches the
10609
 * production labeled extParsedEnt.
10610
 *
10611
 * [78] extParsedEnt ::= TextDecl? content
10612
 *
10613
 * Returns 0, -1 in case of error. the parser context is augmented
10614
 *                as a result of the parsing.
10615
 */
10616
10617
int
10618
0
xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
10619
0
    if ((ctxt == NULL) || (ctxt->input == NULL))
10620
0
        return(-1);
10621
10622
0
    xmlCtxtInitializeLate(ctxt);
10623
10624
0
    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) {
10625
0
        ctxt->sax->setDocumentLocator(ctxt->userData,
10626
0
                (xmlSAXLocator *) &xmlDefaultSAXLocator);
10627
0
    }
10628
10629
0
    xmlDetectEncoding(ctxt);
10630
10631
0
    if (CUR == 0) {
10632
0
  xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10633
0
    }
10634
10635
    /*
10636
     * Check for the XMLDecl in the Prolog.
10637
     */
10638
0
    GROW;
10639
0
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10640
10641
  /*
10642
   * Note that we will switch encoding on the fly.
10643
   */
10644
0
  xmlParseXMLDecl(ctxt);
10645
0
  SKIP_BLANKS;
10646
0
    } else {
10647
0
  ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10648
0
    }
10649
0
    if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10650
0
        ctxt->sax->startDocument(ctxt->userData);
10651
10652
    /*
10653
     * Doing validity checking on chunk doesn't make sense
10654
     */
10655
0
    ctxt->options &= ~XML_PARSE_DTDVALID;
10656
0
    ctxt->validate = 0;
10657
0
    ctxt->depth = 0;
10658
10659
0
    xmlParseContentInternal(ctxt);
10660
10661
0
    if (ctxt->input->cur < ctxt->input->end)
10662
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
10663
10664
    /*
10665
     * SAX: end of the document processing.
10666
     */
10667
0
    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10668
0
        ctxt->sax->endDocument(ctxt->userData);
10669
10670
0
    if (! ctxt->wellFormed) return(-1);
10671
0
    return(0);
10672
0
}
10673
10674
#ifdef LIBXML_PUSH_ENABLED
10675
/************************************************************************
10676
 *                  *
10677
 *    Progressive parsing interfaces        *
10678
 *                  *
10679
 ************************************************************************/
10680
10681
/**
10682
 * xmlParseLookupChar:
10683
 * @ctxt:  an XML parser context
10684
 * @c:  character
10685
 *
10686
 * Check whether the input buffer contains a character.
10687
 */
10688
static int
10689
0
xmlParseLookupChar(xmlParserCtxtPtr ctxt, int c) {
10690
0
    const xmlChar *cur;
10691
10692
0
    if (ctxt->checkIndex == 0) {
10693
0
        cur = ctxt->input->cur + 1;
10694
0
    } else {
10695
0
        cur = ctxt->input->cur + ctxt->checkIndex;
10696
0
    }
10697
10698
0
    if (memchr(cur, c, ctxt->input->end - cur) == NULL) {
10699
0
        size_t index = ctxt->input->end - ctxt->input->cur;
10700
10701
0
        if (index > LONG_MAX) {
10702
0
            ctxt->checkIndex = 0;
10703
0
            return(1);
10704
0
        }
10705
0
        ctxt->checkIndex = index;
10706
0
        return(0);
10707
0
    } else {
10708
0
        ctxt->checkIndex = 0;
10709
0
        return(1);
10710
0
    }
10711
0
}
10712
10713
/**
10714
 * xmlParseLookupString:
10715
 * @ctxt:  an XML parser context
10716
 * @startDelta: delta to apply at the start
10717
 * @str:  string
10718
 * @strLen:  length of string
10719
 *
10720
 * Check whether the input buffer contains a string.
10721
 */
10722
static const xmlChar *
10723
xmlParseLookupString(xmlParserCtxtPtr ctxt, size_t startDelta,
10724
0
                     const char *str, size_t strLen) {
10725
0
    const xmlChar *cur, *term;
10726
10727
0
    if (ctxt->checkIndex == 0) {
10728
0
        cur = ctxt->input->cur + startDelta;
10729
0
    } else {
10730
0
        cur = ctxt->input->cur + ctxt->checkIndex;
10731
0
    }
10732
10733
0
    term = BAD_CAST strstr((const char *) cur, str);
10734
0
    if (term == NULL) {
10735
0
        const xmlChar *end = ctxt->input->end;
10736
0
        size_t index;
10737
10738
        /* Rescan (strLen - 1) characters. */
10739
0
        if ((size_t) (end - cur) < strLen)
10740
0
            end = cur;
10741
0
        else
10742
0
            end -= strLen - 1;
10743
0
        index = end - ctxt->input->cur;
10744
0
        if (index > LONG_MAX) {
10745
0
            ctxt->checkIndex = 0;
10746
0
            return(ctxt->input->end - strLen);
10747
0
        }
10748
0
        ctxt->checkIndex = index;
10749
0
    } else {
10750
0
        ctxt->checkIndex = 0;
10751
0
    }
10752
10753
0
    return(term);
10754
0
}
10755
10756
/**
10757
 * xmlParseLookupCharData:
10758
 * @ctxt:  an XML parser context
10759
 *
10760
 * Check whether the input buffer contains terminated char data.
10761
 */
10762
static int
10763
0
xmlParseLookupCharData(xmlParserCtxtPtr ctxt) {
10764
0
    const xmlChar *cur = ctxt->input->cur + ctxt->checkIndex;
10765
0
    const xmlChar *end = ctxt->input->end;
10766
0
    size_t index;
10767
10768
0
    while (cur < end) {
10769
0
        if ((*cur == '<') || (*cur == '&')) {
10770
0
            ctxt->checkIndex = 0;
10771
0
            return(1);
10772
0
        }
10773
0
        cur++;
10774
0
    }
10775
10776
0
    index = cur - ctxt->input->cur;
10777
0
    if (index > LONG_MAX) {
10778
0
        ctxt->checkIndex = 0;
10779
0
        return(1);
10780
0
    }
10781
0
    ctxt->checkIndex = index;
10782
0
    return(0);
10783
0
}
10784
10785
/**
10786
 * xmlParseLookupGt:
10787
 * @ctxt:  an XML parser context
10788
 *
10789
 * Check whether there's enough data in the input buffer to finish parsing
10790
 * a start tag. This has to take quotes into account.
10791
 */
10792
static int
10793
0
xmlParseLookupGt(xmlParserCtxtPtr ctxt) {
10794
0
    const xmlChar *cur;
10795
0
    const xmlChar *end = ctxt->input->end;
10796
0
    int state = ctxt->endCheckState;
10797
0
    size_t index;
10798
10799
0
    if (ctxt->checkIndex == 0)
10800
0
        cur = ctxt->input->cur + 1;
10801
0
    else
10802
0
        cur = ctxt->input->cur + ctxt->checkIndex;
10803
10804
0
    while (cur < end) {
10805
0
        if (state) {
10806
0
            if (*cur == state)
10807
0
                state = 0;
10808
0
        } else if (*cur == '\'' || *cur == '"') {
10809
0
            state = *cur;
10810
0
        } else if (*cur == '>') {
10811
0
            ctxt->checkIndex = 0;
10812
0
            ctxt->endCheckState = 0;
10813
0
            return(1);
10814
0
        }
10815
0
        cur++;
10816
0
    }
10817
10818
0
    index = cur - ctxt->input->cur;
10819
0
    if (index > LONG_MAX) {
10820
0
        ctxt->checkIndex = 0;
10821
0
        ctxt->endCheckState = 0;
10822
0
        return(1);
10823
0
    }
10824
0
    ctxt->checkIndex = index;
10825
0
    ctxt->endCheckState = state;
10826
0
    return(0);
10827
0
}
10828
10829
/**
10830
 * xmlParseLookupInternalSubset:
10831
 * @ctxt:  an XML parser context
10832
 *
10833
 * Check whether there's enough data in the input buffer to finish parsing
10834
 * the internal subset.
10835
 */
10836
static int
10837
0
xmlParseLookupInternalSubset(xmlParserCtxtPtr ctxt) {
10838
    /*
10839
     * Sorry, but progressive parsing of the internal subset is not
10840
     * supported. We first check that the full content of the internal
10841
     * subset is available and parsing is launched only at that point.
10842
     * Internal subset ends with "']' S? '>'" in an unescaped section and
10843
     * not in a ']]>' sequence which are conditional sections.
10844
     */
10845
0
    const xmlChar *cur, *start;
10846
0
    const xmlChar *end = ctxt->input->end;
10847
0
    int state = ctxt->endCheckState;
10848
0
    size_t index;
10849
10850
0
    if (ctxt->checkIndex == 0) {
10851
0
        cur = ctxt->input->cur + 1;
10852
0
    } else {
10853
0
        cur = ctxt->input->cur + ctxt->checkIndex;
10854
0
    }
10855
0
    start = cur;
10856
10857
0
    while (cur < end) {
10858
0
        if (state == '-') {
10859
0
            if ((*cur == '-') &&
10860
0
                (cur[1] == '-') &&
10861
0
                (cur[2] == '>')) {
10862
0
                state = 0;
10863
0
                cur += 3;
10864
0
                start = cur;
10865
0
                continue;
10866
0
            }
10867
0
        }
10868
0
        else if (state == ']') {
10869
0
            if (*cur == '>') {
10870
0
                ctxt->checkIndex = 0;
10871
0
                ctxt->endCheckState = 0;
10872
0
                return(1);
10873
0
            }
10874
0
            if (IS_BLANK_CH(*cur)) {
10875
0
                state = ' ';
10876
0
            } else if (*cur != ']') {
10877
0
                state = 0;
10878
0
                start = cur;
10879
0
                continue;
10880
0
            }
10881
0
        }
10882
0
        else if (state == ' ') {
10883
0
            if (*cur == '>') {
10884
0
                ctxt->checkIndex = 0;
10885
0
                ctxt->endCheckState = 0;
10886
0
                return(1);
10887
0
            }
10888
0
            if (!IS_BLANK_CH(*cur)) {
10889
0
                state = 0;
10890
0
                start = cur;
10891
0
                continue;
10892
0
            }
10893
0
        }
10894
0
        else if (state != 0) {
10895
0
            if (*cur == state) {
10896
0
                state = 0;
10897
0
                start = cur + 1;
10898
0
            }
10899
0
        }
10900
0
        else if (*cur == '<') {
10901
0
            if ((cur[1] == '!') &&
10902
0
                (cur[2] == '-') &&
10903
0
                (cur[3] == '-')) {
10904
0
                state = '-';
10905
0
                cur += 4;
10906
                /* Don't treat <!--> as comment */
10907
0
                start = cur;
10908
0
                continue;
10909
0
            }
10910
0
        }
10911
0
        else if ((*cur == '"') || (*cur == '\'') || (*cur == ']')) {
10912
0
            state = *cur;
10913
0
        }
10914
10915
0
        cur++;
10916
0
    }
10917
10918
    /*
10919
     * Rescan the three last characters to detect "<!--" and "-->"
10920
     * split across chunks.
10921
     */
10922
0
    if ((state == 0) || (state == '-')) {
10923
0
        if (cur - start < 3)
10924
0
            cur = start;
10925
0
        else
10926
0
            cur -= 3;
10927
0
    }
10928
0
    index = cur - ctxt->input->cur;
10929
0
    if (index > LONG_MAX) {
10930
0
        ctxt->checkIndex = 0;
10931
0
        ctxt->endCheckState = 0;
10932
0
        return(1);
10933
0
    }
10934
0
    ctxt->checkIndex = index;
10935
0
    ctxt->endCheckState = state;
10936
0
    return(0);
10937
0
}
10938
10939
/**
10940
 * xmlCheckCdataPush:
10941
 * @cur: pointer to the block of characters
10942
 * @len: length of the block in bytes
10943
 * @complete: 1 if complete CDATA block is passed in, 0 if partial block
10944
 *
10945
 * Check that the block of characters is okay as SCdata content [20]
10946
 *
10947
 * Returns the number of bytes to pass if okay, a negative index where an
10948
 *         UTF-8 error occurred otherwise
10949
 */
10950
static int
10951
0
xmlCheckCdataPush(const xmlChar *utf, int len, int complete) {
10952
0
    int ix;
10953
0
    unsigned char c;
10954
0
    int codepoint;
10955
10956
0
    if ((utf == NULL) || (len <= 0))
10957
0
        return(0);
10958
10959
0
    for (ix = 0; ix < len;) {      /* string is 0-terminated */
10960
0
        c = utf[ix];
10961
0
        if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
10962
0
      if (c >= 0x20)
10963
0
    ix++;
10964
0
      else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
10965
0
          ix++;
10966
0
      else
10967
0
          return(-ix);
10968
0
  } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
10969
0
      if (ix + 2 > len) return(complete ? -ix : ix);
10970
0
      if ((utf[ix+1] & 0xc0 ) != 0x80)
10971
0
          return(-ix);
10972
0
      codepoint = (utf[ix] & 0x1f) << 6;
10973
0
      codepoint |= utf[ix+1] & 0x3f;
10974
0
      if (!xmlIsCharQ(codepoint))
10975
0
          return(-ix);
10976
0
      ix += 2;
10977
0
  } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
10978
0
      if (ix + 3 > len) return(complete ? -ix : ix);
10979
0
      if (((utf[ix+1] & 0xc0) != 0x80) ||
10980
0
          ((utf[ix+2] & 0xc0) != 0x80))
10981
0
        return(-ix);
10982
0
      codepoint = (utf[ix] & 0xf) << 12;
10983
0
      codepoint |= (utf[ix+1] & 0x3f) << 6;
10984
0
      codepoint |= utf[ix+2] & 0x3f;
10985
0
      if (!xmlIsCharQ(codepoint))
10986
0
          return(-ix);
10987
0
      ix += 3;
10988
0
  } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
10989
0
      if (ix + 4 > len) return(complete ? -ix : ix);
10990
0
      if (((utf[ix+1] & 0xc0) != 0x80) ||
10991
0
          ((utf[ix+2] & 0xc0) != 0x80) ||
10992
0
    ((utf[ix+3] & 0xc0) != 0x80))
10993
0
        return(-ix);
10994
0
      codepoint = (utf[ix] & 0x7) << 18;
10995
0
      codepoint |= (utf[ix+1] & 0x3f) << 12;
10996
0
      codepoint |= (utf[ix+2] & 0x3f) << 6;
10997
0
      codepoint |= utf[ix+3] & 0x3f;
10998
0
      if (!xmlIsCharQ(codepoint))
10999
0
          return(-ix);
11000
0
      ix += 4;
11001
0
  } else       /* unknown encoding */
11002
0
      return(-ix);
11003
0
      }
11004
0
      return(ix);
11005
0
}
11006
11007
/**
11008
 * xmlParseTryOrFinish:
11009
 * @ctxt:  an XML parser context
11010
 * @terminate:  last chunk indicator
11011
 *
11012
 * Try to progress on parsing
11013
 *
11014
 * Returns zero if no parsing was possible
11015
 */
11016
static int
11017
0
xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
11018
0
    int ret = 0;
11019
0
    size_t avail;
11020
0
    xmlChar cur, next;
11021
11022
0
    if (ctxt->input == NULL)
11023
0
        return(0);
11024
11025
0
    if ((ctxt->input != NULL) &&
11026
0
        (ctxt->input->cur - ctxt->input->base > 4096)) {
11027
0
        xmlParserShrink(ctxt);
11028
0
    }
11029
11030
0
    while (ctxt->disableSAX == 0) {
11031
0
        avail = ctxt->input->end - ctxt->input->cur;
11032
0
        if (avail < 1)
11033
0
      goto done;
11034
0
        switch (ctxt->instate) {
11035
0
            case XML_PARSER_EOF:
11036
          /*
11037
     * Document parsing is done !
11038
     */
11039
0
          goto done;
11040
0
            case XML_PARSER_START:
11041
                /*
11042
                 * Very first chars read from the document flow.
11043
                 */
11044
0
                if ((!terminate) && (avail < 4))
11045
0
                    goto done;
11046
11047
                /*
11048
                 * We need more bytes to detect EBCDIC code pages.
11049
                 * See xmlDetectEBCDIC.
11050
                 */
11051
0
                if ((CMP4(CUR_PTR, 0x4C, 0x6F, 0xA7, 0x94)) &&
11052
0
                    (!terminate) && (avail < 200))
11053
0
                    goto done;
11054
11055
0
                xmlDetectEncoding(ctxt);
11056
0
                ctxt->instate = XML_PARSER_XML_DECL;
11057
0
    break;
11058
11059
0
            case XML_PARSER_XML_DECL:
11060
0
    if ((!terminate) && (avail < 2))
11061
0
        goto done;
11062
0
    cur = ctxt->input->cur[0];
11063
0
    next = ctxt->input->cur[1];
11064
0
          if ((cur == '<') && (next == '?')) {
11065
        /* PI or XML decl */
11066
0
        if ((!terminate) &&
11067
0
                        (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11068
0
      goto done;
11069
0
        if ((ctxt->input->cur[2] == 'x') &&
11070
0
      (ctxt->input->cur[3] == 'm') &&
11071
0
      (ctxt->input->cur[4] == 'l') &&
11072
0
      (IS_BLANK_CH(ctxt->input->cur[5]))) {
11073
0
      ret += 5;
11074
0
      xmlParseXMLDecl(ctxt);
11075
0
        } else {
11076
0
      ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11077
0
                        if (ctxt->version == NULL) {
11078
0
                            xmlErrMemory(ctxt);
11079
0
                            break;
11080
0
                        }
11081
0
        }
11082
0
    } else {
11083
0
        ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11084
0
        if (ctxt->version == NULL) {
11085
0
            xmlErrMemory(ctxt);
11086
0
      break;
11087
0
        }
11088
0
    }
11089
0
                if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) {
11090
0
                    ctxt->sax->setDocumentLocator(ctxt->userData,
11091
0
                            (xmlSAXLocator *) &xmlDefaultSAXLocator);
11092
0
                }
11093
0
                if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11094
0
                    (!ctxt->disableSAX))
11095
0
                    ctxt->sax->startDocument(ctxt->userData);
11096
0
                ctxt->instate = XML_PARSER_MISC;
11097
0
    break;
11098
0
            case XML_PARSER_START_TAG: {
11099
0
          const xmlChar *name;
11100
0
    const xmlChar *prefix = NULL;
11101
0
    const xmlChar *URI = NULL;
11102
0
                int line = ctxt->input->line;
11103
0
    int nbNs = 0;
11104
11105
0
    if ((!terminate) && (avail < 2))
11106
0
        goto done;
11107
0
    cur = ctxt->input->cur[0];
11108
0
          if (cur != '<') {
11109
0
        xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
11110
0
                                   "Start tag expected, '<' not found");
11111
0
                    ctxt->instate = XML_PARSER_EOF;
11112
0
                    xmlFinishDocument(ctxt);
11113
0
        goto done;
11114
0
    }
11115
0
    if ((!terminate) && (!xmlParseLookupGt(ctxt)))
11116
0
                    goto done;
11117
0
    if (ctxt->spaceNr == 0)
11118
0
        spacePush(ctxt, -1);
11119
0
    else if (*ctxt->space == -2)
11120
0
        spacePush(ctxt, -1);
11121
0
    else
11122
0
        spacePush(ctxt, *ctxt->space);
11123
0
#ifdef LIBXML_SAX1_ENABLED
11124
0
    if (ctxt->sax2)
11125
0
#endif /* LIBXML_SAX1_ENABLED */
11126
0
        name = xmlParseStartTag2(ctxt, &prefix, &URI, &nbNs);
11127
0
#ifdef LIBXML_SAX1_ENABLED
11128
0
    else
11129
0
        name = xmlParseStartTag(ctxt);
11130
0
#endif /* LIBXML_SAX1_ENABLED */
11131
0
    if (name == NULL) {
11132
0
        spacePop(ctxt);
11133
0
                    ctxt->instate = XML_PARSER_EOF;
11134
0
                    xmlFinishDocument(ctxt);
11135
0
        goto done;
11136
0
    }
11137
0
#ifdef LIBXML_VALID_ENABLED
11138
    /*
11139
     * [ VC: Root Element Type ]
11140
     * The Name in the document type declaration must match
11141
     * the element type of the root element.
11142
     */
11143
0
    if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
11144
0
        ctxt->node && (ctxt->node == ctxt->myDoc->children))
11145
0
        ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
11146
0
#endif /* LIBXML_VALID_ENABLED */
11147
11148
    /*
11149
     * Check for an Empty Element.
11150
     */
11151
0
    if ((RAW == '/') && (NXT(1) == '>')) {
11152
0
        SKIP(2);
11153
11154
0
        if (ctxt->sax2) {
11155
0
      if ((ctxt->sax != NULL) &&
11156
0
          (ctxt->sax->endElementNs != NULL) &&
11157
0
          (!ctxt->disableSAX))
11158
0
          ctxt->sax->endElementNs(ctxt->userData, name,
11159
0
                                  prefix, URI);
11160
0
      if (nbNs > 0)
11161
0
          xmlParserNsPop(ctxt, nbNs);
11162
0
#ifdef LIBXML_SAX1_ENABLED
11163
0
        } else {
11164
0
      if ((ctxt->sax != NULL) &&
11165
0
          (ctxt->sax->endElement != NULL) &&
11166
0
          (!ctxt->disableSAX))
11167
0
          ctxt->sax->endElement(ctxt->userData, name);
11168
0
#endif /* LIBXML_SAX1_ENABLED */
11169
0
        }
11170
0
        spacePop(ctxt);
11171
0
    } else if (RAW == '>') {
11172
0
        NEXT;
11173
0
                    nameNsPush(ctxt, name, prefix, URI, line, nbNs);
11174
0
    } else {
11175
0
        xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
11176
0
           "Couldn't find end of Start Tag %s\n",
11177
0
           name);
11178
0
        nodePop(ctxt);
11179
0
        spacePop(ctxt);
11180
0
                    if (nbNs > 0)
11181
0
                        xmlParserNsPop(ctxt, nbNs);
11182
0
    }
11183
11184
0
                if (ctxt->nameNr == 0)
11185
0
                    ctxt->instate = XML_PARSER_EPILOG;
11186
0
                else
11187
0
                    ctxt->instate = XML_PARSER_CONTENT;
11188
0
                break;
11189
0
      }
11190
0
            case XML_PARSER_CONTENT: {
11191
0
    cur = ctxt->input->cur[0];
11192
11193
0
    if (cur == '<') {
11194
0
                    if ((!terminate) && (avail < 2))
11195
0
                        goto done;
11196
0
        next = ctxt->input->cur[1];
11197
11198
0
                    if (next == '/') {
11199
0
                        ctxt->instate = XML_PARSER_END_TAG;
11200
0
                        break;
11201
0
                    } else if (next == '?') {
11202
0
                        if ((!terminate) &&
11203
0
                            (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11204
0
                            goto done;
11205
0
                        xmlParsePI(ctxt);
11206
0
                        ctxt->instate = XML_PARSER_CONTENT;
11207
0
                        break;
11208
0
                    } else if (next == '!') {
11209
0
                        if ((!terminate) && (avail < 3))
11210
0
                            goto done;
11211
0
                        next = ctxt->input->cur[2];
11212
11213
0
                        if (next == '-') {
11214
0
                            if ((!terminate) && (avail < 4))
11215
0
                                goto done;
11216
0
                            if (ctxt->input->cur[3] == '-') {
11217
0
                                if ((!terminate) &&
11218
0
                                    (!xmlParseLookupString(ctxt, 4, "-->", 3)))
11219
0
                                    goto done;
11220
0
                                xmlParseComment(ctxt);
11221
0
                                ctxt->instate = XML_PARSER_CONTENT;
11222
0
                                break;
11223
0
                            }
11224
0
                        } else if (next == '[') {
11225
0
                            if ((!terminate) && (avail < 9))
11226
0
                                goto done;
11227
0
                            if ((ctxt->input->cur[2] == '[') &&
11228
0
                                (ctxt->input->cur[3] == 'C') &&
11229
0
                                (ctxt->input->cur[4] == 'D') &&
11230
0
                                (ctxt->input->cur[5] == 'A') &&
11231
0
                                (ctxt->input->cur[6] == 'T') &&
11232
0
                                (ctxt->input->cur[7] == 'A') &&
11233
0
                                (ctxt->input->cur[8] == '[')) {
11234
0
                                SKIP(9);
11235
0
                                ctxt->instate = XML_PARSER_CDATA_SECTION;
11236
0
                                break;
11237
0
                            }
11238
0
                        }
11239
0
                    }
11240
0
    } else if (cur == '&') {
11241
0
        if ((!terminate) && (!xmlParseLookupChar(ctxt, ';')))
11242
0
      goto done;
11243
0
        xmlParseReference(ctxt);
11244
0
                    break;
11245
0
    } else {
11246
        /* TODO Avoid the extra copy, handle directly !!! */
11247
        /*
11248
         * Goal of the following test is:
11249
         *  - minimize calls to the SAX 'character' callback
11250
         *    when they are mergeable
11251
         *  - handle a problem for isBlank when we only parse
11252
         *    a sequence of blank chars and the next one is
11253
         *    not available to check against '<' presence.
11254
         *  - tries to homogenize the differences in SAX
11255
         *    callbacks between the push and pull versions
11256
         *    of the parser.
11257
         */
11258
0
        if (avail < XML_PARSER_BIG_BUFFER_SIZE) {
11259
0
      if ((!terminate) && (!xmlParseLookupCharData(ctxt)))
11260
0
          goto done;
11261
0
                    }
11262
0
                    ctxt->checkIndex = 0;
11263
0
        xmlParseCharDataInternal(ctxt, !terminate);
11264
0
                    break;
11265
0
    }
11266
11267
0
                ctxt->instate = XML_PARSER_START_TAG;
11268
0
    break;
11269
0
      }
11270
0
            case XML_PARSER_END_TAG:
11271
0
    if ((!terminate) && (!xmlParseLookupChar(ctxt, '>')))
11272
0
        goto done;
11273
0
    if (ctxt->sax2) {
11274
0
              xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
11275
0
        nameNsPop(ctxt);
11276
0
    }
11277
0
#ifdef LIBXML_SAX1_ENABLED
11278
0
      else
11279
0
        xmlParseEndTag1(ctxt, 0);
11280
0
#endif /* LIBXML_SAX1_ENABLED */
11281
0
    if (ctxt->nameNr == 0) {
11282
0
        ctxt->instate = XML_PARSER_EPILOG;
11283
0
    } else {
11284
0
        ctxt->instate = XML_PARSER_CONTENT;
11285
0
    }
11286
0
    break;
11287
0
            case XML_PARSER_CDATA_SECTION: {
11288
          /*
11289
     * The Push mode need to have the SAX callback for
11290
     * cdataBlock merge back contiguous callbacks.
11291
     */
11292
0
    const xmlChar *term;
11293
11294
0
                if (terminate) {
11295
                    /*
11296
                     * Don't call xmlParseLookupString. If 'terminate'
11297
                     * is set, checkIndex is invalid.
11298
                     */
11299
0
                    term = BAD_CAST strstr((const char *) ctxt->input->cur,
11300
0
                                           "]]>");
11301
0
                } else {
11302
0
        term = xmlParseLookupString(ctxt, 0, "]]>", 3);
11303
0
                }
11304
11305
0
    if (term == NULL) {
11306
0
        int tmp, size;
11307
11308
0
                    if (terminate) {
11309
                        /* Unfinished CDATA section */
11310
0
                        size = ctxt->input->end - ctxt->input->cur;
11311
0
                    } else {
11312
0
                        if (avail < XML_PARSER_BIG_BUFFER_SIZE + 2)
11313
0
                            goto done;
11314
0
                        ctxt->checkIndex = 0;
11315
                        /* XXX: Why don't we pass the full buffer? */
11316
0
                        size = XML_PARSER_BIG_BUFFER_SIZE;
11317
0
                    }
11318
0
                    tmp = xmlCheckCdataPush(ctxt->input->cur, size, 0);
11319
0
                    if (tmp <= 0) {
11320
0
                        tmp = -tmp;
11321
0
                        ctxt->input->cur += tmp;
11322
0
                        goto encoding_error;
11323
0
                    }
11324
0
                    if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
11325
0
                        if (ctxt->sax->cdataBlock != NULL)
11326
0
                            ctxt->sax->cdataBlock(ctxt->userData,
11327
0
                                                  ctxt->input->cur, tmp);
11328
0
                        else if (ctxt->sax->characters != NULL)
11329
0
                            ctxt->sax->characters(ctxt->userData,
11330
0
                                                  ctxt->input->cur, tmp);
11331
0
                    }
11332
0
                    SKIPL(tmp);
11333
0
    } else {
11334
0
                    int base = term - CUR_PTR;
11335
0
        int tmp;
11336
11337
0
        tmp = xmlCheckCdataPush(ctxt->input->cur, base, 1);
11338
0
        if ((tmp < 0) || (tmp != base)) {
11339
0
      tmp = -tmp;
11340
0
      ctxt->input->cur += tmp;
11341
0
      goto encoding_error;
11342
0
        }
11343
0
        if ((ctxt->sax != NULL) && (base == 0) &&
11344
0
            (ctxt->sax->cdataBlock != NULL) &&
11345
0
            (!ctxt->disableSAX)) {
11346
      /*
11347
       * Special case to provide identical behaviour
11348
       * between pull and push parsers on empty CDATA
11349
       * sections
11350
       */
11351
0
       if ((ctxt->input->cur - ctxt->input->base >= 9) &&
11352
0
           (!strncmp((const char *)&ctxt->input->cur[-9],
11353
0
                     "<![CDATA[", 9)))
11354
0
           ctxt->sax->cdataBlock(ctxt->userData,
11355
0
                                 BAD_CAST "", 0);
11356
0
        } else if ((ctxt->sax != NULL) && (base > 0) &&
11357
0
      (!ctxt->disableSAX)) {
11358
0
      if (ctxt->sax->cdataBlock != NULL)
11359
0
          ctxt->sax->cdataBlock(ctxt->userData,
11360
0
              ctxt->input->cur, base);
11361
0
      else if (ctxt->sax->characters != NULL)
11362
0
          ctxt->sax->characters(ctxt->userData,
11363
0
              ctxt->input->cur, base);
11364
0
        }
11365
0
        SKIPL(base + 3);
11366
0
        ctxt->instate = XML_PARSER_CONTENT;
11367
0
    }
11368
0
    break;
11369
0
      }
11370
0
            case XML_PARSER_MISC:
11371
0
            case XML_PARSER_PROLOG:
11372
0
            case XML_PARSER_EPILOG:
11373
0
    SKIP_BLANKS;
11374
0
                avail = ctxt->input->end - ctxt->input->cur;
11375
0
    if (avail < 1)
11376
0
        goto done;
11377
0
    if (ctxt->input->cur[0] == '<') {
11378
0
                    if ((!terminate) && (avail < 2))
11379
0
                        goto done;
11380
0
                    next = ctxt->input->cur[1];
11381
0
                    if (next == '?') {
11382
0
                        if ((!terminate) &&
11383
0
                            (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11384
0
                            goto done;
11385
0
                        xmlParsePI(ctxt);
11386
0
                        break;
11387
0
                    } else if (next == '!') {
11388
0
                        if ((!terminate) && (avail < 3))
11389
0
                            goto done;
11390
11391
0
                        if (ctxt->input->cur[2] == '-') {
11392
0
                            if ((!terminate) && (avail < 4))
11393
0
                                goto done;
11394
0
                            if (ctxt->input->cur[3] == '-') {
11395
0
                                if ((!terminate) &&
11396
0
                                    (!xmlParseLookupString(ctxt, 4, "-->", 3)))
11397
0
                                    goto done;
11398
0
                                xmlParseComment(ctxt);
11399
0
                                break;
11400
0
                            }
11401
0
                        } else if (ctxt->instate == XML_PARSER_MISC) {
11402
0
                            if ((!terminate) && (avail < 9))
11403
0
                                goto done;
11404
0
                            if ((ctxt->input->cur[2] == 'D') &&
11405
0
                                (ctxt->input->cur[3] == 'O') &&
11406
0
                                (ctxt->input->cur[4] == 'C') &&
11407
0
                                (ctxt->input->cur[5] == 'T') &&
11408
0
                                (ctxt->input->cur[6] == 'Y') &&
11409
0
                                (ctxt->input->cur[7] == 'P') &&
11410
0
                                (ctxt->input->cur[8] == 'E')) {
11411
0
                                if ((!terminate) && (!xmlParseLookupGt(ctxt)))
11412
0
                                    goto done;
11413
0
                                ctxt->inSubset = 1;
11414
0
                                xmlParseDocTypeDecl(ctxt);
11415
0
                                if (RAW == '[') {
11416
0
                                    ctxt->instate = XML_PARSER_DTD;
11417
0
                                } else {
11418
                                    /*
11419
                                     * Create and update the external subset.
11420
                                     */
11421
0
                                    ctxt->inSubset = 2;
11422
0
                                    if ((ctxt->sax != NULL) &&
11423
0
                                        (!ctxt->disableSAX) &&
11424
0
                                        (ctxt->sax->externalSubset != NULL))
11425
0
                                        ctxt->sax->externalSubset(
11426
0
                                                ctxt->userData,
11427
0
                                                ctxt->intSubName,
11428
0
                                                ctxt->extSubSystem,
11429
0
                                                ctxt->extSubURI);
11430
0
                                    ctxt->inSubset = 0;
11431
0
                                    xmlCleanSpecialAttr(ctxt);
11432
0
                                    ctxt->instate = XML_PARSER_PROLOG;
11433
0
                                }
11434
0
                                break;
11435
0
                            }
11436
0
                        }
11437
0
                    }
11438
0
                }
11439
11440
0
                if (ctxt->instate == XML_PARSER_EPILOG) {
11441
0
                    if (ctxt->errNo == XML_ERR_OK)
11442
0
                        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
11443
0
        ctxt->instate = XML_PARSER_EOF;
11444
0
                    xmlFinishDocument(ctxt);
11445
0
                } else {
11446
0
        ctxt->instate = XML_PARSER_START_TAG;
11447
0
    }
11448
0
    break;
11449
0
            case XML_PARSER_DTD: {
11450
0
                if ((!terminate) && (!xmlParseLookupInternalSubset(ctxt)))
11451
0
                    goto done;
11452
0
    xmlParseInternalSubset(ctxt);
11453
0
    ctxt->inSubset = 2;
11454
0
    if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11455
0
        (ctxt->sax->externalSubset != NULL))
11456
0
        ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
11457
0
          ctxt->extSubSystem, ctxt->extSubURI);
11458
0
    ctxt->inSubset = 0;
11459
0
    xmlCleanSpecialAttr(ctxt);
11460
0
    ctxt->instate = XML_PARSER_PROLOG;
11461
0
                break;
11462
0
      }
11463
0
            default:
11464
0
                xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
11465
0
      "PP: internal error\n");
11466
0
    ctxt->instate = XML_PARSER_EOF;
11467
0
    break;
11468
0
  }
11469
0
    }
11470
0
done:
11471
0
    return(ret);
11472
0
encoding_error:
11473
    /* Only report the first error */
11474
0
    if ((ctxt->input->flags & XML_INPUT_ENCODING_ERROR) == 0) {
11475
0
        xmlCtxtErrIO(ctxt, XML_ERR_INVALID_ENCODING, NULL);
11476
0
        ctxt->input->flags |= XML_INPUT_ENCODING_ERROR;
11477
0
    }
11478
0
    return(0);
11479
0
}
11480
11481
/**
11482
 * xmlParseChunk:
11483
 * @ctxt:  an XML parser context
11484
 * @chunk:  chunk of memory
11485
 * @size:  size of chunk in bytes
11486
 * @terminate:  last chunk indicator
11487
 *
11488
 * Parse a chunk of memory in push parser mode.
11489
 *
11490
 * Assumes that the parser context was initialized with
11491
 * xmlCreatePushParserCtxt.
11492
 *
11493
 * The last chunk, which will often be empty, must be marked with
11494
 * the @terminate flag. With the default SAX callbacks, the resulting
11495
 * document will be available in ctxt->myDoc. This pointer will not
11496
 * be freed by the library.
11497
 *
11498
 * If the document isn't well-formed, ctxt->myDoc is set to NULL.
11499
 * The push parser doesn't support recovery mode.
11500
 *
11501
 * Returns an xmlParserErrors code (0 on success).
11502
 */
11503
int
11504
xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
11505
0
              int terminate) {
11506
0
    size_t curBase;
11507
0
    size_t maxLength;
11508
0
    int end_in_lf = 0;
11509
11510
0
    if ((ctxt == NULL) || (size < 0))
11511
0
        return(XML_ERR_ARGUMENT);
11512
0
    if (ctxt->disableSAX != 0)
11513
0
        return(ctxt->errNo);
11514
0
    if (ctxt->input == NULL)
11515
0
        return(XML_ERR_INTERNAL_ERROR);
11516
11517
0
    ctxt->input->flags |= XML_INPUT_PROGRESSIVE;
11518
0
    if (ctxt->instate == XML_PARSER_START)
11519
0
        xmlCtxtInitializeLate(ctxt);
11520
0
    if ((size > 0) && (chunk != NULL) && (!terminate) &&
11521
0
        (chunk[size - 1] == '\r')) {
11522
0
  end_in_lf = 1;
11523
0
  size--;
11524
0
    }
11525
11526
0
    if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
11527
0
        (ctxt->input->buf != NULL))  {
11528
0
  size_t pos = ctxt->input->cur - ctxt->input->base;
11529
0
  int res;
11530
11531
0
  res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
11532
0
        xmlBufUpdateInput(ctxt->input->buf->buffer, ctxt->input, pos);
11533
0
  if (res < 0) {
11534
0
            xmlCtxtErrIO(ctxt, ctxt->input->buf->error, NULL);
11535
0
      xmlHaltParser(ctxt);
11536
0
      return(ctxt->errNo);
11537
0
  }
11538
0
    }
11539
11540
0
    xmlParseTryOrFinish(ctxt, terminate);
11541
11542
0
    curBase = ctxt->input->cur - ctxt->input->base;
11543
0
    maxLength = (ctxt->options & XML_PARSE_HUGE) ?
11544
0
                XML_MAX_HUGE_LENGTH :
11545
0
                XML_MAX_LOOKUP_LIMIT;
11546
0
    if (curBase > maxLength) {
11547
0
        xmlFatalErr(ctxt, XML_ERR_RESOURCE_LIMIT,
11548
0
                    "Buffer size limit exceeded, try XML_PARSE_HUGE\n");
11549
0
        xmlHaltParser(ctxt);
11550
0
    }
11551
11552
0
    if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
11553
0
        return(ctxt->errNo);
11554
11555
0
    if ((end_in_lf == 1) && (ctxt->input != NULL) &&
11556
0
        (ctxt->input->buf != NULL)) {
11557
0
  size_t pos = ctxt->input->cur - ctxt->input->base;
11558
0
        int res;
11559
11560
0
  res = xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
11561
0
  xmlBufUpdateInput(ctxt->input->buf->buffer, ctxt->input, pos);
11562
0
        if (res < 0) {
11563
0
            xmlCtxtErrIO(ctxt, ctxt->input->buf->error, NULL);
11564
0
            xmlHaltParser(ctxt);
11565
0
            return(ctxt->errNo);
11566
0
        }
11567
0
    }
11568
0
    if (terminate) {
11569
  /*
11570
   * Check for termination
11571
   */
11572
0
        if ((ctxt->instate != XML_PARSER_EOF) &&
11573
0
            (ctxt->instate != XML_PARSER_EPILOG)) {
11574
0
            if (ctxt->nameNr > 0) {
11575
0
                const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
11576
0
                int line = ctxt->pushTab[ctxt->nameNr - 1].line;
11577
0
                xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
11578
0
                        "Premature end of data in tag %s line %d\n",
11579
0
                        name, line, NULL);
11580
0
            } else if (ctxt->instate == XML_PARSER_START) {
11581
0
                xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11582
0
            } else {
11583
0
                xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
11584
0
                               "Start tag expected, '<' not found\n");
11585
0
            }
11586
0
        } else if ((ctxt->input->buf != NULL) &&
11587
0
                   (ctxt->input->buf->encoder != NULL) &&
11588
0
                   (ctxt->input->buf->error == 0) &&
11589
0
                   (!xmlBufIsEmpty(ctxt->input->buf->raw))) {
11590
0
            xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
11591
0
                           "Truncated multi-byte sequence at EOF\n");
11592
0
        }
11593
0
  if (ctxt->instate != XML_PARSER_EOF) {
11594
0
            ctxt->instate = XML_PARSER_EOF;
11595
0
            xmlFinishDocument(ctxt);
11596
0
  }
11597
0
    }
11598
0
    if (ctxt->wellFormed == 0)
11599
0
  return((xmlParserErrors) ctxt->errNo);
11600
0
    else
11601
0
        return(0);
11602
0
}
11603
11604
/************************************************************************
11605
 *                  *
11606
 *    I/O front end functions to the parser     *
11607
 *                  *
11608
 ************************************************************************/
11609
11610
/**
11611
 * xmlCreatePushParserCtxt:
11612
 * @sax:  a SAX handler (optional)
11613
 * @user_data:  user data for SAX callbacks (optional)
11614
 * @chunk:  initial chunk (optional, deprecated)
11615
 * @size:  size of initial chunk in bytes
11616
 * @filename:  file name or URI (optional)
11617
 *
11618
 * Create a parser context for using the XML parser in push mode.
11619
 * See xmlParseChunk.
11620
 *
11621
 * Passing an initial chunk is useless and deprecated.
11622
 *
11623
 * @filename is used as base URI to fetch external entities and for
11624
 * error reports.
11625
 *
11626
 * Returns the new parser context or NULL in case of error.
11627
 */
11628
11629
xmlParserCtxtPtr
11630
xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
11631
0
                        const char *chunk, int size, const char *filename) {
11632
0
    xmlParserCtxtPtr ctxt;
11633
0
    xmlParserInputPtr input;
11634
11635
0
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
11636
0
    if (ctxt == NULL)
11637
0
  return(NULL);
11638
11639
0
    ctxt->options &= ~XML_PARSE_NODICT;
11640
0
    ctxt->dictNames = 1;
11641
11642
0
    input = xmlNewInputPush(ctxt, filename, chunk, size, NULL);
11643
0
    if (input == NULL) {
11644
0
  xmlFreeParserCtxt(ctxt);
11645
0
  return(NULL);
11646
0
    }
11647
0
    inputPush(ctxt, input);
11648
11649
0
    return(ctxt);
11650
0
}
11651
#endif /* LIBXML_PUSH_ENABLED */
11652
11653
/**
11654
 * xmlStopParser:
11655
 * @ctxt:  an XML parser context
11656
 *
11657
 * Blocks further parser processing
11658
 */
11659
void
11660
0
xmlStopParser(xmlParserCtxtPtr ctxt) {
11661
0
    if (ctxt == NULL)
11662
0
        return;
11663
0
    xmlHaltParser(ctxt);
11664
0
    if (ctxt->errNo != XML_ERR_NO_MEMORY)
11665
0
        ctxt->errNo = XML_ERR_USER_STOP;
11666
0
}
11667
11668
/**
11669
 * xmlCreateIOParserCtxt:
11670
 * @sax:  a SAX handler (optional)
11671
 * @user_data:  user data for SAX callbacks (optional)
11672
 * @ioread:  an I/O read function
11673
 * @ioclose:  an I/O close function (optional)
11674
 * @ioctx:  an I/O handler
11675
 * @enc:  the charset encoding if known (deprecated)
11676
 *
11677
 * Create a parser context for using the XML parser with an existing
11678
 * I/O stream
11679
 *
11680
 * Returns the new parser context or NULL
11681
 */
11682
xmlParserCtxtPtr
11683
xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
11684
                      xmlInputReadCallback ioread,
11685
                      xmlInputCloseCallback ioclose,
11686
0
                      void *ioctx, xmlCharEncoding enc) {
11687
0
    xmlParserCtxtPtr ctxt;
11688
0
    xmlParserInputPtr input;
11689
0
    const char *encoding;
11690
11691
0
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
11692
0
    if (ctxt == NULL)
11693
0
  return(NULL);
11694
11695
0
    encoding = xmlGetCharEncodingName(enc);
11696
0
    input = xmlNewInputIO(ctxt, NULL, ioread, ioclose, ioctx, encoding, 0);
11697
0
    if (input == NULL) {
11698
0
  xmlFreeParserCtxt(ctxt);
11699
0
        return (NULL);
11700
0
    }
11701
0
    inputPush(ctxt, input);
11702
11703
0
    return(ctxt);
11704
0
}
11705
11706
#ifdef LIBXML_VALID_ENABLED
11707
/************************************************************************
11708
 *                  *
11709
 *    Front ends when parsing a DTD       *
11710
 *                  *
11711
 ************************************************************************/
11712
11713
/**
11714
 * xmlIOParseDTD:
11715
 * @sax:  the SAX handler block or NULL
11716
 * @input:  an Input Buffer
11717
 * @enc:  the charset encoding if known
11718
 *
11719
 * Load and parse a DTD
11720
 *
11721
 * Returns the resulting xmlDtdPtr or NULL in case of error.
11722
 * @input will be freed by the function in any case.
11723
 */
11724
11725
xmlDtdPtr
11726
xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
11727
0
        xmlCharEncoding enc) {
11728
0
    xmlDtdPtr ret = NULL;
11729
0
    xmlParserCtxtPtr ctxt;
11730
0
    xmlParserInputPtr pinput = NULL;
11731
11732
0
    if (input == NULL)
11733
0
  return(NULL);
11734
11735
0
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
11736
0
    if (ctxt == NULL) {
11737
0
        xmlFreeParserInputBuffer(input);
11738
0
  return(NULL);
11739
0
    }
11740
11741
    /*
11742
     * generate a parser input from the I/O handler
11743
     */
11744
11745
0
    pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
11746
0
    if (pinput == NULL) {
11747
0
        xmlFreeParserInputBuffer(input);
11748
0
  xmlFreeParserCtxt(ctxt);
11749
0
  return(NULL);
11750
0
    }
11751
11752
    /*
11753
     * plug some encoding conversion routines here.
11754
     */
11755
0
    if (xmlPushInput(ctxt, pinput) < 0) {
11756
0
  xmlFreeParserCtxt(ctxt);
11757
0
  return(NULL);
11758
0
    }
11759
0
    if (enc != XML_CHAR_ENCODING_NONE) {
11760
0
        xmlSwitchEncoding(ctxt, enc);
11761
0
    }
11762
11763
    /*
11764
     * let's parse that entity knowing it's an external subset.
11765
     */
11766
0
    ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
11767
0
    if (ctxt->myDoc == NULL) {
11768
0
  xmlErrMemory(ctxt);
11769
0
  return(NULL);
11770
0
    }
11771
0
    ctxt->myDoc->properties = XML_DOC_INTERNAL;
11772
0
    ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
11773
0
                                 BAD_CAST "none", BAD_CAST "none");
11774
11775
0
    xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
11776
11777
0
    if (ctxt->myDoc != NULL) {
11778
0
  if (ctxt->wellFormed) {
11779
0
      ret = ctxt->myDoc->extSubset;
11780
0
      ctxt->myDoc->extSubset = NULL;
11781
0
      if (ret != NULL) {
11782
0
    xmlNodePtr tmp;
11783
11784
0
    ret->doc = NULL;
11785
0
    tmp = ret->children;
11786
0
    while (tmp != NULL) {
11787
0
        tmp->doc = NULL;
11788
0
        tmp = tmp->next;
11789
0
    }
11790
0
      }
11791
0
  } else {
11792
0
      ret = NULL;
11793
0
  }
11794
0
        xmlFreeDoc(ctxt->myDoc);
11795
0
        ctxt->myDoc = NULL;
11796
0
    }
11797
0
    xmlFreeParserCtxt(ctxt);
11798
11799
0
    return(ret);
11800
0
}
11801
11802
/**
11803
 * xmlSAXParseDTD:
11804
 * @sax:  the SAX handler block
11805
 * @ExternalID:  a NAME* containing the External ID of the DTD
11806
 * @SystemID:  a NAME* containing the URL to the DTD
11807
 *
11808
 * DEPRECATED: Don't use.
11809
 *
11810
 * Load and parse an external subset.
11811
 *
11812
 * Returns the resulting xmlDtdPtr or NULL in case of error.
11813
 */
11814
11815
xmlDtdPtr
11816
xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
11817
0
                          const xmlChar *SystemID) {
11818
0
    xmlDtdPtr ret = NULL;
11819
0
    xmlParserCtxtPtr ctxt;
11820
0
    xmlParserInputPtr input = NULL;
11821
0
    xmlChar* systemIdCanonic;
11822
11823
0
    if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
11824
11825
0
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
11826
0
    if (ctxt == NULL) {
11827
0
  return(NULL);
11828
0
    }
11829
11830
    /*
11831
     * Canonicalise the system ID
11832
     */
11833
0
    systemIdCanonic = xmlCanonicPath(SystemID);
11834
0
    if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
11835
0
  xmlFreeParserCtxt(ctxt);
11836
0
  return(NULL);
11837
0
    }
11838
11839
    /*
11840
     * Ask the Entity resolver to load the damn thing
11841
     */
11842
11843
0
    if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
11844
0
  input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
11845
0
                                   systemIdCanonic);
11846
0
    if (input == NULL) {
11847
0
  xmlFreeParserCtxt(ctxt);
11848
0
  if (systemIdCanonic != NULL)
11849
0
      xmlFree(systemIdCanonic);
11850
0
  return(NULL);
11851
0
    }
11852
11853
    /*
11854
     * plug some encoding conversion routines here.
11855
     */
11856
0
    if (xmlPushInput(ctxt, input) < 0) {
11857
0
  xmlFreeParserCtxt(ctxt);
11858
0
  if (systemIdCanonic != NULL)
11859
0
      xmlFree(systemIdCanonic);
11860
0
  return(NULL);
11861
0
    }
11862
11863
0
    xmlDetectEncoding(ctxt);
11864
11865
0
    if (input->filename == NULL)
11866
0
  input->filename = (char *) systemIdCanonic;
11867
0
    else
11868
0
  xmlFree(systemIdCanonic);
11869
11870
    /*
11871
     * let's parse that entity knowing it's an external subset.
11872
     */
11873
0
    ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
11874
0
    if (ctxt->myDoc == NULL) {
11875
0
  xmlErrMemory(ctxt);
11876
0
  xmlFreeParserCtxt(ctxt);
11877
0
  return(NULL);
11878
0
    }
11879
0
    ctxt->myDoc->properties = XML_DOC_INTERNAL;
11880
0
    ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
11881
0
                                 ExternalID, SystemID);
11882
0
    if (ctxt->myDoc->extSubset == NULL) {
11883
0
        xmlFreeDoc(ctxt->myDoc);
11884
0
        xmlFreeParserCtxt(ctxt);
11885
0
        return(NULL);
11886
0
    }
11887
0
    xmlParseExternalSubset(ctxt, ExternalID, SystemID);
11888
11889
0
    if (ctxt->myDoc != NULL) {
11890
0
  if (ctxt->wellFormed) {
11891
0
      ret = ctxt->myDoc->extSubset;
11892
0
      ctxt->myDoc->extSubset = NULL;
11893
0
      if (ret != NULL) {
11894
0
    xmlNodePtr tmp;
11895
11896
0
    ret->doc = NULL;
11897
0
    tmp = ret->children;
11898
0
    while (tmp != NULL) {
11899
0
        tmp->doc = NULL;
11900
0
        tmp = tmp->next;
11901
0
    }
11902
0
      }
11903
0
  } else {
11904
0
      ret = NULL;
11905
0
  }
11906
0
        xmlFreeDoc(ctxt->myDoc);
11907
0
        ctxt->myDoc = NULL;
11908
0
    }
11909
0
    xmlFreeParserCtxt(ctxt);
11910
11911
0
    return(ret);
11912
0
}
11913
11914
11915
/**
11916
 * xmlParseDTD:
11917
 * @ExternalID:  a NAME* containing the External ID of the DTD
11918
 * @SystemID:  a NAME* containing the URL to the DTD
11919
 *
11920
 * Load and parse an external subset.
11921
 *
11922
 * Returns the resulting xmlDtdPtr or NULL in case of error.
11923
 */
11924
11925
xmlDtdPtr
11926
0
xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
11927
0
    return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
11928
0
}
11929
#endif /* LIBXML_VALID_ENABLED */
11930
11931
/************************************************************************
11932
 *                  *
11933
 *    Front ends when parsing an Entity     *
11934
 *                  *
11935
 ************************************************************************/
11936
11937
static xmlNodePtr
11938
xmlCtxtParseContent(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
11939
0
                    int hasTextDecl, int buildTree) {
11940
0
    xmlNodePtr root = NULL;
11941
0
    xmlNodePtr list = NULL;
11942
0
    xmlChar *rootName = BAD_CAST "#root";
11943
0
    int result;
11944
11945
0
    if (buildTree) {
11946
0
        root = xmlNewDocNode(ctxt->myDoc, NULL, rootName, NULL);
11947
0
        if (root == NULL) {
11948
0
            xmlErrMemory(ctxt);
11949
0
            goto error;
11950
0
        }
11951
0
    }
11952
11953
0
    if (xmlPushInput(ctxt, input) < 0)
11954
0
        goto error;
11955
11956
0
    nameNsPush(ctxt, rootName, NULL, NULL, 0, 0);
11957
0
    spacePush(ctxt, -1);
11958
11959
0
    if (buildTree)
11960
0
        nodePush(ctxt, root);
11961
11962
0
    if (hasTextDecl) {
11963
0
        xmlDetectEncoding(ctxt);
11964
11965
        /*
11966
         * Parse a possible text declaration first
11967
         */
11968
0
        if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
11969
0
            (IS_BLANK_CH(NXT(5)))) {
11970
0
            xmlParseTextDecl(ctxt);
11971
            /*
11972
             * An XML-1.0 document can't reference an entity not XML-1.0
11973
             */
11974
0
            if ((xmlStrEqual(ctxt->version, BAD_CAST "1.0")) &&
11975
0
                (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
11976
0
                xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
11977
0
                               "Version mismatch between document and "
11978
0
                               "entity\n");
11979
0
            }
11980
0
        }
11981
0
    }
11982
11983
0
    xmlParseContentInternal(ctxt);
11984
11985
0
    if (ctxt->input->cur < ctxt->input->end)
11986
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11987
11988
0
    if ((ctxt->wellFormed) ||
11989
0
        ((ctxt->recovery) && (ctxt->errNo != XML_ERR_NO_MEMORY))) {
11990
0
        if (root != NULL) {
11991
0
            xmlNodePtr cur;
11992
11993
            /*
11994
             * Return the newly created nodeset after unlinking it from
11995
             * its pseudo parent.
11996
             */
11997
0
            cur = root->children;
11998
0
            list = cur;
11999
0
            while (cur != NULL) {
12000
0
                cur->parent = NULL;
12001
0
                cur = cur->next;
12002
0
            }
12003
0
            root->children = NULL;
12004
0
            root->last = NULL;
12005
0
        }
12006
0
    }
12007
12008
    /*
12009
     * Read the rest of the stream in case of errors. We want
12010
     * to account for the whole entity size.
12011
     */
12012
0
    do {
12013
0
        ctxt->input->cur = ctxt->input->end;
12014
0
        xmlParserShrink(ctxt);
12015
0
        result = xmlParserGrow(ctxt);
12016
0
    } while (result > 0);
12017
12018
0
    if (buildTree)
12019
0
        nodePop(ctxt);
12020
12021
0
    namePop(ctxt);
12022
0
    spacePop(ctxt);
12023
12024
    /* xmlPopInput would free the stream */
12025
0
    inputPop(ctxt);
12026
12027
0
error:
12028
0
    xmlFreeNode(root);
12029
12030
0
    return(list);
12031
0
}
12032
12033
static void
12034
0
xmlCtxtParseEntity(xmlParserCtxtPtr ctxt, xmlEntityPtr ent) {
12035
0
    xmlParserInputPtr input;
12036
0
    xmlNodePtr list;
12037
0
    unsigned long consumed;
12038
0
    int isExternal;
12039
0
    int buildTree;
12040
0
    int oldMinNsIndex;
12041
0
    int oldNodelen, oldNodemem;
12042
12043
0
    isExternal = (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY);
12044
0
    buildTree = (ctxt->node != NULL);
12045
12046
    /*
12047
     * Recursion check
12048
     */
12049
0
    if (ent->flags & XML_ENT_EXPANDING) {
12050
0
        xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
12051
0
        xmlHaltParser(ctxt);
12052
0
        goto error;
12053
0
    }
12054
12055
    /*
12056
     * Load entity
12057
     */
12058
0
    input = xmlNewEntityInputStream(ctxt, ent);
12059
0
    if (input == NULL)
12060
0
        goto error;
12061
12062
    /*
12063
     * When building a tree, we need to limit the scope of namespace
12064
     * declarations, so that entities don't reference xmlNs structs
12065
     * from the parent of a reference.
12066
     */
12067
0
    oldMinNsIndex = ctxt->nsdb->minNsIndex;
12068
0
    if (buildTree)
12069
0
        ctxt->nsdb->minNsIndex = ctxt->nsNr;
12070
12071
0
    oldNodelen = ctxt->nodelen;
12072
0
    oldNodemem = ctxt->nodemem;
12073
0
    ctxt->nodelen = 0;
12074
0
    ctxt->nodemem = 0;
12075
12076
    /*
12077
     * Parse content
12078
     *
12079
     * This initiates a recursive call chain:
12080
     *
12081
     * - xmlCtxtParseContent
12082
     * - xmlParseContentInternal
12083
     * - xmlParseReference
12084
     * - xmlCtxtParseEntity
12085
     *
12086
     * The nesting depth is limited by the maximum number of inputs,
12087
     * see xmlPushInput.
12088
     *
12089
     * It's possible to make this non-recursive (minNsIndex must be
12090
     * stored in the input struct) at the expense of code readability.
12091
     */
12092
12093
0
    ent->flags |= XML_ENT_EXPANDING;
12094
12095
0
    list = xmlCtxtParseContent(ctxt, input, isExternal, buildTree);
12096
12097
0
    ent->flags &= ~XML_ENT_EXPANDING;
12098
12099
0
    ctxt->nsdb->minNsIndex = oldMinNsIndex;
12100
0
    ctxt->nodelen = oldNodelen;
12101
0
    ctxt->nodemem = oldNodemem;
12102
12103
    /*
12104
     * Entity size accounting
12105
     */
12106
0
    consumed = input->consumed;
12107
0
    xmlSaturatedAddSizeT(&consumed, input->end - input->base);
12108
12109
0
    if ((ent->flags & XML_ENT_CHECKED) == 0)
12110
0
        xmlSaturatedAdd(&ent->expandedSize, consumed);
12111
12112
0
    if ((ent->flags & XML_ENT_PARSED) == 0) {
12113
0
        if (isExternal)
12114
0
            xmlSaturatedAdd(&ctxt->sizeentities, consumed);
12115
12116
0
        ent->children = list;
12117
12118
0
        while (list != NULL) {
12119
0
            list->parent = (xmlNodePtr) ent;
12120
0
            if (list->next == NULL)
12121
0
                ent->last = list;
12122
0
            list = list->next;
12123
0
        }
12124
0
    } else {
12125
0
        xmlFreeNodeList(list);
12126
0
    }
12127
12128
0
    xmlFreeInputStream(input);
12129
12130
0
error:
12131
0
    ent->flags |= XML_ENT_PARSED | XML_ENT_CHECKED;
12132
0
}
12133
12134
/**
12135
 * xmlParseCtxtExternalEntity:
12136
 * @ctxt:  the existing parsing context
12137
 * @URL:  the URL for the entity to load
12138
 * @ID:  the System ID for the entity to load
12139
 * @listOut:  the return value for the set of parsed nodes
12140
 *
12141
 * Parse an external general entity within an existing parsing context
12142
 * An external general parsed entity is well-formed if it matches the
12143
 * production labeled extParsedEnt.
12144
 *
12145
 * [78] extParsedEnt ::= TextDecl? content
12146
 *
12147
 * Returns 0 if the entity is well formed, XML_ERR_ARGUMENT if ctxt is NULL and
12148
 *    the parser error code otherwise
12149
 */
12150
12151
int
12152
xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctxt, const xmlChar *URL,
12153
0
                           const xmlChar *ID, xmlNodePtr *listOut) {
12154
0
    xmlParserInputPtr input;
12155
0
    xmlNodePtr list;
12156
12157
0
    if (listOut != NULL)
12158
0
        *listOut = NULL;
12159
12160
0
    if (ctxt == NULL)
12161
0
        return(XML_ERR_ARGUMENT);
12162
12163
0
    input = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
12164
0
    if (input == NULL)
12165
0
        return(ctxt->errNo);
12166
12167
0
    xmlCtxtInitializeLate(ctxt);
12168
12169
0
    list = xmlCtxtParseContent(ctxt, input, /* hasTextDecl */ 1, 1);
12170
0
    if (listOut != NULL)
12171
0
        *listOut = list;
12172
0
    else
12173
0
        xmlFreeNodeList(list);
12174
12175
0
    xmlFreeInputStream(input);
12176
0
    return(ctxt->errNo);
12177
0
}
12178
12179
#ifdef LIBXML_SAX1_ENABLED
12180
/**
12181
 * xmlParseExternalEntity:
12182
 * @doc:  the document the chunk pertains to
12183
 * @sax:  the SAX handler block (possibly NULL)
12184
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
12185
 * @depth:  Used for loop detection, use 0
12186
 * @URL:  the URL for the entity to load
12187
 * @ID:  the System ID for the entity to load
12188
 * @list:  the return value for the set of parsed nodes
12189
 *
12190
 * DEPRECATED: Use xmlParseCtxtExternalEntity.
12191
 *
12192
 * Parse an external general entity
12193
 * An external general parsed entity is well-formed if it matches the
12194
 * production labeled extParsedEnt.
12195
 *
12196
 * [78] extParsedEnt ::= TextDecl? content
12197
 *
12198
 * Returns 0 if the entity is well formed, XML_ERR_ARGUMENT if doc is NULL and
12199
 *    the parser error code otherwise
12200
 */
12201
12202
int
12203
xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
12204
0
    int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *list) {
12205
0
    xmlParserCtxtPtr ctxt;
12206
0
    int ret;
12207
12208
0
    if (list != NULL)
12209
0
        *list = NULL;
12210
12211
0
    if (doc == NULL)
12212
0
        return(XML_ERR_ARGUMENT);
12213
12214
0
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
12215
0
    if (ctxt == NULL)
12216
0
        return(XML_ERR_NO_MEMORY);
12217
12218
0
    ctxt->depth = depth;
12219
0
    ctxt->myDoc = doc;
12220
0
    ret = xmlParseCtxtExternalEntity(ctxt, URL, ID, list);
12221
12222
0
    xmlFreeParserCtxt(ctxt);
12223
0
    return(ret);
12224
0
}
12225
12226
/**
12227
 * xmlParseBalancedChunkMemory:
12228
 * @doc:  the document the chunk pertains to (must not be NULL)
12229
 * @sax:  the SAX handler block (possibly NULL)
12230
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
12231
 * @depth:  Used for loop detection, use 0
12232
 * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
12233
 * @lst:  the return value for the set of parsed nodes
12234
 *
12235
 * Parse a well-balanced chunk of an XML document
12236
 * called by the parser
12237
 * The allowed sequence for the Well Balanced Chunk is the one defined by
12238
 * the content production in the XML grammar:
12239
 *
12240
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12241
 *
12242
 * Returns 0 if the chunk is well balanced, XML_ERR_ARGUMENT if string is NULL and
12243
 *    the parser error code otherwise
12244
 */
12245
12246
int
12247
xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
12248
0
     void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
12249
0
    return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
12250
0
                                                depth, string, lst, 0 );
12251
0
}
12252
#endif /* LIBXML_SAX1_ENABLED */
12253
12254
/**
12255
 * xmlParseInNodeContext:
12256
 * @node:  the context node
12257
 * @data:  the input string
12258
 * @datalen:  the input string length in bytes
12259
 * @options:  a combination of xmlParserOption
12260
 * @lst:  the return value for the set of parsed nodes
12261
 *
12262
 * Parse a well-balanced chunk of an XML document
12263
 * within the context (DTD, namespaces, etc ...) of the given node.
12264
 *
12265
 * The allowed sequence for the data is a Well Balanced Chunk defined by
12266
 * the content production in the XML grammar:
12267
 *
12268
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12269
 *
12270
 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
12271
 * error code otherwise
12272
 */
12273
xmlParserErrors
12274
xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
12275
0
                      int options, xmlNodePtr *lst) {
12276
0
    xmlParserCtxtPtr ctxt;
12277
0
    xmlDocPtr doc = NULL;
12278
0
    xmlNodePtr fake, cur;
12279
0
    int nsnr = 0;
12280
12281
0
    xmlParserErrors ret = XML_ERR_OK;
12282
12283
    /*
12284
     * check all input parameters, grab the document
12285
     */
12286
0
    if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
12287
0
        return(XML_ERR_ARGUMENT);
12288
0
    switch (node->type) {
12289
0
        case XML_ELEMENT_NODE:
12290
0
        case XML_ATTRIBUTE_NODE:
12291
0
        case XML_TEXT_NODE:
12292
0
        case XML_CDATA_SECTION_NODE:
12293
0
        case XML_ENTITY_REF_NODE:
12294
0
        case XML_PI_NODE:
12295
0
        case XML_COMMENT_NODE:
12296
0
        case XML_DOCUMENT_NODE:
12297
0
        case XML_HTML_DOCUMENT_NODE:
12298
0
      break;
12299
0
  default:
12300
0
      return(XML_ERR_INTERNAL_ERROR);
12301
12302
0
    }
12303
0
    while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
12304
0
           (node->type != XML_DOCUMENT_NODE) &&
12305
0
     (node->type != XML_HTML_DOCUMENT_NODE))
12306
0
  node = node->parent;
12307
0
    if (node == NULL)
12308
0
  return(XML_ERR_INTERNAL_ERROR);
12309
0
    if (node->type == XML_ELEMENT_NODE)
12310
0
  doc = node->doc;
12311
0
    else
12312
0
        doc = (xmlDocPtr) node;
12313
0
    if (doc == NULL)
12314
0
  return(XML_ERR_INTERNAL_ERROR);
12315
12316
    /*
12317
     * allocate a context and set-up everything not related to the
12318
     * node position in the tree
12319
     */
12320
0
    if (doc->type == XML_DOCUMENT_NODE)
12321
0
  ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
12322
0
#ifdef LIBXML_HTML_ENABLED
12323
0
    else if (doc->type == XML_HTML_DOCUMENT_NODE) {
12324
0
  ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
12325
        /*
12326
         * When parsing in context, it makes no sense to add implied
12327
         * elements like html/body/etc...
12328
         */
12329
0
        options |= HTML_PARSE_NOIMPLIED;
12330
0
    }
12331
0
#endif
12332
0
    else
12333
0
        return(XML_ERR_INTERNAL_ERROR);
12334
12335
0
    if (ctxt == NULL)
12336
0
        return(XML_ERR_NO_MEMORY);
12337
12338
    /*
12339
     * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
12340
     * We need a dictionary for xmlCtxtInitializeLate, so if there's no doc dict
12341
     * we must wait until the last moment to free the original one.
12342
     */
12343
0
    if (doc->dict != NULL) {
12344
0
        if (ctxt->dict != NULL)
12345
0
      xmlDictFree(ctxt->dict);
12346
0
  ctxt->dict = doc->dict;
12347
0
    } else {
12348
0
        options |= XML_PARSE_NODICT;
12349
0
        ctxt->dictNames = 0;
12350
0
    }
12351
12352
0
    if (doc->encoding != NULL)
12353
0
        xmlSwitchEncodingName(ctxt, (const char *) doc->encoding);
12354
12355
0
    xmlCtxtUseOptions(ctxt, options);
12356
0
    xmlCtxtInitializeLate(ctxt);
12357
0
    ctxt->myDoc = doc;
12358
    /* parsing in context, i.e. as within existing content */
12359
0
    ctxt->input_id = 2;
12360
12361
    /*
12362
     * TODO: Use xmlCtxtParseContent
12363
     */
12364
12365
0
    fake = xmlNewDocComment(node->doc, NULL);
12366
0
    if (fake == NULL) {
12367
0
        xmlFreeParserCtxt(ctxt);
12368
0
  return(XML_ERR_NO_MEMORY);
12369
0
    }
12370
0
    xmlAddChild(node, fake);
12371
12372
0
    if (node->type == XML_ELEMENT_NODE)
12373
0
  nodePush(ctxt, node);
12374
12375
0
    if ((ctxt->html == 0) && (node->type == XML_ELEMENT_NODE)) {
12376
  /*
12377
   * initialize the SAX2 namespaces stack
12378
   */
12379
0
  cur = node;
12380
0
  while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
12381
0
      xmlNsPtr ns = cur->nsDef;
12382
0
            xmlHashedString hprefix, huri;
12383
12384
0
      while (ns != NULL) {
12385
0
                hprefix = xmlDictLookupHashed(ctxt->dict, ns->prefix, -1);
12386
0
                huri = xmlDictLookupHashed(ctxt->dict, ns->href, -1);
12387
0
                if (xmlParserNsPush(ctxt, &hprefix, &huri, ns, 1) > 0)
12388
0
                    nsnr++;
12389
0
    ns = ns->next;
12390
0
      }
12391
0
      cur = cur->parent;
12392
0
  }
12393
0
    }
12394
12395
0
    if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
12396
  /*
12397
   * ID/IDREF registration will be done in xmlValidateElement below
12398
   */
12399
0
  ctxt->loadsubset |= XML_SKIP_IDS;
12400
0
    }
12401
12402
0
#ifdef LIBXML_HTML_ENABLED
12403
0
    if (doc->type == XML_HTML_DOCUMENT_NODE)
12404
0
        __htmlParseContent(ctxt);
12405
0
    else
12406
0
#endif
12407
0
  xmlParseContentInternal(ctxt);
12408
12409
0
    if (ctxt->input->cur < ctxt->input->end)
12410
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12411
12412
0
    xmlParserNsPop(ctxt, nsnr);
12413
12414
0
    if ((ctxt->wellFormed) ||
12415
0
        ((ctxt->recovery) && (ctxt->errNo != XML_ERR_NO_MEMORY))) {
12416
0
        ret = XML_ERR_OK;
12417
0
    } else {
12418
0
  ret = (xmlParserErrors) ctxt->errNo;
12419
0
    }
12420
12421
    /*
12422
     * Return the newly created nodeset after unlinking it from
12423
     * the pseudo sibling.
12424
     */
12425
12426
0
    cur = fake->next;
12427
0
    fake->next = NULL;
12428
0
    node->last = fake;
12429
12430
0
    if (cur != NULL) {
12431
0
  cur->prev = NULL;
12432
0
    }
12433
12434
0
    *lst = cur;
12435
12436
0
    while (cur != NULL) {
12437
0
  cur->parent = NULL;
12438
0
  cur = cur->next;
12439
0
    }
12440
12441
0
    xmlUnlinkNode(fake);
12442
0
    xmlFreeNode(fake);
12443
12444
12445
0
    if (ret != XML_ERR_OK) {
12446
0
        xmlFreeNodeList(*lst);
12447
0
  *lst = NULL;
12448
0
    }
12449
12450
0
    if (doc->dict != NULL)
12451
0
        ctxt->dict = NULL;
12452
0
    xmlFreeParserCtxt(ctxt);
12453
12454
0
    return(ret);
12455
0
}
12456
12457
#ifdef LIBXML_SAX1_ENABLED
12458
/**
12459
 * xmlParseBalancedChunkMemoryRecover:
12460
 * @doc:  the document the chunk pertains to (must not be NULL)
12461
 * @sax:  the SAX handler block (possibly NULL)
12462
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
12463
 * @depth:  Used for loop detection, use 0
12464
 * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
12465
 * @listOut:  the return value for the set of parsed nodes
12466
 * @recover: return nodes even if the data is broken (use 0)
12467
 *
12468
 * Parse a well-balanced chunk of an XML document
12469
 *
12470
 * The allowed sequence for the Well Balanced Chunk is the one defined by
12471
 * the content production in the XML grammar:
12472
 *
12473
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12474
 *
12475
 * Returns 0 if the chunk is well balanced, or thehe parser error code
12476
 * otherwise.
12477
 *
12478
 * In case recover is set to 1, the nodelist will not be empty even if
12479
 * the parsed chunk is not well balanced, assuming the parsing succeeded to
12480
 * some extent.
12481
 */
12482
int
12483
xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
12484
     void *user_data, int depth, const xmlChar *string, xmlNodePtr *listOut,
12485
0
     int recover) {
12486
0
    xmlParserCtxtPtr ctxt;
12487
0
    xmlParserInputPtr input;
12488
0
    xmlNodePtr list;
12489
0
    int ret;
12490
12491
0
    if (listOut != NULL)
12492
0
        *listOut = NULL;
12493
12494
0
    if (string == NULL)
12495
0
        return(XML_ERR_ARGUMENT);
12496
12497
0
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
12498
0
    if (ctxt == NULL)
12499
0
        return(XML_ERR_NO_MEMORY);
12500
12501
0
    xmlCtxtInitializeLate(ctxt);
12502
12503
0
    ctxt->depth = depth;
12504
0
    ctxt->myDoc = doc;
12505
0
    if (recover) {
12506
0
        ctxt->options |= XML_PARSE_RECOVER;
12507
0
        ctxt->recovery = 1;
12508
0
    }
12509
12510
0
    input = xmlNewStringInputStream(ctxt, string);
12511
0
    if (input == NULL)
12512
0
        return(ctxt->errNo);
12513
12514
0
    list = xmlCtxtParseContent(ctxt, input, /* hasTextDecl */ 0, 1);
12515
0
    if (listOut != NULL)
12516
0
        *listOut = list;
12517
0
    else
12518
0
        xmlFreeNodeList(list);
12519
12520
0
    ret = ctxt->errNo;
12521
12522
0
    xmlFreeInputStream(input);
12523
0
    xmlFreeParserCtxt(ctxt);
12524
0
    return(ret);
12525
0
}
12526
12527
/**
12528
 * xmlSAXParseEntity:
12529
 * @sax:  the SAX handler block
12530
 * @filename:  the filename
12531
 *
12532
 * DEPRECATED: Don't use.
12533
 *
12534
 * parse an XML external entity out of context and build a tree.
12535
 * It use the given SAX function block to handle the parsing callback.
12536
 * If sax is NULL, fallback to the default DOM tree building routines.
12537
 *
12538
 * [78] extParsedEnt ::= TextDecl? content
12539
 *
12540
 * This correspond to a "Well Balanced" chunk
12541
 *
12542
 * Returns the resulting document tree
12543
 */
12544
12545
xmlDocPtr
12546
0
xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
12547
0
    xmlDocPtr ret;
12548
0
    xmlParserCtxtPtr ctxt;
12549
12550
0
    ctxt = xmlCreateFileParserCtxt(filename);
12551
0
    if (ctxt == NULL) {
12552
0
  return(NULL);
12553
0
    }
12554
0
    if (sax != NULL) {
12555
0
        if (sax->initialized == XML_SAX2_MAGIC) {
12556
0
            *ctxt->sax = *sax;
12557
0
        } else {
12558
0
            memset(ctxt->sax, 0, sizeof(*ctxt->sax));
12559
0
            memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12560
0
        }
12561
0
        ctxt->userData = NULL;
12562
0
    }
12563
12564
0
    xmlParseExtParsedEnt(ctxt);
12565
12566
0
    if (ctxt->wellFormed) {
12567
0
  ret = ctxt->myDoc;
12568
0
    } else {
12569
0
        ret = NULL;
12570
0
        xmlFreeDoc(ctxt->myDoc);
12571
0
    }
12572
12573
0
    xmlFreeParserCtxt(ctxt);
12574
12575
0
    return(ret);
12576
0
}
12577
12578
/**
12579
 * xmlParseEntity:
12580
 * @filename:  the filename
12581
 *
12582
 * parse an XML external entity out of context and build a tree.
12583
 *
12584
 * [78] extParsedEnt ::= TextDecl? content
12585
 *
12586
 * This correspond to a "Well Balanced" chunk
12587
 *
12588
 * Returns the resulting document tree
12589
 */
12590
12591
xmlDocPtr
12592
0
xmlParseEntity(const char *filename) {
12593
0
    return(xmlSAXParseEntity(NULL, filename));
12594
0
}
12595
#endif /* LIBXML_SAX1_ENABLED */
12596
12597
/**
12598
 * xmlCreateEntityParserCtxt:
12599
 * @URL:  the entity URL
12600
 * @ID:  the entity PUBLIC ID
12601
 * @base:  a possible base for the target URI
12602
 *
12603
 * DEPRECATED: Don't use.
12604
 *
12605
 * Create a parser context for an external entity
12606
 * Automatic support for ZLIB/Compress compressed document is provided
12607
 * by default if found at compile-time.
12608
 *
12609
 * Returns the new parser context or NULL
12610
 */
12611
xmlParserCtxtPtr
12612
xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
12613
0
                    const xmlChar *base) {
12614
0
    xmlParserCtxtPtr ctxt;
12615
0
    xmlParserInputPtr input;
12616
0
    xmlChar *uri = NULL;
12617
12618
0
    ctxt = xmlNewParserCtxt();
12619
0
    if (ctxt == NULL)
12620
0
  return(NULL);
12621
12622
0
    if (base != NULL) {
12623
0
        if (xmlBuildURISafe(URL, base, &uri) < 0)
12624
0
            goto error;
12625
0
        if (uri != NULL)
12626
0
            URL = uri;
12627
0
    }
12628
12629
0
    input = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
12630
0
    if (input == NULL)
12631
0
        goto error;
12632
12633
0
    if (inputPush(ctxt, input) < 0)
12634
0
        goto error;
12635
12636
0
    xmlFree(uri);
12637
0
    return(ctxt);
12638
12639
0
error:
12640
0
    xmlFree(uri);
12641
0
    xmlFreeParserCtxt(ctxt);
12642
0
    return(NULL);
12643
0
}
12644
12645
/************************************************************************
12646
 *                  *
12647
 *    Front ends when parsing from a file     *
12648
 *                  *
12649
 ************************************************************************/
12650
12651
/**
12652
 * xmlCreateURLParserCtxt:
12653
 * @filename:  the filename or URL
12654
 * @options:  a combination of xmlParserOption
12655
 *
12656
 * DEPRECATED: Use xmlNewParserCtxt and xmlCtxtReadFile.
12657
 *
12658
 * Create a parser context for a file or URL content.
12659
 * Automatic support for ZLIB/Compress compressed document is provided
12660
 * by default if found at compile-time and for file accesses
12661
 *
12662
 * Returns the new parser context or NULL
12663
 */
12664
xmlParserCtxtPtr
12665
xmlCreateURLParserCtxt(const char *filename, int options)
12666
0
{
12667
0
    xmlParserCtxtPtr ctxt;
12668
0
    xmlParserInputPtr input;
12669
12670
0
    ctxt = xmlNewParserCtxt();
12671
0
    if (ctxt == NULL)
12672
0
  return(NULL);
12673
12674
0
    xmlCtxtUseOptions(ctxt, options);
12675
0
    ctxt->linenumbers = 1;
12676
12677
0
    input = xmlLoadExternalEntity(filename, NULL, ctxt);
12678
0
    if (input == NULL) {
12679
0
  xmlFreeParserCtxt(ctxt);
12680
0
  return(NULL);
12681
0
    }
12682
0
    inputPush(ctxt, input);
12683
12684
0
    return(ctxt);
12685
0
}
12686
12687
/**
12688
 * xmlCreateFileParserCtxt:
12689
 * @filename:  the filename
12690
 *
12691
 * DEPRECATED: Use xmlNewParserCtxt and xmlCtxtReadFile.
12692
 *
12693
 * Create a parser context for a file content.
12694
 * Automatic support for ZLIB/Compress compressed document is provided
12695
 * by default if found at compile-time.
12696
 *
12697
 * Returns the new parser context or NULL
12698
 */
12699
xmlParserCtxtPtr
12700
xmlCreateFileParserCtxt(const char *filename)
12701
0
{
12702
0
    return(xmlCreateURLParserCtxt(filename, 0));
12703
0
}
12704
12705
#ifdef LIBXML_SAX1_ENABLED
12706
/**
12707
 * xmlSAXParseFileWithData:
12708
 * @sax:  the SAX handler block
12709
 * @filename:  the filename
12710
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
12711
 *             documents
12712
 * @data:  the userdata
12713
 *
12714
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
12715
 *
12716
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12717
 * compressed document is provided by default if found at compile-time.
12718
 * It use the given SAX function block to handle the parsing callback.
12719
 * If sax is NULL, fallback to the default DOM tree building routines.
12720
 *
12721
 * User data (void *) is stored within the parser context in the
12722
 * context's _private member, so it is available nearly everywhere in libxml
12723
 *
12724
 * Returns the resulting document tree
12725
 */
12726
12727
xmlDocPtr
12728
xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
12729
0
                        int recovery, void *data) {
12730
0
    xmlDocPtr ret;
12731
0
    xmlParserCtxtPtr ctxt;
12732
0
    xmlParserInputPtr input;
12733
12734
0
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
12735
0
    if (ctxt == NULL)
12736
0
  return(NULL);
12737
12738
0
    if (data != NULL)
12739
0
  ctxt->_private = data;
12740
12741
0
    if (recovery) {
12742
0
        ctxt->options |= XML_PARSE_RECOVER;
12743
0
        ctxt->recovery = 1;
12744
0
    }
12745
12746
0
    if ((filename != NULL) && (filename[0] == '-') && (filename[1] == 0))
12747
0
        input = xmlNewInputFd(ctxt, filename, STDIN_FILENO, NULL, 0);
12748
0
    else
12749
0
        input = xmlNewInputURL(ctxt, filename, NULL, NULL, 0);
12750
12751
0
    ret = xmlCtxtParseDocument(ctxt, input);
12752
12753
0
    xmlFreeParserCtxt(ctxt);
12754
0
    return(ret);
12755
0
}
12756
12757
/**
12758
 * xmlSAXParseFile:
12759
 * @sax:  the SAX handler block
12760
 * @filename:  the filename
12761
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
12762
 *             documents
12763
 *
12764
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
12765
 *
12766
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12767
 * compressed document is provided by default if found at compile-time.
12768
 * It use the given SAX function block to handle the parsing callback.
12769
 * If sax is NULL, fallback to the default DOM tree building routines.
12770
 *
12771
 * Returns the resulting document tree
12772
 */
12773
12774
xmlDocPtr
12775
xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
12776
0
                          int recovery) {
12777
0
    return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
12778
0
}
12779
12780
/**
12781
 * xmlRecoverDoc:
12782
 * @cur:  a pointer to an array of xmlChar
12783
 *
12784
 * DEPRECATED: Use xmlReadDoc with XML_PARSE_RECOVER.
12785
 *
12786
 * parse an XML in-memory document and build a tree.
12787
 * In the case the document is not Well Formed, an attempt to build a
12788
 * tree is tried anyway
12789
 *
12790
 * Returns the resulting document tree or NULL in case of failure
12791
 */
12792
12793
xmlDocPtr
12794
0
xmlRecoverDoc(const xmlChar *cur) {
12795
0
    return(xmlSAXParseDoc(NULL, cur, 1));
12796
0
}
12797
12798
/**
12799
 * xmlParseFile:
12800
 * @filename:  the filename
12801
 *
12802
 * DEPRECATED: Use xmlReadFile.
12803
 *
12804
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12805
 * compressed document is provided by default if found at compile-time.
12806
 *
12807
 * Returns the resulting document tree if the file was wellformed,
12808
 * NULL otherwise.
12809
 */
12810
12811
xmlDocPtr
12812
0
xmlParseFile(const char *filename) {
12813
0
    return(xmlSAXParseFile(NULL, filename, 0));
12814
0
}
12815
12816
/**
12817
 * xmlRecoverFile:
12818
 * @filename:  the filename
12819
 *
12820
 * DEPRECATED: Use xmlReadFile with XML_PARSE_RECOVER.
12821
 *
12822
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12823
 * compressed document is provided by default if found at compile-time.
12824
 * In the case the document is not Well Formed, it attempts to build
12825
 * a tree anyway
12826
 *
12827
 * Returns the resulting document tree or NULL in case of failure
12828
 */
12829
12830
xmlDocPtr
12831
0
xmlRecoverFile(const char *filename) {
12832
0
    return(xmlSAXParseFile(NULL, filename, 1));
12833
0
}
12834
12835
12836
/**
12837
 * xmlSetupParserForBuffer:
12838
 * @ctxt:  an XML parser context
12839
 * @buffer:  a xmlChar * buffer
12840
 * @filename:  a file name
12841
 *
12842
 * DEPRECATED: Don't use.
12843
 *
12844
 * Setup the parser context to parse a new buffer; Clears any prior
12845
 * contents from the parser context. The buffer parameter must not be
12846
 * NULL, but the filename parameter can be
12847
 */
12848
void
12849
xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
12850
                             const char* filename)
12851
0
{
12852
0
    xmlParserInputPtr input;
12853
12854
0
    if ((ctxt == NULL) || (buffer == NULL))
12855
0
        return;
12856
12857
0
    xmlClearParserCtxt(ctxt);
12858
12859
0
    input = xmlNewInputString(ctxt, filename, (const char *) buffer, NULL, 0);
12860
0
    if (input == NULL)
12861
0
        return;
12862
0
    inputPush(ctxt, input);
12863
0
}
12864
12865
/**
12866
 * xmlSAXUserParseFile:
12867
 * @sax:  a SAX handler
12868
 * @user_data:  The user data returned on SAX callbacks
12869
 * @filename:  a file name
12870
 *
12871
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
12872
 *
12873
 * parse an XML file and call the given SAX handler routines.
12874
 * Automatic support for ZLIB/Compress compressed document is provided
12875
 *
12876
 * Returns 0 in case of success or a error number otherwise
12877
 */
12878
int
12879
xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
12880
0
                    const char *filename) {
12881
0
    int ret = 0;
12882
0
    xmlParserCtxtPtr ctxt;
12883
12884
0
    ctxt = xmlCreateFileParserCtxt(filename);
12885
0
    if (ctxt == NULL) return -1;
12886
0
    if (sax != NULL) {
12887
0
        if (sax->initialized == XML_SAX2_MAGIC) {
12888
0
            *ctxt->sax = *sax;
12889
0
        } else {
12890
0
            memset(ctxt->sax, 0, sizeof(*ctxt->sax));
12891
0
            memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12892
0
        }
12893
0
  ctxt->userData = user_data;
12894
0
    }
12895
12896
0
    xmlParseDocument(ctxt);
12897
12898
0
    if (ctxt->wellFormed)
12899
0
  ret = 0;
12900
0
    else {
12901
0
        if (ctxt->errNo != 0)
12902
0
      ret = ctxt->errNo;
12903
0
  else
12904
0
      ret = -1;
12905
0
    }
12906
0
    if (ctxt->myDoc != NULL) {
12907
0
        xmlFreeDoc(ctxt->myDoc);
12908
0
  ctxt->myDoc = NULL;
12909
0
    }
12910
0
    xmlFreeParserCtxt(ctxt);
12911
12912
0
    return ret;
12913
0
}
12914
#endif /* LIBXML_SAX1_ENABLED */
12915
12916
/************************************************************************
12917
 *                  *
12918
 *    Front ends when parsing from memory     *
12919
 *                  *
12920
 ************************************************************************/
12921
12922
/**
12923
 * xmlCreateMemoryParserCtxt:
12924
 * @buffer:  a pointer to a char array
12925
 * @size:  the size of the array
12926
 *
12927
 * Create a parser context for an XML in-memory document. The input buffer
12928
 * must not contain a terminating null byte.
12929
 *
12930
 * Returns the new parser context or NULL
12931
 */
12932
xmlParserCtxtPtr
12933
0
xmlCreateMemoryParserCtxt(const char *buffer, int size) {
12934
0
    xmlParserCtxtPtr ctxt;
12935
0
    xmlParserInputPtr input;
12936
12937
0
    if (size < 0)
12938
0
  return(NULL);
12939
12940
0
    ctxt = xmlNewParserCtxt();
12941
0
    if (ctxt == NULL)
12942
0
  return(NULL);
12943
12944
0
    input = xmlNewInputMemory(ctxt, NULL, buffer, size, NULL, 0);
12945
0
    if (input == NULL) {
12946
0
  xmlFreeParserCtxt(ctxt);
12947
0
  return(NULL);
12948
0
    }
12949
0
    inputPush(ctxt, input);
12950
12951
0
    return(ctxt);
12952
0
}
12953
12954
#ifdef LIBXML_SAX1_ENABLED
12955
/**
12956
 * xmlSAXParseMemoryWithData:
12957
 * @sax:  the SAX handler block
12958
 * @buffer:  an pointer to a char array
12959
 * @size:  the size of the array
12960
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
12961
 *             documents
12962
 * @data:  the userdata
12963
 *
12964
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
12965
 *
12966
 * parse an XML in-memory block and use the given SAX function block
12967
 * to handle the parsing callback. If sax is NULL, fallback to the default
12968
 * DOM tree building routines.
12969
 *
12970
 * User data (void *) is stored within the parser context in the
12971
 * context's _private member, so it is available nearly everywhere in libxml
12972
 *
12973
 * Returns the resulting document tree
12974
 */
12975
12976
xmlDocPtr
12977
xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
12978
0
                          int size, int recovery, void *data) {
12979
0
    xmlDocPtr ret;
12980
0
    xmlParserCtxtPtr ctxt;
12981
0
    xmlParserInputPtr input;
12982
12983
0
    if (size < 0)
12984
0
        return(NULL);
12985
12986
0
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
12987
0
    if (ctxt == NULL)
12988
0
        return(NULL);
12989
12990
0
    if (data != NULL)
12991
0
  ctxt->_private=data;
12992
12993
0
    if (recovery) {
12994
0
        ctxt->options |= XML_PARSE_RECOVER;
12995
0
        ctxt->recovery = 1;
12996
0
    }
12997
12998
0
    input = xmlNewInputMemory(ctxt, NULL, buffer, size, NULL,
12999
0
                              XML_INPUT_BUF_STATIC);
13000
13001
0
    ret = xmlCtxtParseDocument(ctxt, input);
13002
13003
0
    xmlFreeParserCtxt(ctxt);
13004
0
    return(ret);
13005
0
}
13006
13007
/**
13008
 * xmlSAXParseMemory:
13009
 * @sax:  the SAX handler block
13010
 * @buffer:  an pointer to a char array
13011
 * @size:  the size of the array
13012
 * @recovery:  work in recovery mode, i.e. tries to read not Well Formed
13013
 *             documents
13014
 *
13015
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
13016
 *
13017
 * parse an XML in-memory block and use the given SAX function block
13018
 * to handle the parsing callback. If sax is NULL, fallback to the default
13019
 * DOM tree building routines.
13020
 *
13021
 * Returns the resulting document tree
13022
 */
13023
xmlDocPtr
13024
xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
13025
0
            int size, int recovery) {
13026
0
    return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
13027
0
}
13028
13029
/**
13030
 * xmlParseMemory:
13031
 * @buffer:  an pointer to a char array
13032
 * @size:  the size of the array
13033
 *
13034
 * DEPRECATED: Use xmlReadMemory.
13035
 *
13036
 * parse an XML in-memory block and build a tree.
13037
 *
13038
 * Returns the resulting document tree
13039
 */
13040
13041
0
xmlDocPtr xmlParseMemory(const char *buffer, int size) {
13042
0
   return(xmlSAXParseMemory(NULL, buffer, size, 0));
13043
0
}
13044
13045
/**
13046
 * xmlRecoverMemory:
13047
 * @buffer:  an pointer to a char array
13048
 * @size:  the size of the array
13049
 *
13050
 * DEPRECATED: Use xmlReadMemory with XML_PARSE_RECOVER.
13051
 *
13052
 * parse an XML in-memory block and build a tree.
13053
 * In the case the document is not Well Formed, an attempt to
13054
 * build a tree is tried anyway
13055
 *
13056
 * Returns the resulting document tree or NULL in case of error
13057
 */
13058
13059
0
xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
13060
0
   return(xmlSAXParseMemory(NULL, buffer, size, 1));
13061
0
}
13062
13063
/**
13064
 * xmlSAXUserParseMemory:
13065
 * @sax:  a SAX handler
13066
 * @user_data:  The user data returned on SAX callbacks
13067
 * @buffer:  an in-memory XML document input
13068
 * @size:  the length of the XML document in bytes
13069
 *
13070
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
13071
 *
13072
 * parse an XML in-memory buffer and call the given SAX handler routines.
13073
 *
13074
 * Returns 0 in case of success or a error number otherwise
13075
 */
13076
int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
13077
0
        const char *buffer, int size) {
13078
0
    int ret = 0;
13079
0
    xmlParserCtxtPtr ctxt;
13080
13081
0
    ctxt = xmlCreateMemoryParserCtxt(buffer, size);
13082
0
    if (ctxt == NULL) return -1;
13083
0
    if (sax != NULL) {
13084
0
        if (sax->initialized == XML_SAX2_MAGIC) {
13085
0
            *ctxt->sax = *sax;
13086
0
        } else {
13087
0
            memset(ctxt->sax, 0, sizeof(*ctxt->sax));
13088
0
            memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
13089
0
        }
13090
0
  ctxt->userData = user_data;
13091
0
    }
13092
13093
0
    xmlParseDocument(ctxt);
13094
13095
0
    if (ctxt->wellFormed)
13096
0
  ret = 0;
13097
0
    else {
13098
0
        if (ctxt->errNo != 0)
13099
0
      ret = ctxt->errNo;
13100
0
  else
13101
0
      ret = -1;
13102
0
    }
13103
0
    if (ctxt->myDoc != NULL) {
13104
0
        xmlFreeDoc(ctxt->myDoc);
13105
0
  ctxt->myDoc = NULL;
13106
0
    }
13107
0
    xmlFreeParserCtxt(ctxt);
13108
13109
0
    return ret;
13110
0
}
13111
#endif /* LIBXML_SAX1_ENABLED */
13112
13113
/**
13114
 * xmlCreateDocParserCtxt:
13115
 * @str:  a pointer to an array of xmlChar
13116
 *
13117
 * Creates a parser context for an XML in-memory document.
13118
 *
13119
 * Returns the new parser context or NULL
13120
 */
13121
xmlParserCtxtPtr
13122
0
xmlCreateDocParserCtxt(const xmlChar *str) {
13123
0
    xmlParserCtxtPtr ctxt;
13124
0
    xmlParserInputPtr input;
13125
13126
0
    ctxt = xmlNewParserCtxt();
13127
0
    if (ctxt == NULL)
13128
0
  return(NULL);
13129
13130
0
    input = xmlNewInputString(ctxt, NULL, (const char *) str, NULL, 0);
13131
0
    if (input == NULL) {
13132
0
  xmlFreeParserCtxt(ctxt);
13133
0
  return(NULL);
13134
0
    }
13135
0
    inputPush(ctxt, input);
13136
13137
0
    return(ctxt);
13138
0
}
13139
13140
#ifdef LIBXML_SAX1_ENABLED
13141
/**
13142
 * xmlSAXParseDoc:
13143
 * @sax:  the SAX handler block
13144
 * @cur:  a pointer to an array of xmlChar
13145
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
13146
 *             documents
13147
 *
13148
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadDoc.
13149
 *
13150
 * parse an XML in-memory document and build a tree.
13151
 * It use the given SAX function block to handle the parsing callback.
13152
 * If sax is NULL, fallback to the default DOM tree building routines.
13153
 *
13154
 * Returns the resulting document tree
13155
 */
13156
13157
xmlDocPtr
13158
0
xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
13159
0
    xmlDocPtr ret;
13160
0
    xmlParserCtxtPtr ctxt;
13161
0
    xmlSAXHandlerPtr oldsax = NULL;
13162
13163
0
    if (cur == NULL) return(NULL);
13164
13165
13166
0
    ctxt = xmlCreateDocParserCtxt(cur);
13167
0
    if (ctxt == NULL) return(NULL);
13168
0
    if (sax != NULL) {
13169
0
        oldsax = ctxt->sax;
13170
0
        ctxt->sax = sax;
13171
0
        ctxt->userData = NULL;
13172
0
    }
13173
13174
0
    xmlParseDocument(ctxt);
13175
0
    if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
13176
0
    else {
13177
0
       ret = NULL;
13178
0
       xmlFreeDoc(ctxt->myDoc);
13179
0
       ctxt->myDoc = NULL;
13180
0
    }
13181
0
    if (sax != NULL)
13182
0
  ctxt->sax = oldsax;
13183
0
    xmlFreeParserCtxt(ctxt);
13184
13185
0
    return(ret);
13186
0
}
13187
13188
/**
13189
 * xmlParseDoc:
13190
 * @cur:  a pointer to an array of xmlChar
13191
 *
13192
 * DEPRECATED: Use xmlReadDoc.
13193
 *
13194
 * parse an XML in-memory document and build a tree.
13195
 *
13196
 * Returns the resulting document tree
13197
 */
13198
13199
xmlDocPtr
13200
0
xmlParseDoc(const xmlChar *cur) {
13201
0
    return(xmlSAXParseDoc(NULL, cur, 0));
13202
0
}
13203
#endif /* LIBXML_SAX1_ENABLED */
13204
13205
/************************************************************************
13206
 *                  *
13207
 *  New set (2.6.0) of simpler and more flexible APIs   *
13208
 *                  *
13209
 ************************************************************************/
13210
13211
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A NULL @str is ignored; when "dict" is NULL the string
 * is always freed.
 *
 * Wrapped in do { } while (0) so the macro expands to a single statement
 * and can be used safely in unbraced if/else bodies. Invoke it with a
 * trailing semicolon.
 */
#define DICT_FREE(str)						\
    do {							\
        if ((str) && ((!dict) ||				\
            (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))	\
            xmlFree((char *)(str));				\
    } while (0)
13222
13223
/**
13224
 * xmlCtxtReset:
13225
 * @ctxt: an XML parser context
13226
 *
13227
 * Reset a parser context
13228
 */
13229
void
13230
xmlCtxtReset(xmlParserCtxtPtr ctxt)
13231
0
{
13232
0
    xmlParserInputPtr input;
13233
0
    xmlDictPtr dict;
13234
13235
0
    if (ctxt == NULL)
13236
0
        return;
13237
13238
0
    dict = ctxt->dict;
13239
13240
0
    while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
13241
0
        xmlFreeInputStream(input);
13242
0
    }
13243
0
    ctxt->inputNr = 0;
13244
0
    ctxt->input = NULL;
13245
13246
0
    ctxt->spaceNr = 0;
13247
0
    if (ctxt->spaceTab != NULL) {
13248
0
  ctxt->spaceTab[0] = -1;
13249
0
  ctxt->space = &ctxt->spaceTab[0];
13250
0
    } else {
13251
0
        ctxt->space = NULL;
13252
0
    }
13253
13254
13255
0
    ctxt->nodeNr = 0;
13256
0
    ctxt->node = NULL;
13257
13258
0
    ctxt->nameNr = 0;
13259
0
    ctxt->name = NULL;
13260
13261
0
    ctxt->nsNr = 0;
13262
0
    xmlParserNsReset(ctxt->nsdb);
13263
13264
0
    DICT_FREE(ctxt->version);
13265
0
    ctxt->version = NULL;
13266
0
    DICT_FREE(ctxt->encoding);
13267
0
    ctxt->encoding = NULL;
13268
0
    DICT_FREE(ctxt->extSubURI);
13269
0
    ctxt->extSubURI = NULL;
13270
0
    DICT_FREE(ctxt->extSubSystem);
13271
0
    ctxt->extSubSystem = NULL;
13272
0
    if (ctxt->myDoc != NULL)
13273
0
        xmlFreeDoc(ctxt->myDoc);
13274
0
    ctxt->myDoc = NULL;
13275
13276
0
    ctxt->standalone = -1;
13277
0
    ctxt->hasExternalSubset = 0;
13278
0
    ctxt->hasPErefs = 0;
13279
0
    ctxt->html = 0;
13280
0
    ctxt->instate = XML_PARSER_START;
13281
13282
0
    ctxt->wellFormed = 1;
13283
0
    ctxt->nsWellFormed = 1;
13284
0
    ctxt->disableSAX = 0;
13285
0
    ctxt->valid = 1;
13286
#if 0
13287
    ctxt->vctxt.userData = ctxt;
13288
    ctxt->vctxt.error = xmlParserValidityError;
13289
    ctxt->vctxt.warning = xmlParserValidityWarning;
13290
#endif
13291
0
    ctxt->record_info = 0;
13292
0
    ctxt->checkIndex = 0;
13293
0
    ctxt->endCheckState = 0;
13294
0
    ctxt->inSubset = 0;
13295
0
    ctxt->errNo = XML_ERR_OK;
13296
0
    ctxt->depth = 0;
13297
0
    ctxt->catalogs = NULL;
13298
0
    ctxt->sizeentities = 0;
13299
0
    ctxt->sizeentcopy = 0;
13300
0
    xmlInitNodeInfoSeq(&ctxt->node_seq);
13301
13302
0
    if (ctxt->attsDefault != NULL) {
13303
0
        xmlHashFree(ctxt->attsDefault, xmlHashDefaultDeallocator);
13304
0
        ctxt->attsDefault = NULL;
13305
0
    }
13306
0
    if (ctxt->attsSpecial != NULL) {
13307
0
        xmlHashFree(ctxt->attsSpecial, NULL);
13308
0
        ctxt->attsSpecial = NULL;
13309
0
    }
13310
13311
0
#ifdef LIBXML_CATALOG_ENABLED
13312
0
    if (ctxt->catalogs != NULL)
13313
0
  xmlCatalogFreeLocal(ctxt->catalogs);
13314
0
#endif
13315
0
    ctxt->nbErrors = 0;
13316
0
    ctxt->nbWarnings = 0;
13317
0
    if (ctxt->lastError.code != XML_ERR_OK)
13318
0
        xmlResetError(&ctxt->lastError);
13319
0
}
13320
13321
/**
13322
 * xmlCtxtResetPush:
13323
 * @ctxt: an XML parser context
13324
 * @chunk:  a pointer to an array of chars
13325
 * @size:  number of chars in the array
13326
 * @filename:  an optional file name or URI
13327
 * @encoding:  the document encoding, or NULL
13328
 *
13329
 * Reset a push parser context
13330
 *
13331
 * Returns 0 in case of success and 1 in case of error
13332
 */
13333
int
13334
xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
13335
                 int size, const char *filename, const char *encoding)
13336
0
{
13337
0
    xmlParserInputPtr input;
13338
13339
0
    if (ctxt == NULL)
13340
0
        return(1);
13341
13342
0
    xmlCtxtReset(ctxt);
13343
13344
0
    input = xmlNewInputPush(ctxt, filename, chunk, size, encoding);
13345
0
    if (input == NULL)
13346
0
        return(1);
13347
0
    inputPush(ctxt, input);
13348
13349
0
    return(0);
13350
0
}
13351
13352
static int
13353
xmlCtxtSetOptionsInternal(xmlParserCtxtPtr ctxt, int options, int keepMask)
13354
0
{
13355
0
    int allMask;
13356
13357
0
    if (ctxt == NULL)
13358
0
        return(-1);
13359
13360
    /*
13361
     * XInclude options aren't handled by the parser.
13362
     *
13363
     * XML_PARSE_XINCLUDE
13364
     * XML_PARSE_NOXINCNODE
13365
     * XML_PARSE_NOBASEFIX
13366
     */
13367
0
    allMask = XML_PARSE_RECOVER |
13368
0
              XML_PARSE_NOENT |
13369
0
              XML_PARSE_DTDLOAD |
13370
0
              XML_PARSE_DTDATTR |
13371
0
              XML_PARSE_DTDVALID |
13372
0
              XML_PARSE_NOERROR |
13373
0
              XML_PARSE_NOWARNING |
13374
0
              XML_PARSE_PEDANTIC |
13375
0
              XML_PARSE_NOBLANKS |
13376
0
#ifdef LIBXML_SAX1_ENABLED
13377
0
              XML_PARSE_SAX1 |
13378
0
#endif
13379
0
              XML_PARSE_NONET |
13380
0
              XML_PARSE_NODICT |
13381
0
              XML_PARSE_NSCLEAN |
13382
0
              XML_PARSE_NOCDATA |
13383
0
              XML_PARSE_COMPACT |
13384
0
              XML_PARSE_OLD10 |
13385
0
              XML_PARSE_HUGE |
13386
0
              XML_PARSE_OLDSAX |
13387
0
              XML_PARSE_IGNORE_ENC |
13388
0
              XML_PARSE_BIG_LINES |
13389
0
              XML_PARSE_NO_XXE;
13390
13391
0
    ctxt->options = (ctxt->options & keepMask) | (options & allMask);
13392
13393
    /*
13394
     * For some options, struct members are historically the source
13395
     * of truth. The values are initalized from global variables and
13396
     * old code could also modify them directly. Several older API
13397
     * functions that don't take an options argument rely on these
13398
     * deprecated mechanisms.
13399
     *
13400
     * Once public access to struct members and the globals are
13401
     * disabled, we can use the options bitmask as source of
13402
     * truth, making all these struct members obsolete.
13403
     *
13404
     * The XML_DETECT_IDS flags is misnamed. It simply enables
13405
     * loading of the external subset.
13406
     */
13407
0
    ctxt->recovery = (options & XML_PARSE_RECOVER) ? 1 : 0;
13408
0
    ctxt->replaceEntities = (options & XML_PARSE_NOENT) ? 1 : 0;
13409
0
    ctxt->loadsubset = (options & XML_PARSE_DTDLOAD) ? XML_DETECT_IDS : 0;
13410
0
    ctxt->loadsubset |= (options & XML_PARSE_DTDATTR) ? XML_COMPLETE_ATTRS : 0;
13411
0
    ctxt->validate = (options & XML_PARSE_DTDVALID) ? 1 : 0;
13412
0
    ctxt->pedantic = (options & XML_PARSE_PEDANTIC) ? 1 : 0;
13413
0
    ctxt->keepBlanks = (options & XML_PARSE_NOBLANKS) ? 0 : 1;
13414
0
    ctxt->dictNames = (options & XML_PARSE_NODICT) ? 0 : 1;
13415
13416
    /*
13417
     * Changing SAX callbacks is a bad idea. This should be fixed.
13418
     */
13419
0
    if (options & XML_PARSE_NOBLANKS) {
13420
0
        ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
13421
0
    }
13422
0
    if (options & XML_PARSE_NOCDATA) {
13423
0
        ctxt->sax->cdataBlock = NULL;
13424
0
    }
13425
0
    if (options & XML_PARSE_HUGE) {
13426
0
        if (ctxt->dict != NULL)
13427
0
            xmlDictSetLimit(ctxt->dict, 0);
13428
0
    }
13429
13430
0
    ctxt->linenumbers = 1;
13431
13432
0
    return(options & ~allMask);
13433
0
}
13434
13435
/**
13436
 * xmlCtxtSetOptions:
13437
 * @ctxt: an XML parser context
13438
 * @options:  a bitmask of xmlParserOption values
13439
 *
13440
 * Applies the options to the parser context. Unset options are
13441
 * cleared.
13442
 *
13443
 * Available since 2.13.0. With older versions, you can use
13444
 * xmlCtxtUseOptions.
13445
 *
13446
 * XML_PARSE_RECOVER
13447
 *
13448
 * Enable "recovery" mode which allows non-wellformed documents.
13449
 * How this mode behaves exactly is unspecified and may change
13450
 * without further notice. Use of this feature is DISCOURAGED.
13451
 *
13452
 * XML_PARSE_NOENT
13453
 *
13454
 * Despite the confusing name, this option enables substitution
13455
 * of entities. The resulting tree won't contain any entity
13456
 * reference nodes.
13457
 *
13458
 * This option also enables loading of external entities (both
13459
 * general and parameter entities) which is dangerous. If you
13460
 * process untrusted data, it's recommended to set the
13461
 * XML_PARSE_NO_XXE option to disable loading of external
13462
 * entities.
13463
 *
13464
 * XML_PARSE_DTDLOAD
13465
 *
13466
 * Enables loading of an external DTD and the loading and
13467
 * substitution of external parameter entities. Has no effect
13468
 * if XML_PARSE_NO_XXE is set.
13469
 *
13470
 * XML_PARSE_DTDATTR
13471
 *
13472
 * Adds default attributes from the DTD to the result document.
13473
 *
13474
 * Implies XML_PARSE_DTDLOAD, but loading of external content
13475
 * can be disabled with XML_PARSE_NO_XXE.
13476
 *
13477
 * XML_PARSE_DTDVALID
13478
 *
13479
 * This option enables DTD validation which requires to load
13480
 * external DTDs and external entities (both general and
13481
 * parameter entities) unless XML_PARSE_NO_XXE was set.
13482
 *
13483
 * XML_PARSE_NO_XXE
13484
 *
13485
 * Disables loading of external DTDs or entities.
13486
 *
13487
 * XML_PARSE_NOERROR
13488
 *
13489
 * Disable error and warning reports to the error handlers.
13490
 * Errors are still accessible with xmlCtxtGetLastError.
13491
 *
13492
 * XML_PARSE_NOWARNING
13493
 *
13494
 * Disable warning reports.
13495
 *
13496
 * XML_PARSE_PEDANTIC
13497
 *
13498
 * Enable some pedantic warnings.
13499
 *
13500
 * XML_PARSE_NOBLANKS
13501
 *
13502
 * Remove some text nodes containing only whitespace from the
13503
 * result document. Which nodes are removed depends on DTD
13504
 * element declarations or a conservative heuristic. The
13505
 * reindenting feature of the serialization code relies on this
13506
 * option to be set when parsing. Use of this option is
13507
 * DISCOURAGED.
13508
 *
13509
 * XML_PARSE_SAX1
13510
 *
13511
 * Always invoke the deprecated SAX1 startElement and endElement
13512
 * handlers. This option is DEPRECATED.
13513
 *
13514
 * XML_PARSE_NONET
13515
 *
13516
 * Disable network access with the builtin HTTP and FTP clients.
13517
 *
13518
 * XML_PARSE_NODICT
13519
 *
13520
 * Create a document without interned strings, making all
13521
 * strings separate memory allocations.
13522
 *
13523
 * XML_PARSE_NSCLEAN
13524
 *
13525
 * Remove redundant namespace declarations from the result
13526
 * document.
13527
 *
13528
 * XML_PARSE_NOCDATA
13529
 *
13530
 * Output normal text nodes instead of CDATA nodes.
13531
 *
13532
 * XML_PARSE_COMPACT
13533
 *
13534
 * Store small strings directly in the node struct to save
13535
 * memory.
13536
 *
13537
 * XML_PARSE_OLD10
13538
 *
13539
 * Use old Name productions from before XML 1.0 Fifth Edition.
13540
 * This option is DEPRECATED.
13541
 *
13542
 * XML_PARSE_HUGE
13543
 *
13544
 * Relax some internal limits.
13545
 *
13546
 * Maximum size of text nodes, tags, comments, processing instructions,
13547
 * CDATA sections, entity values
13548
 *
13549
 * normal: 10M
13550
 * huge:    1B
13551
 *
13552
 * Maximum size of names, system literals, pubid literals
13553
 *
13554
 * normal: 50K
13555
 * huge:   10M
13556
 *
13557
 * Maximum nesting depth of elements
13558
 *
13559
 * normal:  256
13560
 * huge:   2048
13561
 *
13562
 * Maximum nesting depth of entities
13563
 *
13564
 * normal: 20
13565
 * huge:   40
13566
 *
13567
 * XML_PARSE_OLDSAX
13568
 *
13569
 * Enable an unspecified legacy mode for SAX parsers. This
13570
 * option is DEPRECATED.
13571
 *
13572
 * XML_PARSE_IGNORE_ENC
13573
 *
13574
 * Ignore the encoding in the XML declaration. This option is
13575
 * mostly unneeded these days. The only effect is to enforce
13576
 * UTF-8 decoding of ASCII-like data.
13577
 *
13578
 * XML_PARSE_BIG_LINES
13579
 *
13580
 * Enable reporting of line numbers larger than 65535.
13581
 *
13582
 * Returns 0 in case of success, the set of unknown or unimplemented options
13583
 *         in case of error.
13584
 */
13585
int
13586
xmlCtxtSetOptions(xmlParserCtxtPtr ctxt, int options)
13587
0
{
13588
0
    return(xmlCtxtSetOptionsInternal(ctxt, options, 0));
13589
0
}
13590
13591
/**
13592
 * xmlCtxtUseOptions:
13593
 * @ctxt: an XML parser context
13594
 * @options:  a combination of xmlParserOption
13595
 *
13596
 * DEPRECATED: Use xmlCtxtSetOptions.
13597
 *
13598
 * Applies the options to the parser context. The following options
13599
 * are never cleared and can only be enabled:
13600
 *
13601
 * XML_PARSE_NOERROR
13602
 * XML_PARSE_NOWARNING
13603
 * XML_PARSE_NONET
13604
 * XML_PARSE_NSCLEAN
13605
 * XML_PARSE_NOCDATA
13606
 * XML_PARSE_COMPACT
13607
 * XML_PARSE_OLD10
13608
 * XML_PARSE_HUGE
13609
 * XML_PARSE_OLDSAX
13610
 * XML_PARSE_IGNORE_ENC
13611
 * XML_PARSE_BIG_LINES
13612
 *
13613
 * Returns 0 in case of success, the set of unknown or unimplemented options
13614
 *         in case of error.
13615
 */
13616
int
13617
xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
13618
0
{
13619
0
    int keepMask;
13620
13621
    /*
13622
     * For historic reasons, some options can only be enabled.
13623
     */
13624
0
    keepMask = XML_PARSE_NOERROR |
13625
0
               XML_PARSE_NOWARNING |
13626
0
               XML_PARSE_NONET |
13627
0
               XML_PARSE_NSCLEAN |
13628
0
               XML_PARSE_NOCDATA |
13629
0
               XML_PARSE_COMPACT |
13630
0
               XML_PARSE_OLD10 |
13631
0
               XML_PARSE_HUGE |
13632
0
               XML_PARSE_OLDSAX |
13633
0
               XML_PARSE_IGNORE_ENC |
13634
0
               XML_PARSE_BIG_LINES;
13635
13636
0
    return(xmlCtxtSetOptionsInternal(ctxt, options, keepMask));
13637
0
}
13638
13639
/**
13640
 * xmlCtxtSetMaxAmplification:
13641
 * @ctxt: an XML parser context
13642
 * @maxAmpl:  maximum amplification factor
13643
 *
13644
 * To protect against exponential entity expansion ("billion laughs"), the
13645
 * size of serialized output is (roughly) limited to the input size
13646
 * multiplied by this factor. The default value is 5.
13647
 *
13648
 * When working with documents making heavy use of entity expansion, it can
13649
 * be necessary to increase the value. For security reasons, this should only
13650
 * be considered when processing trusted input.
13651
 */
13652
void
13653
xmlCtxtSetMaxAmplification(xmlParserCtxtPtr ctxt, unsigned maxAmpl)
13654
0
{
13655
0
    ctxt->maxAmpl = maxAmpl;
13656
0
}
13657
13658
/**
13659
 * xmlCtxtParseDocument:
13660
 * @ctxt:  an XML parser context
13661
 * @input:  parser input
13662
 *
13663
 * Parse an XML document and return the resulting document tree.
13664
 * Takes ownership of the input object.
13665
 *
13666
 * Available since 2.13.0.
13667
 *
13668
 * Returns the resulting document tree or NULL
13669
 */
13670
xmlDocPtr
13671
xmlCtxtParseDocument(xmlParserCtxtPtr ctxt, xmlParserInputPtr input)
13672
0
{
13673
0
    xmlDocPtr ret = NULL;
13674
13675
0
    if ((ctxt == NULL) || (input == NULL))
13676
0
        return(NULL);
13677
13678
    /* assert(ctxt->inputNr == 0); */
13679
0
    while (ctxt->inputNr > 0)
13680
0
        xmlFreeInputStream(inputPop(ctxt));
13681
13682
0
    if (inputPush(ctxt, input) < 0) {
13683
0
        xmlFreeInputStream(input);
13684
0
        return(NULL);
13685
0
    }
13686
13687
0
    xmlParseDocument(ctxt);
13688
13689
0
    if ((ctxt->wellFormed) ||
13690
0
        ((ctxt->recovery) && (ctxt->errNo != XML_ERR_NO_MEMORY))) {
13691
0
        ret = ctxt->myDoc;
13692
0
    } else {
13693
0
        if (ctxt->errNo == XML_ERR_OK)
13694
0
            xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR, "unknown error\n");
13695
13696
0
        ret = NULL;
13697
0
  xmlFreeDoc(ctxt->myDoc);
13698
0
    }
13699
0
    ctxt->myDoc = NULL;
13700
13701
    /* assert(ctxt->inputNr == 1); */
13702
0
    while (ctxt->inputNr > 0)
13703
0
        xmlFreeInputStream(inputPop(ctxt));
13704
13705
0
    return(ret);
13706
0
}
13707
13708
/**
13709
 * xmlReadDoc:
13710
 * @cur:  a pointer to a zero terminated string
13711
 * @URL:  base URL (optional)
13712
 * @encoding:  the document encoding (optional)
13713
 * @options:  a combination of xmlParserOption
13714
 *
13715
 * Convenience function to parse an XML document from a
13716
 * zero-terminated string.
13717
 *
13718
 * See xmlCtxtReadDoc for details.
13719
 *
13720
 * Returns the resulting document tree
13721
 */
13722
xmlDocPtr
13723
xmlReadDoc(const xmlChar *cur, const char *URL, const char *encoding,
13724
           int options)
13725
0
{
13726
0
    xmlParserCtxtPtr ctxt;
13727
0
    xmlParserInputPtr input;
13728
0
    xmlDocPtr doc;
13729
13730
0
    ctxt = xmlNewParserCtxt();
13731
0
    if (ctxt == NULL)
13732
0
        return(NULL);
13733
13734
0
    xmlCtxtUseOptions(ctxt, options);
13735
13736
0
    input = xmlNewInputString(ctxt, URL, (const char *) cur, encoding,
13737
0
                              XML_INPUT_BUF_STATIC);
13738
13739
0
    doc = xmlCtxtParseDocument(ctxt, input);
13740
13741
0
    xmlFreeParserCtxt(ctxt);
13742
0
    return(doc);
13743
0
}
13744
13745
/**
13746
 * xmlReadFile:
13747
 * @filename:  a file or URL
13748
 * @encoding:  the document encoding (optional)
13749
 * @options:  a combination of xmlParserOption
13750
 *
13751
 * Convenience function to parse an XML file from the filesystem,
13752
 * the network or a global user-define resource loader.
13753
 *
13754
 * See xmlCtxtReadFile for details.
13755
 *
13756
 * Returns the resulting document tree
13757
 */
13758
xmlDocPtr
13759
xmlReadFile(const char *filename, const char *encoding, int options)
13760
0
{
13761
0
    xmlParserCtxtPtr ctxt;
13762
0
    xmlParserInputPtr input;
13763
0
    xmlDocPtr doc;
13764
13765
0
    ctxt = xmlNewParserCtxt();
13766
0
    if (ctxt == NULL)
13767
0
        return(NULL);
13768
13769
0
    xmlCtxtUseOptions(ctxt, options);
13770
13771
    /*
13772
     * Backward compatibility for users of command line utilities like
13773
     * xmlstarlet expecting "-" to mean stdin. This is dangerous and
13774
     * should be removed at some point.
13775
     */
13776
0
    if ((filename != NULL) && (filename[0] == '-') && (filename[1] == 0))
13777
0
        input = xmlNewInputFd(ctxt, filename, STDIN_FILENO, encoding, 0);
13778
0
    else
13779
0
        input = xmlNewInputURL(ctxt, filename, NULL, encoding, 0);
13780
13781
0
    doc = xmlCtxtParseDocument(ctxt, input);
13782
13783
0
    xmlFreeParserCtxt(ctxt);
13784
0
    return(doc);
13785
0
}
13786
13787
/**
13788
 * xmlReadMemory:
13789
 * @buffer:  a pointer to a char array
13790
 * @size:  the size of the array
13791
 * @url:  base URL (optional)
13792
 * @encoding:  the document encoding (optional)
13793
 * @options:  a combination of xmlParserOption
13794
 *
13795
 * Parse an XML in-memory document and build a tree. The input buffer must
13796
 * not contain a terminating null byte.
13797
 *
13798
 * See xmlCtxtReadMemory for details.
13799
 *
13800
 * Returns the resulting document tree
13801
 */
13802
xmlDocPtr
13803
xmlReadMemory(const char *buffer, int size, const char *url,
13804
              const char *encoding, int options)
13805
0
{
13806
0
    xmlParserCtxtPtr ctxt;
13807
0
    xmlParserInputPtr input;
13808
0
    xmlDocPtr doc;
13809
13810
0
    if (size < 0)
13811
0
  return(NULL);
13812
13813
0
    ctxt = xmlNewParserCtxt();
13814
0
    if (ctxt == NULL)
13815
0
        return(NULL);
13816
13817
0
    xmlCtxtUseOptions(ctxt, options);
13818
13819
0
    input = xmlNewInputMemory(ctxt, url, buffer, size, encoding,
13820
0
                              XML_INPUT_BUF_STATIC);
13821
13822
0
    doc = xmlCtxtParseDocument(ctxt, input);
13823
13824
0
    xmlFreeParserCtxt(ctxt);
13825
0
    return(doc);
13826
0
}
13827
13828
/**
13829
 * xmlReadFd:
13830
 * @fd:  an open file descriptor
13831
 * @URL:  base URL (optional)
13832
 * @encoding:  the document encoding (optional)
13833
 * @options:  a combination of xmlParserOption
13834
 *
13835
 * Parse an XML from a file descriptor and build a tree.
13836
 *
13837
 * See xmlCtxtReadFd for details.
13838
 *
13839
 * NOTE that the file descriptor will not be closed when the
13840
 * context is freed or reset.
13841
 *
13842
 * Returns the resulting document tree
13843
 */
13844
xmlDocPtr
13845
xmlReadFd(int fd, const char *URL, const char *encoding, int options)
13846
0
{
13847
0
    xmlParserCtxtPtr ctxt;
13848
0
    xmlParserInputPtr input;
13849
0
    xmlDocPtr doc;
13850
13851
0
    ctxt = xmlNewParserCtxt();
13852
0
    if (ctxt == NULL)
13853
0
        return(NULL);
13854
13855
0
    xmlCtxtUseOptions(ctxt, options);
13856
13857
0
    input = xmlNewInputFd(ctxt, URL, fd, encoding, 0);
13858
13859
0
    doc = xmlCtxtParseDocument(ctxt, input);
13860
13861
0
    xmlFreeParserCtxt(ctxt);
13862
0
    return(doc);
13863
0
}
13864
13865
/**
13866
 * xmlReadIO:
13867
 * @ioread:  an I/O read function
13868
 * @ioclose:  an I/O close function (optional)
13869
 * @ioctx:  an I/O handler
13870
 * @URL:  base URL (optional)
13871
 * @encoding:  the document encoding (optional)
13872
 * @options:  a combination of xmlParserOption
13873
 *
13874
 * Parse an XML document from I/O functions and context and build a tree.
13875
 *
13876
 * See xmlCtxtReadIO for details.
13877
 *
13878
 * Returns the resulting document tree
13879
 */
13880
xmlDocPtr
13881
xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
13882
          void *ioctx, const char *URL, const char *encoding, int options)
13883
0
{
13884
0
    xmlParserCtxtPtr ctxt;
13885
0
    xmlParserInputPtr input;
13886
0
    xmlDocPtr doc;
13887
13888
0
    ctxt = xmlNewParserCtxt();
13889
0
    if (ctxt == NULL)
13890
0
        return(NULL);
13891
13892
0
    xmlCtxtUseOptions(ctxt, options);
13893
13894
0
    input = xmlNewInputIO(ctxt, URL, ioread, ioclose, ioctx, encoding, 0);
13895
13896
0
    doc = xmlCtxtParseDocument(ctxt, input);
13897
13898
0
    xmlFreeParserCtxt(ctxt);
13899
0
    return(doc);
13900
0
}
13901
13902
/**
13903
 * xmlCtxtReadDoc:
13904
 * @ctxt:  an XML parser context
13905
 * @str:  a pointer to a zero terminated string
13906
 * @URL:  base URL (optional)
13907
 * @encoding:  the document encoding (optional)
13908
 * @options:  a combination of xmlParserOption
13909
 *
13910
 * Parse an XML in-memory document and build a tree.
13911
 *
13912
 * @URL is used as base to resolve external entities and for error
13913
 * reporting.
13914
 *
13915
 * See xmlCtxtUseOptions for details.
13916
 *
13917
 * Returns the resulting document tree
13918
 */
13919
xmlDocPtr
13920
xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar *str,
13921
               const char *URL, const char *encoding, int options)
13922
0
{
13923
0
    xmlParserInputPtr input;
13924
13925
0
    if (ctxt == NULL)
13926
0
        return(NULL);
13927
13928
0
    xmlCtxtReset(ctxt);
13929
0
    xmlCtxtUseOptions(ctxt, options);
13930
13931
0
    input = xmlNewInputString(ctxt, URL, (const char *) str, encoding,
13932
0
                              XML_INPUT_BUF_STATIC);
13933
13934
0
    return(xmlCtxtParseDocument(ctxt, input));
13935
0
}
13936
13937
/**
13938
 * xmlCtxtReadFile:
13939
 * @ctxt:  an XML parser context
13940
 * @filename:  a file or URL
13941
 * @encoding:  the document encoding (optional)
13942
 * @options:  a combination of xmlParserOption
13943
 *
13944
 * Parse an XML file from the filesystem, the network or a user-defined
13945
 * resource loader.
13946
 *
13947
 * Returns the resulting document tree
13948
 */
13949
xmlDocPtr
13950
xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
13951
                const char *encoding, int options)
13952
0
{
13953
0
    xmlParserInputPtr input;
13954
13955
0
    if (ctxt == NULL)
13956
0
        return(NULL);
13957
13958
0
    xmlCtxtReset(ctxt);
13959
0
    xmlCtxtUseOptions(ctxt, options);
13960
13961
0
    input = xmlNewInputURL(ctxt, filename, NULL, encoding, 0);
13962
13963
0
    return(xmlCtxtParseDocument(ctxt, input));
13964
0
}
13965
13966
/**
13967
 * xmlCtxtReadMemory:
13968
 * @ctxt:  an XML parser context
13969
 * @buffer:  a pointer to a char array
13970
 * @size:  the size of the array
13971
 * @URL:  base URL (optional)
13972
 * @encoding:  the document encoding (optional)
13973
 * @options:  a combination of xmlParserOption
13974
 *
13975
 * Parse an XML in-memory document and build a tree. The input buffer must
13976
 * not contain a terminating null byte.
13977
 *
13978
 * @URL is used as base to resolve external entities and for error
13979
 * reporting.
13980
 *
13981
 * See xmlCtxtUseOptions for details.
13982
 *
13983
 * Returns the resulting document tree
13984
 */
13985
xmlDocPtr
13986
xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
13987
                  const char *URL, const char *encoding, int options)
13988
0
{
13989
0
    xmlParserInputPtr input;
13990
13991
0
    if ((ctxt == NULL) || (size < 0))
13992
0
        return(NULL);
13993
13994
0
    xmlCtxtReset(ctxt);
13995
0
    xmlCtxtUseOptions(ctxt, options);
13996
13997
0
    input = xmlNewInputMemory(ctxt, URL, buffer, size, encoding,
13998
0
                              XML_INPUT_BUF_STATIC);
13999
14000
0
    return(xmlCtxtParseDocument(ctxt, input));
14001
0
}
14002
14003
/**
14004
 * xmlCtxtReadFd:
14005
 * @ctxt:  an XML parser context
14006
 * @fd:  an open file descriptor
14007
 * @URL:  base URL (optional)
14008
 * @encoding:  the document encoding (optional)
14009
 * @options:  a combination of xmlParserOption
14010
 *
14011
 * Parse an XML document from a file descriptor and build a tree.
14012
 *
14013
 * NOTE that the file descriptor will not be closed when the
14014
 * context is freed or reset.
14015
 *
14016
 * @URL is used as base to resolve external entities and for error
14017
 * reporting.
14018
 *
14019
 * See xmlCtxtUseOptions for details.
14020
 *
14021
 * Returns the resulting document tree
14022
 */
14023
xmlDocPtr
14024
xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
14025
              const char *URL, const char *encoding, int options)
14026
0
{
14027
0
    xmlParserInputPtr input;
14028
14029
0
    if (ctxt == NULL)
14030
0
        return(NULL);
14031
14032
0
    xmlCtxtReset(ctxt);
14033
0
    xmlCtxtUseOptions(ctxt, options);
14034
14035
0
    input = xmlNewInputFd(ctxt, URL, fd, encoding, 0);
14036
14037
0
    return(xmlCtxtParseDocument(ctxt, input));
14038
0
}
14039
14040
/**
14041
 * xmlCtxtReadIO:
14042
 * @ctxt:  an XML parser context
14043
 * @ioread:  an I/O read function
14044
 * @ioclose:  an I/O close function
14045
 * @ioctx:  an I/O handler
14046
 * @URL:  the base URL to use for the document
14047
 * @encoding:  the document encoding, or NULL
14048
 * @options:  a combination of xmlParserOption
14049
 *
14050
 * parse an XML document from I/O functions and source and build a tree.
14051
 * This reuses the existing @ctxt parser context
14052
 *
14053
 * @URL is used as base to resolve external entities and for error
14054
 * reporting.
14055
 *
14056
 * See xmlCtxtUseOptions for details.
14057
 *
14058
 * Returns the resulting document tree
14059
 */
14060
xmlDocPtr
14061
xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
14062
              xmlInputCloseCallback ioclose, void *ioctx,
14063
        const char *URL,
14064
              const char *encoding, int options)
14065
0
{
14066
0
    xmlParserInputPtr input;
14067
14068
0
    if (ctxt == NULL)
14069
0
        return(NULL);
14070
14071
0
    xmlCtxtReset(ctxt);
14072
0
    xmlCtxtUseOptions(ctxt, options);
14073
14074
0
    input = xmlNewInputIO(ctxt, URL, ioread, ioclose, ioctx, encoding, 0);
14075
14076
0
    return(xmlCtxtParseDocument(ctxt, input));
14077
0
}
14078