Coverage Report

Created: 2026-03-12 06:42

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libxml2-2.9.7/parser.c
Line
Count
Source
1
/*
2
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3
 *            implemented on top of the SAX interfaces
4
 *
5
 * References:
6
 *   The XML specification:
7
 *     http://www.w3.org/TR/REC-xml
8
 *   Original 1.0 version:
9
 *     http://www.w3.org/TR/1998/REC-xml-19980210
10
 *   XML second edition working draft
11
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
12
 *
13
 * Okay this is a big file, the parser core is around 7000 lines, then it
14
 * is followed by the progressive parser top routines, then the various
15
 * high level APIs to call the parser and a few miscellaneous functions.
16
 * A number of helper functions and deprecated ones have been moved to
17
 * parserInternals.c to reduce this file size.
18
 * As much as possible the functions are associated with their relative
19
 * production in the XML specification. A few productions defining the
20
 * different ranges of character are actually implemented either in
21
 * parserInternals.h or parserInternals.c
22
 * The DOM tree build is realized from the default SAX callbacks in
23
 * the module SAX.c.
24
 * The routines doing the validation checks are in valid.c and called either
25
 * from the SAX callbacks or as standalone functions using a preparsed
26
 * document.
27
 *
28
 * See Copyright for the status of this software.
29
 *
30
 * daniel@veillard.com
31
 */
32
33
/* To avoid EBCDIC trouble when parsing on zOS */
34
#if defined(__MVS__)
35
#pragma convert("ISO8859-1")
36
#endif
37
38
#define IN_LIBXML
39
#include "libxml.h"
40
41
#if defined(_WIN32) && !defined (__CYGWIN__)
42
#define XML_DIR_SEP '\\'
43
#else
44
#define XML_DIR_SEP '/'
45
#endif
46
47
#include <stdlib.h>
48
#include <limits.h>
49
#include <string.h>
50
#include <stdarg.h>
51
#include <stddef.h>
52
#include <libxml/xmlmemory.h>
53
#include <libxml/threads.h>
54
#include <libxml/globals.h>
55
#include <libxml/tree.h>
56
#include <libxml/parser.h>
57
#include <libxml/parserInternals.h>
58
#include <libxml/valid.h>
59
#include <libxml/entities.h>
60
#include <libxml/xmlerror.h>
61
#include <libxml/encoding.h>
62
#include <libxml/xmlIO.h>
63
#include <libxml/uri.h>
64
#ifdef LIBXML_CATALOG_ENABLED
65
#include <libxml/catalog.h>
66
#endif
67
#ifdef LIBXML_SCHEMAS_ENABLED
68
#include <libxml/xmlschemastypes.h>
69
#include <libxml/relaxng.h>
70
#endif
71
#ifdef HAVE_CTYPE_H
72
#include <ctype.h>
73
#endif
74
#ifdef HAVE_STDLIB_H
75
#include <stdlib.h>
76
#endif
77
#ifdef HAVE_SYS_STAT_H
78
#include <sys/stat.h>
79
#endif
80
#ifdef HAVE_FCNTL_H
81
#include <fcntl.h>
82
#endif
83
#ifdef HAVE_UNISTD_H
84
#include <unistd.h>
85
#endif
86
#ifdef HAVE_ZLIB_H
87
#include <zlib.h>
88
#endif
89
#ifdef HAVE_LZMA_H
90
#include <lzma.h>
91
#endif
92
93
#include "buf.h"
94
#include "enc.h"
95
96
static void
97
xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info);
98
99
static xmlParserCtxtPtr
100
xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
101
                    const xmlChar *base, xmlParserCtxtPtr pctx);
102
103
static void xmlHaltParser(xmlParserCtxtPtr ctxt);
104
105
/************************************************************************
106
 *                  *
107
 *  Arbitrary limits set in the parser. See XML_PARSE_HUGE    *
108
 *                  *
109
 ************************************************************************/
110
111
53.3k
#define XML_PARSER_BIG_ENTITY 1000
112
#define XML_PARSER_LOT_ENTITY 5000
113
114
/*
115
 * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity
116
 *    replacement over the size in byte of the input indicates that you have
117
 *    an exponential behaviour. A value of 10 corresponds to at least 3 entity
118
 *    replacement per byte of input.
119
 */
120
676k
#define XML_PARSER_NON_LINEAR 10
121
122
/*
123
 * xmlParserEntityCheck
124
 *
125
 * Function to check non-linear entity expansion behaviour
126
 * This is here to detect and stop exponential (non-linear) entity expansion
127
 * This is not a limitation of the parser but a safety
128
 * boundary feature. It can be disabled with the XML_PARSE_HUGE
129
 * parser option.
130
 */
131
static int
132
xmlParserEntityCheck(xmlParserCtxtPtr ctxt, size_t size,
133
                     xmlEntityPtr ent, size_t replacement)
134
2.14M
{
135
2.14M
    size_t consumed = 0;
136
137
2.14M
    if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE))
138
0
        return (0);
139
2.14M
    if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
140
519k
        return (1);
141
142
    /*
143
     * This may look absurd but is needed to detect
144
     * entity problems
145
     */
146
1.62M
    if ((ent != NULL) && (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
147
663k
  (ent->content != NULL) && (ent->checked == 0) &&
148
18.2k
  (ctxt->errNo != XML_ERR_ENTITY_LOOP)) {
149
18.2k
  unsigned long oldnbent = ctxt->nbentities;
150
18.2k
  xmlChar *rep;
151
152
18.2k
  ent->checked = 1;
153
154
18.2k
        ++ctxt->depth;
155
18.2k
  rep = xmlStringDecodeEntities(ctxt, ent->content,
156
18.2k
          XML_SUBSTITUTE_REF, 0, 0, 0);
157
18.2k
        --ctxt->depth;
158
18.2k
  if (ctxt->errNo == XML_ERR_ENTITY_LOOP) {
159
9.43k
      ent->content[0] = 0;
160
9.43k
  }
161
162
18.2k
  ent->checked = (ctxt->nbentities - oldnbent + 1) * 2;
163
18.2k
  if (rep != NULL) {
164
15.2k
      if (xmlStrchr(rep, '<'))
165
3.76k
    ent->checked |= 1;
166
15.2k
      xmlFree(rep);
167
15.2k
      rep = NULL;
168
15.2k
  }
169
18.2k
    }
170
1.62M
    if (replacement != 0) {
171
0
  if (replacement < XML_MAX_TEXT_LENGTH)
172
0
      return(0);
173
174
        /*
175
   * If the volume of entity copy reaches 10 times the
176
   * amount of parsed data and over the large text threshold
177
   * then that's very likely to be an abuse.
178
   */
179
0
        if (ctxt->input != NULL) {
180
0
      consumed = ctxt->input->consumed +
181
0
                 (ctxt->input->cur - ctxt->input->base);
182
0
  }
183
0
        consumed += ctxt->sizeentities;
184
185
0
        if (replacement < XML_PARSER_NON_LINEAR * consumed)
186
0
      return(0);
187
1.62M
    } else if (size != 0) {
188
        /*
189
         * Do the check based on the replacement size of the entity
190
         */
191
53.3k
        if (size < XML_PARSER_BIG_ENTITY)
192
45.8k
      return(0);
193
194
        /*
195
         * A limit on the amount of text data reasonably used
196
         */
197
7.56k
        if (ctxt->input != NULL) {
198
7.56k
            consumed = ctxt->input->consumed +
199
7.56k
                (ctxt->input->cur - ctxt->input->base);
200
7.56k
        }
201
7.56k
        consumed += ctxt->sizeentities;
202
203
7.56k
        if ((size < XML_PARSER_NON_LINEAR * consumed) &&
204
7.54k
      (ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed))
205
6.88k
            return (0);
206
1.57M
    } else if (ent != NULL) {
207
        /*
208
         * use the number of parsed entities in the replacement
209
         */
210
661k
        size = ent->checked / 2;
211
212
        /*
213
         * The amount of data parsed counting entities size only once
214
         */
215
661k
        if (ctxt->input != NULL) {
216
661k
            consumed = ctxt->input->consumed +
217
661k
                (ctxt->input->cur - ctxt->input->base);
218
661k
        }
219
661k
        consumed += ctxt->sizeentities;
220
221
        /*
222
         * Check the density of entities for the amount of data
223
   * knowing an entity reference will take at least 3 bytes
224
         */
225
661k
        if (size * 3 < consumed * XML_PARSER_NON_LINEAR)
226
601k
            return (0);
227
912k
    } else {
228
        /*
229
         * strange we got no data for checking
230
         */
231
912k
  if (((ctxt->lastError.code != XML_ERR_UNDECLARED_ENTITY) &&
232
352k
       (ctxt->lastError.code != XML_WAR_UNDECLARED_ENTITY)) ||
233
862k
      (ctxt->nbentities <= 10000))
234
635k
      return (0);
235
912k
    }
236
338k
    xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
237
338k
    return (1);
238
1.62M
}
239
240
/**
241
 * xmlParserMaxDepth:
242
 *
243
 * arbitrary depth limit for the XML documents that we allow to
244
 * process. This is not a limitation of the parser but a safety
245
 * boundary feature. It can be disabled with the XML_PARSE_HUGE
246
 * parser option.
247
 */
248
unsigned int xmlParserMaxDepth = 256;
249
250
251
252
#define SAX2 1
253
979M
#define XML_PARSER_BIG_BUFFER_SIZE 300
254
2.43G
#define XML_PARSER_BUFFER_SIZE 100
255
148k
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
256
257
/**
258
 * XML_PARSER_CHUNK_SIZE
259
 *
260
 * When calling GROW that's the minimal amount of data
261
 * the parser expects to have received. It is not a hard
262
 * limit but an optimization when reading strings like Names
263
 * It is not strictly needed as long as the input's available characters
264
 * are followed by 0, which should be provided by the I/O level
265
 */
266
309M
#define XML_PARSER_CHUNK_SIZE 100
267
268
/*
269
 * List of XML prefixed PI allowed by W3C specs
270
 */
271
272
static const char *xmlW3CPIs[] = {
273
    "xml-stylesheet",
274
    "xml-model",
275
    NULL
276
};
277
278
279
/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
280
static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
281
                                              const xmlChar **str);
282
283
static xmlParserErrors
284
xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
285
                xmlSAXHandlerPtr sax,
286
          void *user_data, int depth, const xmlChar *URL,
287
          const xmlChar *ID, xmlNodePtr *list);
288
289
static int
290
xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
291
                          const char *encoding);
292
#ifdef LIBXML_LEGACY_ENABLED
293
static void
294
xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
295
                      xmlNodePtr lastNode);
296
#endif /* LIBXML_LEGACY_ENABLED */
297
298
static xmlParserErrors
299
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
300
          const xmlChar *string, void *user_data, xmlNodePtr *lst);
301
302
static int
303
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
304
305
/************************************************************************
306
 *                  *
307
 *    Some factorized error routines        *
308
 *                  *
309
 ************************************************************************/
310
311
/**
312
 * xmlErrAttributeDup:
313
 * @ctxt:  an XML parser context
314
 * @prefix:  the attribute prefix
315
 * @localname:  the attribute localname
316
 *
317
 * Handle a redefinition of attribute error
318
 */
319
static void
320
xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
321
                   const xmlChar * localname)
322
106k
{
323
106k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
324
21.8k
        (ctxt->instate == XML_PARSER_EOF))
325
0
  return;
326
106k
    if (ctxt != NULL)
327
106k
  ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
328
329
106k
    if (prefix == NULL)
330
81.4k
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
331
81.4k
                        XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
332
81.4k
                        (const char *) localname, NULL, NULL, 0, 0,
333
81.4k
                        "Attribute %s redefined\n", localname);
334
25.3k
    else
335
25.3k
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
336
25.3k
                        XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
337
25.3k
                        (const char *) prefix, (const char *) localname,
338
25.3k
                        NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
339
25.3k
                        localname);
340
106k
    if (ctxt != NULL) {
341
106k
  ctxt->wellFormed = 0;
342
106k
  if (ctxt->recovery == 0)
343
21.9k
      ctxt->disableSAX = 1;
344
106k
    }
345
106k
}
346
347
/**
348
 * xmlFatalErr:
349
 * @ctxt:  an XML parser context
350
 * @error:  the error number
351
 * @info:  extra information string
352
 *
353
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
354
 */
355
static void
356
xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
357
5.05M
{
358
5.05M
    const char *errmsg;
359
360
5.05M
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
361
683k
        (ctxt->instate == XML_PARSER_EOF))
362
232k
  return;
363
4.82M
    switch (error) {
364
130k
        case XML_ERR_INVALID_HEX_CHARREF:
365
130k
            errmsg = "CharRef: invalid hexadecimal value";
366
130k
            break;
367
234k
        case XML_ERR_INVALID_DEC_CHARREF:
368
234k
            errmsg = "CharRef: invalid decimal value";
369
234k
            break;
370
0
        case XML_ERR_INVALID_CHARREF:
371
0
            errmsg = "CharRef: invalid value";
372
0
            break;
373
174k
        case XML_ERR_INTERNAL_ERROR:
374
174k
            errmsg = "internal error";
375
174k
            break;
376
0
        case XML_ERR_PEREF_AT_EOF:
377
0
            errmsg = "PEReference at end of document";
378
0
            break;
379
0
        case XML_ERR_PEREF_IN_PROLOG:
380
0
            errmsg = "PEReference in prolog";
381
0
            break;
382
0
        case XML_ERR_PEREF_IN_EPILOG:
383
0
            errmsg = "PEReference in epilog";
384
0
            break;
385
0
        case XML_ERR_PEREF_NO_NAME:
386
0
            errmsg = "PEReference: no name";
387
0
            break;
388
184k
        case XML_ERR_PEREF_SEMICOL_MISSING:
389
184k
            errmsg = "PEReference: expecting ';'";
390
184k
            break;
391
359k
        case XML_ERR_ENTITY_LOOP:
392
359k
            errmsg = "Detected an entity reference loop";
393
359k
            break;
394
0
        case XML_ERR_ENTITY_NOT_STARTED:
395
0
            errmsg = "EntityValue: \" or ' expected";
396
0
            break;
397
2.06k
        case XML_ERR_ENTITY_PE_INTERNAL:
398
2.06k
            errmsg = "PEReferences forbidden in internal subset";
399
2.06k
            break;
400
470
        case XML_ERR_ENTITY_NOT_FINISHED:
401
470
            errmsg = "EntityValue: \" or ' expected";
402
470
            break;
403
242k
        case XML_ERR_ATTRIBUTE_NOT_STARTED:
404
242k
            errmsg = "AttValue: \" or ' expected";
405
242k
            break;
406
609k
        case XML_ERR_LT_IN_ATTRIBUTE:
407
609k
            errmsg = "Unescaped '<' not allowed in attributes values";
408
609k
            break;
409
11.5k
        case XML_ERR_LITERAL_NOT_STARTED:
410
11.5k
            errmsg = "SystemLiteral \" or ' expected";
411
11.5k
            break;
412
16.3k
        case XML_ERR_LITERAL_NOT_FINISHED:
413
16.3k
            errmsg = "Unfinished System or Public ID \" or ' expected";
414
16.3k
            break;
415
54.0k
        case XML_ERR_MISPLACED_CDATA_END:
416
54.0k
            errmsg = "Sequence ']]>' not allowed in content";
417
54.0k
            break;
418
7.95k
        case XML_ERR_URI_REQUIRED:
419
7.95k
            errmsg = "SYSTEM or PUBLIC, the URI is missing";
420
7.95k
            break;
421
3.98k
        case XML_ERR_PUBID_REQUIRED:
422
3.98k
            errmsg = "PUBLIC, the Public Identifier is missing";
423
3.98k
            break;
424
164k
        case XML_ERR_HYPHEN_IN_COMMENT:
425
164k
            errmsg = "Comment must not contain '--' (double-hyphen)";
426
164k
            break;
427
248k
        case XML_ERR_PI_NOT_STARTED:
428
248k
            errmsg = "xmlParsePI : no target name";
429
248k
            break;
430
6.90k
        case XML_ERR_RESERVED_XML_NAME:
431
6.90k
            errmsg = "Invalid PI name";
432
6.90k
            break;
433
2.92k
        case XML_ERR_NOTATION_NOT_STARTED:
434
2.92k
            errmsg = "NOTATION: Name expected here";
435
2.92k
            break;
436
25.3k
        case XML_ERR_NOTATION_NOT_FINISHED:
437
25.3k
            errmsg = "'>' required to close NOTATION declaration";
438
25.3k
            break;
439
7.18k
        case XML_ERR_VALUE_REQUIRED:
440
7.18k
            errmsg = "Entity value required";
441
7.18k
            break;
442
6.30k
        case XML_ERR_URI_FRAGMENT:
443
6.30k
            errmsg = "Fragment not allowed";
444
6.30k
            break;
445
29.2k
        case XML_ERR_ATTLIST_NOT_STARTED:
446
29.2k
            errmsg = "'(' required to start ATTLIST enumeration";
447
29.2k
            break;
448
2.56k
        case XML_ERR_NMTOKEN_REQUIRED:
449
2.56k
            errmsg = "NmToken expected in ATTLIST enumeration";
450
2.56k
            break;
451
6.12k
        case XML_ERR_ATTLIST_NOT_FINISHED:
452
6.12k
            errmsg = "')' required to finish ATTLIST enumeration";
453
6.12k
            break;
454
11.2k
        case XML_ERR_MIXED_NOT_STARTED:
455
11.2k
            errmsg = "MixedContentDecl : '|' or ')*' expected";
456
11.2k
            break;
457
0
        case XML_ERR_PCDATA_REQUIRED:
458
0
            errmsg = "MixedContentDecl : '#PCDATA' expected";
459
0
            break;
460
54.2k
        case XML_ERR_ELEMCONTENT_NOT_STARTED:
461
54.2k
            errmsg = "ContentDecl : Name or '(' expected";
462
54.2k
            break;
463
210k
        case XML_ERR_ELEMCONTENT_NOT_FINISHED:
464
210k
            errmsg = "ContentDecl : ',' '|' or ')' expected";
465
210k
            break;
466
0
        case XML_ERR_PEREF_IN_INT_SUBSET:
467
0
            errmsg =
468
0
                "PEReference: forbidden within markup decl in internal subset";
469
0
            break;
470
1.04M
        case XML_ERR_GT_REQUIRED:
471
1.04M
            errmsg = "expected '>'";
472
1.04M
            break;
473
41
        case XML_ERR_CONDSEC_INVALID:
474
41
            errmsg = "XML conditional section '[' expected";
475
41
            break;
476
326
        case XML_ERR_EXT_SUBSET_NOT_FINISHED:
477
326
            errmsg = "Content error in the external subset";
478
326
            break;
479
127
        case XML_ERR_CONDSEC_INVALID_KEYWORD:
480
127
            errmsg =
481
127
                "conditional section INCLUDE or IGNORE keyword expected";
482
127
            break;
483
8.13k
        case XML_ERR_CONDSEC_NOT_FINISHED:
484
8.13k
            errmsg = "XML conditional section not closed";
485
8.13k
            break;
486
0
        case XML_ERR_XMLDECL_NOT_STARTED:
487
0
            errmsg = "Text declaration '<?xml' required";
488
0
            break;
489
18.6k
        case XML_ERR_XMLDECL_NOT_FINISHED:
490
18.6k
            errmsg = "parsing XML declaration: '?>' expected";
491
18.6k
            break;
492
0
        case XML_ERR_EXT_ENTITY_STANDALONE:
493
0
            errmsg = "external parsed entities cannot be standalone";
494
0
            break;
495
669k
        case XML_ERR_ENTITYREF_SEMICOL_MISSING:
496
669k
            errmsg = "EntityRef: expecting ';'";
497
669k
            break;
498
20.1k
        case XML_ERR_DOCTYPE_NOT_FINISHED:
499
20.1k
            errmsg = "DOCTYPE improperly terminated";
500
20.1k
            break;
501
0
        case XML_ERR_LTSLASH_REQUIRED:
502
0
            errmsg = "EndTag: '</' not found";
503
0
            break;
504
672
        case XML_ERR_EQUAL_REQUIRED:
505
672
            errmsg = "expected '='";
506
672
            break;
507
1.48k
        case XML_ERR_STRING_NOT_CLOSED:
508
1.48k
            errmsg = "String not closed expecting \" or '";
509
1.48k
            break;
510
452
        case XML_ERR_STRING_NOT_STARTED:
511
452
            errmsg = "String not started expecting ' or \"";
512
452
            break;
513
177
        case XML_ERR_ENCODING_NAME:
514
177
            errmsg = "Invalid XML encoding name";
515
177
            break;
516
118
        case XML_ERR_STANDALONE_VALUE:
517
118
            errmsg = "standalone accepts only 'yes' or 'no'";
518
118
            break;
519
24.4k
        case XML_ERR_DOCUMENT_EMPTY:
520
24.4k
            errmsg = "Document is empty";
521
24.4k
            break;
522
76.5k
        case XML_ERR_DOCUMENT_END:
523
76.5k
            errmsg = "Extra content at the end of the document";
524
76.5k
            break;
525
113k
        case XML_ERR_NOT_WELL_BALANCED:
526
113k
            errmsg = "chunk is not well balanced";
527
113k
            break;
528
0
        case XML_ERR_EXTRA_CONTENT:
529
0
            errmsg = "extra content at the end of well balanced chunk";
530
0
            break;
531
19.3k
        case XML_ERR_VERSION_MISSING:
532
19.3k
            errmsg = "Malformed declaration expecting version";
533
19.3k
            break;
534
11.0k
        case XML_ERR_NAME_TOO_LONG:
535
11.0k
            errmsg = "Name too long use XML_PARSE_HUGE option";
536
11.0k
            break;
537
#if 0
538
        case:
539
            errmsg = "";
540
            break;
541
#endif
542
1.11k
        default:
543
1.11k
            errmsg = "Unregistered error message";
544
4.82M
    }
545
4.82M
    if (ctxt != NULL)
546
4.82M
  ctxt->errNo = error;
547
4.82M
    if (info == NULL) {
548
4.63M
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
549
4.63M
                        XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s\n",
550
4.63M
                        errmsg);
551
4.63M
    } else {
552
185k
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
553
185k
                        XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s: %s\n",
554
185k
                        errmsg, info);
555
185k
    }
556
4.82M
    if (ctxt != NULL) {
557
4.82M
  ctxt->wellFormed = 0;
558
4.82M
  if (ctxt->recovery == 0)
559
502k
      ctxt->disableSAX = 1;
560
4.82M
    }
561
4.82M
}
562
563
/**
564
 * xmlFatalErrMsg:
565
 * @ctxt:  an XML parser context
566
 * @error:  the error number
567
 * @msg:  the error message
568
 *
569
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
570
 */
571
static void LIBXML_ATTR_FORMAT(3,0)
572
xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
573
               const char *msg)
574
9.55M
{
575
9.55M
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
576
1.03M
        (ctxt->instate == XML_PARSER_EOF))
577
3
  return;
578
9.55M
    if (ctxt != NULL)
579
9.55M
  ctxt->errNo = error;
580
9.55M
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
581
9.55M
                    XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
582
9.55M
    if (ctxt != NULL) {
583
9.55M
  ctxt->wellFormed = 0;
584
9.55M
  if (ctxt->recovery == 0)
585
1.03M
      ctxt->disableSAX = 1;
586
9.55M
    }
587
9.55M
}
588
589
/**
590
 * xmlWarningMsg:
591
 * @ctxt:  an XML parser context
592
 * @error:  the error number
593
 * @msg:  the error message
594
 * @str1:  extra data
595
 * @str2:  extra data
596
 *
597
 * Handle a warning.
598
 */
599
static void LIBXML_ATTR_FORMAT(3,0)
600
xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
601
              const char *msg, const xmlChar *str1, const xmlChar *str2)
602
129k
{
603
129k
    xmlStructuredErrorFunc schannel = NULL;
604
605
129k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
606
8.24k
        (ctxt->instate == XML_PARSER_EOF))
607
0
  return;
608
129k
    if ((ctxt != NULL) && (ctxt->sax != NULL) &&
609
129k
        (ctxt->sax->initialized == XML_SAX2_MAGIC))
610
129k
        schannel = ctxt->sax->serror;
611
129k
    if (ctxt != NULL) {
612
129k
        __xmlRaiseError(schannel,
613
129k
                    (ctxt->sax) ? ctxt->sax->warning : NULL,
614
129k
                    ctxt->userData,
615
129k
                    ctxt, NULL, XML_FROM_PARSER, error,
616
129k
                    XML_ERR_WARNING, NULL, 0,
617
129k
        (const char *) str1, (const char *) str2, NULL, 0, 0,
618
129k
        msg, (const char *) str1, (const char *) str2);
619
129k
    } else {
620
0
        __xmlRaiseError(schannel, NULL, NULL,
621
0
                    ctxt, NULL, XML_FROM_PARSER, error,
622
0
                    XML_ERR_WARNING, NULL, 0,
623
0
        (const char *) str1, (const char *) str2, NULL, 0, 0,
624
0
        msg, (const char *) str1, (const char *) str2);
625
0
    }
626
129k
}
627
628
/**
629
 * xmlValidityError:
630
 * @ctxt:  an XML parser context
631
 * @error:  the error number
632
 * @msg:  the error message
633
 * @str1:  extra data
634
 *
635
 * Handle a validity error.
636
 */
637
static void LIBXML_ATTR_FORMAT(3,0)
638
xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
639
              const char *msg, const xmlChar *str1, const xmlChar *str2)
640
28.7k
{
641
28.7k
    xmlStructuredErrorFunc schannel = NULL;
642
643
28.7k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
644
0
        (ctxt->instate == XML_PARSER_EOF))
645
0
  return;
646
28.7k
    if (ctxt != NULL) {
647
28.7k
  ctxt->errNo = error;
648
28.7k
  if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
649
28.7k
      schannel = ctxt->sax->serror;
650
28.7k
    }
651
28.7k
    if (ctxt != NULL) {
652
28.7k
        __xmlRaiseError(schannel,
653
28.7k
                    ctxt->vctxt.error, ctxt->vctxt.userData,
654
28.7k
                    ctxt, NULL, XML_FROM_DTD, error,
655
28.7k
                    XML_ERR_ERROR, NULL, 0, (const char *) str1,
656
28.7k
        (const char *) str2, NULL, 0, 0,
657
28.7k
        msg, (const char *) str1, (const char *) str2);
658
28.7k
  ctxt->valid = 0;
659
28.7k
    } else {
660
0
        __xmlRaiseError(schannel, NULL, NULL,
661
0
                    ctxt, NULL, XML_FROM_DTD, error,
662
0
                    XML_ERR_ERROR, NULL, 0, (const char *) str1,
663
0
        (const char *) str2, NULL, 0, 0,
664
0
        msg, (const char *) str1, (const char *) str2);
665
0
    }
666
28.7k
}
667
668
/**
669
 * xmlFatalErrMsgInt:
670
 * @ctxt:  an XML parser context
671
 * @error:  the error number
672
 * @msg:  the error message
673
 * @val:  an integer value
674
 *
675
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
676
 */
677
static void LIBXML_ATTR_FORMAT(3,0)
678
xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
679
                  const char *msg, int val)
680
11.0M
{
681
11.0M
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
682
113k
        (ctxt->instate == XML_PARSER_EOF))
683
0
  return;
684
11.0M
    if (ctxt != NULL)
685
11.0M
  ctxt->errNo = error;
686
11.0M
    __xmlRaiseError(NULL, NULL, NULL,
687
11.0M
                    ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
688
11.0M
                    NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
689
11.0M
    if (ctxt != NULL) {
690
11.0M
  ctxt->wellFormed = 0;
691
11.0M
  if (ctxt->recovery == 0)
692
113k
      ctxt->disableSAX = 1;
693
11.0M
    }
694
11.0M
}
695
696
/**
697
 * xmlFatalErrMsgStrIntStr:
698
 * @ctxt:  an XML parser context
699
 * @error:  the error number
700
 * @msg:  the error message
701
 * @str1:  a string info
702
 * @val:  an integer value
703
 * @str2:  a string info
704
 *
705
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
706
 */
707
static void LIBXML_ATTR_FORMAT(3,0)
708
xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
709
                  const char *msg, const xmlChar *str1, int val,
710
      const xmlChar *str2)
711
3.86M
{
712
3.86M
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
713
1.58M
        (ctxt->instate == XML_PARSER_EOF))
714
0
  return;
715
3.86M
    if (ctxt != NULL)
716
3.86M
  ctxt->errNo = error;
717
3.86M
    __xmlRaiseError(NULL, NULL, NULL,
718
3.86M
                    ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
719
3.86M
                    NULL, 0, (const char *) str1, (const char *) str2,
720
3.86M
        NULL, val, 0, msg, str1, val, str2);
721
3.86M
    if (ctxt != NULL) {
722
3.86M
  ctxt->wellFormed = 0;
723
3.86M
  if (ctxt->recovery == 0)
724
1.58M
      ctxt->disableSAX = 1;
725
3.86M
    }
726
3.86M
}
727
728
/**
729
 * xmlFatalErrMsgStr:
730
 * @ctxt:  an XML parser context
731
 * @error:  the error number
732
 * @msg:  the error message
733
 * @val:  a string value
734
 *
735
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
736
 */
737
static void LIBXML_ATTR_FORMAT(3,0)
738
xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
739
                  const char *msg, const xmlChar * val)
740
6.62M
{
741
6.62M
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
742
269k
        (ctxt->instate == XML_PARSER_EOF))
743
0
  return;
744
6.62M
    if (ctxt != NULL)
745
6.62M
  ctxt->errNo = error;
746
6.62M
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
747
6.62M
                    XML_FROM_PARSER, error, XML_ERR_FATAL,
748
6.62M
                    NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
749
6.62M
                    val);
750
6.62M
    if (ctxt != NULL) {
751
6.62M
  ctxt->wellFormed = 0;
752
6.62M
  if (ctxt->recovery == 0)
753
385k
      ctxt->disableSAX = 1;
754
6.62M
    }
755
6.62M
}
756
757
/**
758
 * xmlErrMsgStr:
759
 * @ctxt:  an XML parser context
760
 * @error:  the error number
761
 * @msg:  the error message
762
 * @val:  a string value
763
 *
764
 * Handle a non fatal parser error
765
 */
766
static void LIBXML_ATTR_FORMAT(3,0)
767
xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
768
                  const char *msg, const xmlChar * val)
769
257k
{
770
257k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
771
0
        (ctxt->instate == XML_PARSER_EOF))
772
0
  return;
773
257k
    if (ctxt != NULL)
774
257k
  ctxt->errNo = error;
775
257k
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
776
257k
                    XML_FROM_PARSER, error, XML_ERR_ERROR,
777
257k
                    NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
778
257k
                    val);
779
257k
}
780
781
/**
782
 * xmlNsErr:
783
 * @ctxt:  an XML parser context
784
 * @error:  the error number
785
 * @msg:  the message
786
 * @info1:  extra information string
787
 * @info2:  extra information string
788
 *
789
 * Handle a namespace error (raised at XML_ERR_ERROR level; clears nsWellFormed)
790
 */
791
static void LIBXML_ATTR_FORMAT(3,0)
792
xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
793
         const char *msg,
794
         const xmlChar * info1, const xmlChar * info2,
795
         const xmlChar * info3)
796
1.89M
{
797
1.89M
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
798
287k
        (ctxt->instate == XML_PARSER_EOF))
799
0
  return;
800
1.89M
    if (ctxt != NULL)
801
1.89M
  ctxt->errNo = error;
802
1.89M
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
803
1.89M
                    XML_ERR_ERROR, NULL, 0, (const char *) info1,
804
1.89M
                    (const char *) info2, (const char *) info3, 0, 0, msg,
805
1.89M
                    info1, info2, info3);
806
1.89M
    if (ctxt != NULL)
807
1.89M
  ctxt->nsWellFormed = 0;
808
1.89M
}
809
810
/**
811
 * xmlNsWarn
812
 * @ctxt:  an XML parser context
813
 * @error:  the error number
814
 * @msg:  the message
815
 * @info1:  extra information string
816
 * @info2:  extra information string
817
 *
818
 * Handle a namespace warning
819
 */
820
static void LIBXML_ATTR_FORMAT(3,0)
821
xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
822
         const char *msg,
823
         const xmlChar * info1, const xmlChar * info2,
824
         const xmlChar * info3)
825
85.2k
{
826
85.2k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
827
29.5k
        (ctxt->instate == XML_PARSER_EOF))
828
0
  return;
829
85.2k
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
830
85.2k
                    XML_ERR_WARNING, NULL, 0, (const char *) info1,
831
85.2k
                    (const char *) info2, (const char *) info3, 0, 0, msg,
832
85.2k
                    info1, info2, info3);
833
85.2k
}
834
835
/************************************************************************
836
 *                  *
837
 *    Library wide options          *
838
 *                  *
839
 ************************************************************************/
840
841
/**
842
  * xmlHasFeature:
843
  * @feature: the feature to be examined
844
  *
845
  * Examines if the library has been compiled with a given feature.
846
  *
847
  * Returns a non-zero value if the feature exist, otherwise zero.
848
  * Returns zero (0) if the feature does not exist or an unknown
849
  * unknown feature is requested, non-zero otherwise.
850
  */
851
int
852
xmlHasFeature(xmlFeature feature)
853
0
{
854
0
    switch (feature) {
855
0
  case XML_WITH_THREAD:
856
0
#ifdef LIBXML_THREAD_ENABLED
857
0
      return(1);
858
#else
859
      return(0);
860
#endif
861
0
        case XML_WITH_TREE:
862
0
#ifdef LIBXML_TREE_ENABLED
863
0
            return(1);
864
#else
865
            return(0);
866
#endif
867
0
        case XML_WITH_OUTPUT:
868
0
#ifdef LIBXML_OUTPUT_ENABLED
869
0
            return(1);
870
#else
871
            return(0);
872
#endif
873
0
        case XML_WITH_PUSH:
874
0
#ifdef LIBXML_PUSH_ENABLED
875
0
            return(1);
876
#else
877
            return(0);
878
#endif
879
0
        case XML_WITH_READER:
880
0
#ifdef LIBXML_READER_ENABLED
881
0
            return(1);
882
#else
883
            return(0);
884
#endif
885
0
        case XML_WITH_PATTERN:
886
0
#ifdef LIBXML_PATTERN_ENABLED
887
0
            return(1);
888
#else
889
            return(0);
890
#endif
891
0
        case XML_WITH_WRITER:
892
0
#ifdef LIBXML_WRITER_ENABLED
893
0
            return(1);
894
#else
895
            return(0);
896
#endif
897
0
        case XML_WITH_SAX1:
898
0
#ifdef LIBXML_SAX1_ENABLED
899
0
            return(1);
900
#else
901
            return(0);
902
#endif
903
0
        case XML_WITH_FTP:
904
0
#ifdef LIBXML_FTP_ENABLED
905
0
            return(1);
906
#else
907
            return(0);
908
#endif
909
0
        case XML_WITH_HTTP:
910
0
#ifdef LIBXML_HTTP_ENABLED
911
0
            return(1);
912
#else
913
            return(0);
914
#endif
915
0
        case XML_WITH_VALID:
916
0
#ifdef LIBXML_VALID_ENABLED
917
0
            return(1);
918
#else
919
            return(0);
920
#endif
921
0
        case XML_WITH_HTML:
922
0
#ifdef LIBXML_HTML_ENABLED
923
0
            return(1);
924
#else
925
            return(0);
926
#endif
927
0
        case XML_WITH_LEGACY:
928
0
#ifdef LIBXML_LEGACY_ENABLED
929
0
            return(1);
930
#else
931
            return(0);
932
#endif
933
0
        case XML_WITH_C14N:
934
0
#ifdef LIBXML_C14N_ENABLED
935
0
            return(1);
936
#else
937
            return(0);
938
#endif
939
0
        case XML_WITH_CATALOG:
940
0
#ifdef LIBXML_CATALOG_ENABLED
941
0
            return(1);
942
#else
943
            return(0);
944
#endif
945
0
        case XML_WITH_XPATH:
946
0
#ifdef LIBXML_XPATH_ENABLED
947
0
            return(1);
948
#else
949
            return(0);
950
#endif
951
0
        case XML_WITH_XPTR:
952
0
#ifdef LIBXML_XPTR_ENABLED
953
0
            return(1);
954
#else
955
            return(0);
956
#endif
957
0
        case XML_WITH_XINCLUDE:
958
0
#ifdef LIBXML_XINCLUDE_ENABLED
959
0
            return(1);
960
#else
961
            return(0);
962
#endif
963
0
        case XML_WITH_ICONV:
964
0
#ifdef LIBXML_ICONV_ENABLED
965
0
            return(1);
966
#else
967
            return(0);
968
#endif
969
0
        case XML_WITH_ISO8859X:
970
0
#ifdef LIBXML_ISO8859X_ENABLED
971
0
            return(1);
972
#else
973
            return(0);
974
#endif
975
0
        case XML_WITH_UNICODE:
976
0
#ifdef LIBXML_UNICODE_ENABLED
977
0
            return(1);
978
#else
979
            return(0);
980
#endif
981
0
        case XML_WITH_REGEXP:
982
0
#ifdef LIBXML_REGEXP_ENABLED
983
0
            return(1);
984
#else
985
            return(0);
986
#endif
987
0
        case XML_WITH_AUTOMATA:
988
0
#ifdef LIBXML_AUTOMATA_ENABLED
989
0
            return(1);
990
#else
991
            return(0);
992
#endif
993
0
        case XML_WITH_EXPR:
994
0
#ifdef LIBXML_EXPR_ENABLED
995
0
            return(1);
996
#else
997
            return(0);
998
#endif
999
0
        case XML_WITH_SCHEMAS:
1000
0
#ifdef LIBXML_SCHEMAS_ENABLED
1001
0
            return(1);
1002
#else
1003
            return(0);
1004
#endif
1005
0
        case XML_WITH_SCHEMATRON:
1006
0
#ifdef LIBXML_SCHEMATRON_ENABLED
1007
0
            return(1);
1008
#else
1009
            return(0);
1010
#endif
1011
0
        case XML_WITH_MODULES:
1012
0
#ifdef LIBXML_MODULES_ENABLED
1013
0
            return(1);
1014
#else
1015
            return(0);
1016
#endif
1017
0
        case XML_WITH_DEBUG:
1018
0
#ifdef LIBXML_DEBUG_ENABLED
1019
0
            return(1);
1020
#else
1021
            return(0);
1022
#endif
1023
0
        case XML_WITH_DEBUG_MEM:
1024
#ifdef DEBUG_MEMORY_LOCATION
1025
            return(1);
1026
#else
1027
0
            return(0);
1028
0
#endif
1029
0
        case XML_WITH_DEBUG_RUN:
1030
#ifdef LIBXML_DEBUG_RUNTIME
1031
            return(1);
1032
#else
1033
0
            return(0);
1034
0
#endif
1035
0
        case XML_WITH_ZLIB:
1036
#ifdef LIBXML_ZLIB_ENABLED
1037
            return(1);
1038
#else
1039
0
            return(0);
1040
0
#endif
1041
0
        case XML_WITH_LZMA:
1042
#ifdef LIBXML_LZMA_ENABLED
1043
            return(1);
1044
#else
1045
0
            return(0);
1046
0
#endif
1047
0
        case XML_WITH_ICU:
1048
#ifdef LIBXML_ICU_ENABLED
1049
            return(1);
1050
#else
1051
0
            return(0);
1052
0
#endif
1053
0
        default:
1054
0
      break;
1055
0
     }
1056
0
     return(0);
1057
0
}
1058
1059
/************************************************************************
1060
 *                  *
1061
 *    SAX2 defaulted attributes handling      *
1062
 *                  *
1063
 ************************************************************************/
1064
1065
/**
1066
 * xmlDetectSAX2:
1067
 * @ctxt:  an XML parser context
1068
 *
1069
 * Do the SAX2 detection and specific intialization
1070
 */
1071
static void
1072
2.31M
xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
1073
2.31M
    if (ctxt == NULL) return;
1074
2.31M
#ifdef LIBXML_SAX1_ENABLED
1075
2.31M
    if ((ctxt->sax) &&  (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
1076
2.31M
        ((ctxt->sax->startElementNs != NULL) ||
1077
2.31M
         (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
1078
#else
1079
    ctxt->sax2 = 1;
1080
#endif /* LIBXML_SAX1_ENABLED */
1081
1082
2.31M
    ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
1083
2.31M
    ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
1084
2.31M
    ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
1085
2.31M
    if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
1086
2.31M
    (ctxt->str_xml_ns == NULL)) {
1087
0
        xmlErrMemory(ctxt, NULL);
1088
0
    }
1089
2.31M
}
1090
1091
typedef struct _xmlDefAttrs xmlDefAttrs;
1092
typedef xmlDefAttrs *xmlDefAttrsPtr;
1093
struct _xmlDefAttrs {
1094
    int nbAttrs;  /* number of defaulted attributes on that element */
1095
    int maxAttrs;       /* the size of the array */
1096
#if __STDC_VERSION__ >= 199901L
1097
    /* Using a C99 flexible array member avoids UBSan errors. */
1098
    const xmlChar *values[]; /* array of localname/prefix/values/external */
1099
#else
1100
    const xmlChar *values[5];
1101
#endif
1102
};
1103
1104
/**
1105
 * xmlAttrNormalizeSpace:
1106
 * @src: the source string
1107
 * @dst: the target string
1108
 *
1109
 * Normalize the space in non CDATA attribute values:
1110
 * If the attribute type is not CDATA, then the XML processor MUST further
1111
 * process the normalized attribute value by discarding any leading and
1112
 * trailing space (#x20) characters, and by replacing sequences of space
1113
 * (#x20) characters by a single space (#x20) character.
1114
 * Note that the size of dst need to be at least src, and if one doesn't need
1115
 * to preserve dst (and it doesn't come from a dictionary or read-only) then
1116
 * passing src as dst is just fine.
1117
 *
1118
 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1119
 *         is needed.
1120
 */
1121
static xmlChar *
1122
xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
1123
181k
{
1124
181k
    if ((src == NULL) || (dst == NULL))
1125
0
        return(NULL);
1126
1127
215k
    while (*src == 0x20) src++;
1128
25.9M
    while (*src != 0) {
1129
25.7M
  if (*src == 0x20) {
1130
918k
      while (*src == 0x20) src++;
1131
296k
      if (*src != 0)
1132
263k
    *dst++ = 0x20;
1133
25.4M
  } else {
1134
25.4M
      *dst++ = *src++;
1135
25.4M
  }
1136
25.7M
    }
1137
181k
    *dst = 0;
1138
181k
    if (dst == src)
1139
110k
       return(NULL);
1140
71.0k
    return(dst);
1141
181k
}
1142
1143
/**
1144
 * xmlAttrNormalizeSpace2:
1145
 * @src: the source string
1146
 *
1147
 * Normalize the space in non CDATA attribute values, a slightly more complex
1148
 * front end to avoid allocation problems when running on attribute values
1149
 * coming from the input.
1150
 *
1151
 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1152
 *         is needed.
1153
 */
1154
static const xmlChar *
1155
xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
1156
67.6k
{
1157
67.6k
    int i;
1158
67.6k
    int remove_head = 0;
1159
67.6k
    int need_realloc = 0;
1160
67.6k
    const xmlChar *cur;
1161
1162
67.6k
    if ((ctxt == NULL) || (src == NULL) || (len == NULL))
1163
0
        return(NULL);
1164
67.6k
    i = *len;
1165
67.6k
    if (i <= 0)
1166
12.1k
        return(NULL);
1167
1168
55.5k
    cur = src;
1169
73.8k
    while (*cur == 0x20) {
1170
18.3k
        cur++;
1171
18.3k
  remove_head++;
1172
18.3k
    }
1173
885k
    while (*cur != 0) {
1174
838k
  if (*cur == 0x20) {
1175
42.7k
      cur++;
1176
42.7k
      if ((*cur == 0x20) || (*cur == 0)) {
1177
8.48k
          need_realloc = 1;
1178
8.48k
    break;
1179
8.48k
      }
1180
42.7k
  } else
1181
796k
      cur++;
1182
838k
    }
1183
55.5k
    if (need_realloc) {
1184
8.48k
        xmlChar *ret;
1185
1186
8.48k
  ret = xmlStrndup(src + remove_head, i - remove_head + 1);
1187
8.48k
  if (ret == NULL) {
1188
0
      xmlErrMemory(ctxt, NULL);
1189
0
      return(NULL);
1190
0
  }
1191
8.48k
  xmlAttrNormalizeSpace(ret, ret);
1192
8.48k
  *len = (int) strlen((const char *)ret);
1193
8.48k
        return(ret);
1194
47.0k
    } else if (remove_head) {
1195
8.55k
        *len -= remove_head;
1196
8.55k
        memmove(src, src + remove_head, 1 + *len);
1197
8.55k
  return(src);
1198
8.55k
    }
1199
38.4k
    return(NULL);
1200
55.5k
}
1201
1202
/**
1203
 * xmlAddDefAttrs:
1204
 * @ctxt:  an XML parser context
1205
 * @fullname:  the element fullname
1206
 * @fullattr:  the attribute fullname
1207
 * @value:  the attribute value
1208
 *
1209
 * Add a defaulted attribute for an element
1210
 */
1211
static void
1212
xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1213
               const xmlChar *fullname,
1214
               const xmlChar *fullattr,
1215
171k
               const xmlChar *value) {
1216
171k
    xmlDefAttrsPtr defaults;
1217
171k
    int len;
1218
171k
    const xmlChar *name;
1219
171k
    const xmlChar *prefix;
1220
1221
    /*
1222
     * Allows to detect attribute redefinitions
1223
     */
1224
171k
    if (ctxt->attsSpecial != NULL) {
1225
159k
        if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1226
104k
      return;
1227
159k
    }
1228
1229
66.8k
    if (ctxt->attsDefault == NULL) {
1230
12.0k
        ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
1231
12.0k
  if (ctxt->attsDefault == NULL)
1232
0
      goto mem_error;
1233
12.0k
    }
1234
1235
    /*
1236
     * split the element name into prefix:localname , the string found
1237
     * are within the DTD and then not associated to namespace names.
1238
     */
1239
66.8k
    name = xmlSplitQName3(fullname, &len);
1240
66.8k
    if (name == NULL) {
1241
37.8k
        name = xmlDictLookup(ctxt->dict, fullname, -1);
1242
37.8k
  prefix = NULL;
1243
37.8k
    } else {
1244
28.9k
        name = xmlDictLookup(ctxt->dict, name, -1);
1245
28.9k
  prefix = xmlDictLookup(ctxt->dict, fullname, len);
1246
28.9k
    }
1247
1248
    /*
1249
     * make sure there is some storage
1250
     */
1251
66.8k
    defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1252
66.8k
    if (defaults == NULL) {
1253
15.6k
        defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
1254
15.6k
                     (4 * 5) * sizeof(const xmlChar *));
1255
15.6k
  if (defaults == NULL)
1256
0
      goto mem_error;
1257
15.6k
  defaults->nbAttrs = 0;
1258
15.6k
  defaults->maxAttrs = 4;
1259
15.6k
  if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1260
15.6k
                          defaults, NULL) < 0) {
1261
0
      xmlFree(defaults);
1262
0
      goto mem_error;
1263
0
  }
1264
51.1k
    } else if (defaults->nbAttrs >= defaults->maxAttrs) {
1265
7.36k
        xmlDefAttrsPtr temp;
1266
1267
7.36k
        temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
1268
7.36k
           (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
1269
7.36k
  if (temp == NULL)
1270
0
      goto mem_error;
1271
7.36k
  defaults = temp;
1272
7.36k
  defaults->maxAttrs *= 2;
1273
7.36k
  if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1274
7.36k
                          defaults, NULL) < 0) {
1275
0
      xmlFree(defaults);
1276
0
      goto mem_error;
1277
0
  }
1278
7.36k
    }
1279
1280
    /*
1281
     * Split the element name into prefix:localname , the string found
1282
     * are within the DTD and hen not associated to namespace names.
1283
     */
1284
66.8k
    name = xmlSplitQName3(fullattr, &len);
1285
66.8k
    if (name == NULL) {
1286
37.2k
        name = xmlDictLookup(ctxt->dict, fullattr, -1);
1287
37.2k
  prefix = NULL;
1288
37.2k
    } else {
1289
29.6k
        name = xmlDictLookup(ctxt->dict, name, -1);
1290
29.6k
  prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1291
29.6k
    }
1292
1293
66.8k
    defaults->values[5 * defaults->nbAttrs] = name;
1294
66.8k
    defaults->values[5 * defaults->nbAttrs + 1] = prefix;
1295
    /* intern the string and precompute the end */
1296
66.8k
    len = xmlStrlen(value);
1297
66.8k
    value = xmlDictLookup(ctxt->dict, value, len);
1298
66.8k
    defaults->values[5 * defaults->nbAttrs + 2] = value;
1299
66.8k
    defaults->values[5 * defaults->nbAttrs + 3] = value + len;
1300
66.8k
    if (ctxt->external)
1301
0
        defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
1302
66.8k
    else
1303
66.8k
        defaults->values[5 * defaults->nbAttrs + 4] = NULL;
1304
66.8k
    defaults->nbAttrs++;
1305
1306
66.8k
    return;
1307
1308
0
mem_error:
1309
0
    xmlErrMemory(ctxt, NULL);
1310
0
    return;
1311
66.8k
}
1312
1313
/**
1314
 * xmlAddSpecialAttr:
1315
 * @ctxt:  an XML parser context
1316
 * @fullname:  the element fullname
1317
 * @fullattr:  the attribute fullname
1318
 * @type:  the attribute type
1319
 *
1320
 * Register this attribute type
1321
 */
1322
static void
1323
xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1324
      const xmlChar *fullname,
1325
      const xmlChar *fullattr,
1326
      int type)
1327
202k
{
1328
202k
    if (ctxt->attsSpecial == NULL) {
1329
12.9k
        ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
1330
12.9k
  if (ctxt->attsSpecial == NULL)
1331
0
      goto mem_error;
1332
12.9k
    }
1333
1334
202k
    if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1335
130k
        return;
1336
1337
72.1k
    xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1338
72.1k
                     (void *) (ptrdiff_t) type);
1339
72.1k
    return;
1340
1341
0
mem_error:
1342
0
    xmlErrMemory(ctxt, NULL);
1343
0
    return;
1344
202k
}
1345
1346
/**
1347
 * xmlCleanSpecialAttrCallback:
1348
 *
1349
 * Removes CDATA attributes from the special attribute table
1350
 */
1351
static void
1352
xmlCleanSpecialAttrCallback(void *payload, void *data,
1353
                            const xmlChar *fullname, const xmlChar *fullattr,
1354
72.0k
                            const xmlChar *unused ATTRIBUTE_UNUSED) {
1355
72.0k
    xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1356
1357
72.0k
    if (((ptrdiff_t) payload) == XML_ATTRIBUTE_CDATA) {
1358
6.33k
        xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1359
6.33k
    }
1360
72.0k
}
1361
1362
/**
1363
 * xmlCleanSpecialAttr:
1364
 * @ctxt:  an XML parser context
1365
 *
1366
 * Trim the list of attributes defined to remove all those of type
1367
 * CDATA as they are not special. This call should be done when finishing
1368
 * to parse the DTD and before starting to parse the document root.
1369
 */
1370
static void
1371
xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1372
56.9k
{
1373
56.9k
    if (ctxt->attsSpecial == NULL)
1374
44.0k
        return;
1375
1376
12.8k
    xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1377
1378
12.8k
    if (xmlHashSize(ctxt->attsSpecial) == 0) {
1379
174
        xmlHashFree(ctxt->attsSpecial, NULL);
1380
174
        ctxt->attsSpecial = NULL;
1381
174
    }
1382
12.8k
    return;
1383
56.9k
}
1384
1385
/**
1386
 * xmlCheckLanguageID:
1387
 * @lang:  pointer to the string value
1388
 *
1389
 * Checks that the value conforms to the LanguageID production:
1390
 *
1391
 * NOTE: this is somewhat deprecated, those productions were removed from
1392
 *       the XML Second edition.
1393
 *
1394
 * [33] LanguageID ::= Langcode ('-' Subcode)*
1395
 * [34] Langcode ::= ISO639Code |  IanaCode |  UserCode
1396
 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1397
 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1398
 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1399
 * [38] Subcode ::= ([a-z] | [A-Z])+
1400
 *
1401
 * The current REC reference the sucessors of RFC 1766, currently 5646
1402
 *
1403
 * http://www.rfc-editor.org/rfc/rfc5646.txt
1404
 * langtag       = language
1405
 *                 ["-" script]
1406
 *                 ["-" region]
1407
 *                 *("-" variant)
1408
 *                 *("-" extension)
1409
 *                 ["-" privateuse]
1410
 * language      = 2*3ALPHA            ; shortest ISO 639 code
1411
 *                 ["-" extlang]       ; sometimes followed by
1412
 *                                     ; extended language subtags
1413
 *               / 4ALPHA              ; or reserved for future use
1414
 *               / 5*8ALPHA            ; or registered language subtag
1415
 *
1416
 * extlang       = 3ALPHA              ; selected ISO 639 codes
1417
 *                 *2("-" 3ALPHA)      ; permanently reserved
1418
 *
1419
 * script        = 4ALPHA              ; ISO 15924 code
1420
 *
1421
 * region        = 2ALPHA              ; ISO 3166-1 code
1422
 *               / 3DIGIT              ; UN M.49 code
1423
 *
1424
 * variant       = 5*8alphanum         ; registered variants
1425
 *               / (DIGIT 3alphanum)
1426
 *
1427
 * extension     = singleton 1*("-" (2*8alphanum))
1428
 *
1429
 *                                     ; Single alphanumerics
1430
 *                                     ; "x" reserved for private use
1431
 * singleton     = DIGIT               ; 0 - 9
1432
 *               / %x41-57             ; A - W
1433
 *               / %x59-5A             ; Y - Z
1434
 *               / %x61-77             ; a - w
1435
 *               / %x79-7A             ; y - z
1436
 *
1437
 * it sounds right to still allow Irregular i-xxx IANA and user codes too
1438
 * The parser below doesn't try to cope with extension or privateuse
1439
 * that could be added but that's not interoperable anyway
1440
 *
1441
 * Returns 1 if correct 0 otherwise
1442
 **/
1443
int
1444
xmlCheckLanguageID(const xmlChar * lang)
1445
0
{
1446
0
    const xmlChar *cur = lang, *nxt;
1447
1448
0
    if (cur == NULL)
1449
0
        return (0);
1450
0
    if (((cur[0] == 'i') && (cur[1] == '-')) ||
1451
0
        ((cur[0] == 'I') && (cur[1] == '-')) ||
1452
0
        ((cur[0] == 'x') && (cur[1] == '-')) ||
1453
0
        ((cur[0] == 'X') && (cur[1] == '-'))) {
1454
        /*
1455
         * Still allow IANA code and user code which were coming
1456
         * from the previous version of the XML-1.0 specification
1457
         * it's deprecated but we should not fail
1458
         */
1459
0
        cur += 2;
1460
0
        while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1461
0
               ((cur[0] >= 'a') && (cur[0] <= 'z')))
1462
0
            cur++;
1463
0
        return(cur[0] == 0);
1464
0
    }
1465
0
    nxt = cur;
1466
0
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1467
0
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1468
0
           nxt++;
1469
0
    if (nxt - cur >= 4) {
1470
        /*
1471
         * Reserved
1472
         */
1473
0
        if ((nxt - cur > 8) || (nxt[0] != 0))
1474
0
            return(0);
1475
0
        return(1);
1476
0
    }
1477
0
    if (nxt - cur < 2)
1478
0
        return(0);
1479
    /* we got an ISO 639 code */
1480
0
    if (nxt[0] == 0)
1481
0
        return(1);
1482
0
    if (nxt[0] != '-')
1483
0
        return(0);
1484
1485
0
    nxt++;
1486
0
    cur = nxt;
1487
    /* now we can have extlang or script or region or variant */
1488
0
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1489
0
        goto region_m49;
1490
1491
0
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1492
0
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1493
0
           nxt++;
1494
0
    if (nxt - cur == 4)
1495
0
        goto script;
1496
0
    if (nxt - cur == 2)
1497
0
        goto region;
1498
0
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1499
0
        goto variant;
1500
0
    if (nxt - cur != 3)
1501
0
        return(0);
1502
    /* we parsed an extlang */
1503
0
    if (nxt[0] == 0)
1504
0
        return(1);
1505
0
    if (nxt[0] != '-')
1506
0
        return(0);
1507
1508
0
    nxt++;
1509
0
    cur = nxt;
1510
    /* now we can have script or region or variant */
1511
0
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1512
0
        goto region_m49;
1513
1514
0
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1515
0
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1516
0
           nxt++;
1517
0
    if (nxt - cur == 2)
1518
0
        goto region;
1519
0
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1520
0
        goto variant;
1521
0
    if (nxt - cur != 4)
1522
0
        return(0);
1523
    /* we parsed a script */
1524
0
script:
1525
0
    if (nxt[0] == 0)
1526
0
        return(1);
1527
0
    if (nxt[0] != '-')
1528
0
        return(0);
1529
1530
0
    nxt++;
1531
0
    cur = nxt;
1532
    /* now we can have region or variant */
1533
0
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1534
0
        goto region_m49;
1535
1536
0
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1537
0
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1538
0
           nxt++;
1539
1540
0
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1541
0
        goto variant;
1542
0
    if (nxt - cur != 2)
1543
0
        return(0);
1544
    /* we parsed a region */
1545
0
region:
1546
0
    if (nxt[0] == 0)
1547
0
        return(1);
1548
0
    if (nxt[0] != '-')
1549
0
        return(0);
1550
1551
0
    nxt++;
1552
0
    cur = nxt;
1553
    /* now we can just have a variant */
1554
0
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1555
0
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1556
0
           nxt++;
1557
1558
0
    if ((nxt - cur < 5) || (nxt - cur > 8))
1559
0
        return(0);
1560
1561
    /* we parsed a variant */
1562
0
variant:
1563
0
    if (nxt[0] == 0)
1564
0
        return(1);
1565
0
    if (nxt[0] != '-')
1566
0
        return(0);
1567
    /* extensions and private use subtags not checked */
1568
0
    return (1);
1569
1570
0
region_m49:
1571
0
    if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
1572
0
        ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
1573
0
        nxt += 3;
1574
0
        goto region;
1575
0
    }
1576
0
    return(0);
1577
0
}
1578
1579
/************************************************************************
1580
 *                  *
1581
 *    Parser stacks related functions and macros    *
1582
 *                  *
1583
 ************************************************************************/
1584
1585
static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1586
                                            const xmlChar ** str);
1587
1588
#ifdef SAX2
1589
/**
1590
 * nsPush:
1591
 * @ctxt:  an XML parser context
1592
 * @prefix:  the namespace prefix or NULL
1593
 * @URL:  the namespace name
1594
 *
1595
 * Pushes a new parser namespace on top of the ns stack
1596
 *
1597
 * Returns -1 in case of error, -2 if the namespace should be discarded
1598
 *     and the index in the stack otherwise.
1599
 */
1600
static int
1601
nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1602
1.86M
{
1603
1.86M
    if (ctxt->options & XML_PARSE_NSCLEAN) {
1604
0
        int i;
1605
0
  for (i = ctxt->nsNr - 2;i >= 0;i -= 2) {
1606
0
      if (ctxt->nsTab[i] == prefix) {
1607
    /* in scope */
1608
0
          if (ctxt->nsTab[i + 1] == URL)
1609
0
        return(-2);
1610
    /* out of scope keep it */
1611
0
    break;
1612
0
      }
1613
0
  }
1614
0
    }
1615
1.86M
    if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1616
115k
  ctxt->nsMax = 10;
1617
115k
  ctxt->nsNr = 0;
1618
115k
  ctxt->nsTab = (const xmlChar **)
1619
115k
                xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1620
115k
  if (ctxt->nsTab == NULL) {
1621
0
      xmlErrMemory(ctxt, NULL);
1622
0
      ctxt->nsMax = 0;
1623
0
            return (-1);
1624
0
  }
1625
1.74M
    } else if (ctxt->nsNr >= ctxt->nsMax) {
1626
34.8k
        const xmlChar ** tmp;
1627
34.8k
        ctxt->nsMax *= 2;
1628
34.8k
        tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1629
34.8k
            ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1630
34.8k
        if (tmp == NULL) {
1631
0
            xmlErrMemory(ctxt, NULL);
1632
0
      ctxt->nsMax /= 2;
1633
0
            return (-1);
1634
0
        }
1635
34.8k
  ctxt->nsTab = tmp;
1636
34.8k
    }
1637
1.86M
    ctxt->nsTab[ctxt->nsNr++] = prefix;
1638
1.86M
    ctxt->nsTab[ctxt->nsNr++] = URL;
1639
1.86M
    return (ctxt->nsNr);
1640
1.86M
}
1641
/**
1642
 * nsPop:
1643
 * @ctxt: an XML parser context
1644
 * @nr:  the number to pop
1645
 *
1646
 * Pops the top @nr parser prefix/namespace from the ns stack
1647
 *
1648
 * Returns the number of namespaces removed
1649
 */
1650
static int
1651
nsPop(xmlParserCtxtPtr ctxt, int nr)
1652
302k
{
1653
302k
    int i;
1654
1655
302k
    if (ctxt->nsTab == NULL) return(0);
1656
302k
    if (ctxt->nsNr < nr) {
1657
0
        xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1658
0
        nr = ctxt->nsNr;
1659
0
    }
1660
302k
    if (ctxt->nsNr <= 0)
1661
0
        return (0);
1662
1663
1.07M
    for (i = 0;i < nr;i++) {
1664
777k
         ctxt->nsNr--;
1665
777k
   ctxt->nsTab[ctxt->nsNr] = NULL;
1666
777k
    }
1667
302k
    return(nr);
1668
302k
}
1669
#endif
1670
1671
static int
1672
119k
xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1673
119k
    const xmlChar **atts;
1674
119k
    int *attallocs;
1675
119k
    int maxatts;
1676
1677
119k
    if (ctxt->atts == NULL) {
1678
117k
  maxatts = 55; /* allow for 10 attrs by default */
1679
117k
  atts = (const xmlChar **)
1680
117k
         xmlMalloc(maxatts * sizeof(xmlChar *));
1681
117k
  if (atts == NULL) goto mem_error;
1682
117k
  ctxt->atts = atts;
1683
117k
  attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1684
117k
  if (attallocs == NULL) goto mem_error;
1685
117k
  ctxt->attallocs = attallocs;
1686
117k
  ctxt->maxatts = maxatts;
1687
117k
    } else if (nr + 5 > ctxt->maxatts) {
1688
2.27k
  maxatts = (nr + 5) * 2;
1689
2.27k
  atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1690
2.27k
             maxatts * sizeof(const xmlChar *));
1691
2.27k
  if (atts == NULL) goto mem_error;
1692
2.27k
  ctxt->atts = atts;
1693
2.27k
  attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1694
2.27k
                               (maxatts / 5) * sizeof(int));
1695
2.27k
  if (attallocs == NULL) goto mem_error;
1696
2.27k
  ctxt->attallocs = attallocs;
1697
2.27k
  ctxt->maxatts = maxatts;
1698
2.27k
    }
1699
119k
    return(ctxt->maxatts);
1700
0
mem_error:
1701
0
    xmlErrMemory(ctxt, NULL);
1702
0
    return(-1);
1703
119k
}
1704
1705
/**
1706
 * inputPush:
1707
 * @ctxt:  an XML parser context
1708
 * @value:  the parser input
1709
 *
1710
 * Pushes a new parser input on top of the input stack
1711
 *
1712
 * Returns -1 in case of error, the index in the stack otherwise
1713
 */
1714
int
1715
inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1716
821k
{
1717
821k
    if ((ctxt == NULL) || (value == NULL))
1718
0
        return(-1);
1719
821k
    if (ctxt->inputNr >= ctxt->inputMax) {
1720
0
        ctxt->inputMax *= 2;
1721
0
        ctxt->inputTab =
1722
0
            (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1723
0
                                             ctxt->inputMax *
1724
0
                                             sizeof(ctxt->inputTab[0]));
1725
0
        if (ctxt->inputTab == NULL) {
1726
0
            xmlErrMemory(ctxt, NULL);
1727
0
      xmlFreeInputStream(value);
1728
0
      ctxt->inputMax /= 2;
1729
0
      value = NULL;
1730
0
            return (-1);
1731
0
        }
1732
0
    }
1733
821k
    ctxt->inputTab[ctxt->inputNr] = value;
1734
821k
    ctxt->input = value;
1735
821k
    return (ctxt->inputNr++);
1736
821k
}
1737
/**
1738
 * inputPop:
1739
 * @ctxt: an XML parser context
1740
 *
1741
 * Pops the top parser input from the input stack
1742
 *
1743
 * Returns the input just removed
1744
 */
1745
xmlParserInputPtr
1746
inputPop(xmlParserCtxtPtr ctxt)
1747
1.61M
{
1748
1.61M
    xmlParserInputPtr ret;
1749
1750
1.61M
    if (ctxt == NULL)
1751
0
        return(NULL);
1752
1.61M
    if (ctxt->inputNr <= 0)
1753
793k
        return (NULL);
1754
821k
    ctxt->inputNr--;
1755
821k
    if (ctxt->inputNr > 0)
1756
425k
        ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1757
396k
    else
1758
396k
        ctxt->input = NULL;
1759
821k
    ret = ctxt->inputTab[ctxt->inputNr];
1760
821k
    ctxt->inputTab[ctxt->inputNr] = NULL;
1761
821k
    return (ret);
1762
1.61M
}
1763
/**
1764
 * nodePush:
1765
 * @ctxt:  an XML parser context
1766
 * @value:  the element node
1767
 *
1768
 * Pushes a new element node on top of the node stack
1769
 *
1770
 * Returns -1 in case of error, the index in the stack otherwise
1771
 */
1772
int
1773
nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1774
17.7M
{
1775
17.7M
    if (ctxt == NULL) return(0);
1776
17.7M
    if (ctxt->nodeNr >= ctxt->nodeMax) {
1777
80.2k
        xmlNodePtr *tmp;
1778
1779
80.2k
  tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1780
80.2k
                                      ctxt->nodeMax * 2 *
1781
80.2k
                                      sizeof(ctxt->nodeTab[0]));
1782
80.2k
        if (tmp == NULL) {
1783
0
            xmlErrMemory(ctxt, NULL);
1784
0
            return (-1);
1785
0
        }
1786
80.2k
        ctxt->nodeTab = tmp;
1787
80.2k
  ctxt->nodeMax *= 2;
1788
80.2k
    }
1789
17.7M
    if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1790
447
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
1791
447
  xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
1792
447
     "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
1793
447
        xmlParserMaxDepth);
1794
447
  xmlHaltParser(ctxt);
1795
447
  return(-1);
1796
447
    }
1797
17.7M
    ctxt->nodeTab[ctxt->nodeNr] = value;
1798
17.7M
    ctxt->node = value;
1799
17.7M
    return (ctxt->nodeNr++);
1800
17.7M
}
1801
1802
/**
1803
 * nodePop:
1804
 * @ctxt: an XML parser context
1805
 *
1806
 * Pops the top element node from the node stack
1807
 *
1808
 * Returns the node just removed
1809
 */
1810
xmlNodePtr
1811
nodePop(xmlParserCtxtPtr ctxt)
1812
17.1M
{
1813
17.1M
    xmlNodePtr ret;
1814
1815
17.1M
    if (ctxt == NULL) return(NULL);
1816
17.1M
    if (ctxt->nodeNr <= 0)
1817
885k
        return (NULL);
1818
16.2M
    ctxt->nodeNr--;
1819
16.2M
    if (ctxt->nodeNr > 0)
1820
14.7M
        ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1821
1.45M
    else
1822
1.45M
        ctxt->node = NULL;
1823
16.2M
    ret = ctxt->nodeTab[ctxt->nodeNr];
1824
16.2M
    ctxt->nodeTab[ctxt->nodeNr] = NULL;
1825
16.2M
    return (ret);
1826
17.1M
}
1827
1828
#ifdef LIBXML_PUSH_ENABLED
1829
/**
1830
 * nameNsPush:
1831
 * @ctxt:  an XML parser context
1832
 * @value:  the element name
1833
 * @prefix:  the element prefix
1834
 * @URI:  the element namespace name
1835
 *
1836
 * Pushes a new element name/prefix/URL on top of the name stack
1837
 *
1838
 * Returns -1 in case of error, the index in the stack otherwise
1839
 */
1840
static int
1841
nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1842
           const xmlChar *prefix, const xmlChar *URI, int nsNr)
1843
12.8M
{
1844
12.8M
    if (ctxt->nameNr >= ctxt->nameMax) {
1845
97.5k
        const xmlChar * *tmp;
1846
97.5k
        void **tmp2;
1847
97.5k
        ctxt->nameMax *= 2;
1848
97.5k
        tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1849
97.5k
                                    ctxt->nameMax *
1850
97.5k
                                    sizeof(ctxt->nameTab[0]));
1851
97.5k
        if (tmp == NULL) {
1852
0
      ctxt->nameMax /= 2;
1853
0
      goto mem_error;
1854
0
        }
1855
97.5k
  ctxt->nameTab = tmp;
1856
97.5k
        tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1857
97.5k
                                    ctxt->nameMax * 3 *
1858
97.5k
                                    sizeof(ctxt->pushTab[0]));
1859
97.5k
        if (tmp2 == NULL) {
1860
0
      ctxt->nameMax /= 2;
1861
0
      goto mem_error;
1862
0
        }
1863
97.5k
  ctxt->pushTab = tmp2;
1864
97.5k
    }
1865
12.8M
    ctxt->nameTab[ctxt->nameNr] = value;
1866
12.8M
    ctxt->name = value;
1867
12.8M
    ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1868
12.8M
    ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
1869
12.8M
    ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (ptrdiff_t) nsNr;
1870
12.8M
    return (ctxt->nameNr++);
1871
0
mem_error:
1872
0
    xmlErrMemory(ctxt, NULL);
1873
0
    return (-1);
1874
12.8M
}
1875
/**
1876
 * nameNsPop:
1877
 * @ctxt: an XML parser context
1878
 *
1879
 * Pops the top element/prefix/URI name from the name stack
1880
 *
1881
 * Returns the name just removed
1882
 */
1883
static const xmlChar *
1884
nameNsPop(xmlParserCtxtPtr ctxt)
1885
7.30M
{
1886
7.30M
    const xmlChar *ret;
1887
1888
7.30M
    if (ctxt->nameNr <= 0)
1889
0
        return (NULL);
1890
7.30M
    ctxt->nameNr--;
1891
7.30M
    if (ctxt->nameNr > 0)
1892
7.26M
        ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1893
32.4k
    else
1894
32.4k
        ctxt->name = NULL;
1895
7.30M
    ret = ctxt->nameTab[ctxt->nameNr];
1896
7.30M
    ctxt->nameTab[ctxt->nameNr] = NULL;
1897
7.30M
    return (ret);
1898
7.30M
}
1899
#endif /* LIBXML_PUSH_ENABLED */
1900
1901
/**
1902
 * namePush:
1903
 * @ctxt:  an XML parser context
1904
 * @value:  the element name
1905
 *
1906
 * Pushes a new element name on top of the name stack
1907
 *
1908
 * Returns -1 in case of error, the index in the stack otherwise
1909
 */
1910
int
1911
namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
1912
1.85M
{
1913
1.85M
    if (ctxt == NULL) return (-1);
1914
1915
1.85M
    if (ctxt->nameNr >= ctxt->nameMax) {
1916
40.5k
        const xmlChar * *tmp;
1917
40.5k
        tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1918
40.5k
                                    ctxt->nameMax * 2 *
1919
40.5k
                                    sizeof(ctxt->nameTab[0]));
1920
40.5k
        if (tmp == NULL) {
1921
0
      goto mem_error;
1922
0
        }
1923
40.5k
  ctxt->nameTab = tmp;
1924
40.5k
        ctxt->nameMax *= 2;
1925
40.5k
    }
1926
1.85M
    ctxt->nameTab[ctxt->nameNr] = value;
1927
1.85M
    ctxt->name = value;
1928
1.85M
    return (ctxt->nameNr++);
1929
0
mem_error:
1930
0
    xmlErrMemory(ctxt, NULL);
1931
0
    return (-1);
1932
1.85M
}
1933
/**
1934
 * namePop:
1935
 * @ctxt: an XML parser context
1936
 *
1937
 * Pops the top element name from the name stack
1938
 *
1939
 * Returns the name just removed
1940
 */
1941
const xmlChar *
1942
namePop(xmlParserCtxtPtr ctxt)
1943
1.69M
{
1944
1.69M
    const xmlChar *ret;
1945
1946
1.69M
    if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1947
0
        return (NULL);
1948
1.69M
    ctxt->nameNr--;
1949
1.69M
    if (ctxt->nameNr > 0)
1950
1.41M
        ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1951
281k
    else
1952
281k
        ctxt->name = NULL;
1953
1.69M
    ret = ctxt->nameTab[ctxt->nameNr];
1954
1.69M
    ctxt->nameTab[ctxt->nameNr] = NULL;
1955
1.69M
    return (ret);
1956
1.69M
}
1957
1958
18.9M
/*
 * spacePush:
 * Pushes @val on top of the space stack (ctxt->spaceTab), doubling the
 * table when it is full, and points ctxt->space at the new top entry.
 *
 * Returns -1 in case of allocation failure, the index in the stack
 * otherwise.
 */
static int spacePush(xmlParserCtxtPtr ctxt, int val) {
    if (ctxt->spaceNr >= ctxt->spaceMax) {
        int *tmp;
        int newMax;

        /*
         * Refuse to grow when doubling would overflow the int capacity
         * or the size_t byte count.
         */
        if ((ctxt->spaceMax > INT_MAX / 2) ||
            ((size_t) ctxt->spaceMax >
             ((size_t) -1) / 2 / sizeof(ctxt->spaceTab[0]))) {
            xmlErrMemory(ctxt, NULL);
            return(-1);
        }
        newMax = ctxt->spaceMax * 2;

        tmp = (int *) xmlRealloc(ctxt->spaceTab,
                                 (size_t) newMax * sizeof(ctxt->spaceTab[0]));
        if (tmp == NULL) {
            /* capacity is left unchanged, the old table is still valid */
            xmlErrMemory(ctxt, NULL);
            return(-1);
        }
        ctxt->spaceTab = tmp;
        ctxt->spaceMax = newMax;
    }
    ctxt->spaceTab[ctxt->spaceNr] = val;
    ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
    return(ctxt->spaceNr++);
}
1976
1977
17.5M
/*
 * spacePop:
 * Pops the top value from the space stack. ctxt->space is moved to the
 * entry below, or to slot 0 once the stack is empty, and the vacated
 * slot is overwritten with -1.
 *
 * Returns the value removed, or 0 if the stack was already empty.
 */
static int spacePop(xmlParserCtxtPtr ctxt) {
    int top;
    int popped;

    if (ctxt->spaceNr <= 0)
        return(0);

    top = --ctxt->spaceNr;
    ctxt->space = &ctxt->spaceTab[(top > 0) ? (top - 1) : 0];

    popped = ctxt->spaceTab[top];
    ctxt->spaceTab[top] = -1;
    return(popped);
}
1989
1990
/*
1991
 * Macros for accessing the content. Those should be used only by the parser,
1992
 * and not exported.
1993
 *
1994
 * Dirty macros, i.e. one often need to make assumption on the context to
1995
 * use them
1996
 *
1997
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
1998
 *           To be used with extreme caution since operations consuming
1999
 *           characters may move the input buffer to a different location !
2000
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
2001
 *           This should be used internally by the parser
2002
 *           only to compare to ASCII values otherwise it would break when
2003
 *           running with UTF-8 encoding.
2004
 *   RAW     same as CUR but in the input buffer, bypass any token
2005
 *           extraction that may have been done
2006
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR it should be used only
2007
 *           to compare on ASCII based substring.
2008
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
2009
 *           strings without newlines within the parser.
2010
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII
2011
 *           defined char within the parser.
2012
 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
2013
 *
2014
 *   NEXT    Skip to the next character, this does the proper decoding
2015
 *           in UTF-8 mode. It also pop-up unfinished entities on the fly.
2016
 *   NEXTL(l) Skip the current unicode character of l xmlChars long.
2017
 *   CUR_CHAR(l) returns the current unicode character (int), set l
2018
 *           to the number of xmlChars used for the encoding [0-5].
2019
 *   CUR_SCHAR  same but operate on a string instead of the context
2020
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
2021
 *            the index
2022
 *   GROW, SHRINK  handling of input buffers
2023
 */
2024
2025
186M
/* Byte at the current input position; RAW and CUR expand identically. */
#define RAW      (*ctxt->input->cur)
#define CUR      (*ctxt->input->cur)
/* Byte located (val) positions after the current one. */
#define NXT(val) (ctxt->input->cur[(val)])
/* Current read pointer and start of the current input buffer. */
#define CUR_PTR  ctxt->input->cur
#define BASE_PTR ctxt->input->base
2030
2031
/*
 * CMPn(s, c1, ..., cn): true when the first n bytes at s equal c1..cn.
 * Bytes are compared as unsigned char and evaluation stops at the first
 * mismatch. Every argument is parenthesized so that arbitrary
 * expressions can be passed; note that s is evaluated once per byte
 * compared.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((unsigned char *) (s))[ 0 ] == (c1) && \
    ((unsigned char *) (s))[ 1 ] == (c2) && \
    ((unsigned char *) (s))[ 2 ] == (c3) && \
    ((unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && \
    ((unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((unsigned char *) (s))[ 9 ] == (c10) )
2048
2049
16.1M
/*
 * SKIP(val): advance the input by (val) bytes, updating nbChars and the
 * column but not the line counter, then refill the input if a 0 byte is
 * reached. (val) is evaluated three times.
 */
#define SKIP(val) do {							\
    ctxt->nbChars += (val),ctxt->input->cur += (val),ctxt->input->col+=(val);			\
    if (*ctxt->input->cur == 0)						\
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);			\
  } while (0)

/*
 * SKIPL(val): advance the input by (val) bytes one at a time, keeping
 * both line and column up to date, then refill the input if a 0 byte is
 * reached. (val) is re-evaluated on every loop iteration.
 */
#define SKIPL(val) do {							\
    int skipl;								\
    for(skipl=0; skipl<(val); skipl++) {				\
	if (*(ctxt->input->cur) == '\n') {				\
	ctxt->input->line++; ctxt->input->col = 1;			\
	} else ctxt->input->col++;					\
	ctxt->nbChars++;						\
	ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == 0)						\
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);			\
  } while (0)
2067
2068
82.1M
#define SHRINK if ((ctxt->progressive == 0) &&       \
2069
82.1M
       (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
2070
82.1M
       (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
2071
233k
  xmlSHRINK (ctxt);
2072
2073
264k
static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
2074
264k
    xmlParserInputShrink(ctxt->input);
2075
264k
    if (*ctxt->input->cur == 0)
2076
29.3k
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2077
264k
}
2078
2079
3.16G
#define GROW if ((ctxt->progressive == 0) &&       \
2080
3.16G
     (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
2081
3.16G
  xmlGROW (ctxt);
2082
2083
19.3M
static void xmlGROW (xmlParserCtxtPtr ctxt) {
2084
19.3M
    unsigned long curEnd = ctxt->input->end - ctxt->input->cur;
2085
19.3M
    unsigned long curBase = ctxt->input->cur - ctxt->input->base;
2086
2087
19.3M
    if (((curEnd > (unsigned long) XML_MAX_LOOKUP_LIMIT) ||
2088
19.3M
         (curBase > (unsigned long) XML_MAX_LOOKUP_LIMIT)) &&
2089
42
         ((ctxt->input->buf) && (ctxt->input->buf->readcallback != (xmlInputReadCallback) xmlNop)) &&
2090
28
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
2091
28
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
2092
28
        xmlHaltParser(ctxt);
2093
28
  return;
2094
28
    }
2095
19.3M
    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2096
19.3M
    if ((ctxt->input->cur > ctxt->input->end) ||
2097
19.3M
        (ctxt->input->cur < ctxt->input->base)) {
2098
0
        xmlHaltParser(ctxt);
2099
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "cur index out of bound");
2100
0
  return;
2101
0
    }
2102
19.3M
    if ((ctxt->input->cur != NULL) && (*ctxt->input->cur == 0))
2103
2.04M
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2104
19.3M
}
2105
2106
57.8M
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: advance by exactly one byte (column and nbChars are updated,
 * the line counter is not) and refill the input on a 0 byte.
 * Kept as a plain brace block because call sites are written against
 * that form.
 */
#define NEXT1 {								\
	ctxt->input->col++;						\
	ctxt->input->cur++;						\
	ctxt->nbChars++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }

/*
 * NEXTL(l): step over the current character, which is (l) bytes long,
 * updating line/column from the byte under the read pointer.
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += (l);				\
  } while (0)

#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l)
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)

/*
 * COPY_BUF(l,b,i,v): append character v to buffer b at index i and
 * advance i. A character of length l == 1 is stored as a single byte,
 * anything else goes through xmlCopyCharMultiByte().
 * Wrapped in do/while(0) so it behaves as one statement inside an
 * unbraced if/else.
 */
#define COPY_BUF(l,b,i,v) do {						\
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v);				\
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v));			\
  } while (0)
2131
2132
/**
2133
 * xmlSkipBlankChars:
2134
 * @ctxt:  the XML parser context
2135
 *
2136
 * skip all blanks character found at that point in the input streams.
2137
 * It pops up finished entities in the process if allowable at that point.
2138
 *
2139
 * Returns the number of space chars skipped
2140
 */
2141
2142
int
2143
57.8M
xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
2144
57.8M
    int res = 0;
2145
2146
    /*
2147
     * It's Okay to use CUR/NEXT here since all the blanks are on
2148
     * the ASCII range.
2149
     */
2150
57.8M
    if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
2151
51.4M
  const xmlChar *cur;
2152
  /*
2153
   * if we are in the document content, go really fast
2154
   */
2155
51.4M
  cur = ctxt->input->cur;
2156
78.1M
  while (IS_BLANK_CH(*cur)) {
2157
78.1M
      if (*cur == '\n') {
2158
4.45M
    ctxt->input->line++; ctxt->input->col = 1;
2159
73.7M
      } else {
2160
73.7M
    ctxt->input->col++;
2161
73.7M
      }
2162
78.1M
      cur++;
2163
78.1M
      res++;
2164
78.1M
      if (*cur == 0) {
2165
132k
    ctxt->input->cur = cur;
2166
132k
    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2167
132k
    cur = ctxt->input->cur;
2168
132k
      }
2169
78.1M
  }
2170
51.4M
  ctxt->input->cur = cur;
2171
51.4M
    } else {
2172
6.45M
        int expandPE = ((ctxt->external != 0) || (ctxt->inputNr != 1));
2173
2174
11.2M
  while (1) {
2175
11.2M
            if (IS_BLANK_CH(CUR)) { /* CHECKED tstblanks.xml */
2176
4.05M
    NEXT;
2177
7.15M
      } else if (CUR == '%') {
2178
                /*
2179
                 * Need to handle support of entities branching here
2180
                 */
2181
874k
          if ((expandPE == 0) || (IS_BLANK_CH(NXT(1))) || (NXT(1) == 0))
2182
455k
                    break;
2183
418k
          xmlParsePEReference(ctxt);
2184
6.27M
            } else if (CUR == 0) {
2185
288k
                if (ctxt->inputNr <= 1)
2186
10.6k
                    break;
2187
278k
                xmlPopInput(ctxt);
2188
5.98M
            } else {
2189
5.98M
                break;
2190
5.98M
            }
2191
2192
            /*
2193
             * Also increase the counter when entering or exiting a PERef.
2194
             * The spec says: "When a parameter-entity reference is recognized
2195
             * in the DTD and included, its replacement text MUST be enlarged
2196
             * by the attachment of one leading and one following space (#x20)
2197
             * character."
2198
             */
2199
4.75M
      res++;
2200
4.75M
        }
2201
6.45M
    }
2202
57.8M
    return(res);
2203
57.8M
}
2204
2205
/************************************************************************
2206
 *                  *
2207
 *    Commodity functions to handle entities      *
2208
 *                  *
2209
 ************************************************************************/
2210
2211
/**
2212
 * xmlPopInput:
2213
 * @ctxt:  an XML parser context
2214
 *
2215
 * xmlPopInput: the current input pointed by ctxt->input came to an end
2216
 *          pop it and return the next char.
2217
 *
2218
 * Returns the current xmlChar in the parser context
2219
 */
2220
xmlChar
2221
424k
xmlPopInput(xmlParserCtxtPtr ctxt) {
2222
424k
    if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
2223
424k
    if (xmlParserDebugEntities)
2224
0
  xmlGenericError(xmlGenericErrorContext,
2225
0
    "Popping input %d\n", ctxt->inputNr);
2226
424k
    if ((ctxt->inputNr > 1) && (ctxt->inSubset == 0) &&
2227
0
        (ctxt->instate != XML_PARSER_EOF))
2228
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2229
0
                    "Unfinished entity outside the DTD");
2230
424k
    xmlFreeInputStream(inputPop(ctxt));
2231
424k
    if (*ctxt->input->cur == 0)
2232
135
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2233
424k
    return(CUR);
2234
424k
}
2235
2236
/**
2237
 * xmlPushInput:
2238
 * @ctxt:  an XML parser context
2239
 * @input:  an XML parser input fragment (entity, XML fragment ...).
2240
 *
2241
 * xmlPushInput: switch to a new input stream which is stacked on top
2242
 *               of the previous one(s).
2243
 * Returns -1 in case of error or the index in the input stack
2244
 */
2245
int
2246
425k
xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
2247
425k
    int ret;
2248
425k
    if (input == NULL) return(-1);
2249
2250
425k
    if (xmlParserDebugEntities) {
2251
0
  if ((ctxt->input != NULL) && (ctxt->input->filename))
2252
0
      xmlGenericError(xmlGenericErrorContext,
2253
0
        "%s(%d): ", ctxt->input->filename,
2254
0
        ctxt->input->line);
2255
0
  xmlGenericError(xmlGenericErrorContext,
2256
0
    "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
2257
0
    }
2258
425k
    if (((ctxt->inputNr > 40) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2259
425k
        (ctxt->inputNr > 1024)) {
2260
0
        xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2261
0
        while (ctxt->inputNr > 1)
2262
0
            xmlFreeInputStream(inputPop(ctxt));
2263
0
  return(-1);
2264
0
    }
2265
425k
    ret = inputPush(ctxt, input);
2266
425k
    if (ctxt->instate == XML_PARSER_EOF)
2267
0
        return(-1);
2268
425k
    GROW;
2269
425k
    return(ret);
2270
425k
}
2271
2272
/**
2273
 * xmlParseCharRef:
2274
 * @ctxt:  an XML parser context
2275
 *
2276
 * parse Reference declarations
2277
 *
2278
 * [66] CharRef ::= '&#' [0-9]+ ';' |
2279
 *                  '&#x' [0-9a-fA-F]+ ';'
2280
 *
2281
 * [ WFC: Legal Character ]
2282
 * Characters referred to using character references must match the
2283
 * production for Char.
2284
 *
2285
 * Returns the value parsed (as an int), 0 in case of error
2286
 */
2287
int
2288
744k
xmlParseCharRef(xmlParserCtxtPtr ctxt) {
2289
744k
    unsigned int val = 0;
2290
744k
    int count = 0;
2291
744k
    unsigned int outofrange = 0;
2292
2293
    /*
2294
     * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2295
     */
2296
744k
    if ((RAW == '&') && (NXT(1) == '#') &&
2297
744k
        (NXT(2) == 'x')) {
2298
302k
  SKIP(3);
2299
302k
  GROW;
2300
1.37M
  while (RAW != ';') { /* loop blocked by count */
2301
1.17M
      if (count++ > 20) {
2302
29.1k
    count = 0;
2303
29.1k
    GROW;
2304
29.1k
                if (ctxt->instate == XML_PARSER_EOF)
2305
0
                    return(0);
2306
29.1k
      }
2307
1.17M
      if ((RAW >= '0') && (RAW <= '9'))
2308
578k
          val = val * 16 + (CUR - '0');
2309
592k
      else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2310
383k
          val = val * 16 + (CUR - 'a') + 10;
2311
208k
      else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2312
106k
          val = val * 16 + (CUR - 'A') + 10;
2313
102k
      else {
2314
102k
    xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2315
102k
    val = 0;
2316
102k
    break;
2317
102k
      }
2318
1.06M
      if (val > 0x10FFFF)
2319
231k
          outofrange = val;
2320
2321
1.06M
      NEXT;
2322
1.06M
      count++;
2323
1.06M
  }
2324
302k
  if (RAW == ';') {
2325
      /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2326
200k
      ctxt->input->col++;
2327
200k
      ctxt->nbChars ++;
2328
200k
      ctxt->input->cur++;
2329
200k
  }
2330
442k
    } else if  ((RAW == '&') && (NXT(1) == '#')) {
2331
442k
  SKIP(2);
2332
442k
  GROW;
2333
1.77M
  while (RAW != ';') { /* loop blocked by count */
2334
1.54M
      if (count++ > 20) {
2335
23.0k
    count = 0;
2336
23.0k
    GROW;
2337
23.0k
                if (ctxt->instate == XML_PARSER_EOF)
2338
0
                    return(0);
2339
23.0k
      }
2340
1.54M
      if ((RAW >= '0') && (RAW <= '9'))
2341
1.32M
          val = val * 10 + (CUR - '0');
2342
218k
      else {
2343
218k
    xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2344
218k
    val = 0;
2345
218k
    break;
2346
218k
      }
2347
1.32M
      if (val > 0x10FFFF)
2348
270k
          outofrange = val;
2349
2350
1.32M
      NEXT;
2351
1.32M
      count++;
2352
1.32M
  }
2353
442k
  if (RAW == ';') {
2354
      /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2355
223k
      ctxt->input->col++;
2356
223k
      ctxt->nbChars ++;
2357
223k
      ctxt->input->cur++;
2358
223k
  }
2359
442k
    } else {
2360
0
        xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2361
0
    }
2362
2363
    /*
2364
     * [ WFC: Legal Character ]
2365
     * Characters referred to using character references must match the
2366
     * production for Char.
2367
     */
2368
744k
    if ((IS_CHAR(val) && (outofrange == 0))) {
2369
361k
        return(val);
2370
383k
    } else {
2371
383k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2372
383k
                          "xmlParseCharRef: invalid xmlChar value %d\n",
2373
383k
                    val);
2374
383k
    }
2375
383k
    return(0);
2376
744k
}
2377
2378
/**
2379
 * xmlParseStringCharRef:
2380
 * @ctxt:  an XML parser context
2381
 * @str:  a pointer to an index in the string
2382
 *
2383
 * parse Reference declarations, variant parsing from a string rather
2384
 * than an an input flow.
2385
 *
2386
 * [66] CharRef ::= '&#' [0-9]+ ';' |
2387
 *                  '&#x' [0-9a-fA-F]+ ';'
2388
 *
2389
 * [ WFC: Legal Character ]
2390
 * Characters referred to using character references must match the
2391
 * production for Char.
2392
 *
2393
 * Returns the value parsed (as an int), 0 in case of error, str will be
2394
 *         updated to the current value of the index
2395
 */
2396
static int
2397
203k
xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2398
203k
    const xmlChar *ptr;
2399
203k
    xmlChar cur;
2400
203k
    unsigned int val = 0;
2401
203k
    unsigned int outofrange = 0;
2402
2403
203k
    if ((str == NULL) || (*str == NULL)) return(0);
2404
203k
    ptr = *str;
2405
203k
    cur = *ptr;
2406
203k
    if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2407
66.6k
  ptr += 3;
2408
66.6k
  cur = *ptr;
2409
331k
  while (cur != ';') { /* Non input consuming loop */
2410
292k
      if ((cur >= '0') && (cur <= '9'))
2411
185k
          val = val * 16 + (cur - '0');
2412
106k
      else if ((cur >= 'a') && (cur <= 'f'))
2413
28.9k
          val = val * 16 + (cur - 'a') + 10;
2414
77.8k
      else if ((cur >= 'A') && (cur <= 'F'))
2415
49.5k
          val = val * 16 + (cur - 'A') + 10;
2416
28.3k
      else {
2417
28.3k
    xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2418
28.3k
    val = 0;
2419
28.3k
    break;
2420
28.3k
      }
2421
264k
      if (val > 0x10FFFF)
2422
64.6k
          outofrange = val;
2423
2424
264k
      ptr++;
2425
264k
      cur = *ptr;
2426
264k
  }
2427
66.6k
  if (cur == ';')
2428
38.2k
      ptr++;
2429
137k
    } else if  ((cur == '&') && (ptr[1] == '#')){
2430
137k
  ptr += 2;
2431
137k
  cur = *ptr;
2432
481k
  while (cur != ';') { /* Non input consuming loops */
2433
360k
      if ((cur >= '0') && (cur <= '9'))
2434
344k
          val = val * 10 + (cur - '0');
2435
16.0k
      else {
2436
16.0k
    xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2437
16.0k
    val = 0;
2438
16.0k
    break;
2439
16.0k
      }
2440
344k
      if (val > 0x10FFFF)
2441
17.5k
          outofrange = val;
2442
2443
344k
      ptr++;
2444
344k
      cur = *ptr;
2445
344k
  }
2446
137k
  if (cur == ';')
2447
121k
      ptr++;
2448
137k
    } else {
2449
0
  xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2450
0
  return(0);
2451
0
    }
2452
203k
    *str = ptr;
2453
2454
    /*
2455
     * [ WFC: Legal Character ]
2456
     * Characters referred to using character references must match the
2457
     * production for Char.
2458
     */
2459
203k
    if ((IS_CHAR(val) && (outofrange == 0))) {
2460
141k
        return(val);
2461
141k
    } else {
2462
62.0k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2463
62.0k
        "xmlParseStringCharRef: invalid xmlChar value %d\n",
2464
62.0k
        val);
2465
62.0k
    }
2466
62.0k
    return(0);
2467
203k
}
2468
2469
/**
2470
 * xmlParserHandlePEReference:
2471
 * @ctxt:  the parser context
2472
 *
2473
 * [69] PEReference ::= '%' Name ';'
2474
 *
2475
 * [ WFC: No Recursion ]
2476
 * A parsed entity must not contain a recursive
2477
 * reference to itself, either directly or indirectly.
2478
 *
2479
 * [ WFC: Entity Declared ]
2480
 * In a document without any DTD, a document with only an internal DTD
2481
 * subset which contains no parameter entity references, or a document
2482
 * with "standalone='yes'", ...  ... The declaration of a parameter
2483
 * entity must precede any reference to it...
2484
 *
2485
 * [ VC: Entity Declared ]
2486
 * In a document with an external subset or external parameter entities
2487
 * with "standalone='no'", ...  ... The declaration of a parameter entity
2488
 * must precede any reference to it...
2489
 *
2490
 * [ WFC: In DTD ]
2491
 * Parameter-entity references may only appear in the DTD.
2492
 * NOTE: misleading but this is handled.
2493
 *
2494
 * A PEReference may have been detected in the current input stream
2495
 * the handling is done accordingly to
2496
 *      http://www.w3.org/TR/REC-xml#entproc
2497
 * i.e.
2498
 *   - Included in literal in entity values
2499
 *   - Included as Parameter Entity reference within DTDs
2500
 */
2501
void
2502
0
xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
2503
0
    switch(ctxt->instate) {
2504
0
  case XML_PARSER_CDATA_SECTION:
2505
0
      return;
2506
0
        case XML_PARSER_COMMENT:
2507
0
      return;
2508
0
  case XML_PARSER_START_TAG:
2509
0
      return;
2510
0
  case XML_PARSER_END_TAG:
2511
0
      return;
2512
0
        case XML_PARSER_EOF:
2513
0
      xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
2514
0
      return;
2515
0
        case XML_PARSER_PROLOG:
2516
0
  case XML_PARSER_START:
2517
0
  case XML_PARSER_MISC:
2518
0
      xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
2519
0
      return;
2520
0
  case XML_PARSER_ENTITY_DECL:
2521
0
        case XML_PARSER_CONTENT:
2522
0
        case XML_PARSER_ATTRIBUTE_VALUE:
2523
0
        case XML_PARSER_PI:
2524
0
  case XML_PARSER_SYSTEM_LITERAL:
2525
0
  case XML_PARSER_PUBLIC_LITERAL:
2526
      /* we just ignore it there */
2527
0
      return;
2528
0
        case XML_PARSER_EPILOG:
2529
0
      xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
2530
0
      return;
2531
0
  case XML_PARSER_ENTITY_VALUE:
2532
      /*
2533
       * NOTE: in the case of entity values, we don't do the
2534
       *       substitution here since we need the literal
2535
       *       entity value to be able to save the internal
2536
       *       subset of the document.
2537
       *       This will be handled by xmlStringDecodeEntities
2538
       */
2539
0
      return;
2540
0
        case XML_PARSER_DTD:
2541
      /*
2542
       * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2543
       * In the internal DTD subset, parameter-entity references
2544
       * can occur only where markup declarations can occur, not
2545
       * within markup declarations.
2546
       * In that case this is handled in xmlParseMarkupDecl
2547
       */
2548
0
      if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2549
0
    return;
2550
0
      if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
2551
0
    return;
2552
0
            break;
2553
0
        case XML_PARSER_IGNORE:
2554
0
            return;
2555
0
    }
2556
2557
0
    xmlParsePEReference(ctxt);
2558
0
}
2559
2560
/*
 * Macro used to grow the current buffer to twice its size plus (n) bytes.
 * buffer##_size is expected to be a size_t
 * mem_error: is expected to handle memory allocation failures; it is
 * also jumped to when the new size wraps around.
 * On failure buffer and buffer##_size are left unchanged.
 */
#define growBuffer(buffer, n) {						\
    xmlChar *tmp;							\
    size_t new_size = buffer##_size * 2 + (n);                          \
    if (new_size < buffer##_size) goto mem_error;                       \
    tmp = (xmlChar *) xmlRealloc(buffer, new_size);                     \
    if (tmp == NULL) goto mem_error;					\
    buffer = tmp;							\
    buffer##_size = new_size;                                           \
}
2574
2575
/**
2576
 * xmlStringLenDecodeEntities:
2577
 * @ctxt:  the parser context
2578
 * @str:  the input string
2579
 * @len: the string length
2580
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2581
 * @end:  an end marker xmlChar, 0 if none
2582
 * @end2:  an end marker xmlChar, 0 if none
2583
 * @end3:  an end marker xmlChar, 0 if none
2584
 *
2585
 * Takes a entity string content and process to do the adequate substitutions.
2586
 *
2587
 * [67] Reference ::= EntityRef | CharRef
2588
 *
2589
 * [69] PEReference ::= '%' Name ';'
2590
 *
2591
 * Returns A newly allocated string with the substitution done. The caller
2592
 *      must deallocate it !
2593
 */
2594
xmlChar *
2595
xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2596
771k
          int what, xmlChar end, xmlChar  end2, xmlChar end3) {
2597
771k
    xmlChar *buffer = NULL;
2598
771k
    size_t buffer_size = 0;
2599
771k
    size_t nbchars = 0;
2600
2601
771k
    xmlChar *current = NULL;
2602
771k
    xmlChar *rep = NULL;
2603
771k
    const xmlChar *last;
2604
771k
    xmlEntityPtr ent;
2605
771k
    int c,l;
2606
2607
771k
    if ((ctxt == NULL) || (str == NULL) || (len < 0))
2608
0
  return(NULL);
2609
771k
    last = str + len;
2610
2611
771k
    if (((ctxt->depth > 40) &&
2612
2.56k
         ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2613
768k
  (ctxt->depth > 1024)) {
2614
2.56k
  xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2615
2.56k
  return(NULL);
2616
2.56k
    }
2617
2618
    /*
2619
     * allocate a translation buffer.
2620
     */
2621
768k
    buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
2622
768k
    buffer = (xmlChar *) xmlMallocAtomic(buffer_size);
2623
768k
    if (buffer == NULL) goto mem_error;
2624
2625
    /*
2626
     * OK loop until we reach one of the ending char or a size limit.
2627
     * we are operating on already parsed values.
2628
     */
2629
768k
    if (str < last)
2630
677k
  c = CUR_SCHAR(str, l);
2631
90.8k
    else
2632
90.8k
        c = 0;
2633
1.90G
    while ((c != 0) && (c != end) && /* non input consuming loop */
2634
1.90G
     (c != end2) && (c != end3)) {
2635
2636
1.90G
  if (c == 0) break;
2637
1.90G
        if ((c == '&') && (str[1] == '#')) {
2638
203k
      int val = xmlParseStringCharRef(ctxt, &str);
2639
203k
      if (val == 0)
2640
62.0k
                goto int_error;
2641
141k
      COPY_BUF(0,buffer,nbchars,val);
2642
141k
      if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2643
3.96k
          growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2644
3.96k
      }
2645
1.90G
  } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2646
1.05M
      if (xmlParserDebugEntities)
2647
0
    xmlGenericError(xmlGenericErrorContext,
2648
0
      "String decoding Entity Reference: %.30s\n",
2649
0
      str);
2650
1.05M
      ent = xmlParseStringEntityRef(ctxt, &str);
2651
1.05M
      xmlParserEntityCheck(ctxt, 0, ent, 0);
2652
1.05M
      if (ent != NULL)
2653
679k
          ctxt->nbentities += ent->checked / 2;
2654
1.05M
      if ((ent != NULL) &&
2655
679k
    (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2656
51.9k
    if (ent->content != NULL) {
2657
51.9k
        COPY_BUF(0,buffer,nbchars,ent->content[0]);
2658
51.9k
        if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2659
2.58k
      growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2660
2.58k
        }
2661
51.9k
    } else {
2662
0
        xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2663
0
          "predefined entity has no content\n");
2664
0
                    goto int_error;
2665
0
    }
2666
1.00M
      } else if ((ent != NULL) && (ent->content != NULL)) {
2667
597k
    ctxt->depth++;
2668
597k
    rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2669
597k
                            0, 0, 0);
2670
597k
    ctxt->depth--;
2671
597k
    if (rep == NULL)
2672
88.4k
                    goto int_error;
2673
2674
509k
                current = rep;
2675
535M
                while (*current != 0) { /* non input consuming loop */
2676
534M
                    buffer[nbchars++] = *current++;
2677
534M
                    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2678
53.5k
                        if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
2679
833
                            goto int_error;
2680
158k
                        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2681
158k
                    }
2682
534M
                }
2683
508k
                xmlFree(rep);
2684
508k
                rep = NULL;
2685
508k
      } else if (ent != NULL) {
2686
30.3k
    int i = xmlStrlen(ent->name);
2687
30.3k
    const xmlChar *cur = ent->name;
2688
2689
30.3k
    buffer[nbchars++] = '&';
2690
30.3k
    if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) {
2691
954
        growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE);
2692
954
    }
2693
133k
    for (;i > 0;i--)
2694
103k
        buffer[nbchars++] = *cur++;
2695
30.3k
    buffer[nbchars++] = ';';
2696
30.3k
      }
2697
1.90G
  } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2698
0
      if (xmlParserDebugEntities)
2699
0
    xmlGenericError(xmlGenericErrorContext,
2700
0
      "String decoding PE Reference: %.30s\n", str);
2701
0
      ent = xmlParseStringPEReference(ctxt, &str);
2702
0
      xmlParserEntityCheck(ctxt, 0, ent, 0);
2703
0
      if (ent != NULL)
2704
0
          ctxt->nbentities += ent->checked / 2;
2705
0
      if (ent != NULL) {
2706
0
                if (ent->content == NULL) {
2707
        /*
2708
         * Note: external parsed entities will not be loaded,
2709
         * it is not required for a non-validating parser to
2710
         * complete external PEreferences coming from the
2711
         * internal subset
2712
         */
2713
0
        if (((ctxt->options & XML_PARSE_NOENT) != 0) ||
2714
0
      ((ctxt->options & XML_PARSE_DTDVALID) != 0) ||
2715
0
      (ctxt->validate != 0)) {
2716
0
      xmlLoadEntityContent(ctxt, ent);
2717
0
        } else {
2718
0
      xmlWarningMsg(ctxt, XML_ERR_ENTITY_PROCESSING,
2719
0
      "not validating will not read content for PE entity %s\n",
2720
0
                          ent->name, NULL);
2721
0
        }
2722
0
    }
2723
0
    ctxt->depth++;
2724
0
    rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2725
0
                            0, 0, 0);
2726
0
    ctxt->depth--;
2727
0
    if (rep == NULL)
2728
0
                    goto int_error;
2729
0
                current = rep;
2730
0
                while (*current != 0) { /* non input consuming loop */
2731
0
                    buffer[nbchars++] = *current++;
2732
0
                    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2733
0
                        if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
2734
0
                            goto int_error;
2735
0
                        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2736
0
                    }
2737
0
                }
2738
0
                xmlFree(rep);
2739
0
                rep = NULL;
2740
0
      }
2741
1.90G
  } else {
2742
1.90G
      COPY_BUF(l,buffer,nbchars,c);
2743
1.90G
      str += l;
2744
1.90G
      if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2745
304k
          growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2746
304k
      }
2747
1.90G
  }
2748
1.90G
  if (str < last)
2749
1.90G
      c = CUR_SCHAR(str, l);
2750
526k
  else
2751
526k
      c = 0;
2752
1.90G
    }
2753
617k
    buffer[nbchars] = 0;
2754
617k
    return(buffer);
2755
2756
0
mem_error:
2757
0
    xmlErrMemory(ctxt, NULL);
2758
151k
int_error:
2759
151k
    if (rep != NULL)
2760
833
        xmlFree(rep);
2761
151k
    if (buffer != NULL)
2762
151k
        xmlFree(buffer);
2763
151k
    return(NULL);
2764
0
}
2765
2766
/**
2767
 * xmlStringDecodeEntities:
2768
 * @ctxt:  the parser context
2769
 * @str:  the input string
2770
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2771
 * @end:  an end marker xmlChar, 0 if none
2772
 * @end2:  an end marker xmlChar, 0 if none
2773
 * @end3:  an end marker xmlChar, 0 if none
2774
 *
2775
 * Takes a entity string content and process to do the adequate substitutions.
2776
 *
2777
 * [67] Reference ::= EntityRef | CharRef
2778
 *
2779
 * [69] PEReference ::= '%' Name ';'
2780
 *
2781
 * Returns A newly allocated string with the substitution done. The caller
2782
 *      must deallocate it !
2783
 */
2784
xmlChar *
2785
xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2786
771k
            xmlChar end, xmlChar  end2, xmlChar end3) {
2787
771k
    if ((ctxt == NULL) || (str == NULL)) return(NULL);
2788
771k
    return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2789
771k
           end, end2, end3));
2790
771k
}
2791
2792
/************************************************************************
2793
 *                  *
2794
 *    Commodity functions, cleanup needed ?     *
2795
 *                  *
2796
 ************************************************************************/
2797
2798
/**
2799
 * areBlanks:
2800
 * @ctxt:  an XML parser context
2801
 * @str:  a xmlChar *
2802
 * @len:  the size of @str
2803
 * @blank_chars: we know the chars are blanks
2804
 *
2805
 * Is this a sequence of blank chars that one can ignore ?
2806
 *
2807
 * Returns 1 if ignorable 0 otherwise.
2808
 */
2809
2810
static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2811
14.2M
                     int blank_chars) {
2812
14.2M
    int i, ret;
2813
14.2M
    xmlNodePtr lastChild;
2814
2815
    /*
2816
     * Don't spend time trying to differentiate them, the same callback is
2817
     * used !
2818
     */
2819
14.2M
    if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
2820
0
  return(0);
2821
2822
    /*
2823
     * Check for xml:space value.
2824
     */
2825
14.2M
    if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2826
13.7M
        (*(ctxt->space) == -2))
2827
9.46M
  return(0);
2828
2829
    /*
2830
     * Check that the string is made of blanks
2831
     */
2832
4.80M
    if (blank_chars == 0) {
2833
8.41M
  for (i = 0;i < len;i++)
2834
8.01M
      if (!(IS_BLANK_CH(str[i]))) return(0);
2835
2.17M
    }
2836
2837
    /*
2838
     * Look if the element is mixed content in the DTD if available
2839
     */
2840
3.04M
    if (ctxt->node == NULL) return(0);
2841
2.93M
    if (ctxt->myDoc != NULL) {
2842
2.93M
  ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2843
2.93M
        if (ret == 0) return(1);
2844
2.93M
        if (ret == 1) return(0);
2845
2.93M
    }
2846
2847
    /*
2848
     * Otherwise, heuristic :-\
2849
     */
2850
2.93M
    if ((RAW != '<') && (RAW != 0xD)) return(0);
2851
2.85M
    if ((ctxt->node->children == NULL) &&
2852
866k
  (RAW == '<') && (NXT(1) == '/')) return(0);
2853
2854
2.78M
    lastChild = xmlGetLastChild(ctxt->node);
2855
2.78M
    if (lastChild == NULL) {
2856
799k
        if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2857
0
            (ctxt->node->content != NULL)) return(0);
2858
1.98M
    } else if (xmlNodeIsText(lastChild))
2859
160k
        return(0);
2860
1.82M
    else if ((ctxt->node->children != NULL) &&
2861
1.82M
             (xmlNodeIsText(ctxt->node->children)))
2862
43.6k
        return(0);
2863
2.58M
    return(1);
2864
2.78M
}
2865
2866
/************************************************************************
2867
 *                  *
2868
 *    Extra stuff for namespace support     *
2869
 *  Relates to http://www.w3.org/TR/WD-xml-names      *
2870
 *                  *
2871
 ************************************************************************/
2872
2873
/**
2874
 * xmlSplitQName:
2875
 * @ctxt:  an XML parser context
2876
 * @name:  an XML parser context
2877
 * @prefix:  a xmlChar **
2878
 *
2879
 * parse an UTF8 encoded XML qualified name string
2880
 *
2881
 * [NS 5] QName ::= (Prefix ':')? LocalPart
2882
 *
2883
 * [NS 6] Prefix ::= NCName
2884
 *
2885
 * [NS 7] LocalPart ::= NCName
2886
 *
2887
 * Returns the local part, and prefix is updated
2888
 *   to get the Prefix if any.
2889
 */
2890
2891
xmlChar *
2892
202k
xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2893
202k
    xmlChar buf[XML_MAX_NAMELEN + 5];
2894
202k
    xmlChar *buffer = NULL;
2895
202k
    int len = 0;
2896
202k
    int max = XML_MAX_NAMELEN;
2897
202k
    xmlChar *ret = NULL;
2898
202k
    const xmlChar *cur = name;
2899
202k
    int c;
2900
2901
202k
    if (prefix == NULL) return(NULL);
2902
202k
    *prefix = NULL;
2903
2904
202k
    if (cur == NULL) return(NULL);
2905
2906
#ifndef XML_XML_NAMESPACE
2907
    /* xml: prefix is not really a namespace */
2908
    if ((cur[0] == 'x') && (cur[1] == 'm') &&
2909
        (cur[2] == 'l') && (cur[3] == ':'))
2910
  return(xmlStrdup(name));
2911
#endif
2912
2913
    /* nasty but well=formed */
2914
202k
    if (cur[0] == ':')
2915
13.3k
  return(xmlStrdup(name));
2916
2917
189k
    c = *cur++;
2918
4.54M
    while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2919
4.35M
  buf[len++] = c;
2920
4.35M
  c = *cur++;
2921
4.35M
    }
2922
189k
    if (len >= max) {
2923
  /*
2924
   * Okay someone managed to make a huge name, so he's ready to pay
2925
   * for the processing speed.
2926
   */
2927
16.4k
  max = len * 2;
2928
2929
16.4k
  buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
2930
16.4k
  if (buffer == NULL) {
2931
0
      xmlErrMemory(ctxt, NULL);
2932
0
      return(NULL);
2933
0
  }
2934
16.4k
  memcpy(buffer, buf, len);
2935
3.38M
  while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2936
3.36M
      if (len + 10 > max) {
2937
9.60k
          xmlChar *tmp;
2938
2939
9.60k
    max *= 2;
2940
9.60k
    tmp = (xmlChar *) xmlRealloc(buffer,
2941
9.60k
            max * sizeof(xmlChar));
2942
9.60k
    if (tmp == NULL) {
2943
0
        xmlFree(buffer);
2944
0
        xmlErrMemory(ctxt, NULL);
2945
0
        return(NULL);
2946
0
    }
2947
9.60k
    buffer = tmp;
2948
9.60k
      }
2949
3.36M
      buffer[len++] = c;
2950
3.36M
      c = *cur++;
2951
3.36M
  }
2952
16.4k
  buffer[len] = 0;
2953
16.4k
    }
2954
2955
189k
    if ((c == ':') && (*cur == 0)) {
2956
19.6k
        if (buffer != NULL)
2957
2.09k
      xmlFree(buffer);
2958
19.6k
  *prefix = NULL;
2959
19.6k
  return(xmlStrdup(name));
2960
19.6k
    }
2961
2962
169k
    if (buffer == NULL)
2963
155k
  ret = xmlStrndup(buf, len);
2964
14.3k
    else {
2965
14.3k
  ret = buffer;
2966
14.3k
  buffer = NULL;
2967
14.3k
  max = XML_MAX_NAMELEN;
2968
14.3k
    }
2969
2970
2971
169k
    if (c == ':') {
2972
50.1k
  c = *cur;
2973
50.1k
        *prefix = ret;
2974
50.1k
  if (c == 0) {
2975
0
      return(xmlStrndup(BAD_CAST "", 0));
2976
0
  }
2977
50.1k
  len = 0;
2978
2979
  /*
2980
   * Check that the first character is proper to start
2981
   * a new name
2982
   */
2983
50.1k
  if (!(((c >= 0x61) && (c <= 0x7A)) ||
2984
30.6k
        ((c >= 0x41) && (c <= 0x5A)) ||
2985
26.4k
        (c == '_') || (c == ':'))) {
2986
23.3k
      int l;
2987
23.3k
      int first = CUR_SCHAR(cur, l);
2988
2989
23.3k
      if (!IS_LETTER(first) && (first != '_')) {
2990
6.88k
    xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
2991
6.88k
          "Name %s is not XML Namespace compliant\n",
2992
6.88k
          name);
2993
6.88k
      }
2994
23.3k
  }
2995
50.1k
  cur++;
2996
2997
1.82M
  while ((c != 0) && (len < max)) { /* tested bigname2.xml */
2998
1.77M
      buf[len++] = c;
2999
1.77M
      c = *cur++;
3000
1.77M
  }
3001
50.1k
  if (len >= max) {
3002
      /*
3003
       * Okay someone managed to make a huge name, so he's ready to pay
3004
       * for the processing speed.
3005
       */
3006
8.62k
      max = len * 2;
3007
3008
8.62k
      buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3009
8.62k
      if (buffer == NULL) {
3010
0
          xmlErrMemory(ctxt, NULL);
3011
0
    return(NULL);
3012
0
      }
3013
8.62k
      memcpy(buffer, buf, len);
3014
3.65M
      while (c != 0) { /* tested bigname2.xml */
3015
3.64M
    if (len + 10 > max) {
3016
7.15k
        xmlChar *tmp;
3017
3018
7.15k
        max *= 2;
3019
7.15k
        tmp = (xmlChar *) xmlRealloc(buffer,
3020
7.15k
                max * sizeof(xmlChar));
3021
7.15k
        if (tmp == NULL) {
3022
0
      xmlErrMemory(ctxt, NULL);
3023
0
      xmlFree(buffer);
3024
0
      return(NULL);
3025
0
        }
3026
7.15k
        buffer = tmp;
3027
7.15k
    }
3028
3.64M
    buffer[len++] = c;
3029
3.64M
    c = *cur++;
3030
3.64M
      }
3031
8.62k
      buffer[len] = 0;
3032
8.62k
  }
3033
3034
50.1k
  if (buffer == NULL)
3035
41.5k
      ret = xmlStrndup(buf, len);
3036
8.62k
  else {
3037
8.62k
      ret = buffer;
3038
8.62k
  }
3039
50.1k
    }
3040
3041
169k
    return(ret);
3042
169k
}
3043
3044
/************************************************************************
3045
 *                  *
3046
 *      The parser itself       *
3047
 *  Relates to http://www.w3.org/TR/REC-xml       *
3048
 *                  *
3049
 ************************************************************************/
3050
3051
/************************************************************************
3052
 *                  *
3053
 *  Routines to parse Name, NCName and NmToken      *
3054
 *                  *
3055
 ************************************************************************/
3056
#ifdef DEBUG
/*
 * Call counters for the name parsing routines, only compiled in when
 * DEBUG is defined. Being static they start at zero.
 */
static unsigned long nbParseName;
static unsigned long nbParseNmToken;
static unsigned long nbParseNCName;
static unsigned long nbParseNCNameComplex;
static unsigned long nbParseNameComplex;
static unsigned long nbParseStringName;
#endif
3064
3065
/*
3066
 * The two following functions are related to the change of accepted
3067
 * characters for Name and NmToken in the Revision 5 of XML-1.0
3068
 * They correspond to the modified production [4] and the new production [4a]
3069
 * changes in that revision. Also note that the macros used for the
3070
 * productions Letter, Digit, CombiningChar and Extender are not needed
3071
 * anymore.
3072
 * We still keep compatibility to pre-revision5 parsing semantic if the
3073
 * new XML_PARSE_OLD10 option is given to the parser.
3074
 */
3075
static int
3076
5.91M
xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
3077
5.91M
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3078
        /*
3079
   * Use the new checks of production [4] [4a] amd [5] of the
3080
   * Update 5 of XML-1.0
3081
   */
3082
5.91M
  if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3083
5.90M
      (((c >= 'a') && (c <= 'z')) ||
3084
4.31M
       ((c >= 'A') && (c <= 'Z')) ||
3085
3.79M
       (c == '_') || (c == ':') ||
3086
3.42M
       ((c >= 0xC0) && (c <= 0xD6)) ||
3087
3.32M
       ((c >= 0xD8) && (c <= 0xF6)) ||
3088
3.24M
       ((c >= 0xF8) && (c <= 0x2FF)) ||
3089
3.06M
       ((c >= 0x370) && (c <= 0x37D)) ||
3090
3.06M
       ((c >= 0x37F) && (c <= 0x1FFF)) ||
3091
2.90M
       ((c >= 0x200C) && (c <= 0x200D)) ||
3092
2.90M
       ((c >= 0x2070) && (c <= 0x218F)) ||
3093
2.90M
       ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3094
2.90M
       ((c >= 0x3001) && (c <= 0xD7FF)) ||
3095
2.89M
       ((c >= 0xF900) && (c <= 0xFDCF)) ||
3096
2.89M
       ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3097
2.88M
       ((c >= 0x10000) && (c <= 0xEFFFF))))
3098
3.02M
      return(1);
3099
5.91M
    } else {
3100
0
        if (IS_LETTER(c) || (c == '_') || (c == ':'))
3101
0
      return(1);
3102
0
    }
3103
2.89M
    return(0);
3104
5.91M
}
3105
3106
static int
3107
276M
xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
3108
276M
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3109
        /*
3110
   * Use the new checks of production [4] [4a] amd [5] of the
3111
   * Update 5 of XML-1.0
3112
   */
3113
276M
  if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3114
275M
      (((c >= 'a') && (c <= 'z')) ||
3115
239M
       ((c >= 'A') && (c <= 'Z')) ||
3116
236M
       ((c >= '0') && (c <= '9')) || /* !start */
3117
234M
       (c == '_') || (c == ':') ||
3118
233M
       (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3119
233M
       ((c >= 0xC0) && (c <= 0xD6)) ||
3120
232M
       ((c >= 0xD8) && (c <= 0xF6)) ||
3121
230M
       ((c >= 0xF8) && (c <= 0x2FF)) ||
3122
226M
       ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3123
226M
       ((c >= 0x370) && (c <= 0x37D)) ||
3124
226M
       ((c >= 0x37F) && (c <= 0x1FFF)) ||
3125
2.31M
       ((c >= 0x200C) && (c <= 0x200D)) ||
3126
2.31M
       ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3127
2.31M
       ((c >= 0x2070) && (c <= 0x218F)) ||
3128
2.30M
       ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3129
2.30M
       ((c >= 0x3001) && (c <= 0xD7FF)) ||
3130
2.23M
       ((c >= 0xF900) && (c <= 0xFDCF)) ||
3131
2.22M
       ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3132
2.21M
       ((c >= 0x10000) && (c <= 0xEFFFF))))
3133
273M
       return(1);
3134
276M
    } else {
3135
0
        if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3136
0
            (c == '.') || (c == '-') ||
3137
0
      (c == '_') || (c == ':') ||
3138
0
      (IS_COMBINING(c)) ||
3139
0
      (IS_EXTENDER(c)))
3140
0
      return(1);
3141
0
    }
3142
2.27M
    return(0);
3143
276M
}
3144
3145
static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
3146
                                          int *len, int *alloc, int normalize);
3147
3148
static const xmlChar *
3149
3.00M
xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3150
3.00M
    int len = 0, l;
3151
3.00M
    int c;
3152
3.00M
    int count = 0;
3153
3154
#ifdef DEBUG
3155
    nbParseNameComplex++;
3156
#endif
3157
3158
    /*
3159
     * Handler for more complex cases
3160
     */
3161
3.00M
    GROW;
3162
3.00M
    if (ctxt->instate == XML_PARSER_EOF)
3163
3
        return(NULL);
3164
3.00M
    c = CUR_CHAR(l);
3165
3.00M
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3166
        /*
3167
   * Use the new checks of production [4] [4a] amd [5] of the
3168
   * Update 5 of XML-1.0
3169
   */
3170
3.00M
  if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3171
2.81M
      (!(((c >= 'a') && (c <= 'z')) ||
3172
2.47M
         ((c >= 'A') && (c <= 'Z')) ||
3173
1.93M
         (c == '_') || (c == ':') ||
3174
1.88M
         ((c >= 0xC0) && (c <= 0xD6)) ||
3175
1.81M
         ((c >= 0xD8) && (c <= 0xF6)) ||
3176
1.72M
         ((c >= 0xF8) && (c <= 0x2FF)) ||
3177
1.56M
         ((c >= 0x370) && (c <= 0x37D)) ||
3178
1.56M
         ((c >= 0x37F) && (c <= 0x1FFF)) ||
3179
1.49M
         ((c >= 0x200C) && (c <= 0x200D)) ||
3180
1.49M
         ((c >= 0x2070) && (c <= 0x218F)) ||
3181
1.49M
         ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3182
1.49M
         ((c >= 0x3001) && (c <= 0xD7FF)) ||
3183
1.48M
         ((c >= 0xF900) && (c <= 0xFDCF)) ||
3184
1.48M
         ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3185
1.65M
         ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3186
1.65M
      return(NULL);
3187
1.65M
  }
3188
1.35M
  len += l;
3189
1.35M
  NEXTL(l);
3190
1.35M
  c = CUR_CHAR(l);
3191
97.1M
  while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3192
96.8M
         (((c >= 'a') && (c <= 'z')) ||
3193
74.1M
          ((c >= 'A') && (c <= 'Z')) ||
3194
68.0M
          ((c >= '0') && (c <= '9')) || /* !start */
3195
67.1M
          (c == '_') || (c == ':') ||
3196
65.5M
          (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3197
65.1M
          ((c >= 0xC0) && (c <= 0xD6)) ||
3198
63.3M
          ((c >= 0xD8) && (c <= 0xF6)) ||
3199
62.2M
          ((c >= 0xF8) && (c <= 0x2FF)) ||
3200
54.2M
          ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3201
54.2M
          ((c >= 0x370) && (c <= 0x37D)) ||
3202
54.2M
          ((c >= 0x37F) && (c <= 0x1FFF)) ||
3203
1.25M
          ((c >= 0x200C) && (c <= 0x200D)) ||
3204
1.25M
          ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3205
1.24M
          ((c >= 0x2070) && (c <= 0x218F)) ||
3206
1.24M
          ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3207
1.23M
          ((c >= 0x3001) && (c <= 0xD7FF)) ||
3208
1.13M
          ((c >= 0xF900) && (c <= 0xFDCF)) ||
3209
1.12M
          ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3210
1.11M
          ((c >= 0x10000) && (c <= 0xEFFFF))
3211
96.8M
    )) {
3212
95.7M
      if (count++ > XML_PARSER_CHUNK_SIZE) {
3213
737k
    count = 0;
3214
737k
    GROW;
3215
737k
                if (ctxt->instate == XML_PARSER_EOF)
3216
0
                    return(NULL);
3217
737k
      }
3218
95.7M
      len += l;
3219
95.7M
      NEXTL(l);
3220
95.7M
      c = CUR_CHAR(l);
3221
95.7M
  }
3222
1.35M
    } else {
3223
0
  if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3224
0
      (!IS_LETTER(c) && (c != '_') &&
3225
0
       (c != ':'))) {
3226
0
      return(NULL);
3227
0
  }
3228
0
  len += l;
3229
0
  NEXTL(l);
3230
0
  c = CUR_CHAR(l);
3231
3232
0
  while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3233
0
         ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3234
0
    (c == '.') || (c == '-') ||
3235
0
    (c == '_') || (c == ':') ||
3236
0
    (IS_COMBINING(c)) ||
3237
0
    (IS_EXTENDER(c)))) {
3238
0
      if (count++ > XML_PARSER_CHUNK_SIZE) {
3239
0
    count = 0;
3240
0
    GROW;
3241
0
                if (ctxt->instate == XML_PARSER_EOF)
3242
0
                    return(NULL);
3243
0
      }
3244
0
      len += l;
3245
0
      NEXTL(l);
3246
0
      c = CUR_CHAR(l);
3247
0
  }
3248
0
    }
3249
1.35M
    if ((len > XML_MAX_NAME_LENGTH) &&
3250
1.57k
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3251
1.57k
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3252
1.57k
        return(NULL);
3253
1.57k
    }
3254
1.34M
    if (ctxt->input->cur - ctxt->input->base < len) {
3255
        /*
3256
         * There were a couple of bugs where PERefs lead to to a change
3257
         * of the buffer. Check the buffer size to avoid passing an invalid
3258
         * pointer to xmlDictLookup.
3259
         */
3260
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
3261
0
                    "unexpected change of input buffer");
3262
0
        return (NULL);
3263
0
    }
3264
1.34M
    if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3265
4.84k
        return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
3266
1.34M
    return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3267
1.34M
}
3268
3269
/**
3270
 * xmlParseName:
3271
 * @ctxt:  an XML parser context
3272
 *
3273
 * parse an XML name.
3274
 *
3275
 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3276
 *                  CombiningChar | Extender
3277
 *
3278
 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3279
 *
3280
 * [6] Names ::= Name (#x20 Name)*
3281
 *
3282
 * Returns the Name parsed or NULL
3283
 */
3284
3285
const xmlChar *
3286
8.39M
xmlParseName(xmlParserCtxtPtr ctxt) {
3287
8.39M
    const xmlChar *in;
3288
8.39M
    const xmlChar *ret;
3289
8.39M
    int count = 0;
3290
3291
8.39M
    GROW;
3292
3293
#ifdef DEBUG
3294
    nbParseName++;
3295
#endif
3296
3297
    /*
3298
     * Accelerator for simple ASCII names
3299
     */
3300
8.39M
    in = ctxt->input->cur;
3301
8.39M
    if (((*in >= 0x61) && (*in <= 0x7A)) ||
3302
4.99M
  ((*in >= 0x41) && (*in <= 0x5A)) ||
3303
6.32M
  (*in == '_') || (*in == ':')) {
3304
6.32M
  in++;
3305
86.6M
  while (((*in >= 0x61) && (*in <= 0x7A)) ||
3306
15.4M
         ((*in >= 0x41) && (*in <= 0x5A)) ||
3307
9.92M
         ((*in >= 0x30) && (*in <= 0x39)) ||
3308
8.37M
         (*in == '_') || (*in == '-') ||
3309
7.25M
         (*in == ':') || (*in == '.'))
3310
80.3M
      in++;
3311
6.32M
  if ((*in > 0) && (*in < 0x80)) {
3312
5.39M
      count = in - ctxt->input->cur;
3313
5.39M
            if ((count > XML_MAX_NAME_LENGTH) &&
3314
687
                ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3315
687
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3316
687
                return(NULL);
3317
687
            }
3318
5.39M
      ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3319
5.39M
      ctxt->input->cur = in;
3320
5.39M
      ctxt->nbChars += count;
3321
5.39M
      ctxt->input->col += count;
3322
5.39M
      if (ret == NULL)
3323
0
          xmlErrMemory(ctxt, NULL);
3324
5.39M
      return(ret);
3325
5.39M
  }
3326
6.32M
    }
3327
    /* accelerator for special cases */
3328
3.00M
    return(xmlParseNameComplex(ctxt));
3329
8.39M
}
3330
3331
static const xmlChar *
3332
4.90M
xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3333
4.90M
    int len = 0, l;
3334
4.90M
    int c;
3335
4.90M
    int count = 0;
3336
4.90M
    size_t startPosition = 0;
3337
3338
#ifdef DEBUG
3339
    nbParseNCNameComplex++;
3340
#endif
3341
3342
    /*
3343
     * Handler for more complex cases
3344
     */
3345
4.90M
    GROW;
3346
4.90M
    startPosition = CUR_PTR - BASE_PTR;
3347
4.90M
    c = CUR_CHAR(l);
3348
4.90M
    if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3349
4.63M
  (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3350
3.47M
  return(NULL);
3351
3.47M
    }
3352
3353
170M
    while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3354
170M
     (xmlIsNameChar(ctxt, c) && (c != ':'))) {
3355
169M
  if (count++ > XML_PARSER_CHUNK_SIZE) {
3356
1.52M
            if ((len > XML_MAX_NAME_LENGTH) &&
3357
2.97k
                ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3358
2.97k
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3359
2.97k
                return(NULL);
3360
2.97k
            }
3361
1.51M
      count = 0;
3362
1.51M
      GROW;
3363
1.51M
            if (ctxt->instate == XML_PARSER_EOF)
3364
0
                return(NULL);
3365
1.51M
  }
3366
169M
  len += l;
3367
169M
  NEXTL(l);
3368
169M
  c = CUR_CHAR(l);
3369
169M
  if (c == 0) {
3370
25.7k
      count = 0;
3371
      /*
3372
       * when shrinking to extend the buffer we really need to preserve
3373
       * the part of the name we already parsed. Hence rolling back
3374
       * by current lenght.
3375
       */
3376
25.7k
      ctxt->input->cur -= l;
3377
25.7k
      GROW;
3378
25.7k
      ctxt->input->cur += l;
3379
25.7k
            if (ctxt->instate == XML_PARSER_EOF)
3380
0
                return(NULL);
3381
25.7k
      c = CUR_CHAR(l);
3382
25.7k
  }
3383
169M
    }
3384
1.42M
    if ((len > XML_MAX_NAME_LENGTH) &&
3385
1.33k
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3386
1.33k
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3387
1.33k
        return(NULL);
3388
1.33k
    }
3389
1.42M
    return(xmlDictLookup(ctxt->dict, (BASE_PTR + startPosition), len));
3390
1.42M
}
3391
3392
/**
3393
 * xmlParseNCName:
3394
 * @ctxt:  an XML parser context
3395
 * @len:  length of the string parsed
3396
 *
3397
 * parse an XML name.
3398
 *
3399
 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3400
 *                      CombiningChar | Extender
3401
 *
3402
 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3403
 *
3404
 * Returns the Name parsed or NULL
3405
 */
3406
3407
static const xmlChar *
3408
31.6M
xmlParseNCName(xmlParserCtxtPtr ctxt) {
3409
31.6M
    const xmlChar *in, *e;
3410
31.6M
    const xmlChar *ret;
3411
31.6M
    int count = 0;
3412
3413
#ifdef DEBUG
3414
    nbParseNCName++;
3415
#endif
3416
3417
    /*
3418
     * Accelerator for simple ASCII names
3419
     */
3420
31.6M
    in = ctxt->input->cur;
3421
31.6M
    e = ctxt->input->end;
3422
31.6M
    if ((((*in >= 0x61) && (*in <= 0x7A)) ||
3423
20.0M
   ((*in >= 0x41) && (*in <= 0x5A)) ||
3424
27.6M
   (*in == '_')) && (in < e)) {
3425
27.6M
  in++;
3426
172M
  while ((((*in >= 0x61) && (*in <= 0x7A)) ||
3427
44.0M
          ((*in >= 0x41) && (*in <= 0x5A)) ||
3428
32.0M
          ((*in >= 0x30) && (*in <= 0x39)) ||
3429
28.9M
          (*in == '_') || (*in == '-') ||
3430
144M
          (*in == '.')) && (in < e))
3431
144M
      in++;
3432
27.6M
  if (in >= e)
3433
18.7k
      goto complex;
3434
27.6M
  if ((*in > 0) && (*in < 0x80)) {
3435
26.7M
      count = in - ctxt->input->cur;
3436
26.7M
            if ((count > XML_MAX_NAME_LENGTH) &&
3437
560
                ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3438
560
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3439
560
                return(NULL);
3440
560
            }
3441
26.7M
      ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3442
26.7M
      ctxt->input->cur = in;
3443
26.7M
      ctxt->nbChars += count;
3444
26.7M
      ctxt->input->col += count;
3445
26.7M
      if (ret == NULL) {
3446
0
          xmlErrMemory(ctxt, NULL);
3447
0
      }
3448
26.7M
      return(ret);
3449
26.7M
  }
3450
27.6M
    }
3451
4.90M
complex:
3452
4.90M
    return(xmlParseNCNameComplex(ctxt));
3453
31.6M
}
3454
3455
/**
3456
 * xmlParseNameAndCompare:
3457
 * @ctxt:  an XML parser context
3458
 *
3459
 * parse an XML name and compares for match
3460
 * (specialized for endtag parsing)
3461
 *
3462
 * Returns NULL for an illegal name, (xmlChar*) 1 for success
3463
 * and the name for mismatch
3464
 */
3465
3466
static const xmlChar *
3467
7.14M
xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
3468
7.14M
    register const xmlChar *cmp = other;
3469
7.14M
    register const xmlChar *in;
3470
7.14M
    const xmlChar *ret;
3471
3472
7.14M
    GROW;
3473
7.14M
    if (ctxt->instate == XML_PARSER_EOF)
3474
0
        return(NULL);
3475
3476
7.14M
    in = ctxt->input->cur;
3477
40.3M
    while (*in != 0 && *in == *cmp) {
3478
33.2M
  ++in;
3479
33.2M
  ++cmp;
3480
33.2M
  ctxt->input->col++;
3481
33.2M
    }
3482
7.14M
    if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
3483
  /* success */
3484
4.88M
  ctxt->input->cur = in;
3485
4.88M
  return (const xmlChar*) 1;
3486
4.88M
    }
3487
    /* failure (or end of input buffer), check with full function */
3488
2.26M
    ret = xmlParseName (ctxt);
3489
    /* strings coming from the dictionary direct compare possible */
3490
2.26M
    if (ret == other) {
3491
28.5k
  return (const xmlChar*) 1;
3492
28.5k
    }
3493
2.23M
    return ret;
3494
2.26M
}
3495
3496
/**
3497
 * xmlParseStringName:
3498
 * @ctxt:  an XML parser context
3499
 * @str:  a pointer to the string pointer (IN/OUT)
3500
 *
3501
 * parse an XML name.
3502
 *
3503
 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3504
 *                  CombiningChar | Extender
3505
 *
3506
 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3507
 *
3508
 * [6] Names ::= Name (#x20 Name)*
3509
 *
3510
 * Returns the Name parsed or NULL. The @str pointer
3511
 * is updated to the current location in the string.
3512
 */
3513
3514
static xmlChar *
3515
1.28M
xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3516
1.28M
    xmlChar buf[XML_MAX_NAMELEN + 5];
3517
1.28M
    const xmlChar *cur = *str;
3518
1.28M
    int len = 0, l;
3519
1.28M
    int c;
3520
3521
#ifdef DEBUG
3522
    nbParseStringName++;
3523
#endif
3524
3525
1.28M
    c = CUR_SCHAR(cur, l);
3526
1.28M
    if (!xmlIsNameStartChar(ctxt, c)) {
3527
23.8k
  return(NULL);
3528
23.8k
    }
3529
3530
1.25M
    COPY_BUF(l,buf,len,c);
3531
1.25M
    cur += l;
3532
1.25M
    c = CUR_SCHAR(cur, l);
3533
7.48M
    while (xmlIsNameChar(ctxt, c)) {
3534
6.24M
  COPY_BUF(l,buf,len,c);
3535
6.24M
  cur += l;
3536
6.24M
  c = CUR_SCHAR(cur, l);
3537
6.24M
  if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3538
      /*
3539
       * Okay someone managed to make a huge name, so he's ready to pay
3540
       * for the processing speed.
3541
       */
3542
12.7k
      xmlChar *buffer;
3543
12.7k
      int max = len * 2;
3544
3545
12.7k
      buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3546
12.7k
      if (buffer == NULL) {
3547
0
          xmlErrMemory(ctxt, NULL);
3548
0
    return(NULL);
3549
0
      }
3550
12.7k
      memcpy(buffer, buf, len);
3551
53.8M
      while (xmlIsNameChar(ctxt, c)) {
3552
53.8M
    if (len + 10 > max) {
3553
44.8k
        xmlChar *tmp;
3554
3555
44.8k
                    if ((len > XML_MAX_NAME_LENGTH) &&
3556
1.95k
                        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3557
1.95k
                        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3558
1.95k
      xmlFree(buffer);
3559
1.95k
                        return(NULL);
3560
1.95k
                    }
3561
42.8k
        max *= 2;
3562
42.8k
        tmp = (xmlChar *) xmlRealloc(buffer,
3563
42.8k
                                  max * sizeof(xmlChar));
3564
42.8k
        if (tmp == NULL) {
3565
0
      xmlErrMemory(ctxt, NULL);
3566
0
      xmlFree(buffer);
3567
0
      return(NULL);
3568
0
        }
3569
42.8k
        buffer = tmp;
3570
42.8k
    }
3571
53.8M
    COPY_BUF(l,buffer,len,c);
3572
53.8M
    cur += l;
3573
53.8M
    c = CUR_SCHAR(cur, l);
3574
53.8M
      }
3575
10.7k
      buffer[len] = 0;
3576
10.7k
      *str = cur;
3577
10.7k
      return(buffer);
3578
12.7k
  }
3579
6.24M
    }
3580
1.24M
    if ((len > XML_MAX_NAME_LENGTH) &&
3581
0
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3582
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3583
0
        return(NULL);
3584
0
    }
3585
1.24M
    *str = cur;
3586
1.24M
    return(xmlStrndup(buf, len));
3587
1.24M
}
3588
3589
/**
3590
 * xmlParseNmtoken:
3591
 * @ctxt:  an XML parser context
3592
 *
3593
 * parse an XML Nmtoken.
3594
 *
3595
 * [7] Nmtoken ::= (NameChar)+
3596
 *
3597
 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
3598
 *
3599
 * Returns the Nmtoken parsed or NULL
3600
 */
3601
3602
xmlChar *
3603
222k
xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3604
222k
    xmlChar buf[XML_MAX_NAMELEN + 5];
3605
222k
    int len = 0, l;
3606
222k
    int c;
3607
222k
    int count = 0;
3608
3609
#ifdef DEBUG
3610
    nbParseNmToken++;
3611
#endif
3612
3613
222k
    GROW;
3614
222k
    if (ctxt->instate == XML_PARSER_EOF)
3615
0
        return(NULL);
3616
222k
    c = CUR_CHAR(l);
3617
3618
1.37M
    while (xmlIsNameChar(ctxt, c)) {
3619
1.16M
  if (count++ > XML_PARSER_CHUNK_SIZE) {
3620
0
      count = 0;
3621
0
      GROW;
3622
0
  }
3623
1.16M
  COPY_BUF(l,buf,len,c);
3624
1.16M
  NEXTL(l);
3625
1.16M
  c = CUR_CHAR(l);
3626
1.16M
  if (c == 0) {
3627
1.41k
      count = 0;
3628
1.41k
      GROW;
3629
1.41k
      if (ctxt->instate == XML_PARSER_EOF)
3630
0
    return(NULL);
3631
1.41k
            c = CUR_CHAR(l);
3632
1.41k
  }
3633
1.16M
  if (len >= XML_MAX_NAMELEN) {
3634
      /*
3635
       * Okay someone managed to make a huge token, so he's ready to pay
3636
       * for the processing speed.
3637
       */
3638
8.20k
      xmlChar *buffer;
3639
8.20k
      int max = len * 2;
3640
3641
8.20k
      buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3642
8.20k
      if (buffer == NULL) {
3643
0
          xmlErrMemory(ctxt, NULL);
3644
0
    return(NULL);
3645
0
      }
3646
8.20k
      memcpy(buffer, buf, len);
3647
42.8M
      while (xmlIsNameChar(ctxt, c)) {
3648
42.8M
    if (count++ > XML_PARSER_CHUNK_SIZE) {
3649
420k
        count = 0;
3650
420k
        GROW;
3651
420k
                    if (ctxt->instate == XML_PARSER_EOF) {
3652
0
                        xmlFree(buffer);
3653
0
                        return(NULL);
3654
0
                    }
3655
420k
    }
3656
42.8M
    if (len + 10 > max) {
3657
23.2k
        xmlChar *tmp;
3658
3659
23.2k
                    if ((max > XML_MAX_NAME_LENGTH) &&
3660
1.49k
                        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3661
1.49k
                        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3662
1.49k
                        xmlFree(buffer);
3663
1.49k
                        return(NULL);
3664
1.49k
                    }
3665
21.7k
        max *= 2;
3666
21.7k
        tmp = (xmlChar *) xmlRealloc(buffer,
3667
21.7k
                                  max * sizeof(xmlChar));
3668
21.7k
        if (tmp == NULL) {
3669
0
      xmlErrMemory(ctxt, NULL);
3670
0
      xmlFree(buffer);
3671
0
      return(NULL);
3672
0
        }
3673
21.7k
        buffer = tmp;
3674
21.7k
    }
3675
42.8M
    COPY_BUF(l,buffer,len,c);
3676
42.8M
    NEXTL(l);
3677
42.8M
    c = CUR_CHAR(l);
3678
42.8M
      }
3679
6.70k
      buffer[len] = 0;
3680
6.70k
      return(buffer);
3681
8.20k
  }
3682
1.16M
    }
3683
214k
    if (len == 0)
3684
77.1k
        return(NULL);
3685
137k
    if ((len > XML_MAX_NAME_LENGTH) &&
3686
0
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3687
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3688
0
        return(NULL);
3689
0
    }
3690
137k
    return(xmlStrndup(buf, len));
3691
137k
}
3692
3693
/**
3694
 * xmlParseEntityValue:
3695
 * @ctxt:  an XML parser context
3696
 * @orig:  if non-NULL store a copy of the original entity value
3697
 *
3698
 * parse a value for ENTITY declarations
3699
 *
3700
 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3701
 *                 "'" ([^%&'] | PEReference | Reference)* "'"
3702
 *
3703
 * Returns the EntityValue parsed with reference substituted or NULL
3704
 */
3705
3706
xmlChar *
3707
171k
xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3708
171k
    xmlChar *buf = NULL;
3709
171k
    int len = 0;
3710
171k
    int size = XML_PARSER_BUFFER_SIZE;
3711
171k
    int c, l;
3712
171k
    xmlChar stop;
3713
171k
    xmlChar *ret = NULL;
3714
171k
    const xmlChar *cur = NULL;
3715
171k
    xmlParserInputPtr input;
3716
3717
171k
    if (RAW == '"') stop = '"';
3718
128k
    else if (RAW == '\'') stop = '\'';
3719
0
    else {
3720
0
  xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
3721
0
  return(NULL);
3722
0
    }
3723
171k
    buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3724
171k
    if (buf == NULL) {
3725
0
  xmlErrMemory(ctxt, NULL);
3726
0
  return(NULL);
3727
0
    }
3728
3729
    /*
3730
     * The content of the entity definition is copied in a buffer.
3731
     */
3732
3733
171k
    ctxt->instate = XML_PARSER_ENTITY_VALUE;
3734
171k
    input = ctxt->input;
3735
171k
    GROW;
3736
171k
    if (ctxt->instate == XML_PARSER_EOF)
3737
0
        goto error;
3738
171k
    NEXT;
3739
171k
    c = CUR_CHAR(l);
3740
    /*
3741
     * NOTE: 4.4.5 Included in Literal
3742
     * When a parameter entity reference appears in a literal entity
3743
     * value, ... a single or double quote character in the replacement
3744
     * text is always treated as a normal data character and will not
3745
     * terminate the literal.
3746
     * In practice it means we stop the loop only when back at parsing
3747
     * the initial entity and the quote is found
3748
     */
3749
1.58G
    while (((IS_CHAR(c)) && ((c != stop) || /* checked */
3750
1.58G
      (ctxt->input != input))) && (ctxt->instate != XML_PARSER_EOF)) {
3751
1.58G
  if (len + 5 >= size) {
3752
111k
      xmlChar *tmp;
3753
3754
111k
      size *= 2;
3755
111k
      tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3756
111k
      if (tmp == NULL) {
3757
0
    xmlErrMemory(ctxt, NULL);
3758
0
                goto error;
3759
0
      }
3760
111k
      buf = tmp;
3761
111k
  }
3762
1.58G
  COPY_BUF(l,buf,len,c);
3763
1.58G
  NEXTL(l);
3764
3765
1.58G
  GROW;
3766
1.58G
  c = CUR_CHAR(l);
3767
1.58G
  if (c == 0) {
3768
255
      GROW;
3769
255
      c = CUR_CHAR(l);
3770
255
  }
3771
1.58G
    }
3772
171k
    buf[len] = 0;
3773
171k
    if (ctxt->instate == XML_PARSER_EOF)
3774
6
        goto error;
3775
171k
    if (c != stop) {
3776
470
        xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
3777
470
        goto error;
3778
470
    }
3779
170k
    NEXT;
3780
3781
    /*
3782
     * Raise problem w.r.t. '&' and '%' being used in non-entities
3783
     * reference constructs. Note Charref will be handled in
3784
     * xmlStringDecodeEntities()
3785
     */
3786
170k
    cur = buf;
3787
4.51G
    while (*cur != 0) { /* non input consuming */
3788
4.51G
  if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3789
226k
      xmlChar *name;
3790
226k
      xmlChar tmp = *cur;
3791
226k
            int nameOk = 0;
3792
3793
226k
      cur++;
3794
226k
      name = xmlParseStringName(ctxt, &cur);
3795
226k
            if (name != NULL) {
3796
216k
                nameOk = 1;
3797
216k
                xmlFree(name);
3798
216k
            }
3799
226k
            if ((nameOk == 0) || (*cur != ';')) {
3800
17.9k
    xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
3801
17.9k
      "EntityValue: '%c' forbidden except for entities references\n",
3802
17.9k
                            tmp);
3803
17.9k
                goto error;
3804
17.9k
      }
3805
208k
      if ((tmp == '%') && (ctxt->inSubset == 1) &&
3806
2.06k
    (ctxt->inputNr == 1)) {
3807
2.06k
    xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
3808
2.06k
                goto error;
3809
2.06k
      }
3810
206k
      if (*cur == 0)
3811
0
          break;
3812
206k
  }
3813
4.51G
  cur++;
3814
4.51G
    }
3815
3816
    /*
3817
     * Then PEReference entities are substituted.
3818
     *
3819
     * NOTE: 4.4.7 Bypassed
3820
     * When a general entity reference appears in the EntityValue in
3821
     * an entity declaration, it is bypassed and left as is.
3822
     * so XML_SUBSTITUTE_REF is not set here.
3823
     */
3824
150k
    ++ctxt->depth;
3825
150k
    ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3826
150k
                                  0, 0, 0);
3827
150k
    --ctxt->depth;
3828
150k
    if (orig != NULL) {
3829
150k
        *orig = buf;
3830
150k
        buf = NULL;
3831
150k
    }
3832
3833
171k
error:
3834
171k
    if (buf != NULL)
3835
20.4k
        xmlFree(buf);
3836
171k
    return(ret);
3837
150k
}
3838
3839
/**
3840
 * xmlParseAttValueComplex:
3841
 * @ctxt:  an XML parser context
3842
 * @len:   the resulting attribute len
3843
 * @normalize:  wether to apply the inner normalization
3844
 *
3845
 * parse a value for an attribute, this is the fallback function
3846
 * of xmlParseAttValue() when the attribute parsing requires handling
3847
 * of non-ASCII characters, or normalization compaction.
3848
 *
3849
 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3850
 */
3851
static xmlChar *
3852
1.22M
xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
3853
1.22M
    xmlChar limit = 0;
3854
1.22M
    xmlChar *buf = NULL;
3855
1.22M
    xmlChar *rep = NULL;
3856
1.22M
    size_t len = 0;
3857
1.22M
    size_t buf_size = 0;
3858
1.22M
    int c, l, in_space = 0;
3859
1.22M
    xmlChar *current = NULL;
3860
1.22M
    xmlEntityPtr ent;
3861
3862
1.22M
    if (NXT(0) == '"') {
3863
722k
  ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3864
722k
  limit = '"';
3865
722k
        NEXT;
3866
722k
    } else if (NXT(0) == '\'') {
3867
504k
  limit = '\'';
3868
504k
  ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3869
504k
        NEXT;
3870
504k
    } else {
3871
0
  xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
3872
0
  return(NULL);
3873
0
    }
3874
3875
    /*
3876
     * allocate a translation buffer.
3877
     */
3878
1.22M
    buf_size = XML_PARSER_BUFFER_SIZE;
3879
1.22M
    buf = (xmlChar *) xmlMallocAtomic(buf_size);
3880
1.22M
    if (buf == NULL) goto mem_error;
3881
3882
    /*
3883
     * OK loop until we reach one of the ending char or a size limit.
3884
     */
3885
1.22M
    c = CUR_CHAR(l);
3886
1.34G
    while (((NXT(0) != limit) && /* checked */
3887
1.34G
            (IS_CHAR(c)) && (c != '<')) &&
3888
1.34G
            (ctxt->instate != XML_PARSER_EOF)) {
3889
        /*
3890
         * Impose a reasonable limit on attribute size, unless XML_PARSE_HUGE
3891
         * special option is given
3892
         */
3893
1.34G
        if ((len > XML_MAX_TEXT_LENGTH) &&
3894
6
            ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3895
6
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
3896
6
                           "AttValue length too long\n");
3897
6
            goto mem_error;
3898
6
        }
3899
1.34G
  if (c == 0) break;
3900
1.34G
  if (c == '&') {
3901
1.54M
      in_space = 0;
3902
1.54M
      if (NXT(1) == '#') {
3903
375k
    int val = xmlParseCharRef(ctxt);
3904
3905
375k
    if (val == '&') {
3906
19.2k
        if (ctxt->replaceEntities) {
3907
0
      if (len + 10 > buf_size) {
3908
0
          growBuffer(buf, 10);
3909
0
      }
3910
0
      buf[len++] = '&';
3911
19.2k
        } else {
3912
      /*
3913
       * The reparsing will be done in xmlStringGetNodeList()
3914
       * called by the attribute() function in SAX.c
3915
       */
3916
19.2k
      if (len + 10 > buf_size) {
3917
2.45k
          growBuffer(buf, 10);
3918
2.45k
      }
3919
19.2k
      buf[len++] = '&';
3920
19.2k
      buf[len++] = '#';
3921
19.2k
      buf[len++] = '3';
3922
19.2k
      buf[len++] = '8';
3923
19.2k
      buf[len++] = ';';
3924
19.2k
        }
3925
356k
    } else if (val != 0) {
3926
209k
        if (len + 10 > buf_size) {
3927
4.21k
      growBuffer(buf, 10);
3928
4.21k
        }
3929
209k
        len += xmlCopyChar(0, &buf[len], val);
3930
209k
    }
3931
1.17M
      } else {
3932
1.17M
    ent = xmlParseEntityRef(ctxt);
3933
1.17M
    ctxt->nbentities++;
3934
1.17M
    if (ent != NULL)
3935
406k
        ctxt->nbentities += ent->owner;
3936
1.17M
    if ((ent != NULL) &&
3937
406k
        (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
3938
171k
        if (len + 10 > buf_size) {
3939
3.97k
      growBuffer(buf, 10);
3940
3.97k
        }
3941
171k
        if ((ctxt->replaceEntities == 0) &&
3942
171k
            (ent->content[0] == '&')) {
3943
121k
      buf[len++] = '&';
3944
121k
      buf[len++] = '#';
3945
121k
      buf[len++] = '3';
3946
121k
      buf[len++] = '8';
3947
121k
      buf[len++] = ';';
3948
121k
        } else {
3949
50.7k
      buf[len++] = ent->content[0];
3950
50.7k
        }
3951
1.00M
    } else if ((ent != NULL) &&
3952
234k
               (ctxt->replaceEntities != 0)) {
3953
0
        if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
3954
0
      ++ctxt->depth;
3955
0
      rep = xmlStringDecodeEntities(ctxt, ent->content,
3956
0
                  XML_SUBSTITUTE_REF,
3957
0
                  0, 0, 0);
3958
0
      --ctxt->depth;
3959
0
      if (rep != NULL) {
3960
0
          current = rep;
3961
0
          while (*current != 0) { /* non input consuming */
3962
0
                                if ((*current == 0xD) || (*current == 0xA) ||
3963
0
                                    (*current == 0x9)) {
3964
0
                                    buf[len++] = 0x20;
3965
0
                                    current++;
3966
0
                                } else
3967
0
                                    buf[len++] = *current++;
3968
0
        if (len + 10 > buf_size) {
3969
0
            growBuffer(buf, 10);
3970
0
        }
3971
0
          }
3972
0
          xmlFree(rep);
3973
0
          rep = NULL;
3974
0
      }
3975
0
        } else {
3976
0
      if (len + 10 > buf_size) {
3977
0
          growBuffer(buf, 10);
3978
0
      }
3979
0
      if (ent->content != NULL)
3980
0
          buf[len++] = ent->content[0];
3981
0
        }
3982
1.00M
    } else if (ent != NULL) {
3983
234k
        int i = xmlStrlen(ent->name);
3984
234k
        const xmlChar *cur = ent->name;
3985
3986
        /*
3987
         * This may look absurd but is needed to detect
3988
         * entities problems
3989
         */
3990
234k
        if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
3991
234k
      (ent->content != NULL) && (ent->checked == 0)) {
3992
4.71k
      unsigned long oldnbent = ctxt->nbentities;
3993
3994
4.71k
      ++ctxt->depth;
3995
4.71k
      rep = xmlStringDecodeEntities(ctxt, ent->content,
3996
4.71k
              XML_SUBSTITUTE_REF, 0, 0, 0);
3997
4.71k
      --ctxt->depth;
3998
3999
4.71k
      ent->checked = (ctxt->nbentities - oldnbent + 1) * 2;
4000
4.71k
      if (rep != NULL) {
4001
4.04k
          if (xmlStrchr(rep, '<'))
4002
1.43k
              ent->checked |= 1;
4003
4.04k
          xmlFree(rep);
4004
4.04k
          rep = NULL;
4005
4.04k
      } else {
4006
671
                            ent->content[0] = 0;
4007
671
                        }
4008
4.71k
        }
4009
4010
        /*
4011
         * Just output the reference
4012
         */
4013
234k
        buf[len++] = '&';
4014
238k
        while (len + i + 10 > buf_size) {
4015
8.12k
      growBuffer(buf, i + 10);
4016
8.12k
        }
4017
484k
        for (;i > 0;i--)
4018
250k
      buf[len++] = *cur++;
4019
234k
        buf[len++] = ';';
4020
234k
    }
4021
1.17M
      }
4022
1.34G
  } else {
4023
1.34G
      if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
4024
25.4M
          if ((len != 0) || (!normalize)) {
4025
25.1M
        if ((!normalize) || (!in_space)) {
4026
24.5M
      COPY_BUF(l,buf,len,0x20);
4027
24.6M
      while (len + 10 > buf_size) {
4028
84.4k
          growBuffer(buf, 10);
4029
84.4k
      }
4030
24.5M
        }
4031
25.1M
        in_space = 1;
4032
25.1M
    }
4033
1.31G
      } else {
4034
1.31G
          in_space = 0;
4035
1.31G
    COPY_BUF(l,buf,len,c);
4036
1.31G
    if (len + 10 > buf_size) {
4037
515k
        growBuffer(buf, 10);
4038
515k
    }
4039
1.31G
      }
4040
1.34G
      NEXTL(l);
4041
1.34G
  }
4042
1.34G
  GROW;
4043
1.34G
  c = CUR_CHAR(l);
4044
1.34G
    }
4045
1.22M
    if (ctxt->instate == XML_PARSER_EOF)
4046
6
        goto error;
4047
4048
1.22M
    if ((in_space) && (normalize)) {
4049
36.0k
        while ((len > 0) && (buf[len - 1] == 0x20)) len--;
4050
14.1k
    }
4051
1.22M
    buf[len] = 0;
4052
1.22M
    if (RAW == '<') {
4053
609k
  xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
4054
617k
    } else if (RAW != limit) {
4055
193k
  if ((c != 0) && (!IS_CHAR(c))) {
4056
145k
      xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
4057
145k
         "invalid character in attribute value\n");
4058
145k
  } else {
4059
47.4k
      xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4060
47.4k
         "AttValue: ' expected\n");
4061
47.4k
        }
4062
193k
    } else
4063
424k
  NEXT;
4064
4065
    /*
4066
     * There we potentially risk an overflow, don't allow attribute value of
4067
     * length more than INT_MAX it is a very reasonnable assumption !
4068
     */
4069
1.22M
    if (len >= INT_MAX) {
4070
0
        xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4071
0
                       "AttValue length too long\n");
4072
0
        goto mem_error;
4073
0
    }
4074
4075
1.22M
    if (attlen != NULL) *attlen = (int) len;
4076
1.22M
    return(buf);
4077
4078
6
mem_error:
4079
6
    xmlErrMemory(ctxt, NULL);
4080
12
error:
4081
12
    if (buf != NULL)
4082
12
        xmlFree(buf);
4083
12
    if (rep != NULL)
4084
0
        xmlFree(rep);
4085
12
    return(NULL);
4086
6
}
4087
4088
/**
4089
 * xmlParseAttValue:
4090
 * @ctxt:  an XML parser context
4091
 *
4092
 * parse a value for an attribute
4093
 * Note: the parser won't do substitution of entities here, this
4094
 * will be handled later in xmlStringGetNodeList
4095
 *
4096
 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
4097
 *                   "'" ([^<&'] | Reference)* "'"
4098
 *
4099
 * 3.3.3 Attribute-Value Normalization:
4100
 * Before the value of an attribute is passed to the application or
4101
 * checked for validity, the XML processor must normalize it as follows:
4102
 * - a character reference is processed by appending the referenced
4103
 *   character to the attribute value
4104
 * - an entity reference is processed by recursively processing the
4105
 *   replacement text of the entity
4106
 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4107
 *   appending #x20 to the normalized value, except that only a single
4108
 *   #x20 is appended for a "#xD#xA" sequence that is part of an external
4109
 *   parsed entity or the literal entity value of an internal parsed entity
4110
 * - other characters are processed by appending them to the normalized value
4111
 * If the declared value is not CDATA, then the XML processor must further
4112
 * process the normalized attribute value by discarding any leading and
4113
 * trailing space (#x20) characters, and by replacing sequences of space
4114
 * (#x20) characters by a single space (#x20) character.
4115
 * All attributes for which no declaration has been read should be treated
4116
 * by a non-validating parser as if declared CDATA.
4117
 *
4118
 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
4119
 */
4120
4121
4122
xmlChar *
4123
209k
xmlParseAttValue(xmlParserCtxtPtr ctxt) {
4124
209k
    if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
4125
209k
    return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
4126
209k
}
4127
4128
/**
4129
 * xmlParseSystemLiteral:
4130
 * @ctxt:  an XML parser context
4131
 *
4132
 * parse an XML Literal
4133
 *
4134
 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4135
 *
4136
 * Returns the SystemLiteral parsed or NULL
4137
 */
4138
4139
xmlChar *
4140
49.0k
xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
4141
49.0k
    xmlChar *buf = NULL;
4142
49.0k
    int len = 0;
4143
49.0k
    int size = XML_PARSER_BUFFER_SIZE;
4144
49.0k
    int cur, l;
4145
49.0k
    xmlChar stop;
4146
49.0k
    int state = ctxt->instate;
4147
49.0k
    int count = 0;
4148
4149
49.0k
    SHRINK;
4150
49.0k
    if (RAW == '"') {
4151
25.1k
        NEXT;
4152
25.1k
  stop = '"';
4153
25.1k
    } else if (RAW == '\'') {
4154
16.3k
        NEXT;
4155
16.3k
  stop = '\'';
4156
16.3k
    } else {
4157
7.55k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4158
7.55k
  return(NULL);
4159
7.55k
    }
4160
4161
41.4k
    buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4162
41.4k
    if (buf == NULL) {
4163
0
        xmlErrMemory(ctxt, NULL);
4164
0
  return(NULL);
4165
0
    }
4166
41.4k
    ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
4167
41.4k
    cur = CUR_CHAR(l);
4168
29.0M
    while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
4169
28.9M
  if (len + 5 >= size) {
4170
22.6k
      xmlChar *tmp;
4171
4172
22.6k
            if ((size > XML_MAX_NAME_LENGTH) &&
4173
400
                ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4174
400
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
4175
400
                xmlFree(buf);
4176
400
    ctxt->instate = (xmlParserInputState) state;
4177
400
                return(NULL);
4178
400
            }
4179
22.2k
      size *= 2;
4180
22.2k
      tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4181
22.2k
      if (tmp == NULL) {
4182
0
          xmlFree(buf);
4183
0
    xmlErrMemory(ctxt, NULL);
4184
0
    ctxt->instate = (xmlParserInputState) state;
4185
0
    return(NULL);
4186
0
      }
4187
22.2k
      buf = tmp;
4188
22.2k
  }
4189
28.9M
  count++;
4190
28.9M
  if (count > 50) {
4191
555k
      GROW;
4192
555k
      count = 0;
4193
555k
            if (ctxt->instate == XML_PARSER_EOF) {
4194
0
          xmlFree(buf);
4195
0
    return(NULL);
4196
0
            }
4197
555k
  }
4198
28.9M
  COPY_BUF(l,buf,len,cur);
4199
28.9M
  NEXTL(l);
4200
28.9M
  cur = CUR_CHAR(l);
4201
28.9M
  if (cur == 0) {
4202
3.02k
      GROW;
4203
3.02k
      SHRINK;
4204
3.02k
      cur = CUR_CHAR(l);
4205
3.02k
  }
4206
28.9M
    }
4207
41.0k
    buf[len] = 0;
4208
41.0k
    ctxt->instate = (xmlParserInputState) state;
4209
41.0k
    if (!IS_CHAR(cur)) {
4210
3.77k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4211
37.3k
    } else {
4212
37.3k
  NEXT;
4213
37.3k
    }
4214
41.0k
    return(buf);
4215
41.4k
}
4216
4217
/**
4218
 * xmlParsePubidLiteral:
4219
 * @ctxt:  an XML parser context
4220
 *
4221
 * parse an XML public literal
4222
 *
4223
 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4224
 *
4225
 * Returns the PubidLiteral parsed or NULL.
4226
 */
4227
4228
xmlChar *
4229
39.2k
xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
4230
39.2k
    xmlChar *buf = NULL;
4231
39.2k
    int len = 0;
4232
39.2k
    int size = XML_PARSER_BUFFER_SIZE;
4233
39.2k
    xmlChar cur;
4234
39.2k
    xmlChar stop;
4235
39.2k
    int count = 0;
4236
39.2k
    xmlParserInputState oldstate = ctxt->instate;
4237
4238
39.2k
    SHRINK;
4239
39.2k
    if (RAW == '"') {
4240
32.7k
        NEXT;
4241
32.7k
  stop = '"';
4242
32.7k
    } else if (RAW == '\'') {
4243
2.45k
        NEXT;
4244
2.45k
  stop = '\'';
4245
3.97k
    } else {
4246
3.97k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4247
3.97k
  return(NULL);
4248
3.97k
    }
4249
35.2k
    buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4250
35.2k
    if (buf == NULL) {
4251
0
  xmlErrMemory(ctxt, NULL);
4252
0
  return(NULL);
4253
0
    }
4254
35.2k
    ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
4255
35.2k
    cur = CUR;
4256
2.69M
    while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
4257
2.66M
  if (len + 1 >= size) {
4258
6.40k
      xmlChar *tmp;
4259
4260
6.40k
            if ((size > XML_MAX_NAME_LENGTH) &&
4261
17
                ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4262
17
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
4263
17
                xmlFree(buf);
4264
17
                return(NULL);
4265
17
            }
4266
6.38k
      size *= 2;
4267
6.38k
      tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4268
6.38k
      if (tmp == NULL) {
4269
0
    xmlErrMemory(ctxt, NULL);
4270
0
    xmlFree(buf);
4271
0
    return(NULL);
4272
0
      }
4273
6.38k
      buf = tmp;
4274
6.38k
  }
4275
2.66M
  buf[len++] = cur;
4276
2.66M
  count++;
4277
2.66M
  if (count > 50) {
4278
45.7k
      GROW;
4279
45.7k
      count = 0;
4280
45.7k
            if (ctxt->instate == XML_PARSER_EOF) {
4281
0
    xmlFree(buf);
4282
0
    return(NULL);
4283
0
            }
4284
45.7k
  }
4285
2.66M
  NEXT;
4286
2.66M
  cur = CUR;
4287
2.66M
  if (cur == 0) {
4288
4.78k
      GROW;
4289
4.78k
      SHRINK;
4290
4.78k
      cur = CUR;
4291
4.78k
  }
4292
2.66M
    }
4293
35.2k
    buf[len] = 0;
4294
35.2k
    if (cur != stop) {
4295
12.6k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4296
22.6k
    } else {
4297
22.6k
  NEXT;
4298
22.6k
    }
4299
35.2k
    ctxt->instate = oldstate;
4300
35.2k
    return(buf);
4301
35.2k
}
4302
4303
static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
4304
4305
/*
 * used for the test in the inner loop of the char data testing
 *
 * Lookup table indexed by byte value. An entry holds the byte itself for
 * the tab (0x09) and for every byte in 0x20..0x7F except '&', '<' and
 * ']'; every other entry is 0. One row below covers 16 byte values; the
 * entries for 0x80..0xFF are not spelled out and are therefore
 * zero-initialized.
 */
static const unsigned char test_char_data[256] = {
    /* 0x00: control characters, only the tab is kept */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x10 */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x20: '&' (0x26) is cleared */
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27,
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    /* 0x30: '<' (0x3C) is cleared */
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F,
    /* 0x40 */
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    /* 0x50: ']' (0x5D) is cleared */
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F,
    /* 0x60 */
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    /* 0x70 */
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F
    /* 0x80..0xFF: non-ascii, implicitly 0x00 */
};
4342
4343
/**
4344
 * xmlParseCharData:
4345
 * @ctxt:  an XML parser context
4346
 * @cdata:  int indicating whether we are within a CDATA section
4347
 *
4348
 * parse a CharData section.
4349
 * if we are within a CDATA section ']]>' marks an end of section.
4350
 *
4351
 * The right angle bracket (>) may be represented using the string "&gt;",
4352
 * and must, for compatibility, be escaped using "&gt;" or a character
4353
 * reference when it appears in the string "]]>" in content, when that
4354
 * string is not marking the end of a CDATA section.
4355
 *
4356
 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4357
 */
4358
4359
void
4360
27.8M
xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
4361
27.8M
    const xmlChar *in;
4362
27.8M
    int nbchar = 0;
4363
27.8M
    int line = ctxt->input->line;
4364
27.8M
    int col = ctxt->input->col;
4365
27.8M
    int ccol;
4366
4367
27.8M
    SHRINK;
4368
27.8M
    GROW;
4369
    /*
4370
     * Accelerated common case where input don't need to be
4371
     * modified before passing it to the handler.
4372
     */
4373
27.8M
    if (!cdata) {
4374
27.8M
  in = ctxt->input->cur;
4375
31.1M
  do {
4376
33.4M
get_more_space:
4377
64.8M
      while (*in == 0x20) { in++; ctxt->input->col++; }
4378
33.4M
      if (*in == 0xA) {
4379
15.4M
    do {
4380
15.4M
        ctxt->input->line++; ctxt->input->col = 1;
4381
15.4M
        in++;
4382
15.4M
    } while (*in == 0xA);
4383
2.29M
    goto get_more_space;
4384
2.29M
      }
4385
31.1M
      if (*in == '<') {
4386
3.68M
    nbchar = in - ctxt->input->cur;
4387
3.68M
    if (nbchar > 0) {
4388
3.67M
        const xmlChar *tmp = ctxt->input->cur;
4389
3.67M
        ctxt->input->cur = in;
4390
4391
3.67M
        if ((ctxt->sax != NULL) &&
4392
3.67M
            (ctxt->sax->ignorableWhitespace !=
4393
3.67M
             ctxt->sax->characters)) {
4394
3.67M
      if (areBlanks(ctxt, tmp, nbchar, 1)) {
4395
2.35M
          if (ctxt->sax->ignorableWhitespace != NULL)
4396
2.35M
        ctxt->sax->ignorableWhitespace(ctxt->userData,
4397
2.35M
                   tmp, nbchar);
4398
2.35M
      } else {
4399
1.32M
          if (ctxt->sax->characters != NULL)
4400
1.32M
        ctxt->sax->characters(ctxt->userData,
4401
1.32M
                  tmp, nbchar);
4402
1.32M
          if (*ctxt->space == -1)
4403
283k
              *ctxt->space = -2;
4404
1.32M
      }
4405
3.67M
        } else if ((ctxt->sax != NULL) &&
4406
0
                   (ctxt->sax->characters != NULL)) {
4407
0
      ctxt->sax->characters(ctxt->userData,
4408
0
                tmp, nbchar);
4409
0
        }
4410
3.67M
    }
4411
3.68M
    return;
4412
3.68M
      }
4413
4414
28.9M
get_more:
4415
28.9M
            ccol = ctxt->input->col;
4416
251M
      while (test_char_data[*in]) {
4417
222M
    in++;
4418
222M
    ccol++;
4419
222M
      }
4420
28.9M
      ctxt->input->col = ccol;
4421
28.9M
      if (*in == 0xA) {
4422
9.28M
    do {
4423
9.28M
        ctxt->input->line++; ctxt->input->col = 1;
4424
9.28M
        in++;
4425
9.28M
    } while (*in == 0xA);
4426
1.00M
    goto get_more;
4427
1.00M
      }
4428
27.9M
      if (*in == ']') {
4429
516k
    if ((in[1] == ']') && (in[2] == '>')) {
4430
12.7k
        xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4431
12.7k
        ctxt->input->cur = in + 1;
4432
12.7k
        return;
4433
12.7k
    }
4434
503k
    in++;
4435
503k
    ctxt->input->col++;
4436
503k
    goto get_more;
4437
516k
      }
4438
27.4M
      nbchar = in - ctxt->input->cur;
4439
27.4M
      if (nbchar > 0) {
4440
13.6M
    if ((ctxt->sax != NULL) &&
4441
13.6M
        (ctxt->sax->ignorableWhitespace !=
4442
13.6M
         ctxt->sax->characters) &&
4443
13.6M
        (IS_BLANK_CH(*ctxt->input->cur))) {
4444
2.54M
        const xmlChar *tmp = ctxt->input->cur;
4445
2.54M
        ctxt->input->cur = in;
4446
4447
2.54M
        if (areBlanks(ctxt, tmp, nbchar, 0)) {
4448
183k
            if (ctxt->sax->ignorableWhitespace != NULL)
4449
183k
          ctxt->sax->ignorableWhitespace(ctxt->userData,
4450
183k
                 tmp, nbchar);
4451
2.36M
        } else {
4452
2.36M
            if (ctxt->sax->characters != NULL)
4453
2.36M
          ctxt->sax->characters(ctxt->userData,
4454
2.36M
              tmp, nbchar);
4455
2.36M
      if (*ctxt->space == -1)
4456
731k
          *ctxt->space = -2;
4457
2.36M
        }
4458
2.54M
                    line = ctxt->input->line;
4459
2.54M
                    col = ctxt->input->col;
4460
11.1M
    } else if (ctxt->sax != NULL) {
4461
11.1M
        if (ctxt->sax->characters != NULL)
4462
11.1M
      ctxt->sax->characters(ctxt->userData,
4463
11.1M
                ctxt->input->cur, nbchar);
4464
11.1M
                    line = ctxt->input->line;
4465
11.1M
                    col = ctxt->input->col;
4466
11.1M
    }
4467
                /* something really bad happened in the SAX callback */
4468
13.6M
                if (ctxt->instate != XML_PARSER_CONTENT)
4469
0
                    return;
4470
13.6M
      }
4471
27.4M
      ctxt->input->cur = in;
4472
27.4M
      if (*in == 0xD) {
4473
3.96M
    in++;
4474
3.96M
    if (*in == 0xA) {
4475
3.34M
        ctxt->input->cur = in;
4476
3.34M
        in++;
4477
3.34M
        ctxt->input->line++; ctxt->input->col = 1;
4478
3.34M
        continue; /* while */
4479
3.34M
    }
4480
624k
    in--;
4481
624k
      }
4482
24.1M
      if (*in == '<') {
4483
10.0M
    return;
4484
10.0M
      }
4485
14.0M
      if (*in == '&') {
4486
561k
    return;
4487
561k
      }
4488
13.5M
      SHRINK;
4489
13.5M
      GROW;
4490
13.5M
            if (ctxt->instate == XML_PARSER_EOF)
4491
0
    return;
4492
13.5M
      in = ctxt->input->cur;
4493
16.8M
  } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
4494
13.5M
  nbchar = 0;
4495
13.5M
    }
4496
13.5M
    ctxt->input->line = line;
4497
13.5M
    ctxt->input->col = col;
4498
13.5M
    xmlParseCharDataComplex(ctxt, cdata);
4499
13.5M
}
4500
4501
/**
4502
 * xmlParseCharDataComplex:
4503
 * @ctxt:  an XML parser context
4504
 * @cdata:  int indicating whether we are within a CDATA section
4505
 *
4506
 * parse a CharData section.this is the fallback function
4507
 * of xmlParseCharData() when the parsing requires handling
4508
 * of non-ASCII characters.
4509
 */
4510
static void
4511
13.5M
xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
4512
13.5M
    xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4513
13.5M
    int nbchar = 0;
4514
13.5M
    int cur, l;
4515
13.5M
    int count = 0;
4516
4517
13.5M
    SHRINK;
4518
13.5M
    GROW;
4519
13.5M
    cur = CUR_CHAR(l);
4520
965M
    while ((cur != '<') && /* checked */
4521
962M
           (cur != '&') &&
4522
962M
     (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
4523
951M
  if ((cur == ']') && (NXT(1) == ']') &&
4524
98.3k
      (NXT(2) == '>')) {
4525
41.3k
      if (cdata) break;
4526
41.3k
      else {
4527
41.3k
    xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4528
41.3k
      }
4529
41.3k
  }
4530
951M
  COPY_BUF(l,buf,nbchar,cur);
4531
951M
  if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
4532
8.40M
      buf[nbchar] = 0;
4533
4534
      /*
4535
       * OK the segment is to be consumed as chars.
4536
       */
4537
8.40M
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4538
5.02M
    if (areBlanks(ctxt, buf, nbchar, 0)) {
4539
2.12k
        if (ctxt->sax->ignorableWhitespace != NULL)
4540
2.12k
      ctxt->sax->ignorableWhitespace(ctxt->userData,
4541
2.12k
                                     buf, nbchar);
4542
5.02M
    } else {
4543
5.02M
        if (ctxt->sax->characters != NULL)
4544
5.02M
      ctxt->sax->characters(ctxt->userData, buf, nbchar);
4545
5.02M
        if ((ctxt->sax->characters !=
4546
5.02M
             ctxt->sax->ignorableWhitespace) &&
4547
5.02M
      (*ctxt->space == -1))
4548
28.6k
      *ctxt->space = -2;
4549
5.02M
    }
4550
5.02M
      }
4551
8.40M
      nbchar = 0;
4552
            /* something really bad happened in the SAX callback */
4553
8.40M
            if (ctxt->instate != XML_PARSER_CONTENT)
4554
15
                return;
4555
8.40M
  }
4556
951M
  count++;
4557
951M
  if (count > 50) {
4558
17.8M
      GROW;
4559
17.8M
      count = 0;
4560
17.8M
            if (ctxt->instate == XML_PARSER_EOF)
4561
1
    return;
4562
17.8M
  }
4563
951M
  NEXTL(l);
4564
951M
  cur = CUR_CHAR(l);
4565
951M
    }
4566
13.5M
    if (nbchar != 0) {
4567
3.44M
        buf[nbchar] = 0;
4568
  /*
4569
   * OK the segment is to be consumed as chars.
4570
   */
4571
3.44M
  if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4572
3.02M
      if (areBlanks(ctxt, buf, nbchar, 0)) {
4573
47.1k
    if (ctxt->sax->ignorableWhitespace != NULL)
4574
47.1k
        ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4575
2.97M
      } else {
4576
2.97M
    if (ctxt->sax->characters != NULL)
4577
2.97M
        ctxt->sax->characters(ctxt->userData, buf, nbchar);
4578
2.97M
    if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4579
2.97M
        (*ctxt->space == -1))
4580
1.16M
        *ctxt->space = -2;
4581
2.97M
      }
4582
3.02M
  }
4583
3.44M
    }
4584
13.5M
    if ((cur != 0) && (!IS_CHAR(cur))) {
4585
  /* Generate the error and skip the offending character */
4586
10.5M
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4587
10.5M
                          "PCDATA invalid Char value %d\n",
4588
10.5M
                    cur);
4589
10.5M
  NEXTL(l);
4590
10.5M
    }
4591
13.5M
}
4592
4593
/**
4594
 * xmlParseExternalID:
4595
 * @ctxt:  an XML parser context
4596
 * @publicID:  a xmlChar** receiving PubidLiteral
4597
 * @strict: indicate whether we should restrict parsing to only
4598
 *          production [75], see NOTE below
4599
 *
4600
 * Parse an External ID or a Public ID
4601
 *
4602
 * NOTE: Productions [75] and [83] interact badly since [75] can generate
4603
 *       'PUBLIC' S PubidLiteral S SystemLiteral
4604
 *
4605
 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4606
 *                   | 'PUBLIC' S PubidLiteral S SystemLiteral
4607
 *
4608
 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4609
 *
4610
 * Returns the function returns SystemLiteral and in the second
4611
 *                case publicID receives PubidLiteral, is strict is off
4612
 *                it is possible to return NULL and have publicID set.
4613
 */
4614
4615
xmlChar *
4616
140k
xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4617
140k
    xmlChar *URI = NULL;
4618
4619
140k
    SHRINK;
4620
4621
140k
    *publicID = NULL;
4622
140k
    if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
4623
24.1k
        SKIP(6);
4624
24.1k
  if (SKIP_BLANKS == 0) {
4625
5.86k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4626
5.86k
                     "Space required after 'SYSTEM'\n");
4627
5.86k
  }
4628
24.1k
  URI = xmlParseSystemLiteral(ctxt);
4629
24.1k
  if (URI == NULL) {
4630
1.31k
      xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4631
1.31k
        }
4632
116k
    } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
4633
39.2k
        SKIP(6);
4634
39.2k
  if (SKIP_BLANKS == 0) {
4635
9.02k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4636
9.02k
        "Space required after 'PUBLIC'\n");
4637
9.02k
  }
4638
39.2k
  *publicID = xmlParsePubidLiteral(ctxt);
4639
39.2k
  if (*publicID == NULL) {
4640
3.98k
      xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
4641
3.98k
  }
4642
39.2k
  if (strict) {
4643
      /*
4644
       * We don't handle [83] so "S SystemLiteral" is required.
4645
       */
4646
21.5k
      if (SKIP_BLANKS == 0) {
4647
10.5k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4648
10.5k
      "Space required after the Public Identifier\n");
4649
10.5k
      }
4650
21.5k
  } else {
4651
      /*
4652
       * We handle [83] so we return immediately, if
4653
       * "S SystemLiteral" is not detected. We skip blanks if no
4654
             * system literal was found, but this is harmless since we must
4655
             * be at the end of a NotationDecl.
4656
       */
4657
17.6k
      if (SKIP_BLANKS == 0) return(NULL);
4658
8.59k
      if ((CUR != '\'') && (CUR != '"')) return(NULL);
4659
8.59k
  }
4660
24.8k
  URI = xmlParseSystemLiteral(ctxt);
4661
24.8k
  if (URI == NULL) {
4662
6.63k
      xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4663
6.63k
        }
4664
24.8k
    }
4665
126k
    return(URI);
4666
140k
}
4667
4668
/**
4669
 * xmlParseCommentComplex:
4670
 * @ctxt:  an XML parser context
4671
 * @buf:  the already parsed part of the buffer
4672
 * @len:  number of bytes filles in the buffer
4673
 * @size:  allocated size of the buffer
4674
 *
4675
 * Skip an XML (SGML) comment <!-- .... -->
4676
 *  The spec says that "For compatibility, the string "--" (double-hyphen)
4677
 *  must not occur within comments. "
4678
 * This is the slow routine in case the accelerator for ascii didn't work
4679
 *
4680
 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4681
 */
4682
static void
4683
xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf,
4684
378k
                       size_t len, size_t size) {
4685
378k
    int q, ql;
4686
378k
    int r, rl;
4687
378k
    int cur, l;
4688
378k
    size_t count = 0;
4689
378k
    int inputid;
4690
4691
378k
    inputid = ctxt->input->id;
4692
4693
378k
    if (buf == NULL) {
4694
224k
        len = 0;
4695
224k
  size = XML_PARSER_BUFFER_SIZE;
4696
224k
  buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4697
224k
  if (buf == NULL) {
4698
0
      xmlErrMemory(ctxt, NULL);
4699
0
      return;
4700
0
  }
4701
224k
    }
4702
378k
    GROW; /* Assure there's enough input data */
4703
378k
    q = CUR_CHAR(ql);
4704
378k
    if (q == 0)
4705
6.20k
        goto not_terminated;
4706
372k
    if (!IS_CHAR(q)) {
4707
14.6k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4708
14.6k
                          "xmlParseComment: invalid xmlChar value %d\n",
4709
14.6k
                    q);
4710
14.6k
  xmlFree (buf);
4711
14.6k
  return;
4712
14.6k
    }
4713
357k
    NEXTL(ql);
4714
357k
    r = CUR_CHAR(rl);
4715
357k
    if (r == 0)
4716
759
        goto not_terminated;
4717
356k
    if (!IS_CHAR(r)) {
4718
9.21k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4719
9.21k
                          "xmlParseComment: invalid xmlChar value %d\n",
4720
9.21k
                    q);
4721
9.21k
  xmlFree (buf);
4722
9.21k
  return;
4723
9.21k
    }
4724
347k
    NEXTL(rl);
4725
347k
    cur = CUR_CHAR(l);
4726
347k
    if (cur == 0)
4727
1.10k
        goto not_terminated;
4728
255M
    while (IS_CHAR(cur) && /* checked */
4729
255M
           ((cur != '>') ||
4730
255M
      (r != '-') || (q != '-'))) {
4731
255M
  if ((r == '-') && (q == '-')) {
4732
164k
      xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
4733
164k
  }
4734
255M
        if ((len > XML_MAX_TEXT_LENGTH) &&
4735
8
            ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4736
8
            xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4737
8
                         "Comment too big found", NULL);
4738
8
            xmlFree (buf);
4739
8
            return;
4740
8
        }
4741
255M
  if (len + 5 >= size) {
4742
43.0k
      xmlChar *new_buf;
4743
43.0k
            size_t new_size;
4744
4745
43.0k
      new_size = size * 2;
4746
43.0k
      new_buf = (xmlChar *) xmlRealloc(buf, new_size);
4747
43.0k
      if (new_buf == NULL) {
4748
0
    xmlFree (buf);
4749
0
    xmlErrMemory(ctxt, NULL);
4750
0
    return;
4751
0
      }
4752
43.0k
      buf = new_buf;
4753
43.0k
            size = new_size;
4754
43.0k
  }
4755
255M
  COPY_BUF(ql,buf,len,q);
4756
255M
  q = r;
4757
255M
  ql = rl;
4758
255M
  r = cur;
4759
255M
  rl = l;
4760
4761
255M
  count++;
4762
255M
  if (count > 50) {
4763
4.97M
      GROW;
4764
4.97M
      count = 0;
4765
4.97M
            if (ctxt->instate == XML_PARSER_EOF) {
4766
4
    xmlFree(buf);
4767
4
    return;
4768
4
            }
4769
4.97M
  }
4770
255M
  NEXTL(l);
4771
255M
  cur = CUR_CHAR(l);
4772
255M
  if (cur == 0) {
4773
231k
      SHRINK;
4774
231k
      GROW;
4775
231k
      cur = CUR_CHAR(l);
4776
231k
  }
4777
255M
    }
4778
346k
    buf[len] = 0;
4779
346k
    if (cur == 0) {
4780
231k
  xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4781
231k
                       "Comment not terminated \n<!--%.50s\n", buf);
4782
231k
    } else if (!IS_CHAR(cur)) {
4783
17.4k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4784
17.4k
                          "xmlParseComment: invalid xmlChar value %d\n",
4785
17.4k
                    cur);
4786
97.4k
    } else {
4787
97.4k
  if (inputid != ctxt->input->id) {
4788
0
      xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4789
0
               "Comment doesn't start and stop in the same"
4790
0
                           " entity\n");
4791
0
  }
4792
97.4k
        NEXT;
4793
97.4k
  if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4794
97.4k
      (!ctxt->disableSAX))
4795
23.5k
      ctxt->sax->comment(ctxt->userData, buf);
4796
97.4k
    }
4797
346k
    xmlFree(buf);
4798
346k
    return;
4799
8.06k
not_terminated:
4800
8.06k
    xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4801
8.06k
       "Comment not terminated\n", NULL);
4802
8.06k
    xmlFree(buf);
4803
8.06k
    return;
4804
346k
}
4805
4806
/**
4807
 * xmlParseComment:
4808
 * @ctxt:  an XML parser context
4809
 *
4810
 * Skip an XML (SGML) comment <!-- .... -->
4811
 *  The spec says that "For compatibility, the string "--" (double-hyphen)
4812
 *  must not occur within comments. "
4813
 *
4814
 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4815
 */
4816
void
4817
561k
xmlParseComment(xmlParserCtxtPtr ctxt) {
4818
561k
    xmlChar *buf = NULL;
4819
561k
    size_t size = XML_PARSER_BUFFER_SIZE;
4820
561k
    size_t len = 0;
4821
561k
    xmlParserInputState state;
4822
561k
    const xmlChar *in;
4823
561k
    size_t nbchar = 0;
4824
561k
    int ccol;
4825
561k
    int inputid;
4826
4827
    /*
4828
     * Check that there is a comment right here.
4829
     */
4830
561k
    if ((RAW != '<') || (NXT(1) != '!') ||
4831
561k
        (NXT(2) != '-') || (NXT(3) != '-')) return;
4832
560k
    state = ctxt->instate;
4833
560k
    ctxt->instate = XML_PARSER_COMMENT;
4834
560k
    inputid = ctxt->input->id;
4835
560k
    SKIP(4);
4836
560k
    SHRINK;
4837
560k
    GROW;
4838
4839
    /*
4840
     * Accelerated common case where input don't need to be
4841
     * modified before passing it to the handler.
4842
     */
4843
560k
    in = ctxt->input->cur;
4844
576k
    do {
4845
576k
  if (*in == 0xA) {
4846
324k
      do {
4847
324k
    ctxt->input->line++; ctxt->input->col = 1;
4848
324k
    in++;
4849
324k
      } while (*in == 0xA);
4850
8.26k
  }
4851
1.09M
get_more:
4852
1.09M
        ccol = ctxt->input->col;
4853
7.71M
  while (((*in > '-') && (*in <= 0x7F)) ||
4854
1.71M
         ((*in >= 0x20) && (*in < '-')) ||
4855
6.62M
         (*in == 0x09)) {
4856
6.62M
        in++;
4857
6.62M
        ccol++;
4858
6.62M
  }
4859
1.09M
  ctxt->input->col = ccol;
4860
1.09M
  if (*in == 0xA) {
4861
411k
      do {
4862
411k
    ctxt->input->line++; ctxt->input->col = 1;
4863
411k
    in++;
4864
411k
      } while (*in == 0xA);
4865
18.5k
      goto get_more;
4866
18.5k
  }
4867
1.07M
  nbchar = in - ctxt->input->cur;
4868
  /*
4869
   * save current set of data
4870
   */
4871
1.07M
  if (nbchar > 0) {
4872
694k
      if ((ctxt->sax != NULL) &&
4873
694k
    (ctxt->sax->comment != NULL)) {
4874
694k
    if (buf == NULL) {
4875
232k
        if ((*in == '-') && (in[1] == '-'))
4876
85.2k
            size = nbchar + 1;
4877
147k
        else
4878
147k
            size = XML_PARSER_BUFFER_SIZE + nbchar;
4879
232k
        buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4880
232k
        if (buf == NULL) {
4881
0
            xmlErrMemory(ctxt, NULL);
4882
0
      ctxt->instate = state;
4883
0
      return;
4884
0
        }
4885
232k
        len = 0;
4886
461k
    } else if (len + nbchar + 1 >= size) {
4887
29.9k
        xmlChar *new_buf;
4888
29.9k
        size  += len + nbchar + XML_PARSER_BUFFER_SIZE;
4889
29.9k
        new_buf = (xmlChar *) xmlRealloc(buf,
4890
29.9k
                                         size * sizeof(xmlChar));
4891
29.9k
        if (new_buf == NULL) {
4892
0
            xmlFree (buf);
4893
0
      xmlErrMemory(ctxt, NULL);
4894
0
      ctxt->instate = state;
4895
0
      return;
4896
0
        }
4897
29.9k
        buf = new_buf;
4898
29.9k
    }
4899
694k
    memcpy(&buf[len], ctxt->input->cur, nbchar);
4900
694k
    len += nbchar;
4901
694k
    buf[len] = 0;
4902
694k
      }
4903
694k
  }
4904
1.07M
        if ((len > XML_MAX_TEXT_LENGTH) &&
4905
0
            ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4906
0
            xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4907
0
                         "Comment too big found", NULL);
4908
0
            xmlFree (buf);
4909
0
            return;
4910
0
        }
4911
1.07M
  ctxt->input->cur = in;
4912
1.07M
  if (*in == 0xA) {
4913
0
      in++;
4914
0
      ctxt->input->line++; ctxt->input->col = 1;
4915
0
  }
4916
1.07M
  if (*in == 0xD) {
4917
31.4k
      in++;
4918
31.4k
      if (*in == 0xA) {
4919
19.7k
    ctxt->input->cur = in;
4920
19.7k
    in++;
4921
19.7k
    ctxt->input->line++; ctxt->input->col = 1;
4922
19.7k
    continue; /* while */
4923
19.7k
      }
4924
11.7k
      in--;
4925
11.7k
  }
4926
1.05M
  SHRINK;
4927
1.05M
  GROW;
4928
1.05M
        if (ctxt->instate == XML_PARSER_EOF) {
4929
0
            xmlFree(buf);
4930
0
            return;
4931
0
        }
4932
1.05M
  in = ctxt->input->cur;
4933
1.05M
  if (*in == '-') {
4934
681k
      if (in[1] == '-') {
4935
585k
          if (in[2] == '>') {
4936
182k
        if (ctxt->input->id != inputid) {
4937
0
      xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4938
0
                     "comment doesn't start and stop in the"
4939
0
                                       " same entity\n");
4940
0
        }
4941
182k
        SKIP(3);
4942
182k
        if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4943
182k
            (!ctxt->disableSAX)) {
4944
78.7k
      if (buf != NULL)
4945
49.0k
          ctxt->sax->comment(ctxt->userData, buf);
4946
29.7k
      else
4947
29.7k
          ctxt->sax->comment(ctxt->userData, BAD_CAST "");
4948
78.7k
        }
4949
182k
        if (buf != NULL)
4950
79.4k
            xmlFree(buf);
4951
182k
        if (ctxt->instate != XML_PARSER_EOF)
4952
182k
      ctxt->instate = state;
4953
182k
        return;
4954
182k
    }
4955
403k
    if (buf != NULL) {
4956
389k
        xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
4957
389k
                          "Double hyphen within comment: "
4958
389k
                                      "<!--%.50s\n",
4959
389k
              buf);
4960
389k
    } else
4961
13.4k
        xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
4962
13.4k
                          "Double hyphen within comment\n", NULL);
4963
403k
    in++;
4964
403k
    ctxt->input->col++;
4965
403k
      }
4966
499k
      in++;
4967
499k
      ctxt->input->col++;
4968
499k
      goto get_more;
4969
681k
  }
4970
1.05M
    } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
4971
378k
    xmlParseCommentComplex(ctxt, buf, len, size);
4972
378k
    ctxt->instate = state;
4973
378k
    return;
4974
560k
}
4975
4976
4977
/**
4978
 * xmlParsePITarget:
4979
 * @ctxt:  an XML parser context
4980
 *
4981
 * parse the name of a PI
4982
 *
4983
 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
4984
 *
4985
 * Returns the PITarget name or NULL
4986
 */
4987
4988
const xmlChar *
4989
668k
xmlParsePITarget(xmlParserCtxtPtr ctxt) {
4990
668k
    const xmlChar *name;
4991
4992
668k
    name = xmlParseName(ctxt);
4993
668k
    if ((name != NULL) &&
4994
420k
        ((name[0] == 'x') || (name[0] == 'X')) &&
4995
162k
        ((name[1] == 'm') || (name[1] == 'M')) &&
4996
100k
        ((name[2] == 'l') || (name[2] == 'L'))) {
4997
86.3k
  int i;
4998
86.3k
  if ((name[0] == 'x') && (name[1] == 'm') &&
4999
72.1k
      (name[2] == 'l') && (name[3] == 0)) {
5000
43.9k
      xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5001
43.9k
     "XML declaration allowed only at the start of the document\n");
5002
43.9k
      return(name);
5003
43.9k
  } else if (name[3] == 0) {
5004
6.90k
      xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
5005
6.90k
      return(name);
5006
6.90k
  }
5007
103k
  for (i = 0;;i++) {
5008
103k
      if (xmlW3CPIs[i] == NULL) break;
5009
69.5k
      if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
5010
1.68k
          return(name);
5011
69.5k
  }
5012
33.8k
  xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5013
33.8k
          "xmlParsePITarget: invalid name prefix 'xml'\n",
5014
33.8k
          NULL, NULL);
5015
33.8k
    }
5016
615k
    if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
5017
13.9k
  xmlNsErr(ctxt, XML_NS_ERR_COLON,
5018
13.9k
     "colons are forbidden from PI names '%s'\n", name, NULL, NULL);
5019
13.9k
    }
5020
615k
    return(name);
5021
668k
}
5022
5023
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype information.
 * This will add the given catalog to the parsing context in order
 * to be used if there is a resolution need further down in the document
 *
 * The value must be: optional blanks, the word "catalog", optional
 * blanks, '=', optional blanks, a quoted URL, optional blanks.  A
 * missing '=' makes the function return silently; any other deviation
 * emits an XML_WAR_CATALOG_PI warning.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    xmlChar *url = NULL;
    const xmlChar *p = catalog;
    const xmlChar *start;
    xmlChar quote;

    for (; IS_BLANK_CH(*p); p++)
        ;
    if (xmlStrncmp(p, BAD_CAST"catalog", 7) != 0)
        goto error;
    p += 7;

    for (; IS_BLANK_CH(*p); p++)
        ;
    if (*p != '=')
        return;
    p++;

    for (; IS_BLANK_CH(*p); p++)
        ;
    quote = *p;
    if ((quote != '\'') && (quote != '"'))
        goto error;
    p++;

    /* the URL runs up to the matching quote */
    start = p;
    for (; (*p != 0) && (*p != quote); p++)
        ;
    if (*p == 0)
        goto error;
    url = xmlStrndup(start, p - start);
    p++;

    /* only blanks may follow the closing quote */
    for (; IS_BLANK_CH(*p); p++)
        ;
    if (*p != 0)
        goto error;

    if (url != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, url);
        xmlFree(url);
    }
    return;

error:
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
                  "Catalog PI syntax error: %s\n",
                  catalog, NULL);
    if (url != NULL)
        xmlFree(url);
}
#endif
5084
5085
/**
5086
 * xmlParsePI:
5087
 * @ctxt:  an XML parser context
5088
 *
5089
 * parse an XML Processing Instruction.
5090
 *
5091
 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
5092
 *
5093
 * The processing is transfered to SAX once parsed.
5094
 */
5095
5096
void
5097
668k
xmlParsePI(xmlParserCtxtPtr ctxt) {
5098
668k
    xmlChar *buf = NULL;
5099
668k
    size_t len = 0;
5100
668k
    size_t size = XML_PARSER_BUFFER_SIZE;
5101
668k
    int cur, l;
5102
668k
    const xmlChar *target;
5103
668k
    xmlParserInputState state;
5104
668k
    int count = 0;
5105
5106
668k
    if ((RAW == '<') && (NXT(1) == '?')) {
5107
668k
  int inputid = ctxt->input->id;
5108
668k
  state = ctxt->instate;
5109
668k
        ctxt->instate = XML_PARSER_PI;
5110
  /*
5111
   * this is a Processing Instruction.
5112
   */
5113
668k
  SKIP(2);
5114
668k
  SHRINK;
5115
5116
  /*
5117
   * Parse the target name and check for special support like
5118
   * namespace.
5119
   */
5120
668k
        target = xmlParsePITarget(ctxt);
5121
668k
  if (target != NULL) {
5122
420k
      if ((RAW == '?') && (NXT(1) == '>')) {
5123
80.2k
    if (inputid != ctxt->input->id) {
5124
0
        xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5125
0
                             "PI declaration doesn't start and stop in"
5126
0
                                   " the same entity\n");
5127
0
    }
5128
80.2k
    SKIP(2);
5129
5130
    /*
5131
     * SAX: PI detected.
5132
     */
5133
80.2k
    if ((ctxt->sax) && (!ctxt->disableSAX) &&
5134
65.0k
        (ctxt->sax->processingInstruction != NULL))
5135
65.0k
        ctxt->sax->processingInstruction(ctxt->userData,
5136
65.0k
                                         target, NULL);
5137
80.2k
    if (ctxt->instate != XML_PARSER_EOF)
5138
80.2k
        ctxt->instate = state;
5139
80.2k
    return;
5140
80.2k
      }
5141
339k
      buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
5142
339k
      if (buf == NULL) {
5143
0
    xmlErrMemory(ctxt, NULL);
5144
0
    ctxt->instate = state;
5145
0
    return;
5146
0
      }
5147
339k
      if (SKIP_BLANKS == 0) {
5148
81.3k
    xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
5149
81.3k
        "ParsePI: PI %s space expected\n", target);
5150
81.3k
      }
5151
339k
      cur = CUR_CHAR(l);
5152
626M
      while (IS_CHAR(cur) && /* checked */
5153
626M
       ((cur != '?') || (NXT(1) != '>'))) {
5154
626M
    if (len + 5 >= size) {
5155
86.9k
        xmlChar *tmp;
5156
86.9k
                    size_t new_size = size * 2;
5157
86.9k
        tmp = (xmlChar *) xmlRealloc(buf, new_size);
5158
86.9k
        if (tmp == NULL) {
5159
0
      xmlErrMemory(ctxt, NULL);
5160
0
      xmlFree(buf);
5161
0
      ctxt->instate = state;
5162
0
      return;
5163
0
        }
5164
86.9k
        buf = tmp;
5165
86.9k
                    size = new_size;
5166
86.9k
    }
5167
626M
    count++;
5168
626M
    if (count > 50) {
5169
12.1M
        GROW;
5170
12.1M
                    if (ctxt->instate == XML_PARSER_EOF) {
5171
6
                        xmlFree(buf);
5172
6
                        return;
5173
6
                    }
5174
12.1M
        count = 0;
5175
12.1M
                    if ((len > XML_MAX_TEXT_LENGTH) &&
5176
9
                        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
5177
9
                        xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5178
9
                                          "PI %s too big found", target);
5179
9
                        xmlFree(buf);
5180
9
                        ctxt->instate = state;
5181
9
                        return;
5182
9
                    }
5183
12.1M
    }
5184
626M
    COPY_BUF(l,buf,len,cur);
5185
626M
    NEXTL(l);
5186
626M
    cur = CUR_CHAR(l);
5187
626M
    if (cur == 0) {
5188
12.6k
        SHRINK;
5189
12.6k
        GROW;
5190
12.6k
        cur = CUR_CHAR(l);
5191
12.6k
    }
5192
626M
      }
5193
339k
            if ((len > XML_MAX_TEXT_LENGTH) &&
5194
8
                ((ctxt->options & XML_PARSE_HUGE) == 0)) {
5195
8
                xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5196
8
                                  "PI %s too big found", target);
5197
8
                xmlFree(buf);
5198
8
                ctxt->instate = state;
5199
8
                return;
5200
8
            }
5201
339k
      buf[len] = 0;
5202
339k
      if (cur != '?') {
5203
68.3k
    xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5204
68.3k
          "ParsePI: PI %s never end ...\n", target);
5205
271k
      } else {
5206
271k
    if (inputid != ctxt->input->id) {
5207
841
        xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5208
841
                             "PI declaration doesn't start and stop in"
5209
841
                                   " the same entity\n");
5210
841
    }
5211
271k
    SKIP(2);
5212
5213
271k
#ifdef LIBXML_CATALOG_ENABLED
5214
271k
    if (((state == XML_PARSER_MISC) ||
5215
117k
               (state == XML_PARSER_START)) &&
5216
154k
        (xmlStrEqual(target, XML_CATALOG_PI))) {
5217
44.5k
        xmlCatalogAllow allow = xmlCatalogGetDefaults();
5218
44.5k
        if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5219
44.5k
      (allow == XML_CATA_ALLOW_ALL))
5220
44.5k
      xmlParseCatalogPI(ctxt, buf);
5221
44.5k
    }
5222
271k
#endif
5223
5224
5225
    /*
5226
     * SAX: PI detected.
5227
     */
5228
271k
    if ((ctxt->sax) && (!ctxt->disableSAX) &&
5229
240k
        (ctxt->sax->processingInstruction != NULL))
5230
240k
        ctxt->sax->processingInstruction(ctxt->userData,
5231
240k
                                         target, buf);
5232
271k
      }
5233
339k
      xmlFree(buf);
5234
339k
  } else {
5235
248k
      xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
5236
248k
  }
5237
587k
  if (ctxt->instate != XML_PARSER_EOF)
5238
587k
      ctxt->instate = state;
5239
587k
    }
5240
668k
}
5241
5242
/**
5243
 * xmlParseNotationDecl:
5244
 * @ctxt:  an XML parser context
5245
 *
5246
 * parse a notation declaration
5247
 *
5248
 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID |  PublicID) S? '>'
5249
 *
5250
 * Hence there is actually 3 choices:
5251
 *     'PUBLIC' S PubidLiteral
5252
 *     'PUBLIC' S PubidLiteral S SystemLiteral
5253
 * and 'SYSTEM' S SystemLiteral
5254
 *
5255
 * See the NOTE on xmlParseExternalID().
5256
 */
5257
5258
void
5259
50.5k
xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
5260
50.5k
    const xmlChar *name;
5261
50.5k
    xmlChar *Pubid;
5262
50.5k
    xmlChar *Systemid;
5263
5264
50.5k
    if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5265
40.6k
  int inputid = ctxt->input->id;
5266
40.6k
  SHRINK;
5267
40.6k
  SKIP(10);
5268
40.6k
  if (SKIP_BLANKS == 0) {
5269
1.36k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5270
1.36k
         "Space required after '<!NOTATION'\n");
5271
1.36k
      return;
5272
1.36k
  }
5273
5274
39.2k
        name = xmlParseName(ctxt);
5275
39.2k
  if (name == NULL) {
5276
1.72k
      xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5277
1.72k
      return;
5278
1.72k
  }
5279
37.5k
  if (xmlStrchr(name, ':') != NULL) {
5280
1.92k
      xmlNsErr(ctxt, XML_NS_ERR_COLON,
5281
1.92k
         "colons are forbidden from notation names '%s'\n",
5282
1.92k
         name, NULL, NULL);
5283
1.92k
  }
5284
37.5k
  if (SKIP_BLANKS == 0) {
5285
4.33k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5286
4.33k
         "Space required after the NOTATION name'\n");
5287
4.33k
      return;
5288
4.33k
  }
5289
5290
  /*
5291
   * Parse the IDs.
5292
   */
5293
33.2k
  Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
5294
33.2k
  SKIP_BLANKS;
5295
5296
33.2k
  if (RAW == '>') {
5297
9.52k
      if (inputid != ctxt->input->id) {
5298
1.03k
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5299
1.03k
                         "Notation declaration doesn't start and stop"
5300
1.03k
                               " in the same entity\n");
5301
1.03k
      }
5302
9.52k
      NEXT;
5303
9.52k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5304
9.52k
    (ctxt->sax->notationDecl != NULL))
5305
9.52k
    ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5306
23.7k
  } else {
5307
23.7k
      xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5308
23.7k
  }
5309
33.2k
  if (Systemid != NULL) xmlFree(Systemid);
5310
33.2k
  if (Pubid != NULL) xmlFree(Pubid);
5311
33.2k
    }
5312
50.5k
}
5313
5314
/**
5315
 * xmlParseEntityDecl:
5316
 * @ctxt:  an XML parser context
5317
 *
5318
 * parse <!ENTITY declarations
5319
 *
5320
 * [70] EntityDecl ::= GEDecl | PEDecl
5321
 *
5322
 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5323
 *
5324
 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5325
 *
5326
 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5327
 *
5328
 * [74] PEDef ::= EntityValue | ExternalID
5329
 *
5330
 * [76] NDataDecl ::= S 'NDATA' S Name
5331
 *
5332
 * [ VC: Notation Declared ]
5333
 * The Name must match the declared name of a notation.
5334
 */
5335
5336
void
5337
224k
xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
5338
224k
    const xmlChar *name = NULL;
5339
224k
    xmlChar *value = NULL;
5340
224k
    xmlChar *URI = NULL, *literal = NULL;
5341
224k
    const xmlChar *ndata = NULL;
5342
224k
    int isParameter = 0;
5343
224k
    xmlChar *orig = NULL;
5344
5345
    /* GROW; done in the caller */
5346
224k
    if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
5347
218k
  int inputid = ctxt->input->id;
5348
218k
  SHRINK;
5349
218k
  SKIP(8);
5350
218k
  if (SKIP_BLANKS == 0) {
5351
90.6k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5352
90.6k
         "Space required after '<!ENTITY'\n");
5353
90.6k
  }
5354
5355
218k
  if (RAW == '%') {
5356
37.0k
      NEXT;
5357
37.0k
      if (SKIP_BLANKS == 0) {
5358
24.2k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5359
24.2k
             "Space required after '%%'\n");
5360
24.2k
      }
5361
37.0k
      isParameter = 1;
5362
37.0k
  }
5363
5364
218k
        name = xmlParseName(ctxt);
5365
218k
  if (name == NULL) {
5366
4.52k
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5367
4.52k
                     "xmlParseEntityDecl: no name\n");
5368
4.52k
            return;
5369
4.52k
  }
5370
213k
  if (xmlStrchr(name, ':') != NULL) {
5371
8.51k
      xmlNsErr(ctxt, XML_NS_ERR_COLON,
5372
8.51k
         "colons are forbidden from entities names '%s'\n",
5373
8.51k
         name, NULL, NULL);
5374
8.51k
  }
5375
213k
  if (SKIP_BLANKS == 0) {
5376
76.1k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5377
76.1k
         "Space required after the entity name\n");
5378
76.1k
  }
5379
5380
213k
  ctxt->instate = XML_PARSER_ENTITY_DECL;
5381
  /*
5382
   * handle the various case of definitions...
5383
   */
5384
213k
  if (isParameter) {
5385
36.8k
      if ((RAW == '"') || (RAW == '\'')) {
5386
23.1k
          value = xmlParseEntityValue(ctxt, &orig);
5387
23.1k
    if (value) {
5388
16.9k
        if ((ctxt->sax != NULL) &&
5389
16.9k
      (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5390
16.9k
      ctxt->sax->entityDecl(ctxt->userData, name,
5391
16.9k
                        XML_INTERNAL_PARAMETER_ENTITY,
5392
16.9k
            NULL, NULL, value);
5393
16.9k
    }
5394
23.1k
      } else {
5395
13.7k
          URI = xmlParseExternalID(ctxt, &literal, 1);
5396
13.7k
    if ((URI == NULL) && (literal == NULL)) {
5397
1.61k
        xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5398
1.61k
    }
5399
13.7k
    if (URI) {
5400
9.66k
        xmlURIPtr uri;
5401
5402
9.66k
        uri = xmlParseURI((const char *) URI);
5403
9.66k
        if (uri == NULL) {
5404
2.96k
            xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5405
2.96k
             "Invalid URI: %s\n", URI);
5406
      /*
5407
       * This really ought to be a well formedness error
5408
       * but the XML Core WG decided otherwise c.f. issue
5409
       * E26 of the XML erratas.
5410
       */
5411
6.69k
        } else {
5412
6.69k
      if (uri->fragment != NULL) {
5413
          /*
5414
           * Okay this is foolish to block those but not
5415
           * invalid URIs.
5416
           */
5417
634
          xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5418
6.06k
      } else {
5419
6.06k
          if ((ctxt->sax != NULL) &&
5420
6.06k
        (!ctxt->disableSAX) &&
5421
6.06k
        (ctxt->sax->entityDecl != NULL))
5422
6.06k
        ctxt->sax->entityDecl(ctxt->userData, name,
5423
6.06k
              XML_EXTERNAL_PARAMETER_ENTITY,
5424
6.06k
              literal, URI, NULL);
5425
6.06k
      }
5426
6.69k
      xmlFreeURI(uri);
5427
6.69k
        }
5428
9.66k
    }
5429
13.7k
      }
5430
176k
  } else {
5431
176k
      if ((RAW == '"') || (RAW == '\'')) {
5432
148k
          value = xmlParseEntityValue(ctxt, &orig);
5433
148k
    if ((ctxt->sax != NULL) &&
5434
148k
        (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5435
148k
        ctxt->sax->entityDecl(ctxt->userData, name,
5436
148k
        XML_INTERNAL_GENERAL_ENTITY,
5437
148k
        NULL, NULL, value);
5438
    /*
5439
     * For expat compatibility in SAX mode.
5440
     */
5441
148k
    if ((ctxt->myDoc == NULL) ||
5442
148k
        (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5443
0
        if (ctxt->myDoc == NULL) {
5444
0
      ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5445
0
      if (ctxt->myDoc == NULL) {
5446
0
          xmlErrMemory(ctxt, "New Doc failed");
5447
0
          return;
5448
0
      }
5449
0
      ctxt->myDoc->properties = XML_DOC_INTERNAL;
5450
0
        }
5451
0
        if (ctxt->myDoc->intSubset == NULL)
5452
0
      ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5453
0
              BAD_CAST "fake", NULL, NULL);
5454
5455
0
        xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5456
0
                    NULL, NULL, value);
5457
0
    }
5458
148k
      } else {
5459
28.6k
          URI = xmlParseExternalID(ctxt, &literal, 1);
5460
28.6k
    if ((URI == NULL) && (literal == NULL)) {
5461
5.57k
        xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5462
5.57k
    }
5463
28.6k
    if (URI) {
5464
20.7k
        xmlURIPtr uri;
5465
5466
20.7k
        uri = xmlParseURI((const char *)URI);
5467
20.7k
        if (uri == NULL) {
5468
7.07k
            xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5469
7.07k
             "Invalid URI: %s\n", URI);
5470
      /*
5471
       * This really ought to be a well formedness error
5472
       * but the XML Core WG decided otherwise c.f. issue
5473
       * E26 of the XML erratas.
5474
       */
5475
13.6k
        } else {
5476
13.6k
      if (uri->fragment != NULL) {
5477
          /*
5478
           * Okay this is foolish to block those but not
5479
           * invalid URIs.
5480
           */
5481
5.66k
          xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5482
5.66k
      }
5483
13.6k
      xmlFreeURI(uri);
5484
13.6k
        }
5485
20.7k
    }
5486
28.6k
    if ((RAW != '>') && (SKIP_BLANKS == 0)) {
5487
5.90k
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5488
5.90k
           "Space required before 'NDATA'\n");
5489
5.90k
    }
5490
28.6k
    if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
5491
5.06k
        SKIP(5);
5492
5.06k
        if (SKIP_BLANKS == 0) {
5493
897
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5494
897
               "Space required after 'NDATA'\n");
5495
897
        }
5496
5.06k
        ndata = xmlParseName(ctxt);
5497
5.06k
        if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5498
5.06k
            (ctxt->sax->unparsedEntityDecl != NULL))
5499
5.06k
      ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5500
5.06k
            literal, URI, ndata);
5501
23.6k
    } else {
5502
23.6k
        if ((ctxt->sax != NULL) &&
5503
23.6k
            (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5504
23.6k
      ctxt->sax->entityDecl(ctxt->userData, name,
5505
23.6k
            XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5506
23.6k
            literal, URI, NULL);
5507
        /*
5508
         * For expat compatibility in SAX mode.
5509
         * assuming the entity repalcement was asked for
5510
         */
5511
23.6k
        if ((ctxt->replaceEntities != 0) &&
5512
0
      ((ctxt->myDoc == NULL) ||
5513
0
      (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5514
0
      if (ctxt->myDoc == NULL) {
5515
0
          ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5516
0
          if (ctxt->myDoc == NULL) {
5517
0
              xmlErrMemory(ctxt, "New Doc failed");
5518
0
        return;
5519
0
          }
5520
0
          ctxt->myDoc->properties = XML_DOC_INTERNAL;
5521
0
      }
5522
5523
0
      if (ctxt->myDoc->intSubset == NULL)
5524
0
          ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5525
0
            BAD_CAST "fake", NULL, NULL);
5526
0
      xmlSAX2EntityDecl(ctxt, name,
5527
0
                  XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5528
0
                  literal, URI, NULL);
5529
0
        }
5530
23.6k
    }
5531
28.6k
      }
5532
176k
  }
5533
213k
  if (ctxt->instate == XML_PARSER_EOF)
5534
6
      goto done;
5535
213k
  SKIP_BLANKS;
5536
213k
  if (RAW != '>') {
5537
6.63k
      xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
5538
6.63k
              "xmlParseEntityDecl: entity %s not terminated\n", name);
5539
6.63k
      xmlHaltParser(ctxt);
5540
206k
  } else {
5541
206k
      if (inputid != ctxt->input->id) {
5542
1.04k
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5543
1.04k
                         "Entity declaration doesn't start and stop in"
5544
1.04k
                               " the same entity\n");
5545
1.04k
      }
5546
206k
      NEXT;
5547
206k
  }
5548
213k
  if (orig != NULL) {
5549
      /*
5550
       * Ugly mechanism to save the raw entity value.
5551
       */
5552
150k
      xmlEntityPtr cur = NULL;
5553
5554
150k
      if (isParameter) {
5555
20.3k
          if ((ctxt->sax != NULL) &&
5556
20.3k
        (ctxt->sax->getParameterEntity != NULL))
5557
20.3k
        cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5558
130k
      } else {
5559
130k
          if ((ctxt->sax != NULL) &&
5560
130k
        (ctxt->sax->getEntity != NULL))
5561
130k
        cur = ctxt->sax->getEntity(ctxt->userData, name);
5562
130k
    if ((cur == NULL) && (ctxt->userData==ctxt)) {
5563
0
        cur = xmlSAX2GetEntity(ctxt, name);
5564
0
    }
5565
130k
      }
5566
150k
            if ((cur != NULL) && (cur->orig == NULL)) {
5567
62.7k
    cur->orig = orig;
5568
62.7k
                orig = NULL;
5569
62.7k
      }
5570
150k
  }
5571
5572
213k
done:
5573
213k
  if (value != NULL) xmlFree(value);
5574
213k
  if (URI != NULL) xmlFree(URI);
5575
213k
  if (literal != NULL) xmlFree(literal);
5576
213k
        if (orig != NULL) xmlFree(orig);
5577
213k
    }
5578
224k
}
5579
5580
/**
5581
 * xmlParseDefaultDecl:
5582
 * @ctxt:  an XML parser context
5583
 * @value:  Receive a possible fixed default value for the attribute
5584
 *
5585
 * Parse an attribute default declaration
5586
 *
5587
 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5588
 *
5589
 * [ VC: Required Attribute ]
5590
 * if the default declaration is the keyword #REQUIRED, then the
5591
 * attribute must be specified for all elements of the type in the
5592
 * attribute-list declaration.
5593
 *
5594
 * [ VC: Attribute Default Legal ]
5595
 * The declared default value must meet the lexical constraints of
5596
 * the declared attribute type c.f. xmlValidateAttributeDecl()
5597
 *
5598
 * [ VC: Fixed Attribute Default ]
5599
 * if an attribute has a default value declared with the #FIXED
5600
 * keyword, instances of that attribute must match the default value.
5601
 *
5602
 * [ WFC: No < in Attribute Values ]
5603
 * handled in xmlParseAttValue()
5604
 *
5605
 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5606
 *          or XML_ATTRIBUTE_FIXED.
5607
 */
5608
5609
int
5610
238k
xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5611
238k
    int val;
5612
238k
    xmlChar *ret;
5613
5614
238k
    *value = NULL;
5615
238k
    if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
5616
2.86k
  SKIP(9);
5617
2.86k
  return(XML_ATTRIBUTE_REQUIRED);
5618
2.86k
    }
5619
235k
    if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
5620
25.6k
  SKIP(8);
5621
25.6k
  return(XML_ATTRIBUTE_IMPLIED);
5622
25.6k
    }
5623
209k
    val = XML_ATTRIBUTE_NONE;
5624
209k
    if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
5625
4.20k
  SKIP(6);
5626
4.20k
  val = XML_ATTRIBUTE_FIXED;
5627
4.20k
  if (SKIP_BLANKS == 0) {
5628
430
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5629
430
         "Space required after '#FIXED'\n");
5630
430
  }
5631
4.20k
    }
5632
209k
    ret = xmlParseAttValue(ctxt);
5633
209k
    ctxt->instate = XML_PARSER_DTD;
5634
209k
    if (ret == NULL) {
5635
26.6k
  xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
5636
26.6k
           "Attribute default value declaration error\n");
5637
26.6k
    } else
5638
183k
        *value = ret;
5639
209k
    return(val);
5640
235k
}
5641
5642
/**
5643
 * xmlParseNotationType:
5644
 * @ctxt:  an XML parser context
5645
 *
5646
 * parse an Notation attribute type.
5647
 *
5648
 * Note: the leading 'NOTATION' S part has already being parsed...
5649
 *
5650
 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5651
 *
5652
 * [ VC: Notation Attributes ]
5653
 * Values of this type must match one of the notation names included
5654
 * in the declaration; all notation names in the declaration must be declared.
5655
 *
5656
 * Returns: the notation attribute tree built while parsing
5657
 */
5658
5659
xmlEnumerationPtr
5660
22.3k
xmlParseNotationType(xmlParserCtxtPtr ctxt) {
5661
22.3k
    const xmlChar *name;
5662
22.3k
    xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5663
5664
22.3k
    if (RAW != '(') {
5665
1.20k
  xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5666
1.20k
  return(NULL);
5667
1.20k
    }
5668
21.1k
    SHRINK;
5669
28.3k
    do {
5670
28.3k
        NEXT;
5671
28.3k
  SKIP_BLANKS;
5672
28.3k
        name = xmlParseName(ctxt);
5673
28.3k
  if (name == NULL) {
5674
4.29k
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5675
4.29k
         "Name expected in NOTATION declaration\n");
5676
4.29k
            xmlFreeEnumeration(ret);
5677
4.29k
      return(NULL);
5678
4.29k
  }
5679
24.0k
  tmp = ret;
5680
50.5k
  while (tmp != NULL) {
5681
31.2k
      if (xmlStrEqual(name, tmp->name)) {
5682
4.76k
    xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5683
4.76k
    "standalone: attribute notation value token %s duplicated\n",
5684
4.76k
         name, NULL);
5685
4.76k
    if (!xmlDictOwns(ctxt->dict, name))
5686
0
        xmlFree((xmlChar *) name);
5687
4.76k
    break;
5688
4.76k
      }
5689
26.4k
      tmp = tmp->next;
5690
26.4k
  }
5691
24.0k
  if (tmp == NULL) {
5692
19.2k
      cur = xmlCreateEnumeration(name);
5693
19.2k
      if (cur == NULL) {
5694
0
                xmlFreeEnumeration(ret);
5695
0
                return(NULL);
5696
0
            }
5697
19.2k
      if (last == NULL) ret = last = cur;
5698
2.32k
      else {
5699
2.32k
    last->next = cur;
5700
2.32k
    last = cur;
5701
2.32k
      }
5702
19.2k
  }
5703
24.0k
  SKIP_BLANKS;
5704
24.0k
    } while (RAW == '|');
5705
16.8k
    if (RAW != ')') {
5706
1.65k
  xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5707
1.65k
        xmlFreeEnumeration(ret);
5708
1.65k
  return(NULL);
5709
1.65k
    }
5710
15.2k
    NEXT;
5711
15.2k
    return(ret);
5712
16.8k
}
5713
5714
/**
5715
 * xmlParseEnumerationType:
5716
 * @ctxt:  an XML parser context
5717
 *
5718
 * parse an Enumeration attribute type.
5719
 *
5720
 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5721
 *
5722
 * [ VC: Enumeration ]
5723
 * Values of this type must match one of the Nmtoken tokens in
5724
 * the declaration
5725
 *
5726
 * Returns: the enumeration attribute tree built while parsing
5727
 */
5728
5729
xmlEnumerationPtr
5730
113k
xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
5731
113k
    xmlChar *name;
5732
113k
    xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5733
5734
113k
    if (RAW != '(') {
5735
29.2k
  xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
5736
29.2k
  return(NULL);
5737
29.2k
    }
5738
84.1k
    SHRINK;
5739
115k
    do {
5740
115k
        NEXT;
5741
115k
  SKIP_BLANKS;
5742
115k
        name = xmlParseNmtoken(ctxt);
5743
115k
  if (name == NULL) {
5744
2.56k
      xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
5745
2.56k
      return(ret);
5746
2.56k
  }
5747
113k
  tmp = ret;
5748
197k
  while (tmp != NULL) {
5749
108k
      if (xmlStrEqual(name, tmp->name)) {
5750
23.9k
    xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5751
23.9k
    "standalone: attribute enumeration value token %s duplicated\n",
5752
23.9k
         name, NULL);
5753
23.9k
    if (!xmlDictOwns(ctxt->dict, name))
5754
23.9k
        xmlFree(name);
5755
23.9k
    break;
5756
23.9k
      }
5757
84.4k
      tmp = tmp->next;
5758
84.4k
  }
5759
113k
  if (tmp == NULL) {
5760
89.4k
      cur = xmlCreateEnumeration(name);
5761
89.4k
      if (!xmlDictOwns(ctxt->dict, name))
5762
89.4k
    xmlFree(name);
5763
89.4k
      if (cur == NULL) {
5764
0
                xmlFreeEnumeration(ret);
5765
0
                return(NULL);
5766
0
            }
5767
89.4k
      if (last == NULL) ret = last = cur;
5768
7.56k
      else {
5769
7.56k
    last->next = cur;
5770
7.56k
    last = cur;
5771
7.56k
      }
5772
89.4k
  }
5773
113k
  SKIP_BLANKS;
5774
113k
    } while (RAW == '|');
5775
81.5k
    if (RAW != ')') {
5776
6.12k
  xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
5777
6.12k
  return(ret);
5778
6.12k
    }
5779
75.4k
    NEXT;
5780
75.4k
    return(ret);
5781
81.5k
}
5782
5783
/**
5784
 * xmlParseEnumeratedType:
5785
 * @ctxt:  an XML parser context
5786
 * @tree:  the enumeration tree built while parsing
5787
 *
5788
 * parse an Enumerated attribute type.
5789
 *
5790
 * [57] EnumeratedType ::= NotationType | Enumeration
5791
 *
5792
 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5793
 *
5794
 *
5795
 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5796
 */
5797
5798
int
5799
136k
xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5800
136k
    if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5801
23.3k
  SKIP(8);
5802
23.3k
  if (SKIP_BLANKS == 0) {
5803
1.01k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5804
1.01k
         "Space required after 'NOTATION'\n");
5805
1.01k
      return(0);
5806
1.01k
  }
5807
22.3k
  *tree = xmlParseNotationType(ctxt);
5808
22.3k
  if (*tree == NULL) return(0);
5809
15.2k
  return(XML_ATTRIBUTE_NOTATION);
5810
22.3k
    }
5811
113k
    *tree = xmlParseEnumerationType(ctxt);
5812
113k
    if (*tree == NULL) return(0);
5813
81.8k
    return(XML_ATTRIBUTE_ENUMERATION);
5814
113k
}
5815
5816
/**
5817
 * xmlParseAttributeType:
5818
 * @ctxt:  an XML parser context
5819
 * @tree:  the enumeration tree built while parsing
5820
 *
5821
 * parse the Attribute list def for an element
5822
 *
5823
 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5824
 *
5825
 * [55] StringType ::= 'CDATA'
5826
 *
5827
 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5828
 *                        'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5829
 *
5830
 * Validity constraints for attribute values syntax are checked in
5831
 * xmlValidateAttributeValue()
5832
 *
5833
 * [ VC: ID ]
5834
 * Values of type ID must match the Name production. A name must not
5835
 * appear more than once in an XML document as a value of this type;
5836
 * i.e., ID values must uniquely identify the elements which bear them.
5837
 *
5838
 * [ VC: One ID per Element Type ]
5839
 * No element type may have more than one ID attribute specified.
5840
 *
5841
 * [ VC: ID Attribute Default ]
5842
 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
5843
 *
5844
 * [ VC: IDREF ]
5845
 * Values of type IDREF must match the Name production, and values
5846
 * of type IDREFS must match Names; each IDREF Name must match the value
5847
 * of an ID attribute on some element in the XML document; i.e. IDREF
5848
 * values must match the value of some ID attribute.
5849
 *
5850
 * [ VC: Entity Name ]
5851
 * Values of type ENTITY must match the Name production, values
5852
 * of type ENTITIES must match Names; each Entity Name must match the
5853
 * name of an unparsed entity declared in the DTD.
5854
 *
5855
 * [ VC: Name Token ]
5856
 * Values of type NMTOKEN must match the Nmtoken production; values
5857
 * of type NMTOKENS must match Nmtokens.
5858
 *
5859
 * Returns the attribute type
5860
 */
5861
int
5862
289k
xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5863
289k
    SHRINK;
5864
289k
    if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
5865
15.9k
  SKIP(5);
5866
15.9k
  return(XML_ATTRIBUTE_CDATA);
5867
273k
     } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
5868
32.3k
  SKIP(6);
5869
32.3k
  return(XML_ATTRIBUTE_IDREFS);
5870
240k
     } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
5871
1.95k
  SKIP(5);
5872
1.95k
  return(XML_ATTRIBUTE_IDREF);
5873
238k
     } else if ((RAW == 'I') && (NXT(1) == 'D')) {
5874
77.9k
        SKIP(2);
5875
77.9k
  return(XML_ATTRIBUTE_ID);
5876
160k
     } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
5877
4.53k
  SKIP(6);
5878
4.53k
  return(XML_ATTRIBUTE_ENTITY);
5879
156k
     } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
5880
2.86k
  SKIP(8);
5881
2.86k
  return(XML_ATTRIBUTE_ENTITIES);
5882
153k
     } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
5883
733
  SKIP(8);
5884
733
  return(XML_ATTRIBUTE_NMTOKENS);
5885
152k
     } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
5886
16.0k
  SKIP(7);
5887
16.0k
  return(XML_ATTRIBUTE_NMTOKEN);
5888
16.0k
     }
5889
136k
     return(xmlParseEnumeratedType(ctxt, tree));
5890
289k
}
5891
5892
/**
5893
 * xmlParseAttributeListDecl:
5894
 * @ctxt:  an XML parser context
5895
 *
5896
 * : parse the Attribute list def for an element
5897
 *
5898
 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
5899
 *
5900
 * [53] AttDef ::= S Name S AttType S DefaultDecl
5901
 *
5902
 */
5903
void
5904
218k
xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
5905
218k
    const xmlChar *elemName;
5906
218k
    const xmlChar *attrName;
5907
218k
    xmlEnumerationPtr tree;
5908
5909
218k
    if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
5910
209k
  int inputid = ctxt->input->id;
5911
5912
209k
  SKIP(9);
5913
209k
  if (SKIP_BLANKS == 0) {
5914
163k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5915
163k
                     "Space required after '<!ATTLIST'\n");
5916
163k
  }
5917
209k
        elemName = xmlParseName(ctxt);
5918
209k
  if (elemName == NULL) {
5919
2.35k
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5920
2.35k
         "ATTLIST: no name for Element\n");
5921
2.35k
      return;
5922
2.35k
  }
5923
207k
  SKIP_BLANKS;
5924
207k
  GROW;
5925
409k
  while ((RAW != '>') && (ctxt->instate != XML_PARSER_EOF)) {
5926
374k
      int type;
5927
374k
      int def;
5928
374k
      xmlChar *defaultValue = NULL;
5929
5930
374k
      GROW;
5931
374k
            tree = NULL;
5932
374k
      attrName = xmlParseName(ctxt);
5933
374k
      if (attrName == NULL) {
5934
70.6k
    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5935
70.6k
             "ATTLIST: no name for Attribute\n");
5936
70.6k
    break;
5937
70.6k
      }
5938
304k
      GROW;
5939
304k
      if (SKIP_BLANKS == 0) {
5940
14.9k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5941
14.9k
            "Space required after the attribute name\n");
5942
14.9k
    break;
5943
14.9k
      }
5944
5945
289k
      type = xmlParseAttributeType(ctxt, &tree);
5946
289k
      if (type <= 0) {
5947
39.6k
          break;
5948
39.6k
      }
5949
5950
249k
      GROW;
5951
249k
      if (SKIP_BLANKS == 0) {
5952
11.1k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5953
11.1k
             "Space required after the attribute type\n");
5954
11.1k
          if (tree != NULL)
5955
6.66k
        xmlFreeEnumeration(tree);
5956
11.1k
    break;
5957
11.1k
      }
5958
5959
238k
      def = xmlParseDefaultDecl(ctxt, &defaultValue);
5960
238k
      if (def <= 0) {
5961
0
                if (defaultValue != NULL)
5962
0
        xmlFree(defaultValue);
5963
0
          if (tree != NULL)
5964
0
        xmlFreeEnumeration(tree);
5965
0
          break;
5966
0
      }
5967
238k
      if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
5968
173k
          xmlAttrNormalizeSpace(defaultValue, defaultValue);
5969
5970
238k
      GROW;
5971
238k
            if (RAW != '>') {
5972
214k
    if (SKIP_BLANKS == 0) {
5973
35.7k
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5974
35.7k
      "Space required after the attribute default value\n");
5975
35.7k
        if (defaultValue != NULL)
5976
11.4k
      xmlFree(defaultValue);
5977
35.7k
        if (tree != NULL)
5978
15.7k
      xmlFreeEnumeration(tree);
5979
35.7k
        break;
5980
35.7k
    }
5981
214k
      }
5982
202k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5983
202k
    (ctxt->sax->attributeDecl != NULL))
5984
202k
    ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
5985
202k
                          type, def, defaultValue, tree);
5986
0
      else if (tree != NULL)
5987
0
    xmlFreeEnumeration(tree);
5988
5989
202k
      if ((ctxt->sax2) && (defaultValue != NULL) &&
5990
171k
          (def != XML_ATTRIBUTE_IMPLIED) &&
5991
171k
    (def != XML_ATTRIBUTE_REQUIRED)) {
5992
171k
    xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
5993
171k
      }
5994
202k
      if (ctxt->sax2) {
5995
202k
    xmlAddSpecialAttr(ctxt, elemName, attrName, type);
5996
202k
      }
5997
202k
      if (defaultValue != NULL)
5998
171k
          xmlFree(defaultValue);
5999
202k
      GROW;
6000
202k
  }
6001
207k
  if (RAW == '>') {
6002
38.2k
      if (inputid != ctxt->input->id) {
6003
2.97k
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6004
2.97k
                               "Attribute list declaration doesn't start and"
6005
2.97k
                               " stop in the same entity\n");
6006
2.97k
      }
6007
38.2k
      NEXT;
6008
38.2k
  }
6009
207k
    }
6010
218k
}
6011
6012
/**
6013
 * xmlParseElementMixedContentDecl:
6014
 * @ctxt:  an XML parser context
6015
 * @inputchk:  the input used for the current entity, needed for boundary checks
6016
 *
6017
 * parse the declaration for a Mixed Element content
6018
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6019
 *
6020
 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
6021
 *                '(' S? '#PCDATA' S? ')'
6022
 *
6023
 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
6024
 *
6025
 * [ VC: No Duplicate Types ]
6026
 * The same name must not appear more than once in a single
6027
 * mixed-content declaration.
6028
 *
6029
 * returns: the list of the xmlElementContentPtr describing the element choices
6030
 */
6031
xmlElementContentPtr
6032
25.5k
xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6033
25.5k
    xmlElementContentPtr ret = NULL, cur = NULL, n;
6034
25.5k
    const xmlChar *elem = NULL;
6035
6036
25.5k
    GROW;
6037
25.5k
    if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6038
25.5k
  SKIP(7);
6039
25.5k
  SKIP_BLANKS;
6040
25.5k
  SHRINK;
6041
25.5k
  if (RAW == ')') {
6042
11.7k
      if (ctxt->input->id != inputchk) {
6043
3.91k
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6044
3.91k
                               "Element content declaration doesn't start and"
6045
3.91k
                               " stop in the same entity\n");
6046
3.91k
      }
6047
11.7k
      NEXT;
6048
11.7k
      ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6049
11.7k
      if (ret == NULL)
6050
0
          return(NULL);
6051
11.7k
      if (RAW == '*') {
6052
1.30k
    ret->ocur = XML_ELEMENT_CONTENT_MULT;
6053
1.30k
    NEXT;
6054
1.30k
      }
6055
11.7k
      return(ret);
6056
11.7k
  }
6057
13.8k
  if ((RAW == '(') || (RAW == '|')) {
6058
10.6k
      ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6059
10.6k
      if (ret == NULL) return(NULL);
6060
10.6k
  }
6061
30.9k
  while ((RAW == '|') && (ctxt->instate != XML_PARSER_EOF)) {
6062
18.1k
      NEXT;
6063
18.1k
      if (elem == NULL) {
6064
10.0k
          ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6065
10.0k
    if (ret == NULL) return(NULL);
6066
10.0k
    ret->c1 = cur;
6067
10.0k
    if (cur != NULL)
6068
10.0k
        cur->parent = ret;
6069
10.0k
    cur = ret;
6070
10.0k
      } else {
6071
8.09k
          n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6072
8.09k
    if (n == NULL) return(NULL);
6073
8.09k
    n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6074
8.09k
    if (n->c1 != NULL)
6075
8.09k
        n->c1->parent = n;
6076
8.09k
          cur->c2 = n;
6077
8.09k
    if (n != NULL)
6078
8.09k
        n->parent = cur;
6079
8.09k
    cur = n;
6080
8.09k
      }
6081
18.1k
      SKIP_BLANKS;
6082
18.1k
      elem = xmlParseName(ctxt);
6083
18.1k
      if (elem == NULL) {
6084
1.04k
    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6085
1.04k
      "xmlParseElementMixedContentDecl : Name expected\n");
6086
1.04k
    xmlFreeDocElementContent(ctxt->myDoc, ret);
6087
1.04k
    return(NULL);
6088
1.04k
      }
6089
17.1k
      SKIP_BLANKS;
6090
17.1k
      GROW;
6091
17.1k
  }
6092
12.7k
  if ((RAW == ')') && (NXT(1) == '*')) {
6093
1.53k
      if (elem != NULL) {
6094
1.53k
    cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
6095
1.53k
                                   XML_ELEMENT_CONTENT_ELEMENT);
6096
1.53k
    if (cur->c2 != NULL)
6097
1.53k
        cur->c2->parent = cur;
6098
1.53k
            }
6099
1.53k
            if (ret != NULL)
6100
1.53k
                ret->ocur = XML_ELEMENT_CONTENT_MULT;
6101
1.53k
      if (ctxt->input->id != inputchk) {
6102
0
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6103
0
                               "Element content declaration doesn't start and"
6104
0
                               " stop in the same entity\n");
6105
0
      }
6106
1.53k
      SKIP(2);
6107
11.2k
  } else {
6108
11.2k
      xmlFreeDocElementContent(ctxt->myDoc, ret);
6109
11.2k
      xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
6110
11.2k
      return(NULL);
6111
11.2k
  }
6112
6113
12.7k
    } else {
6114
0
  xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
6115
0
    }
6116
1.53k
    return(ret);
6117
25.5k
}
6118
6119
/**
6120
 * xmlParseElementChildrenContentDeclPriv:
6121
 * @ctxt:  an XML parser context
6122
 * @inputchk:  the input used for the current entity, needed for boundary checks
6123
 * @depth: the level of recursion
6124
 *
6125
 * parse the declaration for a Mixed Element content
6126
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6127
 *
6128
 *
6129
 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6130
 *
6131
 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6132
 *
6133
 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6134
 *
6135
 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6136
 *
6137
 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6138
 * TODO Parameter-entity replacement text must be properly nested
6139
 *  with parenthesized groups. That is to say, if either of the
6140
 *  opening or closing parentheses in a choice, seq, or Mixed
6141
 *  construct is contained in the replacement text for a parameter
6142
 *  entity, both must be contained in the same replacement text. For
6143
 *  interoperability, if a parameter-entity reference appears in a
6144
 *  choice, seq, or Mixed construct, its replacement text should not
6145
 *  be empty, and neither the first nor last non-blank character of
6146
 *  the replacement text should be a connector (| or ,).
6147
 *
6148
 * Returns the tree of xmlElementContentPtr describing the element
6149
 *          hierarchy.
6150
 */
6151
static xmlElementContentPtr
6152
xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
6153
370k
                                       int depth) {
6154
370k
    xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
6155
370k
    const xmlChar *elem;
6156
370k
    xmlChar type = 0;
6157
6158
370k
    if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
6159
370k
        (depth >  2048)) {
6160
482
        xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED,
6161
482
"xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n",
6162
482
                          depth);
6163
482
  return(NULL);
6164
482
    }
6165
370k
    SKIP_BLANKS;
6166
370k
    GROW;
6167
370k
    if (RAW == '(') {
6168
231k
  int inputid = ctxt->input->id;
6169
6170
        /* Recurse on first child */
6171
231k
  NEXT;
6172
231k
  SKIP_BLANKS;
6173
231k
        cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6174
231k
                                                           depth + 1);
6175
231k
  SKIP_BLANKS;
6176
231k
  GROW;
6177
231k
    } else {
6178
139k
  elem = xmlParseName(ctxt);
6179
139k
  if (elem == NULL) {
6180
34.7k
      xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6181
34.7k
      return(NULL);
6182
34.7k
  }
6183
104k
        cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6184
104k
  if (cur == NULL) {
6185
0
      xmlErrMemory(ctxt, NULL);
6186
0
      return(NULL);
6187
0
  }
6188
104k
  GROW;
6189
104k
  if (RAW == '?') {
6190
5.87k
      cur->ocur = XML_ELEMENT_CONTENT_OPT;
6191
5.87k
      NEXT;
6192
98.6k
  } else if (RAW == '*') {
6193
11.9k
      cur->ocur = XML_ELEMENT_CONTENT_MULT;
6194
11.9k
      NEXT;
6195
86.7k
  } else if (RAW == '+') {
6196
1.71k
      cur->ocur = XML_ELEMENT_CONTENT_PLUS;
6197
1.71k
      NEXT;
6198
85.0k
  } else {
6199
85.0k
      cur->ocur = XML_ELEMENT_CONTENT_ONCE;
6200
85.0k
  }
6201
104k
  GROW;
6202
104k
    }
6203
335k
    SKIP_BLANKS;
6204
335k
    SHRINK;
6205
478k
    while ((RAW != ')') && (ctxt->instate != XML_PARSER_EOF)) {
6206
        /*
6207
   * Each loop we parse one separator and one element.
6208
   */
6209
387k
        if (RAW == ',') {
6210
64.0k
      if (type == 0) type = CUR;
6211
6212
      /*
6213
       * Detect "Name | Name , Name" error
6214
       */
6215
19.1k
      else if (type != CUR) {
6216
4.57k
    xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6217
4.57k
        "xmlParseElementChildrenContentDecl : '%c' expected\n",
6218
4.57k
                      type);
6219
4.57k
    if ((last != NULL) && (last != ret))
6220
1.94k
        xmlFreeDocElementContent(ctxt->myDoc, last);
6221
4.57k
    if (ret != NULL)
6222
4.57k
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6223
4.57k
    return(NULL);
6224
4.57k
      }
6225
59.5k
      NEXT;
6226
6227
59.5k
      op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
6228
59.5k
      if (op == NULL) {
6229
0
    if ((last != NULL) && (last != ret))
6230
0
        xmlFreeDocElementContent(ctxt->myDoc, last);
6231
0
          xmlFreeDocElementContent(ctxt->myDoc, ret);
6232
0
    return(NULL);
6233
0
      }
6234
59.5k
      if (last == NULL) {
6235
51.3k
    op->c1 = ret;
6236
51.3k
    if (ret != NULL)
6237
37.9k
        ret->parent = op;
6238
51.3k
    ret = cur = op;
6239
51.3k
      } else {
6240
8.12k
          cur->c2 = op;
6241
8.12k
    if (op != NULL)
6242
8.12k
        op->parent = cur;
6243
8.12k
    op->c1 = last;
6244
8.12k
    if (last != NULL)
6245
8.12k
        last->parent = op;
6246
8.12k
    cur =op;
6247
8.12k
    last = NULL;
6248
8.12k
      }
6249
323k
  } else if (RAW == '|') {
6250
113k
      if (type == 0) type = CUR;
6251
6252
      /*
6253
       * Detect "Name , Name | Name" error
6254
       */
6255
42.8k
      else if (type != CUR) {
6256
10.3k
    xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6257
10.3k
        "xmlParseElementChildrenContentDecl : '%c' expected\n",
6258
10.3k
          type);
6259
10.3k
    if ((last != NULL) && (last != ret))
6260
3.63k
        xmlFreeDocElementContent(ctxt->myDoc, last);
6261
10.3k
    if (ret != NULL)
6262
10.3k
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6263
10.3k
    return(NULL);
6264
10.3k
      }
6265
103k
      NEXT;
6266
6267
103k
      op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6268
103k
      if (op == NULL) {
6269
0
    if ((last != NULL) && (last != ret))
6270
0
        xmlFreeDocElementContent(ctxt->myDoc, last);
6271
0
    if (ret != NULL)
6272
0
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6273
0
    return(NULL);
6274
0
      }
6275
103k
      if (last == NULL) {
6276
75.1k
    op->c1 = ret;
6277
75.1k
    if (ret != NULL)
6278
57.1k
        ret->parent = op;
6279
75.1k
    ret = cur = op;
6280
75.1k
      } else {
6281
28.1k
          cur->c2 = op;
6282
28.1k
    if (op != NULL)
6283
28.1k
        op->parent = cur;
6284
28.1k
    op->c1 = last;
6285
28.1k
    if (last != NULL)
6286
28.1k
        last->parent = op;
6287
28.1k
    cur =op;
6288
28.1k
    last = NULL;
6289
28.1k
      }
6290
210k
  } else {
6291
210k
      xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
6292
210k
      if ((last != NULL) && (last != ret))
6293
6.04k
          xmlFreeDocElementContent(ctxt->myDoc, last);
6294
210k
      if (ret != NULL)
6295
62.9k
    xmlFreeDocElementContent(ctxt->myDoc, ret);
6296
210k
      return(NULL);
6297
210k
  }
6298
162k
  GROW;
6299
162k
  SKIP_BLANKS;
6300
162k
  GROW;
6301
162k
  if (RAW == '(') {
6302
93.5k
      int inputid = ctxt->input->id;
6303
      /* Recurse on second child */
6304
93.5k
      NEXT;
6305
93.5k
      SKIP_BLANKS;
6306
93.5k
      last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6307
93.5k
                                                          depth + 1);
6308
93.5k
      SKIP_BLANKS;
6309
93.5k
  } else {
6310
69.1k
      elem = xmlParseName(ctxt);
6311
69.1k
      if (elem == NULL) {
6312
19.4k
    xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6313
19.4k
    if (ret != NULL)
6314
19.4k
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6315
19.4k
    return(NULL);
6316
19.4k
      }
6317
49.7k
      last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6318
49.7k
      if (last == NULL) {
6319
0
    if (ret != NULL)
6320
0
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6321
0
    return(NULL);
6322
0
      }
6323
49.7k
      if (RAW == '?') {
6324
10.4k
    last->ocur = XML_ELEMENT_CONTENT_OPT;
6325
10.4k
    NEXT;
6326
39.3k
      } else if (RAW == '*') {
6327
2.86k
    last->ocur = XML_ELEMENT_CONTENT_MULT;
6328
2.86k
    NEXT;
6329
36.4k
      } else if (RAW == '+') {
6330
1.72k
    last->ocur = XML_ELEMENT_CONTENT_PLUS;
6331
1.72k
    NEXT;
6332
34.7k
      } else {
6333
34.7k
    last->ocur = XML_ELEMENT_CONTENT_ONCE;
6334
34.7k
      }
6335
49.7k
  }
6336
143k
  SKIP_BLANKS;
6337
143k
  GROW;
6338
143k
    }
6339
90.9k
    if ((cur != NULL) && (last != NULL)) {
6340
33.5k
        cur->c2 = last;
6341
33.5k
  if (last != NULL)
6342
33.5k
      last->parent = cur;
6343
33.5k
    }
6344
90.9k
    if (ctxt->input->id != inputchk) {
6345
1.79k
  xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6346
1.79k
                       "Element content declaration doesn't start and stop in"
6347
1.79k
                       " the same entity\n");
6348
1.79k
    }
6349
90.9k
    NEXT;
6350
90.9k
    if (RAW == '?') {
6351
15.1k
  if (ret != NULL) {
6352
14.1k
      if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6353
13.1k
          (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6354
4.72k
          ret->ocur = XML_ELEMENT_CONTENT_MULT;
6355
9.40k
      else
6356
9.40k
          ret->ocur = XML_ELEMENT_CONTENT_OPT;
6357
14.1k
  }
6358
15.1k
  NEXT;
6359
75.7k
    } else if (RAW == '*') {
6360
21.3k
  if (ret != NULL) {
6361
17.7k
      ret->ocur = XML_ELEMENT_CONTENT_MULT;
6362
17.7k
      cur = ret;
6363
      /*
6364
       * Some normalization:
6365
       * (a | b* | c?)* == (a | b | c)*
6366
       */
6367
58.6k
      while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6368
40.8k
    if ((cur->c1 != NULL) &&
6369
34.9k
              ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6370
31.6k
         (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6371
11.2k
        cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6372
40.8k
    if ((cur->c2 != NULL) &&
6373
37.0k
              ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6374
33.3k
         (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6375
8.75k
        cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6376
40.8k
    cur = cur->c2;
6377
40.8k
      }
6378
17.7k
  }
6379
21.3k
  NEXT;
6380
54.4k
    } else if (RAW == '+') {
6381
19.4k
  if (ret != NULL) {
6382
17.7k
      int found = 0;
6383
6384
17.7k
      if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6385
15.2k
          (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6386
7.50k
          ret->ocur = XML_ELEMENT_CONTENT_MULT;
6387
10.2k
      else
6388
10.2k
          ret->ocur = XML_ELEMENT_CONTENT_PLUS;
6389
      /*
6390
       * Some normalization:
6391
       * (a | b*)+ == (a | b)*
6392
       * (a | b?)+ == (a | b)*
6393
       */
6394
61.4k
      while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6395
43.6k
    if ((cur->c1 != NULL) &&
6396
31.9k
              ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6397
29.5k
         (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6398
6.58k
        cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6399
6.58k
        found = 1;
6400
6.58k
    }
6401
43.6k
    if ((cur->c2 != NULL) &&
6402
38.2k
              ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6403
33.9k
         (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6404
7.54k
        cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6405
7.54k
        found = 1;
6406
7.54k
    }
6407
43.6k
    cur = cur->c2;
6408
43.6k
      }
6409
17.7k
      if (found)
6410
8.83k
    ret->ocur = XML_ELEMENT_CONTENT_MULT;
6411
17.7k
  }
6412
19.4k
  NEXT;
6413
19.4k
    }
6414
90.9k
    return(ret);
6415
335k
}
6416
6417
/**
6418
 * xmlParseElementChildrenContentDecl:
6419
 * @ctxt:  an XML parser context
6420
 * @inputchk:  the input used for the current entity, needed for boundary checks
6421
 *
6422
 * parse the declaration for a Mixed Element content
6423
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6424
 *
6425
 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6426
 *
6427
 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6428
 *
6429
 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6430
 *
6431
 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6432
 *
6433
 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6434
 * TODO Parameter-entity replacement text must be properly nested
6435
 *  with parenthesized groups. That is to say, if either of the
6436
 *  opening or closing parentheses in a choice, seq, or Mixed
6437
 *  construct is contained in the replacement text for a parameter
6438
 *  entity, both must be contained in the same replacement text. For
6439
 *  interoperability, if a parameter-entity reference appears in a
6440
 *  choice, seq, or Mixed construct, its replacement text should not
6441
 *  be empty, and neither the first nor last non-blank character of
6442
 *  the replacement text should be a connector (| or ,).
6443
 *
6444
 * Returns the tree of xmlElementContentPtr describing the element
6445
 *          hierarchy.
6446
 */
6447
xmlElementContentPtr
6448
0
xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6449
    /* stub left for API/ABI compat */
6450
0
    return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6451
0
}
6452
6453
/**
6454
 * xmlParseElementContentDecl:
6455
 * @ctxt:  an XML parser context
6456
 * @name:  the name of the element being defined.
6457
 * @result:  the Element Content pointer will be stored here if any
6458
 *
6459
 * parse the declaration for an Element content either Mixed or Children,
6460
 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
6461
 *
6462
 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6463
 *
6464
 * returns: the type of element content XML_ELEMENT_TYPE_xxx
6465
 */
6466
6467
int
6468
xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
6469
71.7k
                           xmlElementContentPtr *result) {
6470
6471
71.7k
    xmlElementContentPtr tree = NULL;
6472
71.7k
    int inputid = ctxt->input->id;
6473
71.7k
    int res;
6474
6475
71.7k
    *result = NULL;
6476
6477
71.7k
    if (RAW != '(') {
6478
0
  xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6479
0
    "xmlParseElementContentDecl : %s '(' expected\n", name);
6480
0
  return(-1);
6481
0
    }
6482
71.7k
    NEXT;
6483
71.7k
    GROW;
6484
71.7k
    if (ctxt->instate == XML_PARSER_EOF)
6485
0
        return(-1);
6486
71.7k
    SKIP_BLANKS;
6487
71.7k
    if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6488
25.5k
        tree = xmlParseElementMixedContentDecl(ctxt, inputid);
6489
25.5k
  res = XML_ELEMENT_TYPE_MIXED;
6490
46.2k
    } else {
6491
46.2k
        tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
6492
46.2k
  res = XML_ELEMENT_TYPE_ELEMENT;
6493
46.2k
    }
6494
71.7k
    SKIP_BLANKS;
6495
71.7k
    *result = tree;
6496
71.7k
    return(res);
6497
71.7k
}
6498
6499
/**
6500
 * xmlParseElementDecl:
6501
 * @ctxt:  an XML parser context
6502
 *
6503
 * parse an Element declaration.
6504
 *
6505
 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6506
 *
6507
 * [ VC: Unique Element Type Declaration ]
6508
 * No element type may be declared more than once
6509
 *
6510
 * Returns the type of the element, or -1 in case of error
6511
 */
6512
int
6513
90.5k
xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
6514
90.5k
    const xmlChar *name;
6515
90.5k
    int ret = -1;
6516
90.5k
    xmlElementContentPtr content  = NULL;
6517
6518
    /* GROW; done in the caller */
6519
90.5k
    if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
6520
83.4k
  int inputid = ctxt->input->id;
6521
6522
83.4k
  SKIP(9);
6523
83.4k
  if (SKIP_BLANKS == 0) {
6524
1.52k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6525
1.52k
               "Space required after 'ELEMENT'\n");
6526
1.52k
      return(-1);
6527
1.52k
  }
6528
81.9k
        name = xmlParseName(ctxt);
6529
81.9k
  if (name == NULL) {
6530
2.14k
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6531
2.14k
         "xmlParseElementDecl: no name for Element\n");
6532
2.14k
      return(-1);
6533
2.14k
  }
6534
79.7k
  if (SKIP_BLANKS == 0) {
6535
45.9k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6536
45.9k
         "Space required after the element name\n");
6537
45.9k
  }
6538
79.7k
  if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
6539
754
      SKIP(5);
6540
      /*
6541
       * Element must always be empty.
6542
       */
6543
754
      ret = XML_ELEMENT_TYPE_EMPTY;
6544
79.0k
  } else if ((RAW == 'A') && (NXT(1) == 'N') &&
6545
885
             (NXT(2) == 'Y')) {
6546
436
      SKIP(3);
6547
      /*
6548
       * Element is a generic container.
6549
       */
6550
436
      ret = XML_ELEMENT_TYPE_ANY;
6551
78.5k
  } else if (RAW == '(') {
6552
71.7k
      ret = xmlParseElementContentDecl(ctxt, name, &content);
6553
71.7k
  } else {
6554
      /*
6555
       * [ WFC: PEs in Internal Subset ] error handling.
6556
       */
6557
6.79k
      if ((RAW == '%') && (ctxt->external == 0) &&
6558
433
          (ctxt->inputNr == 1)) {
6559
433
    xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
6560
433
    "PEReference: forbidden within markup decl in internal subset\n");
6561
6.36k
      } else {
6562
6.36k
    xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6563
6.36k
          "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6564
6.36k
            }
6565
6.79k
      return(-1);
6566
6.79k
  }
6567
6568
72.9k
  SKIP_BLANKS;
6569
6570
72.9k
  if (RAW != '>') {
6571
42.5k
      xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
6572
42.5k
      if (content != NULL) {
6573
5.95k
    xmlFreeDocElementContent(ctxt->myDoc, content);
6574
5.95k
      }
6575
42.5k
  } else {
6576
30.3k
      if (inputid != ctxt->input->id) {
6577
3.57k
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6578
3.57k
                               "Element declaration doesn't start and stop in"
6579
3.57k
                               " the same entity\n");
6580
3.57k
      }
6581
6582
30.3k
      NEXT;
6583
30.3k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6584
30.3k
    (ctxt->sax->elementDecl != NULL)) {
6585
30.3k
    if (content != NULL)
6586
14.2k
        content->parent = NULL;
6587
30.3k
          ctxt->sax->elementDecl(ctxt->userData, name, ret,
6588
30.3k
                           content);
6589
30.3k
    if ((content != NULL) && (content->parent == NULL)) {
6590
        /*
6591
         * this is a trick: if xmlAddElementDecl is called,
6592
         * instead of copying the full tree it is plugged directly
6593
         * if called from the parser. Avoid duplicating the
6594
         * interfaces or change the API/ABI
6595
         */
6596
12.2k
        xmlFreeDocElementContent(ctxt->myDoc, content);
6597
12.2k
    }
6598
30.3k
      } else if (content != NULL) {
6599
0
    xmlFreeDocElementContent(ctxt->myDoc, content);
6600
0
      }
6601
30.3k
  }
6602
72.9k
    }
6603
80.0k
    return(ret);
6604
90.5k
}
6605
6606
/**
6607
 * xmlParseConditionalSections
6608
 * @ctxt:  an XML parser context
6609
 *
6610
 * [61] conditionalSect ::= includeSect | ignoreSect
6611
 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6612
 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6613
 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6614
 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6615
 */
6616
6617
static void
6618
235k
xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
6619
235k
    int id = ctxt->input->id;
6620
6621
235k
    SKIP(3);
6622
235k
    SKIP_BLANKS;
6623
235k
    if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
6624
229k
  SKIP(7);
6625
229k
  SKIP_BLANKS;
6626
229k
  if (RAW != '[') {
6627
20
      xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6628
20
      xmlHaltParser(ctxt);
6629
20
      return;
6630
229k
  } else {
6631
229k
      if (ctxt->input->id != id) {
6632
1
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6633
1
                         "All markup of the conditional section is not"
6634
1
                               " in the same entity\n");
6635
1
      }
6636
229k
      NEXT;
6637
229k
  }
6638
229k
  if (xmlParserDebugEntities) {
6639
0
      if ((ctxt->input != NULL) && (ctxt->input->filename))
6640
0
    xmlGenericError(xmlGenericErrorContext,
6641
0
      "%s(%d): ", ctxt->input->filename,
6642
0
      ctxt->input->line);
6643
0
      xmlGenericError(xmlGenericErrorContext,
6644
0
        "Entering INCLUDE Conditional Section\n");
6645
0
  }
6646
6647
229k
        SKIP_BLANKS;
6648
229k
        GROW;
6649
691k
  while (((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
6650
462k
          (NXT(2) != '>'))) && (ctxt->instate != XML_PARSER_EOF)) {
6651
462k
      const xmlChar *check = CUR_PTR;
6652
462k
      unsigned int cons = ctxt->input->consumed;
6653
6654
462k
      if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6655
229k
    xmlParseConditionalSections(ctxt);
6656
229k
      } else
6657
232k
    xmlParseMarkupDecl(ctxt);
6658
6659
462k
            SKIP_BLANKS;
6660
462k
            GROW;
6661
6662
462k
      if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
6663
326
    xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6664
326
    xmlHaltParser(ctxt);
6665
326
    break;
6666
326
      }
6667
462k
  }
6668
229k
  if (xmlParserDebugEntities) {
6669
0
      if ((ctxt->input != NULL) && (ctxt->input->filename))
6670
0
    xmlGenericError(xmlGenericErrorContext,
6671
0
      "%s(%d): ", ctxt->input->filename,
6672
0
      ctxt->input->line);
6673
0
      xmlGenericError(xmlGenericErrorContext,
6674
0
        "Leaving INCLUDE Conditional Section\n");
6675
0
  }
6676
6677
229k
    } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
6678
6.16k
  int state;
6679
6.16k
  xmlParserInputState instate;
6680
6.16k
  int depth = 0;
6681
6682
6.16k
  SKIP(6);
6683
6.16k
  SKIP_BLANKS;
6684
6.16k
  if (RAW != '[') {
6685
21
      xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6686
21
      xmlHaltParser(ctxt);
6687
21
      return;
6688
6.13k
  } else {
6689
6.13k
      if (ctxt->input->id != id) {
6690
215
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6691
215
                         "All markup of the conditional section is not"
6692
215
                               " in the same entity\n");
6693
215
      }
6694
6.13k
      NEXT;
6695
6.13k
  }
6696
6.13k
  if (xmlParserDebugEntities) {
6697
0
      if ((ctxt->input != NULL) && (ctxt->input->filename))
6698
0
    xmlGenericError(xmlGenericErrorContext,
6699
0
      "%s(%d): ", ctxt->input->filename,
6700
0
      ctxt->input->line);
6701
0
      xmlGenericError(xmlGenericErrorContext,
6702
0
        "Entering IGNORE Conditional Section\n");
6703
0
  }
6704
6705
  /*
6706
   * Parse up to the end of the conditional section
6707
   * But disable SAX event generating DTD building in the meantime
6708
   */
6709
6.13k
  state = ctxt->disableSAX;
6710
6.13k
  instate = ctxt->instate;
6711
6.13k
  if (ctxt->recovery == 0) ctxt->disableSAX = 1;
6712
6.13k
  ctxt->instate = XML_PARSER_IGNORE;
6713
6714
2.22M
  while (((depth >= 0) && (RAW != 0)) &&
6715
2.22M
               (ctxt->instate != XML_PARSER_EOF)) {
6716
2.22M
    if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6717
20.2k
      depth++;
6718
20.2k
      SKIP(3);
6719
20.2k
      continue;
6720
20.2k
    }
6721
2.20M
    if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6722
4.11k
      if (--depth >= 0) SKIP(3);
6723
4.11k
      continue;
6724
4.11k
    }
6725
2.19M
    NEXT;
6726
2.19M
    continue;
6727
2.20M
  }
6728
6729
6.13k
  ctxt->disableSAX = state;
6730
6.13k
  ctxt->instate = instate;
6731
6732
6.13k
  if (xmlParserDebugEntities) {
6733
0
      if ((ctxt->input != NULL) && (ctxt->input->filename))
6734
0
    xmlGenericError(xmlGenericErrorContext,
6735
0
      "%s(%d): ", ctxt->input->filename,
6736
0
      ctxt->input->line);
6737
0
      xmlGenericError(xmlGenericErrorContext,
6738
0
        "Leaving IGNORE Conditional Section\n");
6739
0
  }
6740
6741
6.13k
    } else {
6742
127
  xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
6743
127
  xmlHaltParser(ctxt);
6744
127
  return;
6745
127
    }
6746
6747
235k
    if (RAW == 0)
6748
233k
        SHRINK;
6749
6750
235k
    if (RAW == 0) {
6751
233k
  xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
6752
233k
    } else {
6753
2.01k
  if (ctxt->input->id != id) {
6754
129
      xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6755
129
                     "All markup of the conditional section is not in"
6756
129
                           " the same entity\n");
6757
129
  }
6758
2.01k
  if ((ctxt-> instate != XML_PARSER_EOF) &&
6759
2.01k
      ((ctxt->input->cur + 3) <= ctxt->input->end))
6760
2.01k
      SKIP(3);
6761
2.01k
    }
6762
235k
}
6763
6764
/**
6765
 * xmlParseMarkupDecl:
6766
 * @ctxt:  an XML parser context
6767
 *
6768
 * parse Markup declarations
6769
 *
6770
 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6771
 *                     NotationDecl | PI | Comment
6772
 *
6773
 * [ VC: Proper Declaration/PE Nesting ]
6774
 * Parameter-entity replacement text must be properly nested with
6775
 * markup declarations. That is to say, if either the first character
6776
 * or the last character of a markup declaration (markupdecl above) is
6777
 * contained in the replacement text for a parameter-entity reference,
6778
 * both must be contained in the same replacement text.
6779
 *
6780
 * [ WFC: PEs in Internal Subset ]
6781
 * In the internal DTD subset, parameter-entity references can occur
6782
 * only where markup declarations can occur, not within markup declarations.
6783
 * (This does not apply to references that occur in external parameter
6784
 * entities or to the external subset.)
6785
 */
6786
void
6787
1.42M
xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
6788
1.42M
    GROW;
6789
1.42M
    if (CUR == '<') {
6790
896k
        if (NXT(1) == '!') {
6791
848k
      switch (NXT(2)) {
6792
317k
          case 'E':
6793
317k
        if (NXT(3) == 'L')
6794
90.5k
      xmlParseElementDecl(ctxt);
6795
226k
        else if (NXT(3) == 'N')
6796
224k
      xmlParseEntityDecl(ctxt);
6797
317k
        break;
6798
218k
          case 'A':
6799
218k
        xmlParseAttributeListDecl(ctxt);
6800
218k
        break;
6801
50.5k
          case 'N':
6802
50.5k
        xmlParseNotationDecl(ctxt);
6803
50.5k
        break;
6804
256k
          case '-':
6805
256k
        xmlParseComment(ctxt);
6806
256k
        break;
6807
5.66k
    default:
6808
        /* there is an error but it will be detected later */
6809
5.66k
        break;
6810
848k
      }
6811
848k
  } else if (NXT(1) == '?') {
6812
41.3k
      xmlParsePI(ctxt);
6813
41.3k
  }
6814
896k
    }
6815
6816
    /*
6817
     * detect requirement to exit there and act accordingly
6818
     * and avoid having instate overriden later on
6819
     */
6820
1.42M
    if (ctxt->instate == XML_PARSER_EOF)
6821
6.63k
        return;
6822
6823
    /*
6824
     * Conditional sections are allowed from entities included
6825
     * by PE References in the internal subset.
6826
     */
6827
1.41M
    if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
6828
745k
        if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6829
5.43k
      xmlParseConditionalSections(ctxt);
6830
5.43k
  }
6831
745k
    }
6832
6833
1.41M
    ctxt->instate = XML_PARSER_DTD;
6834
1.41M
}
6835
6836
/**
6837
 * xmlParseTextDecl:
6838
 * @ctxt:  an XML parser context
6839
 *
6840
 * parse an XML declaration header for external entities
6841
 *
6842
 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
6843
 */
6844
6845
void
6846
0
xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
6847
0
    xmlChar *version;
6848
0
    const xmlChar *encoding;
6849
6850
    /*
6851
     * We know that '<?xml' is here.
6852
     */
6853
0
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
6854
0
  SKIP(5);
6855
0
    } else {
6856
0
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
6857
0
  return;
6858
0
    }
6859
6860
0
    if (SKIP_BLANKS == 0) {
6861
0
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6862
0
           "Space needed after '<?xml'\n");
6863
0
    }
6864
6865
    /*
6866
     * We may have the VersionInfo here.
6867
     */
6868
0
    version = xmlParseVersionInfo(ctxt);
6869
0
    if (version == NULL)
6870
0
  version = xmlCharStrdup(XML_DEFAULT_VERSION);
6871
0
    else {
6872
0
  if (SKIP_BLANKS == 0) {
6873
0
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6874
0
               "Space needed here\n");
6875
0
  }
6876
0
    }
6877
0
    ctxt->input->version = version;
6878
6879
    /*
6880
     * We must have the encoding declaration
6881
     */
6882
0
    encoding = xmlParseEncodingDecl(ctxt);
6883
0
    if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6884
  /*
6885
   * The XML REC instructs us to stop parsing right here
6886
   */
6887
0
        return;
6888
0
    }
6889
0
    if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
6890
0
  xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
6891
0
           "Missing encoding in text declaration\n");
6892
0
    }
6893
6894
0
    SKIP_BLANKS;
6895
0
    if ((RAW == '?') && (NXT(1) == '>')) {
6896
0
        SKIP(2);
6897
0
    } else if (RAW == '>') {
6898
        /* Deprecated old WD ... */
6899
0
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
6900
0
  NEXT;
6901
0
    } else {
6902
0
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
6903
0
  MOVETO_ENDTAG(CUR_PTR);
6904
0
  NEXT;
6905
0
    }
6906
0
}
6907
6908
/**
6909
 * xmlParseExternalSubset:
6910
 * @ctxt:  an XML parser context
6911
 * @ExternalID: the external identifier
6912
 * @SystemID: the system identifier (or URL)
6913
 *
6914
 * parse Markup declarations from an external subset
6915
 *
6916
 * [30] extSubset ::= textDecl? extSubsetDecl
6917
 *
6918
 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
6919
 */
6920
void
6921
xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
6922
0
                       const xmlChar *SystemID) {
6923
0
    xmlDetectSAX2(ctxt);
6924
0
    GROW;
6925
6926
0
    if ((ctxt->encoding == NULL) &&
6927
0
        (ctxt->input->end - ctxt->input->cur >= 4)) {
6928
0
        xmlChar start[4];
6929
0
  xmlCharEncoding enc;
6930
6931
0
  start[0] = RAW;
6932
0
  start[1] = NXT(1);
6933
0
  start[2] = NXT(2);
6934
0
  start[3] = NXT(3);
6935
0
  enc = xmlDetectCharEncoding(start, 4);
6936
0
  if (enc != XML_CHAR_ENCODING_NONE)
6937
0
      xmlSwitchEncoding(ctxt, enc);
6938
0
    }
6939
6940
0
    if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
6941
0
  xmlParseTextDecl(ctxt);
6942
0
  if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6943
      /*
6944
       * The XML REC instructs us to stop parsing right here
6945
       */
6946
0
      xmlHaltParser(ctxt);
6947
0
      return;
6948
0
  }
6949
0
    }
6950
0
    if (ctxt->myDoc == NULL) {
6951
0
        ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
6952
0
  if (ctxt->myDoc == NULL) {
6953
0
      xmlErrMemory(ctxt, "New Doc failed");
6954
0
      return;
6955
0
  }
6956
0
  ctxt->myDoc->properties = XML_DOC_INTERNAL;
6957
0
    }
6958
0
    if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
6959
0
        xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
6960
6961
0
    ctxt->instate = XML_PARSER_DTD;
6962
0
    ctxt->external = 1;
6963
0
    SKIP_BLANKS;
6964
0
    while (((RAW == '<') && (NXT(1) == '?')) ||
6965
0
           ((RAW == '<') && (NXT(1) == '!')) ||
6966
0
     (RAW == '%')) {
6967
0
  const xmlChar *check = CUR_PTR;
6968
0
  unsigned int cons = ctxt->input->consumed;
6969
6970
0
  GROW;
6971
0
        if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6972
0
      xmlParseConditionalSections(ctxt);
6973
0
  } else
6974
0
      xmlParseMarkupDecl(ctxt);
6975
0
        SKIP_BLANKS;
6976
6977
0
  if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
6978
0
      xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6979
0
      break;
6980
0
  }
6981
0
    }
6982
6983
0
    if (RAW != 0) {
6984
0
  xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6985
0
    }
6986
6987
0
}
6988
6989
/**
6990
 * xmlParseReference:
6991
 * @ctxt:  an XML parser context
6992
 *
6993
 * parse and handle entity references in content. Depending on the SAX
6994
 * interface, this may end up in a call to characters() if this is a
6995
 * CharRef or a predefined entity, if there is no reference() callback,
6996
 * or if the parser was asked to switch to that mode.
6997
 *
6998
 * [67] Reference ::= EntityRef | CharRef
6999
 */
7000
void
7001
2.01M
xmlParseReference(xmlParserCtxtPtr ctxt) {
7002
2.01M
    xmlEntityPtr ent;
7003
2.01M
    xmlChar *val;
7004
2.01M
    int was_checked;
7005
2.01M
    xmlNodePtr list = NULL;
7006
2.01M
    xmlParserErrors ret = XML_ERR_OK;
7007
7008
7009
2.01M
    if (RAW != '&')
7010
0
        return;
7011
7012
    /*
7013
     * Simple case of a CharRef
7014
     */
7015
2.01M
    if (NXT(1) == '#') {
7016
368k
  int i = 0;
7017
368k
  xmlChar out[10];
7018
368k
  int hex = NXT(2);
7019
368k
  int value = xmlParseCharRef(ctxt);
7020
7021
368k
  if (value == 0)
7022
236k
      return;
7023
132k
  if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
7024
      /*
7025
       * So we are using non-UTF-8 buffers
7026
       * Check that the char fit on 8bits, if not
7027
       * generate a CharRef.
7028
       */
7029
64.1k
      if (value <= 0xFF) {
7030
41.8k
    out[0] = value;
7031
41.8k
    out[1] = 0;
7032
41.8k
    if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7033
41.8k
        (!ctxt->disableSAX))
7034
41.8k
        ctxt->sax->characters(ctxt->userData, out, 1);
7035
41.8k
      } else {
7036
22.2k
    if ((hex == 'x') || (hex == 'X'))
7037
4.87k
        snprintf((char *)out, sizeof(out), "#x%X", value);
7038
17.3k
    else
7039
17.3k
        snprintf((char *)out, sizeof(out), "#%d", value);
7040
22.2k
    if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7041
22.2k
        (!ctxt->disableSAX))
7042
22.2k
        ctxt->sax->reference(ctxt->userData, out);
7043
22.2k
      }
7044
68.2k
  } else {
7045
      /*
7046
       * Just encode the value in UTF-8
7047
       */
7048
68.2k
      COPY_BUF(0 ,out, i, value);
7049
68.2k
      out[i] = 0;
7050
68.2k
      if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7051
68.2k
    (!ctxt->disableSAX))
7052
48.2k
    ctxt->sax->characters(ctxt->userData, out, i);
7053
68.2k
  }
7054
132k
  return;
7055
368k
    }
7056
7057
    /*
7058
     * We are seeing an entity reference
7059
     */
7060
1.64M
    ent = xmlParseEntityRef(ctxt);
7061
1.64M
    if (ent == NULL) return;
7062
643k
    if (!ctxt->wellFormed)
7063
423k
  return;
7064
219k
    was_checked = ent->checked;
7065
7066
    /* special case of predefined entities */
7067
219k
    if ((ent->name == NULL) ||
7068
219k
        (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
7069
11.0k
  val = ent->content;
7070
11.0k
  if (val == NULL) return;
7071
  /*
7072
   * inline the entity.
7073
   */
7074
11.0k
  if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7075
11.0k
      (!ctxt->disableSAX))
7076
11.0k
      ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
7077
11.0k
  return;
7078
11.0k
    }
7079
7080
    /*
7081
     * The first reference to the entity triggers a parsing phase
7082
     * where the ent->children is filled with the result from
7083
     * the parsing.
7084
     * Note: external parsed entities will not be loaded, it is not
7085
     * required for a non-validating parser, unless the parsing option
7086
     * of validating, or substituting entities were given. Doing so is
7087
     * far more secure as the parser will only process data coming from
7088
     * the document entity by default.
7089
     */
7090
208k
    if (((ent->checked == 0) ||
7091
25.2k
         ((ent->children == NULL) && (ctxt->options & XML_PARSE_NOENT))) &&
7092
183k
        ((ent->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY) ||
7093
183k
         (ctxt->options & (XML_PARSE_NOENT | XML_PARSE_DTDVALID)))) {
7094
183k
  unsigned long oldnbent = ctxt->nbentities;
7095
7096
  /*
7097
   * This is a bit hackish but this seems the best
7098
   * way to make sure both SAX and DOM entity support
7099
   * behaves okay.
7100
   */
7101
183k
  void *user_data;
7102
183k
  if (ctxt->userData == ctxt)
7103
183k
      user_data = NULL;
7104
0
  else
7105
0
      user_data = ctxt->userData;
7106
7107
  /*
7108
   * Check that this entity is well formed
7109
   * 4.3.2: An internal general parsed entity is well-formed
7110
   * if its replacement text matches the production labeled
7111
   * content.
7112
   */
7113
183k
  if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7114
183k
      ctxt->depth++;
7115
183k
      ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content,
7116
183k
                                                user_data, &list);
7117
183k
      ctxt->depth--;
7118
7119
183k
  } else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7120
0
      ctxt->depth++;
7121
0
      ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax,
7122
0
                                     user_data, ctxt->depth, ent->URI,
7123
0
             ent->ExternalID, &list);
7124
0
      ctxt->depth--;
7125
0
  } else {
7126
0
      ret = XML_ERR_ENTITY_PE_INTERNAL;
7127
0
      xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7128
0
       "invalid entity type found\n", NULL);
7129
0
  }
7130
7131
  /*
7132
   * Store the number of entities needing parsing for this entity
7133
   * content and do the checks
7134
   */
7135
183k
  ent->checked = (ctxt->nbentities - oldnbent + 1) * 2;
7136
183k
  if ((ent->content != NULL) && (xmlStrchr(ent->content, '<')))
7137
164k
      ent->checked |= 1;
7138
183k
  if (ret == XML_ERR_ENTITY_LOOP) {
7139
18.3k
      xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7140
18.3k
      xmlFreeNodeList(list);
7141
18.3k
      return;
7142
18.3k
  }
7143
164k
  if (xmlParserEntityCheck(ctxt, 0, ent, 0)) {
7144
35.2k
      xmlFreeNodeList(list);
7145
35.2k
      return;
7146
35.2k
  }
7147
7148
129k
  if ((ret == XML_ERR_OK) && (list != NULL)) {
7149
732
      if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
7150
0
       (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
7151
732
    (ent->children == NULL)) {
7152
732
    ent->children = list;
7153
732
    if (ctxt->replaceEntities) {
7154
        /*
7155
         * Prune it directly in the generated document
7156
         * except for single text nodes.
7157
         */
7158
0
        if (((list->type == XML_TEXT_NODE) &&
7159
0
       (list->next == NULL)) ||
7160
0
      (ctxt->parseMode == XML_PARSE_READER)) {
7161
0
      list->parent = (xmlNodePtr) ent;
7162
0
      list = NULL;
7163
0
      ent->owner = 1;
7164
0
        } else {
7165
0
      ent->owner = 0;
7166
0
      while (list != NULL) {
7167
0
          list->parent = (xmlNodePtr) ctxt->node;
7168
0
          list->doc = ctxt->myDoc;
7169
0
          if (list->next == NULL)
7170
0
        ent->last = list;
7171
0
          list = list->next;
7172
0
      }
7173
0
      list = ent->children;
7174
0
#ifdef LIBXML_LEGACY_ENABLED
7175
0
      if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7176
0
        xmlAddEntityReference(ent, list, NULL);
7177
0
#endif /* LIBXML_LEGACY_ENABLED */
7178
0
        }
7179
732
    } else {
7180
732
        ent->owner = 1;
7181
3.79k
        while (list != NULL) {
7182
3.06k
      list->parent = (xmlNodePtr) ent;
7183
3.06k
      xmlSetTreeDoc(list, ent->doc);
7184
3.06k
      if (list->next == NULL)
7185
732
          ent->last = list;
7186
3.06k
      list = list->next;
7187
3.06k
        }
7188
732
    }
7189
732
      } else {
7190
0
    xmlFreeNodeList(list);
7191
0
    list = NULL;
7192
0
      }
7193
128k
  } else if ((ret != XML_ERR_OK) &&
7194
128k
       (ret != XML_WAR_UNDECLARED_ENTITY)) {
7195
127k
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7196
127k
         "Entity '%s' failed to parse\n", ent->name);
7197
127k
      xmlParserEntityCheck(ctxt, 0, ent, 0);
7198
127k
  } else if (list != NULL) {
7199
0
      xmlFreeNodeList(list);
7200
0
      list = NULL;
7201
0
  }
7202
129k
  if (ent->checked == 0)
7203
0
      ent->checked = 2;
7204
7205
        /* Prevent entity from being parsed and expanded twice (Bug 760367). */
7206
129k
        was_checked = 0;
7207
129k
    } else if (ent->checked != 1) {
7208
25.5k
  ctxt->nbentities += ent->checked / 2;
7209
25.5k
    }
7210
7211
    /*
7212
     * Now that the entity content has been gathered
7213
     * provide it to the application, this can take different forms based
7214
     * on the parsing modes.
7215
     */
7216
154k
    if (ent->children == NULL) {
7217
  /*
7218
   * Probably running in SAX mode and the callbacks don't
7219
   * build the entity content. So unless we already went
7220
   * through parsing for first checking go through the entity
7221
   * content to generate callbacks associated to the entity
7222
   */
7223
144k
  if (was_checked != 0) {
7224
15.1k
      void *user_data;
7225
      /*
7226
       * This is a bit hackish but this seems the best
7227
       * way to make sure both SAX and DOM entity support
7228
       * behaves okay.
7229
       */
7230
15.1k
      if (ctxt->userData == ctxt)
7231
15.1k
    user_data = NULL;
7232
0
      else
7233
0
    user_data = ctxt->userData;
7234
7235
15.1k
      if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7236
15.1k
    ctxt->depth++;
7237
15.1k
    ret = xmlParseBalancedChunkMemoryInternal(ctxt,
7238
15.1k
           ent->content, user_data, NULL);
7239
15.1k
    ctxt->depth--;
7240
15.1k
      } else if (ent->etype ==
7241
0
           XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7242
0
    ctxt->depth++;
7243
0
    ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
7244
0
         ctxt->sax, user_data, ctxt->depth,
7245
0
         ent->URI, ent->ExternalID, NULL);
7246
0
    ctxt->depth--;
7247
0
      } else {
7248
0
    ret = XML_ERR_ENTITY_PE_INTERNAL;
7249
0
    xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7250
0
           "invalid entity type found\n", NULL);
7251
0
      }
7252
15.1k
      if (ret == XML_ERR_ENTITY_LOOP) {
7253
33
    xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7254
33
    return;
7255
33
      }
7256
15.1k
  }
7257
144k
  if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7258
144k
      (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7259
      /*
7260
       * Entity reference callback comes second, it's somewhat
7261
       * superfluous but a compatibility to historical behaviour
7262
       */
7263
30.1k
      ctxt->sax->reference(ctxt->userData, ent->name);
7264
30.1k
  }
7265
144k
  return;
7266
144k
    }
7267
7268
    /*
7269
     * If we didn't get any children for the entity being built
7270
     */
7271
10.7k
    if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7272
10.7k
  (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7273
  /*
7274
   * Create a node.
7275
   */
7276
10.7k
  ctxt->sax->reference(ctxt->userData, ent->name);
7277
10.7k
  return;
7278
10.7k
    }
7279
7280
0
    if ((ctxt->replaceEntities) || (ent->children == NULL))  {
7281
  /*
7282
   * There is a problem on the handling of _private for entities
7283
   * (bug 155816): Should we copy the content of the field from
7284
   * the entity (possibly overwriting some value set by the user
7285
   * when a copy is created), should we leave it alone, or should
7286
   * we try to take care of different situations?  The problem
7287
   * is exacerbated by the usage of this field by the xmlReader.
7288
   * To fix this bug, we look at _private on the created node
7289
   * and, if it's NULL, we copy in whatever was in the entity.
7290
   * If it's not NULL we leave it alone.  This is somewhat of a
7291
   * hack - maybe we should have further tests to determine
7292
   * what to do.
7293
   */
7294
0
  if ((ctxt->node != NULL) && (ent->children != NULL)) {
7295
      /*
7296
       * Seems we are generating the DOM content, do
7297
       * a simple tree copy for all references except the first
7298
       * In the first occurrence list contains the replacement.
7299
       */
7300
0
      if (((list == NULL) && (ent->owner == 0)) ||
7301
0
    (ctxt->parseMode == XML_PARSE_READER)) {
7302
0
    xmlNodePtr nw = NULL, cur, firstChild = NULL;
7303
7304
    /*
7305
     * We are copying here, make sure there is no abuse
7306
     */
7307
0
    ctxt->sizeentcopy += ent->length + 5;
7308
0
    if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
7309
0
        return;
7310
7311
    /*
7312
     * when operating on a reader, the entities definitions
7313
     * are always owning the entities subtree.
7314
    if (ctxt->parseMode == XML_PARSE_READER)
7315
        ent->owner = 1;
7316
     */
7317
7318
0
    cur = ent->children;
7319
0
    while (cur != NULL) {
7320
0
        nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7321
0
        if (nw != NULL) {
7322
0
      if (nw->_private == NULL)
7323
0
          nw->_private = cur->_private;
7324
0
      if (firstChild == NULL){
7325
0
          firstChild = nw;
7326
0
      }
7327
0
      nw = xmlAddChild(ctxt->node, nw);
7328
0
        }
7329
0
        if (cur == ent->last) {
7330
      /*
7331
       * needed to detect some strange empty
7332
       * node cases in the reader tests
7333
       */
7334
0
      if ((ctxt->parseMode == XML_PARSE_READER) &&
7335
0
          (nw != NULL) &&
7336
0
          (nw->type == XML_ELEMENT_NODE) &&
7337
0
          (nw->children == NULL))
7338
0
          nw->extra = 1;
7339
7340
0
      break;
7341
0
        }
7342
0
        cur = cur->next;
7343
0
    }
7344
0
#ifdef LIBXML_LEGACY_ENABLED
7345
0
    if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7346
0
      xmlAddEntityReference(ent, firstChild, nw);
7347
0
#endif /* LIBXML_LEGACY_ENABLED */
7348
0
      } else if ((list == NULL) || (ctxt->inputNr > 0)) {
7349
0
    xmlNodePtr nw = NULL, cur, next, last,
7350
0
         firstChild = NULL;
7351
7352
    /*
7353
     * We are copying here, make sure there is no abuse
7354
     */
7355
0
    ctxt->sizeentcopy += ent->length + 5;
7356
0
    if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
7357
0
        return;
7358
7359
    /*
7360
     * Copy the entity child list and make it the new
7361
     * entity child list. The goal is to make sure any
7362
     * ID or REF referenced will be the one from the
7363
     * document content and not the entity copy.
7364
     */
7365
0
    cur = ent->children;
7366
0
    ent->children = NULL;
7367
0
    last = ent->last;
7368
0
    ent->last = NULL;
7369
0
    while (cur != NULL) {
7370
0
        next = cur->next;
7371
0
        cur->next = NULL;
7372
0
        cur->parent = NULL;
7373
0
        nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7374
0
        if (nw != NULL) {
7375
0
      if (nw->_private == NULL)
7376
0
          nw->_private = cur->_private;
7377
0
      if (firstChild == NULL){
7378
0
          firstChild = cur;
7379
0
      }
7380
0
      xmlAddChild((xmlNodePtr) ent, nw);
7381
0
      xmlAddChild(ctxt->node, cur);
7382
0
        }
7383
0
        if (cur == last)
7384
0
      break;
7385
0
        cur = next;
7386
0
    }
7387
0
    if (ent->owner == 0)
7388
0
        ent->owner = 1;
7389
0
#ifdef LIBXML_LEGACY_ENABLED
7390
0
    if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7391
0
      xmlAddEntityReference(ent, firstChild, nw);
7392
0
#endif /* LIBXML_LEGACY_ENABLED */
7393
0
      } else {
7394
0
    const xmlChar *nbktext;
7395
7396
    /*
7397
     * the name change is to avoid coalescing of the
7398
     * node with a possible previous text one which
7399
     * would make ent->children a dangling pointer
7400
     */
7401
0
    nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
7402
0
          -1);
7403
0
    if (ent->children->type == XML_TEXT_NODE)
7404
0
        ent->children->name = nbktext;
7405
0
    if ((ent->last != ent->children) &&
7406
0
        (ent->last->type == XML_TEXT_NODE))
7407
0
        ent->last->name = nbktext;
7408
0
    xmlAddChildList(ctxt->node, ent->children);
7409
0
      }
7410
7411
      /*
7412
       * This is to avoid a nasty side effect, see
7413
       * characters() in SAX.c
7414
       */
7415
0
      ctxt->nodemem = 0;
7416
0
      ctxt->nodelen = 0;
7417
0
      return;
7418
0
  }
7419
0
    }
7420
0
}
7421
7422
/**
7423
 * xmlParseEntityRef:
7424
 * @ctxt:  an XML parser context
7425
 *
7426
 * parse ENTITY references declarations
7427
 *
7428
 * [68] EntityRef ::= '&' Name ';'
7429
 *
7430
 * [ WFC: Entity Declared ]
7431
 * In a document without any DTD, a document with only an internal DTD
7432
 * subset which contains no parameter entity references, or a document
7433
 * with "standalone='yes'", the Name given in the entity reference
7434
 * must match that in an entity declaration, except that well-formed
7435
 * documents need not declare any of the following entities: amp, lt,
7436
 * gt, apos, quot.  The declaration of a parameter entity must precede
7437
 * any reference to it.  Similarly, the declaration of a general entity
7438
 * must precede any reference to it which appears in a default value in an
7439
 * attribute-list declaration. Note that if entities are declared in the
7440
 * external subset or in external parameter entities, a non-validating
7441
 * processor is not obligated to read and process their declarations;
7442
 * for such documents, the rule that an entity must be declared is a
7443
 * well-formedness constraint only if standalone='yes'.
7444
 *
7445
 * [ WFC: Parsed Entity ]
7446
 * An entity reference must not contain the name of an unparsed entity
7447
 *
7448
 * Returns the xmlEntityPtr if found, or NULL otherwise.
7449
 */
7450
xmlEntityPtr
7451
2.81M
xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
7452
2.81M
    const xmlChar *name;
7453
2.81M
    xmlEntityPtr ent = NULL;
7454
7455
2.81M
    GROW;
7456
2.81M
    if (ctxt->instate == XML_PARSER_EOF)
7457
0
        return(NULL);
7458
7459
2.81M
    if (RAW != '&')
7460
0
        return(NULL);
7461
2.81M
    NEXT;
7462
2.81M
    name = xmlParseName(ctxt);
7463
2.81M
    if (name == NULL) {
7464
763k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7465
763k
           "xmlParseEntityRef: no name\n");
7466
763k
        return(NULL);
7467
763k
    }
7468
2.05M
    if (RAW != ';') {
7469
635k
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7470
635k
  return(NULL);
7471
635k
    }
7472
1.41M
    NEXT;
7473
7474
    /*
7475
     * Predefined entities override any extra definition
7476
     */
7477
1.41M
    if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7478
1.41M
        ent = xmlGetPredefinedEntity(name);
7479
1.41M
        if (ent != NULL)
7480
500k
            return(ent);
7481
1.41M
    }
7482
7483
    /*
7484
     * Increase the number of entity references parsed
7485
     */
7486
917k
    ctxt->nbentities++;
7487
7488
    /*
7489
     * Ask first SAX for entity resolution, otherwise try the
7490
     * entities which may have been stored in the parser context.
7491
     */
7492
917k
    if (ctxt->sax != NULL) {
7493
917k
  if (ctxt->sax->getEntity != NULL)
7494
917k
      ent = ctxt->sax->getEntity(ctxt->userData, name);
7495
917k
  if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7496
4.21k
      (ctxt->options & XML_PARSE_OLDSAX))
7497
0
      ent = xmlGetPredefinedEntity(name);
7498
917k
  if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7499
4.21k
      (ctxt->userData==ctxt)) {
7500
4.21k
      ent = xmlSAX2GetEntity(ctxt, name);
7501
4.21k
  }
7502
917k
    }
7503
917k
    if (ctxt->instate == XML_PARSER_EOF)
7504
0
  return(NULL);
7505
    /*
7506
     * [ WFC: Entity Declared ]
7507
     * In a document without any DTD, a document with only an
7508
     * internal DTD subset which contains no parameter entity
7509
     * references, or a document with "standalone='yes'", the
7510
     * Name given in the entity reference must match that in an
7511
     * entity declaration, except that well-formed documents
7512
     * need not declare any of the following entities: amp, lt,
7513
     * gt, apos, quot.
7514
     * The declaration of a parameter entity must precede any
7515
     * reference to it.
7516
     * Similarly, the declaration of a general entity must
7517
     * precede any reference to it which appears in a default
7518
     * value in an attribute-list declaration. Note that if
7519
     * entities are declared in the external subset or in
7520
     * external parameter entities, a non-validating processor
7521
     * is not obligated to read and process their declarations;
7522
     * for such documents, the rule that an entity must be
7523
     * declared is a well-formedness constraint only if
7524
     * standalone='yes'.
7525
     */
7526
917k
    if (ent == NULL) {
7527
368k
  if ((ctxt->standalone == 1) ||
7528
365k
      ((ctxt->hasExternalSubset == 0) &&
7529
342k
       (ctxt->hasPErefs == 0))) {
7530
125k
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7531
125k
         "Entity '%s' not defined\n", name);
7532
242k
  } else {
7533
242k
      xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7534
242k
         "Entity '%s' not defined\n", name);
7535
242k
      if ((ctxt->inSubset == 0) &&
7536
30.2k
    (ctxt->sax != NULL) &&
7537
30.2k
    (ctxt->sax->reference != NULL)) {
7538
30.2k
    ctxt->sax->reference(ctxt->userData, name);
7539
30.2k
      }
7540
242k
  }
7541
368k
  xmlParserEntityCheck(ctxt, 0, ent, 0);
7542
368k
  ctxt->valid = 0;
7543
368k
    }
7544
7545
    /*
7546
     * [ WFC: Parsed Entity ]
7547
     * An entity reference must not contain the name of an
7548
     * unparsed entity
7549
     */
7550
549k
    else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7551
293
  xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7552
293
     "Entity reference to unparsed entity %s\n", name);
7553
293
    }
7554
7555
    /*
7556
     * [ WFC: No External Entity References ]
7557
     * Attribute values cannot contain direct or indirect
7558
     * entity references to external entities.
7559
     */
7560
549k
    else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7561
234k
       (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7562
962
  xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7563
962
       "Attribute references external entity '%s'\n", name);
7564
962
    }
7565
    /*
7566
     * [ WFC: No < in Attribute Values ]
7567
     * The replacement text of any entity referred to directly or
7568
     * indirectly in an attribute value (other than "&lt;") must
7569
     * not contain a <.
7570
     */
7571
548k
    else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7572
233k
       (ent != NULL) && 
7573
233k
       (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
7574
233k
  if (((ent->checked & 1) || (ent->checked == 0)) &&
7575
65.7k
       (ent->content != NULL) && (xmlStrchr(ent->content, '<'))) {
7576
49.5k
      xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7577
49.5k
  "'<' in entity '%s' is not allowed in attributes values\n", name);
7578
49.5k
        }
7579
233k
    }
7580
7581
    /*
7582
     * Internal check, no parameter entities here ...
7583
     */
7584
314k
    else {
7585
314k
  switch (ent->etype) {
7586
0
      case XML_INTERNAL_PARAMETER_ENTITY:
7587
0
      case XML_EXTERNAL_PARAMETER_ENTITY:
7588
0
      xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7589
0
       "Attempt to reference the parameter entity '%s'\n",
7590
0
            name);
7591
0
      break;
7592
314k
      default:
7593
314k
      break;
7594
314k
  }
7595
314k
    }
7596
7597
    /*
7598
     * [ WFC: No Recursion ]
7599
     * A parsed entity must not contain a recursive reference
7600
     * to itself, either directly or indirectly.
7601
     * Done somewhere else
7602
     */
7603
917k
    return(ent);
7604
917k
}
7605
7606
/**
7607
 * xmlParseStringEntityRef:
7608
 * @ctxt:  an XML parser context
7609
 * @str:  a pointer to an index in the string
7610
 *
7611
 * parse ENTITY references declarations, but this version parses it from
7612
 * a string value.
7613
 *
7614
 * [68] EntityRef ::= '&' Name ';'
7615
 *
7616
 * [ WFC: Entity Declared ]
7617
 * In a document without any DTD, a document with only an internal DTD
7618
 * subset which contains no parameter entity references, or a document
7619
 * with "standalone='yes'", the Name given in the entity reference
7620
 * must match that in an entity declaration, except that well-formed
7621
 * documents need not declare any of the following entities: amp, lt,
7622
 * gt, apos, quot.  The declaration of a parameter entity must precede
7623
 * any reference to it.  Similarly, the declaration of a general entity
7624
 * must precede any reference to it which appears in a default value in an
7625
 * attribute-list declaration. Note that if entities are declared in the
7626
 * external subset or in external parameter entities, a non-validating
7627
 * processor is not obligated to read and process their declarations;
7628
 * for such documents, the rule that an entity must be declared is a
7629
 * well-formedness constraint only if standalone='yes'.
7630
 *
7631
 * [ WFC: Parsed Entity ]
7632
 * An entity reference must not contain the name of an unparsed entity
7633
 *
7634
 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7635
 * is updated to the current location in the string.
7636
 */
7637
static xmlEntityPtr
7638
1.05M
xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7639
1.05M
    xmlChar *name;
7640
1.05M
    const xmlChar *ptr;
7641
1.05M
    xmlChar cur;
7642
1.05M
    xmlEntityPtr ent = NULL;
7643
7644
1.05M
    if ((str == NULL) || (*str == NULL))
7645
0
        return(NULL);
7646
1.05M
    ptr = *str;
7647
1.05M
    cur = *ptr;
7648
1.05M
    if (cur != '&')
7649
0
  return(NULL);
7650
7651
1.05M
    ptr++;
7652
1.05M
    name = xmlParseStringName(ctxt, &ptr);
7653
1.05M
    if (name == NULL) {
7654
15.6k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7655
15.6k
           "xmlParseStringEntityRef: no name\n");
7656
15.6k
  *str = ptr;
7657
15.6k
  return(NULL);
7658
15.6k
    }
7659
1.03M
    if (*ptr != ';') {
7660
34.3k
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7661
34.3k
        xmlFree(name);
7662
34.3k
  *str = ptr;
7663
34.3k
  return(NULL);
7664
34.3k
    }
7665
1.00M
    ptr++;
7666
7667
7668
    /*
7669
     * Predefined entities override any extra definition
7670
     */
7671
1.00M
    if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7672
1.00M
        ent = xmlGetPredefinedEntity(name);
7673
1.00M
        if (ent != NULL) {
7674
51.9k
            xmlFree(name);
7675
51.9k
            *str = ptr;
7676
51.9k
            return(ent);
7677
51.9k
        }
7678
1.00M
    }
7679
7680
    /*
7681
     * Increase the number of entity references parsed
7682
     */
7683
953k
    ctxt->nbentities++;
7684
7685
    /*
7686
     * Ask first SAX for entity resolution, otherwise try the
7687
     * entities which may have been stored in the parser context.
7688
     */
7689
953k
    if (ctxt->sax != NULL) {
7690
953k
  if (ctxt->sax->getEntity != NULL)
7691
953k
      ent = ctxt->sax->getEntity(ctxt->userData, name);
7692
953k
  if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX))
7693
0
      ent = xmlGetPredefinedEntity(name);
7694
953k
  if ((ent == NULL) && (ctxt->userData==ctxt)) {
7695
325k
      ent = xmlSAX2GetEntity(ctxt, name);
7696
325k
  }
7697
953k
    }
7698
953k
    if (ctxt->instate == XML_PARSER_EOF) {
7699
0
  xmlFree(name);
7700
0
  return(NULL);
7701
0
    }
7702
7703
    /*
7704
     * [ WFC: Entity Declared ]
7705
     * In a document without any DTD, a document with only an
7706
     * internal DTD subset which contains no parameter entity
7707
     * references, or a document with "standalone='yes'", the
7708
     * Name given in the entity reference must match that in an
7709
     * entity declaration, except that well-formed documents
7710
     * need not declare any of the following entities: amp, lt,
7711
     * gt, apos, quot.
7712
     * The declaration of a parameter entity must precede any
7713
     * reference to it.
7714
     * Similarly, the declaration of a general entity must
7715
     * precede any reference to it which appears in a default
7716
     * value in an attribute-list declaration. Note that if
7717
     * entities are declared in the external subset or in
7718
     * external parameter entities, a non-validating processor
7719
     * is not obligated to read and process their declarations;
7720
     * for such documents, the rule that an entity must be
7721
     * declared is a well-formedness constraint only if
7722
     * standalone='yes'.
7723
     */
7724
953k
    if (ent == NULL) {
7725
325k
  if ((ctxt->standalone == 1) ||
7726
325k
      ((ctxt->hasExternalSubset == 0) &&
7727
322k
       (ctxt->hasPErefs == 0))) {
7728
320k
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7729
320k
         "Entity '%s' not defined\n", name);
7730
320k
  } else {
7731
5.70k
      xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7732
5.70k
        "Entity '%s' not defined\n",
7733
5.70k
        name);
7734
5.70k
  }
7735
325k
  xmlParserEntityCheck(ctxt, 0, ent, 0);
7736
  /* TODO ? check regressions ctxt->valid = 0; */
7737
325k
    }
7738
7739
    /*
7740
     * [ WFC: Parsed Entity ]
7741
     * An entity reference must not contain the name of an
7742
     * unparsed entity
7743
     */
7744
627k
    else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7745
54
  xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7746
54
     "Entity reference to unparsed entity %s\n", name);
7747
54
    }
7748
7749
    /*
7750
     * [ WFC: No External Entity References ]
7751
     * Attribute values cannot contain direct or indirect
7752
     * entity references to external entities.
7753
     */
7754
627k
    else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7755
627k
       (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7756
2.20k
  xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7757
2.20k
   "Attribute references external entity '%s'\n", name);
7758
2.20k
    }
7759
    /*
7760
     * [ WFC: No < in Attribute Values ]
7761
     * The replacement text of any entity referred to directly or
7762
     * indirectly in an attribute value (other than "&lt;") must
7763
     * not contain a <.
7764
     */
7765
625k
    else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7766
625k
       (ent != NULL) && (ent->content != NULL) &&
7767
597k
       (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
7768
597k
       (xmlStrchr(ent->content, '<'))) {
7769
63.3k
  xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7770
63.3k
     "'<' in entity '%s' is not allowed in attributes values\n",
7771
63.3k
        name);
7772
63.3k
    }
7773
7774
    /*
7775
     * Internal check, no parameter entities here ...
7776
     */
7777
562k
    else {
7778
562k
  switch (ent->etype) {
7779
0
      case XML_INTERNAL_PARAMETER_ENTITY:
7780
0
      case XML_EXTERNAL_PARAMETER_ENTITY:
7781
0
    xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7782
0
       "Attempt to reference the parameter entity '%s'\n",
7783
0
          name);
7784
0
      break;
7785
562k
      default:
7786
562k
      break;
7787
562k
  }
7788
562k
    }
7789
7790
    /*
7791
     * [ WFC: No Recursion ]
7792
     * A parsed entity must not contain a recursive reference
7793
     * to itself, either directly or indirectly.
7794
     * Done somewhere else
7795
     */
7796
7797
953k
    xmlFree(name);
7798
953k
    *str = ptr;
7799
953k
    return(ent);
7800
953k
}
7801
7802
/**
7803
 * xmlParsePEReference:
7804
 * @ctxt:  an XML parser context
7805
 *
7806
 * parse PEReference declarations
7807
 * The entity content is handled directly by pushing its content as
7808
 * a new input stream.
7809
 *
7810
 * [69] PEReference ::= '%' Name ';'
7811
 *
7812
 * [ WFC: No Recursion ]
7813
 * A parsed entity must not contain a recursive
7814
 * reference to itself, either directly or indirectly.
7815
 *
7816
 * [ WFC: Entity Declared ]
7817
 * In a document without any DTD, a document with only an internal DTD
7818
 * subset which contains no parameter entity references, or a document
7819
 * with "standalone='yes'", ...  ... The declaration of a parameter
7820
 * entity must precede any reference to it...
7821
 *
7822
 * [ VC: Entity Declared ]
7823
 * In a document with an external subset or external parameter entities
7824
 * with "standalone='no'", ...  ... The declaration of a parameter entity
7825
 * must precede any reference to it...
7826
 *
7827
 * [ WFC: In DTD ]
7828
 * Parameter-entity references may only appear in the DTD.
7829
 * NOTE: misleading but this is handled.
7830
 */
7831
void
7832
xmlParsePEReference(xmlParserCtxtPtr ctxt)
7833
1.60M
{
7834
1.60M
    const xmlChar *name;
7835
1.60M
    xmlEntityPtr entity = NULL;
7836
1.60M
    xmlParserInputPtr input;
7837
7838
1.60M
    if (RAW != '%')
7839
769k
        return;
7840
837k
    NEXT;
7841
837k
    name = xmlParseName(ctxt);
7842
837k
    if (name == NULL) {
7843
173k
  xmlFatalErrMsg(ctxt, XML_ERR_PEREF_NO_NAME, "PEReference: no name\n");
7844
173k
  return;
7845
173k
    }
7846
663k
    if (xmlParserDebugEntities)
7847
0
  xmlGenericError(xmlGenericErrorContext,
7848
0
    "PEReference: %s\n", name);
7849
663k
    if (RAW != ';') {
7850
184k
  xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
7851
184k
        return;
7852
184k
    }
7853
7854
478k
    NEXT;
7855
7856
    /*
7857
     * Increate the number of entity references parsed
7858
     */
7859
478k
    ctxt->nbentities++;
7860
7861
    /*
7862
     * Request the entity from SAX
7863
     */
7864
478k
    if ((ctxt->sax != NULL) &&
7865
478k
  (ctxt->sax->getParameterEntity != NULL))
7866
478k
  entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
7867
478k
    if (ctxt->instate == XML_PARSER_EOF)
7868
0
  return;
7869
478k
    if (entity == NULL) {
7870
  /*
7871
   * [ WFC: Entity Declared ]
7872
   * In a document without any DTD, a document with only an
7873
   * internal DTD subset which contains no parameter entity
7874
   * references, or a document with "standalone='yes'", ...
7875
   * ... The declaration of a parameter entity must precede
7876
   * any reference to it...
7877
   */
7878
52.0k
  if ((ctxt->standalone == 1) ||
7879
51.7k
      ((ctxt->hasExternalSubset == 0) &&
7880
51.3k
       (ctxt->hasPErefs == 0))) {
7881
2.62k
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7882
2.62k
            "PEReference: %%%s; not found\n",
7883
2.62k
            name);
7884
49.4k
  } else {
7885
      /*
7886
       * [ VC: Entity Declared ]
7887
       * In a document with an external subset or external
7888
       * parameter entities with "standalone='no'", ...
7889
       * ... The declaration of a parameter entity must
7890
       * precede any reference to it...
7891
       */
7892
49.4k
            if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
7893
0
                xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
7894
0
                                 "PEReference: %%%s; not found\n",
7895
0
                                 name, NULL);
7896
0
            } else
7897
49.4k
                xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7898
49.4k
                              "PEReference: %%%s; not found\n",
7899
49.4k
                              name, NULL);
7900
49.4k
            ctxt->valid = 0;
7901
49.4k
  }
7902
52.0k
  xmlParserEntityCheck(ctxt, 0, NULL, 0);
7903
426k
    } else {
7904
  /*
7905
   * Internal checking in case the entity quest barfed
7906
   */
7907
426k
  if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7908
1.57k
      (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7909
0
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7910
0
      "Internal: %%%s; is not a parameter entity\n",
7911
0
        name, NULL);
7912
426k
  } else {
7913
426k
            xmlChar start[4];
7914
426k
            xmlCharEncoding enc;
7915
7916
426k
      if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
7917
1.57k
          ((ctxt->options & XML_PARSE_NOENT) == 0) &&
7918
1.57k
    ((ctxt->options & XML_PARSE_DTDVALID) == 0) &&
7919
1.57k
    ((ctxt->options & XML_PARSE_DTDLOAD) == 0) &&
7920
1.57k
    ((ctxt->options & XML_PARSE_DTDATTR) == 0) &&
7921
1.57k
    (ctxt->replaceEntities == 0) &&
7922
1.57k
    (ctxt->validate == 0))
7923
1.57k
    return;
7924
7925
425k
      input = xmlNewEntityInputStream(ctxt, entity);
7926
425k
      if (xmlPushInput(ctxt, input) < 0) {
7927
0
                xmlFreeInputStream(input);
7928
0
    return;
7929
0
            }
7930
7931
425k
      if (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) {
7932
                /*
7933
                 * Get the 4 first bytes and decode the charset
7934
                 * if enc != XML_CHAR_ENCODING_NONE
7935
                 * plug some encoding conversion routines.
7936
                 * Note that, since we may have some non-UTF8
7937
                 * encoding (like UTF16, bug 135229), the 'length'
7938
                 * is not known, but we can calculate based upon
7939
                 * the amount of data in the buffer.
7940
                 */
7941
0
                GROW
7942
0
                if (ctxt->instate == XML_PARSER_EOF)
7943
0
                    return;
7944
0
                if ((ctxt->input->end - ctxt->input->cur)>=4) {
7945
0
                    start[0] = RAW;
7946
0
                    start[1] = NXT(1);
7947
0
                    start[2] = NXT(2);
7948
0
                    start[3] = NXT(3);
7949
0
                    enc = xmlDetectCharEncoding(start, 4);
7950
0
                    if (enc != XML_CHAR_ENCODING_NONE) {
7951
0
                        xmlSwitchEncoding(ctxt, enc);
7952
0
                    }
7953
0
                }
7954
7955
0
                if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
7956
0
                    (IS_BLANK_CH(NXT(5)))) {
7957
0
                    xmlParseTextDecl(ctxt);
7958
0
                }
7959
0
            }
7960
425k
  }
7961
426k
    }
7962
477k
    ctxt->hasPErefs = 1;
7963
477k
}
7964
7965
/**
7966
 * xmlLoadEntityContent:
7967
 * @ctxt:  an XML parser context
7968
 * @entity: an unloaded system entity
7969
 *
7970
 * Load the original content of the given system entity from the
7971
 * ExternalID/SystemID given. This is to be used for Included in Literal
7972
 * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
7973
 *
7974
 * Returns 0 in case of success and -1 in case of failure
7975
 */
7976
static int
7977
0
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
7978
0
    xmlParserInputPtr input;
7979
0
    xmlBufferPtr buf;
7980
0
    int l, c;
7981
0
    int count = 0;
7982
7983
0
    if ((ctxt == NULL) || (entity == NULL) ||
7984
0
        ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
7985
0
   (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
7986
0
  (entity->content != NULL)) {
7987
0
  xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7988
0
              "xmlLoadEntityContent parameter error");
7989
0
        return(-1);
7990
0
    }
7991
7992
0
    if (xmlParserDebugEntities)
7993
0
  xmlGenericError(xmlGenericErrorContext,
7994
0
    "Reading %s entity content input\n", entity->name);
7995
7996
0
    buf = xmlBufferCreate();
7997
0
    if (buf == NULL) {
7998
0
  xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7999
0
              "xmlLoadEntityContent parameter error");
8000
0
        return(-1);
8001
0
    }
8002
8003
0
    input = xmlNewEntityInputStream(ctxt, entity);
8004
0
    if (input == NULL) {
8005
0
  xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8006
0
              "xmlLoadEntityContent input error");
8007
0
  xmlBufferFree(buf);
8008
0
        return(-1);
8009
0
    }
8010
8011
    /*
8012
     * Push the entity as the current input, read char by char
8013
     * saving to the buffer until the end of the entity or an error
8014
     */
8015
0
    if (xmlPushInput(ctxt, input) < 0) {
8016
0
        xmlBufferFree(buf);
8017
0
  return(-1);
8018
0
    }
8019
8020
0
    GROW;
8021
0
    c = CUR_CHAR(l);
8022
0
    while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
8023
0
           (IS_CHAR(c))) {
8024
0
        xmlBufferAdd(buf, ctxt->input->cur, l);
8025
0
  if (count++ > XML_PARSER_CHUNK_SIZE) {
8026
0
      count = 0;
8027
0
      GROW;
8028
0
            if (ctxt->instate == XML_PARSER_EOF) {
8029
0
                xmlBufferFree(buf);
8030
0
                return(-1);
8031
0
            }
8032
0
  }
8033
0
  NEXTL(l);
8034
0
  c = CUR_CHAR(l);
8035
0
  if (c == 0) {
8036
0
      count = 0;
8037
0
      GROW;
8038
0
            if (ctxt->instate == XML_PARSER_EOF) {
8039
0
                xmlBufferFree(buf);
8040
0
                return(-1);
8041
0
            }
8042
0
      c = CUR_CHAR(l);
8043
0
  }
8044
0
    }
8045
8046
0
    if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
8047
0
        xmlPopInput(ctxt);
8048
0
    } else if (!IS_CHAR(c)) {
8049
0
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
8050
0
                          "xmlLoadEntityContent: invalid char value %d\n",
8051
0
                    c);
8052
0
  xmlBufferFree(buf);
8053
0
  return(-1);
8054
0
    }
8055
0
    entity->content = buf->content;
8056
0
    buf->content = NULL;
8057
0
    xmlBufferFree(buf);
8058
8059
0
    return(0);
8060
0
}
8061
8062
/**
8063
 * xmlParseStringPEReference:
8064
 * @ctxt:  an XML parser context
8065
 * @str:  a pointer to an index in the string
8066
 *
8067
 * parse PEReference declarations
8068
 *
8069
 * [69] PEReference ::= '%' Name ';'
8070
 *
8071
 * [ WFC: No Recursion ]
8072
 * A parsed entity must not contain a recursive
8073
 * reference to itself, either directly or indirectly.
8074
 *
8075
 * [ WFC: Entity Declared ]
8076
 * In a document without any DTD, a document with only an internal DTD
8077
 * subset which contains no parameter entity references, or a document
8078
 * with "standalone='yes'", ...  ... The declaration of a parameter
8079
 * entity must precede any reference to it...
8080
 *
8081
 * [ VC: Entity Declared ]
8082
 * In a document with an external subset or external parameter entities
8083
 * with "standalone='no'", ...  ... The declaration of a parameter entity
8084
 * must precede any reference to it...
8085
 *
8086
 * [ WFC: In DTD ]
8087
 * Parameter-entity references may only appear in the DTD.
8088
 * NOTE: misleading but this is handled.
8089
 *
8090
 * Returns the string of the entity content.
8091
 *         str is updated to the current value of the index
8092
 */
8093
static xmlEntityPtr
8094
0
xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
8095
0
    const xmlChar *ptr;
8096
0
    xmlChar cur;
8097
0
    xmlChar *name;
8098
0
    xmlEntityPtr entity = NULL;
8099
8100
0
    if ((str == NULL) || (*str == NULL)) return(NULL);
8101
0
    ptr = *str;
8102
0
    cur = *ptr;
8103
0
    if (cur != '%')
8104
0
        return(NULL);
8105
0
    ptr++;
8106
0
    name = xmlParseStringName(ctxt, &ptr);
8107
0
    if (name == NULL) {
8108
0
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8109
0
           "xmlParseStringPEReference: no name\n");
8110
0
  *str = ptr;
8111
0
  return(NULL);
8112
0
    }
8113
0
    cur = *ptr;
8114
0
    if (cur != ';') {
8115
0
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
8116
0
  xmlFree(name);
8117
0
  *str = ptr;
8118
0
  return(NULL);
8119
0
    }
8120
0
    ptr++;
8121
8122
    /*
8123
     * Increate the number of entity references parsed
8124
     */
8125
0
    ctxt->nbentities++;
8126
8127
    /*
8128
     * Request the entity from SAX
8129
     */
8130
0
    if ((ctxt->sax != NULL) &&
8131
0
  (ctxt->sax->getParameterEntity != NULL))
8132
0
  entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8133
0
    if (ctxt->instate == XML_PARSER_EOF) {
8134
0
  xmlFree(name);
8135
0
  *str = ptr;
8136
0
  return(NULL);
8137
0
    }
8138
0
    if (entity == NULL) {
8139
  /*
8140
   * [ WFC: Entity Declared ]
8141
   * In a document without any DTD, a document with only an
8142
   * internal DTD subset which contains no parameter entity
8143
   * references, or a document with "standalone='yes'", ...
8144
   * ... The declaration of a parameter entity must precede
8145
   * any reference to it...
8146
   */
8147
0
  if ((ctxt->standalone == 1) ||
8148
0
      ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) {
8149
0
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8150
0
     "PEReference: %%%s; not found\n", name);
8151
0
  } else {
8152
      /*
8153
       * [ VC: Entity Declared ]
8154
       * In a document with an external subset or external
8155
       * parameter entities with "standalone='no'", ...
8156
       * ... The declaration of a parameter entity must
8157
       * precede any reference to it...
8158
       */
8159
0
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8160
0
        "PEReference: %%%s; not found\n",
8161
0
        name, NULL);
8162
0
      ctxt->valid = 0;
8163
0
  }
8164
0
  xmlParserEntityCheck(ctxt, 0, NULL, 0);
8165
0
    } else {
8166
  /*
8167
   * Internal checking in case the entity quest barfed
8168
   */
8169
0
  if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8170
0
      (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8171
0
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8172
0
        "%%%s; is not a parameter entity\n",
8173
0
        name, NULL);
8174
0
  }
8175
0
    }
8176
0
    ctxt->hasPErefs = 1;
8177
0
    xmlFree(name);
8178
0
    *str = ptr;
8179
0
    return(entity);
8180
0
}
8181
8182
/**
8183
 * xmlParseDocTypeDecl:
8184
 * @ctxt:  an XML parser context
8185
 *
8186
 * parse a DOCTYPE declaration
8187
 *
8188
 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
8189
 *                      ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8190
 *
8191
 * [ VC: Root Element Type ]
8192
 * The Name in the document type declaration must match the element
8193
 * type of the root element.
8194
 */
8195
8196
void
8197
65.0k
xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
8198
65.0k
    const xmlChar *name = NULL;
8199
65.0k
    xmlChar *ExternalID = NULL;
8200
65.0k
    xmlChar *URI = NULL;
8201
8202
    /*
8203
     * We know that '<!DOCTYPE' has been detected.
8204
     */
8205
65.0k
    SKIP(9);
8206
8207
65.0k
    SKIP_BLANKS;
8208
8209
    /*
8210
     * Parse the DOCTYPE name.
8211
     */
8212
65.0k
    name = xmlParseName(ctxt);
8213
65.0k
    if (name == NULL) {
8214
7.31k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8215
7.31k
           "xmlParseDocTypeDecl : no DOCTYPE name !\n");
8216
7.31k
    }
8217
65.0k
    ctxt->intSubName = name;
8218
8219
65.0k
    SKIP_BLANKS;
8220
8221
    /*
8222
     * Check for SystemID and ExternalID
8223
     */
8224
65.0k
    URI = xmlParseExternalID(ctxt, &ExternalID, 1);
8225
8226
65.0k
    if ((URI != NULL) || (ExternalID != NULL)) {
8227
3.13k
        ctxt->hasExternalSubset = 1;
8228
3.13k
    }
8229
65.0k
    ctxt->extSubURI = URI;
8230
65.0k
    ctxt->extSubSystem = ExternalID;
8231
8232
65.0k
    SKIP_BLANKS;
8233
8234
    /*
8235
     * Create and update the internal subset.
8236
     */
8237
65.0k
    if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
8238
65.0k
  (!ctxt->disableSAX))
8239
65.0k
  ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
8240
65.0k
    if (ctxt->instate == XML_PARSER_EOF)
8241
0
  return;
8242
8243
    /*
8244
     * Is there any internal subset declarations ?
8245
     * they are handled separately in xmlParseInternalSubset()
8246
     */
8247
65.0k
    if (RAW == '[')
8248
54.5k
  return;
8249
8250
    /*
8251
     * We should be at the end of the DOCTYPE declaration.
8252
     */
8253
10.5k
    if (RAW != '>') {
8254
4.45k
  xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8255
4.45k
    }
8256
10.5k
    NEXT;
8257
10.5k
}
8258
8259
/**
8260
 * xmlParseInternalSubset:
8261
 * @ctxt:  an XML parser context
8262
 *
8263
 * parse the internal subset declaration
8264
 *
8265
 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8266
 */
8267
8268
static void
8269
56.0k
xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
8270
    /*
8271
     * Is there any DTD definition ?
8272
     */
8273
56.0k
    if (RAW == '[') {
8274
56.0k
        int baseInputNr = ctxt->inputNr;
8275
56.0k
        ctxt->instate = XML_PARSER_DTD;
8276
56.0k
        NEXT;
8277
  /*
8278
   * Parse the succession of Markup declarations and
8279
   * PEReferences.
8280
   * Subsequence (markupdecl | PEReference | S)*
8281
   */
8282
1.22M
  while (((RAW != ']') || (ctxt->inputNr > baseInputNr)) &&
8283
1.19M
               (ctxt->instate != XML_PARSER_EOF)) {
8284
1.18M
      const xmlChar *check = CUR_PTR;
8285
1.18M
      unsigned int cons = ctxt->input->consumed;
8286
8287
1.18M
      SKIP_BLANKS;
8288
1.18M
      xmlParseMarkupDecl(ctxt);
8289
1.18M
      xmlParsePEReference(ctxt);
8290
8291
1.18M
      if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
8292
162k
    xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8293
162k
       "xmlParseInternalSubset: error detected in Markup declaration\n");
8294
162k
                if (ctxt->inputNr > baseInputNr)
8295
146k
                    xmlPopInput(ctxt);
8296
15.8k
                else
8297
15.8k
        break;
8298
162k
      }
8299
1.18M
  }
8300
56.0k
  if (RAW == ']') {
8301
33.5k
      NEXT;
8302
33.5k
      SKIP_BLANKS;
8303
33.5k
  }
8304
56.0k
    }
8305
8306
    /*
8307
     * We should be at the end of the DOCTYPE declaration.
8308
     */
8309
56.0k
    if (RAW != '>') {
8310
22.2k
  xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8311
22.2k
  return;
8312
22.2k
    }
8313
33.8k
    NEXT;
8314
33.8k
}
8315
8316
#ifdef LIBXML_SAX1_ENABLED
8317
/**
8318
 * xmlParseAttribute:
8319
 * @ctxt:  an XML parser context
8320
 * @value:  a xmlChar ** used to store the value of the attribute
8321
 *
8322
 * parse an attribute
8323
 *
8324
 * [41] Attribute ::= Name Eq AttValue
8325
 *
8326
 * [ WFC: No External Entity References ]
8327
 * Attribute values cannot contain direct or indirect entity references
8328
 * to external entities.
8329
 *
8330
 * [ WFC: No < in Attribute Values ]
8331
 * The replacement text of any entity referred to directly or indirectly in
8332
 * an attribute value (other than "&lt;") must not contain a <.
8333
 *
8334
 * [ VC: Attribute Value Type ]
8335
 * The attribute must have been declared; the value must be of the type
8336
 * declared for it.
8337
 *
8338
 * [25] Eq ::= S? '=' S?
8339
 *
8340
 * With namespace:
8341
 *
8342
 * [NS 11] Attribute ::= QName Eq AttValue
8343
 *
8344
 * Also the case QName == xmlns:??? is handled independently as a namespace
8345
 * definition.
8346
 *
8347
 * Returns the attribute name, and the value in *value.
8348
 */
8349
8350
const xmlChar *
8351
0
xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
8352
0
    const xmlChar *name;
8353
0
    xmlChar *val;
8354
8355
0
    *value = NULL;
8356
0
    GROW;
8357
0
    name = xmlParseName(ctxt);
8358
0
    if (name == NULL) {
8359
0
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8360
0
                 "error parsing attribute name\n");
8361
0
        return(NULL);
8362
0
    }
8363
8364
    /*
8365
     * read the value
8366
     */
8367
0
    SKIP_BLANKS;
8368
0
    if (RAW == '=') {
8369
0
        NEXT;
8370
0
  SKIP_BLANKS;
8371
0
  val = xmlParseAttValue(ctxt);
8372
0
  ctxt->instate = XML_PARSER_CONTENT;
8373
0
    } else {
8374
0
  xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8375
0
         "Specification mandates value for attribute %s\n", name);
8376
0
  return(NULL);
8377
0
    }
8378
8379
    /*
8380
     * Check that xml:lang conforms to the specification
8381
     * No more registered as an error, just generate a warning now
8382
     * since this was deprecated in XML second edition
8383
     */
8384
0
    if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8385
0
  if (!xmlCheckLanguageID(val)) {
8386
0
      xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8387
0
              "Malformed value for xml:lang : %s\n",
8388
0
        val, NULL);
8389
0
  }
8390
0
    }
8391
8392
    /*
8393
     * Check that xml:space conforms to the specification
8394
     */
8395
0
    if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8396
0
  if (xmlStrEqual(val, BAD_CAST "default"))
8397
0
      *(ctxt->space) = 0;
8398
0
  else if (xmlStrEqual(val, BAD_CAST "preserve"))
8399
0
      *(ctxt->space) = 1;
8400
0
  else {
8401
0
    xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8402
0
"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8403
0
                                 val, NULL);
8404
0
  }
8405
0
    }
8406
8407
0
    *value = val;
8408
0
    return(name);
8409
0
}
8410
8411
/**
8412
 * xmlParseStartTag:
8413
 * @ctxt:  an XML parser context
8414
 *
8415
 * parse a start of tag either for rule element or
8416
 * EmptyElement. In both case we don't parse the tag closing chars.
8417
 *
8418
 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8419
 *
8420
 * [ WFC: Unique Att Spec ]
8421
 * No attribute name may appear more than once in the same start-tag or
8422
 * empty-element tag.
8423
 *
8424
 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8425
 *
8426
 * [ WFC: Unique Att Spec ]
8427
 * No attribute name may appear more than once in the same start-tag or
8428
 * empty-element tag.
8429
 *
8430
 * With namespace:
8431
 *
8432
 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8433
 *
8434
 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8435
 *
8436
 * Returns the element name parsed
8437
 */
8438
8439
const xmlChar *
8440
0
xmlParseStartTag(xmlParserCtxtPtr ctxt) {
8441
0
    const xmlChar *name;
8442
0
    const xmlChar *attname;
8443
0
    xmlChar *attvalue;
8444
0
    const xmlChar **atts = ctxt->atts;
8445
0
    int nbatts = 0;
8446
0
    int maxatts = ctxt->maxatts;
8447
0
    int i;
8448
8449
0
    if (RAW != '<') return(NULL);
8450
0
    NEXT1;
8451
8452
0
    name = xmlParseName(ctxt);
8453
0
    if (name == NULL) {
8454
0
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8455
0
       "xmlParseStartTag: invalid element name\n");
8456
0
        return(NULL);
8457
0
    }
8458
8459
    /*
8460
     * Now parse the attributes, it ends up with the ending
8461
     *
8462
     * (S Attribute)* S?
8463
     */
8464
0
    SKIP_BLANKS;
8465
0
    GROW;
8466
8467
0
    while (((RAW != '>') &&
8468
0
     ((RAW != '/') || (NXT(1) != '>')) &&
8469
0
     (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
8470
0
  const xmlChar *q = CUR_PTR;
8471
0
  unsigned int cons = ctxt->input->consumed;
8472
8473
0
  attname = xmlParseAttribute(ctxt, &attvalue);
8474
0
        if ((attname != NULL) && (attvalue != NULL)) {
8475
      /*
8476
       * [ WFC: Unique Att Spec ]
8477
       * No attribute name may appear more than once in the same
8478
       * start-tag or empty-element tag.
8479
       */
8480
0
      for (i = 0; i < nbatts;i += 2) {
8481
0
          if (xmlStrEqual(atts[i], attname)) {
8482
0
        xmlErrAttributeDup(ctxt, NULL, attname);
8483
0
        xmlFree(attvalue);
8484
0
        goto failed;
8485
0
    }
8486
0
      }
8487
      /*
8488
       * Add the pair to atts
8489
       */
8490
0
      if (atts == NULL) {
8491
0
          maxatts = 22; /* allow for 10 attrs by default */
8492
0
          atts = (const xmlChar **)
8493
0
           xmlMalloc(maxatts * sizeof(xmlChar *));
8494
0
    if (atts == NULL) {
8495
0
        xmlErrMemory(ctxt, NULL);
8496
0
        if (attvalue != NULL)
8497
0
      xmlFree(attvalue);
8498
0
        goto failed;
8499
0
    }
8500
0
    ctxt->atts = atts;
8501
0
    ctxt->maxatts = maxatts;
8502
0
      } else if (nbatts + 4 > maxatts) {
8503
0
          const xmlChar **n;
8504
8505
0
          maxatts *= 2;
8506
0
          n = (const xmlChar **) xmlRealloc((void *) atts,
8507
0
               maxatts * sizeof(const xmlChar *));
8508
0
    if (n == NULL) {
8509
0
        xmlErrMemory(ctxt, NULL);
8510
0
        if (attvalue != NULL)
8511
0
      xmlFree(attvalue);
8512
0
        goto failed;
8513
0
    }
8514
0
    atts = n;
8515
0
    ctxt->atts = atts;
8516
0
    ctxt->maxatts = maxatts;
8517
0
      }
8518
0
      atts[nbatts++] = attname;
8519
0
      atts[nbatts++] = attvalue;
8520
0
      atts[nbatts] = NULL;
8521
0
      atts[nbatts + 1] = NULL;
8522
0
  } else {
8523
0
      if (attvalue != NULL)
8524
0
    xmlFree(attvalue);
8525
0
  }
8526
8527
0
failed:
8528
8529
0
  GROW
8530
0
  if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8531
0
      break;
8532
0
  if (SKIP_BLANKS == 0) {
8533
0
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8534
0
         "attributes construct error\n");
8535
0
  }
8536
0
        if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
8537
0
            (attname == NULL) && (attvalue == NULL)) {
8538
0
      xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
8539
0
         "xmlParseStartTag: problem parsing attributes\n");
8540
0
      break;
8541
0
  }
8542
0
  SHRINK;
8543
0
        GROW;
8544
0
    }
8545
8546
    /*
8547
     * SAX: Start of Element !
8548
     */
8549
0
    if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
8550
0
  (!ctxt->disableSAX)) {
8551
0
  if (nbatts > 0)
8552
0
      ctxt->sax->startElement(ctxt->userData, name, atts);
8553
0
  else
8554
0
      ctxt->sax->startElement(ctxt->userData, name, NULL);
8555
0
    }
8556
8557
0
    if (atts != NULL) {
8558
        /* Free only the content strings */
8559
0
        for (i = 1;i < nbatts;i+=2)
8560
0
      if (atts[i] != NULL)
8561
0
         xmlFree((xmlChar *) atts[i]);
8562
0
    }
8563
0
    return(name);
8564
0
}
8565
8566
/**
8567
 * xmlParseEndTag1:
8568
 * @ctxt:  an XML parser context
8569
 * @line:  line of the start tag
8570
 * @nsNr:  number of namespaces on the start tag
8571
 *
8572
 * parse an end of tag
8573
 *
8574
 * [42] ETag ::= '</' Name S? '>'
8575
 *
8576
 * With namespace
8577
 *
8578
 * [NS 9] ETag ::= '</' QName S? '>'
8579
 */
8580
8581
static void
8582
0
xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
8583
0
    const xmlChar *name;
8584
8585
0
    GROW;
8586
0
    if ((RAW != '<') || (NXT(1) != '/')) {
8587
0
  xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
8588
0
           "xmlParseEndTag: '</' not found\n");
8589
0
  return;
8590
0
    }
8591
0
    SKIP(2);
8592
8593
0
    name = xmlParseNameAndCompare(ctxt,ctxt->name);
8594
8595
    /*
8596
     * We should definitely be at the ending "S? '>'" part
8597
     */
8598
0
    GROW;
8599
0
    SKIP_BLANKS;
8600
0
    if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
8601
0
  xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
8602
0
    } else
8603
0
  NEXT1;
8604
8605
    /*
8606
     * [ WFC: Element Type Match ]
8607
     * The Name in an element's end-tag must match the element type in the
8608
     * start-tag.
8609
     *
8610
     */
8611
0
    if (name != (xmlChar*)1) {
8612
0
        if (name == NULL) name = BAD_CAST "unparseable";
8613
0
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
8614
0
         "Opening and ending tag mismatch: %s line %d and %s\n",
8615
0
                    ctxt->name, line, name);
8616
0
    }
8617
8618
    /*
8619
     * SAX: End of Tag
8620
     */
8621
0
    if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8622
0
  (!ctxt->disableSAX))
8623
0
        ctxt->sax->endElement(ctxt->userData, ctxt->name);
8624
8625
0
    namePop(ctxt);
8626
0
    spacePop(ctxt);
8627
0
    return;
8628
0
}
8629
8630
/**
8631
 * xmlParseEndTag:
8632
 * @ctxt:  an XML parser context
8633
 *
8634
 * parse an end of tag
8635
 *
8636
 * [42] ETag ::= '</' Name S? '>'
8637
 *
8638
 * With namespace
8639
 *
8640
 * [NS 9] ETag ::= '</' QName S? '>'
8641
 */
8642
8643
void
8644
0
xmlParseEndTag(xmlParserCtxtPtr ctxt) {
8645
0
    xmlParseEndTag1(ctxt, 0);
8646
0
}
8647
#endif /* LIBXML_SAX1_ENABLED */
8648
8649
/************************************************************************
8650
 *                  *
8651
 *          SAX 2 specific operations       *
8652
 *                  *
8653
 ************************************************************************/
8654
8655
/*
8656
 * xmlGetNamespace:
8657
 * @ctxt:  an XML parser context
8658
 * @prefix:  the prefix to lookup
8659
 *
8660
 * Lookup the namespace name for the @prefix (which ca be NULL)
8661
 * The prefix must come from the @ctxt->dict dictionary
8662
 *
8663
 * Returns the namespace name or NULL if not bound
8664
 */
8665
static const xmlChar *
8666
19.7M
xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
8667
19.7M
    int i;
8668
8669
19.7M
    if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
8670
1.02G
    for (i = ctxt->nsNr - 2;i >= 0;i-=2)
8671
1.01G
        if (ctxt->nsTab[i] == prefix) {
8672
11.8M
      if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
8673
93.9k
          return(NULL);
8674
11.7M
      return(ctxt->nsTab[i + 1]);
8675
11.8M
  }
8676
7.54M
    return(NULL);
8677
19.3M
}
8678
8679
/**
8680
 * xmlParseQName:
8681
 * @ctxt:  an XML parser context
8682
 * @prefix:  pointer to store the prefix part
8683
 *
8684
 * parse an XML Namespace QName
8685
 *
8686
 * [6]  QName  ::= (Prefix ':')? LocalPart
8687
 * [7]  Prefix  ::= NCName
8688
 * [8]  LocalPart  ::= NCName
8689
 *
8690
 * Returns the Name parsed or NULL
8691
 */
8692
8693
static const xmlChar *
8694
29.9M
xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8695
29.9M
    const xmlChar *l, *p;
8696
8697
29.9M
    GROW;
8698
8699
29.9M
    l = xmlParseNCName(ctxt);
8700
29.9M
    if (l == NULL) {
8701
3.37M
        if (CUR == ':') {
8702
326k
      l = xmlParseName(ctxt);
8703
326k
      if (l != NULL) {
8704
325k
          xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8705
325k
             "Failed to parse QName '%s'\n", l, NULL, NULL);
8706
325k
    *prefix = NULL;
8707
325k
    return(l);
8708
325k
      }
8709
326k
  }
8710
3.04M
        return(NULL);
8711
3.37M
    }
8712
26.6M
    if (CUR == ':') {
8713
1.63M
        NEXT;
8714
1.63M
  p = l;
8715
1.63M
  l = xmlParseNCName(ctxt);
8716
1.63M
  if (l == NULL) {
8717
106k
      xmlChar *tmp;
8718
8719
106k
            xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8720
106k
               "Failed to parse QName '%s:'\n", p, NULL, NULL);
8721
106k
      l = xmlParseNmtoken(ctxt);
8722
106k
      if (l == NULL)
8723
76.0k
    tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
8724
30.7k
      else {
8725
30.7k
    tmp = xmlBuildQName(l, p, NULL, 0);
8726
30.7k
    xmlFree((char *)l);
8727
30.7k
      }
8728
106k
      p = xmlDictLookup(ctxt->dict, tmp, -1);
8729
106k
      if (tmp != NULL) xmlFree(tmp);
8730
106k
      *prefix = NULL;
8731
106k
      return(p);
8732
106k
  }
8733
1.52M
  if (CUR == ':') {
8734
239k
      xmlChar *tmp;
8735
8736
239k
            xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8737
239k
               "Failed to parse QName '%s:%s:'\n", p, l, NULL);
8738
239k
      NEXT;
8739
239k
      tmp = (xmlChar *) xmlParseName(ctxt);
8740
239k
      if (tmp != NULL) {
8741
225k
          tmp = xmlBuildQName(tmp, l, NULL, 0);
8742
225k
    l = xmlDictLookup(ctxt->dict, tmp, -1);
8743
225k
    if (tmp != NULL) xmlFree(tmp);
8744
225k
    *prefix = p;
8745
225k
    return(l);
8746
225k
      }
8747
14.6k
      tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
8748
14.6k
      l = xmlDictLookup(ctxt->dict, tmp, -1);
8749
14.6k
      if (tmp != NULL) xmlFree(tmp);
8750
14.6k
      *prefix = p;
8751
14.6k
      return(l);
8752
239k
  }
8753
1.28M
  *prefix = p;
8754
1.28M
    } else
8755
24.9M
        *prefix = NULL;
8756
26.2M
    return(l);
8757
26.6M
}
8758
8759
/**
8760
 * xmlParseQNameAndCompare:
8761
 * @ctxt:  an XML parser context
8762
 * @name:  the localname
8763
 * @prefix:  the prefix, if any.
8764
 *
8765
 * parse an XML name and compares for match
8766
 * (specialized for endtag parsing)
8767
 *
8768
 * Returns NULL for an illegal name, (xmlChar*) 1 for success
8769
 * and the name for mismatch
8770
 */
8771
8772
static const xmlChar *
8773
xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8774
224k
                        xmlChar const *prefix) {
8775
224k
    const xmlChar *cmp;
8776
224k
    const xmlChar *in;
8777
224k
    const xmlChar *ret;
8778
224k
    const xmlChar *prefix2;
8779
8780
224k
    if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8781
8782
224k
    GROW;
8783
224k
    in = ctxt->input->cur;
8784
8785
224k
    cmp = prefix;
8786
581k
    while (*in != 0 && *in == *cmp) {
8787
356k
  ++in;
8788
356k
  ++cmp;
8789
356k
    }
8790
224k
    if ((*cmp == 0) && (*in == ':')) {
8791
116k
        in++;
8792
116k
  cmp = name;
8793
905k
  while (*in != 0 && *in == *cmp) {
8794
788k
      ++in;
8795
788k
      ++cmp;
8796
788k
  }
8797
116k
  if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
8798
      /* success */
8799
85.4k
      ctxt->input->cur = in;
8800
85.4k
      return((const xmlChar*) 1);
8801
85.4k
  }
8802
116k
    }
8803
    /*
8804
     * all strings coms from the dictionary, equality can be done directly
8805
     */
8806
139k
    ret = xmlParseQName (ctxt, &prefix2);
8807
139k
    if ((ret == name) && (prefix == prefix2))
8808
17.8k
  return((const xmlChar*) 1);
8809
121k
    return ret;
8810
139k
}
8811
8812
/**
8813
 * xmlParseAttValueInternal:
8814
 * @ctxt:  an XML parser context
8815
 * @len:  attribute len result
8816
 * @alloc:  whether the attribute was reallocated as a new string
8817
 * @normalize:  if 1 then further non-CDATA normalization must be done
8818
 *
8819
 * parse a value for an attribute.
8820
 * NOTE: if no normalization is needed, the routine will return pointers
8821
 *       directly from the data buffer.
8822
 *
8823
 * 3.3.3 Attribute-Value Normalization:
8824
 * Before the value of an attribute is passed to the application or
8825
 * checked for validity, the XML processor must normalize it as follows:
8826
 * - a character reference is processed by appending the referenced
8827
 *   character to the attribute value
8828
 * - an entity reference is processed by recursively processing the
8829
 *   replacement text of the entity
8830
 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
8831
 *   appending #x20 to the normalized value, except that only a single
8832
 *   #x20 is appended for a "#xD#xA" sequence that is part of an external
8833
 *   parsed entity or the literal entity value of an internal parsed entity
8834
 * - other characters are processed by appending them to the normalized value
8835
 * If the declared value is not CDATA, then the XML processor must further
8836
 * process the normalized attribute value by discarding any leading and
8837
 * trailing space (#x20) characters, and by replacing sequences of space
8838
 * (#x20) characters by a single space (#x20) character.
8839
 * All attributes for which no declaration has been read should be treated
8840
 * by a non-validating parser as if declared CDATA.
8841
 *
8842
 * Returns the AttValue parsed or NULL. The value has to be freed by the
8843
 *     caller if it was copied, this can be detected by val[*len] == 0.
8844
 */
8845
8846
static xmlChar *
8847
xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
8848
                         int normalize)
8849
7.66M
{
8850
7.66M
    xmlChar limit = 0;
8851
7.66M
    const xmlChar *in = NULL, *start, *end, *last;
8852
7.66M
    xmlChar *ret = NULL;
8853
7.66M
    int line, col;
8854
8855
7.66M
    GROW;
8856
7.66M
    in = (xmlChar *) CUR_PTR;
8857
7.66M
    line = ctxt->input->line;
8858
7.66M
    col = ctxt->input->col;
8859
7.66M
    if (*in != '"' && *in != '\'') {
8860
242k
        xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
8861
242k
        return (NULL);
8862
242k
    }
8863
7.42M
    ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
8864
8865
    /*
8866
     * try to handle in this routine the most common case where no
8867
     * allocation of a new string is required and where content is
8868
     * pure ASCII.
8869
     */
8870
7.42M
    limit = *in++;
8871
7.42M
    col++;
8872
7.42M
    end = ctxt->input->end;
8873
7.42M
    start = in;
8874
7.42M
    if (in >= end) {
8875
991
        const xmlChar *oldbase = ctxt->input->base;
8876
991
  GROW;
8877
991
  if (oldbase != ctxt->input->base) {
8878
0
      long delta = ctxt->input->base - oldbase;
8879
0
      start = start + delta;
8880
0
      in = in + delta;
8881
0
  }
8882
991
  end = ctxt->input->end;
8883
991
    }
8884
7.42M
    if (normalize) {
8885
        /*
8886
   * Skip any leading spaces
8887
   */
8888
334k
  while ((in < end) && (*in != limit) &&
8889
329k
         ((*in == 0x20) || (*in == 0x9) ||
8890
272k
          (*in == 0xA) || (*in == 0xD))) {
8891
253k
      if (*in == 0xA) {
8892
187k
          line++; col = 1;
8893
187k
      } else {
8894
65.9k
          col++;
8895
65.9k
      }
8896
253k
      in++;
8897
253k
      start = in;
8898
253k
      if (in >= end) {
8899
334
    const xmlChar *oldbase = ctxt->input->base;
8900
334
    GROW;
8901
334
                if (ctxt->instate == XML_PARSER_EOF)
8902
0
                    return(NULL);
8903
334
    if (oldbase != ctxt->input->base) {
8904
0
        long delta = ctxt->input->base - oldbase;
8905
0
        start = start + delta;
8906
0
        in = in + delta;
8907
0
    }
8908
334
    end = ctxt->input->end;
8909
334
                if (((in - start) > XML_MAX_TEXT_LENGTH) &&
8910
0
                    ((ctxt->options & XML_PARSE_HUGE) == 0)) {
8911
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
8912
0
                                   "AttValue length too long\n");
8913
0
                    return(NULL);
8914
0
                }
8915
334
      }
8916
253k
  }
8917
587k
  while ((in < end) && (*in != limit) && (*in >= 0x20) &&
8918
563k
         (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
8919
511k
      col++;
8920
511k
      if ((*in++ == 0x20) && (*in == 0x20)) break;
8921
506k
      if (in >= end) {
8922
689
    const xmlChar *oldbase = ctxt->input->base;
8923
689
    GROW;
8924
689
                if (ctxt->instate == XML_PARSER_EOF)
8925
0
                    return(NULL);
8926
689
    if (oldbase != ctxt->input->base) {
8927
0
        long delta = ctxt->input->base - oldbase;
8928
0
        start = start + delta;
8929
0
        in = in + delta;
8930
0
    }
8931
689
    end = ctxt->input->end;
8932
689
                if (((in - start) > XML_MAX_TEXT_LENGTH) &&
8933
0
                    ((ctxt->options & XML_PARSE_HUGE) == 0)) {
8934
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
8935
0
                                   "AttValue length too long\n");
8936
0
                    return(NULL);
8937
0
                }
8938
689
      }
8939
506k
  }
8940
81.1k
  last = in;
8941
  /*
8942
   * skip the trailing blanks
8943
   */
8944
90.2k
  while ((last[-1] == 0x20) && (last > start)) last--;
8945
191k
  while ((in < end) && (*in != limit) &&
8946
176k
         ((*in == 0x20) || (*in == 0x9) ||
8947
138k
          (*in == 0xA) || (*in == 0xD))) {
8948
110k
      if (*in == 0xA) {
8949
70.6k
          line++, col = 1;
8950
70.6k
      } else {
8951
39.5k
          col++;
8952
39.5k
      }
8953
110k
      in++;
8954
110k
      if (in >= end) {
8955
654
    const xmlChar *oldbase = ctxt->input->base;
8956
654
    GROW;
8957
654
                if (ctxt->instate == XML_PARSER_EOF)
8958
0
                    return(NULL);
8959
654
    if (oldbase != ctxt->input->base) {
8960
0
        long delta = ctxt->input->base - oldbase;
8961
0
        start = start + delta;
8962
0
        in = in + delta;
8963
0
        last = last + delta;
8964
0
    }
8965
654
    end = ctxt->input->end;
8966
654
                if (((in - start) > XML_MAX_TEXT_LENGTH) &&
8967
0
                    ((ctxt->options & XML_PARSE_HUGE) == 0)) {
8968
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
8969
0
                                   "AttValue length too long\n");
8970
0
                    return(NULL);
8971
0
                }
8972
654
      }
8973
110k
  }
8974
81.1k
        if (((in - start) > XML_MAX_TEXT_LENGTH) &&
8975
0
            ((ctxt->options & XML_PARSE_HUGE) == 0)) {
8976
0
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
8977
0
                           "AttValue length too long\n");
8978
0
            return(NULL);
8979
0
        }
8980
81.1k
  if (*in != limit) goto need_complex;
8981
7.33M
    } else {
8982
95.6M
  while ((in < end) && (*in != limit) && (*in >= 0x20) &&
8983
89.1M
         (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
8984
88.3M
      in++;
8985
88.3M
      col++;
8986
88.3M
      if (in >= end) {
8987
22.3k
    const xmlChar *oldbase = ctxt->input->base;
8988
22.3k
    GROW;
8989
22.3k
                if (ctxt->instate == XML_PARSER_EOF)
8990
0
                    return(NULL);
8991
22.3k
    if (oldbase != ctxt->input->base) {
8992
0
        long delta = ctxt->input->base - oldbase;
8993
0
        start = start + delta;
8994
0
        in = in + delta;
8995
0
    }
8996
22.3k
    end = ctxt->input->end;
8997
22.3k
                if (((in - start) > XML_MAX_TEXT_LENGTH) &&
8998
0
                    ((ctxt->options & XML_PARSE_HUGE) == 0)) {
8999
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9000
0
                                   "AttValue length too long\n");
9001
0
                    return(NULL);
9002
0
                }
9003
22.3k
      }
9004
88.3M
  }
9005
7.33M
  last = in;
9006
7.33M
        if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9007
0
            ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9008
0
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9009
0
                           "AttValue length too long\n");
9010
0
            return(NULL);
9011
0
        }
9012
7.33M
  if (*in != limit) goto need_complex;
9013
7.33M
    }
9014
6.19M
    in++;
9015
6.19M
    col++;
9016
6.19M
    if (len != NULL) {
9017
6.10M
        *len = last - start;
9018
6.10M
        ret = (xmlChar *) start;
9019
6.10M
    } else {
9020
89.3k
        if (alloc) *alloc = 1;
9021
89.3k
        ret = xmlStrndup(start, last - start);
9022
89.3k
    }
9023
6.19M
    CUR_PTR = in;
9024
6.19M
    ctxt->input->line = line;
9025
6.19M
    ctxt->input->col = col;
9026
6.19M
    if (alloc) *alloc = 0;
9027
6.19M
    return ret;
9028
1.22M
need_complex:
9029
1.22M
    if (alloc) *alloc = 1;
9030
1.22M
    return xmlParseAttValueComplex(ctxt, len, normalize);
9031
7.42M
}
9032
9033
/**
9034
 * xmlParseAttribute2:
9035
 * @ctxt:  an XML parser context
9036
 * @pref:  the element prefix
9037
 * @elem:  the element name
9038
 * @prefix:  a xmlChar ** used to store the value of the attribute prefix
9039
 * @value:  a xmlChar ** used to store the value of the attribute
9040
 * @len:  an int * to save the length of the attribute
9041
 * @alloc:  an int * to indicate if the attribute was allocated
9042
 *
9043
 * parse an attribute in the new SAX2 framework.
9044
 *
9045
 * Returns the attribute name, and the value in *value, .
9046
 */
9047
9048
static const xmlChar *
9049
xmlParseAttribute2(xmlParserCtxtPtr ctxt,
9050
                   const xmlChar * pref, const xmlChar * elem,
9051
                   const xmlChar ** prefix, xmlChar ** value,
9052
                   int *len, int *alloc)
9053
10.9M
{
9054
10.9M
    const xmlChar *name;
9055
10.9M
    xmlChar *val, *internal_val = NULL;
9056
10.9M
    int normalize = 0;
9057
9058
10.9M
    *value = NULL;
9059
10.9M
    GROW;
9060
10.9M
    name = xmlParseQName(ctxt, prefix);
9061
10.9M
    if (name == NULL) {
9062
2.76M
        xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9063
2.76M
                       "error parsing attribute name\n");
9064
2.76M
        return (NULL);
9065
2.76M
    }
9066
9067
    /*
9068
     * get the type if needed
9069
     */
9070
8.18M
    if (ctxt->attsSpecial != NULL) {
9071
466k
        int type;
9072
9073
466k
        type = (int) (ptrdiff_t) xmlHashQLookup2(ctxt->attsSpecial,
9074
466k
                                                 pref, elem, *prefix, name);
9075
466k
        if (type != 0)
9076
85.9k
            normalize = 1;
9077
466k
    }
9078
9079
    /*
9080
     * read the value
9081
     */
9082
8.18M
    SKIP_BLANKS;
9083
8.18M
    if (RAW == '=') {
9084
7.45M
        NEXT;
9085
7.45M
        SKIP_BLANKS;
9086
7.45M
        val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
9087
7.45M
  if (normalize) {
9088
      /*
9089
       * Sometimes a second normalisation pass for spaces is needed
9090
       * but that only happens if charrefs or entities refernces
9091
       * have been used in the attribute value, i.e. the attribute
9092
       * value have been extracted in an allocated string already.
9093
       */
9094
83.0k
      if (*alloc) {
9095
67.6k
          const xmlChar *val2;
9096
9097
67.6k
          val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
9098
67.6k
    if ((val2 != NULL) && (val2 != val)) {
9099
8.48k
        xmlFree(val);
9100
8.48k
        val = (xmlChar *) val2;
9101
8.48k
    }
9102
67.6k
      }
9103
83.0k
  }
9104
7.45M
        ctxt->instate = XML_PARSER_CONTENT;
9105
7.45M
    } else {
9106
733k
        xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
9107
733k
                          "Specification mandates value for attribute %s\n",
9108
733k
                          name);
9109
733k
        return (NULL);
9110
733k
    }
9111
9112
7.45M
    if (*prefix == ctxt->str_xml) {
9113
        /*
9114
         * Check that xml:lang conforms to the specification
9115
         * No more registered as an error, just generate a warning now
9116
         * since this was deprecated in XML second edition
9117
         */
9118
254k
        if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
9119
0
            internal_val = xmlStrndup(val, *len);
9120
0
            if (!xmlCheckLanguageID(internal_val)) {
9121
0
                xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
9122
0
                              "Malformed value for xml:lang : %s\n",
9123
0
                              internal_val, NULL);
9124
0
            }
9125
0
        }
9126
9127
        /*
9128
         * Check that xml:space conforms to the specification
9129
         */
9130
254k
        if (xmlStrEqual(name, BAD_CAST "space")) {
9131
13.1k
            internal_val = xmlStrndup(val, *len);
9132
13.1k
            if (xmlStrEqual(internal_val, BAD_CAST "default"))
9133
1.00k
                *(ctxt->space) = 0;
9134
12.1k
            else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
9135
7.35k
                *(ctxt->space) = 1;
9136
4.82k
            else {
9137
4.82k
                xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
9138
4.82k
                              "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
9139
4.82k
                              internal_val, NULL);
9140
4.82k
            }
9141
13.1k
        }
9142
254k
        if (internal_val) {
9143
10.9k
            xmlFree(internal_val);
9144
10.9k
        }
9145
254k
    }
9146
9147
7.45M
    *value = val;
9148
7.45M
    return (name);
9149
8.18M
}
9150
/**
9151
 * xmlParseStartTag2:
9152
 * @ctxt:  an XML parser context
9153
 *
9154
 * parse a start of tag either for rule element or
9155
 * EmptyElement. In both case we don't parse the tag closing chars.
9156
 * This routine is called when running SAX2 parsing
9157
 *
9158
 * [40] STag ::= '<' Name (S Attribute)* S? '>'
9159
 *
9160
 * [ WFC: Unique Att Spec ]
9161
 * No attribute name may appear more than once in the same start-tag or
9162
 * empty-element tag.
9163
 *
9164
 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
9165
 *
9166
 * [ WFC: Unique Att Spec ]
9167
 * No attribute name may appear more than once in the same start-tag or
9168
 * empty-element tag.
9169
 *
9170
 * With namespace:
9171
 *
9172
 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
9173
 *
9174
 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
9175
 *
9176
 * Returns the element name parsed
9177
 */
9178
9179
static const xmlChar *
9180
xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
9181
18.9M
                  const xmlChar **URI, int *tlen) {
9182
18.9M
    const xmlChar *localname;
9183
18.9M
    const xmlChar *prefix;
9184
18.9M
    const xmlChar *attname;
9185
18.9M
    const xmlChar *aprefix;
9186
18.9M
    const xmlChar *nsname;
9187
18.9M
    xmlChar *attvalue;
9188
18.9M
    const xmlChar **atts = ctxt->atts;
9189
18.9M
    int maxatts = ctxt->maxatts;
9190
18.9M
    int nratts, nbatts, nbdef, inputid;
9191
18.9M
    int i, j, nbNs, attval;
9192
18.9M
    unsigned long cur;
9193
18.9M
    int nsNr = ctxt->nsNr;
9194
9195
18.9M
    if (RAW != '<') return(NULL);
9196
18.9M
    NEXT1;
9197
9198
    /*
9199
     * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
9200
     *       point since the attribute values may be stored as pointers to
9201
     *       the buffer and calling SHRINK would destroy them !
9202
     *       The Shrinking is only possible once the full set of attribute
9203
     *       callbacks have been done.
9204
     */
9205
18.9M
    SHRINK;
9206
18.9M
    cur = ctxt->input->cur - ctxt->input->base;
9207
18.9M
    inputid = ctxt->input->id;
9208
18.9M
    nbatts = 0;
9209
18.9M
    nratts = 0;
9210
18.9M
    nbdef = 0;
9211
18.9M
    nbNs = 0;
9212
18.9M
    attval = 0;
9213
    /* Forget any namespaces added during an earlier parse of this element. */
9214
18.9M
    ctxt->nsNr = nsNr;
9215
9216
18.9M
    localname = xmlParseQName(ctxt, &prefix);
9217
18.9M
    if (localname == NULL) {
9218
277k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9219
277k
           "StartTag: invalid element name\n");
9220
277k
        return(NULL);
9221
277k
    }
9222
18.6M
    *tlen = ctxt->input->cur - ctxt->input->base - cur;
9223
9224
    /*
9225
     * Now parse the attributes, it ends up with the ending
9226
     *
9227
     * (S Attribute)* S?
9228
     */
9229
18.6M
    SKIP_BLANKS;
9230
18.6M
    GROW;
9231
9232
21.6M
    while (((RAW != '>') &&
9233
14.1M
     ((RAW != '/') || (NXT(1) != '>')) &&
9234
11.1M
     (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
9235
10.9M
  const xmlChar *q = CUR_PTR;
9236
10.9M
  unsigned int cons = ctxt->input->consumed;
9237
10.9M
  int len = -1, alloc = 0;
9238
9239
10.9M
  attname = xmlParseAttribute2(ctxt, prefix, localname,
9240
10.9M
                               &aprefix, &attvalue, &len, &alloc);
9241
10.9M
        if ((attname == NULL) || (attvalue == NULL))
9242
3.71M
            goto next_attr;
9243
7.23M
  if (len < 0) len = xmlStrlen(attvalue);
9244
9245
7.23M
        if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9246
492k
            const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9247
492k
            xmlURIPtr uri;
9248
9249
492k
            if (URL == NULL) {
9250
1
                xmlErrMemory(ctxt, "dictionary allocation failure");
9251
1
                if ((attvalue != NULL) && (alloc != 0))
9252
1
                    xmlFree(attvalue);
9253
1
                return(NULL);
9254
1
            }
9255
492k
            if (*URL != 0) {
9256
478k
                uri = xmlParseURI((const char *) URL);
9257
478k
                if (uri == NULL) {
9258
292k
                    xmlNsErr(ctxt, XML_WAR_NS_URI,
9259
292k
                             "xmlns: '%s' is not a valid URI\n",
9260
292k
                                       URL, NULL, NULL);
9261
292k
                } else {
9262
185k
                    if (uri->scheme == NULL) {
9263
85.2k
                        xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9264
85.2k
                                  "xmlns: URI %s is not absolute\n",
9265
85.2k
                                  URL, NULL, NULL);
9266
85.2k
                    }
9267
185k
                    xmlFreeURI(uri);
9268
185k
                }
9269
478k
                if (URL == ctxt->str_xml_ns) {
9270
794
                    if (attname != ctxt->str_xml) {
9271
794
                        xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9272
794
                     "xml namespace URI cannot be the default namespace\n",
9273
794
                                 NULL, NULL, NULL);
9274
794
                    }
9275
794
                    goto next_attr;
9276
794
                }
9277
477k
                if ((len == 29) &&
9278
7.99k
                    (xmlStrEqual(URL,
9279
7.99k
                             BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9280
2.60k
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9281
2.60k
                         "reuse of the xmlns namespace name is forbidden\n",
9282
2.60k
                             NULL, NULL, NULL);
9283
2.60k
                    goto next_attr;
9284
2.60k
                }
9285
477k
            }
9286
            /*
9287
             * check that it's not a defined namespace
9288
             */
9289
513k
            for (j = 1;j <= nbNs;j++)
9290
48.2k
                if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9291
23.7k
                    break;
9292
488k
            if (j <= nbNs)
9293
23.7k
                xmlErrAttributeDup(ctxt, NULL, attname);
9294
464k
            else
9295
464k
                if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
9296
9297
6.74M
        } else if (aprefix == ctxt->str_xmlns) {
9298
187k
            const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9299
187k
            xmlURIPtr uri;
9300
9301
187k
            if (attname == ctxt->str_xml) {
9302
3.08k
                if (URL != ctxt->str_xml_ns) {
9303
2.49k
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9304
2.49k
                             "xml namespace prefix mapped to wrong URI\n",
9305
2.49k
                             NULL, NULL, NULL);
9306
2.49k
                }
9307
                /*
9308
                 * Do not keep a namespace definition node
9309
                 */
9310
3.08k
                goto next_attr;
9311
3.08k
            }
9312
184k
            if (URL == ctxt->str_xml_ns) {
9313
1.54k
                if (attname != ctxt->str_xml) {
9314
1.54k
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9315
1.54k
                             "xml namespace URI mapped to wrong prefix\n",
9316
1.54k
                             NULL, NULL, NULL);
9317
1.54k
                }
9318
1.54k
                goto next_attr;
9319
1.54k
            }
9320
182k
            if (attname == ctxt->str_xmlns) {
9321
3.67k
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9322
3.67k
                         "redefinition of the xmlns prefix is forbidden\n",
9323
3.67k
                         NULL, NULL, NULL);
9324
3.67k
                goto next_attr;
9325
3.67k
            }
9326
179k
            if ((len == 29) &&
9327
5.20k
                (xmlStrEqual(URL,
9328
5.20k
                             BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9329
1.28k
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9330
1.28k
                         "reuse of the xmlns namespace name is forbidden\n",
9331
1.28k
                         NULL, NULL, NULL);
9332
1.28k
                goto next_attr;
9333
1.28k
            }
9334
177k
            if ((URL == NULL) || (URL[0] == 0)) {
9335
4.62k
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9336
4.62k
                         "xmlns:%s: Empty XML namespace is not allowed\n",
9337
4.62k
                              attname, NULL, NULL);
9338
4.62k
                goto next_attr;
9339
173k
            } else {
9340
173k
                uri = xmlParseURI((const char *) URL);
9341
173k
                if (uri == NULL) {
9342
88.7k
                    xmlNsErr(ctxt, XML_WAR_NS_URI,
9343
88.7k
                         "xmlns:%s: '%s' is not a valid URI\n",
9344
88.7k
                                       attname, URL, NULL);
9345
88.7k
                } else {
9346
84.4k
                    if ((ctxt->pedantic) && (uri->scheme == NULL)) {
9347
0
                        xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9348
0
                                  "xmlns:%s: URI %s is not absolute\n",
9349
0
                                  attname, URL, NULL);
9350
0
                    }
9351
84.4k
                    xmlFreeURI(uri);
9352
84.4k
                }
9353
173k
            }
9354
9355
            /*
9356
             * check that it's not a defined namespace
9357
             */
9358
251k
            for (j = 1;j <= nbNs;j++)
9359
92.9k
                if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9360
15.0k
                    break;
9361
173k
            if (j <= nbNs)
9362
15.0k
                xmlErrAttributeDup(ctxt, aprefix, attname);
9363
158k
            else
9364
158k
                if (nsPush(ctxt, attname, URL) > 0) nbNs++;
9365
9366
6.55M
        } else {
9367
            /*
9368
             * Add the pair to atts
9369
             */
9370
6.55M
            if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9371
115k
                if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9372
0
                    goto next_attr;
9373
0
                }
9374
115k
                maxatts = ctxt->maxatts;
9375
115k
                atts = ctxt->atts;
9376
115k
            }
9377
6.55M
            ctxt->attallocs[nratts++] = alloc;
9378
6.55M
            atts[nbatts++] = attname;
9379
6.55M
            atts[nbatts++] = aprefix;
9380
            /*
9381
             * The namespace URI field is used temporarily to point at the
9382
             * base of the current input buffer for non-alloced attributes.
9383
             * When the input buffer is reallocated, all the pointers become
9384
             * invalid, but they can be reconstructed later.
9385
             */
9386
6.55M
            if (alloc)
9387
740k
                atts[nbatts++] = NULL;
9388
5.81M
            else
9389
5.81M
                atts[nbatts++] = ctxt->input->base;
9390
6.55M
            atts[nbatts++] = attvalue;
9391
6.55M
            attvalue += len;
9392
6.55M
            atts[nbatts++] = attvalue;
9393
            /*
9394
             * tag if some deallocation is needed
9395
             */
9396
6.55M
            if (alloc != 0) attval = 1;
9397
6.55M
            attvalue = NULL; /* moved into atts */
9398
6.55M
        }
9399
9400
10.9M
next_attr:
9401
10.9M
        if ((attvalue != NULL) && (alloc != 0)) {
9402
392k
            xmlFree(attvalue);
9403
392k
            attvalue = NULL;
9404
392k
        }
9405
9406
10.9M
  GROW
9407
10.9M
        if (ctxt->instate == XML_PARSER_EOF)
9408
2
            break;
9409
10.9M
  if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9410
3.29M
      break;
9411
7.65M
  if (SKIP_BLANKS == 0) {
9412
4.64M
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9413
4.64M
         "attributes construct error\n");
9414
4.64M
      break;
9415
4.64M
  }
9416
3.00M
        if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
9417
0
            (attname == NULL) && (attvalue == NULL)) {
9418
0
      xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9419
0
           "xmlParseStartTag: problem parsing attributes\n");
9420
0
      break;
9421
0
  }
9422
3.00M
        GROW;
9423
3.00M
    }
9424
9425
18.6M
    if (ctxt->input->id != inputid) {
9426
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9427
0
                    "Unexpected change of input\n");
9428
0
        localname = NULL;
9429
0
        goto done;
9430
0
    }
9431
9432
    /* Reconstruct attribute value pointers. */
9433
25.1M
    for (i = 0, j = 0; j < nratts; i += 5, j++) {
9434
6.55M
        if (atts[i+2] != NULL) {
9435
            /*
9436
             * Arithmetic on dangling pointers is technically undefined
9437
             * behavior, but well...
9438
             */
9439
5.81M
            ptrdiff_t offset = ctxt->input->base - atts[i+2];
9440
5.81M
            atts[i+2]  = NULL;    /* Reset repurposed namespace URI */
9441
5.81M
            atts[i+3] += offset;  /* value */
9442
5.81M
            atts[i+4] += offset;  /* valuend */
9443
5.81M
        }
9444
6.55M
    }
9445
9446
    /*
9447
     * The attributes defaulting
9448
     */
9449
18.6M
    if (ctxt->attsDefault != NULL) {
9450
852k
        xmlDefAttrsPtr defaults;
9451
9452
852k
  defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9453
852k
  if (defaults != NULL) {
9454
928k
      for (i = 0;i < defaults->nbAttrs;i++) {
9455
680k
          attname = defaults->values[5 * i];
9456
680k
    aprefix = defaults->values[5 * i + 1];
9457
9458
                /*
9459
     * special work for namespaces defaulted defs
9460
     */
9461
680k
    if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9462
        /*
9463
         * check that it's not a defined namespace
9464
         */
9465
134k
        for (j = 1;j <= nbNs;j++)
9466
59.9k
            if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9467
25.5k
          break;
9468
100k
              if (j <= nbNs) continue;
9469
9470
74.5k
        nsname = xmlGetNamespace(ctxt, NULL);
9471
74.5k
        if (nsname != defaults->values[5 * i + 2]) {
9472
17.4k
      if (nsPush(ctxt, NULL,
9473
17.4k
                 defaults->values[5 * i + 2]) > 0)
9474
17.4k
          nbNs++;
9475
17.4k
        }
9476
580k
    } else if (aprefix == ctxt->str_xmlns) {
9477
        /*
9478
         * check that it's not a defined namespace
9479
         */
9480
442k
        for (j = 1;j <= nbNs;j++)
9481
198k
            if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9482
11.0k
          break;
9483
255k
              if (j <= nbNs) continue;
9484
9485
244k
        nsname = xmlGetNamespace(ctxt, attname);
9486
244k
        if (nsname != defaults->values[2]) {
9487
229k
      if (nsPush(ctxt, attname,
9488
229k
                 defaults->values[5 * i + 2]) > 0)
9489
229k
          nbNs++;
9490
229k
        }
9491
324k
    } else {
9492
        /*
9493
         * check that it's not a defined attribute
9494
         */
9495
1.05M
        for (j = 0;j < nbatts;j+=5) {
9496
755k
      if ((attname == atts[j]) && (aprefix == atts[j+1]))
9497
23.1k
          break;
9498
755k
        }
9499
324k
        if (j < nbatts) continue;
9500
9501
301k
        if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9502
3.82k
      if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9503
0
          return(NULL);
9504
0
      }
9505
3.82k
      maxatts = ctxt->maxatts;
9506
3.82k
      atts = ctxt->atts;
9507
3.82k
        }
9508
301k
        atts[nbatts++] = attname;
9509
301k
        atts[nbatts++] = aprefix;
9510
301k
        if (aprefix == NULL)
9511
156k
      atts[nbatts++] = NULL;
9512
144k
        else
9513
144k
            atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
9514
301k
        atts[nbatts++] = defaults->values[5 * i + 2];
9515
301k
        atts[nbatts++] = defaults->values[5 * i + 3];
9516
301k
        if ((ctxt->standalone == 1) &&
9517
3.26k
            (defaults->values[5 * i + 4] != NULL)) {
9518
0
      xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
9519
0
    "standalone: attribute %s on %s defaulted from external subset\n",
9520
0
                                   attname, localname);
9521
0
        }
9522
301k
        nbdef++;
9523
301k
    }
9524
680k
      }
9525
248k
  }
9526
852k
    }
9527
9528
    /*
9529
     * The attributes checkings
9530
     */
9531
25.4M
    for (i = 0; i < nbatts;i += 5) {
9532
        /*
9533
  * The default namespace does not apply to attribute names.
9534
  */
9535
6.85M
  if (atts[i + 1] != NULL) {
9536
638k
      nsname = xmlGetNamespace(ctxt, atts[i + 1]);
9537
638k
      if (nsname == NULL) {
9538
220k
    xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9539
220k
        "Namespace prefix %s for %s on %s is not defined\n",
9540
220k
        atts[i + 1], atts[i], localname);
9541
220k
      }
9542
638k
      atts[i + 2] = nsname;
9543
638k
  } else
9544
6.22M
      nsname = NULL;
9545
  /*
9546
   * [ WFC: Unique Att Spec ]
9547
   * No attribute name may appear more than once in the same
9548
   * start-tag or empty-element tag.
9549
   * As extended by the Namespace in XML REC.
9550
   */
9551
11.9M
        for (j = 0; j < i;j += 5) {
9552
5.12M
      if (atts[i] == atts[j]) {
9553
130k
          if (atts[i+1] == atts[j+1]) {
9554
67.9k
        xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
9555
67.9k
        break;
9556
67.9k
    }
9557
62.8k
    if ((nsname != NULL) && (atts[j + 2] == nsname)) {
9558
6.51k
        xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9559
6.51k
           "Namespaced Attribute %s in '%s' redefined\n",
9560
6.51k
           atts[i], nsname, NULL);
9561
6.51k
        break;
9562
6.51k
    }
9563
62.8k
      }
9564
5.12M
  }
9565
6.85M
    }
9566
9567
18.6M
    nsname = xmlGetNamespace(ctxt, prefix);
9568
18.6M
    if ((prefix != NULL) && (nsname == NULL)) {
9569
572k
  xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9570
572k
           "Namespace prefix %s on %s is not defined\n",
9571
572k
     prefix, localname, NULL);
9572
572k
    }
9573
18.6M
    *pref = prefix;
9574
18.6M
    *URI = nsname;
9575
9576
    /*
9577
     * SAX: Start of Element !
9578
     */
9579
18.6M
    if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9580
18.6M
  (!ctxt->disableSAX)) {
9581
17.5M
  if (nbNs > 0)
9582
523k
      ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9583
523k
        nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
9584
523k
        nbatts / 5, nbdef, atts);
9585
16.9M
  else
9586
16.9M
      ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9587
16.9M
                    nsname, 0, NULL, nbatts / 5, nbdef, atts);
9588
17.5M
    }
9589
9590
18.6M
done:
9591
    /*
9592
     * Free up attribute allocated strings if needed
9593
     */
9594
18.6M
    if (attval != 0) {
9595
1.59M
  for (i = 3,j = 0; j < nratts;i += 5,j++)
9596
887k
      if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9597
740k
          xmlFree((xmlChar *) atts[i]);
9598
707k
    }
9599
9600
18.6M
    return(localname);
9601
18.6M
}
9602
9603
/**
9604
 * xmlParseEndTag2:
9605
 * @ctxt:  an XML parser context
9606
 * @line:  line of the start tag
9607
 * @nsNr:  number of namespaces on the start tag
9608
 *
9609
 * parse an end of tag
9610
 *
9611
 * [42] ETag ::= '</' Name S? '>'
9612
 *
9613
 * With namespace
9614
 *
9615
 * [NS 9] ETag ::= '</' QName S? '>'
9616
 */
9617
9618
static void
9619
xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
9620
7.42M
                const xmlChar *URI, int line, int nsNr, int tlen) {
9621
7.42M
    const xmlChar *name;
9622
7.42M
    size_t curLength;
9623
9624
7.42M
    GROW;
9625
7.42M
    if ((RAW != '<') || (NXT(1) != '/')) {
9626
0
  xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
9627
0
  return;
9628
0
    }
9629
7.42M
    SKIP(2);
9630
9631
7.42M
    curLength = ctxt->input->end - ctxt->input->cur;
9632
7.42M
    if ((tlen > 0) && (curLength >= (size_t)tlen) &&
9633
121k
        (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) {
9634
53.1k
        if ((curLength >= (size_t)(tlen + 1)) &&
9635
52.7k
      (ctxt->input->cur[tlen] == '>')) {
9636
44.9k
      ctxt->input->cur += tlen + 1;
9637
44.9k
      ctxt->input->col += tlen + 1;
9638
44.9k
      goto done;
9639
44.9k
  }
9640
8.17k
  ctxt->input->cur += tlen;
9641
8.17k
  ctxt->input->col += tlen;
9642
8.17k
  name = (xmlChar*)1;
9643
7.37M
    } else {
9644
7.37M
  if (prefix == NULL)
9645
7.14M
      name = xmlParseNameAndCompare(ctxt, ctxt->name);
9646
224k
  else
9647
224k
      name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
9648
7.37M
    }
9649
9650
    /*
9651
     * We should definitely be at the ending "S? '>'" part
9652
     */
9653
7.38M
    GROW;
9654
7.38M
    if (ctxt->instate == XML_PARSER_EOF)
9655
0
        return;
9656
7.38M
    SKIP_BLANKS;
9657
7.38M
    if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
9658
1.00M
  xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
9659
1.00M
    } else
9660
6.37M
  NEXT1;
9661
9662
    /*
9663
     * [ WFC: Element Type Match ]
9664
     * The Name in an element's end-tag must match the element type in the
9665
     * start-tag.
9666
     *
9667
     */
9668
7.38M
    if (name != (xmlChar*)1) {
9669
2.35M
        if (name == NULL) name = BAD_CAST "unparseable";
9670
2.35M
        if ((line == 0) && (ctxt->node != NULL))
9671
2.16M
            line = ctxt->node->line;
9672
2.35M
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
9673
2.35M
         "Opening and ending tag mismatch: %s line %d and %s\n",
9674
2.35M
                    ctxt->name, line, name);
9675
2.35M
    }
9676
9677
    /*
9678
     * SAX: End of Tag
9679
     */
9680
7.42M
done:
9681
7.42M
    if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9682
7.42M
  (!ctxt->disableSAX))
9683
7.30M
  ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
9684
9685
7.42M
    spacePop(ctxt);
9686
7.42M
    if (nsNr != 0)
9687
75.7k
  nsPop(ctxt, nsNr);
9688
7.42M
    return;
9689
7.38M
}
9690
9691
/**
9692
 * xmlParseCDSect:
9693
 * @ctxt:  an XML parser context
9694
 *
9695
 * Parse escaped pure raw content.
9696
 *
9697
 * [18] CDSect ::= CDStart CData CDEnd
9698
 *
9699
 * [19] CDStart ::= '<![CDATA['
9700
 *
9701
 * [20] Data ::= (Char* - (Char* ']]>' Char*))
9702
 *
9703
 * [21] CDEnd ::= ']]>'
9704
 */
9705
void
9706
47.7k
xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9707
47.7k
    xmlChar *buf = NULL;
9708
47.7k
    int len = 0;
9709
47.7k
    int size = XML_PARSER_BUFFER_SIZE;
9710
47.7k
    int r, rl;
9711
47.7k
    int s, sl;
9712
47.7k
    int cur, l;
9713
47.7k
    int count = 0;
9714
9715
    /* Check 2.6.0 was NXT(0) not RAW */
9716
47.7k
    if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9717
47.7k
  SKIP(9);
9718
47.7k
    } else
9719
0
        return;
9720
9721
47.7k
    ctxt->instate = XML_PARSER_CDATA_SECTION;
9722
47.7k
    r = CUR_CHAR(rl);
9723
47.7k
    if (!IS_CHAR(r)) {
9724
186
  xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9725
186
  ctxt->instate = XML_PARSER_CONTENT;
9726
186
        return;
9727
186
    }
9728
47.5k
    NEXTL(rl);
9729
47.5k
    s = CUR_CHAR(sl);
9730
47.5k
    if (!IS_CHAR(s)) {
9731
929
  xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9732
929
  ctxt->instate = XML_PARSER_CONTENT;
9733
929
        return;
9734
929
    }
9735
46.5k
    NEXTL(sl);
9736
46.5k
    cur = CUR_CHAR(l);
9737
46.5k
    buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
9738
46.5k
    if (buf == NULL) {
9739
0
  xmlErrMemory(ctxt, NULL);
9740
0
  return;
9741
0
    }
9742
120M
    while (IS_CHAR(cur) &&
9743
120M
           ((r != ']') || (s != ']') || (cur != '>'))) {
9744
120M
  if (len + 5 >= size) {
9745
40.6k
      xmlChar *tmp;
9746
9747
40.6k
            if ((size > XML_MAX_TEXT_LENGTH) &&
9748
0
                ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9749
0
                xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9750
0
                             "CData section too big found", NULL);
9751
0
                xmlFree (buf);
9752
0
                return;
9753
0
            }
9754
40.6k
      tmp = (xmlChar *) xmlRealloc(buf, size * 2 * sizeof(xmlChar));
9755
40.6k
      if (tmp == NULL) {
9756
0
          xmlFree(buf);
9757
0
    xmlErrMemory(ctxt, NULL);
9758
0
    return;
9759
0
      }
9760
40.6k
      buf = tmp;
9761
40.6k
      size *= 2;
9762
40.6k
  }
9763
120M
  COPY_BUF(rl,buf,len,r);
9764
120M
  r = s;
9765
120M
  rl = sl;
9766
120M
  s = cur;
9767
120M
  sl = l;
9768
120M
  count++;
9769
120M
  if (count > 50) {
9770
2.35M
      GROW;
9771
2.35M
            if (ctxt->instate == XML_PARSER_EOF) {
9772
1
    xmlFree(buf);
9773
1
    return;
9774
1
            }
9775
2.35M
      count = 0;
9776
2.35M
  }
9777
120M
  NEXTL(l);
9778
120M
  cur = CUR_CHAR(l);
9779
120M
    }
9780
46.5k
    buf[len] = 0;
9781
46.5k
    ctxt->instate = XML_PARSER_CONTENT;
9782
46.5k
    if (cur != '>') {
9783
6.17k
  xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9784
6.17k
                       "CData section not finished\n%.50s\n", buf);
9785
6.17k
  xmlFree(buf);
9786
6.17k
        return;
9787
6.17k
    }
9788
40.4k
    NEXTL(l);
9789
9790
    /*
9791
     * OK the buffer is to be consumed as cdata.
9792
     */
9793
40.4k
    if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9794
9.09k
  if (ctxt->sax->cdataBlock != NULL)
9795
9.09k
      ctxt->sax->cdataBlock(ctxt->userData, buf, len);
9796
0
  else if (ctxt->sax->characters != NULL)
9797
0
      ctxt->sax->characters(ctxt->userData, buf, len);
9798
9.09k
    }
9799
40.4k
    xmlFree(buf);
9800
40.4k
}
9801
9802
/**
9803
 * xmlParseContent:
9804
 * @ctxt:  an XML parser context
9805
 *
9806
 * Parse a content:
9807
 *
9808
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9809
 */
9810
9811
void
9812
1.51M
xmlParseContent(xmlParserCtxtPtr ctxt) {
9813
1.51M
    GROW;
9814
5.88M
    while ((RAW != 0) &&
9815
4.57M
     ((RAW != '<') || (NXT(1) != '/')) &&
9816
4.44M
     (ctxt->instate != XML_PARSER_EOF)) {
9817
4.37M
  const xmlChar *test = CUR_PTR;
9818
4.37M
  unsigned int cons = ctxt->input->consumed;
9819
4.37M
  const xmlChar *cur = ctxt->input->cur;
9820
9821
  /*
9822
   * First case : a Processing Instruction.
9823
   */
9824
4.37M
  if ((*cur == '<') && (cur[1] == '?')) {
9825
83.5k
      xmlParsePI(ctxt);
9826
83.5k
  }
9827
9828
  /*
9829
   * Second case : a CDSection
9830
   */
9831
  /* 2.6.0 test was *cur not RAW */
9832
4.28M
  else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9833
47.7k
      xmlParseCDSect(ctxt);
9834
47.7k
  }
9835
9836
  /*
9837
   * Third case :  a comment
9838
   */
9839
4.24M
  else if ((*cur == '<') && (NXT(1) == '!') &&
9840
318k
     (NXT(2) == '-') && (NXT(3) == '-')) {
9841
188k
      xmlParseComment(ctxt);
9842
188k
      ctxt->instate = XML_PARSER_CONTENT;
9843
188k
  }
9844
9845
  /*
9846
   * Fourth case :  a sub-element.
9847
   */
9848
4.05M
  else if (*cur == '<') {
9849
2.12M
      xmlParseElement(ctxt);
9850
2.12M
  }
9851
9852
  /*
9853
   * Fifth case : a reference. If if has not been resolved,
9854
   *    parsing returns it's Name, create the node
9855
   */
9856
9857
1.92M
  else if (*cur == '&') {
9858
479k
      xmlParseReference(ctxt);
9859
479k
  }
9860
9861
  /*
9862
   * Last case, text. Note that References are handled directly.
9863
   */
9864
1.44M
  else {
9865
1.44M
      xmlParseCharData(ctxt, 0);
9866
1.44M
  }
9867
9868
4.37M
  GROW;
9869
4.37M
  SHRINK;
9870
9871
4.37M
  if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
9872
0
      xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9873
0
                  "detected an error in element content\n");
9874
0
      xmlHaltParser(ctxt);
9875
0
            break;
9876
0
  }
9877
4.37M
    }
9878
1.51M
}
9879
9880
/**
9881
 * xmlParseElement:
9882
 * @ctxt:  an XML parser context
9883
 *
9884
 * parse an XML element, this is highly recursive
9885
 *
9886
 * [39] element ::= EmptyElemTag | STag content ETag
9887
 *
9888
 * [ WFC: Element Type Match ]
9889
 * The Name in an element's end-tag must match the element type in the
9890
 * start-tag.
9891
 *
9892
 */
9893
9894
void
9895
2.12M
xmlParseElement(xmlParserCtxtPtr ctxt) {
9896
2.12M
    const xmlChar *name;
9897
2.12M
    const xmlChar *prefix = NULL;
9898
2.12M
    const xmlChar *URI = NULL;
9899
2.12M
    xmlParserNodeInfo node_info;
9900
2.12M
    int line, tlen = 0;
9901
2.12M
    xmlNodePtr ret;
9902
2.12M
    int nsNr = ctxt->nsNr;
9903
9904
2.12M
    if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) &&
9905
587
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9906
587
  xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
9907
587
     "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
9908
587
        xmlParserMaxDepth);
9909
587
  xmlHaltParser(ctxt);
9910
587
  return;
9911
587
    }
9912
9913
    /* Capture start position */
9914
2.12M
    if (ctxt->record_info) {
9915
0
        node_info.begin_pos = ctxt->input->consumed +
9916
0
                          (CUR_PTR - ctxt->input->base);
9917
0
  node_info.begin_line = ctxt->input->line;
9918
0
    }
9919
9920
2.12M
    if (ctxt->spaceNr == 0)
9921
0
  spacePush(ctxt, -1);
9922
2.12M
    else if (*ctxt->space == -2)
9923
450k
  spacePush(ctxt, -1);
9924
1.67M
    else
9925
1.67M
  spacePush(ctxt, *ctxt->space);
9926
9927
2.12M
    line = ctxt->input->line;
9928
2.12M
#ifdef LIBXML_SAX1_ENABLED
9929
2.12M
    if (ctxt->sax2)
9930
2.12M
#endif /* LIBXML_SAX1_ENABLED */
9931
2.12M
        name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
9932
0
#ifdef LIBXML_SAX1_ENABLED
9933
0
    else
9934
0
  name = xmlParseStartTag(ctxt);
9935
2.12M
#endif /* LIBXML_SAX1_ENABLED */
9936
2.12M
    if (ctxt->instate == XML_PARSER_EOF)
9937
16
  return;
9938
2.12M
    if (name == NULL) {
9939
271k
  spacePop(ctxt);
9940
271k
        return;
9941
271k
    }
9942
1.85M
    namePush(ctxt, name);
9943
1.85M
    ret = ctxt->node;
9944
9945
1.85M
#ifdef LIBXML_VALID_ENABLED
9946
    /*
9947
     * [ VC: Root Element Type ]
9948
     * The Name in the document type declaration must match the element
9949
     * type of the root element.
9950
     */
9951
1.85M
    if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9952
0
        ctxt->node && (ctxt->node == ctxt->myDoc->children))
9953
0
        ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
9954
1.85M
#endif /* LIBXML_VALID_ENABLED */
9955
9956
    /*
9957
     * Check for an Empty Element.
9958
     */
9959
1.85M
    if ((RAW == '/') && (NXT(1) == '>')) {
9960
57.2k
        SKIP(2);
9961
57.2k
  if (ctxt->sax2) {
9962
57.2k
      if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9963
57.2k
    (!ctxt->disableSAX))
9964
24.5k
    ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
9965
57.2k
#ifdef LIBXML_SAX1_ENABLED
9966
57.2k
  } else {
9967
0
      if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
9968
0
    (!ctxt->disableSAX))
9969
0
    ctxt->sax->endElement(ctxt->userData, name);
9970
0
#endif /* LIBXML_SAX1_ENABLED */
9971
0
  }
9972
57.2k
  namePop(ctxt);
9973
57.2k
  spacePop(ctxt);
9974
57.2k
  if (nsNr != ctxt->nsNr)
9975
5.31k
      nsPop(ctxt, ctxt->nsNr - nsNr);
9976
57.2k
  if ( ret != NULL && ctxt->record_info ) {
9977
0
     node_info.end_pos = ctxt->input->consumed +
9978
0
            (CUR_PTR - ctxt->input->base);
9979
0
     node_info.end_line = ctxt->input->line;
9980
0
     node_info.node = ret;
9981
0
     xmlParserAddNodeInfo(ctxt, &node_info);
9982
0
  }
9983
57.2k
  return;
9984
57.2k
    }
9985
1.79M
    if (RAW == '>') {
9986
1.32M
        NEXT1;
9987
1.32M
    } else {
9988
466k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
9989
466k
         "Couldn't find end of Start Tag %s line %d\n",
9990
466k
                    name, line, NULL);
9991
9992
  /*
9993
   * end of parsing of this node.
9994
   */
9995
466k
  nodePop(ctxt);
9996
466k
  namePop(ctxt);
9997
466k
  spacePop(ctxt);
9998
466k
  if (nsNr != ctxt->nsNr)
9999
139k
      nsPop(ctxt, ctxt->nsNr - nsNr);
10000
10001
  /*
10002
   * Capture end position and add node
10003
   */
10004
466k
  if ( ret != NULL && ctxt->record_info ) {
10005
0
     node_info.end_pos = ctxt->input->consumed +
10006
0
            (CUR_PTR - ctxt->input->base);
10007
0
     node_info.end_line = ctxt->input->line;
10008
0
     node_info.node = ret;
10009
0
     xmlParserAddNodeInfo(ctxt, &node_info);
10010
0
  }
10011
466k
  return;
10012
466k
    }
10013
10014
    /*
10015
     * Parse the content of the element:
10016
     */
10017
1.32M
    xmlParseContent(ctxt);
10018
1.32M
    if (ctxt->instate == XML_PARSER_EOF)
10019
154k
  return;
10020
1.17M
    if (!IS_BYTE_CHAR(RAW)) {
10021
1.04M
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
10022
1.04M
   "Premature end of data in tag %s line %d\n",
10023
1.04M
                    name, line, NULL);
10024
10025
  /*
10026
   * end of parsing of this node.
10027
   */
10028
1.04M
  nodePop(ctxt);
10029
1.04M
  namePop(ctxt);
10030
1.04M
  spacePop(ctxt);
10031
1.04M
  if (nsNr != ctxt->nsNr)
10032
75.1k
      nsPop(ctxt, ctxt->nsNr - nsNr);
10033
1.04M
  return;
10034
1.04M
    }
10035
10036
    /*
10037
     * parse the end of tag: '</' should be here.
10038
     */
10039
125k
    if (ctxt->sax2) {
10040
125k
  xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen);
10041
125k
  namePop(ctxt);
10042
125k
    }
10043
0
#ifdef LIBXML_SAX1_ENABLED
10044
0
      else
10045
0
  xmlParseEndTag1(ctxt, line);
10046
125k
#endif /* LIBXML_SAX1_ENABLED */
10047
10048
    /*
10049
     * Capture end position and add node
10050
     */
10051
125k
    if ( ret != NULL && ctxt->record_info ) {
10052
0
       node_info.end_pos = ctxt->input->consumed +
10053
0
                          (CUR_PTR - ctxt->input->base);
10054
0
       node_info.end_line = ctxt->input->line;
10055
0
       node_info.node = ret;
10056
0
       xmlParserAddNodeInfo(ctxt, &node_info);
10057
0
    }
10058
125k
}
10059
10060
/**
10061
 * xmlParseVersionNum:
10062
 * @ctxt:  an XML parser context
10063
 *
10064
 * parse the XML version value.
10065
 *
10066
 * [26] VersionNum ::= '1.' [0-9]+
10067
 *
10068
 * In practice allow [0-9].[0-9]+ at that level
10069
 *
10070
 * Returns the string giving the XML version number, or NULL
10071
 */
10072
xmlChar *
10073
51.0k
xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
10074
51.0k
    xmlChar *buf = NULL;
10075
51.0k
    int len = 0;
10076
51.0k
    int size = 10;
10077
51.0k
    xmlChar cur;
10078
10079
51.0k
    buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
10080
51.0k
    if (buf == NULL) {
10081
0
  xmlErrMemory(ctxt, NULL);
10082
0
  return(NULL);
10083
0
    }
10084
51.0k
    cur = CUR;
10085
51.0k
    if (!((cur >= '0') && (cur <= '9'))) {
10086
258
  xmlFree(buf);
10087
258
  return(NULL);
10088
258
    }
10089
50.7k
    buf[len++] = cur;
10090
50.7k
    NEXT;
10091
50.7k
    cur=CUR;
10092
50.7k
    if (cur != '.') {
10093
140
  xmlFree(buf);
10094
140
  return(NULL);
10095
140
    }
10096
50.6k
    buf[len++] = cur;
10097
50.6k
    NEXT;
10098
50.6k
    cur=CUR;
10099
1.71M
    while ((cur >= '0') && (cur <= '9')) {
10100
1.66M
  if (len + 1 >= size) {
10101
2.10k
      xmlChar *tmp;
10102
10103
2.10k
      size *= 2;
10104
2.10k
      tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
10105
2.10k
      if (tmp == NULL) {
10106
0
          xmlFree(buf);
10107
0
    xmlErrMemory(ctxt, NULL);
10108
0
    return(NULL);
10109
0
      }
10110
2.10k
      buf = tmp;
10111
2.10k
  }
10112
1.66M
  buf[len++] = cur;
10113
1.66M
  NEXT;
10114
1.66M
  cur=CUR;
10115
1.66M
    }
10116
50.6k
    buf[len] = 0;
10117
50.6k
    return(buf);
10118
50.6k
}
10119
10120
/**
10121
 * xmlParseVersionInfo:
10122
 * @ctxt:  an XML parser context
10123
 *
10124
 * parse the XML version.
10125
 *
10126
 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
10127
 *
10128
 * [25] Eq ::= S? '=' S?
10129
 *
10130
 * Returns the version string, e.g. "1.0"
10131
 */
10132
10133
xmlChar *
10134
69.9k
xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
10135
69.9k
    xmlChar *version = NULL;
10136
10137
69.9k
    if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
10138
51.9k
  SKIP(7);
10139
51.9k
  SKIP_BLANKS;
10140
51.9k
  if (RAW != '=') {
10141
563
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10142
563
      return(NULL);
10143
563
        }
10144
51.3k
  NEXT;
10145
51.3k
  SKIP_BLANKS;
10146
51.3k
  if (RAW == '"') {
10147
40.2k
      NEXT;
10148
40.2k
      version = xmlParseVersionNum(ctxt);
10149
40.2k
      if (RAW != '"') {
10150
513
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10151
513
      } else
10152
39.7k
          NEXT;
10153
40.2k
  } else if (RAW == '\''){
10154
10.8k
      NEXT;
10155
10.8k
      version = xmlParseVersionNum(ctxt);
10156
10.8k
      if (RAW != '\'') {
10157
100
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10158
100
      } else
10159
10.7k
          NEXT;
10160
10.8k
  } else {
10161
358
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10162
358
  }
10163
51.3k
    }
10164
69.3k
    return(version);
10165
69.9k
}
10166
10167
/**
10168
 * xmlParseEncName:
10169
 * @ctxt:  an XML parser context
10170
 *
10171
 * parse the XML encoding name
10172
 *
10173
 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
10174
 *
10175
 * Returns the encoding name value or NULL
10176
 */
10177
xmlChar *
10178
53.1k
xmlParseEncName(xmlParserCtxtPtr ctxt) {
10179
53.1k
    xmlChar *buf = NULL;
10180
53.1k
    int len = 0;
10181
53.1k
    int size = 10;
10182
53.1k
    xmlChar cur;
10183
10184
53.1k
    cur = CUR;
10185
53.1k
    if (((cur >= 'a') && (cur <= 'z')) ||
10186
52.9k
        ((cur >= 'A') && (cur <= 'Z'))) {
10187
52.9k
  buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
10188
52.9k
  if (buf == NULL) {
10189
0
      xmlErrMemory(ctxt, NULL);
10190
0
      return(NULL);
10191
0
  }
10192
10193
52.9k
  buf[len++] = cur;
10194
52.9k
  NEXT;
10195
52.9k
  cur = CUR;
10196
8.80M
  while (((cur >= 'a') && (cur <= 'z')) ||
10197
6.86M
         ((cur >= 'A') && (cur <= 'Z')) ||
10198
4.52M
         ((cur >= '0') && (cur <= '9')) ||
10199
88.3k
         (cur == '.') || (cur == '_') ||
10200
8.75M
         (cur == '-')) {
10201
8.75M
      if (len + 1 >= size) {
10202
3.59k
          xmlChar *tmp;
10203
10204
3.59k
    size *= 2;
10205
3.59k
    tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
10206
3.59k
    if (tmp == NULL) {
10207
0
        xmlErrMemory(ctxt, NULL);
10208
0
        xmlFree(buf);
10209
0
        return(NULL);
10210
0
    }
10211
3.59k
    buf = tmp;
10212
3.59k
      }
10213
8.75M
      buf[len++] = cur;
10214
8.75M
      NEXT;
10215
8.75M
      cur = CUR;
10216
8.75M
      if (cur == 0) {
10217
241
          SHRINK;
10218
241
    GROW;
10219
241
    cur = CUR;
10220
241
      }
10221
8.75M
        }
10222
52.9k
  buf[len] = 0;
10223
52.9k
    } else {
10224
177
  xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
10225
177
    }
10226
53.1k
    return(buf);
10227
53.1k
}
10228
10229
/**
10230
 * xmlParseEncodingDecl:
10231
 * @ctxt:  an XML parser context
10232
 *
10233
 * parse the XML encoding declaration
10234
 *
10235
 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' |  "'" EncName "'")
10236
 *
10237
 * this setups the conversion filters.
10238
 *
10239
 * Returns the encoding value or NULL
10240
 */
10241
10242
const xmlChar *
10243
67.2k
xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
10244
67.2k
    xmlChar *encoding = NULL;
10245
10246
67.2k
    SKIP_BLANKS;
10247
67.2k
    if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
10248
53.3k
  SKIP(8);
10249
53.3k
  SKIP_BLANKS;
10250
53.3k
  if (RAW != '=') {
10251
69
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10252
69
      return(NULL);
10253
69
        }
10254
53.2k
  NEXT;
10255
53.2k
  SKIP_BLANKS;
10256
53.2k
  if (RAW == '"') {
10257
42.0k
      NEXT;
10258
42.0k
      encoding = xmlParseEncName(ctxt);
10259
42.0k
      if (RAW != '"') {
10260
408
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10261
408
    xmlFree((xmlChar *) encoding);
10262
408
    return(NULL);
10263
408
      } else
10264
41.6k
          NEXT;
10265
42.0k
  } else if (RAW == '\''){
10266
11.1k
      NEXT;
10267
11.1k
      encoding = xmlParseEncName(ctxt);
10268
11.1k
      if (RAW != '\'') {
10269
213
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10270
213
    xmlFree((xmlChar *) encoding);
10271
213
    return(NULL);
10272
213
      } else
10273
10.8k
          NEXT;
10274
11.1k
  } else {
10275
81
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10276
81
  }
10277
10278
        /*
10279
         * Non standard parsing, allowing the user to ignore encoding
10280
         */
10281
52.6k
        if (ctxt->options & XML_PARSE_IGNORE_ENC) {
10282
0
      xmlFree((xmlChar *) encoding);
10283
0
            return(NULL);
10284
0
  }
10285
10286
  /*
10287
   * UTF-16 encoding stwich has already taken place at this stage,
10288
   * more over the little-endian/big-endian selection is already done
10289
   */
10290
52.6k
        if ((encoding != NULL) &&
10291
52.5k
      ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
10292
52.5k
       (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
10293
      /*
10294
       * If no encoding was passed to the parser, that we are
10295
       * using UTF-16 and no decoder is present i.e. the
10296
       * document is apparently UTF-8 compatible, then raise an
10297
       * encoding mismatch fatal error
10298
       */
10299
20
      if ((ctxt->encoding == NULL) &&
10300
20
          (ctxt->input->buf != NULL) &&
10301
20
          (ctxt->input->buf->encoder == NULL)) {
10302
14
    xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING,
10303
14
      "Document labelled UTF-16 but has UTF-8 content\n");
10304
14
      }
10305
20
      if (ctxt->encoding != NULL)
10306
0
    xmlFree((xmlChar *) ctxt->encoding);
10307
20
      ctxt->encoding = encoding;
10308
20
  }
10309
  /*
10310
   * UTF-8 encoding is handled natively
10311
   */
10312
52.6k
        else if ((encoding != NULL) &&
10313
52.5k
      ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
10314
29.1k
       (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
10315
23.5k
      if (ctxt->encoding != NULL)
10316
0
    xmlFree((xmlChar *) ctxt->encoding);
10317
23.5k
      ctxt->encoding = encoding;
10318
23.5k
  }
10319
29.0k
  else if (encoding != NULL) {
10320
28.9k
      xmlCharEncodingHandlerPtr handler;
10321
10322
28.9k
      if (ctxt->input->encoding != NULL)
10323
0
    xmlFree((xmlChar *) ctxt->input->encoding);
10324
28.9k
      ctxt->input->encoding = encoding;
10325
10326
28.9k
            handler = xmlFindCharEncodingHandler((const char *) encoding);
10327
28.9k
      if (handler != NULL) {
10328
26.6k
    if (xmlSwitchToEncoding(ctxt, handler) < 0) {
10329
        /* failed to convert */
10330
57
        ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
10331
57
        return(NULL);
10332
57
    }
10333
26.6k
      } else {
10334
2.25k
    xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
10335
2.25k
      "Unsupported encoding %s\n", encoding);
10336
2.25k
    return(NULL);
10337
2.25k
      }
10338
28.9k
  }
10339
52.6k
    }
10340
64.2k
    return(encoding);
10341
67.2k
}
10342
10343
/**
10344
 * xmlParseSDDecl:
10345
 * @ctxt:  an XML parser context
10346
 *
10347
 * parse the XML standalone declaration
10348
 *
10349
 * [32] SDDecl ::= S 'standalone' Eq
10350
 *                 (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
10351
 *
10352
 * [ VC: Standalone Document Declaration ]
10353
 * TODO The standalone document declaration must have the value "no"
10354
 * if any external markup declarations contain declarations of:
10355
 *  - attributes with default values, if elements to which these
10356
 *    attributes apply appear in the document without specifications
10357
 *    of values for these attributes, or
10358
 *  - entities (other than amp, lt, gt, apos, quot), if references
10359
 *    to those entities appear in the document, or
10360
 *  - attributes with values subject to normalization, where the
10361
 *    attribute appears in the document with a value which will change
10362
 *    as a result of normalization, or
10363
 *  - element types with element content, if white space occurs directly
10364
 *    within any instance of those types.
10365
 *
10366
 * Returns:
10367
 *   1 if standalone="yes"
10368
 *   0 if standalone="no"
10369
 *  -2 if standalone attribute is missing or invalid
10370
 *    (A standalone value of -2 means that the XML declaration was found,
10371
 *     but no value was specified for the standalone attribute).
10372
 */
10373
10374
int
10375
42.6k
xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
10376
42.6k
    int standalone = -2;
10377
10378
42.6k
    SKIP_BLANKS;
10379
42.6k
    if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
10380
6.32k
  SKIP(10);
10381
6.32k
        SKIP_BLANKS;
10382
6.32k
  if (RAW != '=') {
10383
40
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10384
40
      return(standalone);
10385
40
        }
10386
6.28k
  NEXT;
10387
6.28k
  SKIP_BLANKS;
10388
6.28k
        if (RAW == '\''){
10389
280
      NEXT;
10390
280
      if ((RAW == 'n') && (NXT(1) == 'o')) {
10391
169
          standalone = 0;
10392
169
                SKIP(2);
10393
169
      } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10394
72
                 (NXT(2) == 's')) {
10395
64
          standalone = 1;
10396
64
    SKIP(3);
10397
64
            } else {
10398
47
    xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10399
47
      }
10400
280
      if (RAW != '\'') {
10401
115
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10402
115
      } else
10403
165
          NEXT;
10404
6.00k
  } else if (RAW == '"'){
10405
5.99k
      NEXT;
10406
5.99k
      if ((RAW == 'n') && (NXT(1) == 'o')) {
10407
19
          standalone = 0;
10408
19
    SKIP(2);
10409
5.97k
      } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10410
5.92k
                 (NXT(2) == 's')) {
10411
5.90k
          standalone = 1;
10412
5.90k
                SKIP(3);
10413
5.90k
            } else {
10414
71
    xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10415
71
      }
10416
5.99k
      if (RAW != '"') {
10417
132
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10418
132
      } else
10419
5.86k
          NEXT;
10420
5.99k
  } else {
10421
13
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10422
13
        }
10423
6.28k
    }
10424
42.6k
    return(standalone);
10425
42.6k
}
10426
10427
/**
10428
 * xmlParseXMLDecl:
10429
 * @ctxt:  an XML parser context
10430
 *
10431
 * parse an XML declaration header
10432
 *
10433
 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10434
 */
10435
10436
void
10437
69.9k
xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
10438
69.9k
    xmlChar *version;
10439
10440
    /*
10441
     * This value for standalone indicates that the document has an
10442
     * XML declaration but it does not have a standalone attribute.
10443
     * It will be overwritten later if a standalone attribute is found.
10444
     */
10445
69.9k
    ctxt->input->standalone = -2;
10446
10447
    /*
10448
     * We know that '<?xml' is here.
10449
     */
10450
69.9k
    SKIP(5);
10451
10452
69.9k
    if (!IS_BLANK_CH(RAW)) {
10453
0
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10454
0
                 "Blank needed after '<?xml'\n");
10455
0
    }
10456
69.9k
    SKIP_BLANKS;
10457
10458
    /*
10459
     * We must have the VersionInfo here.
10460
     */
10461
69.9k
    version = xmlParseVersionInfo(ctxt);
10462
69.9k
    if (version == NULL) {
10463
19.3k
  xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
10464
50.6k
    } else {
10465
50.6k
  if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10466
      /*
10467
       * Changed here for XML-1.0 5th edition
10468
       */
10469
12.3k
      if (ctxt->options & XML_PARSE_OLD10) {
10470
0
    xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10471
0
                "Unsupported version '%s'\n",
10472
0
                version);
10473
12.3k
      } else {
10474
12.3k
          if ((version[0] == '1') && ((version[1] == '.'))) {
10475
11.5k
        xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10476
11.5k
                      "Unsupported version '%s'\n",
10477
11.5k
          version, NULL);
10478
11.5k
    } else {
10479
741
        xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10480
741
              "Unsupported version '%s'\n",
10481
741
              version);
10482
741
    }
10483
12.3k
      }
10484
12.3k
  }
10485
50.6k
  if (ctxt->version != NULL)
10486
0
      xmlFree((void *) ctxt->version);
10487
50.6k
  ctxt->version = version;
10488
50.6k
    }
10489
10490
    /*
10491
     * We may have the encoding declaration
10492
     */
10493
69.9k
    if (!IS_BLANK_CH(RAW)) {
10494
21.9k
        if ((RAW == '?') && (NXT(1) == '>')) {
10495
2.65k
      SKIP(2);
10496
2.65k
      return;
10497
2.65k
  }
10498
19.3k
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10499
19.3k
    }
10500
67.2k
    xmlParseEncodingDecl(ctxt);
10501
67.2k
    if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10502
64.9k
         (ctxt->instate == XML_PARSER_EOF)) {
10503
  /*
10504
   * The XML REC instructs us to stop parsing right here
10505
   */
10506
2.31k
        return;
10507
2.31k
    }
10508
10509
    /*
10510
     * We may have the standalone status.
10511
     */
10512
64.9k
    if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
10513
26.1k
        if ((RAW == '?') && (NXT(1) == '>')) {
10514
22.3k
      SKIP(2);
10515
22.3k
      return;
10516
22.3k
  }
10517
3.88k
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10518
3.88k
    }
10519
10520
    /*
10521
     * We can grow the input buffer freely at that point
10522
     */
10523
42.6k
    GROW;
10524
10525
42.6k
    SKIP_BLANKS;
10526
42.6k
    ctxt->input->standalone = xmlParseSDDecl(ctxt);
10527
10528
42.6k
    SKIP_BLANKS;
10529
42.6k
    if ((RAW == '?') && (NXT(1) == '>')) {
10530
23.9k
        SKIP(2);
10531
23.9k
    } else if (RAW == '>') {
10532
        /* Deprecated old WD ... */
10533
5.96k
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10534
5.96k
  NEXT;
10535
12.7k
    } else {
10536
12.7k
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10537
12.7k
  MOVETO_ENDTAG(CUR_PTR);
10538
12.7k
  NEXT;
10539
12.7k
    }
10540
42.6k
}
10541
10542
/**
10543
 * xmlParseMisc:
10544
 * @ctxt:  an XML parser context
10545
 *
10546
 * parse an XML Misc* optional field.
10547
 *
10548
 * [27] Misc ::= Comment | PI |  S
10549
 */
10550
10551
void
10552
0
xmlParseMisc(xmlParserCtxtPtr ctxt) {
10553
0
    while ((ctxt->instate != XML_PARSER_EOF) &&
10554
0
           (((RAW == '<') && (NXT(1) == '?')) ||
10555
0
            (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
10556
0
            IS_BLANK_CH(CUR))) {
10557
0
        if ((RAW == '<') && (NXT(1) == '?')) {
10558
0
      xmlParsePI(ctxt);
10559
0
  } else if (IS_BLANK_CH(CUR)) {
10560
0
      NEXT;
10561
0
  } else
10562
0
      xmlParseComment(ctxt);
10563
0
    }
10564
0
}
10565
10566
/**
10567
 * xmlParseDocument:
10568
 * @ctxt:  an XML parser context
10569
 *
10570
 * parse an XML document (and build a tree if using the standard SAX
10571
 * interface).
10572
 *
10573
 * [1] document ::= prolog element Misc*
10574
 *
10575
 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
10576
 *
10577
 * Returns 0, -1 in case of error. the parser context is augmented
10578
 *                as a result of the parsing.
10579
 */
10580
10581
int
10582
0
xmlParseDocument(xmlParserCtxtPtr ctxt) {
10583
0
    xmlChar start[4];
10584
0
    xmlCharEncoding enc;
10585
10586
0
    xmlInitParser();
10587
10588
0
    if ((ctxt == NULL) || (ctxt->input == NULL))
10589
0
        return(-1);
10590
10591
0
    GROW;
10592
10593
    /*
10594
     * SAX: detecting the level.
10595
     */
10596
0
    xmlDetectSAX2(ctxt);
10597
10598
    /*
10599
     * SAX: beginning of the document processing.
10600
     */
10601
0
    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10602
0
        ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10603
0
    if (ctxt->instate == XML_PARSER_EOF)
10604
0
  return(-1);
10605
10606
0
    if ((ctxt->encoding == NULL) &&
10607
0
        ((ctxt->input->end - ctxt->input->cur) >= 4)) {
10608
  /*
10609
   * Get the 4 first bytes and decode the charset
10610
   * if enc != XML_CHAR_ENCODING_NONE
10611
   * plug some encoding conversion routines.
10612
   */
10613
0
  start[0] = RAW;
10614
0
  start[1] = NXT(1);
10615
0
  start[2] = NXT(2);
10616
0
  start[3] = NXT(3);
10617
0
  enc = xmlDetectCharEncoding(&start[0], 4);
10618
0
  if (enc != XML_CHAR_ENCODING_NONE) {
10619
0
      xmlSwitchEncoding(ctxt, enc);
10620
0
  }
10621
0
    }
10622
10623
10624
0
    if (CUR == 0) {
10625
0
  xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10626
0
  return(-1);
10627
0
    }
10628
10629
    /*
10630
     * Check for the XMLDecl in the Prolog.
10631
     * do not GROW here to avoid the detected encoder to decode more
10632
     * than just the first line, unless the amount of data is really
10633
     * too small to hold "<?xml version="1.0" encoding="foo"
10634
     */
10635
0
    if ((ctxt->input->end - ctxt->input->cur) < 35) {
10636
0
       GROW;
10637
0
    }
10638
0
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10639
10640
  /*
10641
   * Note that we will switch encoding on the fly.
10642
   */
10643
0
  xmlParseXMLDecl(ctxt);
10644
0
  if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10645
0
      (ctxt->instate == XML_PARSER_EOF)) {
10646
      /*
10647
       * The XML REC instructs us to stop parsing right here
10648
       */
10649
0
      return(-1);
10650
0
  }
10651
0
  ctxt->standalone = ctxt->input->standalone;
10652
0
  SKIP_BLANKS;
10653
0
    } else {
10654
0
  ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10655
0
    }
10656
0
    if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10657
0
        ctxt->sax->startDocument(ctxt->userData);
10658
0
    if (ctxt->instate == XML_PARSER_EOF)
10659
0
  return(-1);
10660
0
    if ((ctxt->myDoc != NULL) && (ctxt->input != NULL) &&
10661
0
        (ctxt->input->buf != NULL) && (ctxt->input->buf->compressed >= 0)) {
10662
0
  ctxt->myDoc->compression = ctxt->input->buf->compressed;
10663
0
    }
10664
10665
    /*
10666
     * The Misc part of the Prolog
10667
     */
10668
0
    GROW;
10669
0
    xmlParseMisc(ctxt);
10670
10671
    /*
10672
     * Then possibly doc type declaration(s) and more Misc
10673
     * (doctypedecl Misc*)?
10674
     */
10675
0
    GROW;
10676
0
    if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
10677
10678
0
  ctxt->inSubset = 1;
10679
0
  xmlParseDocTypeDecl(ctxt);
10680
0
  if (RAW == '[') {
10681
0
      ctxt->instate = XML_PARSER_DTD;
10682
0
      xmlParseInternalSubset(ctxt);
10683
0
      if (ctxt->instate == XML_PARSER_EOF)
10684
0
    return(-1);
10685
0
  }
10686
10687
  /*
10688
   * Create and update the external subset.
10689
   */
10690
0
  ctxt->inSubset = 2;
10691
0
  if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10692
0
      (!ctxt->disableSAX))
10693
0
      ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10694
0
                                ctxt->extSubSystem, ctxt->extSubURI);
10695
0
  if (ctxt->instate == XML_PARSER_EOF)
10696
0
      return(-1);
10697
0
  ctxt->inSubset = 0;
10698
10699
0
        xmlCleanSpecialAttr(ctxt);
10700
10701
0
  ctxt->instate = XML_PARSER_PROLOG;
10702
0
  xmlParseMisc(ctxt);
10703
0
    }
10704
10705
    /*
10706
     * Time to start parsing the tree itself
10707
     */
10708
0
    GROW;
10709
0
    if (RAW != '<') {
10710
0
  xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10711
0
           "Start tag expected, '<' not found\n");
10712
0
    } else {
10713
0
  ctxt->instate = XML_PARSER_CONTENT;
10714
0
  xmlParseElement(ctxt);
10715
0
  ctxt->instate = XML_PARSER_EPILOG;
10716
10717
10718
  /*
10719
   * The Misc part at the end
10720
   */
10721
0
  xmlParseMisc(ctxt);
10722
10723
0
  if (RAW != 0) {
10724
0
      xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
10725
0
  }
10726
0
  ctxt->instate = XML_PARSER_EOF;
10727
0
    }
10728
10729
    /*
10730
     * SAX: end of the document processing.
10731
     */
10732
0
    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10733
0
        ctxt->sax->endDocument(ctxt->userData);
10734
10735
    /*
10736
     * Remove locally kept entity definitions if the tree was not built
10737
     */
10738
0
    if ((ctxt->myDoc != NULL) &&
10739
0
  (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
10740
0
  xmlFreeDoc(ctxt->myDoc);
10741
0
  ctxt->myDoc = NULL;
10742
0
    }
10743
10744
0
    if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) {
10745
0
        ctxt->myDoc->properties |= XML_DOC_WELLFORMED;
10746
0
  if (ctxt->valid)
10747
0
      ctxt->myDoc->properties |= XML_DOC_DTDVALID;
10748
0
  if (ctxt->nsWellFormed)
10749
0
      ctxt->myDoc->properties |= XML_DOC_NSVALID;
10750
0
  if (ctxt->options & XML_PARSE_OLD10)
10751
0
      ctxt->myDoc->properties |= XML_DOC_OLD10;
10752
0
    }
10753
0
    if (! ctxt->wellFormed) {
10754
0
  ctxt->valid = 0;
10755
0
  return(-1);
10756
0
    }
10757
0
    return(0);
10758
0
}
10759
10760
/**
10761
 * xmlParseExtParsedEnt:
10762
 * @ctxt:  an XML parser context
10763
 *
10764
 * parse a general parsed entity
10765
 * An external general parsed entity is well-formed if it matches the
10766
 * production labeled extParsedEnt.
10767
 *
10768
 * [78] extParsedEnt ::= TextDecl? content
10769
 *
10770
 * Returns 0, -1 in case of error. the parser context is augmented
10771
 *                as a result of the parsing.
10772
 */
10773
10774
int
10775
0
xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
10776
0
    xmlChar start[4];
10777
0
    xmlCharEncoding enc;
10778
10779
0
    if ((ctxt == NULL) || (ctxt->input == NULL))
10780
0
        return(-1);
10781
10782
0
    xmlDefaultSAXHandlerInit();
10783
10784
0
    xmlDetectSAX2(ctxt);
10785
10786
0
    GROW;
10787
10788
    /*
10789
     * SAX: beginning of the document processing.
10790
     */
10791
0
    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10792
0
        ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10793
10794
    /*
10795
     * Get the 4 first bytes and decode the charset
10796
     * if enc != XML_CHAR_ENCODING_NONE
10797
     * plug some encoding conversion routines.
10798
     */
10799
0
    if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10800
0
  start[0] = RAW;
10801
0
  start[1] = NXT(1);
10802
0
  start[2] = NXT(2);
10803
0
  start[3] = NXT(3);
10804
0
  enc = xmlDetectCharEncoding(start, 4);
10805
0
  if (enc != XML_CHAR_ENCODING_NONE) {
10806
0
      xmlSwitchEncoding(ctxt, enc);
10807
0
  }
10808
0
    }
10809
10810
10811
0
    if (CUR == 0) {
10812
0
  xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10813
0
    }
10814
10815
    /*
10816
     * Check for the XMLDecl in the Prolog.
10817
     */
10818
0
    GROW;
10819
0
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10820
10821
  /*
10822
   * Note that we will switch encoding on the fly.
10823
   */
10824
0
  xmlParseXMLDecl(ctxt);
10825
0
  if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10826
      /*
10827
       * The XML REC instructs us to stop parsing right here
10828
       */
10829
0
      return(-1);
10830
0
  }
10831
0
  SKIP_BLANKS;
10832
0
    } else {
10833
0
  ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10834
0
    }
10835
0
    if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10836
0
        ctxt->sax->startDocument(ctxt->userData);
10837
0
    if (ctxt->instate == XML_PARSER_EOF)
10838
0
  return(-1);
10839
10840
    /*
10841
     * Doing validity checking on chunk doesn't make sense
10842
     */
10843
0
    ctxt->instate = XML_PARSER_CONTENT;
10844
0
    ctxt->validate = 0;
10845
0
    ctxt->loadsubset = 0;
10846
0
    ctxt->depth = 0;
10847
10848
0
    xmlParseContent(ctxt);
10849
0
    if (ctxt->instate == XML_PARSER_EOF)
10850
0
  return(-1);
10851
10852
0
    if ((RAW == '<') && (NXT(1) == '/')) {
10853
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
10854
0
    } else if (RAW != 0) {
10855
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
10856
0
    }
10857
10858
    /*
10859
     * SAX: end of the document processing.
10860
     */
10861
0
    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10862
0
        ctxt->sax->endDocument(ctxt->userData);
10863
10864
0
    if (! ctxt->wellFormed) return(-1);
10865
0
    return(0);
10866
0
}
10867
10868
#ifdef LIBXML_PUSH_ENABLED
10869
/************************************************************************
10870
 *                  *
10871
 *    Progressive parsing interfaces        *
10872
 *                  *
10873
 ************************************************************************/
10874
10875
/**
10876
 * xmlParseLookupSequence:
10877
 * @ctxt:  an XML parser context
10878
 * @first:  the first char to lookup
10879
 * @next:  the next char to lookup or zero
10880
 * @third:  the next char to lookup or zero
10881
 *
10882
 * Try to find if a sequence (first, next, third) or  just (first next) or
10883
 * (first) is available in the input stream.
10884
 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
10885
 * to avoid rescanning sequences of bytes, it DOES change the state of the
10886
 * parser, do not use liberally.
10887
 *
10888
 * Returns the index to the current parsing point if the full sequence
10889
 *      is available, -1 otherwise.
10890
 */
10891
static int
10892
xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
10893
7.80M
                       xmlChar next, xmlChar third) {
10894
7.80M
    int base, len;
10895
7.80M
    xmlParserInputPtr in;
10896
7.80M
    const xmlChar *buf;
10897
10898
7.80M
    in = ctxt->input;
10899
7.80M
    if (in == NULL) return(-1);
10900
7.80M
    base = in->cur - in->base;
10901
7.80M
    if (base < 0) return(-1);
10902
7.80M
    if (ctxt->checkIndex > base)
10903
6.55M
        base = ctxt->checkIndex;
10904
7.80M
    if (in->buf == NULL) {
10905
0
  buf = in->base;
10906
0
  len = in->length;
10907
7.80M
    } else {
10908
7.80M
  buf = xmlBufContent(in->buf->buffer);
10909
7.80M
  len = xmlBufUse(in->buf->buffer);
10910
7.80M
    }
10911
    /* take into account the sequence length */
10912
7.80M
    if (third) len -= 2;
10913
6.97M
    else if (next) len --;
10914
6.29G
    for (;base < len;base++) {
10915
6.28G
        if (buf[base] == first) {
10916
18.7M
      if (third != 0) {
10917
16.3M
    if ((buf[base + 1] != next) ||
10918
15.6M
        (buf[base + 2] != third)) continue;
10919
16.3M
      } else if (next != 0) {
10920
2.24M
    if (buf[base + 1] != next) continue;
10921
2.24M
      }
10922
1.11M
      ctxt->checkIndex = 0;
10923
#ifdef DEBUG_PUSH
10924
      if (next == 0)
10925
    xmlGenericError(xmlGenericErrorContext,
10926
      "PP: lookup '%c' found at %d\n",
10927
      first, base);
10928
      else if (third == 0)
10929
    xmlGenericError(xmlGenericErrorContext,
10930
      "PP: lookup '%c%c' found at %d\n",
10931
      first, next, base);
10932
      else
10933
    xmlGenericError(xmlGenericErrorContext,
10934
      "PP: lookup '%c%c%c' found at %d\n",
10935
      first, next, third, base);
10936
#endif
10937
1.11M
      return(base - (in->cur - in->base));
10938
18.7M
  }
10939
6.28G
    }
10940
6.69M
    ctxt->checkIndex = base;
10941
#ifdef DEBUG_PUSH
10942
    if (next == 0)
10943
  xmlGenericError(xmlGenericErrorContext,
10944
    "PP: lookup '%c' failed\n", first);
10945
    else if (third == 0)
10946
  xmlGenericError(xmlGenericErrorContext,
10947
    "PP: lookup '%c%c' failed\n", first, next);
10948
    else
10949
  xmlGenericError(xmlGenericErrorContext,
10950
    "PP: lookup '%c%c%c' failed\n", first, next, third);
10951
#endif
10952
6.69M
    return(-1);
10953
7.80M
}
10954
10955
/**
10956
 * xmlParseGetLasts:
10957
 * @ctxt:  an XML parser context
10958
 * @lastlt:  pointer to store the last '<' from the input
10959
 * @lastgt:  pointer to store the last '>' from the input
10960
 *
10961
 * Lookup the last < and > in the current chunk
10962
 */
10963
static void
10964
xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
10965
7.92M
                 const xmlChar **lastgt) {
10966
7.92M
    const xmlChar *tmp;
10967
10968
7.92M
    if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
10969
0
  xmlGenericError(xmlGenericErrorContext,
10970
0
        "Internal error: xmlParseGetLasts\n");
10971
0
  return;
10972
0
    }
10973
7.92M
    if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
10974
1.63M
        tmp = ctxt->input->end;
10975
1.63M
  tmp--;
10976
7.52G
  while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
10977
1.63M
  if (tmp < ctxt->input->base) {
10978
76.0k
      *lastlt = NULL;
10979
76.0k
      *lastgt = NULL;
10980
1.55M
  } else {
10981
1.55M
      *lastlt = tmp;
10982
1.55M
      tmp++;
10983
1.66G
      while ((tmp < ctxt->input->end) && (*tmp != '>')) {
10984
1.65G
          if (*tmp == '\'') {
10985
1.11M
        tmp++;
10986
303M
        while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
10987
1.11M
        if (tmp < ctxt->input->end) tmp++;
10988
1.65G
    } else if (*tmp == '"') {
10989
1.71M
        tmp++;
10990
4.40G
        while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
10991
1.71M
        if (tmp < ctxt->input->end) tmp++;
10992
1.71M
    } else
10993
1.65G
        tmp++;
10994
1.65G
      }
10995
1.55M
      if (tmp < ctxt->input->end)
10996
580k
          *lastgt = tmp;
10997
976k
      else {
10998
976k
          tmp = *lastlt;
10999
976k
    tmp--;
11000
192M
    while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
11001
976k
    if (tmp >= ctxt->input->base)
11002
920k
        *lastgt = tmp;
11003
56.0k
    else
11004
56.0k
        *lastgt = NULL;
11005
976k
      }
11006
1.55M
  }
11007
6.29M
    } else {
11008
6.29M
        *lastlt = NULL;
11009
6.29M
  *lastgt = NULL;
11010
6.29M
    }
11011
7.92M
}
11012
/**
11013
 * xmlCheckCdataPush:
11014
 * @cur: pointer to the block of characters
11015
 * @len: length of the block in bytes
11016
 * @complete: 1 if complete CDATA block is passed in, 0 if partial block
11017
 *
11018
 * Check that the block of characters is okay as SCdata content [20]
11019
 *
11020
 * Returns the number of bytes to pass if okay, a negative index where an
11021
 *         UTF-8 error occurred otherwise
11022
 */
11023
static int
11024
632k
xmlCheckCdataPush(const xmlChar *utf, int len, int complete) {
11025
632k
    int ix;
11026
632k
    unsigned char c;
11027
632k
    int codepoint;
11028
11029
632k
    if ((utf == NULL) || (len <= 0))
11030
356k
        return(0);
11031
11032
111M
    for (ix = 0; ix < len;) {      /* string is 0-terminated */
11033
111M
        c = utf[ix];
11034
111M
        if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
11035
39.1M
      if (c >= 0x20)
11036
38.0M
    ix++;
11037
1.12M
      else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
11038
1.11M
          ix++;
11039
2.11k
      else
11040
2.11k
          return(-ix);
11041
71.8M
  } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
11042
338k
      if (ix + 2 > len) return(complete ? -ix : ix);
11043
337k
      if ((utf[ix+1] & 0xc0 ) != 0x80)
11044
1.33k
          return(-ix);
11045
336k
      codepoint = (utf[ix] & 0x1f) << 6;
11046
336k
      codepoint |= utf[ix+1] & 0x3f;
11047
336k
      if (!xmlIsCharQ(codepoint))
11048
751
          return(-ix);
11049
335k
      ix += 2;
11050
71.5M
  } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
11051
71.5M
      if (ix + 3 > len) return(complete ? -ix : ix);
11052
71.4M
      if (((utf[ix+1] & 0xc0) != 0x80) ||
11053
71.4M
          ((utf[ix+2] & 0xc0) != 0x80))
11054
1.49k
        return(-ix);
11055
71.4M
      codepoint = (utf[ix] & 0xf) << 12;
11056
71.4M
      codepoint |= (utf[ix+1] & 0x3f) << 6;
11057
71.4M
      codepoint |= utf[ix+2] & 0x3f;
11058
71.4M
      if (!xmlIsCharQ(codepoint))
11059
1.61k
          return(-ix);
11060
71.4M
      ix += 3;
11061
71.4M
  } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
11062
37.1k
      if (ix + 4 > len) return(complete ? -ix : ix);
11063
36.9k
      if (((utf[ix+1] & 0xc0) != 0x80) ||
11064
35.9k
          ((utf[ix+2] & 0xc0) != 0x80) ||
11065
34.6k
    ((utf[ix+3] & 0xc0) != 0x80))
11066
3.92k
        return(-ix);
11067
33.0k
      codepoint = (utf[ix] & 0x7) << 18;
11068
33.0k
      codepoint |= (utf[ix+1] & 0x3f) << 12;
11069
33.0k
      codepoint |= (utf[ix+2] & 0x3f) << 6;
11070
33.0k
      codepoint |= utf[ix+3] & 0x3f;
11071
33.0k
      if (!xmlIsCharQ(codepoint))
11072
1.69k
          return(-ix);
11073
31.3k
      ix += 4;
11074
31.3k
  } else       /* unknown encoding */
11075
2.18k
      return(-ix);
11076
111M
      }
11077
257k
      return(ix);
11078
276k
}
11079
11080
/**
11081
 * xmlParseTryOrFinish:
11082
 * @ctxt:  an XML parser context
11083
 * @terminate:  last chunk indicator
11084
 *
11085
 * Try to progress on parsing
11086
 *
11087
 * Returns zero if no parsing was possible
11088
 */
11089
static int
11090
7.73M
xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
11091
7.73M
    int ret = 0;
11092
7.73M
    int avail, tlen;
11093
7.73M
    xmlChar cur, next;
11094
7.73M
    const xmlChar *lastlt, *lastgt;
11095
11096
7.73M
    if (ctxt->input == NULL)
11097
0
        return(0);
11098
11099
#ifdef DEBUG_PUSH
11100
    switch (ctxt->instate) {
11101
  case XML_PARSER_EOF:
11102
      xmlGenericError(xmlGenericErrorContext,
11103
        "PP: try EOF\n"); break;
11104
  case XML_PARSER_START:
11105
      xmlGenericError(xmlGenericErrorContext,
11106
        "PP: try START\n"); break;
11107
  case XML_PARSER_MISC:
11108
      xmlGenericError(xmlGenericErrorContext,
11109
        "PP: try MISC\n");break;
11110
  case XML_PARSER_COMMENT:
11111
      xmlGenericError(xmlGenericErrorContext,
11112
        "PP: try COMMENT\n");break;
11113
  case XML_PARSER_PROLOG:
11114
      xmlGenericError(xmlGenericErrorContext,
11115
        "PP: try PROLOG\n");break;
11116
  case XML_PARSER_START_TAG:
11117
      xmlGenericError(xmlGenericErrorContext,
11118
        "PP: try START_TAG\n");break;
11119
  case XML_PARSER_CONTENT:
11120
      xmlGenericError(xmlGenericErrorContext,
11121
        "PP: try CONTENT\n");break;
11122
  case XML_PARSER_CDATA_SECTION:
11123
      xmlGenericError(xmlGenericErrorContext,
11124
        "PP: try CDATA_SECTION\n");break;
11125
  case XML_PARSER_END_TAG:
11126
      xmlGenericError(xmlGenericErrorContext,
11127
        "PP: try END_TAG\n");break;
11128
  case XML_PARSER_ENTITY_DECL:
11129
      xmlGenericError(xmlGenericErrorContext,
11130
        "PP: try ENTITY_DECL\n");break;
11131
  case XML_PARSER_ENTITY_VALUE:
11132
      xmlGenericError(xmlGenericErrorContext,
11133
        "PP: try ENTITY_VALUE\n");break;
11134
  case XML_PARSER_ATTRIBUTE_VALUE:
11135
      xmlGenericError(xmlGenericErrorContext,
11136
        "PP: try ATTRIBUTE_VALUE\n");break;
11137
  case XML_PARSER_DTD:
11138
      xmlGenericError(xmlGenericErrorContext,
11139
        "PP: try DTD\n");break;
11140
  case XML_PARSER_EPILOG:
11141
      xmlGenericError(xmlGenericErrorContext,
11142
        "PP: try EPILOG\n");break;
11143
  case XML_PARSER_PI:
11144
      xmlGenericError(xmlGenericErrorContext,
11145
        "PP: try PI\n");break;
11146
        case XML_PARSER_IGNORE:
11147
            xmlGenericError(xmlGenericErrorContext,
11148
        "PP: try IGNORE\n");break;
11149
    }
11150
#endif
11151
11152
7.73M
    if ((ctxt->input != NULL) &&
11153
7.73M
        (ctxt->input->cur - ctxt->input->base > 4096)) {
11154
122k
  xmlSHRINK(ctxt);
11155
122k
  ctxt->checkIndex = 0;
11156
122k
    }
11157
7.73M
    xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11158
11159
86.0M
    while (ctxt->instate != XML_PARSER_EOF) {
11160
86.0M
  if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
11161
2.69k
      return(0);
11162
11163
86.0M
  if (ctxt->input == NULL) break;
11164
86.0M
  if (ctxt->input->buf == NULL)
11165
0
      avail = ctxt->input->length -
11166
0
              (ctxt->input->cur - ctxt->input->base);
11167
86.0M
  else {
11168
      /*
11169
       * If we are operating on converted input, try to flush
11170
       * remaining chars to avoid them stalling in the non-converted
11171
       * buffer. But do not do this in document start where
11172
       * encoding="..." may not have been read and we work on a
11173
       * guessed encoding.
11174
       */
11175
86.0M
      if ((ctxt->instate != XML_PARSER_START) &&
11176
79.9M
          (ctxt->input->buf->raw != NULL) &&
11177
8.65M
    (xmlBufIsEmpty(ctxt->input->buf->raw) == 0)) {
11178
436k
                size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
11179
436k
                                                 ctxt->input);
11180
436k
    size_t current = ctxt->input->cur - ctxt->input->base;
11181
11182
436k
    xmlParserInputBufferPush(ctxt->input->buf, 0, "");
11183
436k
                xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
11184
436k
                                      base, current);
11185
436k
      }
11186
86.0M
      avail = xmlBufUse(ctxt->input->buf->buffer) -
11187
86.0M
        (ctxt->input->cur - ctxt->input->base);
11188
86.0M
  }
11189
86.0M
        if (avail < 1)
11190
228k
      goto done;
11191
85.7M
        switch (ctxt->instate) {
11192
0
            case XML_PARSER_EOF:
11193
          /*
11194
     * Document parsing is done !
11195
     */
11196
0
          goto done;
11197
6.06M
            case XML_PARSER_START:
11198
6.06M
    if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
11199
181
        xmlChar start[4];
11200
181
        xmlCharEncoding enc;
11201
11202
        /*
11203
         * Very first chars read from the document flow.
11204
         */
11205
181
        if (avail < 4)
11206
181
      goto done;
11207
11208
        /*
11209
         * Get the 4 first bytes and decode the charset
11210
         * if enc != XML_CHAR_ENCODING_NONE
11211
         * plug some encoding conversion routines,
11212
         * else xmlSwitchEncoding will set to (default)
11213
         * UTF8.
11214
         */
11215
0
        start[0] = RAW;
11216
0
        start[1] = NXT(1);
11217
0
        start[2] = NXT(2);
11218
0
        start[3] = NXT(3);
11219
0
        enc = xmlDetectCharEncoding(start, 4);
11220
0
        xmlSwitchEncoding(ctxt, enc);
11221
0
        break;
11222
181
    }
11223
11224
6.06M
    if (avail < 2)
11225
65
        goto done;
11226
6.06M
    cur = ctxt->input->cur[0];
11227
6.06M
    next = ctxt->input->cur[1];
11228
6.06M
    if (cur == 0) {
11229
310
        if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11230
310
      ctxt->sax->setDocumentLocator(ctxt->userData,
11231
310
                  &xmlDefaultSAXLocator);
11232
310
        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11233
310
        xmlHaltParser(ctxt);
11234
#ifdef DEBUG_PUSH
11235
        xmlGenericError(xmlGenericErrorContext,
11236
          "PP: entering EOF\n");
11237
#endif
11238
310
        if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11239
310
      ctxt->sax->endDocument(ctxt->userData);
11240
310
        goto done;
11241
310
    }
11242
6.06M
          if ((cur == '<') && (next == '?')) {
11243
        /* PI or XML decl */
11244
5.93M
        if (avail < 5) return(ret);
11245
5.93M
        if ((!terminate) &&
11246
5.90M
            (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11247
5.85M
      return(ret);
11248
78.5k
        if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11249
78.5k
      ctxt->sax->setDocumentLocator(ctxt->userData,
11250
78.5k
                  &xmlDefaultSAXLocator);
11251
78.5k
        if ((ctxt->input->cur[2] == 'x') &&
11252
72.4k
      (ctxt->input->cur[3] == 'm') &&
11253
70.9k
      (ctxt->input->cur[4] == 'l') &&
11254
70.4k
      (IS_BLANK_CH(ctxt->input->cur[5]))) {
11255
69.9k
      ret += 5;
11256
#ifdef DEBUG_PUSH
11257
      xmlGenericError(xmlGenericErrorContext,
11258
        "PP: Parsing XML Decl\n");
11259
#endif
11260
69.9k
      xmlParseXMLDecl(ctxt);
11261
69.9k
      if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
11262
          /*
11263
           * The XML REC instructs us to stop parsing right
11264
           * here
11265
           */
11266
2.31k
          xmlHaltParser(ctxt);
11267
2.31k
          return(0);
11268
2.31k
      }
11269
67.6k
      ctxt->standalone = ctxt->input->standalone;
11270
67.6k
      if ((ctxt->encoding == NULL) &&
11271
44.0k
          (ctxt->input->encoding != NULL))
11272
26.6k
          ctxt->encoding = xmlStrdup(ctxt->input->encoding);
11273
67.6k
      if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11274
67.6k
          (!ctxt->disableSAX))
11275
67.4k
          ctxt->sax->startDocument(ctxt->userData);
11276
67.6k
      ctxt->instate = XML_PARSER_MISC;
11277
#ifdef DEBUG_PUSH
11278
      xmlGenericError(xmlGenericErrorContext,
11279
        "PP: entering MISC\n");
11280
#endif
11281
67.6k
        } else {
11282
8.63k
      ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11283
8.63k
      if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11284
8.63k
          (!ctxt->disableSAX))
11285
8.63k
          ctxt->sax->startDocument(ctxt->userData);
11286
8.63k
      ctxt->instate = XML_PARSER_MISC;
11287
#ifdef DEBUG_PUSH
11288
      xmlGenericError(xmlGenericErrorContext,
11289
        "PP: entering MISC\n");
11290
#endif
11291
8.63k
        }
11292
131k
    } else {
11293
131k
        if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11294
131k
      ctxt->sax->setDocumentLocator(ctxt->userData,
11295
131k
                  &xmlDefaultSAXLocator);
11296
131k
        ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11297
131k
        if (ctxt->version == NULL) {
11298
0
            xmlErrMemory(ctxt, NULL);
11299
0
      break;
11300
0
        }
11301
131k
        if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11302
131k
            (!ctxt->disableSAX))
11303
131k
      ctxt->sax->startDocument(ctxt->userData);
11304
131k
        ctxt->instate = XML_PARSER_MISC;
11305
#ifdef DEBUG_PUSH
11306
        xmlGenericError(xmlGenericErrorContext,
11307
          "PP: entering MISC\n");
11308
#endif
11309
131k
    }
11310
208k
    break;
11311
17.0M
            case XML_PARSER_START_TAG: {
11312
17.0M
          const xmlChar *name;
11313
17.0M
    const xmlChar *prefix = NULL;
11314
17.0M
    const xmlChar *URI = NULL;
11315
17.0M
    int nsNr = ctxt->nsNr;
11316
11317
17.0M
    if ((avail < 2) && (ctxt->inputNr == 1))
11318
0
        goto done;
11319
17.0M
    cur = ctxt->input->cur[0];
11320
17.0M
          if (cur != '<') {
11321
24.0k
        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11322
24.0k
        xmlHaltParser(ctxt);
11323
24.0k
        if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11324
24.0k
      ctxt->sax->endDocument(ctxt->userData);
11325
24.0k
        goto done;
11326
24.0k
    }
11327
17.0M
    if (!terminate) {
11328
7.18M
        if (ctxt->progressive) {
11329
            /* > can be found unescaped in attribute values */
11330
7.18M
            if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
11331
236k
          goto done;
11332
7.18M
        } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11333
0
      goto done;
11334
0
        }
11335
7.18M
    }
11336
16.7M
    if (ctxt->spaceNr == 0)
11337
493k
        spacePush(ctxt, -1);
11338
16.2M
    else if (*ctxt->space == -2)
11339
6.70M
        spacePush(ctxt, -1);
11340
9.57M
    else
11341
9.57M
        spacePush(ctxt, *ctxt->space);
11342
16.7M
#ifdef LIBXML_SAX1_ENABLED
11343
16.7M
    if (ctxt->sax2)
11344
16.7M
#endif /* LIBXML_SAX1_ENABLED */
11345
16.7M
        name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
11346
0
#ifdef LIBXML_SAX1_ENABLED
11347
0
    else
11348
0
        name = xmlParseStartTag(ctxt);
11349
16.7M
#endif /* LIBXML_SAX1_ENABLED */
11350
16.7M
    if (ctxt->instate == XML_PARSER_EOF)
11351
434
        goto done;
11352
16.7M
    if (name == NULL) {
11353
6.46k
        spacePop(ctxt);
11354
6.46k
        xmlHaltParser(ctxt);
11355
6.46k
        if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11356
6.46k
      ctxt->sax->endDocument(ctxt->userData);
11357
6.46k
        goto done;
11358
6.46k
    }
11359
16.7M
#ifdef LIBXML_VALID_ENABLED
11360
    /*
11361
     * [ VC: Root Element Type ]
11362
     * The Name in the document type declaration must match
11363
     * the element type of the root element.
11364
     */
11365
16.7M
    if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
11366
0
        ctxt->node && (ctxt->node == ctxt->myDoc->children))
11367
0
        ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
11368
16.7M
#endif /* LIBXML_VALID_ENABLED */
11369
11370
    /*
11371
     * Check for an Empty Element.
11372
     */
11373
16.7M
    if ((RAW == '/') && (NXT(1) == '>')) {
11374
3.87M
        SKIP(2);
11375
11376
3.87M
        if (ctxt->sax2) {
11377
3.87M
      if ((ctxt->sax != NULL) &&
11378
3.87M
          (ctxt->sax->endElementNs != NULL) &&
11379
3.87M
          (!ctxt->disableSAX))
11380
3.87M
          ctxt->sax->endElementNs(ctxt->userData, name,
11381
3.87M
                                  prefix, URI);
11382
3.87M
      if (ctxt->nsNr - nsNr > 0)
11383
6.50k
          nsPop(ctxt, ctxt->nsNr - nsNr);
11384
3.87M
#ifdef LIBXML_SAX1_ENABLED
11385
3.87M
        } else {
11386
0
      if ((ctxt->sax != NULL) &&
11387
0
          (ctxt->sax->endElement != NULL) &&
11388
0
          (!ctxt->disableSAX))
11389
0
          ctxt->sax->endElement(ctxt->userData, name);
11390
0
#endif /* LIBXML_SAX1_ENABLED */
11391
0
        }
11392
3.87M
        if (ctxt->instate == XML_PARSER_EOF)
11393
0
      goto done;
11394
3.87M
        spacePop(ctxt);
11395
3.87M
        if (ctxt->nameNr == 0) {
11396
4.92k
      ctxt->instate = XML_PARSER_EPILOG;
11397
3.87M
        } else {
11398
3.87M
      ctxt->instate = XML_PARSER_CONTENT;
11399
3.87M
        }
11400
3.87M
                    ctxt->progressive = 1;
11401
3.87M
        break;
11402
3.87M
    }
11403
12.8M
    if (RAW == '>') {
11404
8.50M
        NEXT;
11405
8.50M
    } else {
11406
4.38M
        xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
11407
4.38M
           "Couldn't find end of Start Tag %s\n",
11408
4.38M
           name);
11409
4.38M
        nodePop(ctxt);
11410
4.38M
        spacePop(ctxt);
11411
4.38M
    }
11412
12.8M
    if (ctxt->sax2)
11413
12.8M
        nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
11414
0
#ifdef LIBXML_SAX1_ENABLED
11415
0
    else
11416
0
        namePush(ctxt, name);
11417
12.8M
#endif /* LIBXML_SAX1_ENABLED */
11418
11419
12.8M
    ctxt->instate = XML_PARSER_CONTENT;
11420
12.8M
                ctxt->progressive = 1;
11421
12.8M
                break;
11422
16.7M
      }
11423
53.8M
            case XML_PARSER_CONTENT: {
11424
53.8M
    const xmlChar *test;
11425
53.8M
    unsigned int cons;
11426
53.8M
    if ((avail < 2) && (ctxt->inputNr == 1))
11427
41.2k
        goto done;
11428
53.7M
    cur = ctxt->input->cur[0];
11429
53.7M
    next = ctxt->input->cur[1];
11430
11431
53.7M
    test = CUR_PTR;
11432
53.7M
          cons = ctxt->input->consumed;
11433
53.7M
    if ((cur == '<') && (next == '/')) {
11434
7.30M
        ctxt->instate = XML_PARSER_END_TAG;
11435
7.30M
        break;
11436
46.4M
          } else if ((cur == '<') && (next == '?')) {
11437
629k
        if ((!terminate) &&
11438
329k
            (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11439
293k
                        ctxt->progressive = XML_PARSER_PI;
11440
293k
      goto done;
11441
293k
                    }
11442
336k
        xmlParsePI(ctxt);
11443
336k
        ctxt->instate = XML_PARSER_CONTENT;
11444
336k
                    ctxt->progressive = 1;
11445
45.8M
    } else if ((cur == '<') && (next != '!')) {
11446
16.6M
        ctxt->instate = XML_PARSER_START_TAG;
11447
16.6M
        break;
11448
29.2M
    } else if ((cur == '<') && (next == '!') &&
11449
740k
               (ctxt->input->cur[2] == '-') &&
11450
120k
         (ctxt->input->cur[3] == '-')) {
11451
120k
        int term;
11452
11453
120k
              if (avail < 4)
11454
0
            goto done;
11455
120k
        ctxt->input->cur += 4;
11456
120k
        term = xmlParseLookupSequence(ctxt, '-', '-', '>');
11457
120k
        ctxt->input->cur -= 4;
11458
120k
        if ((!terminate) && (term < 0)) {
11459
31.2k
                        ctxt->progressive = XML_PARSER_COMMENT;
11460
31.2k
      goto done;
11461
31.2k
                    }
11462
88.7k
        xmlParseComment(ctxt);
11463
88.7k
        ctxt->instate = XML_PARSER_CONTENT;
11464
88.7k
                    ctxt->progressive = 1;
11465
29.1M
    } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
11466
620k
        (ctxt->input->cur[2] == '[') &&
11467
616k
        (ctxt->input->cur[3] == 'C') &&
11468
614k
        (ctxt->input->cur[4] == 'D') &&
11469
612k
        (ctxt->input->cur[5] == 'A') &&
11470
610k
        (ctxt->input->cur[6] == 'T') &&
11471
608k
        (ctxt->input->cur[7] == 'A') &&
11472
606k
        (ctxt->input->cur[8] == '[')) {
11473
604k
        SKIP(9);
11474
604k
        ctxt->instate = XML_PARSER_CDATA_SECTION;
11475
604k
        break;
11476
28.5M
    } else if ((cur == '<') && (next == '!') &&
11477
15.2k
               (avail < 9)) {
11478
13.7k
        goto done;
11479
28.4M
    } else if (cur == '&') {
11480
1.90M
        if ((!terminate) &&
11481
460k
            (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
11482
369k
      goto done;
11483
1.53M
        xmlParseReference(ctxt);
11484
26.5M
    } else {
11485
        /* TODO Avoid the extra copy, handle directly !!! */
11486
        /*
11487
         * Goal of the following test is:
11488
         *  - minimize calls to the SAX 'character' callback
11489
         *    when they are mergeable
11490
         *  - handle an problem for isBlank when we only parse
11491
         *    a sequence of blank chars and the next one is
11492
         *    not available to check against '<' presence.
11493
         *  - tries to homogenize the differences in SAX
11494
         *    callbacks between the push and pull versions
11495
         *    of the parser.
11496
         */
11497
26.5M
        if ((ctxt->inputNr == 1) &&
11498
26.5M
            (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
11499
3.76M
      if (!terminate) {
11500
3.31M
          if (ctxt->progressive) {
11501
3.31M
        if ((lastlt == NULL) ||
11502
3.30M
            (ctxt->input->cur > lastlt))
11503
216k
            goto done;
11504
3.31M
          } else if (xmlParseLookupSequence(ctxt,
11505
0
                                            '<', 0, 0) < 0) {
11506
0
        goto done;
11507
0
          }
11508
3.31M
      }
11509
3.76M
                    }
11510
26.3M
        ctxt->checkIndex = 0;
11511
26.3M
        xmlParseCharData(ctxt, 0);
11512
26.3M
    }
11513
28.3M
    if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
11514
11.6k
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
11515
11.6k
                    "detected an error in element content\n");
11516
11.6k
        xmlHaltParser(ctxt);
11517
11.6k
        break;
11518
11.6k
    }
11519
28.3M
    break;
11520
28.3M
      }
11521
28.3M
            case XML_PARSER_END_TAG:
11522
7.32M
    if (avail < 2)
11523
0
        goto done;
11524
7.32M
    if (!terminate) {
11525
2.55M
        if (ctxt->progressive) {
11526
            /* > can be found unescaped in attribute values */
11527
2.55M
            if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
11528
27.1k
          goto done;
11529
2.55M
        } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11530
0
      goto done;
11531
0
        }
11532
2.55M
    }
11533
7.30M
    if (ctxt->sax2) {
11534
7.30M
        xmlParseEndTag2(ctxt,
11535
7.30M
                (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
11536
7.30M
                (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
11537
7.30M
                (int) (ptrdiff_t)
11538
7.30M
                                ctxt->pushTab[ctxt->nameNr * 3 - 1], 0);
11539
7.30M
        nameNsPop(ctxt);
11540
7.30M
    }
11541
0
#ifdef LIBXML_SAX1_ENABLED
11542
0
      else
11543
0
        xmlParseEndTag1(ctxt, 0);
11544
7.30M
#endif /* LIBXML_SAX1_ENABLED */
11545
7.30M
    if (ctxt->instate == XML_PARSER_EOF) {
11546
        /* Nothing */
11547
7.30M
    } else if (ctxt->nameNr == 0) {
11548
32.4k
        ctxt->instate = XML_PARSER_EPILOG;
11549
7.26M
    } else {
11550
7.26M
        ctxt->instate = XML_PARSER_CONTENT;
11551
7.26M
    }
11552
7.30M
    break;
11553
688k
            case XML_PARSER_CDATA_SECTION: {
11554
          /*
11555
     * The Push mode need to have the SAX callback for
11556
     * cdataBlock merge back contiguous callbacks.
11557
     */
11558
688k
    int base;
11559
11560
688k
    base = xmlParseLookupSequence(ctxt, ']', ']', '>');
11561
688k
    if (base < 0) {
11562
85.4k
        if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
11563
29.5k
            int tmp;
11564
11565
29.5k
      tmp = xmlCheckCdataPush(ctxt->input->cur,
11566
29.5k
                              XML_PARSER_BIG_BUFFER_SIZE, 0);
11567
29.5k
      if (tmp < 0) {
11568
218
          tmp = -tmp;
11569
218
          ctxt->input->cur += tmp;
11570
218
          goto encoding_error;
11571
218
      }
11572
29.3k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
11573
29.3k
          if (ctxt->sax->cdataBlock != NULL)
11574
29.3k
        ctxt->sax->cdataBlock(ctxt->userData,
11575
29.3k
                              ctxt->input->cur, tmp);
11576
0
          else if (ctxt->sax->characters != NULL)
11577
0
        ctxt->sax->characters(ctxt->userData,
11578
0
                              ctxt->input->cur, tmp);
11579
29.3k
      }
11580
29.3k
      if (ctxt->instate == XML_PARSER_EOF)
11581
0
          goto done;
11582
29.3k
      SKIPL(tmp);
11583
29.3k
      ctxt->checkIndex = 0;
11584
29.3k
        }
11585
85.2k
        goto done;
11586
603k
    } else {
11587
603k
        int tmp;
11588
11589
603k
        tmp = xmlCheckCdataPush(ctxt->input->cur, base, 1);
11590
603k
        if ((tmp < 0) || (tmp != base)) {
11591
1.08k
      tmp = -tmp;
11592
1.08k
      ctxt->input->cur += tmp;
11593
1.08k
      goto encoding_error;
11594
1.08k
        }
11595
602k
        if ((ctxt->sax != NULL) && (base == 0) &&
11596
356k
            (ctxt->sax->cdataBlock != NULL) &&
11597
356k
            (!ctxt->disableSAX)) {
11598
      /*
11599
       * Special case to provide identical behaviour
11600
       * between pull and push parsers on enpty CDATA
11601
       * sections
11602
       */
11603
356k
       if ((ctxt->input->cur - ctxt->input->base >= 9) &&
11604
356k
           (!strncmp((const char *)&ctxt->input->cur[-9],
11605
356k
                     "<![CDATA[", 9)))
11606
356k
           ctxt->sax->cdataBlock(ctxt->userData,
11607
356k
                                 BAD_CAST "", 0);
11608
356k
        } else if ((ctxt->sax != NULL) && (base > 0) &&
11609
245k
      (!ctxt->disableSAX)) {
11610
245k
      if (ctxt->sax->cdataBlock != NULL)
11611
245k
          ctxt->sax->cdataBlock(ctxt->userData,
11612
245k
              ctxt->input->cur, base);
11613
0
      else if (ctxt->sax->characters != NULL)
11614
0
          ctxt->sax->characters(ctxt->userData,
11615
0
              ctxt->input->cur, base);
11616
245k
        }
11617
602k
        if (ctxt->instate == XML_PARSER_EOF)
11618
0
      goto done;
11619
602k
        SKIPL(base + 3);
11620
602k
        ctxt->checkIndex = 0;
11621
602k
        ctxt->instate = XML_PARSER_CONTENT;
11622
#ifdef DEBUG_PUSH
11623
        xmlGenericError(xmlGenericErrorContext,
11624
          "PP: entering CONTENT\n");
11625
#endif
11626
602k
    }
11627
602k
    break;
11628
688k
      }
11629
602k
            case XML_PARSER_MISC:
11630
570k
    SKIP_BLANKS;
11631
570k
    if (ctxt->input->buf == NULL)
11632
0
        avail = ctxt->input->length -
11633
0
                (ctxt->input->cur - ctxt->input->base);
11634
570k
    else
11635
570k
        avail = xmlBufUse(ctxt->input->buf->buffer) -
11636
570k
                (ctxt->input->cur - ctxt->input->base);
11637
570k
    if (avail < 2)
11638
117k
        goto done;
11639
452k
    cur = ctxt->input->cur[0];
11640
452k
    next = ctxt->input->cur[1];
11641
452k
          if ((cur == '<') && (next == '?')) {
11642
207k
        if ((!terminate) &&
11643
199k
            (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11644
15.2k
                        ctxt->progressive = XML_PARSER_PI;
11645
15.2k
      goto done;
11646
15.2k
                    }
11647
#ifdef DEBUG_PUSH
11648
        xmlGenericError(xmlGenericErrorContext,
11649
          "PP: Parsing PI\n");
11650
#endif
11651
192k
        xmlParsePI(ctxt);
11652
192k
        if (ctxt->instate == XML_PARSER_EOF)
11653
1
      goto done;
11654
192k
        ctxt->instate = XML_PARSER_MISC;
11655
192k
                    ctxt->progressive = 1;
11656
192k
        ctxt->checkIndex = 0;
11657
245k
    } else if ((cur == '<') && (next == '!') &&
11658
110k
        (ctxt->input->cur[2] == '-') &&
11659
24.9k
        (ctxt->input->cur[3] == '-')) {
11660
23.2k
        if ((!terminate) &&
11661
18.6k
            (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11662
4.18k
                        ctxt->progressive = XML_PARSER_COMMENT;
11663
4.18k
      goto done;
11664
4.18k
                    }
11665
#ifdef DEBUG_PUSH
11666
        xmlGenericError(xmlGenericErrorContext,
11667
          "PP: Parsing Comment\n");
11668
#endif
11669
19.1k
        xmlParseComment(ctxt);
11670
19.1k
        if (ctxt->instate == XML_PARSER_EOF)
11671
0
      goto done;
11672
19.1k
        ctxt->instate = XML_PARSER_MISC;
11673
19.1k
                    ctxt->progressive = 1;
11674
19.1k
        ctxt->checkIndex = 0;
11675
221k
    } else if ((cur == '<') && (next == '!') &&
11676
86.7k
        (ctxt->input->cur[2] == 'D') &&
11677
84.5k
        (ctxt->input->cur[3] == 'O') &&
11678
82.2k
        (ctxt->input->cur[4] == 'C') &&
11679
78.3k
        (ctxt->input->cur[5] == 'T') &&
11680
75.5k
        (ctxt->input->cur[6] == 'Y') &&
11681
73.7k
        (ctxt->input->cur[7] == 'P') &&
11682
71.8k
        (ctxt->input->cur[8] == 'E')) {
11683
69.9k
        if ((!terminate) &&
11684
60.0k
            (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) {
11685
4.85k
                        ctxt->progressive = XML_PARSER_DTD;
11686
4.85k
      goto done;
11687
4.85k
                    }
11688
#ifdef DEBUG_PUSH
11689
        xmlGenericError(xmlGenericErrorContext,
11690
          "PP: Parsing internal subset\n");
11691
#endif
11692
65.0k
        ctxt->inSubset = 1;
11693
65.0k
                    ctxt->progressive = 0;
11694
65.0k
        ctxt->checkIndex = 0;
11695
65.0k
        xmlParseDocTypeDecl(ctxt);
11696
65.0k
        if (ctxt->instate == XML_PARSER_EOF)
11697
0
      goto done;
11698
65.0k
        if (RAW == '[') {
11699
57.5k
      ctxt->instate = XML_PARSER_DTD;
11700
#ifdef DEBUG_PUSH
11701
      xmlGenericError(xmlGenericErrorContext,
11702
        "PP: entering DTD\n");
11703
#endif
11704
57.5k
        } else {
11705
      /*
11706
       * Create and update the external subset.
11707
       */
11708
7.49k
      ctxt->inSubset = 2;
11709
7.49k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11710
7.49k
          (ctxt->sax->externalSubset != NULL))
11711
7.49k
          ctxt->sax->externalSubset(ctxt->userData,
11712
7.49k
            ctxt->intSubName, ctxt->extSubSystem,
11713
7.49k
            ctxt->extSubURI);
11714
7.49k
      ctxt->inSubset = 0;
11715
7.49k
      xmlCleanSpecialAttr(ctxt);
11716
7.49k
      ctxt->instate = XML_PARSER_PROLOG;
11717
#ifdef DEBUG_PUSH
11718
      xmlGenericError(xmlGenericErrorContext,
11719
        "PP: entering PROLOG\n");
11720
#endif
11721
7.49k
        }
11722
151k
    } else if ((cur == '<') && (next == '!') &&
11723
16.7k
               (avail < 9)) {
11724
16.6k
        goto done;
11725
135k
    } else {
11726
135k
        ctxt->instate = XML_PARSER_START_TAG;
11727
135k
        ctxt->progressive = XML_PARSER_START_TAG;
11728
135k
        xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11729
#ifdef DEBUG_PUSH
11730
        xmlGenericError(xmlGenericErrorContext,
11731
          "PP: entering START_TAG\n");
11732
#endif
11733
135k
    }
11734
411k
    break;
11735
411k
            case XML_PARSER_PROLOG:
11736
75.4k
    SKIP_BLANKS;
11737
75.4k
    if (ctxt->input->buf == NULL)
11738
0
        avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11739
75.4k
    else
11740
75.4k
        avail = xmlBufUse(ctxt->input->buf->buffer) -
11741
75.4k
                            (ctxt->input->cur - ctxt->input->base);
11742
75.4k
    if (avail < 2)
11743
3.08k
        goto done;
11744
72.4k
    cur = ctxt->input->cur[0];
11745
72.4k
    next = ctxt->input->cur[1];
11746
72.4k
          if ((cur == '<') && (next == '?')) {
11747
12.3k
        if ((!terminate) &&
11748
11.1k
            (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11749
1.96k
                        ctxt->progressive = XML_PARSER_PI;
11750
1.96k
      goto done;
11751
1.96k
                    }
11752
#ifdef DEBUG_PUSH
11753
        xmlGenericError(xmlGenericErrorContext,
11754
          "PP: Parsing PI\n");
11755
#endif
11756
10.3k
        xmlParsePI(ctxt);
11757
10.3k
        if (ctxt->instate == XML_PARSER_EOF)
11758
0
      goto done;
11759
10.3k
        ctxt->instate = XML_PARSER_PROLOG;
11760
10.3k
                    ctxt->progressive = 1;
11761
60.0k
    } else if ((cur == '<') && (next == '!') &&
11762
9.30k
        (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11763
4.83k
        if ((!terminate) &&
11764
3.64k
            (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11765
1.49k
                        ctxt->progressive = XML_PARSER_COMMENT;
11766
1.49k
      goto done;
11767
1.49k
                    }
11768
#ifdef DEBUG_PUSH
11769
        xmlGenericError(xmlGenericErrorContext,
11770
          "PP: Parsing Comment\n");
11771
#endif
11772
3.34k
        xmlParseComment(ctxt);
11773
3.34k
        if (ctxt->instate == XML_PARSER_EOF)
11774
0
      goto done;
11775
3.34k
        ctxt->instate = XML_PARSER_PROLOG;
11776
3.34k
                    ctxt->progressive = 1;
11777
55.2k
    } else if ((cur == '<') && (next == '!') &&
11778
4.47k
               (avail < 4)) {
11779
3.37k
        goto done;
11780
51.8k
    } else {
11781
51.8k
        ctxt->instate = XML_PARSER_START_TAG;
11782
51.8k
        if (ctxt->progressive == 0)
11783
50.7k
      ctxt->progressive = XML_PARSER_START_TAG;
11784
51.8k
        xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11785
#ifdef DEBUG_PUSH
11786
        xmlGenericError(xmlGenericErrorContext,
11787
          "PP: entering START_TAG\n");
11788
#endif
11789
51.8k
    }
11790
65.5k
    break;
11791
65.5k
            case XML_PARSER_EPILOG:
11792
32.2k
    SKIP_BLANKS;
11793
32.2k
    if (ctxt->input->buf == NULL)
11794
0
        avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11795
32.2k
    else
11796
32.2k
        avail = xmlBufUse(ctxt->input->buf->buffer) -
11797
32.2k
                            (ctxt->input->cur - ctxt->input->base);
11798
32.2k
    if (avail < 2)
11799
14.7k
        goto done;
11800
17.4k
    cur = ctxt->input->cur[0];
11801
17.4k
    next = ctxt->input->cur[1];
11802
17.4k
          if ((cur == '<') && (next == '?')) {
11803
6.55k
        if ((!terminate) &&
11804
5.05k
            (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11805
2.67k
                        ctxt->progressive = XML_PARSER_PI;
11806
2.67k
      goto done;
11807
2.67k
                    }
11808
#ifdef DEBUG_PUSH
11809
        xmlGenericError(xmlGenericErrorContext,
11810
          "PP: Parsing PI\n");
11811
#endif
11812
3.87k
        xmlParsePI(ctxt);
11813
3.87k
        if (ctxt->instate == XML_PARSER_EOF)
11814
0
      goto done;
11815
3.87k
        ctxt->instate = XML_PARSER_EPILOG;
11816
3.87k
                    ctxt->progressive = 1;
11817
10.9k
    } else if ((cur == '<') && (next == '!') &&
11818
10.2k
        (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11819
6.68k
        if ((!terminate) &&
11820
5.39k
            (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11821
1.74k
                        ctxt->progressive = XML_PARSER_COMMENT;
11822
1.74k
      goto done;
11823
1.74k
                    }
11824
#ifdef DEBUG_PUSH
11825
        xmlGenericError(xmlGenericErrorContext,
11826
          "PP: Parsing Comment\n");
11827
#endif
11828
4.94k
        xmlParseComment(ctxt);
11829
4.94k
        if (ctxt->instate == XML_PARSER_EOF)
11830
0
      goto done;
11831
4.94k
        ctxt->instate = XML_PARSER_EPILOG;
11832
4.94k
                    ctxt->progressive = 1;
11833
4.94k
    } else if ((cur == '<') && (next == '!') &&
11834
3.59k
               (avail < 4)) {
11835
3.54k
        goto done;
11836
3.54k
    } else {
11837
709
        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
11838
709
        xmlHaltParser(ctxt);
11839
#ifdef DEBUG_PUSH
11840
        xmlGenericError(xmlGenericErrorContext,
11841
          "PP: entering EOF\n");
11842
#endif
11843
709
        if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11844
709
      ctxt->sax->endDocument(ctxt->userData);
11845
709
        goto done;
11846
709
    }
11847
8.82k
    break;
11848
149k
            case XML_PARSER_DTD: {
11849
          /*
11850
     * Sorry but progressive parsing of the internal subset
11851
     * is not expected to be supported. We first check that
11852
     * the full content of the internal subset is available and
11853
     * the parsing is launched only at that point.
11854
     * Internal subset ends up with "']' S? '>'" in an unescaped
11855
     * section and not in a ']]>' sequence which are conditional
11856
     * sections (whoever argued to keep that crap in XML deserve
11857
     * a place in hell !).
11858
     */
11859
149k
    int base, i;
11860
149k
    xmlChar *buf;
11861
149k
          xmlChar quote = 0;
11862
149k
                size_t use;
11863
11864
149k
    base = ctxt->input->cur - ctxt->input->base;
11865
149k
    if (base < 0) return(0);
11866
149k
    if (ctxt->checkIndex > base)
11867
32.4k
        base = ctxt->checkIndex;
11868
149k
    buf = xmlBufContent(ctxt->input->buf->buffer);
11869
149k
                use = xmlBufUse(ctxt->input->buf->buffer);
11870
31.4G
    for (;(unsigned int) base < use; base++) {
11871
31.4G
        if (quote != 0) {
11872
26.8G
            if (buf[base] == quote)
11873
14.8M
          quote = 0;
11874
26.8G
      continue;
11875
26.8G
        }
11876
4.66G
        if ((quote == 0) && (buf[base] == '<')) {
11877
17.7M
            int found  = 0;
11878
      /* special handling of comments */
11879
17.7M
            if (((unsigned int) base + 4 < use) &&
11880
17.7M
          (buf[base + 1] == '!') &&
11881
11.7M
          (buf[base + 2] == '-') &&
11882
75.8k
          (buf[base + 3] == '-')) {
11883
855M
          for (;(unsigned int) base + 3 < use; base++) {
11884
855M
        if ((buf[base] == '-') &&
11885
1.60M
            (buf[base + 1] == '-') &&
11886
324k
            (buf[base + 2] == '>')) {
11887
67.6k
            found = 1;
11888
67.6k
            base += 2;
11889
67.6k
            break;
11890
67.6k
        }
11891
855M
                }
11892
72.1k
          if (!found) {
11893
#if 0
11894
              fprintf(stderr, "unfinished comment\n");
11895
#endif
11896
4.43k
              break; /* for */
11897
4.43k
                }
11898
67.6k
                continue;
11899
72.1k
      }
11900
17.7M
        }
11901
4.66G
        if (buf[base] == '"') {
11902
2.72M
            quote = '"';
11903
2.72M
      continue;
11904
2.72M
        }
11905
4.65G
        if (buf[base] == '\'') {
11906
12.1M
            quote = '\'';
11907
12.1M
      continue;
11908
12.1M
        }
11909
4.64G
        if (buf[base] == ']') {
11910
#if 0
11911
            fprintf(stderr, "%c%c%c%c: ", buf[base],
11912
              buf[base + 1], buf[base + 2], buf[base + 3]);
11913
#endif
11914
1.61M
            if ((unsigned int) base +1 >= use)
11915
891
          break;
11916
1.61M
      if (buf[base + 1] == ']') {
11917
          /* conditional crap, skip both ']' ! */
11918
1.04M
          base++;
11919
1.04M
          continue;
11920
1.04M
      }
11921
2.14M
            for (i = 1; (unsigned int) base + i < use; i++) {
11922
2.14M
          if (buf[base + i] == '>') {
11923
#if 0
11924
              fprintf(stderr, "found\n");
11925
#endif
11926
56.0k
              goto found_end_int_subset;
11927
56.0k
          }
11928
2.08M
          if (!IS_BLANK_CH(buf[base + i])) {
11929
#if 0
11930
              fprintf(stderr, "not found\n");
11931
#endif
11932
514k
              goto not_end_of_int_subset;
11933
514k
          }
11934
2.08M
      }
11935
#if 0
11936
      fprintf(stderr, "end of stream\n");
11937
#endif
11938
1.04k
            break;
11939
11940
571k
        }
11941
4.64G
not_end_of_int_subset:
11942
4.64G
                    continue; /* for */
11943
4.64G
    }
11944
    /*
11945
     * We didn't found the end of the Internal subset
11946
     */
11947
93.4k
                if (quote == 0)
11948
33.3k
                    ctxt->checkIndex = base;
11949
60.1k
                else
11950
60.1k
                    ctxt->checkIndex = 0;
11951
#ifdef DEBUG_PUSH
11952
    if (next == 0)
11953
        xmlGenericError(xmlGenericErrorContext,
11954
          "PP: lookup of int subset end filed\n");
11955
#endif
11956
93.4k
          goto done;
11957
11958
56.0k
found_end_int_subset:
11959
56.0k
                ctxt->checkIndex = 0;
11960
56.0k
    xmlParseInternalSubset(ctxt);
11961
56.0k
    if (ctxt->instate == XML_PARSER_EOF)
11962
6.63k
        goto done;
11963
49.4k
    ctxt->inSubset = 2;
11964
49.4k
    if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11965
48.9k
        (ctxt->sax->externalSubset != NULL))
11966
48.9k
        ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
11967
48.9k
          ctxt->extSubSystem, ctxt->extSubURI);
11968
49.4k
    ctxt->inSubset = 0;
11969
49.4k
    xmlCleanSpecialAttr(ctxt);
11970
49.4k
    if (ctxt->instate == XML_PARSER_EOF)
11971
0
        goto done;
11972
49.4k
    ctxt->instate = XML_PARSER_PROLOG;
11973
49.4k
    ctxt->checkIndex = 0;
11974
#ifdef DEBUG_PUSH
11975
    xmlGenericError(xmlGenericErrorContext,
11976
      "PP: entering PROLOG\n");
11977
#endif
11978
49.4k
                break;
11979
49.4k
      }
11980
0
            case XML_PARSER_COMMENT:
11981
0
    xmlGenericError(xmlGenericErrorContext,
11982
0
      "PP: internal error, state == COMMENT\n");
11983
0
    ctxt->instate = XML_PARSER_CONTENT;
11984
#ifdef DEBUG_PUSH
11985
    xmlGenericError(xmlGenericErrorContext,
11986
      "PP: entering CONTENT\n");
11987
#endif
11988
0
    break;
11989
0
            case XML_PARSER_IGNORE:
11990
0
    xmlGenericError(xmlGenericErrorContext,
11991
0
      "PP: internal error, state == IGNORE");
11992
0
          ctxt->instate = XML_PARSER_DTD;
11993
#ifdef DEBUG_PUSH
11994
    xmlGenericError(xmlGenericErrorContext,
11995
      "PP: entering DTD\n");
11996
#endif
11997
0
          break;
11998
0
            case XML_PARSER_PI:
11999
0
    xmlGenericError(xmlGenericErrorContext,
12000
0
      "PP: internal error, state == PI\n");
12001
0
    ctxt->instate = XML_PARSER_CONTENT;
12002
#ifdef DEBUG_PUSH
12003
    xmlGenericError(xmlGenericErrorContext,
12004
      "PP: entering CONTENT\n");
12005
#endif
12006
0
    break;
12007
0
            case XML_PARSER_ENTITY_DECL:
12008
0
    xmlGenericError(xmlGenericErrorContext,
12009
0
      "PP: internal error, state == ENTITY_DECL\n");
12010
0
    ctxt->instate = XML_PARSER_DTD;
12011
#ifdef DEBUG_PUSH
12012
    xmlGenericError(xmlGenericErrorContext,
12013
      "PP: entering DTD\n");
12014
#endif
12015
0
    break;
12016
0
            case XML_PARSER_ENTITY_VALUE:
12017
0
    xmlGenericError(xmlGenericErrorContext,
12018
0
      "PP: internal error, state == ENTITY_VALUE\n");
12019
0
    ctxt->instate = XML_PARSER_CONTENT;
12020
#ifdef DEBUG_PUSH
12021
    xmlGenericError(xmlGenericErrorContext,
12022
      "PP: entering DTD\n");
12023
#endif
12024
0
    break;
12025
0
            case XML_PARSER_ATTRIBUTE_VALUE:
12026
0
    xmlGenericError(xmlGenericErrorContext,
12027
0
      "PP: internal error, state == ATTRIBUTE_VALUE\n");
12028
0
    ctxt->instate = XML_PARSER_START_TAG;
12029
#ifdef DEBUG_PUSH
12030
    xmlGenericError(xmlGenericErrorContext,
12031
      "PP: entering START_TAG\n");
12032
#endif
12033
0
    break;
12034
0
            case XML_PARSER_SYSTEM_LITERAL:
12035
0
    xmlGenericError(xmlGenericErrorContext,
12036
0
      "PP: internal error, state == SYSTEM_LITERAL\n");
12037
0
    ctxt->instate = XML_PARSER_START_TAG;
12038
#ifdef DEBUG_PUSH
12039
    xmlGenericError(xmlGenericErrorContext,
12040
      "PP: entering START_TAG\n");
12041
#endif
12042
0
    break;
12043
0
            case XML_PARSER_PUBLIC_LITERAL:
12044
0
    xmlGenericError(xmlGenericErrorContext,
12045
0
      "PP: internal error, state == PUBLIC_LITERAL\n");
12046
0
    ctxt->instate = XML_PARSER_START_TAG;
12047
#ifdef DEBUG_PUSH
12048
    xmlGenericError(xmlGenericErrorContext,
12049
      "PP: entering START_TAG\n");
12050
#endif
12051
0
    break;
12052
85.7M
  }
12053
85.7M
    }
12054
1.87M
done:
12055
#ifdef DEBUG_PUSH
12056
    xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
12057
#endif
12058
1.87M
    return(ret);
12059
1.30k
encoding_error:
12060
1.30k
    {
12061
1.30k
        char buffer[150];
12062
12063
1.30k
  snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
12064
1.30k
      ctxt->input->cur[0], ctxt->input->cur[1],
12065
1.30k
      ctxt->input->cur[2], ctxt->input->cur[3]);
12066
1.30k
  __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
12067
1.30k
         "Input is not proper UTF-8, indicate encoding !\n%s",
12068
1.30k
         BAD_CAST buffer, NULL);
12069
1.30k
    }
12070
1.30k
    return(0);
12071
7.73M
}
12072
12073
/**
12074
 * xmlParseCheckTransition:
12075
 * @ctxt:  an XML parser context
12076
 * @chunk:  a char array
12077
 * @size:  the size in byte of the chunk
12078
 *
12079
 * Check depending on the current parser state if the chunk given must be
12080
 * processed immediately or one need more data to advance on parsing.
12081
 *
12082
 * Returns -1 in case of error, 0 if the push is not needed and 1 if needed
12083
 */
12084
static int
12085
6.05M
xmlParseCheckTransition(xmlParserCtxtPtr ctxt, const char *chunk, int size) {
12086
6.05M
    if ((ctxt == NULL) || (chunk == NULL) || (size < 0))
12087
0
        return(-1);
12088
6.05M
    if (ctxt->instate == XML_PARSER_START_TAG) {
12089
1.00M
        if (memchr(chunk, '>', size) != NULL)
12090
192k
            return(1);
12091
809k
        return(0);
12092
1.00M
    }
12093
5.05M
    if (ctxt->progressive == XML_PARSER_COMMENT) {
12094
54.0k
        if (memchr(chunk, '>', size) != NULL)
12095
35.7k
            return(1);
12096
18.3k
        return(0);
12097
54.0k
    }
12098
5.00M
    if (ctxt->instate == XML_PARSER_CDATA_SECTION) {
12099
120k
        if (memchr(chunk, '>', size) != NULL)
12100
83.8k
            return(1);
12101
36.6k
        return(0);
12102
120k
    }
12103
4.88M
    if (ctxt->progressive == XML_PARSER_PI) {
12104
488k
        if (memchr(chunk, '>', size) != NULL)
12105
289k
            return(1);
12106
199k
        return(0);
12107
488k
    }
12108
4.39M
    if (ctxt->instate == XML_PARSER_END_TAG) {
12109
51.4k
        if (memchr(chunk, '>', size) != NULL)
12110
23.9k
            return(1);
12111
27.5k
        return(0);
12112
51.4k
    }
12113
4.34M
    if ((ctxt->progressive == XML_PARSER_DTD) ||
12114
4.03M
        (ctxt->instate == XML_PARSER_DTD)) {
12115
1.39M
        if (memchr(chunk, '>', size) != NULL)
12116
86.1k
            return(1);
12117
1.30M
        return(0);
12118
1.39M
    }
12119
2.95M
    return(1);
12120
4.34M
}
12121
12122
/**
12123
 * xmlParseChunk:
12124
 * @ctxt:  an XML parser context
12125
 * @chunk:  a char array
12126
 * @size:  the size in byte of the chunk
12127
 * @terminate:  last chunk indicator
12128
 *
12129
 * Parse a Chunk of memory
12130
 *
12131
 * Returns zero if no error, the xmlParserErrors otherwise.
12132
 */
12133
int
12134
xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
12135
6.22M
              int terminate) {
12136
6.22M
    int end_in_lf = 0;
12137
6.22M
    int remain = 0;
12138
6.22M
    size_t old_avail = 0;
12139
6.22M
    size_t avail = 0;
12140
12141
6.22M
    if (ctxt == NULL)
12142
0
        return(XML_ERR_INTERNAL_ERROR);
12143
6.22M
    if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12144
8.35k
        return(ctxt->errNo);
12145
6.21M
    if (ctxt->instate == XML_PARSER_EOF)
12146
18.3k
        return(-1);
12147
6.20M
    if (ctxt->instate == XML_PARSER_START)
12148
2.13M
        xmlDetectSAX2(ctxt);
12149
6.20M
    if ((size > 0) && (chunk != NULL) && (!terminate) &&
12150
6.05M
        (chunk[size - 1] == '\r')) {
12151
19.3k
  end_in_lf = 1;
12152
19.3k
  size--;
12153
19.3k
    }
12154
12155
10.1M
xmldecl_done:
12156
12157
10.1M
    if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
12158
10.0M
        (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF))  {
12159
10.0M
  size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12160
10.0M
  size_t cur = ctxt->input->cur - ctxt->input->base;
12161
10.0M
  int res;
12162
12163
10.0M
        old_avail = xmlBufUse(ctxt->input->buf->buffer);
12164
        /*
12165
         * Specific handling if we autodetected an encoding, we should not
12166
         * push more than the first line ... which depend on the encoding
12167
         * And only push the rest once the final encoding was detected
12168
         */
12169
10.0M
        if ((ctxt->instate == XML_PARSER_START) && (ctxt->input != NULL) &&
12170
6.04M
            (ctxt->input->buf != NULL) && (ctxt->input->buf->encoder != NULL)) {
12171
4.72M
            unsigned int len = 45;
12172
12173
4.72M
            if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12174
4.72M
                               BAD_CAST "UTF-16")) ||
12175
4.01k
                (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12176
4.01k
                               BAD_CAST "UTF16")))
12177
4.71M
                len = 90;
12178
4.01k
            else if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12179
4.01k
                                    BAD_CAST "UCS-4")) ||
12180
2.59k
                     (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12181
2.59k
                                    BAD_CAST "UCS4")))
12182
1.42k
                len = 180;
12183
12184
4.72M
            if (ctxt->input->buf->rawconsumed < len)
12185
3.89k
                len -= ctxt->input->buf->rawconsumed;
12186
12187
            /*
12188
             * Change size for reading the initial declaration only
12189
             * if size is greater than len. Otherwise, memmove in xmlBufferAdd
12190
             * will blindly copy extra bytes from memory.
12191
             */
12192
4.72M
            if ((unsigned int) size > len) {
12193
3.93M
                remain = size - len;
12194
3.93M
                size = len;
12195
3.93M
            } else {
12196
789k
                remain = 0;
12197
789k
            }
12198
4.72M
        }
12199
10.0M
  res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12200
10.0M
  if (res < 0) {
12201
397
      ctxt->errNo = XML_PARSER_EOF;
12202
397
      xmlHaltParser(ctxt);
12203
397
      return (XML_PARSER_EOF);
12204
397
  }
12205
10.0M
        xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
12206
#ifdef DEBUG_PUSH
12207
  xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12208
#endif
12209
12210
10.0M
    } else if (ctxt->instate != XML_PARSER_EOF) {
12211
84.9k
  if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
12212
84.9k
      xmlParserInputBufferPtr in = ctxt->input->buf;
12213
84.9k
      if ((in->encoder != NULL) && (in->buffer != NULL) &&
12214
7.05k
        (in->raw != NULL)) {
12215
7.05k
    int nbchars;
12216
7.05k
    size_t base = xmlBufGetInputBase(in->buffer, ctxt->input);
12217
7.05k
    size_t current = ctxt->input->cur - ctxt->input->base;
12218
12219
7.05k
    nbchars = xmlCharEncInput(in, terminate);
12220
7.05k
    if (nbchars < 0) {
12221
        /* TODO 2.6.0 */
12222
222
        xmlGenericError(xmlGenericErrorContext,
12223
222
            "xmlParseChunk: encoder error\n");
12224
222
        return(XML_ERR_INVALID_ENCODING);
12225
222
    }
12226
6.83k
    xmlBufSetInputBaseCur(in->buffer, ctxt->input, base, current);
12227
6.83k
      }
12228
84.9k
  }
12229
84.9k
    }
12230
10.1M
    if (remain != 0) {
12231
3.93M
        xmlParseTryOrFinish(ctxt, 0);
12232
6.19M
    } else {
12233
6.19M
        if ((ctxt->input != NULL) && (ctxt->input->buf != NULL))
12234
6.19M
            avail = xmlBufUse(ctxt->input->buf->buffer);
12235
        /*
12236
         * Depending on the current state it may not be such
12237
         * a good idea to try parsing if there is nothing in the chunk
12238
         * which would be worth doing a parser state transition and we
12239
         * need to wait for more data
12240
         */
12241
6.19M
        if ((terminate) || (avail > XML_MAX_TEXT_LENGTH) ||
12242
6.05M
            (old_avail == 0) || (avail == 0) ||
12243
6.05M
            (xmlParseCheckTransition(ctxt,
12244
6.05M
                       (const char *)&ctxt->input->base[old_avail],
12245
6.05M
                                     avail - old_avail)))
12246
3.80M
            xmlParseTryOrFinish(ctxt, terminate);
12247
6.19M
    }
12248
10.1M
    if (ctxt->instate == XML_PARSER_EOF)
12249
52.6k
        return(ctxt->errNo);
12250
12251
10.0M
    if ((ctxt->input != NULL) &&
12252
10.0M
         (((ctxt->input->end - ctxt->input->cur) > XML_MAX_LOOKUP_LIMIT) ||
12253
10.0M
         ((ctxt->input->cur - ctxt->input->base) > XML_MAX_LOOKUP_LIMIT)) &&
12254
94
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
12255
94
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
12256
94
        xmlHaltParser(ctxt);
12257
94
    }
12258
10.0M
    if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12259
2.78k
        return(ctxt->errNo);
12260
12261
10.0M
    if (remain != 0) {
12262
3.93M
        chunk += size;
12263
3.93M
        size = remain;
12264
3.93M
        remain = 0;
12265
3.93M
        goto xmldecl_done;
12266
3.93M
    }
12267
6.14M
    if ((end_in_lf == 1) && (ctxt->input != NULL) &&
12268
19.1k
        (ctxt->input->buf != NULL)) {
12269
19.1k
  size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
12270
19.1k
           ctxt->input);
12271
19.1k
  size_t current = ctxt->input->cur - ctxt->input->base;
12272
12273
19.1k
  xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
12274
12275
19.1k
  xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
12276
19.1k
            base, current);
12277
19.1k
    }
12278
6.14M
    if (terminate) {
12279
  /*
12280
   * Check for termination
12281
   */
12282
111k
  int cur_avail = 0;
12283
12284
111k
  if (ctxt->input != NULL) {
12285
111k
      if (ctxt->input->buf == NULL)
12286
0
    cur_avail = ctxt->input->length -
12287
0
          (ctxt->input->cur - ctxt->input->base);
12288
111k
      else
12289
111k
    cur_avail = xmlBufUse(ctxt->input->buf->buffer) -
12290
111k
                    (ctxt->input->cur - ctxt->input->base);
12291
111k
  }
12292
12293
111k
  if ((ctxt->instate != XML_PARSER_EOF) &&
12294
111k
      (ctxt->instate != XML_PARSER_EPILOG)) {
12295
75.4k
      xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
12296
75.4k
  }
12297
111k
  if ((ctxt->instate == XML_PARSER_EPILOG) && (cur_avail > 0)) {
12298
340
      xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
12299
340
  }
12300
111k
  if (ctxt->instate != XML_PARSER_EOF) {
12301
111k
      if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
12302
111k
    ctxt->sax->endDocument(ctxt->userData);
12303
111k
  }
12304
111k
  ctxt->instate = XML_PARSER_EOF;
12305
111k
    }
12306
6.14M
    if (ctxt->wellFormed == 0)
12307
104k
  return((xmlParserErrors) ctxt->errNo);
12308
6.03M
    else
12309
6.03M
        return(0);
12310
6.14M
}
12311
12312
/************************************************************************
12313
 *                  *
12314
 *    I/O front end functions to the parser     *
12315
 *                  *
12316
 ************************************************************************/
12317
12318
/**
12319
 * xmlCreatePushParserCtxt:
12320
 * @sax:  a SAX handler
12321
 * @user_data:  The user data returned on SAX callbacks
12322
 * @chunk:  a pointer to an array of chars
12323
 * @size:  number of chars in the array
12324
 * @filename:  an optional file name or URI
12325
 *
12326
 * Create a parser context for using the XML parser in push mode.
12327
 * If @buffer and @size are non-NULL, the data is used to detect
12328
 * the encoding.  The remaining characters will be parsed so they
12329
 * don't need to be fed in again through xmlParseChunk.
12330
 * To allow content encoding detection, @size should be >= 4
12331
 * The value of @filename is used for fetching external entities
12332
 * and error/warning reports.
12333
 *
12334
 * Returns the new parser context or NULL
12335
 */
12336
12337
xmlParserCtxtPtr
12338
xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12339
211k
                        const char *chunk, int size, const char *filename) {
12340
211k
    xmlParserCtxtPtr ctxt;
12341
211k
    xmlParserInputPtr inputStream;
12342
211k
    xmlParserInputBufferPtr buf;
12343
211k
    xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
12344
12345
    /*
12346
     * plug some encoding conversion routines
12347
     */
12348
211k
    if ((chunk != NULL) && (size >= 4))
12349
211k
  enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
12350
12351
211k
    buf = xmlAllocParserInputBuffer(enc);
12352
211k
    if (buf == NULL) return(NULL);
12353
12354
211k
    ctxt = xmlNewParserCtxt();
12355
211k
    if (ctxt == NULL) {
12356
0
        xmlErrMemory(NULL, "creating parser: out of memory\n");
12357
0
  xmlFreeParserInputBuffer(buf);
12358
0
  return(NULL);
12359
0
    }
12360
211k
    ctxt->dictNames = 1;
12361
211k
    ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
12362
211k
    if (ctxt->pushTab == NULL) {
12363
0
        xmlErrMemory(ctxt, NULL);
12364
0
  xmlFreeParserInputBuffer(buf);
12365
0
  xmlFreeParserCtxt(ctxt);
12366
0
  return(NULL);
12367
0
    }
12368
211k
    if (sax != NULL) {
12369
211k
#ifdef LIBXML_SAX1_ENABLED
12370
211k
  if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
12371
211k
#endif /* LIBXML_SAX1_ENABLED */
12372
211k
      xmlFree(ctxt->sax);
12373
211k
  ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
12374
211k
  if (ctxt->sax == NULL) {
12375
0
      xmlErrMemory(ctxt, NULL);
12376
0
      xmlFreeParserInputBuffer(buf);
12377
0
      xmlFreeParserCtxt(ctxt);
12378
0
      return(NULL);
12379
0
  }
12380
211k
  memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
12381
211k
  if (sax->initialized == XML_SAX2_MAGIC)
12382
211k
      memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
12383
0
  else
12384
0
      memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12385
211k
  if (user_data != NULL)
12386
0
      ctxt->userData = user_data;
12387
211k
    }
12388
211k
    if (filename == NULL) {
12389
118k
  ctxt->directory = NULL;
12390
118k
    } else {
12391
92.4k
        ctxt->directory = xmlParserGetDirectory(filename);
12392
92.4k
    }
12393
12394
211k
    inputStream = xmlNewInputStream(ctxt);
12395
211k
    if (inputStream == NULL) {
12396
0
  xmlFreeParserCtxt(ctxt);
12397
0
  xmlFreeParserInputBuffer(buf);
12398
0
  return(NULL);
12399
0
    }
12400
12401
211k
    if (filename == NULL)
12402
118k
  inputStream->filename = NULL;
12403
92.4k
    else {
12404
92.4k
  inputStream->filename = (char *)
12405
92.4k
      xmlCanonicPath((const xmlChar *) filename);
12406
92.4k
  if (inputStream->filename == NULL) {
12407
0
      xmlFreeParserCtxt(ctxt);
12408
0
      xmlFreeParserInputBuffer(buf);
12409
0
      return(NULL);
12410
0
  }
12411
92.4k
    }
12412
211k
    inputStream->buf = buf;
12413
211k
    xmlBufResetInput(inputStream->buf->buffer, inputStream);
12414
211k
    inputPush(ctxt, inputStream);
12415
12416
    /*
12417
     * If the caller didn't provide an initial 'chunk' for determining
12418
     * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
12419
     * that it can be automatically determined later
12420
     */
12421
211k
    if ((size == 0) || (chunk == NULL)) {
12422
182
  ctxt->charset = XML_CHAR_ENCODING_NONE;
12423
211k
    } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
12424
211k
  size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12425
211k
  size_t cur = ctxt->input->cur - ctxt->input->base;
12426
12427
211k
  xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12428
12429
211k
        xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
12430
#ifdef DEBUG_PUSH
12431
  xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12432
#endif
12433
211k
    }
12434
12435
211k
    if (enc != XML_CHAR_ENCODING_NONE) {
12436
76.5k
        xmlSwitchEncoding(ctxt, enc);
12437
76.5k
    }
12438
12439
211k
    return(ctxt);
12440
211k
}
12441
#endif /* LIBXML_PUSH_ENABLED */
12442
12443
/**
12444
 * xmlHaltParser:
12445
 * @ctxt:  an XML parser context
12446
 *
12447
 * Blocks further parser processing don't override error
12448
 * for internal use
12449
 */
12450
static void
12451
107k
xmlHaltParser(xmlParserCtxtPtr ctxt) {
12452
107k
    if (ctxt == NULL)
12453
0
        return;
12454
107k
    ctxt->instate = XML_PARSER_EOF;
12455
107k
    ctxt->disableSAX = 1;
12456
107k
    while (ctxt->inputNr > 1)
12457
158
        xmlFreeInputStream(inputPop(ctxt));
12458
107k
    if (ctxt->input != NULL) {
12459
        /*
12460
   * in case there was a specific allocation deallocate before
12461
   * overriding base
12462
   */
12463
107k
        if (ctxt->input->free != NULL) {
12464
0
      ctxt->input->free((xmlChar *) ctxt->input->base);
12465
0
      ctxt->input->free = NULL;
12466
0
  }
12467
107k
  ctxt->input->cur = BAD_CAST"";
12468
107k
  ctxt->input->base = ctxt->input->cur;
12469
107k
        ctxt->input->end = ctxt->input->cur;
12470
107k
    }
12471
107k
}
12472
12473
/**
12474
 * xmlStopParser:
12475
 * @ctxt:  an XML parser context
12476
 *
12477
 * Blocks further parser processing
12478
 */
12479
void
12480
52.9k
xmlStopParser(xmlParserCtxtPtr ctxt) {
12481
52.9k
    if (ctxt == NULL)
12482
0
        return;
12483
52.9k
    xmlHaltParser(ctxt);
12484
52.9k
    ctxt->errNo = XML_ERR_USER_STOP;
12485
52.9k
}
12486
12487
/**
12488
 * xmlCreateIOParserCtxt:
12489
 * @sax:  a SAX handler
12490
 * @user_data:  The user data returned on SAX callbacks
12491
 * @ioread:  an I/O read function
12492
 * @ioclose:  an I/O close function
12493
 * @ioctx:  an I/O handler
12494
 * @enc:  the charset encoding if known
12495
 *
12496
 * Create a parser context for using the XML parser with an existing
12497
 * I/O stream
12498
 *
12499
 * Returns the new parser context or NULL
12500
 */
12501
xmlParserCtxtPtr
12502
xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12503
  xmlInputReadCallback   ioread, xmlInputCloseCallback  ioclose,
12504
0
  void *ioctx, xmlCharEncoding enc) {
12505
0
    xmlParserCtxtPtr ctxt;
12506
0
    xmlParserInputPtr inputStream;
12507
0
    xmlParserInputBufferPtr buf;
12508
12509
0
    if (ioread == NULL) return(NULL);
12510
12511
0
    buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
12512
0
    if (buf == NULL) {
12513
0
        if (ioclose != NULL)
12514
0
            ioclose(ioctx);
12515
0
        return (NULL);
12516
0
    }
12517
12518
0
    ctxt = xmlNewParserCtxt();
12519
0
    if (ctxt == NULL) {
12520
0
  xmlFreeParserInputBuffer(buf);
12521
0
  return(NULL);
12522
0
    }
12523
0
    if (sax != NULL) {
12524
0
#ifdef LIBXML_SAX1_ENABLED
12525
0
  if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
12526
0
#endif /* LIBXML_SAX1_ENABLED */
12527
0
      xmlFree(ctxt->sax);
12528
0
  ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
12529
0
  if (ctxt->sax == NULL) {
12530
0
      xmlErrMemory(ctxt, NULL);
12531
0
      xmlFreeParserCtxt(ctxt);
12532
0
      return(NULL);
12533
0
  }
12534
0
  memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
12535
0
  if (sax->initialized == XML_SAX2_MAGIC)
12536
0
      memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
12537
0
  else
12538
0
      memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12539
0
  if (user_data != NULL)
12540
0
      ctxt->userData = user_data;
12541
0
    }
12542
12543
0
    inputStream = xmlNewIOInputStream(ctxt, buf, enc);
12544
0
    if (inputStream == NULL) {
12545
0
  xmlFreeParserCtxt(ctxt);
12546
0
  return(NULL);
12547
0
    }
12548
0
    inputPush(ctxt, inputStream);
12549
12550
0
    return(ctxt);
12551
0
}
12552
12553
#ifdef LIBXML_VALID_ENABLED
12554
/************************************************************************
12555
 *                  *
12556
 *    Front ends when parsing a DTD       *
12557
 *                  *
12558
 ************************************************************************/
12559
12560
/**
12561
 * xmlIOParseDTD:
12562
 * @sax:  the SAX handler block or NULL
12563
 * @input:  an Input Buffer
12564
 * @enc:  the charset encoding if known
12565
 *
12566
 * Load and parse a DTD
12567
 *
12568
 * Returns the resulting xmlDtdPtr or NULL in case of error.
12569
 * @input will be freed by the function in any case.
12570
 */
12571
12572
xmlDtdPtr
12573
xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
12574
0
        xmlCharEncoding enc) {
12575
0
    xmlDtdPtr ret = NULL;
12576
0
    xmlParserCtxtPtr ctxt;
12577
0
    xmlParserInputPtr pinput = NULL;
12578
0
    xmlChar start[4];
12579
12580
0
    if (input == NULL)
12581
0
  return(NULL);
12582
12583
0
    ctxt = xmlNewParserCtxt();
12584
0
    if (ctxt == NULL) {
12585
0
        xmlFreeParserInputBuffer(input);
12586
0
  return(NULL);
12587
0
    }
12588
12589
    /* We are loading a DTD */
12590
0
    ctxt->options |= XML_PARSE_DTDLOAD;
12591
12592
    /*
12593
     * Set-up the SAX context
12594
     */
12595
0
    if (sax != NULL) {
12596
0
  if (ctxt->sax != NULL)
12597
0
      xmlFree(ctxt->sax);
12598
0
        ctxt->sax = sax;
12599
0
        ctxt->userData = ctxt;
12600
0
    }
12601
0
    xmlDetectSAX2(ctxt);
12602
12603
    /*
12604
     * generate a parser input from the I/O handler
12605
     */
12606
12607
0
    pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12608
0
    if (pinput == NULL) {
12609
0
        if (sax != NULL) ctxt->sax = NULL;
12610
0
        xmlFreeParserInputBuffer(input);
12611
0
  xmlFreeParserCtxt(ctxt);
12612
0
  return(NULL);
12613
0
    }
12614
12615
    /*
12616
     * plug some encoding conversion routines here.
12617
     */
12618
0
    if (xmlPushInput(ctxt, pinput) < 0) {
12619
0
        if (sax != NULL) ctxt->sax = NULL;
12620
0
  xmlFreeParserCtxt(ctxt);
12621
0
  return(NULL);
12622
0
    }
12623
0
    if (enc != XML_CHAR_ENCODING_NONE) {
12624
0
        xmlSwitchEncoding(ctxt, enc);
12625
0
    }
12626
12627
0
    pinput->filename = NULL;
12628
0
    pinput->line = 1;
12629
0
    pinput->col = 1;
12630
0
    pinput->base = ctxt->input->cur;
12631
0
    pinput->cur = ctxt->input->cur;
12632
0
    pinput->free = NULL;
12633
12634
    /*
12635
     * let's parse that entity knowing it's an external subset.
12636
     */
12637
0
    ctxt->inSubset = 2;
12638
0
    ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12639
0
    if (ctxt->myDoc == NULL) {
12640
0
  xmlErrMemory(ctxt, "New Doc failed");
12641
0
  return(NULL);
12642
0
    }
12643
0
    ctxt->myDoc->properties = XML_DOC_INTERNAL;
12644
0
    ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12645
0
                                 BAD_CAST "none", BAD_CAST "none");
12646
12647
0
    if ((enc == XML_CHAR_ENCODING_NONE) &&
12648
0
        ((ctxt->input->end - ctxt->input->cur) >= 4)) {
12649
  /*
12650
   * Get the 4 first bytes and decode the charset
12651
   * if enc != XML_CHAR_ENCODING_NONE
12652
   * plug some encoding conversion routines.
12653
   */
12654
0
  start[0] = RAW;
12655
0
  start[1] = NXT(1);
12656
0
  start[2] = NXT(2);
12657
0
  start[3] = NXT(3);
12658
0
  enc = xmlDetectCharEncoding(start, 4);
12659
0
  if (enc != XML_CHAR_ENCODING_NONE) {
12660
0
      xmlSwitchEncoding(ctxt, enc);
12661
0
  }
12662
0
    }
12663
12664
0
    xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
12665
12666
0
    if (ctxt->myDoc != NULL) {
12667
0
  if (ctxt->wellFormed) {
12668
0
      ret = ctxt->myDoc->extSubset;
12669
0
      ctxt->myDoc->extSubset = NULL;
12670
0
      if (ret != NULL) {
12671
0
    xmlNodePtr tmp;
12672
12673
0
    ret->doc = NULL;
12674
0
    tmp = ret->children;
12675
0
    while (tmp != NULL) {
12676
0
        tmp->doc = NULL;
12677
0
        tmp = tmp->next;
12678
0
    }
12679
0
      }
12680
0
  } else {
12681
0
      ret = NULL;
12682
0
  }
12683
0
        xmlFreeDoc(ctxt->myDoc);
12684
0
        ctxt->myDoc = NULL;
12685
0
    }
12686
0
    if (sax != NULL) ctxt->sax = NULL;
12687
0
    xmlFreeParserCtxt(ctxt);
12688
12689
0
    return(ret);
12690
0
}
12691
12692
/**
12693
 * xmlSAXParseDTD:
12694
 * @sax:  the SAX handler block
12695
 * @ExternalID:  a NAME* containing the External ID of the DTD
12696
 * @SystemID:  a NAME* containing the URL to the DTD
12697
 *
12698
 * Load and parse an external subset.
12699
 *
12700
 * Returns the resulting xmlDtdPtr or NULL in case of error.
12701
 */
12702
12703
xmlDtdPtr
12704
xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
12705
0
                          const xmlChar *SystemID) {
12706
0
    xmlDtdPtr ret = NULL;
12707
0
    xmlParserCtxtPtr ctxt;
12708
0
    xmlParserInputPtr input = NULL;
12709
0
    xmlCharEncoding enc;
12710
0
    xmlChar* systemIdCanonic;
12711
12712
0
    if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
12713
12714
0
    ctxt = xmlNewParserCtxt();
12715
0
    if (ctxt == NULL) {
12716
0
  return(NULL);
12717
0
    }
12718
12719
    /* We are loading a DTD */
12720
0
    ctxt->options |= XML_PARSE_DTDLOAD;
12721
12722
    /*
12723
     * Set-up the SAX context
12724
     */
12725
0
    if (sax != NULL) {
12726
0
  if (ctxt->sax != NULL)
12727
0
      xmlFree(ctxt->sax);
12728
0
        ctxt->sax = sax;
12729
0
        ctxt->userData = ctxt;
12730
0
    }
12731
12732
    /*
12733
     * Canonicalise the system ID
12734
     */
12735
0
    systemIdCanonic = xmlCanonicPath(SystemID);
12736
0
    if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
12737
0
  xmlFreeParserCtxt(ctxt);
12738
0
  return(NULL);
12739
0
    }
12740
12741
    /*
12742
     * Ask the Entity resolver to load the damn thing
12743
     */
12744
12745
0
    if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
12746
0
  input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
12747
0
                                   systemIdCanonic);
12748
0
    if (input == NULL) {
12749
0
        if (sax != NULL) ctxt->sax = NULL;
12750
0
  xmlFreeParserCtxt(ctxt);
12751
0
  if (systemIdCanonic != NULL)
12752
0
      xmlFree(systemIdCanonic);
12753
0
  return(NULL);
12754
0
    }
12755
12756
    /*
12757
     * plug some encoding conversion routines here.
12758
     */
12759
0
    if (xmlPushInput(ctxt, input) < 0) {
12760
0
        if (sax != NULL) ctxt->sax = NULL;
12761
0
  xmlFreeParserCtxt(ctxt);
12762
0
  if (systemIdCanonic != NULL)
12763
0
      xmlFree(systemIdCanonic);
12764
0
  return(NULL);
12765
0
    }
12766
0
    if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12767
0
  enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
12768
0
  xmlSwitchEncoding(ctxt, enc);
12769
0
    }
12770
12771
0
    if (input->filename == NULL)
12772
0
  input->filename = (char *) systemIdCanonic;
12773
0
    else
12774
0
  xmlFree(systemIdCanonic);
12775
0
    input->line = 1;
12776
0
    input->col = 1;
12777
0
    input->base = ctxt->input->cur;
12778
0
    input->cur = ctxt->input->cur;
12779
0
    input->free = NULL;
12780
12781
    /*
12782
     * let's parse that entity knowing it's an external subset.
12783
     */
12784
0
    ctxt->inSubset = 2;
12785
0
    ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12786
0
    if (ctxt->myDoc == NULL) {
12787
0
  xmlErrMemory(ctxt, "New Doc failed");
12788
0
        if (sax != NULL) ctxt->sax = NULL;
12789
0
  xmlFreeParserCtxt(ctxt);
12790
0
  return(NULL);
12791
0
    }
12792
0
    ctxt->myDoc->properties = XML_DOC_INTERNAL;
12793
0
    ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12794
0
                                 ExternalID, SystemID);
12795
0
    xmlParseExternalSubset(ctxt, ExternalID, SystemID);
12796
12797
0
    if (ctxt->myDoc != NULL) {
12798
0
  if (ctxt->wellFormed) {
12799
0
      ret = ctxt->myDoc->extSubset;
12800
0
      ctxt->myDoc->extSubset = NULL;
12801
0
      if (ret != NULL) {
12802
0
    xmlNodePtr tmp;
12803
12804
0
    ret->doc = NULL;
12805
0
    tmp = ret->children;
12806
0
    while (tmp != NULL) {
12807
0
        tmp->doc = NULL;
12808
0
        tmp = tmp->next;
12809
0
    }
12810
0
      }
12811
0
  } else {
12812
0
      ret = NULL;
12813
0
  }
12814
0
        xmlFreeDoc(ctxt->myDoc);
12815
0
        ctxt->myDoc = NULL;
12816
0
    }
12817
0
    if (sax != NULL) ctxt->sax = NULL;
12818
0
    xmlFreeParserCtxt(ctxt);
12819
12820
0
    return(ret);
12821
0
}
12822
12823
12824
/**
12825
 * xmlParseDTD:
12826
 * @ExternalID:  a NAME* containing the External ID of the DTD
12827
 * @SystemID:  a NAME* containing the URL to the DTD
12828
 *
12829
 * Load and parse an external subset.
12830
 *
12831
 * Returns the resulting xmlDtdPtr or NULL in case of error.
12832
 */
12833
12834
xmlDtdPtr
12835
0
xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
12836
0
    return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
12837
0
}
12838
#endif /* LIBXML_VALID_ENABLED */
12839
12840
/************************************************************************
12841
 *                  *
12842
 *    Front ends when parsing an Entity     *
12843
 *                  *
12844
 ************************************************************************/
12845
12846
/**
12847
 * xmlParseCtxtExternalEntity:
12848
 * @ctx:  the existing parsing context
12849
 * @URL:  the URL for the entity to load
12850
 * @ID:  the System ID for the entity to load
12851
 * @lst:  the return value for the set of parsed nodes
12852
 *
12853
 * Parse an external general entity within an existing parsing context
12854
 * An external general parsed entity is well-formed if it matches the
12855
 * production labeled extParsedEnt.
12856
 *
12857
 * [78] extParsedEnt ::= TextDecl? content
12858
 *
12859
 * Returns 0 if the entity is well formed, -1 in case of args problem and
12860
 *    the parser error code otherwise
12861
 */
12862
12863
int
12864
xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
12865
0
                 const xmlChar *ID, xmlNodePtr *lst) {
12866
0
    xmlParserCtxtPtr ctxt;
12867
0
    xmlDocPtr newDoc;
12868
0
    xmlNodePtr newRoot;
12869
0
    xmlSAXHandlerPtr oldsax = NULL;
12870
0
    int ret = 0;
12871
0
    xmlChar start[4];
12872
0
    xmlCharEncoding enc;
12873
12874
0
    if (ctx == NULL) return(-1);
12875
12876
0
    if (((ctx->depth > 40) && ((ctx->options & XML_PARSE_HUGE) == 0)) ||
12877
0
        (ctx->depth > 1024)) {
12878
0
  return(XML_ERR_ENTITY_LOOP);
12879
0
    }
12880
12881
0
    if (lst != NULL)
12882
0
        *lst = NULL;
12883
0
    if ((URL == NULL) && (ID == NULL))
12884
0
  return(-1);
12885
0
    if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
12886
0
  return(-1);
12887
12888
0
    ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, ctx);
12889
0
    if (ctxt == NULL) {
12890
0
  return(-1);
12891
0
    }
12892
12893
0
    oldsax = ctxt->sax;
12894
0
    ctxt->sax = ctx->sax;
12895
0
    xmlDetectSAX2(ctxt);
12896
0
    newDoc = xmlNewDoc(BAD_CAST "1.0");
12897
0
    if (newDoc == NULL) {
12898
0
  xmlFreeParserCtxt(ctxt);
12899
0
  return(-1);
12900
0
    }
12901
0
    newDoc->properties = XML_DOC_INTERNAL;
12902
0
    if (ctx->myDoc->dict) {
12903
0
  newDoc->dict = ctx->myDoc->dict;
12904
0
  xmlDictReference(newDoc->dict);
12905
0
    }
12906
0
    if (ctx->myDoc != NULL) {
12907
0
  newDoc->intSubset = ctx->myDoc->intSubset;
12908
0
  newDoc->extSubset = ctx->myDoc->extSubset;
12909
0
    }
12910
0
    if (ctx->myDoc->URL != NULL) {
12911
0
  newDoc->URL = xmlStrdup(ctx->myDoc->URL);
12912
0
    }
12913
0
    newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12914
0
    if (newRoot == NULL) {
12915
0
  ctxt->sax = oldsax;
12916
0
  xmlFreeParserCtxt(ctxt);
12917
0
  newDoc->intSubset = NULL;
12918
0
  newDoc->extSubset = NULL;
12919
0
        xmlFreeDoc(newDoc);
12920
0
  return(-1);
12921
0
    }
12922
0
    xmlAddChild((xmlNodePtr) newDoc, newRoot);
12923
0
    nodePush(ctxt, newDoc->children);
12924
0
    if (ctx->myDoc == NULL) {
12925
0
  ctxt->myDoc = newDoc;
12926
0
    } else {
12927
0
  ctxt->myDoc = ctx->myDoc;
12928
0
  newDoc->children->doc = ctx->myDoc;
12929
0
    }
12930
12931
    /*
12932
     * Get the 4 first bytes and decode the charset
12933
     * if enc != XML_CHAR_ENCODING_NONE
12934
     * plug some encoding conversion routines.
12935
     */
12936
0
    GROW
12937
0
    if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12938
0
  start[0] = RAW;
12939
0
  start[1] = NXT(1);
12940
0
  start[2] = NXT(2);
12941
0
  start[3] = NXT(3);
12942
0
  enc = xmlDetectCharEncoding(start, 4);
12943
0
  if (enc != XML_CHAR_ENCODING_NONE) {
12944
0
      xmlSwitchEncoding(ctxt, enc);
12945
0
  }
12946
0
    }
12947
12948
    /*
12949
     * Parse a possible text declaration first
12950
     */
12951
0
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
12952
0
  xmlParseTextDecl(ctxt);
12953
  /*
12954
   * An XML-1.0 document can't reference an entity not XML-1.0
12955
   */
12956
0
  if ((xmlStrEqual(ctx->version, BAD_CAST "1.0")) &&
12957
0
      (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
12958
0
      xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
12959
0
                     "Version mismatch between document and entity\n");
12960
0
  }
12961
0
    }
12962
12963
    /*
12964
     * If the user provided its own SAX callbacks then reuse the
12965
     * useData callback field, otherwise the expected setup in a
12966
     * DOM builder is to have userData == ctxt
12967
     */
12968
0
    if (ctx->userData == ctx)
12969
0
        ctxt->userData = ctxt;
12970
0
    else
12971
0
        ctxt->userData = ctx->userData;
12972
12973
    /*
12974
     * Doing validity checking on chunk doesn't make sense
12975
     */
12976
0
    ctxt->instate = XML_PARSER_CONTENT;
12977
0
    ctxt->validate = ctx->validate;
12978
0
    ctxt->valid = ctx->valid;
12979
0
    ctxt->loadsubset = ctx->loadsubset;
12980
0
    ctxt->depth = ctx->depth + 1;
12981
0
    ctxt->replaceEntities = ctx->replaceEntities;
12982
0
    if (ctxt->validate) {
12983
0
  ctxt->vctxt.error = ctx->vctxt.error;
12984
0
  ctxt->vctxt.warning = ctx->vctxt.warning;
12985
0
    } else {
12986
0
  ctxt->vctxt.error = NULL;
12987
0
  ctxt->vctxt.warning = NULL;
12988
0
    }
12989
0
    ctxt->vctxt.nodeTab = NULL;
12990
0
    ctxt->vctxt.nodeNr = 0;
12991
0
    ctxt->vctxt.nodeMax = 0;
12992
0
    ctxt->vctxt.node = NULL;
12993
0
    if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
12994
0
    ctxt->dict = ctx->dict;
12995
0
    ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12996
0
    ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12997
0
    ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
12998
0
    ctxt->dictNames = ctx->dictNames;
12999
0
    ctxt->attsDefault = ctx->attsDefault;
13000
0
    ctxt->attsSpecial = ctx->attsSpecial;
13001
0
    ctxt->linenumbers = ctx->linenumbers;
13002
13003
0
    xmlParseContent(ctxt);
13004
13005
0
    ctx->validate = ctxt->validate;
13006
0
    ctx->valid = ctxt->valid;
13007
0
    if ((RAW == '<') && (NXT(1) == '/')) {
13008
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13009
0
    } else if (RAW != 0) {
13010
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13011
0
    }
13012
0
    if (ctxt->node != newDoc->children) {
13013
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13014
0
    }
13015
13016
0
    if (!ctxt->wellFormed) {
13017
0
        if (ctxt->errNo == 0)
13018
0
      ret = 1;
13019
0
  else
13020
0
      ret = ctxt->errNo;
13021
0
    } else {
13022
0
  if (lst != NULL) {
13023
0
      xmlNodePtr cur;
13024
13025
      /*
13026
       * Return the newly created nodeset after unlinking it from
13027
       * they pseudo parent.
13028
       */
13029
0
      cur = newDoc->children->children;
13030
0
      *lst = cur;
13031
0
      while (cur != NULL) {
13032
0
    cur->parent = NULL;
13033
0
    cur = cur->next;
13034
0
      }
13035
0
            newDoc->children->children = NULL;
13036
0
  }
13037
0
  ret = 0;
13038
0
    }
13039
0
    ctxt->sax = oldsax;
13040
0
    ctxt->dict = NULL;
13041
0
    ctxt->attsDefault = NULL;
13042
0
    ctxt->attsSpecial = NULL;
13043
0
    xmlFreeParserCtxt(ctxt);
13044
0
    newDoc->intSubset = NULL;
13045
0
    newDoc->extSubset = NULL;
13046
0
    xmlFreeDoc(newDoc);
13047
13048
0
    return(ret);
13049
0
}
13050
13051
/**
13052
 * xmlParseExternalEntityPrivate:
13053
 * @doc:  the document the chunk pertains to
13054
 * @oldctxt:  the previous parser context if available
13055
 * @sax:  the SAX handler bloc (possibly NULL)
13056
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
13057
 * @depth:  Used for loop detection, use 0
13058
 * @URL:  the URL for the entity to load
13059
 * @ID:  the System ID for the entity to load
13060
 * @list:  the return value for the set of parsed nodes
13061
 *
13062
 * Private version of xmlParseExternalEntity()
13063
 *
13064
 * Returns 0 if the entity is well formed, -1 in case of args problem and
13065
 *    the parser error code otherwise
13066
 */
13067
13068
static xmlParserErrors
13069
xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
13070
                xmlSAXHandlerPtr sax,
13071
          void *user_data, int depth, const xmlChar *URL,
13072
0
          const xmlChar *ID, xmlNodePtr *list) {
13073
0
    xmlParserCtxtPtr ctxt;
13074
0
    xmlDocPtr newDoc;
13075
0
    xmlNodePtr newRoot;
13076
0
    xmlSAXHandlerPtr oldsax = NULL;
13077
0
    xmlParserErrors ret = XML_ERR_OK;
13078
0
    xmlChar start[4];
13079
0
    xmlCharEncoding enc;
13080
13081
0
    if (((depth > 40) &&
13082
0
  ((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13083
0
  (depth > 1024)) {
13084
0
  return(XML_ERR_ENTITY_LOOP);
13085
0
    }
13086
13087
0
    if (list != NULL)
13088
0
        *list = NULL;
13089
0
    if ((URL == NULL) && (ID == NULL))
13090
0
  return(XML_ERR_INTERNAL_ERROR);
13091
0
    if (doc == NULL)
13092
0
  return(XML_ERR_INTERNAL_ERROR);
13093
13094
13095
0
    ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, oldctxt);
13096
0
    if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
13097
0
    ctxt->userData = ctxt;
13098
0
    if (oldctxt != NULL) {
13099
0
  ctxt->_private = oldctxt->_private;
13100
0
  ctxt->loadsubset = oldctxt->loadsubset;
13101
0
  ctxt->validate = oldctxt->validate;
13102
0
  ctxt->external = oldctxt->external;
13103
0
  ctxt->record_info = oldctxt->record_info;
13104
0
  ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
13105
0
  ctxt->node_seq.length = oldctxt->node_seq.length;
13106
0
  ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
13107
0
    } else {
13108
  /*
13109
   * Doing validity checking on chunk without context
13110
   * doesn't make sense
13111
   */
13112
0
  ctxt->_private = NULL;
13113
0
  ctxt->validate = 0;
13114
0
  ctxt->external = 2;
13115
0
  ctxt->loadsubset = 0;
13116
0
    }
13117
0
    if (sax != NULL) {
13118
0
  oldsax = ctxt->sax;
13119
0
        ctxt->sax = sax;
13120
0
  if (user_data != NULL)
13121
0
      ctxt->userData = user_data;
13122
0
    }
13123
0
    xmlDetectSAX2(ctxt);
13124
0
    newDoc = xmlNewDoc(BAD_CAST "1.0");
13125
0
    if (newDoc == NULL) {
13126
0
  ctxt->node_seq.maximum = 0;
13127
0
  ctxt->node_seq.length = 0;
13128
0
  ctxt->node_seq.buffer = NULL;
13129
0
  xmlFreeParserCtxt(ctxt);
13130
0
  return(XML_ERR_INTERNAL_ERROR);
13131
0
    }
13132
0
    newDoc->properties = XML_DOC_INTERNAL;
13133
0
    newDoc->intSubset = doc->intSubset;
13134
0
    newDoc->extSubset = doc->extSubset;
13135
0
    newDoc->dict = doc->dict;
13136
0
    xmlDictReference(newDoc->dict);
13137
13138
0
    if (doc->URL != NULL) {
13139
0
  newDoc->URL = xmlStrdup(doc->URL);
13140
0
    }
13141
0
    newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13142
0
    if (newRoot == NULL) {
13143
0
  if (sax != NULL)
13144
0
      ctxt->sax = oldsax;
13145
0
  ctxt->node_seq.maximum = 0;
13146
0
  ctxt->node_seq.length = 0;
13147
0
  ctxt->node_seq.buffer = NULL;
13148
0
  xmlFreeParserCtxt(ctxt);
13149
0
  newDoc->intSubset = NULL;
13150
0
  newDoc->extSubset = NULL;
13151
0
        xmlFreeDoc(newDoc);
13152
0
  return(XML_ERR_INTERNAL_ERROR);
13153
0
    }
13154
0
    xmlAddChild((xmlNodePtr) newDoc, newRoot);
13155
0
    nodePush(ctxt, newDoc->children);
13156
0
    ctxt->myDoc = doc;
13157
0
    newRoot->doc = doc;
13158
13159
    /*
13160
     * Get the 4 first bytes and decode the charset
13161
     * if enc != XML_CHAR_ENCODING_NONE
13162
     * plug some encoding conversion routines.
13163
     */
13164
0
    GROW;
13165
0
    if ((ctxt->input->end - ctxt->input->cur) >= 4) {
13166
0
  start[0] = RAW;
13167
0
  start[1] = NXT(1);
13168
0
  start[2] = NXT(2);
13169
0
  start[3] = NXT(3);
13170
0
  enc = xmlDetectCharEncoding(start, 4);
13171
0
  if (enc != XML_CHAR_ENCODING_NONE) {
13172
0
      xmlSwitchEncoding(ctxt, enc);
13173
0
  }
13174
0
    }
13175
13176
    /*
13177
     * Parse a possible text declaration first
13178
     */
13179
0
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
13180
0
  xmlParseTextDecl(ctxt);
13181
0
    }
13182
13183
0
    ctxt->instate = XML_PARSER_CONTENT;
13184
0
    ctxt->depth = depth;
13185
13186
0
    xmlParseContent(ctxt);
13187
13188
0
    if ((RAW == '<') && (NXT(1) == '/')) {
13189
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13190
0
    } else if (RAW != 0) {
13191
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13192
0
    }
13193
0
    if (ctxt->node != newDoc->children) {
13194
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13195
0
    }
13196
13197
0
    if (!ctxt->wellFormed) {
13198
0
        if (ctxt->errNo == 0)
13199
0
      ret = XML_ERR_INTERNAL_ERROR;
13200
0
  else
13201
0
      ret = (xmlParserErrors)ctxt->errNo;
13202
0
    } else {
13203
0
  if (list != NULL) {
13204
0
      xmlNodePtr cur;
13205
13206
      /*
13207
       * Return the newly created nodeset after unlinking it from
13208
       * they pseudo parent.
13209
       */
13210
0
      cur = newDoc->children->children;
13211
0
      *list = cur;
13212
0
      while (cur != NULL) {
13213
0
    cur->parent = NULL;
13214
0
    cur = cur->next;
13215
0
      }
13216
0
            newDoc->children->children = NULL;
13217
0
  }
13218
0
  ret = XML_ERR_OK;
13219
0
    }
13220
13221
    /*
13222
     * Record in the parent context the number of entities replacement
13223
     * done when parsing that reference.
13224
     */
13225
0
    if (oldctxt != NULL)
13226
0
        oldctxt->nbentities += ctxt->nbentities;
13227
13228
    /*
13229
     * Also record the size of the entity parsed
13230
     */
13231
0
    if (ctxt->input != NULL && oldctxt != NULL) {
13232
0
  oldctxt->sizeentities += ctxt->input->consumed;
13233
0
  oldctxt->sizeentities += (ctxt->input->cur - ctxt->input->base);
13234
0
    }
13235
    /*
13236
     * And record the last error if any
13237
     */
13238
0
    if ((oldctxt != NULL) && (ctxt->lastError.code != XML_ERR_OK))
13239
0
        xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13240
13241
0
    if (sax != NULL)
13242
0
  ctxt->sax = oldsax;
13243
0
    if (oldctxt != NULL) {
13244
0
        oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
13245
0
        oldctxt->node_seq.length = ctxt->node_seq.length;
13246
0
        oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
13247
0
    }
13248
0
    ctxt->node_seq.maximum = 0;
13249
0
    ctxt->node_seq.length = 0;
13250
0
    ctxt->node_seq.buffer = NULL;
13251
0
    xmlFreeParserCtxt(ctxt);
13252
0
    newDoc->intSubset = NULL;
13253
0
    newDoc->extSubset = NULL;
13254
0
    xmlFreeDoc(newDoc);
13255
13256
0
    return(ret);
13257
0
}
13258
13259
#ifdef LIBXML_SAX1_ENABLED
13260
/**
13261
 * xmlParseExternalEntity:
13262
 * @doc:  the document the chunk pertains to
13263
 * @sax:  the SAX handler bloc (possibly NULL)
13264
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
13265
 * @depth:  Used for loop detection, use 0
13266
 * @URL:  the URL for the entity to load
13267
 * @ID:  the System ID for the entity to load
13268
 * @lst:  the return value for the set of parsed nodes
13269
 *
13270
 * Parse an external general entity
13271
 * An external general parsed entity is well-formed if it matches the
13272
 * production labeled extParsedEnt.
13273
 *
13274
 * [78] extParsedEnt ::= TextDecl? content
13275
 *
13276
 * Returns 0 if the entity is well formed, -1 in case of args problem and
13277
 *    the parser error code otherwise
13278
 */
13279
13280
int
13281
xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
13282
0
    int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
13283
0
    return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
13284
0
                           ID, lst));
13285
0
}
13286
13287
/**
13288
 * xmlParseBalancedChunkMemory:
13289
 * @doc:  the document the chunk pertains to
13290
 * @sax:  the SAX handler bloc (possibly NULL)
13291
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
13292
 * @depth:  Used for loop detection, use 0
13293
 * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
13294
 * @lst:  the return value for the set of parsed nodes
13295
 *
13296
 * Parse a well-balanced chunk of an XML document
13297
 * called by the parser
13298
 * The allowed sequence for the Well Balanced Chunk is the one defined by
13299
 * the content production in the XML grammar:
13300
 *
13301
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13302
 *
13303
 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13304
 *    the parser error code otherwise
13305
 */
13306
13307
int
13308
xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13309
0
     void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
13310
0
    return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
13311
0
                                                depth, string, lst, 0 );
13312
0
}
13313
#endif /* LIBXML_SAX1_ENABLED */
13314
13315
/**
13316
 * xmlParseBalancedChunkMemoryInternal:
13317
 * @oldctxt:  the existing parsing context
13318
 * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
13319
 * @user_data:  the user data field for the parser context
13320
 * @lst:  the return value for the set of parsed nodes
13321
 *
13322
 *
13323
 * Parse a well-balanced chunk of an XML document
13324
 * called by the parser
13325
 * The allowed sequence for the Well Balanced Chunk is the one defined by
13326
 * the content production in the XML grammar:
13327
 *
13328
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13329
 *
13330
 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13331
 * error code otherwise
13332
 *
13333
 * In case recover is set to 1, the nodelist will not be empty even if
13334
 * the parsed chunk is not well balanced.
13335
 */
13336
static xmlParserErrors
13337
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
13338
198k
  const xmlChar *string, void *user_data, xmlNodePtr *lst) {
13339
198k
    xmlParserCtxtPtr ctxt;
13340
198k
    xmlDocPtr newDoc = NULL;
13341
198k
    xmlNodePtr newRoot;
13342
198k
    xmlSAXHandlerPtr oldsax = NULL;
13343
198k
    xmlNodePtr content = NULL;
13344
198k
    xmlNodePtr last = NULL;
13345
198k
    int size;
13346
198k
    xmlParserErrors ret = XML_ERR_OK;
13347
198k
#ifdef SAX2
13348
198k
    int i;
13349
198k
#endif
13350
13351
198k
    if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13352
189k
        (oldctxt->depth >  1024)) {
13353
8.30k
  return(XML_ERR_ENTITY_LOOP);
13354
8.30k
    }
13355
13356
13357
189k
    if (lst != NULL)
13358
174k
        *lst = NULL;
13359
189k
    if (string == NULL)
13360
0
        return(XML_ERR_INTERNAL_ERROR);
13361
13362
189k
    size = xmlStrlen(string);
13363
13364
189k
    ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13365
189k
    if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
13366
185k
    if (user_data != NULL)
13367
0
  ctxt->userData = user_data;
13368
185k
    else
13369
185k
  ctxt->userData = ctxt;
13370
185k
    if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
13371
185k
    ctxt->dict = oldctxt->dict;
13372
185k
    ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13373
185k
    ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13374
185k
    ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13375
13376
185k
#ifdef SAX2
13377
    /* propagate namespaces down the entity */
13378
1.18M
    for (i = 0;i < oldctxt->nsNr;i += 2) {
13379
995k
        nsPush(ctxt, oldctxt->nsTab[i], oldctxt->nsTab[i+1]);
13380
995k
    }
13381
185k
#endif
13382
13383
185k
    oldsax = ctxt->sax;
13384
185k
    ctxt->sax = oldctxt->sax;
13385
185k
    xmlDetectSAX2(ctxt);
13386
185k
    ctxt->replaceEntities = oldctxt->replaceEntities;
13387
185k
    ctxt->options = oldctxt->options;
13388
13389
185k
    ctxt->_private = oldctxt->_private;
13390
185k
    if (oldctxt->myDoc == NULL) {
13391
0
  newDoc = xmlNewDoc(BAD_CAST "1.0");
13392
0
  if (newDoc == NULL) {
13393
0
      ctxt->sax = oldsax;
13394
0
      ctxt->dict = NULL;
13395
0
      xmlFreeParserCtxt(ctxt);
13396
0
      return(XML_ERR_INTERNAL_ERROR);
13397
0
  }
13398
0
  newDoc->properties = XML_DOC_INTERNAL;
13399
0
  newDoc->dict = ctxt->dict;
13400
0
  xmlDictReference(newDoc->dict);
13401
0
  ctxt->myDoc = newDoc;
13402
185k
    } else {
13403
185k
  ctxt->myDoc = oldctxt->myDoc;
13404
185k
        content = ctxt->myDoc->children;
13405
185k
  last = ctxt->myDoc->last;
13406
185k
    }
13407
185k
    newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
13408
185k
    if (newRoot == NULL) {
13409
0
  ctxt->sax = oldsax;
13410
0
  ctxt->dict = NULL;
13411
0
  xmlFreeParserCtxt(ctxt);
13412
0
  if (newDoc != NULL) {
13413
0
      xmlFreeDoc(newDoc);
13414
0
  }
13415
0
  return(XML_ERR_INTERNAL_ERROR);
13416
0
    }
13417
185k
    ctxt->myDoc->children = NULL;
13418
185k
    ctxt->myDoc->last = NULL;
13419
185k
    xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
13420
185k
    nodePush(ctxt, ctxt->myDoc->children);
13421
185k
    ctxt->instate = XML_PARSER_CONTENT;
13422
185k
    ctxt->depth = oldctxt->depth + 1;
13423
13424
185k
    ctxt->validate = 0;
13425
185k
    ctxt->loadsubset = oldctxt->loadsubset;
13426
185k
    if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
13427
  /*
13428
   * ID/IDREF registration will be done in xmlValidateElement below
13429
   */
13430
0
  ctxt->loadsubset |= XML_SKIP_IDS;
13431
0
    }
13432
185k
    ctxt->dictNames = oldctxt->dictNames;
13433
185k
    ctxt->attsDefault = oldctxt->attsDefault;
13434
185k
    ctxt->attsSpecial = oldctxt->attsSpecial;
13435
13436
185k
    xmlParseContent(ctxt);
13437
185k
    if ((RAW == '<') && (NXT(1) == '/')) {
13438
2.90k
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13439
182k
    } else if (RAW != 0) {
13440
282
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13441
282
    }
13442
185k
    if (ctxt->node != ctxt->myDoc->children) {
13443
111k
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13444
111k
    }
13445
13446
185k
    if (!ctxt->wellFormed) {
13447
173k
        if (ctxt->errNo == 0)
13448
0
      ret = XML_ERR_INTERNAL_ERROR;
13449
173k
  else
13450
173k
      ret = (xmlParserErrors)ctxt->errNo;
13451
173k
    } else {
13452
11.7k
      ret = XML_ERR_OK;
13453
11.7k
    }
13454
13455
185k
    if ((lst != NULL) && (ret == XML_ERR_OK)) {
13456
733
  xmlNodePtr cur;
13457
13458
  /*
13459
   * Return the newly created nodeset after unlinking it from
13460
   * they pseudo parent.
13461
   */
13462
733
  cur = ctxt->myDoc->children->children;
13463
733
  *lst = cur;
13464
3.79k
  while (cur != NULL) {
13465
3.06k
#ifdef LIBXML_VALID_ENABLED
13466
3.06k
      if ((oldctxt->validate) && (oldctxt->wellFormed) &&
13467
0
    (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
13468
0
    (cur->type == XML_ELEMENT_NODE)) {
13469
0
    oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
13470
0
      oldctxt->myDoc, cur);
13471
0
      }
13472
3.06k
#endif /* LIBXML_VALID_ENABLED */
13473
3.06k
      cur->parent = NULL;
13474
3.06k
      cur = cur->next;
13475
3.06k
  }
13476
733
  ctxt->myDoc->children->children = NULL;
13477
733
    }
13478
185k
    if (ctxt->myDoc != NULL) {
13479
185k
  xmlFreeNode(ctxt->myDoc->children);
13480
185k
        ctxt->myDoc->children = content;
13481
185k
        ctxt->myDoc->last = last;
13482
185k
    }
13483
13484
    /*
13485
     * Record in the parent context the number of entities replacement
13486
     * done when parsing that reference.
13487
     */
13488
185k
    if (oldctxt != NULL)
13489
185k
        oldctxt->nbentities += ctxt->nbentities;
13490
13491
    /*
13492
     * Also record the last error if any
13493
     */
13494
185k
    if (ctxt->lastError.code != XML_ERR_OK)
13495
173k
        xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13496
13497
185k
    ctxt->sax = oldsax;
13498
185k
    ctxt->dict = NULL;
13499
185k
    ctxt->attsDefault = NULL;
13500
185k
    ctxt->attsSpecial = NULL;
13501
185k
    xmlFreeParserCtxt(ctxt);
13502
185k
    if (newDoc != NULL) {
13503
0
  xmlFreeDoc(newDoc);
13504
0
    }
13505
13506
185k
    return(ret);
13507
185k
}
13508
13509
/**
13510
 * xmlParseInNodeContext:
13511
 * @node:  the context node
13512
 * @data:  the input string
13513
 * @datalen:  the input string length in bytes
13514
 * @options:  a combination of xmlParserOption
13515
 * @lst:  the return value for the set of parsed nodes
13516
 *
13517
 * Parse a well-balanced chunk of an XML document
13518
 * within the context (DTD, namespaces, etc ...) of the given node.
13519
 *
13520
 * The allowed sequence for the data is a Well Balanced Chunk defined by
13521
 * the content production in the XML grammar:
13522
 *
13523
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13524
 *
13525
 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13526
 * error code otherwise
13527
 */
13528
xmlParserErrors
13529
xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
13530
0
                      int options, xmlNodePtr *lst) {
13531
0
#ifdef SAX2
13532
0
    xmlParserCtxtPtr ctxt;
13533
0
    xmlDocPtr doc = NULL;
13534
0
    xmlNodePtr fake, cur;
13535
0
    int nsnr = 0;
13536
13537
0
    xmlParserErrors ret = XML_ERR_OK;
13538
13539
    /*
13540
     * check all input parameters, grab the document
13541
     */
13542
0
    if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
13543
0
        return(XML_ERR_INTERNAL_ERROR);
13544
0
    switch (node->type) {
13545
0
        case XML_ELEMENT_NODE:
13546
0
        case XML_ATTRIBUTE_NODE:
13547
0
        case XML_TEXT_NODE:
13548
0
        case XML_CDATA_SECTION_NODE:
13549
0
        case XML_ENTITY_REF_NODE:
13550
0
        case XML_PI_NODE:
13551
0
        case XML_COMMENT_NODE:
13552
0
        case XML_DOCUMENT_NODE:
13553
0
        case XML_HTML_DOCUMENT_NODE:
13554
0
      break;
13555
0
  default:
13556
0
      return(XML_ERR_INTERNAL_ERROR);
13557
13558
0
    }
13559
0
    while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
13560
0
           (node->type != XML_DOCUMENT_NODE) &&
13561
0
     (node->type != XML_HTML_DOCUMENT_NODE))
13562
0
  node = node->parent;
13563
0
    if (node == NULL)
13564
0
  return(XML_ERR_INTERNAL_ERROR);
13565
0
    if (node->type == XML_ELEMENT_NODE)
13566
0
  doc = node->doc;
13567
0
    else
13568
0
        doc = (xmlDocPtr) node;
13569
0
    if (doc == NULL)
13570
0
  return(XML_ERR_INTERNAL_ERROR);
13571
13572
    /*
13573
     * allocate a context and set-up everything not related to the
13574
     * node position in the tree
13575
     */
13576
0
    if (doc->type == XML_DOCUMENT_NODE)
13577
0
  ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
13578
0
#ifdef LIBXML_HTML_ENABLED
13579
0
    else if (doc->type == XML_HTML_DOCUMENT_NODE) {
13580
0
  ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
13581
        /*
13582
         * When parsing in context, it makes no sense to add implied
13583
         * elements like html/body/etc...
13584
         */
13585
0
        options |= HTML_PARSE_NOIMPLIED;
13586
0
    }
13587
0
#endif
13588
0
    else
13589
0
        return(XML_ERR_INTERNAL_ERROR);
13590
13591
0
    if (ctxt == NULL)
13592
0
        return(XML_ERR_NO_MEMORY);
13593
13594
    /*
13595
     * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
13596
     * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
13597
     * we must wait until the last moment to free the original one.
13598
     */
13599
0
    if (doc->dict != NULL) {
13600
0
        if (ctxt->dict != NULL)
13601
0
      xmlDictFree(ctxt->dict);
13602
0
  ctxt->dict = doc->dict;
13603
0
    } else
13604
0
        options |= XML_PARSE_NODICT;
13605
13606
0
    if (doc->encoding != NULL) {
13607
0
        xmlCharEncodingHandlerPtr hdlr;
13608
13609
0
        if (ctxt->encoding != NULL)
13610
0
      xmlFree((xmlChar *) ctxt->encoding);
13611
0
        ctxt->encoding = xmlStrdup((const xmlChar *) doc->encoding);
13612
13613
0
        hdlr = xmlFindCharEncodingHandler((const char *) doc->encoding);
13614
0
        if (hdlr != NULL) {
13615
0
            xmlSwitchToEncoding(ctxt, hdlr);
13616
0
  } else {
13617
0
            return(XML_ERR_UNSUPPORTED_ENCODING);
13618
0
        }
13619
0
    }
13620
13621
0
    xmlCtxtUseOptionsInternal(ctxt, options, NULL);
13622
0
    xmlDetectSAX2(ctxt);
13623
0
    ctxt->myDoc = doc;
13624
    /* parsing in context, i.e. as within existing content */
13625
0
    ctxt->instate = XML_PARSER_CONTENT;
13626
13627
0
    fake = xmlNewComment(NULL);
13628
0
    if (fake == NULL) {
13629
0
        xmlFreeParserCtxt(ctxt);
13630
0
  return(XML_ERR_NO_MEMORY);
13631
0
    }
13632
0
    xmlAddChild(node, fake);
13633
13634
0
    if (node->type == XML_ELEMENT_NODE) {
13635
0
  nodePush(ctxt, node);
13636
  /*
13637
   * initialize the SAX2 namespaces stack
13638
   */
13639
0
  cur = node;
13640
0
  while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
13641
0
      xmlNsPtr ns = cur->nsDef;
13642
0
      const xmlChar *iprefix, *ihref;
13643
13644
0
      while (ns != NULL) {
13645
0
    if (ctxt->dict) {
13646
0
        iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
13647
0
        ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
13648
0
    } else {
13649
0
        iprefix = ns->prefix;
13650
0
        ihref = ns->href;
13651
0
    }
13652
13653
0
          if (xmlGetNamespace(ctxt, iprefix) == NULL) {
13654
0
        nsPush(ctxt, iprefix, ihref);
13655
0
        nsnr++;
13656
0
    }
13657
0
    ns = ns->next;
13658
0
      }
13659
0
      cur = cur->parent;
13660
0
  }
13661
0
    }
13662
13663
0
    if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
13664
  /*
13665
   * ID/IDREF registration will be done in xmlValidateElement below
13666
   */
13667
0
  ctxt->loadsubset |= XML_SKIP_IDS;
13668
0
    }
13669
13670
0
#ifdef LIBXML_HTML_ENABLED
13671
0
    if (doc->type == XML_HTML_DOCUMENT_NODE)
13672
0
        __htmlParseContent(ctxt);
13673
0
    else
13674
0
#endif
13675
0
  xmlParseContent(ctxt);
13676
13677
0
    nsPop(ctxt, nsnr);
13678
0
    if ((RAW == '<') && (NXT(1) == '/')) {
13679
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13680
0
    } else if (RAW != 0) {
13681
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13682
0
    }
13683
0
    if ((ctxt->node != NULL) && (ctxt->node != node)) {
13684
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13685
0
  ctxt->wellFormed = 0;
13686
0
    }
13687
13688
0
    if (!ctxt->wellFormed) {
13689
0
        if (ctxt->errNo == 0)
13690
0
      ret = XML_ERR_INTERNAL_ERROR;
13691
0
  else
13692
0
      ret = (xmlParserErrors)ctxt->errNo;
13693
0
    } else {
13694
0
        ret = XML_ERR_OK;
13695
0
    }
13696
13697
    /*
13698
     * Return the newly created nodeset after unlinking it from
13699
     * the pseudo sibling.
13700
     */
13701
13702
0
    cur = fake->next;
13703
0
    fake->next = NULL;
13704
0
    node->last = fake;
13705
13706
0
    if (cur != NULL) {
13707
0
  cur->prev = NULL;
13708
0
    }
13709
13710
0
    *lst = cur;
13711
13712
0
    while (cur != NULL) {
13713
0
  cur->parent = NULL;
13714
0
  cur = cur->next;
13715
0
    }
13716
13717
0
    xmlUnlinkNode(fake);
13718
0
    xmlFreeNode(fake);
13719
13720
13721
0
    if (ret != XML_ERR_OK) {
13722
0
        xmlFreeNodeList(*lst);
13723
0
  *lst = NULL;
13724
0
    }
13725
13726
0
    if (doc->dict != NULL)
13727
0
        ctxt->dict = NULL;
13728
0
    xmlFreeParserCtxt(ctxt);
13729
13730
0
    return(ret);
13731
#else /* !SAX2 */
13732
    return(XML_ERR_INTERNAL_ERROR);
13733
#endif
13734
0
}
13735
13736
#ifdef LIBXML_SAX1_ENABLED
13737
/**
13738
 * xmlParseBalancedChunkMemoryRecover:
13739
 * @doc:  the document the chunk pertains to
13740
 * @sax:  the SAX handler bloc (possibly NULL)
13741
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
13742
 * @depth:  Used for loop detection, use 0
13743
 * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
13744
 * @lst:  the return value for the set of parsed nodes
13745
 * @recover: return nodes even if the data is broken (use 0)
13746
 *
13747
 *
13748
 * Parse a well-balanced chunk of an XML document
13749
 * called by the parser
13750
 * The allowed sequence for the Well Balanced Chunk is the one defined by
13751
 * the content production in the XML grammar:
13752
 *
13753
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13754
 *
13755
 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13756
 *    the parser error code otherwise
13757
 *
13758
 * In case recover is set to 1, the nodelist will not be empty even if
13759
 * the parsed chunk is not well balanced, assuming the parsing succeeded to
13760
 * some extent.
13761
 */
13762
int
13763
xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13764
     void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
13765
0
     int recover) {
13766
0
    xmlParserCtxtPtr ctxt;
13767
0
    xmlDocPtr newDoc;
13768
0
    xmlSAXHandlerPtr oldsax = NULL;
13769
0
    xmlNodePtr content, newRoot;
13770
0
    int size;
13771
0
    int ret = 0;
13772
13773
0
    if (depth > 40) {
13774
0
  return(XML_ERR_ENTITY_LOOP);
13775
0
    }
13776
13777
13778
0
    if (lst != NULL)
13779
0
        *lst = NULL;
13780
0
    if (string == NULL)
13781
0
        return(-1);
13782
13783
0
    size = xmlStrlen(string);
13784
13785
0
    ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13786
0
    if (ctxt == NULL) return(-1);
13787
0
    ctxt->userData = ctxt;
13788
0
    if (sax != NULL) {
13789
0
  oldsax = ctxt->sax;
13790
0
        ctxt->sax = sax;
13791
0
  if (user_data != NULL)
13792
0
      ctxt->userData = user_data;
13793
0
    }
13794
0
    newDoc = xmlNewDoc(BAD_CAST "1.0");
13795
0
    if (newDoc == NULL) {
13796
0
  xmlFreeParserCtxt(ctxt);
13797
0
  return(-1);
13798
0
    }
13799
0
    newDoc->properties = XML_DOC_INTERNAL;
13800
0
    if ((doc != NULL) && (doc->dict != NULL)) {
13801
0
        xmlDictFree(ctxt->dict);
13802
0
  ctxt->dict = doc->dict;
13803
0
  xmlDictReference(ctxt->dict);
13804
0
  ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13805
0
  ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13806
0
  ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13807
0
  ctxt->dictNames = 1;
13808
0
    } else {
13809
0
  xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL);
13810
0
    }
13811
0
    if (doc != NULL) {
13812
0
  newDoc->intSubset = doc->intSubset;
13813
0
  newDoc->extSubset = doc->extSubset;
13814
0
    }
13815
0
    newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13816
0
    if (newRoot == NULL) {
13817
0
  if (sax != NULL)
13818
0
      ctxt->sax = oldsax;
13819
0
  xmlFreeParserCtxt(ctxt);
13820
0
  newDoc->intSubset = NULL;
13821
0
  newDoc->extSubset = NULL;
13822
0
        xmlFreeDoc(newDoc);
13823
0
  return(-1);
13824
0
    }
13825
0
    xmlAddChild((xmlNodePtr) newDoc, newRoot);
13826
0
    nodePush(ctxt, newRoot);
13827
0
    if (doc == NULL) {
13828
0
  ctxt->myDoc = newDoc;
13829
0
    } else {
13830
0
  ctxt->myDoc = newDoc;
13831
0
  newDoc->children->doc = doc;
13832
  /* Ensure that doc has XML spec namespace */
13833
0
  xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
13834
0
  newDoc->oldNs = doc->oldNs;
13835
0
    }
13836
0
    ctxt->instate = XML_PARSER_CONTENT;
13837
0
    ctxt->depth = depth;
13838
13839
    /*
13840
     * Doing validity checking on chunk doesn't make sense
13841
     */
13842
0
    ctxt->validate = 0;
13843
0
    ctxt->loadsubset = 0;
13844
0
    xmlDetectSAX2(ctxt);
13845
13846
0
    if ( doc != NULL ){
13847
0
        content = doc->children;
13848
0
        doc->children = NULL;
13849
0
        xmlParseContent(ctxt);
13850
0
        doc->children = content;
13851
0
    }
13852
0
    else {
13853
0
        xmlParseContent(ctxt);
13854
0
    }
13855
0
    if ((RAW == '<') && (NXT(1) == '/')) {
13856
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13857
0
    } else if (RAW != 0) {
13858
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13859
0
    }
13860
0
    if (ctxt->node != newDoc->children) {
13861
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13862
0
    }
13863
13864
0
    if (!ctxt->wellFormed) {
13865
0
        if (ctxt->errNo == 0)
13866
0
      ret = 1;
13867
0
  else
13868
0
      ret = ctxt->errNo;
13869
0
    } else {
13870
0
      ret = 0;
13871
0
    }
13872
13873
0
    if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
13874
0
  xmlNodePtr cur;
13875
13876
  /*
13877
   * Return the newly created nodeset after unlinking it from
13878
   * they pseudo parent.
13879
   */
13880
0
  cur = newDoc->children->children;
13881
0
  *lst = cur;
13882
0
  while (cur != NULL) {
13883
0
      xmlSetTreeDoc(cur, doc);
13884
0
      cur->parent = NULL;
13885
0
      cur = cur->next;
13886
0
  }
13887
0
  newDoc->children->children = NULL;
13888
0
    }
13889
13890
0
    if (sax != NULL)
13891
0
  ctxt->sax = oldsax;
13892
0
    xmlFreeParserCtxt(ctxt);
13893
0
    newDoc->intSubset = NULL;
13894
0
    newDoc->extSubset = NULL;
13895
0
    newDoc->oldNs = NULL;
13896
0
    xmlFreeDoc(newDoc);
13897
13898
0
    return(ret);
13899
0
}
13900
13901
/**
13902
 * xmlSAXParseEntity:
13903
 * @sax:  the SAX handler block
13904
 * @filename:  the filename
13905
 *
13906
 * parse an XML external entity out of context and build a tree.
13907
 * It use the given SAX function block to handle the parsing callback.
13908
 * If sax is NULL, fallback to the default DOM tree building routines.
13909
 *
13910
 * [78] extParsedEnt ::= TextDecl? content
13911
 *
13912
 * This correspond to a "Well Balanced" chunk
13913
 *
13914
 * Returns the resulting document tree
13915
 */
13916
13917
xmlDocPtr
13918
0
xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
13919
0
    xmlDocPtr ret;
13920
0
    xmlParserCtxtPtr ctxt;
13921
13922
0
    ctxt = xmlCreateFileParserCtxt(filename);
13923
0
    if (ctxt == NULL) {
13924
0
  return(NULL);
13925
0
    }
13926
0
    if (sax != NULL) {
13927
0
  if (ctxt->sax != NULL)
13928
0
      xmlFree(ctxt->sax);
13929
0
        ctxt->sax = sax;
13930
0
        ctxt->userData = NULL;
13931
0
    }
13932
13933
0
    xmlParseExtParsedEnt(ctxt);
13934
13935
0
    if (ctxt->wellFormed)
13936
0
  ret = ctxt->myDoc;
13937
0
    else {
13938
0
        ret = NULL;
13939
0
        xmlFreeDoc(ctxt->myDoc);
13940
0
        ctxt->myDoc = NULL;
13941
0
    }
13942
0
    if (sax != NULL)
13943
0
        ctxt->sax = NULL;
13944
0
    xmlFreeParserCtxt(ctxt);
13945
13946
0
    return(ret);
13947
0
}
13948
13949
/**
13950
 * xmlParseEntity:
13951
 * @filename:  the filename
13952
 *
13953
 * parse an XML external entity out of context and build a tree.
13954
 *
13955
 * [78] extParsedEnt ::= TextDecl? content
13956
 *
13957
 * This correspond to a "Well Balanced" chunk
13958
 *
13959
 * Returns the resulting document tree
13960
 */
13961
13962
xmlDocPtr
13963
0
xmlParseEntity(const char *filename) {
13964
0
    return(xmlSAXParseEntity(NULL, filename));
13965
0
}
13966
#endif /* LIBXML_SAX1_ENABLED */
13967
13968
/**
13969
 * xmlCreateEntityParserCtxtInternal:
13970
 * @URL:  the entity URL
13971
 * @ID:  the entity PUBLIC ID
13972
 * @base:  a possible base for the target URI
13973
 * @pctx:  parser context used to set options on new context
13974
 *
13975
 * Create a parser context for an external entity
13976
 * Automatic support for ZLIB/Compress compressed document is provided
13977
 * by default if found at compile-time.
13978
 *
13979
 * Returns the new parser context or NULL
13980
 */
13981
static xmlParserCtxtPtr
13982
xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
13983
0
                    const xmlChar *base, xmlParserCtxtPtr pctx) {
13984
0
    xmlParserCtxtPtr ctxt;
13985
0
    xmlParserInputPtr inputStream;
13986
0
    char *directory = NULL;
13987
0
    xmlChar *uri;
13988
13989
0
    ctxt = xmlNewParserCtxt();
13990
0
    if (ctxt == NULL) {
13991
0
  return(NULL);
13992
0
    }
13993
13994
0
    if (pctx != NULL) {
13995
0
        ctxt->options = pctx->options;
13996
0
        ctxt->_private = pctx->_private;
13997
0
    }
13998
13999
0
    uri = xmlBuildURI(URL, base);
14000
14001
0
    if (uri == NULL) {
14002
0
  inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
14003
0
  if (inputStream == NULL) {
14004
0
      xmlFreeParserCtxt(ctxt);
14005
0
      return(NULL);
14006
0
  }
14007
14008
0
  inputPush(ctxt, inputStream);
14009
14010
0
  if ((ctxt->directory == NULL) && (directory == NULL))
14011
0
      directory = xmlParserGetDirectory((char *)URL);
14012
0
  if ((ctxt->directory == NULL) && (directory != NULL))
14013
0
      ctxt->directory = directory;
14014
0
    } else {
14015
0
  inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
14016
0
  if (inputStream == NULL) {
14017
0
      xmlFree(uri);
14018
0
      xmlFreeParserCtxt(ctxt);
14019
0
      return(NULL);
14020
0
  }
14021
14022
0
  inputPush(ctxt, inputStream);
14023
14024
0
  if ((ctxt->directory == NULL) && (directory == NULL))
14025
0
      directory = xmlParserGetDirectory((char *)uri);
14026
0
  if ((ctxt->directory == NULL) && (directory != NULL))
14027
0
      ctxt->directory = directory;
14028
0
  xmlFree(uri);
14029
0
    }
14030
0
    return(ctxt);
14031
0
}
14032
14033
/**
14034
 * xmlCreateEntityParserCtxt:
14035
 * @URL:  the entity URL
14036
 * @ID:  the entity PUBLIC ID
14037
 * @base:  a possible base for the target URI
14038
 *
14039
 * Create a parser context for an external entity
14040
 * Automatic support for ZLIB/Compress compressed document is provided
14041
 * by default if found at compile-time.
14042
 *
14043
 * Returns the new parser context or NULL
14044
 */
14045
xmlParserCtxtPtr
14046
xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
14047
0
                    const xmlChar *base) {
14048
0
    return xmlCreateEntityParserCtxtInternal(URL, ID, base, NULL);
14049
14050
0
}
14051
14052
/************************************************************************
14053
 *                  *
14054
 *    Front ends when parsing from a file     *
14055
 *                  *
14056
 ************************************************************************/
14057
14058
/**
14059
 * xmlCreateURLParserCtxt:
14060
 * @filename:  the filename or URL
14061
 * @options:  a combination of xmlParserOption
14062
 *
14063
 * Create a parser context for a file or URL content.
14064
 * Automatic support for ZLIB/Compress compressed document is provided
14065
 * by default if found at compile-time and for file accesses
14066
 *
14067
 * Returns the new parser context or NULL
14068
 */
14069
xmlParserCtxtPtr
14070
xmlCreateURLParserCtxt(const char *filename, int options)
14071
0
{
14072
0
    xmlParserCtxtPtr ctxt;
14073
0
    xmlParserInputPtr inputStream;
14074
0
    char *directory = NULL;
14075
14076
0
    ctxt = xmlNewParserCtxt();
14077
0
    if (ctxt == NULL) {
14078
0
  xmlErrMemory(NULL, "cannot allocate parser context");
14079
0
  return(NULL);
14080
0
    }
14081
14082
0
    if (options)
14083
0
  xmlCtxtUseOptionsInternal(ctxt, options, NULL);
14084
0
    ctxt->linenumbers = 1;
14085
14086
0
    inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
14087
0
    if (inputStream == NULL) {
14088
0
  xmlFreeParserCtxt(ctxt);
14089
0
  return(NULL);
14090
0
    }
14091
14092
0
    inputPush(ctxt, inputStream);
14093
0
    if ((ctxt->directory == NULL) && (directory == NULL))
14094
0
        directory = xmlParserGetDirectory(filename);
14095
0
    if ((ctxt->directory == NULL) && (directory != NULL))
14096
0
        ctxt->directory = directory;
14097
14098
0
    return(ctxt);
14099
0
}
14100
14101
/**
14102
 * xmlCreateFileParserCtxt:
14103
 * @filename:  the filename
14104
 *
14105
 * Create a parser context for a file content.
14106
 * Automatic support for ZLIB/Compress compressed document is provided
14107
 * by default if found at compile-time.
14108
 *
14109
 * Returns the new parser context or NULL
14110
 */
14111
xmlParserCtxtPtr
14112
xmlCreateFileParserCtxt(const char *filename)
14113
0
{
14114
0
    return(xmlCreateURLParserCtxt(filename, 0));
14115
0
}
14116
14117
#ifdef LIBXML_SAX1_ENABLED
14118
/**
14119
 * xmlSAXParseFileWithData:
14120
 * @sax:  the SAX handler block
14121
 * @filename:  the filename
14122
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
14123
 *             documents
14124
 * @data:  the userdata
14125
 *
14126
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14127
 * compressed document is provided by default if found at compile-time.
14128
 * It use the given SAX function block to handle the parsing callback.
14129
 * If sax is NULL, fallback to the default DOM tree building routines.
14130
 *
14131
 * User data (void *) is stored within the parser context in the
14132
 * context's _private member, so it is available nearly everywhere in libxml
14133
 *
14134
 * Returns the resulting document tree
14135
 */
14136
14137
xmlDocPtr
14138
xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
14139
0
                        int recovery, void *data) {
14140
0
    xmlDocPtr ret;
14141
0
    xmlParserCtxtPtr ctxt;
14142
14143
0
    xmlInitParser();
14144
14145
0
    ctxt = xmlCreateFileParserCtxt(filename);
14146
0
    if (ctxt == NULL) {
14147
0
  return(NULL);
14148
0
    }
14149
0
    if (sax != NULL) {
14150
0
  if (ctxt->sax != NULL)
14151
0
      xmlFree(ctxt->sax);
14152
0
        ctxt->sax = sax;
14153
0
    }
14154
0
    xmlDetectSAX2(ctxt);
14155
0
    if (data!=NULL) {
14156
0
  ctxt->_private = data;
14157
0
    }
14158
14159
0
    if (ctxt->directory == NULL)
14160
0
        ctxt->directory = xmlParserGetDirectory(filename);
14161
14162
0
    ctxt->recovery = recovery;
14163
14164
0
    xmlParseDocument(ctxt);
14165
14166
0
    if ((ctxt->wellFormed) || recovery) {
14167
0
        ret = ctxt->myDoc;
14168
0
  if (ret != NULL) {
14169
0
      if (ctxt->input->buf->compressed > 0)
14170
0
    ret->compression = 9;
14171
0
      else
14172
0
    ret->compression = ctxt->input->buf->compressed;
14173
0
  }
14174
0
    }
14175
0
    else {
14176
0
       ret = NULL;
14177
0
       xmlFreeDoc(ctxt->myDoc);
14178
0
       ctxt->myDoc = NULL;
14179
0
    }
14180
0
    if (sax != NULL)
14181
0
        ctxt->sax = NULL;
14182
0
    xmlFreeParserCtxt(ctxt);
14183
14184
0
    return(ret);
14185
0
}
14186
14187
/**
14188
 * xmlSAXParseFile:
14189
 * @sax:  the SAX handler block
14190
 * @filename:  the filename
14191
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
14192
 *             documents
14193
 *
14194
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14195
 * compressed document is provided by default if found at compile-time.
14196
 * It use the given SAX function block to handle the parsing callback.
14197
 * If sax is NULL, fallback to the default DOM tree building routines.
14198
 *
14199
 * Returns the resulting document tree
14200
 */
14201
14202
xmlDocPtr
14203
xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
14204
0
                          int recovery) {
14205
0
    return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
14206
0
}
14207
14208
/**
14209
 * xmlRecoverDoc:
14210
 * @cur:  a pointer to an array of xmlChar
14211
 *
14212
 * parse an XML in-memory document and build a tree.
14213
 * In the case the document is not Well Formed, a attempt to build a
14214
 * tree is tried anyway
14215
 *
14216
 * Returns the resulting document tree or NULL in case of failure
14217
 */
14218
14219
xmlDocPtr
14220
0
xmlRecoverDoc(const xmlChar *cur) {
14221
0
    return(xmlSAXParseDoc(NULL, cur, 1));
14222
0
}
14223
14224
/**
14225
 * xmlParseFile:
14226
 * @filename:  the filename
14227
 *
14228
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14229
 * compressed document is provided by default if found at compile-time.
14230
 *
14231
 * Returns the resulting document tree if the file was wellformed,
14232
 * NULL otherwise.
14233
 */
14234
14235
xmlDocPtr
14236
0
xmlParseFile(const char *filename) {
14237
0
    return(xmlSAXParseFile(NULL, filename, 0));
14238
0
}
14239
14240
/**
14241
 * xmlRecoverFile:
14242
 * @filename:  the filename
14243
 *
14244
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14245
 * compressed document is provided by default if found at compile-time.
14246
 * In the case the document is not Well Formed, it attempts to build
14247
 * a tree anyway
14248
 *
14249
 * Returns the resulting document tree or NULL in case of failure
14250
 */
14251
14252
xmlDocPtr
14253
0
xmlRecoverFile(const char *filename) {
14254
0
    return(xmlSAXParseFile(NULL, filename, 1));
14255
0
}
14256
14257
14258
/**
14259
 * xmlSetupParserForBuffer:
14260
 * @ctxt:  an XML parser context
14261
 * @buffer:  a xmlChar * buffer
14262
 * @filename:  a file name
14263
 *
14264
 * Setup the parser context to parse a new buffer; Clears any prior
14265
 * contents from the parser context. The buffer parameter must not be
14266
 * NULL, but the filename parameter can be
14267
 */
14268
void
14269
xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
14270
                             const char* filename)
14271
0
{
14272
0
    xmlParserInputPtr input;
14273
14274
0
    if ((ctxt == NULL) || (buffer == NULL))
14275
0
        return;
14276
14277
0
    input = xmlNewInputStream(ctxt);
14278
0
    if (input == NULL) {
14279
0
        xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
14280
0
        xmlClearParserCtxt(ctxt);
14281
0
        return;
14282
0
    }
14283
14284
0
    xmlClearParserCtxt(ctxt);
14285
0
    if (filename != NULL)
14286
0
        input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
14287
0
    input->base = buffer;
14288
0
    input->cur = buffer;
14289
0
    input->end = &buffer[xmlStrlen(buffer)];
14290
0
    inputPush(ctxt, input);
14291
0
}
14292
14293
/**
14294
 * xmlSAXUserParseFile:
14295
 * @sax:  a SAX handler
14296
 * @user_data:  The user data returned on SAX callbacks
14297
 * @filename:  a file name
14298
 *
14299
 * parse an XML file and call the given SAX handler routines.
14300
 * Automatic support for ZLIB/Compress compressed document is provided
14301
 *
14302
 * Returns 0 in case of success or a error number otherwise
14303
 */
14304
int
14305
xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
14306
0
                    const char *filename) {
14307
0
    int ret = 0;
14308
0
    xmlParserCtxtPtr ctxt;
14309
14310
0
    ctxt = xmlCreateFileParserCtxt(filename);
14311
0
    if (ctxt == NULL) return -1;
14312
0
    if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14313
0
  xmlFree(ctxt->sax);
14314
0
    ctxt->sax = sax;
14315
0
    xmlDetectSAX2(ctxt);
14316
14317
0
    if (user_data != NULL)
14318
0
  ctxt->userData = user_data;
14319
14320
0
    xmlParseDocument(ctxt);
14321
14322
0
    if (ctxt->wellFormed)
14323
0
  ret = 0;
14324
0
    else {
14325
0
        if (ctxt->errNo != 0)
14326
0
      ret = ctxt->errNo;
14327
0
  else
14328
0
      ret = -1;
14329
0
    }
14330
0
    if (sax != NULL)
14331
0
  ctxt->sax = NULL;
14332
0
    if (ctxt->myDoc != NULL) {
14333
0
        xmlFreeDoc(ctxt->myDoc);
14334
0
  ctxt->myDoc = NULL;
14335
0
    }
14336
0
    xmlFreeParserCtxt(ctxt);
14337
14338
0
    return ret;
14339
0
}
14340
#endif /* LIBXML_SAX1_ENABLED */
14341
14342
/************************************************************************
14343
 *                  *
14344
 *    Front ends when parsing from memory     *
14345
 *                  *
14346
 ************************************************************************/
14347
14348
/**
14349
 * xmlCreateMemoryParserCtxt:
14350
 * @buffer:  a pointer to a char array
14351
 * @size:  the size of the array
14352
 *
14353
 * Create a parser context for an XML in-memory document.
14354
 *
14355
 * Returns the new parser context or NULL
14356
 */
14357
xmlParserCtxtPtr
14358
189k
xmlCreateMemoryParserCtxt(const char *buffer, int size) {
14359
189k
    xmlParserCtxtPtr ctxt;
14360
189k
    xmlParserInputPtr input;
14361
189k
    xmlParserInputBufferPtr buf;
14362
14363
189k
    if (buffer == NULL)
14364
0
  return(NULL);
14365
189k
    if (size <= 0)
14366
4.54k
  return(NULL);
14367
14368
185k
    ctxt = xmlNewParserCtxt();
14369
185k
    if (ctxt == NULL)
14370
0
  return(NULL);
14371
14372
    /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
14373
185k
    buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
14374
185k
    if (buf == NULL) {
14375
0
  xmlFreeParserCtxt(ctxt);
14376
0
  return(NULL);
14377
0
    }
14378
14379
185k
    input = xmlNewInputStream(ctxt);
14380
185k
    if (input == NULL) {
14381
0
  xmlFreeParserInputBuffer(buf);
14382
0
  xmlFreeParserCtxt(ctxt);
14383
0
  return(NULL);
14384
0
    }
14385
14386
185k
    input->filename = NULL;
14387
185k
    input->buf = buf;
14388
185k
    xmlBufResetInput(input->buf->buffer, input);
14389
14390
185k
    inputPush(ctxt, input);
14391
185k
    return(ctxt);
14392
185k
}
14393
14394
#ifdef LIBXML_SAX1_ENABLED
14395
/**
14396
 * xmlSAXParseMemoryWithData:
14397
 * @sax:  the SAX handler block
14398
 * @buffer:  an pointer to a char array
14399
 * @size:  the size of the array
14400
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
14401
 *             documents
14402
 * @data:  the userdata
14403
 *
14404
 * parse an XML in-memory block and use the given SAX function block
14405
 * to handle the parsing callback. If sax is NULL, fallback to the default
14406
 * DOM tree building routines.
14407
 *
14408
 * User data (void *) is stored within the parser context in the
14409
 * context's _private member, so it is available nearly everywhere in libxml
14410
 *
14411
 * Returns the resulting document tree
14412
 */
14413
14414
xmlDocPtr
14415
xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
14416
0
            int size, int recovery, void *data) {
14417
0
    xmlDocPtr ret;
14418
0
    xmlParserCtxtPtr ctxt;
14419
14420
0
    xmlInitParser();
14421
14422
0
    ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14423
0
    if (ctxt == NULL) return(NULL);
14424
0
    if (sax != NULL) {
14425
0
  if (ctxt->sax != NULL)
14426
0
      xmlFree(ctxt->sax);
14427
0
        ctxt->sax = sax;
14428
0
    }
14429
0
    xmlDetectSAX2(ctxt);
14430
0
    if (data!=NULL) {
14431
0
  ctxt->_private=data;
14432
0
    }
14433
14434
0
    ctxt->recovery = recovery;
14435
14436
0
    xmlParseDocument(ctxt);
14437
14438
0
    if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14439
0
    else {
14440
0
       ret = NULL;
14441
0
       xmlFreeDoc(ctxt->myDoc);
14442
0
       ctxt->myDoc = NULL;
14443
0
    }
14444
0
    if (sax != NULL)
14445
0
  ctxt->sax = NULL;
14446
0
    xmlFreeParserCtxt(ctxt);
14447
14448
0
    return(ret);
14449
0
}
14450
14451
/**
14452
 * xmlSAXParseMemory:
14453
 * @sax:  the SAX handler block
14454
 * @buffer:  an pointer to a char array
14455
 * @size:  the size of the array
14456
 * @recovery:  work in recovery mode, i.e. tries to read not Well Formed
14457
 *             documents
14458
 *
14459
 * parse an XML in-memory block and use the given SAX function block
14460
 * to handle the parsing callback. If sax is NULL, fallback to the default
14461
 * DOM tree building routines.
14462
 *
14463
 * Returns the resulting document tree
14464
 */
14465
xmlDocPtr
14466
xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
14467
0
            int size, int recovery) {
14468
0
    return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
14469
0
}
14470
14471
/**
14472
 * xmlParseMemory:
14473
 * @buffer:  an pointer to a char array
14474
 * @size:  the size of the array
14475
 *
14476
 * parse an XML in-memory block and build a tree.
14477
 *
14478
 * Returns the resulting document tree
14479
 */
14480
14481
0
xmlDocPtr xmlParseMemory(const char *buffer, int size) {
14482
0
   return(xmlSAXParseMemory(NULL, buffer, size, 0));
14483
0
}
14484
14485
/**
14486
 * xmlRecoverMemory:
14487
 * @buffer:  an pointer to a char array
14488
 * @size:  the size of the array
14489
 *
14490
 * parse an XML in-memory block and build a tree.
14491
 * In the case the document is not Well Formed, an attempt to
14492
 * build a tree is tried anyway
14493
 *
14494
 * Returns the resulting document tree or NULL in case of error
14495
 */
14496
14497
0
xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
14498
0
   return(xmlSAXParseMemory(NULL, buffer, size, 1));
14499
0
}
14500
14501
/**
14502
 * xmlSAXUserParseMemory:
14503
 * @sax:  a SAX handler
14504
 * @user_data:  The user data returned on SAX callbacks
14505
 * @buffer:  an in-memory XML document input
14506
 * @size:  the length of the XML document in bytes
14507
 *
14508
 * A better SAX parsing routine.
14509
 * parse an XML in-memory buffer and call the given SAX handler routines.
14510
 *
14511
 * Returns 0 in case of success or a error number otherwise
14512
 */
14513
int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
14514
0
        const char *buffer, int size) {
14515
0
    int ret = 0;
14516
0
    xmlParserCtxtPtr ctxt;
14517
14518
0
    xmlInitParser();
14519
14520
0
    ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14521
0
    if (ctxt == NULL) return -1;
14522
0
    if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14523
0
        xmlFree(ctxt->sax);
14524
0
    ctxt->sax = sax;
14525
0
    xmlDetectSAX2(ctxt);
14526
14527
0
    if (user_data != NULL)
14528
0
  ctxt->userData = user_data;
14529
14530
0
    xmlParseDocument(ctxt);
14531
14532
0
    if (ctxt->wellFormed)
14533
0
  ret = 0;
14534
0
    else {
14535
0
        if (ctxt->errNo != 0)
14536
0
      ret = ctxt->errNo;
14537
0
  else
14538
0
      ret = -1;
14539
0
    }
14540
0
    if (sax != NULL)
14541
0
        ctxt->sax = NULL;
14542
0
    if (ctxt->myDoc != NULL) {
14543
0
        xmlFreeDoc(ctxt->myDoc);
14544
0
  ctxt->myDoc = NULL;
14545
0
    }
14546
0
    xmlFreeParserCtxt(ctxt);
14547
14548
0
    return ret;
14549
0
}
14550
#endif /* LIBXML_SAX1_ENABLED */
14551
14552
/**
14553
 * xmlCreateDocParserCtxt:
14554
 * @cur:  a pointer to an array of xmlChar
14555
 *
14556
 * Creates a parser context for an XML in-memory document.
14557
 *
14558
 * Returns the new parser context or NULL
14559
 */
14560
xmlParserCtxtPtr
14561
0
xmlCreateDocParserCtxt(const xmlChar *cur) {
14562
0
    int len;
14563
14564
0
    if (cur == NULL)
14565
0
  return(NULL);
14566
0
    len = xmlStrlen(cur);
14567
0
    return(xmlCreateMemoryParserCtxt((const char *)cur, len));
14568
0
}
14569
14570
#ifdef LIBXML_SAX1_ENABLED
14571
/**
14572
 * xmlSAXParseDoc:
14573
 * @sax:  the SAX handler block
14574
 * @cur:  a pointer to an array of xmlChar
14575
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
14576
 *             documents
14577
 *
14578
 * parse an XML in-memory document and build a tree.
14579
 * It use the given SAX function block to handle the parsing callback.
14580
 * If sax is NULL, fallback to the default DOM tree building routines.
14581
 *
14582
 * Returns the resulting document tree
14583
 */
14584
14585
xmlDocPtr
14586
0
xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
14587
0
    xmlDocPtr ret;
14588
0
    xmlParserCtxtPtr ctxt;
14589
0
    xmlSAXHandlerPtr oldsax = NULL;
14590
14591
0
    if (cur == NULL) return(NULL);
14592
14593
14594
0
    ctxt = xmlCreateDocParserCtxt(cur);
14595
0
    if (ctxt == NULL) return(NULL);
14596
0
    if (sax != NULL) {
14597
0
        oldsax = ctxt->sax;
14598
0
        ctxt->sax = sax;
14599
0
        ctxt->userData = NULL;
14600
0
    }
14601
0
    xmlDetectSAX2(ctxt);
14602
14603
0
    xmlParseDocument(ctxt);
14604
0
    if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14605
0
    else {
14606
0
       ret = NULL;
14607
0
       xmlFreeDoc(ctxt->myDoc);
14608
0
       ctxt->myDoc = NULL;
14609
0
    }
14610
0
    if (sax != NULL)
14611
0
  ctxt->sax = oldsax;
14612
0
    xmlFreeParserCtxt(ctxt);
14613
14614
0
    return(ret);
14615
0
}
14616
14617
/**
14618
 * xmlParseDoc:
14619
 * @cur:  a pointer to an array of xmlChar
14620
 *
14621
 * parse an XML in-memory document and build a tree.
14622
 *
14623
 * Returns the resulting document tree
14624
 */
14625
14626
xmlDocPtr
14627
0
xmlParseDoc(const xmlChar *cur) {
14628
0
    return(xmlSAXParseDoc(NULL, cur, 0));
14629
0
}
14630
#endif /* LIBXML_SAX1_ENABLED */
14631
14632
#ifdef LIBXML_LEGACY_ENABLED
14633
/************************************************************************
14634
 *                  *
14635
 *  Specific function to keep track of entities references    *
14636
 *  and used by the XSLT debugger         *
14637
 *                  *
14638
 ************************************************************************/
14639
14640
static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
14641
14642
/**
14643
 * xmlAddEntityReference:
14644
 * @ent : A valid entity
14645
 * @firstNode : A valid first node for children of entity
14646
 * @lastNode : A valid last node of children entity
14647
 *
14648
 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
14649
 */
14650
static void
14651
xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
14652
                      xmlNodePtr lastNode)
14653
0
{
14654
0
    if (xmlEntityRefFunc != NULL) {
14655
0
        (*xmlEntityRefFunc) (ent, firstNode, lastNode);
14656
0
    }
14657
0
}
14658
14659
14660
/**
14661
 * xmlSetEntityReferenceFunc:
14662
 * @func: A valid function
14663
 *
14664
 * Set the function to call call back when a xml reference has been made
14665
 */
14666
void
14667
xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
14668
0
{
14669
0
    xmlEntityRefFunc = func;
14670
0
}
14671
#endif /* LIBXML_LEGACY_ENABLED */
14672
14673
/************************************************************************
14674
 *                  *
14675
 *        Miscellaneous       *
14676
 *                  *
14677
 ************************************************************************/
14678
14679
#ifdef LIBXML_XPATH_ENABLED
14680
#include <libxml/xpath.h>
14681
#endif
14682
14683
extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
14684
static int xmlParserInitialized = 0;
14685
14686
/**
14687
 * xmlInitParser:
14688
 *
14689
 * Initialization function for the XML parser.
14690
 * This is not reentrant. Call once before processing in case of
14691
 * use in multithreaded programs.
14692
 */
14693
14694
void
14695
0
xmlInitParser(void) {
14696
0
    if (xmlParserInitialized != 0)
14697
0
  return;
14698
14699
0
#ifdef LIBXML_THREAD_ENABLED
14700
0
    __xmlGlobalInitMutexLock();
14701
0
    if (xmlParserInitialized == 0) {
14702
0
#endif
14703
0
  xmlInitThreads();
14704
0
  xmlInitGlobals();
14705
0
  if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
14706
0
      (xmlGenericError == NULL))
14707
0
      initGenericErrorDefaultFunc(NULL);
14708
0
  xmlInitMemory();
14709
0
        xmlInitializeDict();
14710
0
  xmlInitCharEncodingHandlers();
14711
0
  xmlDefaultSAXHandlerInit();
14712
0
  xmlRegisterDefaultInputCallbacks();
14713
0
#ifdef LIBXML_OUTPUT_ENABLED
14714
0
  xmlRegisterDefaultOutputCallbacks();
14715
0
#endif /* LIBXML_OUTPUT_ENABLED */
14716
0
#ifdef LIBXML_HTML_ENABLED
14717
0
  htmlInitAutoClose();
14718
0
  htmlDefaultSAXHandlerInit();
14719
0
#endif
14720
0
#ifdef LIBXML_XPATH_ENABLED
14721
0
  xmlXPathInit();
14722
0
#endif
14723
0
  xmlParserInitialized = 1;
14724
0
#ifdef LIBXML_THREAD_ENABLED
14725
0
    }
14726
0
    __xmlGlobalInitMutexUnlock();
14727
0
#endif
14728
0
}
14729
14730
/**
14731
 * xmlCleanupParser:
14732
 *
14733
 * This function name is somewhat misleading. It does not clean up
14734
 * parser state, it cleans up memory allocated by the library itself.
14735
 * It is a cleanup function for the XML library. It tries to reclaim all
14736
 * related global memory allocated for the library processing.
14737
 * It doesn't deallocate any document related memory. One should
14738
 * call xmlCleanupParser() only when the process has finished using
14739
 * the library and all XML/HTML documents built with it.
14740
 * See also xmlInitParser() which has the opposite function of preparing
14741
 * the library for operations.
14742
 *
14743
 * WARNING: if your application is multithreaded or has plugin support
14744
 *          calling this may crash the application if another thread or
14745
 *          a plugin is still using libxml2. It's sometimes very hard to
14746
 *          guess if libxml2 is in use in the application, some libraries
14747
 *          or plugins may use it without notice. In case of doubt abstain
14748
 *          from calling this function or do it just before calling exit()
14749
 *          to avoid leak reports from valgrind !
14750
 */
14751
14752
void
14753
0
xmlCleanupParser(void) {
14754
0
    if (!xmlParserInitialized)
14755
0
  return;
14756
14757
0
    xmlCleanupCharEncodingHandlers();
14758
0
#ifdef LIBXML_CATALOG_ENABLED
14759
0
    xmlCatalogCleanup();
14760
0
#endif
14761
0
    xmlDictCleanup();
14762
0
    xmlCleanupInputCallbacks();
14763
0
#ifdef LIBXML_OUTPUT_ENABLED
14764
0
    xmlCleanupOutputCallbacks();
14765
0
#endif
14766
0
#ifdef LIBXML_SCHEMAS_ENABLED
14767
0
    xmlSchemaCleanupTypes();
14768
0
    xmlRelaxNGCleanupTypes();
14769
0
#endif
14770
0
    xmlResetLastError();
14771
0
    xmlCleanupGlobals();
14772
0
    xmlCleanupThreads(); /* must be last if called not from the main thread */
14773
0
    xmlCleanupMemory();
14774
0
    xmlParserInitialized = 0;
14775
0
}
14776
14777
/************************************************************************
14778
 *                  *
14779
 *  New set (2.6.0) of simpler and more flexible APIs   *
14780
 *                  *
14781
 ************************************************************************/
14782
14783
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A NULL string is ignored; when "dict" is NULL every
 * non-NULL string is freed.
 *
 * Wrapped in do { } while (0) so that it expands to a single statement
 * and can be used safely in unbraced if/else bodies. Invoke it with a
 * trailing semicolon.
 */
#define DICT_FREE(str)						\
    do {							\
        if ((str) && ((!dict) ||				\
            (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))	\
            xmlFree((char *)(str));				\
    } while (0)
14794
14795
/**
14796
 * xmlCtxtReset:
14797
 * @ctxt: an XML parser context
14798
 *
14799
 * Reset a parser context
14800
 */
14801
void
14802
xmlCtxtReset(xmlParserCtxtPtr ctxt)
14803
0
{
14804
0
    xmlParserInputPtr input;
14805
0
    xmlDictPtr dict;
14806
14807
0
    if (ctxt == NULL)
14808
0
        return;
14809
14810
0
    dict = ctxt->dict;
14811
14812
0
    while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
14813
0
        xmlFreeInputStream(input);
14814
0
    }
14815
0
    ctxt->inputNr = 0;
14816
0
    ctxt->input = NULL;
14817
14818
0
    ctxt->spaceNr = 0;
14819
0
    if (ctxt->spaceTab != NULL) {
14820
0
  ctxt->spaceTab[0] = -1;
14821
0
  ctxt->space = &ctxt->spaceTab[0];
14822
0
    } else {
14823
0
        ctxt->space = NULL;
14824
0
    }
14825
14826
14827
0
    ctxt->nodeNr = 0;
14828
0
    ctxt->node = NULL;
14829
14830
0
    ctxt->nameNr = 0;
14831
0
    ctxt->name = NULL;
14832
14833
0
    DICT_FREE(ctxt->version);
14834
0
    ctxt->version = NULL;
14835
0
    DICT_FREE(ctxt->encoding);
14836
0
    ctxt->encoding = NULL;
14837
0
    DICT_FREE(ctxt->directory);
14838
0
    ctxt->directory = NULL;
14839
0
    DICT_FREE(ctxt->extSubURI);
14840
0
    ctxt->extSubURI = NULL;
14841
0
    DICT_FREE(ctxt->extSubSystem);
14842
0
    ctxt->extSubSystem = NULL;
14843
0
    if (ctxt->myDoc != NULL)
14844
0
        xmlFreeDoc(ctxt->myDoc);
14845
0
    ctxt->myDoc = NULL;
14846
14847
0
    ctxt->standalone = -1;
14848
0
    ctxt->hasExternalSubset = 0;
14849
0
    ctxt->hasPErefs = 0;
14850
0
    ctxt->html = 0;
14851
0
    ctxt->external = 0;
14852
0
    ctxt->instate = XML_PARSER_START;
14853
0
    ctxt->token = 0;
14854
14855
0
    ctxt->wellFormed = 1;
14856
0
    ctxt->nsWellFormed = 1;
14857
0
    ctxt->disableSAX = 0;
14858
0
    ctxt->valid = 1;
14859
#if 0
14860
    ctxt->vctxt.userData = ctxt;
14861
    ctxt->vctxt.error = xmlParserValidityError;
14862
    ctxt->vctxt.warning = xmlParserValidityWarning;
14863
#endif
14864
0
    ctxt->record_info = 0;
14865
0
    ctxt->nbChars = 0;
14866
0
    ctxt->checkIndex = 0;
14867
0
    ctxt->inSubset = 0;
14868
0
    ctxt->errNo = XML_ERR_OK;
14869
0
    ctxt->depth = 0;
14870
0
    ctxt->charset = XML_CHAR_ENCODING_UTF8;
14871
0
    ctxt->catalogs = NULL;
14872
0
    ctxt->nbentities = 0;
14873
0
    ctxt->sizeentities = 0;
14874
0
    ctxt->sizeentcopy = 0;
14875
0
    xmlInitNodeInfoSeq(&ctxt->node_seq);
14876
14877
0
    if (ctxt->attsDefault != NULL) {
14878
0
        xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
14879
0
        ctxt->attsDefault = NULL;
14880
0
    }
14881
0
    if (ctxt->attsSpecial != NULL) {
14882
0
        xmlHashFree(ctxt->attsSpecial, NULL);
14883
0
        ctxt->attsSpecial = NULL;
14884
0
    }
14885
14886
0
#ifdef LIBXML_CATALOG_ENABLED
14887
0
    if (ctxt->catalogs != NULL)
14888
0
  xmlCatalogFreeLocal(ctxt->catalogs);
14889
0
#endif
14890
0
    if (ctxt->lastError.code != XML_ERR_OK)
14891
0
        xmlResetError(&ctxt->lastError);
14892
0
}
14893
14894
/**
14895
 * xmlCtxtResetPush:
14896
 * @ctxt: an XML parser context
14897
 * @chunk:  a pointer to an array of chars
14898
 * @size:  number of chars in the array
14899
 * @filename:  an optional file name or URI
14900
 * @encoding:  the document encoding, or NULL
14901
 *
14902
 * Reset a push parser context
14903
 *
14904
 * Returns 0 in case of success and 1 in case of error
14905
 */
14906
int
14907
xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
14908
                 int size, const char *filename, const char *encoding)
14909
0
{
14910
0
    xmlParserInputPtr inputStream;
14911
0
    xmlParserInputBufferPtr buf;
14912
0
    xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
14913
14914
0
    if (ctxt == NULL)
14915
0
        return(1);
14916
14917
0
    if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
14918
0
        enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
14919
14920
0
    buf = xmlAllocParserInputBuffer(enc);
14921
0
    if (buf == NULL)
14922
0
        return(1);
14923
14924
0
    if (ctxt == NULL) {
14925
0
        xmlFreeParserInputBuffer(buf);
14926
0
        return(1);
14927
0
    }
14928
14929
0
    xmlCtxtReset(ctxt);
14930
14931
0
    if (ctxt->pushTab == NULL) {
14932
0
        ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 *
14933
0
                                      sizeof(xmlChar *));
14934
0
        if (ctxt->pushTab == NULL) {
14935
0
      xmlErrMemory(ctxt, NULL);
14936
0
            xmlFreeParserInputBuffer(buf);
14937
0
            return(1);
14938
0
        }
14939
0
    }
14940
14941
0
    if (filename == NULL) {
14942
0
        ctxt->directory = NULL;
14943
0
    } else {
14944
0
        ctxt->directory = xmlParserGetDirectory(filename);
14945
0
    }
14946
14947
0
    inputStream = xmlNewInputStream(ctxt);
14948
0
    if (inputStream == NULL) {
14949
0
        xmlFreeParserInputBuffer(buf);
14950
0
        return(1);
14951
0
    }
14952
14953
0
    if (filename == NULL)
14954
0
        inputStream->filename = NULL;
14955
0
    else
14956
0
        inputStream->filename = (char *)
14957
0
            xmlCanonicPath((const xmlChar *) filename);
14958
0
    inputStream->buf = buf;
14959
0
    xmlBufResetInput(buf->buffer, inputStream);
14960
14961
0
    inputPush(ctxt, inputStream);
14962
14963
0
    if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
14964
0
        (ctxt->input->buf != NULL)) {
14965
0
  size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
14966
0
        size_t cur = ctxt->input->cur - ctxt->input->base;
14967
14968
0
        xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
14969
14970
0
        xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
14971
#ifdef DEBUG_PUSH
14972
        xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
14973
#endif
14974
0
    }
14975
14976
0
    if (encoding != NULL) {
14977
0
        xmlCharEncodingHandlerPtr hdlr;
14978
14979
0
        if (ctxt->encoding != NULL)
14980
0
      xmlFree((xmlChar *) ctxt->encoding);
14981
0
        ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14982
14983
0
        hdlr = xmlFindCharEncodingHandler(encoding);
14984
0
        if (hdlr != NULL) {
14985
0
            xmlSwitchToEncoding(ctxt, hdlr);
14986
0
  } else {
14987
0
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
14988
0
            "Unsupported encoding %s\n", BAD_CAST encoding);
14989
0
        }
14990
0
    } else if (enc != XML_CHAR_ENCODING_NONE) {
14991
0
        xmlSwitchEncoding(ctxt, enc);
14992
0
    }
14993
14994
0
    return(0);
14995
0
}
14996
14997
14998
/**
14999
 * xmlCtxtUseOptionsInternal:
15000
 * @ctxt: an XML parser context
15001
 * @options:  a combination of xmlParserOption
15002
 * @encoding:  the user provided encoding to use
15003
 *
15004
 * Applies the options to the parser context
15005
 *
15006
 * Returns 0 in case of success, the set of unknown or unimplemented options
15007
 *         in case of error.
15008
 */
15009
static int
15010
xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding)
15011
211k
{
15012
211k
    if (ctxt == NULL)
15013
0
        return(-1);
15014
211k
    if (encoding != NULL) {
15015
0
        if (ctxt->encoding != NULL)
15016
0
      xmlFree((xmlChar *) ctxt->encoding);
15017
0
        ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
15018
0
    }
15019
211k
    if (options & XML_PARSE_RECOVER) {
15020
206k
        ctxt->recovery = 1;
15021
206k
        options -= XML_PARSE_RECOVER;
15022
206k
  ctxt->options |= XML_PARSE_RECOVER;
15023
206k
    } else
15024
5.26k
        ctxt->recovery = 0;
15025
211k
    if (options & XML_PARSE_DTDLOAD) {
15026
0
        ctxt->loadsubset = XML_DETECT_IDS;
15027
0
        options -= XML_PARSE_DTDLOAD;
15028
0
  ctxt->options |= XML_PARSE_DTDLOAD;
15029
0
    } else
15030
211k
        ctxt->loadsubset = 0;
15031
211k
    if (options & XML_PARSE_DTDATTR) {
15032
0
        ctxt->loadsubset |= XML_COMPLETE_ATTRS;
15033
0
        options -= XML_PARSE_DTDATTR;
15034
0
  ctxt->options |= XML_PARSE_DTDATTR;
15035
0
    }
15036
211k
    if (options & XML_PARSE_NOENT) {
15037
0
        ctxt->replaceEntities = 1;
15038
        /* ctxt->loadsubset |= XML_DETECT_IDS; */
15039
0
        options -= XML_PARSE_NOENT;
15040
0
  ctxt->options |= XML_PARSE_NOENT;
15041
0
    } else
15042
211k
        ctxt->replaceEntities = 0;
15043
211k
    if (options & XML_PARSE_PEDANTIC) {
15044
0
        ctxt->pedantic = 1;
15045
0
        options -= XML_PARSE_PEDANTIC;
15046
0
  ctxt->options |= XML_PARSE_PEDANTIC;
15047
0
    } else
15048
211k
        ctxt->pedantic = 0;
15049
211k
    if (options & XML_PARSE_NOBLANKS) {
15050
211k
        ctxt->keepBlanks = 0;
15051
211k
        ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
15052
211k
        options -= XML_PARSE_NOBLANKS;
15053
211k
  ctxt->options |= XML_PARSE_NOBLANKS;
15054
211k
    } else
15055
0
        ctxt->keepBlanks = 1;
15056
211k
    if (options & XML_PARSE_DTDVALID) {
15057
0
        ctxt->validate = 1;
15058
0
        if (options & XML_PARSE_NOWARNING)
15059
0
            ctxt->vctxt.warning = NULL;
15060
0
        if (options & XML_PARSE_NOERROR)
15061
0
            ctxt->vctxt.error = NULL;
15062
0
        options -= XML_PARSE_DTDVALID;
15063
0
  ctxt->options |= XML_PARSE_DTDVALID;
15064
0
    } else
15065
211k
        ctxt->validate = 0;
15066
211k
    if (options & XML_PARSE_NOWARNING) {
15067
0
        ctxt->sax->warning = NULL;
15068
0
        options -= XML_PARSE_NOWARNING;
15069
0
    }
15070
211k
    if (options & XML_PARSE_NOERROR) {
15071
0
        ctxt->sax->error = NULL;
15072
0
        ctxt->sax->fatalError = NULL;
15073
0
        options -= XML_PARSE_NOERROR;
15074
0
    }
15075
211k
#ifdef LIBXML_SAX1_ENABLED
15076
211k
    if (options & XML_PARSE_SAX1) {
15077
0
        ctxt->sax->startElement = xmlSAX2StartElement;
15078
0
        ctxt->sax->endElement = xmlSAX2EndElement;
15079
0
        ctxt->sax->startElementNs = NULL;
15080
0
        ctxt->sax->endElementNs = NULL;
15081
0
        ctxt->sax->initialized = 1;
15082
0
        options -= XML_PARSE_SAX1;
15083
0
  ctxt->options |= XML_PARSE_SAX1;
15084
0
    }
15085
211k
#endif /* LIBXML_SAX1_ENABLED */
15086
211k
    if (options & XML_PARSE_NODICT) {
15087
0
        ctxt->dictNames = 0;
15088
0
        options -= XML_PARSE_NODICT;
15089
0
  ctxt->options |= XML_PARSE_NODICT;
15090
211k
    } else {
15091
211k
        ctxt->dictNames = 1;
15092
211k
    }
15093
211k
    if (options & XML_PARSE_NOCDATA) {
15094
0
        ctxt->sax->cdataBlock = NULL;
15095
0
        options -= XML_PARSE_NOCDATA;
15096
0
  ctxt->options |= XML_PARSE_NOCDATA;
15097
0
    }
15098
211k
    if (options & XML_PARSE_NSCLEAN) {
15099
0
  ctxt->options |= XML_PARSE_NSCLEAN;
15100
0
        options -= XML_PARSE_NSCLEAN;
15101
0
    }
15102
211k
    if (options & XML_PARSE_NONET) {
15103
211k
  ctxt->options |= XML_PARSE_NONET;
15104
211k
        options -= XML_PARSE_NONET;
15105
211k
    }
15106
211k
    if (options & XML_PARSE_COMPACT) {
15107
211k
  ctxt->options |= XML_PARSE_COMPACT;
15108
211k
        options -= XML_PARSE_COMPACT;
15109
211k
    }
15110
211k
    if (options & XML_PARSE_OLD10) {
15111
0
  ctxt->options |= XML_PARSE_OLD10;
15112
0
        options -= XML_PARSE_OLD10;
15113
0
    }
15114
211k
    if (options & XML_PARSE_NOBASEFIX) {
15115
0
  ctxt->options |= XML_PARSE_NOBASEFIX;
15116
0
        options -= XML_PARSE_NOBASEFIX;
15117
0
    }
15118
211k
    if (options & XML_PARSE_HUGE) {
15119
0
  ctxt->options |= XML_PARSE_HUGE;
15120
0
        options -= XML_PARSE_HUGE;
15121
0
        if (ctxt->dict != NULL)
15122
0
            xmlDictSetLimit(ctxt->dict, 0);
15123
0
    }
15124
211k
    if (options & XML_PARSE_OLDSAX) {
15125
0
  ctxt->options |= XML_PARSE_OLDSAX;
15126
0
        options -= XML_PARSE_OLDSAX;
15127
0
    }
15128
211k
    if (options & XML_PARSE_IGNORE_ENC) {
15129
0
  ctxt->options |= XML_PARSE_IGNORE_ENC;
15130
0
        options -= XML_PARSE_IGNORE_ENC;
15131
0
    }
15132
211k
    if (options & XML_PARSE_BIG_LINES) {
15133
0
  ctxt->options |= XML_PARSE_BIG_LINES;
15134
0
        options -= XML_PARSE_BIG_LINES;
15135
0
    }
15136
211k
    ctxt->linenumbers = 1;
15137
211k
    return (options);
15138
211k
}
15139
15140
/**
15141
 * xmlCtxtUseOptions:
15142
 * @ctxt: an XML parser context
15143
 * @options:  a combination of xmlParserOption
15144
 *
15145
 * Applies the options to the parser context
15146
 *
15147
 * Returns 0 in case of success, the set of unknown or unimplemented options
15148
 *         in case of error.
15149
 */
15150
int
15151
xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
15152
211k
{
15153
211k
   return(xmlCtxtUseOptionsInternal(ctxt, options, NULL));
15154
211k
}
15155
15156
/**
15157
 * xmlDoRead:
15158
 * @ctxt:  an XML parser context
15159
 * @URL:  the base URL to use for the document
15160
 * @encoding:  the document encoding, or NULL
15161
 * @options:  a combination of xmlParserOption
15162
 * @reuse:  keep the context for reuse
15163
 *
15164
 * Common front-end for the xmlRead functions
15165
 *
15166
 * Returns the resulting document tree or NULL
15167
 */
15168
static xmlDocPtr
15169
xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
15170
          int options, int reuse)
15171
0
{
15172
0
    xmlDocPtr ret;
15173
15174
0
    xmlCtxtUseOptionsInternal(ctxt, options, encoding);
15175
0
    if (encoding != NULL) {
15176
0
        xmlCharEncodingHandlerPtr hdlr;
15177
15178
0
  hdlr = xmlFindCharEncodingHandler(encoding);
15179
0
  if (hdlr != NULL)
15180
0
      xmlSwitchToEncoding(ctxt, hdlr);
15181
0
    }
15182
0
    if ((URL != NULL) && (ctxt->input != NULL) &&
15183
0
        (ctxt->input->filename == NULL))
15184
0
        ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
15185
0
    xmlParseDocument(ctxt);
15186
0
    if ((ctxt->wellFormed) || ctxt->recovery)
15187
0
        ret = ctxt->myDoc;
15188
0
    else {
15189
0
        ret = NULL;
15190
0
  if (ctxt->myDoc != NULL) {
15191
0
      xmlFreeDoc(ctxt->myDoc);
15192
0
  }
15193
0
    }
15194
0
    ctxt->myDoc = NULL;
15195
0
    if (!reuse) {
15196
0
  xmlFreeParserCtxt(ctxt);
15197
0
    }
15198
15199
0
    return (ret);
15200
0
}
15201
15202
/**
15203
 * xmlReadDoc:
15204
 * @cur:  a pointer to a zero terminated string
15205
 * @URL:  the base URL to use for the document
15206
 * @encoding:  the document encoding, or NULL
15207
 * @options:  a combination of xmlParserOption
15208
 *
15209
 * parse an XML in-memory document and build a tree.
15210
 *
15211
 * Returns the resulting document tree
15212
 */
15213
xmlDocPtr
15214
xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
15215
0
{
15216
0
    xmlParserCtxtPtr ctxt;
15217
15218
0
    if (cur == NULL)
15219
0
        return (NULL);
15220
0
    xmlInitParser();
15221
15222
0
    ctxt = xmlCreateDocParserCtxt(cur);
15223
0
    if (ctxt == NULL)
15224
0
        return (NULL);
15225
0
    return (xmlDoRead(ctxt, URL, encoding, options, 0));
15226
0
}
15227
15228
/**
15229
 * xmlReadFile:
15230
 * @filename:  a file or URL
15231
 * @encoding:  the document encoding, or NULL
15232
 * @options:  a combination of xmlParserOption
15233
 *
15234
 * parse an XML file from the filesystem or the network.
15235
 *
15236
 * Returns the resulting document tree
15237
 */
15238
xmlDocPtr
15239
xmlReadFile(const char *filename, const char *encoding, int options)
15240
0
{
15241
0
    xmlParserCtxtPtr ctxt;
15242
15243
0
    xmlInitParser();
15244
0
    ctxt = xmlCreateURLParserCtxt(filename, options);
15245
0
    if (ctxt == NULL)
15246
0
        return (NULL);
15247
0
    return (xmlDoRead(ctxt, NULL, encoding, options, 0));
15248
0
}
15249
15250
/**
15251
 * xmlReadMemory:
15252
 * @buffer:  a pointer to a char array
15253
 * @size:  the size of the array
15254
 * @URL:  the base URL to use for the document
15255
 * @encoding:  the document encoding, or NULL
15256
 * @options:  a combination of xmlParserOption
15257
 *
15258
 * parse an XML in-memory document and build a tree.
15259
 *
15260
 * Returns the resulting document tree
15261
 */
15262
xmlDocPtr
15263
xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
15264
0
{
15265
0
    xmlParserCtxtPtr ctxt;
15266
15267
0
    xmlInitParser();
15268
0
    ctxt = xmlCreateMemoryParserCtxt(buffer, size);
15269
0
    if (ctxt == NULL)
15270
0
        return (NULL);
15271
0
    return (xmlDoRead(ctxt, URL, encoding, options, 0));
15272
0
}
15273
15274
/**
15275
 * xmlReadFd:
15276
 * @fd:  an open file descriptor
15277
 * @URL:  the base URL to use for the document
15278
 * @encoding:  the document encoding, or NULL
15279
 * @options:  a combination of xmlParserOption
15280
 *
15281
 * parse an XML from a file descriptor and build a tree.
15282
 * NOTE that the file descriptor will not be closed when the
15283
 *      reader is closed or reset.
15284
 *
15285
 * Returns the resulting document tree
15286
 */
15287
xmlDocPtr
15288
xmlReadFd(int fd, const char *URL, const char *encoding, int options)
15289
0
{
15290
0
    xmlParserCtxtPtr ctxt;
15291
0
    xmlParserInputBufferPtr input;
15292
0
    xmlParserInputPtr stream;
15293
15294
0
    if (fd < 0)
15295
0
        return (NULL);
15296
0
    xmlInitParser();
15297
15298
0
    input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15299
0
    if (input == NULL)
15300
0
        return (NULL);
15301
0
    input->closecallback = NULL;
15302
0
    ctxt = xmlNewParserCtxt();
15303
0
    if (ctxt == NULL) {
15304
0
        xmlFreeParserInputBuffer(input);
15305
0
        return (NULL);
15306
0
    }
15307
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15308
0
    if (stream == NULL) {
15309
0
        xmlFreeParserInputBuffer(input);
15310
0
  xmlFreeParserCtxt(ctxt);
15311
0
        return (NULL);
15312
0
    }
15313
0
    inputPush(ctxt, stream);
15314
0
    return (xmlDoRead(ctxt, URL, encoding, options, 0));
15315
0
}
15316
15317
/**
15318
 * xmlReadIO:
15319
 * @ioread:  an I/O read function
15320
 * @ioclose:  an I/O close function
15321
 * @ioctx:  an I/O handler
15322
 * @URL:  the base URL to use for the document
15323
 * @encoding:  the document encoding, or NULL
15324
 * @options:  a combination of xmlParserOption
15325
 *
15326
 * parse an XML document from I/O functions and source and build a tree.
15327
 *
15328
 * Returns the resulting document tree
15329
 */
15330
xmlDocPtr
15331
xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
15332
          void *ioctx, const char *URL, const char *encoding, int options)
15333
0
{
15334
0
    xmlParserCtxtPtr ctxt;
15335
0
    xmlParserInputBufferPtr input;
15336
0
    xmlParserInputPtr stream;
15337
15338
0
    if (ioread == NULL)
15339
0
        return (NULL);
15340
0
    xmlInitParser();
15341
15342
0
    input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15343
0
                                         XML_CHAR_ENCODING_NONE);
15344
0
    if (input == NULL) {
15345
0
        if (ioclose != NULL)
15346
0
            ioclose(ioctx);
15347
0
        return (NULL);
15348
0
    }
15349
0
    ctxt = xmlNewParserCtxt();
15350
0
    if (ctxt == NULL) {
15351
0
        xmlFreeParserInputBuffer(input);
15352
0
        return (NULL);
15353
0
    }
15354
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15355
0
    if (stream == NULL) {
15356
0
        xmlFreeParserInputBuffer(input);
15357
0
  xmlFreeParserCtxt(ctxt);
15358
0
        return (NULL);
15359
0
    }
15360
0
    inputPush(ctxt, stream);
15361
0
    return (xmlDoRead(ctxt, URL, encoding, options, 0));
15362
0
}
15363
15364
/**
15365
 * xmlCtxtReadDoc:
15366
 * @ctxt:  an XML parser context
15367
 * @cur:  a pointer to a zero terminated string
15368
 * @URL:  the base URL to use for the document
15369
 * @encoding:  the document encoding, or NULL
15370
 * @options:  a combination of xmlParserOption
15371
 *
15372
 * parse an XML in-memory document and build a tree.
15373
 * This reuses the existing @ctxt parser context
15374
 *
15375
 * Returns the resulting document tree
15376
 */
15377
xmlDocPtr
15378
xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
15379
               const char *URL, const char *encoding, int options)
15380
0
{
15381
0
    xmlParserInputPtr stream;
15382
15383
0
    if (cur == NULL)
15384
0
        return (NULL);
15385
0
    if (ctxt == NULL)
15386
0
        return (NULL);
15387
0
    xmlInitParser();
15388
15389
0
    xmlCtxtReset(ctxt);
15390
15391
0
    stream = xmlNewStringInputStream(ctxt, cur);
15392
0
    if (stream == NULL) {
15393
0
        return (NULL);
15394
0
    }
15395
0
    inputPush(ctxt, stream);
15396
0
    return (xmlDoRead(ctxt, URL, encoding, options, 1));
15397
0
}
15398
15399
/**
15400
 * xmlCtxtReadFile:
15401
 * @ctxt:  an XML parser context
15402
 * @filename:  a file or URL
15403
 * @encoding:  the document encoding, or NULL
15404
 * @options:  a combination of xmlParserOption
15405
 *
15406
 * parse an XML file from the filesystem or the network.
15407
 * This reuses the existing @ctxt parser context
15408
 *
15409
 * Returns the resulting document tree
15410
 */
15411
xmlDocPtr
15412
xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
15413
                const char *encoding, int options)
15414
0
{
15415
0
    xmlParserInputPtr stream;
15416
15417
0
    if (filename == NULL)
15418
0
        return (NULL);
15419
0
    if (ctxt == NULL)
15420
0
        return (NULL);
15421
0
    xmlInitParser();
15422
15423
0
    xmlCtxtReset(ctxt);
15424
15425
0
    stream = xmlLoadExternalEntity(filename, NULL, ctxt);
15426
0
    if (stream == NULL) {
15427
0
        return (NULL);
15428
0
    }
15429
0
    inputPush(ctxt, stream);
15430
0
    return (xmlDoRead(ctxt, NULL, encoding, options, 1));
15431
0
}
15432
15433
/**
15434
 * xmlCtxtReadMemory:
15435
 * @ctxt:  an XML parser context
15436
 * @buffer:  a pointer to a char array
15437
 * @size:  the size of the array
15438
 * @URL:  the base URL to use for the document
15439
 * @encoding:  the document encoding, or NULL
15440
 * @options:  a combination of xmlParserOption
15441
 *
15442
 * parse an XML in-memory document and build a tree.
15443
 * This reuses the existing @ctxt parser context
15444
 *
15445
 * Returns the resulting document tree
15446
 */
15447
xmlDocPtr
15448
xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
15449
                  const char *URL, const char *encoding, int options)
15450
0
{
15451
0
    xmlParserInputBufferPtr input;
15452
0
    xmlParserInputPtr stream;
15453
15454
0
    if (ctxt == NULL)
15455
0
        return (NULL);
15456
0
    if (buffer == NULL)
15457
0
        return (NULL);
15458
0
    xmlInitParser();
15459
15460
0
    xmlCtxtReset(ctxt);
15461
15462
0
    input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
15463
0
    if (input == NULL) {
15464
0
  return(NULL);
15465
0
    }
15466
15467
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15468
0
    if (stream == NULL) {
15469
0
  xmlFreeParserInputBuffer(input);
15470
0
  return(NULL);
15471
0
    }
15472
15473
0
    inputPush(ctxt, stream);
15474
0
    return (xmlDoRead(ctxt, URL, encoding, options, 1));
15475
0
}
15476
15477
/**
15478
 * xmlCtxtReadFd:
15479
 * @ctxt:  an XML parser context
15480
 * @fd:  an open file descriptor
15481
 * @URL:  the base URL to use for the document
15482
 * @encoding:  the document encoding, or NULL
15483
 * @options:  a combination of xmlParserOption
15484
 *
15485
 * parse an XML from a file descriptor and build a tree.
15486
 * This reuses the existing @ctxt parser context
15487
 * NOTE that the file descriptor will not be closed when the
15488
 *      reader is closed or reset.
15489
 *
15490
 * Returns the resulting document tree
15491
 */
15492
xmlDocPtr
15493
xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
15494
              const char *URL, const char *encoding, int options)
15495
0
{
15496
0
    xmlParserInputBufferPtr input;
15497
0
    xmlParserInputPtr stream;
15498
15499
0
    if (fd < 0)
15500
0
        return (NULL);
15501
0
    if (ctxt == NULL)
15502
0
        return (NULL);
15503
0
    xmlInitParser();
15504
15505
0
    xmlCtxtReset(ctxt);
15506
15507
15508
0
    input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15509
0
    if (input == NULL)
15510
0
        return (NULL);
15511
0
    input->closecallback = NULL;
15512
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15513
0
    if (stream == NULL) {
15514
0
        xmlFreeParserInputBuffer(input);
15515
0
        return (NULL);
15516
0
    }
15517
0
    inputPush(ctxt, stream);
15518
0
    return (xmlDoRead(ctxt, URL, encoding, options, 1));
15519
0
}
15520
15521
/**
15522
 * xmlCtxtReadIO:
15523
 * @ctxt:  an XML parser context
15524
 * @ioread:  an I/O read function
15525
 * @ioclose:  an I/O close function
15526
 * @ioctx:  an I/O handler
15527
 * @URL:  the base URL to use for the document
15528
 * @encoding:  the document encoding, or NULL
15529
 * @options:  a combination of xmlParserOption
15530
 *
15531
 * parse an XML document from I/O functions and source and build a tree.
15532
 * This reuses the existing @ctxt parser context
15533
 *
15534
 * Returns the resulting document tree
15535
 */
15536
xmlDocPtr
15537
xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
15538
              xmlInputCloseCallback ioclose, void *ioctx,
15539
        const char *URL,
15540
              const char *encoding, int options)
15541
0
{
15542
0
    xmlParserInputBufferPtr input;
15543
0
    xmlParserInputPtr stream;
15544
15545
0
    if (ioread == NULL)
15546
0
        return (NULL);
15547
0
    if (ctxt == NULL)
15548
0
        return (NULL);
15549
0
    xmlInitParser();
15550
15551
0
    xmlCtxtReset(ctxt);
15552
15553
0
    input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15554
0
                                         XML_CHAR_ENCODING_NONE);
15555
0
    if (input == NULL) {
15556
0
        if (ioclose != NULL)
15557
0
            ioclose(ioctx);
15558
0
        return (NULL);
15559
0
    }
15560
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15561
0
    if (stream == NULL) {
15562
0
        xmlFreeParserInputBuffer(input);
15563
0
        return (NULL);
15564
0
    }
15565
0
    inputPush(ctxt, stream);
15566
0
    return (xmlDoRead(ctxt, URL, encoding, options, 1));
15567
0
}
15568
15569
#define bottom_parser
15570
#include "elfgcchack.h"