Coverage Report

Created: 2023-05-11 17:20

/src/libxml2/parser.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3
 *            implemented on top of the SAX interfaces
4
 *
5
 * References:
6
 *   The XML specification:
7
 *     http://www.w3.org/TR/REC-xml
8
 *   Original 1.0 version:
9
 *     http://www.w3.org/TR/1998/REC-xml-19980210
10
 *   XML second edition working draft
11
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
12
 *
13
 * Okay this is a big file, the parser core is around 7000 lines, then it
14
 * is followed by the progressive parser top routines, then the various
15
 * high level APIs to call the parser and a few miscellaneous functions.
16
 * A number of helper functions and deprecated ones have been moved to
17
 * parserInternals.c to reduce this file size.
18
 * As much as possible the functions are associated with their relative
19
 * production in the XML specification. A few productions defining the
20
 * different ranges of character are actually implemented either in
21
 * parserInternals.h or parserInternals.c
22
 * The DOM tree build is realized from the default SAX callbacks in
23
 * the module SAX.c.
24
 * The routines doing the validation checks are in valid.c and called either
25
 * from the SAX callbacks or as standalone functions using a preparsed
26
 * document.
27
 *
28
 * See Copyright for the status of this software.
29
 *
30
 * daniel@veillard.com
31
 */
32
33
/* To avoid EBCDIC trouble when parsing on zOS */
34
#if defined(__MVS__)
35
#pragma convert("ISO8859-1")
36
#endif
37
38
#define IN_LIBXML
39
#include "libxml.h"
40
41
#if defined(_WIN32)
42
#define XML_DIR_SEP '\\'
43
#else
44
#define XML_DIR_SEP '/'
45
#endif
46
47
#include <stdlib.h>
48
#include <limits.h>
49
#include <string.h>
50
#include <stdarg.h>
51
#include <stddef.h>
52
#include <ctype.h>
53
#include <stdlib.h>
54
#include <libxml/xmlmemory.h>
55
#include <libxml/threads.h>
56
#include <libxml/globals.h>
57
#include <libxml/tree.h>
58
#include <libxml/parser.h>
59
#include <libxml/parserInternals.h>
60
#include <libxml/HTMLparser.h>
61
#include <libxml/valid.h>
62
#include <libxml/entities.h>
63
#include <libxml/xmlerror.h>
64
#include <libxml/encoding.h>
65
#include <libxml/xmlIO.h>
66
#include <libxml/uri.h>
67
#ifdef LIBXML_CATALOG_ENABLED
68
#include <libxml/catalog.h>
69
#endif
70
#ifdef LIBXML_SCHEMAS_ENABLED
71
#include <libxml/xmlschemastypes.h>
72
#include <libxml/relaxng.h>
73
#endif
74
#if defined(LIBXML_XPATH_ENABLED) || defined(LIBXML_SCHEMAS_ENABLED)
75
#include <libxml/xpath.h>
76
#endif
77
78
#include "private/buf.h"
79
#include "private/enc.h"
80
#include "private/error.h"
81
#include "private/html.h"
82
#include "private/io.h"
83
#include "private/parser.h"
84
#include "private/threads.h"
85
86
struct _xmlStartTag {    /* Record kept for a start tag; NOTE(review): users of this struct are outside this chunk */
87
    const xmlChar *prefix;    /* presumably the element's namespace prefix - confirm against users */
88
    const xmlChar *URI;    /* presumably the namespace URI bound to the element - confirm */
89
    int line;    /* presumably the input line of the start tag - confirm */
90
    int nsNr;    /* presumably a namespace count associated with the tag - confirm */
91
};
92
93
static void
94
xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info);
95
96
static xmlParserCtxtPtr
97
xmlCreateEntityParserCtxtInternal(xmlSAXHandlerPtr sax, void *userData,
98
        const xmlChar *URL, const xmlChar *ID, const xmlChar *base,
99
        xmlParserCtxtPtr pctx);
100
101
static void xmlHaltParser(xmlParserCtxtPtr ctxt);
102
103
static int
104
xmlParseElementStart(xmlParserCtxtPtr ctxt);
105
106
static void
107
xmlParseElementEnd(xmlParserCtxtPtr ctxt);
108
109
/************************************************************************
110
 *                  *
111
 *  Arbitrary limits set in the parser. See XML_PARSE_HUGE    *
112
 *                  *
113
 ************************************************************************/
114
115
3.23M
#define XML_MAX_HUGE_LENGTH 1000000000 /* NOTE(review): not referenced in the visible part of this file */
116
117
21.0k
#define XML_PARSER_BIG_ENTITY 1000 /* xmlParserEntityCheck accepts entity sizes below this without further checks */
118
#define XML_PARSER_LOT_ENTITY 5000 /* NOTE(review): not referenced in the visible part of this file */
119
120
/*
121
 * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity
122
 *    replacement over the size in byte of the input indicates that you have
123
 *    an exponential behaviour. A value of 10 corresponds to at least 3 entity
124
 *    replacement per byte of input.
125
 */
126
647k
#define XML_PARSER_NON_LINEAR 10
127
128
/*
129
 * xmlParserEntityCheck
130
 *
131
 * Function to check non-linear entity expansion behaviour
132
 * This is here to detect and stop exponential linear entity expansion
133
 * This is not a limitation of the parser but a safety
134
 * boundary feature. It can be disabled with the XML_PARSE_HUGE
135
 * parser option.
136
 */
137
static int
138
xmlParserEntityCheck(xmlParserCtxtPtr ctxt, size_t size,
139
                     xmlEntityPtr ent, size_t replacement)
140
1.91M
{
141
1.91M
    size_t consumed = 0;
142
1.91M
    int i;
143
144
1.91M
    if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE))
145
1.05M
        return (0);
146
857k
    if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
147
814
        return (1);
148
149
    /*
150
     * This may look absurd but is needed to detect
151
     * entities problems
152
     */
153
856k
    if ((ent != NULL) && (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
154
856k
  (ent->content != NULL) && (ent->checked == 0) &&
155
856k
  (ctxt->errNo != XML_ERR_ENTITY_LOOP)) {
156
136k
  unsigned long oldnbent = ctxt->nbentities, diff;
157
136k
  xmlChar *rep;
158
159
136k
  ent->checked = 1;
160
161
136k
        ++ctxt->depth;
162
136k
  rep = xmlStringDecodeEntities(ctxt, ent->content,
163
136k
          XML_SUBSTITUTE_REF, 0, 0, 0);
164
136k
        --ctxt->depth;
165
136k
  if ((rep == NULL) || (ctxt->errNo == XML_ERR_ENTITY_LOOP)) {
166
1.14k
      ent->content[0] = 0;
167
1.14k
  }
168
169
136k
        diff = ctxt->nbentities - oldnbent + 1;
170
136k
        if (diff > INT_MAX / 2)
171
0
            diff = INT_MAX / 2;
172
136k
  ent->checked = diff * 2;
173
136k
  if (rep != NULL) {
174
135k
      if (xmlStrchr(rep, '<'))
175
5.62k
    ent->checked |= 1;
176
135k
      xmlFree(rep);
177
135k
      rep = NULL;
178
135k
  }
179
136k
    }
180
181
    /*
182
     * Prevent entity exponential check, not just replacement while
183
     * parsing the DTD
184
     * The check is potentially costly so do that only once in a thousand
185
     */
186
856k
    if ((ctxt->instate == XML_PARSER_DTD) && (ctxt->nbentities > 10000) &&
187
856k
        (ctxt->nbentities % 1024 == 0)) {
188
0
  for (i = 0;i < ctxt->inputNr;i++) {
189
0
      consumed += ctxt->inputTab[i]->consumed +
190
0
                 (ctxt->inputTab[i]->cur - ctxt->inputTab[i]->base);
191
0
  }
192
0
  if (ctxt->nbentities > consumed * XML_PARSER_NON_LINEAR) {
193
0
      xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
194
0
      ctxt->instate = XML_PARSER_EOF;
195
0
      return (1);
196
0
  }
197
0
  consumed = 0;
198
0
    }
199
200
201
202
856k
    if (replacement != 0) {
203
29.6k
  if (replacement < XML_MAX_TEXT_LENGTH)
204
29.6k
      return(0);
205
206
        /*
207
   * If the volume of entity copy reaches 10 times the
208
   * amount of parsed data and over the large text threshold
209
   * then that's very likely to be an abuse.
210
   */
211
0
        if (ctxt->input != NULL) {
212
0
      consumed = ctxt->input->consumed +
213
0
                 (ctxt->input->cur - ctxt->input->base);
214
0
  }
215
0
        consumed += ctxt->sizeentities;
216
217
0
        if (replacement < XML_PARSER_NON_LINEAR * consumed)
218
0
      return(0);
219
826k
    } else if (size != 0) {
220
        /*
221
         * Do the check based on the replacement size of the entity
222
         */
223
21.0k
        if (size < XML_PARSER_BIG_ENTITY)
224
20.6k
      return(0);
225
226
        /*
227
         * A limit on the amount of text data reasonably used
228
         */
229
456
        if (ctxt->input != NULL) {
230
456
            consumed = ctxt->input->consumed +
231
456
                (ctxt->input->cur - ctxt->input->base);
232
456
        }
233
456
        consumed += ctxt->sizeentities;
234
235
456
        if ((size < XML_PARSER_NON_LINEAR * consumed) &&
236
456
      (ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed))
237
237
            return (0);
238
805k
    } else if (ent != NULL) {
239
        /*
240
         * use the number of parsed entities in the replacement
241
         */
242
646k
        size = ent->checked / 2;
243
244
        /*
245
         * The amount of data parsed counting entities size only once
246
         */
247
646k
        if (ctxt->input != NULL) {
248
646k
            consumed = ctxt->input->consumed +
249
646k
                (ctxt->input->cur - ctxt->input->base);
250
646k
        }
251
646k
        consumed += ctxt->sizeentities;
252
253
        /*
254
         * Check the density of entities for the amount of data
255
   * knowing an entity reference will take at least 3 bytes
256
         */
257
646k
        if (size * 3 < consumed * XML_PARSER_NON_LINEAR)
258
646k
            return (0);
259
646k
    } else {
260
        /*
261
         * strange we got no data for checking
262
         */
263
158k
  if (((ctxt->lastError.code != XML_ERR_UNDECLARED_ENTITY) &&
264
158k
       (ctxt->lastError.code != XML_WAR_UNDECLARED_ENTITY)) ||
265
158k
      (ctxt->nbentities <= 10000))
266
158k
      return (0);
267
158k
    }
268
219
    xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
269
219
    return (1);
270
856k
}
271
272
/**
273
 * xmlParserMaxDepth:
274
 *
275
 * arbitrary depth limit for the XML documents that we allow to
276
 * process. This is not a limitation of the parser but a safety
277
 * boundary feature. It can be disabled with the XML_PARSE_HUGE
278
 * parser option.
279
 */
280
unsigned int xmlParserMaxDepth = 256;
281
282
283
284
#define SAX2 1
285
40.4M
#define XML_PARSER_BIG_BUFFER_SIZE 300
286
183M
#define XML_PARSER_BUFFER_SIZE 100
287
1.04M
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
288
289
/**
290
 * XML_PARSER_CHUNK_SIZE
291
 *
292
 * When calling GROW that's the minimal amount of data
293
 * the parser expected to have received. It is not a hard
294
 * limit but an optimization when reading strings like Names
295
 * It is not strictly needed as long as inputs available characters
296
 * are followed by 0, which should be provided by the I/O level
297
 */
298
42.2M
#define XML_PARSER_CHUNK_SIZE 100
299
300
/*
301
 * List of XML prefixed PI allowed by W3C specs
302
 */
303
304
static const char* const xmlW3CPIs[] = {    /* PI target names starting with "xml" that are listed as allowed */
305
    "xml-stylesheet",
306
    "xml-model",
307
    NULL    /* NULL entry terminates the list */
308
};
309
310
311
/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
312
static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
313
                                              const xmlChar **str);
314
315
static xmlParserErrors
316
xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
317
                xmlSAXHandlerPtr sax,
318
          void *user_data, int depth, const xmlChar *URL,
319
          const xmlChar *ID, xmlNodePtr *list);
320
321
static int
322
xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
323
                          const char *encoding);
324
#ifdef LIBXML_LEGACY_ENABLED
325
static void
326
xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
327
                      xmlNodePtr lastNode);
328
#endif /* LIBXML_LEGACY_ENABLED */
329
330
static xmlParserErrors
331
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
332
          const xmlChar *string, void *user_data, xmlNodePtr *lst);
333
334
static int
335
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
336
337
/************************************************************************
338
 *                  *
339
 *    Some factorized error routines        *
340
 *                  *
341
 ************************************************************************/
342
343
/**
344
 * xmlErrAttributeDup:
345
 * @ctxt:  an XML parser context
346
 * @prefix:  the attribute prefix
347
 * @localname:  the attribute localname
348
 *
349
 * Handle a redefinition of attribute error
350
 */
351
static void
352
xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
353
                   const xmlChar * localname)
354
7.97k
{
355
7.97k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
356
7.97k
        (ctxt->instate == XML_PARSER_EOF))
357
0
  return;
358
7.97k
    if (ctxt != NULL)
359
7.97k
  ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
360
361
7.97k
    if (prefix == NULL)
362
5.52k
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
363
5.52k
                        XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
364
5.52k
                        (const char *) localname, NULL, NULL, 0, 0,
365
5.52k
                        "Attribute %s redefined\n", localname);
366
2.44k
    else
367
2.44k
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
368
2.44k
                        XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
369
2.44k
                        (const char *) prefix, (const char *) localname,
370
2.44k
                        NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
371
2.44k
                        localname);
372
7.97k
    if (ctxt != NULL) {
373
7.97k
  ctxt->wellFormed = 0;
374
7.97k
  if (ctxt->recovery == 0)
375
3.89k
      ctxt->disableSAX = 1;
376
7.97k
    }
377
7.97k
}
378
379
/**
380
 * xmlFatalErr:
381
 * @ctxt:  an XML parser context
382
 * @error:  the error number
383
 * @extra:  extra information string
384
 *
385
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
386
 */
387
static void
388
xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
389
7.59M
{
390
7.59M
    const char *errmsg;
391
392
7.59M
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
393
7.59M
        (ctxt->instate == XML_PARSER_EOF))
394
3.05M
  return;
395
4.54M
    switch (error) {
396
10.3k
        case XML_ERR_INVALID_HEX_CHARREF:
397
10.3k
            errmsg = "CharRef: invalid hexadecimal value";
398
10.3k
            break;
399
29.9k
        case XML_ERR_INVALID_DEC_CHARREF:
400
29.9k
            errmsg = "CharRef: invalid decimal value";
401
29.9k
            break;
402
0
        case XML_ERR_INVALID_CHARREF:
403
0
            errmsg = "CharRef: invalid value";
404
0
            break;
405
184k
        case XML_ERR_INTERNAL_ERROR:
406
184k
            errmsg = "internal error";
407
184k
            break;
408
0
        case XML_ERR_PEREF_AT_EOF:
409
0
            errmsg = "PEReference at end of document";
410
0
            break;
411
0
        case XML_ERR_PEREF_IN_PROLOG:
412
0
            errmsg = "PEReference in prolog";
413
0
            break;
414
0
        case XML_ERR_PEREF_IN_EPILOG:
415
0
            errmsg = "PEReference in epilog";
416
0
            break;
417
0
        case XML_ERR_PEREF_NO_NAME:
418
0
            errmsg = "PEReference: no name";
419
0
            break;
420
5.95k
        case XML_ERR_PEREF_SEMICOL_MISSING:
421
5.95k
            errmsg = "PEReference: expecting ';'";
422
5.95k
            break;
423
3.21M
        case XML_ERR_ENTITY_LOOP:
424
3.21M
            errmsg = "Detected an entity reference loop";
425
3.21M
            break;
426
0
        case XML_ERR_ENTITY_NOT_STARTED:
427
0
            errmsg = "EntityValue: \" or ' expected";
428
0
            break;
429
335
        case XML_ERR_ENTITY_PE_INTERNAL:
430
335
            errmsg = "PEReferences forbidden in internal subset";
431
335
            break;
432
3.50k
        case XML_ERR_ENTITY_NOT_FINISHED:
433
3.50k
            errmsg = "EntityValue: \" or ' expected";
434
3.50k
            break;
435
28.3k
        case XML_ERR_ATTRIBUTE_NOT_STARTED:
436
28.3k
            errmsg = "AttValue: \" or ' expected";
437
28.3k
            break;
438
65.4k
        case XML_ERR_LT_IN_ATTRIBUTE:
439
65.4k
            errmsg = "Unescaped '<' not allowed in attributes values";
440
65.4k
            break;
441
7.45k
        case XML_ERR_LITERAL_NOT_STARTED:
442
7.45k
            errmsg = "SystemLiteral \" or ' expected";
443
7.45k
            break;
444
9.09k
        case XML_ERR_LITERAL_NOT_FINISHED:
445
9.09k
            errmsg = "Unfinished System or Public ID \" or ' expected";
446
9.09k
            break;
447
10.6k
        case XML_ERR_MISPLACED_CDATA_END:
448
10.6k
            errmsg = "Sequence ']]>' not allowed in content";
449
10.6k
            break;
450
6.56k
        case XML_ERR_URI_REQUIRED:
451
6.56k
            errmsg = "SYSTEM or PUBLIC, the URI is missing";
452
6.56k
            break;
453
893
        case XML_ERR_PUBID_REQUIRED:
454
893
            errmsg = "PUBLIC, the Public Identifier is missing";
455
893
            break;
456
19.6k
        case XML_ERR_HYPHEN_IN_COMMENT:
457
19.6k
            errmsg = "Comment must not contain '--' (double-hyphen)";
458
19.6k
            break;
459
5.91k
        case XML_ERR_PI_NOT_STARTED:
460
5.91k
            errmsg = "xmlParsePI : no target name";
461
5.91k
            break;
462
1.04k
        case XML_ERR_RESERVED_XML_NAME:
463
1.04k
            errmsg = "Invalid PI name";
464
1.04k
            break;
465
411
        case XML_ERR_NOTATION_NOT_STARTED:
466
411
            errmsg = "NOTATION: Name expected here";
467
411
            break;
468
1.46k
        case XML_ERR_NOTATION_NOT_FINISHED:
469
1.46k
            errmsg = "'>' required to close NOTATION declaration";
470
1.46k
            break;
471
8.69k
        case XML_ERR_VALUE_REQUIRED:
472
8.69k
            errmsg = "Entity value required";
473
8.69k
            break;
474
541
        case XML_ERR_URI_FRAGMENT:
475
541
            errmsg = "Fragment not allowed";
476
541
            break;
477
8.08k
        case XML_ERR_ATTLIST_NOT_STARTED:
478
8.08k
            errmsg = "'(' required to start ATTLIST enumeration";
479
8.08k
            break;
480
458
        case XML_ERR_NMTOKEN_REQUIRED:
481
458
            errmsg = "NmToken expected in ATTLIST enumeration";
482
458
            break;
483
1.81k
        case XML_ERR_ATTLIST_NOT_FINISHED:
484
1.81k
            errmsg = "')' required to finish ATTLIST enumeration";
485
1.81k
            break;
486
2.28k
        case XML_ERR_MIXED_NOT_STARTED:
487
2.28k
            errmsg = "MixedContentDecl : '|' or ')*' expected";
488
2.28k
            break;
489
0
        case XML_ERR_PCDATA_REQUIRED:
490
0
            errmsg = "MixedContentDecl : '#PCDATA' expected";
491
0
            break;
492
6.54k
        case XML_ERR_ELEMCONTENT_NOT_STARTED:
493
6.54k
            errmsg = "ContentDecl : Name or '(' expected";
494
6.54k
            break;
495
17.9k
        case XML_ERR_ELEMCONTENT_NOT_FINISHED:
496
17.9k
            errmsg = "ContentDecl : ',' '|' or ')' expected";
497
17.9k
            break;
498
0
        case XML_ERR_PEREF_IN_INT_SUBSET:
499
0
            errmsg =
500
0
                "PEReference: forbidden within markup decl in internal subset";
501
0
            break;
502
135k
        case XML_ERR_GT_REQUIRED:
503
135k
            errmsg = "expected '>'";
504
135k
            break;
505
545
        case XML_ERR_CONDSEC_INVALID:
506
545
            errmsg = "XML conditional section '[' expected";
507
545
            break;
508
28.3k
        case XML_ERR_EXT_SUBSET_NOT_FINISHED:
509
28.3k
            errmsg = "Content error in the external subset";
510
28.3k
            break;
511
3.10k
        case XML_ERR_CONDSEC_INVALID_KEYWORD:
512
3.10k
            errmsg =
513
3.10k
                "conditional section INCLUDE or IGNORE keyword expected";
514
3.10k
            break;
515
2.79k
        case XML_ERR_CONDSEC_NOT_FINISHED:
516
2.79k
            errmsg = "XML conditional section not closed";
517
2.79k
            break;
518
215
        case XML_ERR_XMLDECL_NOT_STARTED:
519
215
            errmsg = "Text declaration '<?xml' required";
520
215
            break;
521
115k
        case XML_ERR_XMLDECL_NOT_FINISHED:
522
115k
            errmsg = "parsing XML declaration: '?>' expected";
523
115k
            break;
524
0
        case XML_ERR_EXT_ENTITY_STANDALONE:
525
0
            errmsg = "external parsed entities cannot be standalone";
526
0
            break;
527
76.5k
        case XML_ERR_ENTITYREF_SEMICOL_MISSING:
528
76.5k
            errmsg = "EntityRef: expecting ';'";
529
76.5k
            break;
530
96.3k
        case XML_ERR_DOCTYPE_NOT_FINISHED:
531
96.3k
            errmsg = "DOCTYPE improperly terminated";
532
96.3k
            break;
533
0
        case XML_ERR_LTSLASH_REQUIRED:
534
0
            errmsg = "EndTag: '</' not found";
535
0
            break;
536
7.46k
        case XML_ERR_EQUAL_REQUIRED:
537
7.46k
            errmsg = "expected '='";
538
7.46k
            break;
539
29.5k
        case XML_ERR_STRING_NOT_CLOSED:
540
29.5k
            errmsg = "String not closed expecting \" or '";
541
29.5k
            break;
542
7.67k
        case XML_ERR_STRING_NOT_STARTED:
543
7.67k
            errmsg = "String not started expecting ' or \"";
544
7.67k
            break;
545
594
        case XML_ERR_ENCODING_NAME:
546
594
            errmsg = "Invalid XML encoding name";
547
594
            break;
548
1.85k
        case XML_ERR_STANDALONE_VALUE:
549
1.85k
            errmsg = "standalone accepts only 'yes' or 'no'";
550
1.85k
            break;
551
48.1k
        case XML_ERR_DOCUMENT_EMPTY:
552
48.1k
            errmsg = "Document is empty";
553
48.1k
            break;
554
260k
        case XML_ERR_DOCUMENT_END:
555
260k
            errmsg = "Extra content at the end of the document";
556
260k
            break;
557
8.35k
        case XML_ERR_NOT_WELL_BALANCED:
558
8.35k
            errmsg = "chunk is not well balanced";
559
8.35k
            break;
560
0
        case XML_ERR_EXTRA_CONTENT:
561
0
            errmsg = "extra content at the end of well balanced chunk";
562
0
            break;
563
62.6k
        case XML_ERR_VERSION_MISSING:
564
62.6k
            errmsg = "Malformed declaration expecting version";
565
62.6k
            break;
566
9
        case XML_ERR_NAME_TOO_LONG:
567
9
            errmsg = "Name too long";
568
9
            break;
569
#if 0
570
        case:
571
            errmsg = "";
572
            break;
573
#endif
574
1.06k
        default:
575
1.06k
            errmsg = "Unregistered error message";
576
4.54M
    }
577
4.54M
    if (ctxt != NULL)
578
4.54M
  ctxt->errNo = error;
579
4.54M
    if (info == NULL) {
580
4.35M
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
581
4.35M
                        XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s\n",
582
4.35M
                        errmsg);
583
4.35M
    } else {
584
184k
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
585
184k
                        XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s: %s\n",
586
184k
                        errmsg, info);
587
184k
    }
588
4.54M
    if (ctxt != NULL) {
589
4.54M
  ctxt->wellFormed = 0;
590
4.54M
  if (ctxt->recovery == 0)
591
3.81M
      ctxt->disableSAX = 1;
592
4.54M
    }
593
4.54M
}
594
595
/**
596
 * xmlFatalErrMsg:
597
 * @ctxt:  an XML parser context
598
 * @error:  the error number
599
 * @msg:  the error message
600
 *
601
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
602
 */
603
static void LIBXML_ATTR_FORMAT(3,0)
604
xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
605
               const char *msg)
606
2.20M
{
607
2.20M
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
608
2.20M
        (ctxt->instate == XML_PARSER_EOF))
609
0
  return;
610
2.20M
    if (ctxt != NULL)
611
2.20M
  ctxt->errNo = error;
612
2.20M
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
613
2.20M
                    XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
614
2.20M
    if (ctxt != NULL) {
615
2.20M
  ctxt->wellFormed = 0;
616
2.20M
  if (ctxt->recovery == 0)
617
692k
      ctxt->disableSAX = 1;
618
2.20M
    }
619
2.20M
}
620
621
/**
622
 * xmlWarningMsg:
623
 * @ctxt:  an XML parser context
624
 * @error:  the error number
625
 * @msg:  the error message
626
 * @str1:  extra data
627
 * @str2:  extra data
628
 *
629
 * Handle a warning.
630
 */
631
static void LIBXML_ATTR_FORMAT(3,0)
632
xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
633
              const char *msg, const xmlChar *str1, const xmlChar *str2)
634
79.8k
{
635
79.8k
    xmlStructuredErrorFunc schannel = NULL;
636
637
79.8k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
638
79.8k
        (ctxt->instate == XML_PARSER_EOF))
639
0
  return;
640
79.8k
    if ((ctxt != NULL) && (ctxt->sax != NULL) &&
641
79.8k
        (ctxt->sax->initialized == XML_SAX2_MAGIC))
642
46.2k
        schannel = ctxt->sax->serror;
643
79.8k
    if (ctxt != NULL) {
644
79.8k
        __xmlRaiseError(schannel,
645
79.8k
                    (ctxt->sax) ? ctxt->sax->warning : NULL,
646
79.8k
                    ctxt->userData,
647
79.8k
                    ctxt, NULL, XML_FROM_PARSER, error,
648
79.8k
                    XML_ERR_WARNING, NULL, 0,
649
79.8k
        (const char *) str1, (const char *) str2, NULL, 0, 0,
650
79.8k
        msg, (const char *) str1, (const char *) str2);
651
79.8k
    } else {
652
0
        __xmlRaiseError(schannel, NULL, NULL,
653
0
                    ctxt, NULL, XML_FROM_PARSER, error,
654
0
                    XML_ERR_WARNING, NULL, 0,
655
0
        (const char *) str1, (const char *) str2, NULL, 0, 0,
656
0
        msg, (const char *) str1, (const char *) str2);
657
0
    }
658
79.8k
}
659
660
/**
661
 * xmlValidityError:
662
 * @ctxt:  an XML parser context
663
 * @error:  the error number
664
 * @msg:  the error message
665
 * @str1:  extra data
666
 *
667
 * Handle a validity error.
668
 */
669
static void LIBXML_ATTR_FORMAT(3,0)
670
xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
671
              const char *msg, const xmlChar *str1, const xmlChar *str2)
672
17.5k
{
673
17.5k
    xmlStructuredErrorFunc schannel = NULL;
674
675
17.5k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
676
17.5k
        (ctxt->instate == XML_PARSER_EOF))
677
0
  return;
678
17.5k
    if (ctxt != NULL) {
679
17.5k
  ctxt->errNo = error;
680
17.5k
  if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
681
10.7k
      schannel = ctxt->sax->serror;
682
17.5k
    }
683
17.5k
    if (ctxt != NULL) {
684
17.5k
        __xmlRaiseError(schannel,
685
17.5k
                    ctxt->vctxt.error, ctxt->vctxt.userData,
686
17.5k
                    ctxt, NULL, XML_FROM_DTD, error,
687
17.5k
                    XML_ERR_ERROR, NULL, 0, (const char *) str1,
688
17.5k
        (const char *) str2, NULL, 0, 0,
689
17.5k
        msg, (const char *) str1, (const char *) str2);
690
17.5k
  ctxt->valid = 0;
691
17.5k
    } else {
692
0
        __xmlRaiseError(schannel, NULL, NULL,
693
0
                    ctxt, NULL, XML_FROM_DTD, error,
694
0
                    XML_ERR_ERROR, NULL, 0, (const char *) str1,
695
0
        (const char *) str2, NULL, 0, 0,
696
0
        msg, (const char *) str1, (const char *) str2);
697
0
    }
698
17.5k
}
699
700
/**
701
 * xmlFatalErrMsgInt:
702
 * @ctxt:  an XML parser context
703
 * @error:  the error number
704
 * @msg:  the error message
705
 * @val:  an integer value
706
 *
707
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
708
 */
709
static void LIBXML_ATTR_FORMAT(3,0)
710
xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
711
                  const char *msg, int val)
712
877k
{
713
877k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
714
877k
        (ctxt->instate == XML_PARSER_EOF))
715
0
  return;
716
877k
    if (ctxt != NULL)
717
877k
  ctxt->errNo = error;
718
877k
    __xmlRaiseError(NULL, NULL, NULL,
719
877k
                    ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
720
877k
                    NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
721
877k
    if (ctxt != NULL) {
722
877k
  ctxt->wellFormed = 0;
723
877k
  if (ctxt->recovery == 0)
724
269k
      ctxt->disableSAX = 1;
725
877k
    }
726
877k
}
727
728
/**
729
 * xmlFatalErrMsgStrIntStr:
730
 * @ctxt:  an XML parser context
731
 * @error:  the error number
732
 * @msg:  the error message
733
 * @str1:  an string info
734
 * @val:  an integer value
735
 * @str2:  an string info
736
 *
737
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
738
 */
739
static void LIBXML_ATTR_FORMAT(3,0)
740
xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
741
                  const char *msg, const xmlChar *str1, int val,
742
      const xmlChar *str2)
743
672k
{
744
672k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
745
672k
        (ctxt->instate == XML_PARSER_EOF))
746
0
  return;
747
672k
    if (ctxt != NULL)
748
672k
  ctxt->errNo = error;
749
672k
    __xmlRaiseError(NULL, NULL, NULL,
750
672k
                    ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
751
672k
                    NULL, 0, (const char *) str1, (const char *) str2,
752
672k
        NULL, val, 0, msg, str1, val, str2);
753
672k
    if (ctxt != NULL) {
754
672k
  ctxt->wellFormed = 0;
755
672k
  if (ctxt->recovery == 0)
756
199k
      ctxt->disableSAX = 1;
757
672k
    }
758
672k
}
759
760
/**
761
 * xmlFatalErrMsgStr:
762
 * @ctxt:  an XML parser context
763
 * @error:  the error number
764
 * @msg:  the error message
765
 * @val:  a string value
766
 *
767
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
768
 */
769
static void LIBXML_ATTR_FORMAT(3,0)
770
xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
771
                  const char *msg, const xmlChar * val)
772
706k
{
773
706k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
774
706k
        (ctxt->instate == XML_PARSER_EOF))
775
0
  return;
776
706k
    if (ctxt != NULL)
777
706k
  ctxt->errNo = error;
778
706k
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
779
706k
                    XML_FROM_PARSER, error, XML_ERR_FATAL,
780
706k
                    NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
781
706k
                    val);
782
706k
    if (ctxt != NULL) {
783
706k
  ctxt->wellFormed = 0;
784
706k
  if (ctxt->recovery == 0)
785
199k
      ctxt->disableSAX = 1;
786
706k
    }
787
706k
}
788
789
/**
790
 * xmlErrMsgStr:
791
 * @ctxt:  an XML parser context
792
 * @error:  the error number
793
 * @msg:  the error message
794
 * @val:  a string value
795
 *
796
 * Handle a non fatal parser error
797
 */
798
static void LIBXML_ATTR_FORMAT(3,0)
799
xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
800
                  const char *msg, const xmlChar * val)
801
50.6k
{
802
50.6k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
803
50.6k
        (ctxt->instate == XML_PARSER_EOF))
804
0
  return;
805
50.6k
    if (ctxt != NULL)
806
50.6k
  ctxt->errNo = error;
807
50.6k
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
808
50.6k
                    XML_FROM_PARSER, error, XML_ERR_ERROR,
809
50.6k
                    NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
810
50.6k
                    val);
811
50.6k
}
812
813
/**
814
 * xmlNsErr:
815
 * @ctxt:  an XML parser context
816
 * @error:  the error number
817
 * @msg:  the message
818
 * @info1:  extra information string
819
 * @info2:  extra information string
820
 *
821
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
822
 */
823
static void LIBXML_ATTR_FORMAT(3,0)
824
xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
825
         const char *msg,
826
         const xmlChar * info1, const xmlChar * info2,
827
         const xmlChar * info3)
828
296k
{
829
296k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
830
296k
        (ctxt->instate == XML_PARSER_EOF))
831
0
  return;
832
296k
    if (ctxt != NULL)
833
296k
  ctxt->errNo = error;
834
296k
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
835
296k
                    XML_ERR_ERROR, NULL, 0, (const char *) info1,
836
296k
                    (const char *) info2, (const char *) info3, 0, 0, msg,
837
296k
                    info1, info2, info3);
838
296k
    if (ctxt != NULL)
839
296k
  ctxt->nsWellFormed = 0;
840
296k
}
841
842
/**
843
 * xmlNsWarn
844
 * @ctxt:  an XML parser context
845
 * @error:  the error number
846
 * @msg:  the message
847
 * @info1:  extra information string
848
 * @info2:  extra information string
849
 *
850
 * Handle a namespace warning error
851
 */
852
static void LIBXML_ATTR_FORMAT(3,0)
853
xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
854
         const char *msg,
855
         const xmlChar * info1, const xmlChar * info2,
856
         const xmlChar * info3)
857
6.24k
{
858
6.24k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
859
6.24k
        (ctxt->instate == XML_PARSER_EOF))
860
0
  return;
861
6.24k
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
862
6.24k
                    XML_ERR_WARNING, NULL, 0, (const char *) info1,
863
6.24k
                    (const char *) info2, (const char *) info3, 0, 0, msg,
864
6.24k
                    info1, info2, info3);
865
6.24k
}
866
867
/************************************************************************
868
 *                  *
869
 *    Library wide options          *
870
 *                  *
871
 ************************************************************************/
872
873
/**
874
  * xmlHasFeature:
875
  * @feature: the feature to be examined
876
  *
877
  * Examines if the library has been compiled with a given feature.
878
  *
879
  * Returns a non-zero value if the feature exist, otherwise zero.
880
  * Returns zero (0) if the feature does not exist or an unknown
881
  * unknown feature is requested, non-zero otherwise.
882
  */
883
int
884
xmlHasFeature(xmlFeature feature)
885
0
{
886
0
    switch (feature) {
887
0
  case XML_WITH_THREAD:
888
0
#ifdef LIBXML_THREAD_ENABLED
889
0
      return(1);
890
#else
891
      return(0);
892
#endif
893
0
        case XML_WITH_TREE:
894
0
#ifdef LIBXML_TREE_ENABLED
895
0
            return(1);
896
#else
897
            return(0);
898
#endif
899
0
        case XML_WITH_OUTPUT:
900
0
#ifdef LIBXML_OUTPUT_ENABLED
901
0
            return(1);
902
#else
903
            return(0);
904
#endif
905
0
        case XML_WITH_PUSH:
906
0
#ifdef LIBXML_PUSH_ENABLED
907
0
            return(1);
908
#else
909
            return(0);
910
#endif
911
0
        case XML_WITH_READER:
912
0
#ifdef LIBXML_READER_ENABLED
913
0
            return(1);
914
#else
915
            return(0);
916
#endif
917
0
        case XML_WITH_PATTERN:
918
0
#ifdef LIBXML_PATTERN_ENABLED
919
0
            return(1);
920
#else
921
            return(0);
922
#endif
923
0
        case XML_WITH_WRITER:
924
0
#ifdef LIBXML_WRITER_ENABLED
925
0
            return(1);
926
#else
927
            return(0);
928
#endif
929
0
        case XML_WITH_SAX1:
930
0
#ifdef LIBXML_SAX1_ENABLED
931
0
            return(1);
932
#else
933
            return(0);
934
#endif
935
0
        case XML_WITH_FTP:
936
#ifdef LIBXML_FTP_ENABLED
937
            return(1);
938
#else
939
0
            return(0);
940
0
#endif
941
0
        case XML_WITH_HTTP:
942
#ifdef LIBXML_HTTP_ENABLED
943
            return(1);
944
#else
945
0
            return(0);
946
0
#endif
947
0
        case XML_WITH_VALID:
948
0
#ifdef LIBXML_VALID_ENABLED
949
0
            return(1);
950
#else
951
            return(0);
952
#endif
953
0
        case XML_WITH_HTML:
954
0
#ifdef LIBXML_HTML_ENABLED
955
0
            return(1);
956
#else
957
            return(0);
958
#endif
959
0
        case XML_WITH_LEGACY:
960
#ifdef LIBXML_LEGACY_ENABLED
961
            return(1);
962
#else
963
0
            return(0);
964
0
#endif
965
0
        case XML_WITH_C14N:
966
0
#ifdef LIBXML_C14N_ENABLED
967
0
            return(1);
968
#else
969
            return(0);
970
#endif
971
0
        case XML_WITH_CATALOG:
972
0
#ifdef LIBXML_CATALOG_ENABLED
973
0
            return(1);
974
#else
975
            return(0);
976
#endif
977
0
        case XML_WITH_XPATH:
978
0
#ifdef LIBXML_XPATH_ENABLED
979
0
            return(1);
980
#else
981
            return(0);
982
#endif
983
0
        case XML_WITH_XPTR:
984
0
#ifdef LIBXML_XPTR_ENABLED
985
0
            return(1);
986
#else
987
            return(0);
988
#endif
989
0
        case XML_WITH_XINCLUDE:
990
0
#ifdef LIBXML_XINCLUDE_ENABLED
991
0
            return(1);
992
#else
993
            return(0);
994
#endif
995
0
        case XML_WITH_ICONV:
996
0
#ifdef LIBXML_ICONV_ENABLED
997
0
            return(1);
998
#else
999
            return(0);
1000
#endif
1001
0
        case XML_WITH_ISO8859X:
1002
0
#ifdef LIBXML_ISO8859X_ENABLED
1003
0
            return(1);
1004
#else
1005
            return(0);
1006
#endif
1007
0
        case XML_WITH_UNICODE:
1008
0
#ifdef LIBXML_UNICODE_ENABLED
1009
0
            return(1);
1010
#else
1011
            return(0);
1012
#endif
1013
0
        case XML_WITH_REGEXP:
1014
0
#ifdef LIBXML_REGEXP_ENABLED
1015
0
            return(1);
1016
#else
1017
            return(0);
1018
#endif
1019
0
        case XML_WITH_AUTOMATA:
1020
0
#ifdef LIBXML_AUTOMATA_ENABLED
1021
0
            return(1);
1022
#else
1023
            return(0);
1024
#endif
1025
0
        case XML_WITH_EXPR:
1026
#ifdef LIBXML_EXPR_ENABLED
1027
            return(1);
1028
#else
1029
0
            return(0);
1030
0
#endif
1031
0
        case XML_WITH_SCHEMAS:
1032
0
#ifdef LIBXML_SCHEMAS_ENABLED
1033
0
            return(1);
1034
#else
1035
            return(0);
1036
#endif
1037
0
        case XML_WITH_SCHEMATRON:
1038
0
#ifdef LIBXML_SCHEMATRON_ENABLED
1039
0
            return(1);
1040
#else
1041
            return(0);
1042
#endif
1043
0
        case XML_WITH_MODULES:
1044
0
#ifdef LIBXML_MODULES_ENABLED
1045
0
            return(1);
1046
#else
1047
            return(0);
1048
#endif
1049
0
        case XML_WITH_DEBUG:
1050
#ifdef LIBXML_DEBUG_ENABLED
1051
            return(1);
1052
#else
1053
0
            return(0);
1054
0
#endif
1055
0
        case XML_WITH_DEBUG_MEM:
1056
#ifdef DEBUG_MEMORY_LOCATION
1057
            return(1);
1058
#else
1059
0
            return(0);
1060
0
#endif
1061
0
        case XML_WITH_DEBUG_RUN:
1062
0
            return(0);
1063
0
        case XML_WITH_ZLIB:
1064
0
#ifdef LIBXML_ZLIB_ENABLED
1065
0
            return(1);
1066
#else
1067
            return(0);
1068
#endif
1069
0
        case XML_WITH_LZMA:
1070
0
#ifdef LIBXML_LZMA_ENABLED
1071
0
            return(1);
1072
#else
1073
            return(0);
1074
#endif
1075
0
        case XML_WITH_ICU:
1076
#ifdef LIBXML_ICU_ENABLED
1077
            return(1);
1078
#else
1079
0
            return(0);
1080
0
#endif
1081
0
        default:
1082
0
      break;
1083
0
     }
1084
0
     return(0);
1085
0
}
1086
1087
/************************************************************************
1088
 *                  *
1089
 *    SAX2 defaulted attributes handling      *
1090
 *                  *
1091
 ************************************************************************/
1092
1093
/**
1094
 * xmlDetectSAX2:
1095
 * @ctxt:  an XML parser context
1096
 *
1097
 * Do the SAX2 detection and specific initialization
1098
 */
1099
static void
1100
4.66M
xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
1101
4.66M
    xmlSAXHandlerPtr sax;
1102
1103
    /* Avoid unused variable warning if features are disabled. */
1104
4.66M
    (void) sax;
1105
1106
4.66M
    if (ctxt == NULL) return;
1107
4.66M
    sax = ctxt->sax;
1108
4.66M
#ifdef LIBXML_SAX1_ENABLED
1109
4.66M
    if ((sax) &&  (sax->initialized == XML_SAX2_MAGIC) &&
1110
4.66M
        ((sax->startElementNs != NULL) ||
1111
2.82M
         (sax->endElementNs != NULL) ||
1112
2.82M
         ((sax->startElement == NULL) && (sax->endElement == NULL))))
1113
2.82M
        ctxt->sax2 = 1;
1114
#else
1115
    ctxt->sax2 = 1;
1116
#endif /* LIBXML_SAX1_ENABLED */
1117
1118
4.66M
    ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
1119
4.66M
    ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
1120
4.66M
    ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
1121
4.66M
    if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
1122
4.66M
    (ctxt->str_xml_ns == NULL)) {
1123
0
        xmlErrMemory(ctxt, NULL);
1124
0
    }
1125
4.66M
}
1126
1127
typedef struct _xmlDefAttrs xmlDefAttrs;
1128
typedef xmlDefAttrs *xmlDefAttrsPtr;
1129
struct _xmlDefAttrs {
1130
    int nbAttrs;  /* number of defaulted attributes on that element */
1131
    int maxAttrs;       /* the size of the array */
1132
#if __STDC_VERSION__ >= 199901L
1133
    /* Using a C99 flexible array member avoids UBSan errors. */
1134
    const xmlChar *values[]; /* array of localname/prefix/values/external */
1135
#else
1136
    const xmlChar *values[5];
1137
#endif
1138
};
1139
1140
/**
1141
 * xmlAttrNormalizeSpace:
1142
 * @src: the source string
1143
 * @dst: the target string
1144
 *
1145
 * Normalize the space in non CDATA attribute values:
1146
 * If the attribute type is not CDATA, then the XML processor MUST further
1147
 * process the normalized attribute value by discarding any leading and
1148
 * trailing space (#x20) characters, and by replacing sequences of space
1149
 * (#x20) characters by a single space (#x20) character.
1150
 * Note that the size of dst need to be at least src, and if one doesn't need
1151
 * to preserve dst (and it doesn't come from a dictionary or read-only) then
1152
 * passing src as dst is just fine.
1153
 *
1154
 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1155
 *         is needed.
1156
 */
1157
static xmlChar *
1158
xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
1159
79.4k
{
1160
79.4k
    if ((src == NULL) || (dst == NULL))
1161
0
        return(NULL);
1162
1163
109k
    while (*src == 0x20) src++;
1164
737k
    while (*src != 0) {
1165
657k
  if (*src == 0x20) {
1166
203k
      while (*src == 0x20) src++;
1167
54.9k
      if (*src != 0)
1168
37.9k
    *dst++ = 0x20;
1169
602k
  } else {
1170
602k
      *dst++ = *src++;
1171
602k
  }
1172
657k
    }
1173
79.4k
    *dst = 0;
1174
79.4k
    if (dst == src)
1175
56.3k
       return(NULL);
1176
23.1k
    return(dst);
1177
79.4k
}
1178
1179
/**
1180
 * xmlAttrNormalizeSpace2:
1181
 * @src: the source string
1182
 *
1183
 * Normalize the space in non CDATA attribute values, a slightly more complex
1184
 * front end to avoid allocation problems when running on attribute values
1185
 * coming from the input.
1186
 *
1187
 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1188
 *         is needed.
1189
 */
1190
static const xmlChar *
1191
xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
1192
54.0k
{
1193
54.0k
    int i;
1194
54.0k
    int remove_head = 0;
1195
54.0k
    int need_realloc = 0;
1196
54.0k
    const xmlChar *cur;
1197
1198
54.0k
    if ((ctxt == NULL) || (src == NULL) || (len == NULL))
1199
0
        return(NULL);
1200
54.0k
    i = *len;
1201
54.0k
    if (i <= 0)
1202
2.50k
        return(NULL);
1203
1204
51.5k
    cur = src;
1205
70.0k
    while (*cur == 0x20) {
1206
18.4k
        cur++;
1207
18.4k
  remove_head++;
1208
18.4k
    }
1209
439k
    while (*cur != 0) {
1210
397k
  if (*cur == 0x20) {
1211
33.4k
      cur++;
1212
33.4k
      if ((*cur == 0x20) || (*cur == 0)) {
1213
8.93k
          need_realloc = 1;
1214
8.93k
    break;
1215
8.93k
      }
1216
33.4k
  } else
1217
363k
      cur++;
1218
397k
    }
1219
51.5k
    if (need_realloc) {
1220
8.93k
        xmlChar *ret;
1221
1222
8.93k
  ret = xmlStrndup(src + remove_head, i - remove_head + 1);
1223
8.93k
  if (ret == NULL) {
1224
0
      xmlErrMemory(ctxt, NULL);
1225
0
      return(NULL);
1226
0
  }
1227
8.93k
  xmlAttrNormalizeSpace(ret, ret);
1228
8.93k
  *len = strlen((const char *)ret);
1229
8.93k
        return(ret);
1230
42.6k
    } else if (remove_head) {
1231
1.81k
        *len -= remove_head;
1232
1.81k
        memmove(src, src + remove_head, 1 + *len);
1233
1.81k
  return(src);
1234
1.81k
    }
1235
40.8k
    return(NULL);
1236
51.5k
}
1237
1238
/**
1239
 * xmlAddDefAttrs:
1240
 * @ctxt:  an XML parser context
1241
 * @fullname:  the element fullname
1242
 * @fullattr:  the attribute fullname
1243
 * @value:  the attribute value
1244
 *
1245
 * Add a defaulted attribute for an element
1246
 */
1247
static void
1248
xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1249
               const xmlChar *fullname,
1250
               const xmlChar *fullattr,
1251
122k
               const xmlChar *value) {
1252
122k
    xmlDefAttrsPtr defaults;
1253
122k
    int len;
1254
122k
    const xmlChar *name;
1255
122k
    const xmlChar *prefix;
1256
1257
    /*
1258
     * Allows to detect attribute redefinitions
1259
     */
1260
122k
    if (ctxt->attsSpecial != NULL) {
1261
82.1k
        if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1262
29.1k
      return;
1263
82.1k
    }
1264
1265
93.4k
    if (ctxt->attsDefault == NULL) {
1266
45.0k
        ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
1267
45.0k
  if (ctxt->attsDefault == NULL)
1268
0
      goto mem_error;
1269
45.0k
    }
1270
1271
    /*
1272
     * split the element name into prefix:localname , the string found
1273
     * are within the DTD and then not associated to namespace names.
1274
     */
1275
93.4k
    name = xmlSplitQName3(fullname, &len);
1276
93.4k
    if (name == NULL) {
1277
66.2k
        name = xmlDictLookup(ctxt->dict, fullname, -1);
1278
66.2k
  prefix = NULL;
1279
66.2k
    } else {
1280
27.2k
        name = xmlDictLookup(ctxt->dict, name, -1);
1281
27.2k
  prefix = xmlDictLookup(ctxt->dict, fullname, len);
1282
27.2k
    }
1283
1284
    /*
1285
     * make sure there is some storage
1286
     */
1287
93.4k
    defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1288
93.4k
    if (defaults == NULL) {
1289
62.1k
        defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
1290
62.1k
                     (4 * 5) * sizeof(const xmlChar *));
1291
62.1k
  if (defaults == NULL)
1292
0
      goto mem_error;
1293
62.1k
  defaults->nbAttrs = 0;
1294
62.1k
  defaults->maxAttrs = 4;
1295
62.1k
  if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1296
62.1k
                          defaults, NULL) < 0) {
1297
0
      xmlFree(defaults);
1298
0
      goto mem_error;
1299
0
  }
1300
62.1k
    } else if (defaults->nbAttrs >= defaults->maxAttrs) {
1301
2.58k
        xmlDefAttrsPtr temp;
1302
1303
2.58k
        temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
1304
2.58k
           (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
1305
2.58k
  if (temp == NULL)
1306
0
      goto mem_error;
1307
2.58k
  defaults = temp;
1308
2.58k
  defaults->maxAttrs *= 2;
1309
2.58k
  if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1310
2.58k
                          defaults, NULL) < 0) {
1311
0
      xmlFree(defaults);
1312
0
      goto mem_error;
1313
0
  }
1314
2.58k
    }
1315
1316
    /*
1317
     * Split the element name into prefix:localname , the string found
1318
     * are within the DTD and hen not associated to namespace names.
1319
     */
1320
93.4k
    name = xmlSplitQName3(fullattr, &len);
1321
93.4k
    if (name == NULL) {
1322
62.6k
        name = xmlDictLookup(ctxt->dict, fullattr, -1);
1323
62.6k
  prefix = NULL;
1324
62.6k
    } else {
1325
30.7k
        name = xmlDictLookup(ctxt->dict, name, -1);
1326
30.7k
  prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1327
30.7k
    }
1328
1329
93.4k
    defaults->values[5 * defaults->nbAttrs] = name;
1330
93.4k
    defaults->values[5 * defaults->nbAttrs + 1] = prefix;
1331
    /* intern the string and precompute the end */
1332
93.4k
    len = xmlStrlen(value);
1333
93.4k
    value = xmlDictLookup(ctxt->dict, value, len);
1334
93.4k
    defaults->values[5 * defaults->nbAttrs + 2] = value;
1335
93.4k
    defaults->values[5 * defaults->nbAttrs + 3] = value + len;
1336
93.4k
    if (ctxt->external)
1337
6.89k
        defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
1338
86.5k
    else
1339
86.5k
        defaults->values[5 * defaults->nbAttrs + 4] = NULL;
1340
93.4k
    defaults->nbAttrs++;
1341
1342
93.4k
    return;
1343
1344
0
mem_error:
1345
0
    xmlErrMemory(ctxt, NULL);
1346
0
    return;
1347
93.4k
}
1348
1349
/**
1350
 * xmlAddSpecialAttr:
1351
 * @ctxt:  an XML parser context
1352
 * @fullname:  the element fullname
1353
 * @fullattr:  the attribute fullname
1354
 * @type:  the attribute type
1355
 *
1356
 * Register this attribute type
1357
 */
1358
static void
1359
xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1360
      const xmlChar *fullname,
1361
      const xmlChar *fullattr,
1362
      int type)
1363
1.09M
{
1364
1.09M
    if (ctxt->attsSpecial == NULL) {
1365
87.6k
        ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
1366
87.6k
  if (ctxt->attsSpecial == NULL)
1367
0
      goto mem_error;
1368
87.6k
    }
1369
1370
1.09M
    if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1371
236k
        return;
1372
1373
862k
    xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1374
862k
                     (void *) (ptrdiff_t) type);
1375
862k
    return;
1376
1377
0
mem_error:
1378
0
    xmlErrMemory(ctxt, NULL);
1379
0
    return;
1380
1.09M
}
1381
1382
/**
1383
 * xmlCleanSpecialAttrCallback:
1384
 *
1385
 * Removes CDATA attributes from the special attribute table
1386
 */
1387
static void
1388
xmlCleanSpecialAttrCallback(void *payload, void *data,
1389
                            const xmlChar *fullname, const xmlChar *fullattr,
1390
858k
                            const xmlChar *unused ATTRIBUTE_UNUSED) {
1391
858k
    xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1392
1393
858k
    if (((ptrdiff_t) payload) == XML_ATTRIBUTE_CDATA) {
1394
396k
        xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1395
396k
    }
1396
858k
}
1397
1398
/**
1399
 * xmlCleanSpecialAttr:
1400
 * @ctxt:  an XML parser context
1401
 *
1402
 * Trim the list of attributes defined to remove all those of type
1403
 * CDATA as they are not special. This call should be done when finishing
1404
 * to parse the DTD and before starting to parse the document root.
1405
 */
1406
static void
1407
xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1408
382k
{
1409
382k
    if (ctxt->attsSpecial == NULL)
1410
296k
        return;
1411
1412
86.2k
    xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1413
1414
86.2k
    if (xmlHashSize(ctxt->attsSpecial) == 0) {
1415
12.4k
        xmlHashFree(ctxt->attsSpecial, NULL);
1416
12.4k
        ctxt->attsSpecial = NULL;
1417
12.4k
    }
1418
86.2k
    return;
1419
382k
}
1420
1421
/**
1422
 * xmlCheckLanguageID:
1423
 * @lang:  pointer to the string value
1424
 *
1425
 * Checks that the value conforms to the LanguageID production:
1426
 *
1427
 * NOTE: this is somewhat deprecated, those productions were removed from
1428
 *       the XML Second edition.
1429
 *
1430
 * [33] LanguageID ::= Langcode ('-' Subcode)*
1431
 * [34] Langcode ::= ISO639Code |  IanaCode |  UserCode
1432
 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1433
 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1434
 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1435
 * [38] Subcode ::= ([a-z] | [A-Z])+
1436
 *
1437
 * The current REC reference the successors of RFC 1766, currently 5646
1438
 *
1439
 * http://www.rfc-editor.org/rfc/rfc5646.txt
1440
 * langtag       = language
1441
 *                 ["-" script]
1442
 *                 ["-" region]
1443
 *                 *("-" variant)
1444
 *                 *("-" extension)
1445
 *                 ["-" privateuse]
1446
 * language      = 2*3ALPHA            ; shortest ISO 639 code
1447
 *                 ["-" extlang]       ; sometimes followed by
1448
 *                                     ; extended language subtags
1449
 *               / 4ALPHA              ; or reserved for future use
1450
 *               / 5*8ALPHA            ; or registered language subtag
1451
 *
1452
 * extlang       = 3ALPHA              ; selected ISO 639 codes
1453
 *                 *2("-" 3ALPHA)      ; permanently reserved
1454
 *
1455
 * script        = 4ALPHA              ; ISO 15924 code
1456
 *
1457
 * region        = 2ALPHA              ; ISO 3166-1 code
1458
 *               / 3DIGIT              ; UN M.49 code
1459
 *
1460
 * variant       = 5*8alphanum         ; registered variants
1461
 *               / (DIGIT 3alphanum)
1462
 *
1463
 * extension     = singleton 1*("-" (2*8alphanum))
1464
 *
1465
 *                                     ; Single alphanumerics
1466
 *                                     ; "x" reserved for private use
1467
 * singleton     = DIGIT               ; 0 - 9
1468
 *               / %x41-57             ; A - W
1469
 *               / %x59-5A             ; Y - Z
1470
 *               / %x61-77             ; a - w
1471
 *               / %x79-7A             ; y - z
1472
 *
1473
 * it sounds right to still allow Irregular i-xxx IANA and user codes too
1474
 * The parser below doesn't try to cope with extension or privateuse
1475
 * that could be added but that's not interoperable anyway
1476
 *
1477
 * Returns 1 if correct 0 otherwise
1478
 **/
1479
int
1480
xmlCheckLanguageID(const xmlChar * lang)
1481
13.5k
{
1482
13.5k
    const xmlChar *cur = lang, *nxt;
1483
1484
13.5k
    if (cur == NULL)
1485
490
        return (0);
1486
13.0k
    if (((cur[0] == 'i') && (cur[1] == '-')) ||
1487
13.0k
        ((cur[0] == 'I') && (cur[1] == '-')) ||
1488
13.0k
        ((cur[0] == 'x') && (cur[1] == '-')) ||
1489
13.0k
        ((cur[0] == 'X') && (cur[1] == '-'))) {
1490
        /*
1491
         * Still allow IANA code and user code which were coming
1492
         * from the previous version of the XML-1.0 specification
1493
         * it's deprecated but we should not fail
1494
         */
1495
594
        cur += 2;
1496
5.08k
        while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1497
5.08k
               ((cur[0] >= 'a') && (cur[0] <= 'z')))
1498
4.48k
            cur++;
1499
594
        return(cur[0] == 0);
1500
594
    }
1501
12.4k
    nxt = cur;
1502
51.0k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1503
51.0k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1504
38.6k
           nxt++;
1505
12.4k
    if (nxt - cur >= 4) {
1506
        /*
1507
         * Reserved
1508
         */
1509
1.06k
        if ((nxt - cur > 8) || (nxt[0] != 0))
1510
619
            return(0);
1511
449
        return(1);
1512
1.06k
    }
1513
11.4k
    if (nxt - cur < 2)
1514
633
        return(0);
1515
    /* we got an ISO 639 code */
1516
10.7k
    if (nxt[0] == 0)
1517
5.13k
        return(1);
1518
5.64k
    if (nxt[0] != '-')
1519
586
        return(0);
1520
1521
5.05k
    nxt++;
1522
5.05k
    cur = nxt;
1523
    /* now we can have extlang or script or region or variant */
1524
5.05k
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1525
639
        goto region_m49;
1526
1527
21.7k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1528
21.7k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1529
17.3k
           nxt++;
1530
4.42k
    if (nxt - cur == 4)
1531
1.16k
        goto script;
1532
3.25k
    if (nxt - cur == 2)
1533
882
        goto region;
1534
2.37k
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1535
226
        goto variant;
1536
2.14k
    if (nxt - cur != 3)
1537
653
        return(0);
1538
    /* we parsed an extlang */
1539
1.49k
    if (nxt[0] == 0)
1540
189
        return(1);
1541
1.30k
    if (nxt[0] != '-')
1542
232
        return(0);
1543
1544
1.07k
    nxt++;
1545
1.07k
    cur = nxt;
1546
    /* now we can have script or region or variant */
1547
1.07k
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1548
227
        goto region_m49;
1549
1550
6.12k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1551
6.12k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1552
5.27k
           nxt++;
1553
848
    if (nxt - cur == 2)
1554
176
        goto region;
1555
672
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1556
166
        goto variant;
1557
506
    if (nxt - cur != 4)
1558
311
        return(0);
1559
    /* we parsed a script */
1560
1.35k
script:
1561
1.35k
    if (nxt[0] == 0)
1562
210
        return(1);
1563
1.14k
    if (nxt[0] != '-')
1564
271
        return(0);
1565
1566
877
    nxt++;
1567
877
    cur = nxt;
1568
    /* now we can have region or variant */
1569
877
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1570
205
        goto region_m49;
1571
1572
5.36k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1573
5.36k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1574
4.69k
           nxt++;
1575
1576
672
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1577
171
        goto variant;
1578
501
    if (nxt - cur != 2)
1579
342
        return(0);
1580
    /* we parsed a region */
1581
1.60k
region:
1582
1.60k
    if (nxt[0] == 0)
1583
231
        return(1);
1584
1.36k
    if (nxt[0] != '-')
1585
665
        return(0);
1586
1587
704
    nxt++;
1588
704
    cur = nxt;
1589
    /* now we can just have a variant */
1590
6.56k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1591
6.56k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1592
5.86k
           nxt++;
1593
1594
704
    if ((nxt - cur < 5) || (nxt - cur > 8))
1595
451
        return(0);
1596
1597
    /* we parsed a variant */
1598
816
variant:
1599
816
    if (nxt[0] == 0)
1600
205
        return(1);
1601
611
    if (nxt[0] != '-')
1602
547
        return(0);
1603
    /* extensions and private use subtags not checked */
1604
64
    return (1);
1605
1606
1.07k
region_m49:
1607
1.07k
    if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
1608
1.07k
        ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
1609
383
        nxt += 3;
1610
383
        goto region;
1611
383
    }
1612
688
    return(0);
1613
1.07k
}
1614
1615
/************************************************************************
1616
 *                  *
1617
 *    Parser stacks related functions and macros    *
1618
 *                  *
1619
 ************************************************************************/
1620
1621
static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1622
                                            const xmlChar ** str);
1623
1624
#ifdef SAX2
1625
/**
1626
 * nsPush:
1627
 * @ctxt:  an XML parser context
1628
 * @prefix:  the namespace prefix or NULL
1629
 * @URL:  the namespace name
1630
 *
1631
 * Pushes a new parser namespace on top of the ns stack
1632
 *
1633
 * Returns -1 in case of error, -2 if the namespace should be discarded
1634
 *     and the index in the stack otherwise.
1635
 */
1636
static int
1637
nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1638
615k
{
1639
615k
    if (ctxt->options & XML_PARSE_NSCLEAN) {
1640
97.8k
        int i;
1641
113k
  for (i = ctxt->nsNr - 2;i >= 0;i -= 2) {
1642
19.9k
      if (ctxt->nsTab[i] == prefix) {
1643
    /* in scope */
1644
4.12k
          if (ctxt->nsTab[i + 1] == URL)
1645
1.59k
        return(-2);
1646
    /* out of scope keep it */
1647
2.52k
    break;
1648
4.12k
      }
1649
19.9k
  }
1650
97.8k
    }
1651
614k
    if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1652
173k
  ctxt->nsMax = 10;
1653
173k
  ctxt->nsNr = 0;
1654
173k
  ctxt->nsTab = (const xmlChar **)
1655
173k
                xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1656
173k
  if (ctxt->nsTab == NULL) {
1657
0
      xmlErrMemory(ctxt, NULL);
1658
0
      ctxt->nsMax = 0;
1659
0
            return (-1);
1660
0
  }
1661
440k
    } else if (ctxt->nsNr >= ctxt->nsMax) {
1662
9.99k
        const xmlChar ** tmp;
1663
9.99k
        ctxt->nsMax *= 2;
1664
9.99k
        tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1665
9.99k
            ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1666
9.99k
        if (tmp == NULL) {
1667
0
            xmlErrMemory(ctxt, NULL);
1668
0
      ctxt->nsMax /= 2;
1669
0
            return (-1);
1670
0
        }
1671
9.99k
  ctxt->nsTab = tmp;
1672
9.99k
    }
1673
614k
    ctxt->nsTab[ctxt->nsNr++] = prefix;
1674
614k
    ctxt->nsTab[ctxt->nsNr++] = URL;
1675
614k
    return (ctxt->nsNr);
1676
614k
}
1677
/**
1678
 * nsPop:
1679
 * @ctxt: an XML parser context
1680
 * @nr:  the number to pop
1681
 *
1682
 * Pops the top @nr parser prefix/namespace from the ns stack
1683
 *
1684
 * Returns the number of namespaces removed
1685
 */
1686
static int
1687
nsPop(xmlParserCtxtPtr ctxt, int nr)
1688
67.4k
{
1689
67.4k
    int i;
1690
1691
67.4k
    if (ctxt->nsTab == NULL) return(0);
1692
67.4k
    if (ctxt->nsNr < nr) {
1693
0
        xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1694
0
        nr = ctxt->nsNr;
1695
0
    }
1696
67.4k
    if (ctxt->nsNr <= 0)
1697
0
        return (0);
1698
1699
222k
    for (i = 0;i < nr;i++) {
1700
154k
         ctxt->nsNr--;
1701
154k
   ctxt->nsTab[ctxt->nsNr] = NULL;
1702
154k
    }
1703
67.4k
    return(nr);
1704
67.4k
}
1705
#endif
1706
1707
static int
1708
221k
xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1709
221k
    const xmlChar **atts;
1710
221k
    int *attallocs;
1711
221k
    int maxatts;
1712
1713
221k
    if (ctxt->atts == NULL) {
1714
221k
  maxatts = 55; /* allow for 10 attrs by default */
1715
221k
  atts = (const xmlChar **)
1716
221k
         xmlMalloc(maxatts * sizeof(xmlChar *));
1717
221k
  if (atts == NULL) goto mem_error;
1718
221k
  ctxt->atts = atts;
1719
221k
  attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1720
221k
  if (attallocs == NULL) goto mem_error;
1721
221k
  ctxt->attallocs = attallocs;
1722
221k
  ctxt->maxatts = maxatts;
1723
221k
    } else if (nr + 5 > ctxt->maxatts) {
1724
217
  maxatts = (nr + 5) * 2;
1725
217
  atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1726
217
             maxatts * sizeof(const xmlChar *));
1727
217
  if (atts == NULL) goto mem_error;
1728
217
  ctxt->atts = atts;
1729
217
  attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1730
217
                               (maxatts / 5) * sizeof(int));
1731
217
  if (attallocs == NULL) goto mem_error;
1732
217
  ctxt->attallocs = attallocs;
1733
217
  ctxt->maxatts = maxatts;
1734
217
    }
1735
221k
    return(ctxt->maxatts);
1736
0
mem_error:
1737
0
    xmlErrMemory(ctxt, NULL);
1738
0
    return(-1);
1739
221k
}
1740
1741
/**
1742
 * inputPush:
1743
 * @ctxt:  an XML parser context
1744
 * @value:  the parser input
1745
 *
1746
 * Pushes a new parser input on top of the input stack
1747
 *
1748
 * Returns -1 in case of error, the index in the stack otherwise
1749
 */
1750
int
1751
inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1752
4.97M
{
1753
4.97M
    if ((ctxt == NULL) || (value == NULL))
1754
0
        return(-1);
1755
4.97M
    if (ctxt->inputNr >= ctxt->inputMax) {
1756
2.26k
        ctxt->inputMax *= 2;
1757
2.26k
        ctxt->inputTab =
1758
2.26k
            (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1759
2.26k
                                             ctxt->inputMax *
1760
2.26k
                                             sizeof(ctxt->inputTab[0]));
1761
2.26k
        if (ctxt->inputTab == NULL) {
1762
0
            xmlErrMemory(ctxt, NULL);
1763
0
      ctxt->inputMax /= 2;
1764
0
            return (-1);
1765
0
        }
1766
2.26k
    }
1767
4.97M
    ctxt->inputTab[ctxt->inputNr] = value;
1768
4.97M
    ctxt->input = value;
1769
4.97M
    return (ctxt->inputNr++);
1770
4.97M
}
1771
/**
1772
 * inputPop:
1773
 * @ctxt: an XML parser context
1774
 *
1775
 * Pops the top parser input from the input stack
1776
 *
1777
 * Returns the input just removed
1778
 */
1779
xmlParserInputPtr
1780
inputPop(xmlParserCtxtPtr ctxt)
1781
13.9M
{
1782
13.9M
    xmlParserInputPtr ret;
1783
1784
13.9M
    if (ctxt == NULL)
1785
0
        return(NULL);
1786
13.9M
    if (ctxt->inputNr <= 0)
1787
9.06M
        return (NULL);
1788
4.92M
    ctxt->inputNr--;
1789
4.92M
    if (ctxt->inputNr > 0)
1790
545k
        ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1791
4.37M
    else
1792
4.37M
        ctxt->input = NULL;
1793
4.92M
    ret = ctxt->inputTab[ctxt->inputNr];
1794
4.92M
    ctxt->inputTab[ctxt->inputNr] = NULL;
1795
4.92M
    return (ret);
1796
13.9M
}
1797
/**
1798
 * nodePush:
1799
 * @ctxt:  an XML parser context
1800
 * @value:  the element node
1801
 *
1802
 * Pushes a new element node on top of the node stack
1803
 *
1804
 * Returns -1 in case of error, the index in the stack otherwise
1805
 */
1806
int
1807
nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1808
14.2M
{
1809
14.2M
    if (ctxt == NULL) return(0);
1810
14.2M
    if (ctxt->nodeNr >= ctxt->nodeMax) {
1811
38.8k
        xmlNodePtr *tmp;
1812
1813
38.8k
  tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1814
38.8k
                                      ctxt->nodeMax * 2 *
1815
38.8k
                                      sizeof(ctxt->nodeTab[0]));
1816
38.8k
        if (tmp == NULL) {
1817
0
            xmlErrMemory(ctxt, NULL);
1818
0
            return (-1);
1819
0
        }
1820
38.8k
        ctxt->nodeTab = tmp;
1821
38.8k
  ctxt->nodeMax *= 2;
1822
38.8k
    }
1823
14.2M
    if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1824
14.2M
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
1825
0
  xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
1826
0
     "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
1827
0
        xmlParserMaxDepth);
1828
0
  xmlHaltParser(ctxt);
1829
0
  return(-1);
1830
0
    }
1831
14.2M
    ctxt->nodeTab[ctxt->nodeNr] = value;
1832
14.2M
    ctxt->node = value;
1833
14.2M
    return (ctxt->nodeNr++);
1834
14.2M
}
1835
1836
/**
1837
 * nodePop:
1838
 * @ctxt: an XML parser context
1839
 *
1840
 * Pops the top element node from the node stack
1841
 *
1842
 * Returns the node just removed
1843
 */
1844
xmlNodePtr
1845
nodePop(xmlParserCtxtPtr ctxt)
1846
6.95M
{
1847
6.95M
    xmlNodePtr ret;
1848
1849
6.95M
    if (ctxt == NULL) return(NULL);
1850
6.95M
    if (ctxt->nodeNr <= 0)
1851
84.9k
        return (NULL);
1852
6.87M
    ctxt->nodeNr--;
1853
6.87M
    if (ctxt->nodeNr > 0)
1854
6.51M
        ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1855
361k
    else
1856
361k
        ctxt->node = NULL;
1857
6.87M
    ret = ctxt->nodeTab[ctxt->nodeNr];
1858
6.87M
    ctxt->nodeTab[ctxt->nodeNr] = NULL;
1859
6.87M
    return (ret);
1860
6.95M
}
1861
1862
/**
1863
 * nameNsPush:
1864
 * @ctxt:  an XML parser context
1865
 * @value:  the element name
1866
 * @prefix:  the element prefix
1867
 * @URI:  the element namespace name
1868
 * @line:  the current line number for error messages
1869
 * @nsNr:  the number of namespaces pushed on the namespace table
1870
 *
1871
 * Pushes a new element name/prefix/URL on top of the name stack
1872
 *
1873
 * Returns -1 in case of error, the index in the stack otherwise
1874
 */
1875
static int
1876
nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1877
           const xmlChar *prefix, const xmlChar *URI, int line, int nsNr)
1878
10.9M
{
1879
10.9M
    xmlStartTag *tag;
1880
1881
10.9M
    if (ctxt->nameNr >= ctxt->nameMax) {
1882
44.4k
        const xmlChar * *tmp;
1883
44.4k
        xmlStartTag *tmp2;
1884
44.4k
        ctxt->nameMax *= 2;
1885
44.4k
        tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1886
44.4k
                                    ctxt->nameMax *
1887
44.4k
                                    sizeof(ctxt->nameTab[0]));
1888
44.4k
        if (tmp == NULL) {
1889
0
      ctxt->nameMax /= 2;
1890
0
      goto mem_error;
1891
0
        }
1892
44.4k
  ctxt->nameTab = tmp;
1893
44.4k
        tmp2 = (xmlStartTag *) xmlRealloc((void * *)ctxt->pushTab,
1894
44.4k
                                    ctxt->nameMax *
1895
44.4k
                                    sizeof(ctxt->pushTab[0]));
1896
44.4k
        if (tmp2 == NULL) {
1897
0
      ctxt->nameMax /= 2;
1898
0
      goto mem_error;
1899
0
        }
1900
44.4k
  ctxt->pushTab = tmp2;
1901
10.9M
    } else if (ctxt->pushTab == NULL) {
1902
3.73M
        ctxt->pushTab = (xmlStartTag *) xmlMalloc(ctxt->nameMax *
1903
3.73M
                                            sizeof(ctxt->pushTab[0]));
1904
3.73M
        if (ctxt->pushTab == NULL)
1905
0
            goto mem_error;
1906
3.73M
    }
1907
10.9M
    ctxt->nameTab[ctxt->nameNr] = value;
1908
10.9M
    ctxt->name = value;
1909
10.9M
    tag = &ctxt->pushTab[ctxt->nameNr];
1910
10.9M
    tag->prefix = prefix;
1911
10.9M
    tag->URI = URI;
1912
10.9M
    tag->line = line;
1913
10.9M
    tag->nsNr = nsNr;
1914
10.9M
    return (ctxt->nameNr++);
1915
0
mem_error:
1916
0
    xmlErrMemory(ctxt, NULL);
1917
0
    return (-1);
1918
10.9M
}
1919
#ifdef LIBXML_PUSH_ENABLED
1920
/**
1921
 * nameNsPop:
1922
 * @ctxt: an XML parser context
1923
 *
1924
 * Pops the top element/prefix/URI name from the name stack
1925
 *
1926
 * Returns the name just removed
1927
 */
1928
static const xmlChar *
1929
nameNsPop(xmlParserCtxtPtr ctxt)
1930
1.03M
{
1931
1.03M
    const xmlChar *ret;
1932
1933
1.03M
    if (ctxt->nameNr <= 0)
1934
0
        return (NULL);
1935
1.03M
    ctxt->nameNr--;
1936
1.03M
    if (ctxt->nameNr > 0)
1937
990k
        ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1938
46.6k
    else
1939
46.6k
        ctxt->name = NULL;
1940
1.03M
    ret = ctxt->nameTab[ctxt->nameNr];
1941
1.03M
    ctxt->nameTab[ctxt->nameNr] = NULL;
1942
1.03M
    return (ret);
1943
1.03M
}
1944
#endif /* LIBXML_PUSH_ENABLED */
1945
1946
/**
1947
 * namePush:
1948
 * @ctxt:  an XML parser context
1949
 * @value:  the element name
1950
 *
1951
 * Pushes a new element name on top of the name stack
1952
 *
1953
 * Returns -1 in case of error, the index in the stack otherwise
1954
 */
1955
int
1956
namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
1957
0
{
1958
0
    if (ctxt == NULL) return (-1);
1959
1960
0
    if (ctxt->nameNr >= ctxt->nameMax) {
1961
0
        const xmlChar * *tmp;
1962
0
        tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1963
0
                                    ctxt->nameMax * 2 *
1964
0
                                    sizeof(ctxt->nameTab[0]));
1965
0
        if (tmp == NULL) {
1966
0
      goto mem_error;
1967
0
        }
1968
0
  ctxt->nameTab = tmp;
1969
0
        ctxt->nameMax *= 2;
1970
0
    }
1971
0
    ctxt->nameTab[ctxt->nameNr] = value;
1972
0
    ctxt->name = value;
1973
0
    return (ctxt->nameNr++);
1974
0
mem_error:
1975
0
    xmlErrMemory(ctxt, NULL);
1976
0
    return (-1);
1977
0
}
1978
/**
1979
 * namePop:
1980
 * @ctxt: an XML parser context
1981
 *
1982
 * Pops the top element name from the name stack
1983
 *
1984
 * Returns the name just removed
1985
 */
1986
const xmlChar *
1987
namePop(xmlParserCtxtPtr ctxt)
1988
5.44M
{
1989
5.44M
    const xmlChar *ret;
1990
1991
5.44M
    if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1992
0
        return (NULL);
1993
5.44M
    ctxt->nameNr--;
1994
5.44M
    if (ctxt->nameNr > 0)
1995
5.20M
        ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1996
236k
    else
1997
236k
        ctxt->name = NULL;
1998
5.44M
    ret = ctxt->nameTab[ctxt->nameNr];
1999
5.44M
    ctxt->nameTab[ctxt->nameNr] = NULL;
2000
5.44M
    return (ret);
2001
5.44M
}
2002
2003
13.3M
/**
 * spacePush:
 * @ctxt:  an XML parser context
 * @val:  the value to push
 *
 * Pushes @val on top of the space stack and points ctxt->space at the
 * new top entry. The stack is doubled in size when full.
 *
 * Returns -1 in case of error, the index in the stack otherwise
 */
static int spacePush(xmlParserCtxtPtr ctxt, int val) {
    int idx;

    if (ctxt->spaceNr >= ctxt->spaceMax) {
        int *grown;

        ctxt->spaceMax *= 2;
        grown = (int *) xmlRealloc(ctxt->spaceTab,
                                   ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
        if (grown == NULL) {
            xmlErrMemory(ctxt, NULL);
            /* Restore the capacity that still matches the old table. */
            ctxt->spaceMax /=2;
            return(-1);
        }
        ctxt->spaceTab = grown;
    }

    idx = ctxt->spaceNr;
    ctxt->spaceTab[idx] = val;
    ctxt->space = &ctxt->spaceTab[idx];
    ctxt->spaceNr = idx + 1;
    return(idx);
}
2021
2022
9.21M
/**
 * spacePop:
 * @ctxt:  an XML parser context
 *
 * Pops the top value from the space stack. ctxt->space is moved to the
 * entry below, or to the first slot of the table when the stack becomes
 * empty. The vacated slot is overwritten with -1.
 *
 * Returns the value just removed, or 0 if the stack is empty
 */
static int spacePop(xmlParserCtxtPtr ctxt) {
    int top;
    int popped;

    if (ctxt->spaceNr <= 0)
        return(0);

    top = --ctxt->spaceNr;
    ctxt->space = &ctxt->spaceTab[(top > 0) ? (top - 1) : 0];

    popped = ctxt->spaceTab[top];
    ctxt->spaceTab[top] = -1;
    return(popped);
}
2034
2035
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported.
 *
 * Dirty macros, i.e. one often needs to make assumptions about the context
 * to use them. All of them expect a variable named ctxt to be in scope.
 *
 *   CUR_PTR  the current pointer to the xmlChar to be parsed.
 *            To be used with extreme caution since operations consuming
 *            characters may move the input buffer to a different location!
 *   BASE_PTR the base pointer of the current input (ctxt->input->base).
 *   CUR      the current xmlChar value, i.e. an 8 bit value.
 *            This should be used internally by the parser only to compare
 *            to ASCII values, otherwise it would break when running with
 *            UTF-8 encoding.
 *   RAW      same as CUR but in the input buffer, bypassing any token
 *            extraction that may have been done.
 *   NXT(n)   the n'th next xmlChar. Same as CUR, it should be used only
 *            to compare against ASCII based substrings.
 *   SKIP(n)  skip n xmlChars, and must also be used only to skip ASCII
 *            defined strings without newlines within the parser.
 *   NEXT1    skip 1 xmlChar, and must also be used only to skip 1
 *            non-newline ASCII defined char within the parser.
 *
 * Clean macros, not dependent on an ASCII context, expect UTF-8 encoding:
 *
 *   NEXT     skip to the next character, this does the proper decoding
 *            in UTF-8 mode. It also pops up unfinished entities on the fly.
 *   NEXTL(l) skip the current unicode character of l xmlChars long.
 *   CUR_CHAR(l) the current unicode character (int), sets l
 *            to the number of xmlChars used for the encoding [0-5].
 *   CUR_SCHAR  same but operates on a string instead of the context.
 *   COPY_BUF  copy the current unicode char to the target buffer and
 *            increment the index.
 *   GROW, SHRINK  handling of input buffers.
 */

#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur
#define BASE_PTR ctxt->input->base
2075
2076
/*
 * CMPn(s, c1, ..., cn): true when the first n bytes at s, read as
 * unsigned char, are equal to c1 ... cn in that order. Each macro is
 * built on the previous one, so the comparison stops at the first
 * mismatch. The argument s is expanded several times.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((unsigned char *) s)[ 0 ] == c1 && ((unsigned char *) s)[ 1 ] == c2 && \
    ((unsigned char *) s)[ 2 ] == c3 && ((unsigned char *) s)[ 3 ] == c4 )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) s)[ 4 ] == c5 )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) s)[ 5 ] == c6 )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) s)[ 6 ] == c7 )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) s)[ 7 ] == c8 )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((unsigned char *) s)[ 8 ] == c9 )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((unsigned char *) s)[ 9 ] == c10 )
2093
2094
20.4M
/*
 * SKIP(val): advance the current input by val xmlChars, adding val to the
 * column counter without looking for newlines, then grow the input if the
 * new current byte is 0.
 */
#define SKIP(val) do {                                                  \
    ctxt->input->cur += (val),ctxt->input->col+=(val);                  \
    if (*ctxt->input->cur == 0)                                         \
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);                   \
  } while (0)

/*
 * SKIPL(val): advance the current input by val xmlChars one at a time,
 * keeping the line and column counters up to date across newlines, then
 * grow the input if the new current byte is 0.
 */
#define SKIPL(val) do {                                                 \
    int skipl;                                                          \
    for(skipl=0; skipl<val; skipl++) {                                  \
        if (*(ctxt->input->cur) == '\n') {                              \
        ctxt->input->line++; ctxt->input->col = 1;                      \
        } else ctxt->input->col++;                                      \
        ctxt->input->cur++;                                             \
    }                                                                   \
    if (*ctxt->input->cur == 0)                                         \
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);                   \
  } while (0)
2111
2112
57.9M
#define SHRINK if ((ctxt->progressive == 0) &&       \
2113
57.9M
       (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
2114
57.9M
       (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
2115
57.9M
  xmlSHRINK (ctxt);
2116
2117
141k
static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
2118
141k
    xmlParserInputShrink(ctxt->input);
2119
141k
    if (*ctxt->input->cur == 0)
2120
4.54k
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2121
141k
}
2122
2123
245M
#define GROW if ((ctxt->progressive == 0) &&       \
2124
245M
     (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
2125
245M
  xmlGROW (ctxt);
2126
2127
48.2M
static void xmlGROW (xmlParserCtxtPtr ctxt) {
2128
48.2M
    ptrdiff_t curEnd = ctxt->input->end - ctxt->input->cur;
2129
48.2M
    ptrdiff_t curBase = ctxt->input->cur - ctxt->input->base;
2130
2131
48.2M
    if (((curEnd > XML_MAX_LOOKUP_LIMIT) ||
2132
48.2M
         (curBase > XML_MAX_LOOKUP_LIMIT)) &&
2133
48.2M
         ((ctxt->input->buf) &&
2134
0
          (ctxt->input->buf->readcallback != xmlInputReadCallbackNop)) &&
2135
48.2M
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
2136
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
2137
0
        xmlHaltParser(ctxt);
2138
0
  return;
2139
0
    }
2140
48.2M
    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2141
48.2M
    if ((ctxt->input->cur > ctxt->input->end) ||
2142
48.2M
        (ctxt->input->cur < ctxt->input->base)) {
2143
0
        xmlHaltParser(ctxt);
2144
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "cur index out of bound");
2145
0
  return;
2146
0
    }
2147
48.2M
    if ((ctxt->input->cur != NULL) && (*ctxt->input->cur == 0))
2148
3.94M
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2149
48.2M
}
2150
2151
73.7M
/* Shorthands for the context based helpers. */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: step over one xmlChar, bumping the column counter, and grow the
 * input if the new current byte is 0.
 * NOTE: expands to a plain brace block, not a do { } while (0) statement.
 */
#define NEXT1 {                                                         \
        ctxt->input->col++;                                             \
        ctxt->input->cur++;                                             \
        if (*ctxt->input->cur == 0)                                     \
            xmlParserInputGrow(ctxt->input, INPUT_CHUNK);               \
    }

/*
 * NEXTL(l): step over the current character of l xmlChars, keeping the
 * line and column counters up to date.
 */
#define NEXTL(l) do {                                                   \
    if (*(ctxt->input->cur) == '\n') {                                  \
        ctxt->input->line++; ctxt->input->col = 1;                      \
    } else ctxt->input->col++;                                          \
    ctxt->input->cur += l;                                              \
  } while (0)

#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l)
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)

/*
 * COPY_BUF(l,b,i,v): append character v to buffer b at index i, as a
 * single byte when l is 1 and through xmlCopyCharMultiByte() otherwise.
 * NOTE: expands to an unbraced if/else without a trailing semicolon.
 */
#define COPY_BUF(l,b,i,v)                                               \
    if (l == 1) b[i++] = v;                                             \
    else i += xmlCopyCharMultiByte(&b[i],v)

/* Offset of the current position, counted from the start of the input. */
#define CUR_CONSUMED \
    (ctxt->input->consumed + (ctxt->input->cur - ctxt->input->base))
2178
2179
/**
2180
 * xmlSkipBlankChars:
2181
 * @ctxt:  the XML parser context
2182
 *
2183
 * skip all blanks character found at that point in the input streams.
2184
 * It pops up finished entities in the process if allowable at that point.
2185
 *
2186
 * Returns the number of space chars skipped
2187
 */
2188
2189
int
2190
73.7M
xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
2191
73.7M
    int res = 0;
2192
2193
    /*
2194
     * It's Okay to use CUR/NEXT here since all the blanks are on
2195
     * the ASCII range.
2196
     */
2197
73.7M
    if (((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) ||
2198
73.7M
        (ctxt->instate == XML_PARSER_START)) {
2199
48.3M
  const xmlChar *cur;
2200
  /*
2201
   * if we are in the document content, go really fast
2202
   */
2203
48.3M
  cur = ctxt->input->cur;
2204
48.3M
  while (IS_BLANK_CH(*cur)) {
2205
35.6M
      if (*cur == '\n') {
2206
1.52M
    ctxt->input->line++; ctxt->input->col = 1;
2207
34.0M
      } else {
2208
34.0M
    ctxt->input->col++;
2209
34.0M
      }
2210
35.6M
      cur++;
2211
35.6M
      if (res < INT_MAX)
2212
35.6M
    res++;
2213
35.6M
      if (*cur == 0) {
2214
136k
    ctxt->input->cur = cur;
2215
136k
    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2216
136k
    cur = ctxt->input->cur;
2217
136k
      }
2218
35.6M
  }
2219
48.3M
  ctxt->input->cur = cur;
2220
48.3M
    } else {
2221
25.3M
        int expandPE = ((ctxt->external != 0) || (ctxt->inputNr != 1));
2222
2223
75.2M
  while (1) {
2224
75.2M
            if (IS_BLANK_CH(CUR)) { /* CHECKED tstblanks.xml */
2225
48.8M
    NEXT;
2226
48.8M
      } else if (CUR == '%') {
2227
                /*
2228
                 * Need to handle support of entities branching here
2229
                 */
2230
1.12M
          if ((expandPE == 0) || (IS_BLANK_CH(NXT(1))) || (NXT(1) == 0))
2231
560k
                    break;
2232
567k
          xmlParsePEReference(ctxt);
2233
25.2M
            } else if (CUR == 0) {
2234
455k
                if (ctxt->inputNr <= 1)
2235
44.8k
                    break;
2236
410k
                xmlPopInput(ctxt);
2237
24.7M
            } else {
2238
24.7M
                break;
2239
24.7M
            }
2240
2241
            /*
2242
             * Also increase the counter when entering or exiting a PERef.
2243
             * The spec says: "When a parameter-entity reference is recognized
2244
             * in the DTD and included, its replacement text MUST be enlarged
2245
             * by the attachment of one leading and one following space (#x20)
2246
             * character."
2247
             */
2248
49.8M
      if (res < INT_MAX)
2249
49.8M
    res++;
2250
49.8M
        }
2251
25.3M
    }
2252
73.7M
    return(res);
2253
73.7M
}
2254
2255
/************************************************************************
2256
 *                  *
2257
 *    Commodity functions to handle entities      *
2258
 *                  *
2259
 ************************************************************************/
2260
2261
/**
2262
 * xmlPopInput:
2263
 * @ctxt:  an XML parser context
2264
 *
2265
 * xmlPopInput: the current input pointed by ctxt->input came to an end
2266
 *          pop it and return the next char.
2267
 *
2268
 * Returns the current xmlChar in the parser context
2269
 */
2270
xmlChar
2271
423k
xmlPopInput(xmlParserCtxtPtr ctxt) {
2272
423k
    if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
2273
423k
    if (xmlParserDebugEntities)
2274
0
  xmlGenericError(xmlGenericErrorContext,
2275
0
    "Popping input %d\n", ctxt->inputNr);
2276
423k
    if ((ctxt->inputNr > 1) && (ctxt->inSubset == 0) &&
2277
423k
        (ctxt->instate != XML_PARSER_EOF))
2278
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2279
0
                    "Unfinished entity outside the DTD");
2280
423k
    xmlFreeInputStream(inputPop(ctxt));
2281
423k
    if (*ctxt->input->cur == 0)
2282
316
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2283
423k
    return(CUR);
2284
423k
}
2285
2286
/**
2287
 * xmlPushInput:
2288
 * @ctxt:  an XML parser context
2289
 * @input:  an XML parser input fragment (entity, XML fragment ...).
2290
 *
2291
 * xmlPushInput: switch to a new input stream which is stacked on top
2292
 *               of the previous one(s).
2293
 * Returns -1 in case of error or the index in the input stack
2294
 */
2295
int
2296
602k
xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
2297
602k
    int ret;
2298
602k
    if (input == NULL) return(-1);
2299
2300
601k
    if (xmlParserDebugEntities) {
2301
0
  if ((ctxt->input != NULL) && (ctxt->input->filename))
2302
0
      xmlGenericError(xmlGenericErrorContext,
2303
0
        "%s(%d): ", ctxt->input->filename,
2304
0
        ctxt->input->line);
2305
0
  xmlGenericError(xmlGenericErrorContext,
2306
0
    "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
2307
0
    }
2308
601k
    if (((ctxt->inputNr > 40) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2309
601k
        (ctxt->inputNr > 1024)) {
2310
463
        xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2311
119k
        while (ctxt->inputNr > 1)
2312
118k
            xmlFreeInputStream(inputPop(ctxt));
2313
463
  return(-1);
2314
463
    }
2315
600k
    ret = inputPush(ctxt, input);
2316
600k
    if (ctxt->instate == XML_PARSER_EOF)
2317
0
        return(-1);
2318
600k
    GROW;
2319
600k
    return(ret);
2320
600k
}
2321
2322
/**
2323
 * xmlParseCharRef:
2324
 * @ctxt:  an XML parser context
2325
 *
2326
 * DEPRECATED: Internal function, don't use.
2327
 *
2328
 * parse Reference declarations
2329
 *
2330
 * [66] CharRef ::= '&#' [0-9]+ ';' |
2331
 *                  '&#x' [0-9a-fA-F]+ ';'
2332
 *
2333
 * [ WFC: Legal Character ]
2334
 * Characters referred to using character references must match the
2335
 * production for Char.
2336
 *
2337
 * Returns the value parsed (as an int), 0 in case of error
2338
 */
2339
int
2340
472k
xmlParseCharRef(xmlParserCtxtPtr ctxt) {
2341
472k
    int val = 0;
2342
472k
    int count = 0;
2343
2344
    /*
2345
     * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2346
     */
2347
472k
    if ((RAW == '&') && (NXT(1) == '#') &&
2348
472k
        (NXT(2) == 'x')) {
2349
165k
  SKIP(3);
2350
165k
  GROW;
2351
1.02M
  while (RAW != ';') { /* loop blocked by count */
2352
871k
      if (count++ > 20) {
2353
59.0k
    count = 0;
2354
59.0k
    GROW;
2355
59.0k
                if (ctxt->instate == XML_PARSER_EOF)
2356
0
                    return(0);
2357
59.0k
      }
2358
871k
      if ((RAW >= '0') && (RAW <= '9'))
2359
733k
          val = val * 16 + (CUR - '0');
2360
138k
      else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2361
124k
          val = val * 16 + (CUR - 'a') + 10;
2362
13.6k
      else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2363
6.11k
          val = val * 16 + (CUR - 'A') + 10;
2364
7.51k
      else {
2365
7.51k
    xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2366
7.51k
    val = 0;
2367
7.51k
    break;
2368
7.51k
      }
2369
864k
      if (val > 0x110000)
2370
646k
          val = 0x110000;
2371
2372
864k
      NEXT;
2373
864k
      count++;
2374
864k
  }
2375
165k
  if (RAW == ';') {
2376
      /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2377
157k
      ctxt->input->col++;
2378
157k
      ctxt->input->cur++;
2379
157k
  }
2380
306k
    } else if  ((RAW == '&') && (NXT(1) == '#')) {
2381
306k
  SKIP(2);
2382
306k
  GROW;
2383
1.61M
  while (RAW != ';') { /* loop blocked by count */
2384
1.33M
      if (count++ > 20) {
2385
48.4k
    count = 0;
2386
48.4k
    GROW;
2387
48.4k
                if (ctxt->instate == XML_PARSER_EOF)
2388
0
                    return(0);
2389
48.4k
      }
2390
1.33M
      if ((RAW >= '0') && (RAW <= '9'))
2391
1.30M
          val = val * 10 + (CUR - '0');
2392
26.8k
      else {
2393
26.8k
    xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2394
26.8k
    val = 0;
2395
26.8k
    break;
2396
26.8k
      }
2397
1.30M
      if (val > 0x110000)
2398
528k
          val = 0x110000;
2399
2400
1.30M
      NEXT;
2401
1.30M
      count++;
2402
1.30M
  }
2403
306k
  if (RAW == ';') {
2404
      /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2405
280k
      ctxt->input->col++;
2406
280k
      ctxt->input->cur++;
2407
280k
  }
2408
306k
    } else {
2409
0
        xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2410
0
    }
2411
2412
    /*
2413
     * [ WFC: Legal Character ]
2414
     * Characters referred to using character references must match the
2415
     * production for Char.
2416
     */
2417
472k
    if (val >= 0x110000) {
2418
1.16k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2419
1.16k
                "xmlParseCharRef: character reference out of bounds\n",
2420
1.16k
          val);
2421
471k
    } else if (IS_CHAR(val)) {
2422
434k
        return(val);
2423
434k
    } else {
2424
36.3k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2425
36.3k
                          "xmlParseCharRef: invalid xmlChar value %d\n",
2426
36.3k
                    val);
2427
36.3k
    }
2428
37.5k
    return(0);
2429
472k
}
2430
2431
/**
2432
 * xmlParseStringCharRef:
2433
 * @ctxt:  an XML parser context
2434
 * @str:  a pointer to an index in the string
2435
 *
2436
 * parse Reference declarations, variant parsing from a string rather
2437
 * than an an input flow.
2438
 *
2439
 * [66] CharRef ::= '&#' [0-9]+ ';' |
2440
 *                  '&#x' [0-9a-fA-F]+ ';'
2441
 *
2442
 * [ WFC: Legal Character ]
2443
 * Characters referred to using character references must match the
2444
 * production for Char.
2445
 *
2446
 * Returns the value parsed (as an int), 0 in case of error, str will be
2447
 *         updated to the current value of the index
2448
 */
2449
static int
2450
339k
xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2451
339k
    const xmlChar *ptr;
2452
339k
    xmlChar cur;
2453
339k
    int val = 0;
2454
2455
339k
    if ((str == NULL) || (*str == NULL)) return(0);
2456
339k
    ptr = *str;
2457
339k
    cur = *ptr;
2458
339k
    if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2459
71.6k
  ptr += 3;
2460
71.6k
  cur = *ptr;
2461
169k
  while (cur != ';') { /* Non input consuming loop */
2462
100k
      if ((cur >= '0') && (cur <= '9'))
2463
31.7k
          val = val * 16 + (cur - '0');
2464
69.1k
      else if ((cur >= 'a') && (cur <= 'f'))
2465
8.28k
          val = val * 16 + (cur - 'a') + 10;
2466
60.8k
      else if ((cur >= 'A') && (cur <= 'F'))
2467
58.0k
          val = val * 16 + (cur - 'A') + 10;
2468
2.85k
      else {
2469
2.85k
    xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2470
2.85k
    val = 0;
2471
2.85k
    break;
2472
2.85k
      }
2473
98.0k
      if (val > 0x110000)
2474
12.3k
          val = 0x110000;
2475
2476
98.0k
      ptr++;
2477
98.0k
      cur = *ptr;
2478
98.0k
  }
2479
71.6k
  if (cur == ';')
2480
68.7k
      ptr++;
2481
267k
    } else if  ((cur == '&') && (ptr[1] == '#')){
2482
267k
  ptr += 2;
2483
267k
  cur = *ptr;
2484
948k
  while (cur != ';') { /* Non input consuming loops */
2485
683k
      if ((cur >= '0') && (cur <= '9'))
2486
680k
          val = val * 10 + (cur - '0');
2487
3.09k
      else {
2488
3.09k
    xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2489
3.09k
    val = 0;
2490
3.09k
    break;
2491
3.09k
      }
2492
680k
      if (val > 0x110000)
2493
6.99k
          val = 0x110000;
2494
2495
680k
      ptr++;
2496
680k
      cur = *ptr;
2497
680k
  }
2498
267k
  if (cur == ';')
2499
264k
      ptr++;
2500
267k
    } else {
2501
0
  xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2502
0
  return(0);
2503
0
    }
2504
339k
    *str = ptr;
2505
2506
    /*
2507
     * [ WFC: Legal Character ]
2508
     * Characters referred to using character references must match the
2509
     * production for Char.
2510
     */
2511
339k
    if (val >= 0x110000) {
2512
338
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2513
338
                "xmlParseStringCharRef: character reference out of bounds\n",
2514
338
                val);
2515
338k
    } else if (IS_CHAR(val)) {
2516
332k
        return(val);
2517
332k
    } else {
2518
6.53k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2519
6.53k
        "xmlParseStringCharRef: invalid xmlChar value %d\n",
2520
6.53k
        val);
2521
6.53k
    }
2522
6.87k
    return(0);
2523
339k
}
2524
2525
/**
2526
 * xmlParserHandlePEReference:
2527
 * @ctxt:  the parser context
2528
 *
2529
 * [69] PEReference ::= '%' Name ';'
2530
 *
2531
 * [ WFC: No Recursion ]
2532
 * A parsed entity must not contain a recursive
2533
 * reference to itself, either directly or indirectly.
2534
 *
2535
 * [ WFC: Entity Declared ]
2536
 * In a document without any DTD, a document with only an internal DTD
2537
 * subset which contains no parameter entity references, or a document
2538
 * with "standalone='yes'", ...  ... The declaration of a parameter
2539
 * entity must precede any reference to it...
2540
 *
2541
 * [ VC: Entity Declared ]
2542
 * In a document with an external subset or external parameter entities
2543
 * with "standalone='no'", ...  ... The declaration of a parameter entity
2544
 * must precede any reference to it...
2545
 *
2546
 * [ WFC: In DTD ]
2547
 * Parameter-entity references may only appear in the DTD.
2548
 * NOTE: misleading but this is handled.
2549
 *
2550
 * A PEReference may have been detected in the current input stream
2551
 * the handling is done accordingly to
2552
 *      http://www.w3.org/TR/REC-xml#entproc
2553
 * i.e.
2554
 *   - Included in literal in entity values
2555
 *   - Included as Parameter Entity reference within DTDs
2556
 */
2557
void
2558
0
xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
2559
0
    switch(ctxt->instate) {
2560
0
  case XML_PARSER_CDATA_SECTION:
2561
0
      return;
2562
0
        case XML_PARSER_COMMENT:
2563
0
      return;
2564
0
  case XML_PARSER_START_TAG:
2565
0
      return;
2566
0
  case XML_PARSER_END_TAG:
2567
0
      return;
2568
0
        case XML_PARSER_EOF:
2569
0
      xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
2570
0
      return;
2571
0
        case XML_PARSER_PROLOG:
2572
0
  case XML_PARSER_START:
2573
0
  case XML_PARSER_MISC:
2574
0
      xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
2575
0
      return;
2576
0
  case XML_PARSER_ENTITY_DECL:
2577
0
        case XML_PARSER_CONTENT:
2578
0
        case XML_PARSER_ATTRIBUTE_VALUE:
2579
0
        case XML_PARSER_PI:
2580
0
  case XML_PARSER_SYSTEM_LITERAL:
2581
0
  case XML_PARSER_PUBLIC_LITERAL:
2582
      /* we just ignore it there */
2583
0
      return;
2584
0
        case XML_PARSER_EPILOG:
2585
0
      xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
2586
0
      return;
2587
0
  case XML_PARSER_ENTITY_VALUE:
2588
      /*
2589
       * NOTE: in the case of entity values, we don't do the
2590
       *       substitution here since we need the literal
2591
       *       entity value to be able to save the internal
2592
       *       subset of the document.
2593
       *       This will be handled by xmlStringDecodeEntities
2594
       */
2595
0
      return;
2596
0
        case XML_PARSER_DTD:
2597
      /*
2598
       * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2599
       * In the internal DTD subset, parameter-entity references
2600
       * can occur only where markup declarations can occur, not
2601
       * within markup declarations.
2602
       * In that case this is handled in xmlParseMarkupDecl
2603
       */
2604
0
      if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2605
0
    return;
2606
0
      if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
2607
0
    return;
2608
0
            break;
2609
0
        case XML_PARSER_IGNORE:
2610
0
            return;
2611
0
    }
2612
2613
0
    xmlParsePEReference(ctxt);
2614
0
}
2615
2616
/*
 * Macro used to grow the current buffer to twice its size plus n bytes.
 * buffer##_size is expected to be a size_t and is updated on success.
 * mem_error: is expected to handle memory allocation failures; it is
 * also the target when the new size wraps around. The buffer is left
 * untouched in both cases.
 */
#define growBuffer(buffer, n) {                                         \
    xmlChar *tmp;                                                       \
    size_t new_size = buffer##_size * 2 + n;                            \
    if (new_size < buffer##_size) goto mem_error;                       \
    tmp = (xmlChar *) xmlRealloc(buffer, new_size);                     \
    if (tmp == NULL) goto mem_error;                                    \
    buffer = tmp;                                                       \
    buffer##_size = new_size;                                           \
}
2630
2631
/**
2632
 * xmlStringLenDecodeEntities:
2633
 * @ctxt:  the parser context
2634
 * @str:  the input string
2635
 * @len: the string length
2636
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2637
 * @end:  an end marker xmlChar, 0 if none
2638
 * @end2:  an end marker xmlChar, 0 if none
2639
 * @end3:  an end marker xmlChar, 0 if none
2640
 *
2641
 * Takes a entity string content and process to do the adequate substitutions.
2642
 *
2643
 * [67] Reference ::= EntityRef | CharRef
2644
 *
2645
 * [69] PEReference ::= '%' Name ';'
2646
 *
2647
 * Returns A newly allocated string with the substitution done. The caller
2648
 *      must deallocate it !
2649
 */
2650
xmlChar *
2651
xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2652
2.33M
          int what, xmlChar end, xmlChar  end2, xmlChar end3) {
2653
2.33M
    xmlChar *buffer = NULL;
2654
2.33M
    size_t buffer_size = 0;
2655
2.33M
    size_t nbchars = 0;
2656
2657
2.33M
    xmlChar *current = NULL;
2658
2.33M
    xmlChar *rep = NULL;
2659
2.33M
    const xmlChar *last;
2660
2.33M
    xmlEntityPtr ent;
2661
2.33M
    int c,l;
2662
2663
2.33M
    if ((ctxt == NULL) || (str == NULL) || (len < 0))
2664
0
  return(NULL);
2665
2.33M
    last = str + len;
2666
2667
2.33M
    if (((ctxt->depth > 40) &&
2668
2.33M
         ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2669
2.33M
  (ctxt->depth > 1024)) {
2670
1.43k
  xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2671
1.43k
  return(NULL);
2672
1.43k
    }
2673
2674
    /*
2675
     * allocate a translation buffer.
2676
     */
2677
2.33M
    buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
2678
2.33M
    buffer = (xmlChar *) xmlMallocAtomic(buffer_size);
2679
2.33M
    if (buffer == NULL) goto mem_error;
2680
2681
    /*
2682
     * OK loop until we reach one of the ending char or a size limit.
2683
     * we are operating on already parsed values.
2684
     */
2685
2.33M
    if (str < last)
2686
2.13M
  c = CUR_SCHAR(str, l);
2687
198k
    else
2688
198k
        c = 0;
2689
162M
    while ((c != 0) && (c != end) && /* non input consuming loop */
2690
162M
           (c != end2) && (c != end3) &&
2691
162M
           (ctxt->instate != XML_PARSER_EOF)) {
2692
2693
160M
  if (c == 0) break;
2694
160M
        if ((c == '&') && (str[1] == '#')) {
2695
339k
      int val = xmlParseStringCharRef(ctxt, &str);
2696
339k
      if (val == 0)
2697
6.87k
                goto int_error;
2698
332k
      COPY_BUF(0,buffer,nbchars,val);
2699
332k
      if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2700
1.35k
          growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2701
1.35k
      }
2702
159M
  } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2703
193k
      if (xmlParserDebugEntities)
2704
0
    xmlGenericError(xmlGenericErrorContext,
2705
0
      "String decoding Entity Reference: %.30s\n",
2706
0
      str);
2707
193k
      ent = xmlParseStringEntityRef(ctxt, &str);
2708
193k
      xmlParserEntityCheck(ctxt, 0, ent, 0);
2709
193k
      if (ent != NULL)
2710
158k
          ctxt->nbentities += ent->checked / 2;
2711
193k
      if ((ent != NULL) &&
2712
193k
    (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2713
9.89k
    if (ent->content != NULL) {
2714
9.89k
        COPY_BUF(0,buffer,nbchars,ent->content[0]);
2715
9.89k
        if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2716
38
      growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2717
38
        }
2718
9.89k
    } else {
2719
0
        xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2720
0
          "predefined entity has no content\n");
2721
0
                    goto int_error;
2722
0
    }
2723
183k
      } else if ((ent != NULL) && (ent->content != NULL)) {
2724
147k
    ctxt->depth++;
2725
147k
    rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2726
147k
                            0, 0, 0);
2727
147k
    ctxt->depth--;
2728
147k
    if (rep == NULL) {
2729
106k
                    ent->content[0] = 0;
2730
106k
                    goto int_error;
2731
106k
                }
2732
2733
40.8k
                current = rep;
2734
399k
                while (*current != 0) { /* non input consuming loop */
2735
358k
                    buffer[nbchars++] = *current++;
2736
358k
                    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2737
148
                        if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
2738
0
                            goto int_error;
2739
444
                        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2740
444
                    }
2741
358k
                }
2742
40.8k
                xmlFree(rep);
2743
40.8k
                rep = NULL;
2744
40.8k
      } else if (ent != NULL) {
2745
1.56k
    int i = xmlStrlen(ent->name);
2746
1.56k
    const xmlChar *cur = ent->name;
2747
2748
1.56k
    buffer[nbchars++] = '&';
2749
1.56k
    if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) {
2750
0
        growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE);
2751
0
    }
2752
6.25k
    for (;i > 0;i--)
2753
4.68k
        buffer[nbchars++] = *cur++;
2754
1.56k
    buffer[nbchars++] = ';';
2755
1.56k
      }
2756
159M
  } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2757
826k
      if (xmlParserDebugEntities)
2758
0
    xmlGenericError(xmlGenericErrorContext,
2759
0
      "String decoding PE Reference: %.30s\n", str);
2760
826k
      ent = xmlParseStringPEReference(ctxt, &str);
2761
826k
      xmlParserEntityCheck(ctxt, 0, ent, 0);
2762
826k
      if (ent != NULL)
2763
549k
          ctxt->nbentities += ent->checked / 2;
2764
826k
      if (ent != NULL) {
2765
549k
                if (ent->content == NULL) {
2766
        /*
2767
         * Note: external parsed entities will not be loaded,
2768
         * it is not required for a non-validating parser to
2769
         * complete external PEReferences coming from the
2770
         * internal subset
2771
         */
2772
5.88k
        if (((ctxt->options & XML_PARSE_NOENT) != 0) ||
2773
5.88k
      ((ctxt->options & XML_PARSE_DTDVALID) != 0) ||
2774
5.88k
      (ctxt->validate != 0)) {
2775
5.63k
      xmlLoadEntityContent(ctxt, ent);
2776
5.63k
        } else {
2777
246
      xmlWarningMsg(ctxt, XML_ERR_ENTITY_PROCESSING,
2778
246
      "not validating will not read content for PE entity %s\n",
2779
246
                          ent->name, NULL);
2780
246
        }
2781
5.88k
    }
2782
549k
    ctxt->depth++;
2783
549k
    rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2784
549k
                            0, 0, 0);
2785
549k
    ctxt->depth--;
2786
549k
    if (rep == NULL) {
2787
73.4k
                    if (ent->content != NULL)
2788
70.8k
                        ent->content[0] = 0;
2789
73.4k
                    goto int_error;
2790
73.4k
                }
2791
476k
                current = rep;
2792
21.3M
                while (*current != 0) { /* non input consuming loop */
2793
20.8M
                    buffer[nbchars++] = *current++;
2794
20.8M
                    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2795
34.5k
                        if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
2796
219
                            goto int_error;
2797
103k
                        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2798
103k
                    }
2799
20.8M
                }
2800
475k
                xmlFree(rep);
2801
475k
                rep = NULL;
2802
475k
      }
2803
158M
  } else {
2804
158M
      COPY_BUF(l,buffer,nbchars,c);
2805
158M
      str += l;
2806
158M
      if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2807
299k
          growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2808
299k
      }
2809
158M
  }
2810
159M
  if (str < last)
2811
157M
      c = CUR_SCHAR(str, l);
2812
1.94M
  else
2813
1.94M
      c = 0;
2814
159M
    }
2815
2.14M
    buffer[nbchars] = 0;
2816
2.14M
    return(buffer);
2817
2818
0
mem_error:
2819
0
    xmlErrMemory(ctxt, NULL);
2820
186k
int_error:
2821
186k
    if (rep != NULL)
2822
219
        xmlFree(rep);
2823
186k
    if (buffer != NULL)
2824
186k
        xmlFree(buffer);
2825
186k
    return(NULL);
2826
0
}
2827
2828
/**
2829
 * xmlStringDecodeEntities:
2830
 * @ctxt:  the parser context
2831
 * @str:  the input string
2832
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2833
 * @end:  an end marker xmlChar, 0 if none
2834
 * @end2:  an end marker xmlChar, 0 if none
2835
 * @end3:  an end marker xmlChar, 0 if none
2836
 *
2837
 * Takes a entity string content and process to do the adequate substitutions.
2838
 *
2839
 * [67] Reference ::= EntityRef | CharRef
2840
 *
2841
 * [69] PEReference ::= '%' Name ';'
2842
 *
2843
 * Returns A newly allocated string with the substitution done. The caller
2844
 *      must deallocate it !
2845
 */
2846
xmlChar *
2847
xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2848
2.32M
            xmlChar end, xmlChar  end2, xmlChar end3) {
2849
2.32M
    if ((ctxt == NULL) || (str == NULL)) return(NULL);
2850
2.31M
    return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2851
2.31M
           end, end2, end3));
2852
2.32M
}
2853
2854
/************************************************************************
2855
 *                  *
2856
 *    Commodity functions, cleanup needed ?     *
2857
 *                  *
2858
 ************************************************************************/
2859
2860
/**
2861
 * areBlanks:
2862
 * @ctxt:  an XML parser context
2863
 * @str:  a xmlChar *
2864
 * @len:  the size of @str
2865
 * @blank_chars: we know the chars are blanks
2866
 *
2867
 * Is this a sequence of blank chars that one can ignore ?
2868
 *
2869
 * Returns 1 if ignorable 0 otherwise.
2870
 */
2871
2872
static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2873
4.09M
                     int blank_chars) {
2874
4.09M
    int i, ret;
2875
4.09M
    xmlNodePtr lastChild;
2876
2877
    /*
2878
     * Don't spend time trying to differentiate them, the same callback is
2879
     * used !
2880
     */
2881
4.09M
    if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
2882
231k
  return(0);
2883
2884
    /*
2885
     * Check for xml:space value.
2886
     */
2887
3.86M
    if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2888
3.86M
        (*(ctxt->space) == -2))
2889
851k
  return(0);
2890
2891
    /*
2892
     * Check that the string is made of blanks
2893
     */
2894
3.01M
    if (blank_chars == 0) {
2895
6.54M
  for (i = 0;i < len;i++)
2896
5.82M
      if (!(IS_BLANK_CH(str[i]))) return(0);
2897
1.03M
    }
2898
2899
    /*
2900
     * Look if the element is mixed content in the DTD if available
2901
     */
2902
2.69M
    if (ctxt->node == NULL) return(0);
2903
2.61M
    if (ctxt->myDoc != NULL) {
2904
2.61M
  ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2905
2.61M
        if (ret == 0) return(1);
2906
2.54M
        if (ret == 1) return(0);
2907
2.54M
    }
2908
2909
    /*
2910
     * Otherwise, heuristic :-\
2911
     */
2912
2.54M
    if ((RAW != '<') && (RAW != 0xD)) return(0);
2913
2.49M
    if ((ctxt->node->children == NULL) &&
2914
2.49M
  (RAW == '<') && (NXT(1) == '/')) return(0);
2915
2916
2.49M
    lastChild = xmlGetLastChild(ctxt->node);
2917
2.49M
    if (lastChild == NULL) {
2918
789k
        if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2919
789k
            (ctxt->node->content != NULL)) return(0);
2920
1.70M
    } else if (xmlNodeIsText(lastChild))
2921
52.7k
        return(0);
2922
1.64M
    else if ((ctxt->node->children != NULL) &&
2923
1.64M
             (xmlNodeIsText(ctxt->node->children)))
2924
10.0k
        return(0);
2925
2.42M
    return(1);
2926
2.49M
}
2927
2928
/************************************************************************
2929
 *                  *
2930
 *    Extra stuff for namespace support     *
2931
 *  Relates to http://www.w3.org/TR/WD-xml-names      *
2932
 *                  *
2933
 ************************************************************************/
2934
2935
/**
2936
 * xmlSplitQName:
2937
 * @ctxt:  an XML parser context
2938
 * @name:  an XML parser context
2939
 * @prefix:  a xmlChar **
2940
 *
2941
 * parse an UTF8 encoded XML qualified name string
2942
 *
2943
 * [NS 5] QName ::= (Prefix ':')? LocalPart
2944
 *
2945
 * [NS 6] Prefix ::= NCName
2946
 *
2947
 * [NS 7] LocalPart ::= NCName
2948
 *
2949
 * Returns the local part, and prefix is updated
2950
 *   to get the Prefix if any.
2951
 */
2952
2953
xmlChar *
2954
8.51M
xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2955
8.51M
    xmlChar buf[XML_MAX_NAMELEN + 5];
2956
8.51M
    xmlChar *buffer = NULL;
2957
8.51M
    int len = 0;
2958
8.51M
    int max = XML_MAX_NAMELEN;
2959
8.51M
    xmlChar *ret = NULL;
2960
8.51M
    const xmlChar *cur = name;
2961
8.51M
    int c;
2962
2963
8.51M
    if (prefix == NULL) return(NULL);
2964
8.51M
    *prefix = NULL;
2965
2966
8.51M
    if (cur == NULL) return(NULL);
2967
2968
#ifndef XML_XML_NAMESPACE
2969
    /* xml: prefix is not really a namespace */
2970
    if ((cur[0] == 'x') && (cur[1] == 'm') &&
2971
        (cur[2] == 'l') && (cur[3] == ':'))
2972
  return(xmlStrdup(name));
2973
#endif
2974
2975
    /* nasty but well=formed */
2976
8.51M
    if (cur[0] == ':')
2977
5.23k
  return(xmlStrdup(name));
2978
2979
8.51M
    c = *cur++;
2980
38.4M
    while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2981
29.9M
  buf[len++] = c;
2982
29.9M
  c = *cur++;
2983
29.9M
    }
2984
8.51M
    if (len >= max) {
2985
  /*
2986
   * Okay someone managed to make a huge name, so he's ready to pay
2987
   * for the processing speed.
2988
   */
2989
7.56k
  max = len * 2;
2990
2991
7.56k
  buffer = (xmlChar *) xmlMallocAtomic(max);
2992
7.56k
  if (buffer == NULL) {
2993
0
      xmlErrMemory(ctxt, NULL);
2994
0
      return(NULL);
2995
0
  }
2996
7.56k
  memcpy(buffer, buf, len);
2997
11.5M
  while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2998
11.5M
      if (len + 10 > max) {
2999
12.2k
          xmlChar *tmp;
3000
3001
12.2k
    max *= 2;
3002
12.2k
    tmp = (xmlChar *) xmlRealloc(buffer, max);
3003
12.2k
    if (tmp == NULL) {
3004
0
        xmlFree(buffer);
3005
0
        xmlErrMemory(ctxt, NULL);
3006
0
        return(NULL);
3007
0
    }
3008
12.2k
    buffer = tmp;
3009
12.2k
      }
3010
11.5M
      buffer[len++] = c;
3011
11.5M
      c = *cur++;
3012
11.5M
  }
3013
7.56k
  buffer[len] = 0;
3014
7.56k
    }
3015
3016
8.51M
    if ((c == ':') && (*cur == 0)) {
3017
7.57k
        if (buffer != NULL)
3018
234
      xmlFree(buffer);
3019
7.57k
  *prefix = NULL;
3020
7.57k
  return(xmlStrdup(name));
3021
7.57k
    }
3022
3023
8.50M
    if (buffer == NULL)
3024
8.49M
  ret = xmlStrndup(buf, len);
3025
7.33k
    else {
3026
7.33k
  ret = buffer;
3027
7.33k
  buffer = NULL;
3028
7.33k
  max = XML_MAX_NAMELEN;
3029
7.33k
    }
3030
3031
3032
8.50M
    if (c == ':') {
3033
1.94M
  c = *cur;
3034
1.94M
        *prefix = ret;
3035
1.94M
  if (c == 0) {
3036
0
      return(xmlStrndup(BAD_CAST "", 0));
3037
0
  }
3038
1.94M
  len = 0;
3039
3040
  /*
3041
   * Check that the first character is proper to start
3042
   * a new name
3043
   */
3044
1.94M
  if (!(((c >= 0x61) && (c <= 0x7A)) ||
3045
1.94M
        ((c >= 0x41) && (c <= 0x5A)) ||
3046
1.94M
        (c == '_') || (c == ':'))) {
3047
4.34k
      int l;
3048
4.34k
      int first = CUR_SCHAR(cur, l);
3049
3050
4.34k
      if (!IS_LETTER(first) && (first != '_')) {
3051
2.23k
    xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
3052
2.23k
          "Name %s is not XML Namespace compliant\n",
3053
2.23k
          name);
3054
2.23k
      }
3055
4.34k
  }
3056
1.94M
  cur++;
3057
3058
12.6M
  while ((c != 0) && (len < max)) { /* tested bigname2.xml */
3059
10.7M
      buf[len++] = c;
3060
10.7M
      c = *cur++;
3061
10.7M
  }
3062
1.94M
  if (len >= max) {
3063
      /*
3064
       * Okay someone managed to make a huge name, so he's ready to pay
3065
       * for the processing speed.
3066
       */
3067
2.25k
      max = len * 2;
3068
3069
2.25k
      buffer = (xmlChar *) xmlMallocAtomic(max);
3070
2.25k
      if (buffer == NULL) {
3071
0
          xmlErrMemory(ctxt, NULL);
3072
0
    return(NULL);
3073
0
      }
3074
2.25k
      memcpy(buffer, buf, len);
3075
6.63M
      while (c != 0) { /* tested bigname2.xml */
3076
6.63M
    if (len + 10 > max) {
3077
5.62k
        xmlChar *tmp;
3078
3079
5.62k
        max *= 2;
3080
5.62k
        tmp = (xmlChar *) xmlRealloc(buffer, max);
3081
5.62k
        if (tmp == NULL) {
3082
0
      xmlErrMemory(ctxt, NULL);
3083
0
      xmlFree(buffer);
3084
0
      return(NULL);
3085
0
        }
3086
5.62k
        buffer = tmp;
3087
5.62k
    }
3088
6.63M
    buffer[len++] = c;
3089
6.63M
    c = *cur++;
3090
6.63M
      }
3091
2.25k
      buffer[len] = 0;
3092
2.25k
  }
3093
3094
1.94M
  if (buffer == NULL)
3095
1.93M
      ret = xmlStrndup(buf, len);
3096
2.25k
  else {
3097
2.25k
      ret = buffer;
3098
2.25k
  }
3099
1.94M
    }
3100
3101
8.50M
    return(ret);
3102
8.50M
}
3103
3104
/************************************************************************
3105
 *                  *
3106
 *      The parser itself       *
3107
 *  Relates to http://www.w3.org/TR/REC-xml       *
3108
 *                  *
3109
 ************************************************************************/
3110
3111
/************************************************************************
3112
 *                  *
3113
 *  Routines to parse Name, NCName and NmToken      *
3114
 *                  *
3115
 ************************************************************************/
3116
#ifdef DEBUG
3117
static unsigned long nbParseName = 0;
3118
static unsigned long nbParseNmToken = 0;
3119
static unsigned long nbParseNCName = 0;
3120
static unsigned long nbParseNCNameComplex = 0;
3121
static unsigned long nbParseNameComplex = 0;
3122
static unsigned long nbParseStringName = 0;
3123
#endif
3124
3125
/*
3126
 * The two following functions are related to the change of accepted
3127
 * characters for Name and NmToken in the Revision 5 of XML-1.0
3128
 * They correspond to the modified production [4] and the new production [4a]
3129
 * changes in that revision. Also note that the macros used for the
3130
 * productions Letter, Digit, CombiningChar and Extender are not needed
3131
 * anymore.
3132
 * We still keep compatibility to pre-revision5 parsing semantic if the
3133
 * new XML_PARSE_OLD10 option is given to the parser.
3134
 */
3135
static int
3136
1.93M
xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
3137
1.93M
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3138
        /*
3139
   * Use the new checks of production [4] [4a] amd [5] of the
3140
   * Update 5 of XML-1.0
3141
   */
3142
1.24M
  if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3143
1.24M
      (((c >= 'a') && (c <= 'z')) ||
3144
1.21M
       ((c >= 'A') && (c <= 'Z')) ||
3145
1.21M
       (c == '_') || (c == ':') ||
3146
1.21M
       ((c >= 0xC0) && (c <= 0xD6)) ||
3147
1.21M
       ((c >= 0xD8) && (c <= 0xF6)) ||
3148
1.21M
       ((c >= 0xF8) && (c <= 0x2FF)) ||
3149
1.21M
       ((c >= 0x370) && (c <= 0x37D)) ||
3150
1.21M
       ((c >= 0x37F) && (c <= 0x1FFF)) ||
3151
1.21M
       ((c >= 0x200C) && (c <= 0x200D)) ||
3152
1.21M
       ((c >= 0x2070) && (c <= 0x218F)) ||
3153
1.21M
       ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3154
1.21M
       ((c >= 0x3001) && (c <= 0xD7FF)) ||
3155
1.21M
       ((c >= 0xF900) && (c <= 0xFDCF)) ||
3156
1.21M
       ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3157
1.21M
       ((c >= 0x10000) && (c <= 0xEFFFF))))
3158
939k
      return(1);
3159
1.24M
    } else {
3160
685k
        if (IS_LETTER(c) || (c == '_') || (c == ':'))
3161
480k
      return(1);
3162
685k
    }
3163
513k
    return(0);
3164
1.93M
}
3165
3166
static int
3167
39.8M
xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
3168
39.8M
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3169
        /*
3170
   * Use the new checks of production [4] [4a] amd [5] of the
3171
   * Update 5 of XML-1.0
3172
   */
3173
25.9M
  if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3174
25.9M
      (((c >= 'a') && (c <= 'z')) ||
3175
25.9M
       ((c >= 'A') && (c <= 'Z')) ||
3176
25.9M
       ((c >= '0') && (c <= '9')) || /* !start */
3177
25.9M
       (c == '_') || (c == ':') ||
3178
25.9M
       (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3179
25.9M
       ((c >= 0xC0) && (c <= 0xD6)) ||
3180
25.9M
       ((c >= 0xD8) && (c <= 0xF6)) ||
3181
25.9M
       ((c >= 0xF8) && (c <= 0x2FF)) ||
3182
25.9M
       ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3183
25.9M
       ((c >= 0x370) && (c <= 0x37D)) ||
3184
25.9M
       ((c >= 0x37F) && (c <= 0x1FFF)) ||
3185
25.9M
       ((c >= 0x200C) && (c <= 0x200D)) ||
3186
25.9M
       ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3187
25.9M
       ((c >= 0x2070) && (c <= 0x218F)) ||
3188
25.9M
       ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3189
25.9M
       ((c >= 0x3001) && (c <= 0xD7FF)) ||
3190
25.9M
       ((c >= 0xF900) && (c <= 0xFDCF)) ||
3191
25.9M
       ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3192
25.9M
       ((c >= 0x10000) && (c <= 0xEFFFF))))
3193
24.8M
       return(1);
3194
25.9M
    } else {
3195
13.8M
        if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3196
13.8M
            (c == '.') || (c == '-') ||
3197
13.8M
      (c == '_') || (c == ':') ||
3198
13.8M
      (IS_COMBINING(c)) ||
3199
13.8M
      (IS_EXTENDER(c)))
3200
13.3M
      return(1);
3201
13.8M
    }
3202
1.68M
    return(0);
3203
39.8M
}
3204
3205
static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
3206
                                          int *len, int *alloc, int normalize);
3207
3208
static const xmlChar *
3209
738k
xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3210
738k
    int len = 0, l;
3211
738k
    int c;
3212
738k
    int count = 0;
3213
738k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3214
324k
                    XML_MAX_TEXT_LENGTH :
3215
738k
                    XML_MAX_NAME_LENGTH;
3216
3217
#ifdef DEBUG
3218
    nbParseNameComplex++;
3219
#endif
3220
3221
    /*
3222
     * Handler for more complex cases
3223
     */
3224
738k
    GROW;
3225
738k
    if (ctxt->instate == XML_PARSER_EOF)
3226
0
        return(NULL);
3227
738k
    c = CUR_CHAR(l);
3228
738k
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3229
        /*
3230
   * Use the new checks of production [4] [4a] amd [5] of the
3231
   * Update 5 of XML-1.0
3232
   */
3233
397k
  if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3234
397k
      (!(((c >= 'a') && (c <= 'z')) ||
3235
375k
         ((c >= 'A') && (c <= 'Z')) ||
3236
375k
         (c == '_') || (c == ':') ||
3237
375k
         ((c >= 0xC0) && (c <= 0xD6)) ||
3238
375k
         ((c >= 0xD8) && (c <= 0xF6)) ||
3239
375k
         ((c >= 0xF8) && (c <= 0x2FF)) ||
3240
375k
         ((c >= 0x370) && (c <= 0x37D)) ||
3241
375k
         ((c >= 0x37F) && (c <= 0x1FFF)) ||
3242
375k
         ((c >= 0x200C) && (c <= 0x200D)) ||
3243
375k
         ((c >= 0x2070) && (c <= 0x218F)) ||
3244
375k
         ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3245
375k
         ((c >= 0x3001) && (c <= 0xD7FF)) ||
3246
375k
         ((c >= 0xF900) && (c <= 0xFDCF)) ||
3247
375k
         ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3248
375k
         ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3249
235k
      return(NULL);
3250
235k
  }
3251
162k
  len += l;
3252
162k
  NEXTL(l);
3253
162k
  c = CUR_CHAR(l);
3254
7.84M
  while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3255
7.84M
         (((c >= 'a') && (c <= 'z')) ||
3256
7.79M
          ((c >= 'A') && (c <= 'Z')) ||
3257
7.79M
          ((c >= '0') && (c <= '9')) || /* !start */
3258
7.79M
          (c == '_') || (c == ':') ||
3259
7.79M
          (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3260
7.79M
          ((c >= 0xC0) && (c <= 0xD6)) ||
3261
7.79M
          ((c >= 0xD8) && (c <= 0xF6)) ||
3262
7.79M
          ((c >= 0xF8) && (c <= 0x2FF)) ||
3263
7.79M
          ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3264
7.79M
          ((c >= 0x370) && (c <= 0x37D)) ||
3265
7.79M
          ((c >= 0x37F) && (c <= 0x1FFF)) ||
3266
7.79M
          ((c >= 0x200C) && (c <= 0x200D)) ||
3267
7.79M
          ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3268
7.79M
          ((c >= 0x2070) && (c <= 0x218F)) ||
3269
7.79M
          ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3270
7.79M
          ((c >= 0x3001) && (c <= 0xD7FF)) ||
3271
7.79M
          ((c >= 0xF900) && (c <= 0xFDCF)) ||
3272
7.79M
          ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3273
7.79M
          ((c >= 0x10000) && (c <= 0xEFFFF))
3274
7.79M
    )) {
3275
7.68M
      if (count++ > XML_PARSER_CHUNK_SIZE) {
3276
64.9k
    count = 0;
3277
64.9k
    GROW;
3278
64.9k
                if (ctxt->instate == XML_PARSER_EOF)
3279
0
                    return(NULL);
3280
64.9k
      }
3281
7.68M
            if (len <= INT_MAX - l)
3282
7.68M
          len += l;
3283
7.68M
      NEXTL(l);
3284
7.68M
      c = CUR_CHAR(l);
3285
7.68M
  }
3286
341k
    } else {
3287
341k
  if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3288
341k
      (!IS_LETTER(c) && (c != '_') &&
3289
318k
       (c != ':'))) {
3290
213k
      return(NULL);
3291
213k
  }
3292
127k
  len += l;
3293
127k
  NEXTL(l);
3294
127k
  c = CUR_CHAR(l);
3295
3296
7.47M
  while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3297
7.47M
         ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3298
7.43M
    (c == '.') || (c == '-') ||
3299
7.43M
    (c == '_') || (c == ':') ||
3300
7.43M
    (IS_COMBINING(c)) ||
3301
7.43M
    (IS_EXTENDER(c)))) {
3302
7.35M
      if (count++ > XML_PARSER_CHUNK_SIZE) {
3303
63.5k
    count = 0;
3304
63.5k
    GROW;
3305
63.5k
                if (ctxt->instate == XML_PARSER_EOF)
3306
0
                    return(NULL);
3307
63.5k
      }
3308
7.35M
            if (len <= INT_MAX - l)
3309
7.35M
          len += l;
3310
7.35M
      NEXTL(l);
3311
7.35M
      c = CUR_CHAR(l);
3312
7.35M
  }
3313
127k
    }
3314
289k
    if (len > maxLength) {
3315
6
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3316
6
        return(NULL);
3317
6
    }
3318
289k
    if (ctxt->input->cur - ctxt->input->base < len) {
3319
        /*
3320
         * There were a couple of bugs where PERefs lead to to a change
3321
         * of the buffer. Check the buffer size to avoid passing an invalid
3322
         * pointer to xmlDictLookup.
3323
         */
3324
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
3325
0
                    "unexpected change of input buffer");
3326
0
        return (NULL);
3327
0
    }
3328
289k
    if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3329
2.12k
        return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
3330
287k
    return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3331
289k
}
3332
3333
/**
3334
 * xmlParseName:
3335
 * @ctxt:  an XML parser context
3336
 *
3337
 * DEPRECATED: Internal function, don't use.
3338
 *
3339
 * parse an XML name.
3340
 *
3341
 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3342
 *                  CombiningChar | Extender
3343
 *
3344
 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3345
 *
3346
 * [6] Names ::= Name (#x20 Name)*
3347
 *
3348
 * Returns the Name parsed or NULL
3349
 */
3350
3351
const xmlChar *
3352
21.4M
xmlParseName(xmlParserCtxtPtr ctxt) {
3353
21.4M
    const xmlChar *in;
3354
21.4M
    const xmlChar *ret;
3355
21.4M
    size_t count = 0;
3356
21.4M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3357
10.7M
                       XML_MAX_TEXT_LENGTH :
3358
21.4M
                       XML_MAX_NAME_LENGTH;
3359
3360
21.4M
    GROW;
3361
3362
#ifdef DEBUG
3363
    nbParseName++;
3364
#endif
3365
3366
    /*
3367
     * Accelerator for simple ASCII names
3368
     */
3369
21.4M
    in = ctxt->input->cur;
3370
21.4M
    if (((*in >= 0x61) && (*in <= 0x7A)) ||
3371
21.4M
  ((*in >= 0x41) && (*in <= 0x5A)) ||
3372
21.4M
  (*in == '_') || (*in == ':')) {
3373
20.9M
  in++;
3374
143M
  while (((*in >= 0x61) && (*in <= 0x7A)) ||
3375
143M
         ((*in >= 0x41) && (*in <= 0x5A)) ||
3376
143M
         ((*in >= 0x30) && (*in <= 0x39)) ||
3377
143M
         (*in == '_') || (*in == '-') ||
3378
143M
         (*in == ':') || (*in == '.'))
3379
122M
      in++;
3380
20.9M
  if ((*in > 0) && (*in < 0x80)) {
3381
20.6M
      count = in - ctxt->input->cur;
3382
20.6M
            if (count > maxLength) {
3383
0
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3384
0
                return(NULL);
3385
0
            }
3386
20.6M
      ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3387
20.6M
      ctxt->input->cur = in;
3388
20.6M
      ctxt->input->col += count;
3389
20.6M
      if (ret == NULL)
3390
0
          xmlErrMemory(ctxt, NULL);
3391
20.6M
      return(ret);
3392
20.6M
  }
3393
20.9M
    }
3394
    /* accelerator for special cases */
3395
738k
    return(xmlParseNameComplex(ctxt));
3396
21.4M
}
3397
3398
static const xmlChar *
3399
465k
xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3400
465k
    int len = 0, l;
3401
465k
    int c;
3402
465k
    int count = 0;
3403
465k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3404
182k
                    XML_MAX_TEXT_LENGTH :
3405
465k
                    XML_MAX_NAME_LENGTH;
3406
465k
    size_t startPosition = 0;
3407
3408
#ifdef DEBUG
3409
    nbParseNCNameComplex++;
3410
#endif
3411
3412
    /*
3413
     * Handler for more complex cases
3414
     */
3415
465k
    GROW;
3416
465k
    startPosition = CUR_PTR - BASE_PTR;
3417
465k
    c = CUR_CHAR(l);
3418
465k
    if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3419
465k
  (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3420
353k
  return(NULL);
3421
353k
    }
3422
3423
9.49M
    while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3424
9.49M
     (xmlIsNameChar(ctxt, c) && (c != ':'))) {
3425
9.38M
  if (count++ > XML_PARSER_CHUNK_SIZE) {
3426
84.3k
      count = 0;
3427
84.3k
      GROW;
3428
84.3k
            if (ctxt->instate == XML_PARSER_EOF)
3429
0
                return(NULL);
3430
84.3k
  }
3431
9.38M
        if (len <= INT_MAX - l)
3432
9.38M
      len += l;
3433
9.38M
  NEXTL(l);
3434
9.38M
  c = CUR_CHAR(l);
3435
9.38M
  if (c == 0) {
3436
21.1k
      count = 0;
3437
      /*
3438
       * when shrinking to extend the buffer we really need to preserve
3439
       * the part of the name we already parsed. Hence rolling back
3440
       * by current length.
3441
       */
3442
21.1k
      ctxt->input->cur -= l;
3443
21.1k
      GROW;
3444
21.1k
            if (ctxt->instate == XML_PARSER_EOF)
3445
0
                return(NULL);
3446
21.1k
      ctxt->input->cur += l;
3447
21.1k
      c = CUR_CHAR(l);
3448
21.1k
  }
3449
9.38M
    }
3450
112k
    if (len > maxLength) {
3451
3
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3452
3
        return(NULL);
3453
3
    }
3454
112k
    return(xmlDictLookup(ctxt->dict, (BASE_PTR + startPosition), len));
3455
112k
}
3456
3457
/**
3458
 * xmlParseNCName:
3459
 * @ctxt:  an XML parser context
3460
 * @len:  length of the string parsed
3461
 *
3462
 * parse an XML name.
3463
 *
3464
 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3465
 *                      CombiningChar | Extender
3466
 *
3467
 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3468
 *
3469
 * Returns the Name parsed or NULL
3470
 */
3471
3472
static const xmlChar *
3473
17.0M
xmlParseNCName(xmlParserCtxtPtr ctxt) {
3474
17.0M
    const xmlChar *in, *e;
3475
17.0M
    const xmlChar *ret;
3476
17.0M
    size_t count = 0;
3477
17.0M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3478
5.22M
                       XML_MAX_TEXT_LENGTH :
3479
17.0M
                       XML_MAX_NAME_LENGTH;
3480
3481
#ifdef DEBUG
3482
    nbParseNCName++;
3483
#endif
3484
3485
    /*
3486
     * Accelerator for simple ASCII names
3487
     */
3488
17.0M
    in = ctxt->input->cur;
3489
17.0M
    e = ctxt->input->end;
3490
17.0M
    if ((((*in >= 0x61) && (*in <= 0x7A)) ||
3491
17.0M
   ((*in >= 0x41) && (*in <= 0x5A)) ||
3492
17.0M
   (*in == '_')) && (in < e)) {
3493
16.6M
  in++;
3494
85.4M
  while ((((*in >= 0x61) && (*in <= 0x7A)) ||
3495
85.4M
          ((*in >= 0x41) && (*in <= 0x5A)) ||
3496
85.4M
          ((*in >= 0x30) && (*in <= 0x39)) ||
3497
85.4M
          (*in == '_') || (*in == '-') ||
3498
85.4M
          (*in == '.')) && (in < e))
3499
68.7M
      in++;
3500
16.6M
  if (in >= e)
3501
9.95k
      goto complex;
3502
16.6M
  if ((*in > 0) && (*in < 0x80)) {
3503
16.5M
      count = in - ctxt->input->cur;
3504
16.5M
            if (count > maxLength) {
3505
0
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3506
0
                return(NULL);
3507
0
            }
3508
16.5M
      ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3509
16.5M
      ctxt->input->cur = in;
3510
16.5M
      ctxt->input->col += count;
3511
16.5M
      if (ret == NULL) {
3512
0
          xmlErrMemory(ctxt, NULL);
3513
0
      }
3514
16.5M
      return(ret);
3515
16.5M
  }
3516
16.6M
    }
3517
465k
complex:
3518
465k
    return(xmlParseNCNameComplex(ctxt));
3519
17.0M
}
3520
3521
/**
3522
 * xmlParseNameAndCompare:
3523
 * @ctxt:  an XML parser context
3524
 *
3525
 * parse an XML name and compares for match
3526
 * (specialized for endtag parsing)
3527
 *
3528
 * Returns NULL for an illegal name, (xmlChar*) 1 for success
3529
 * and the name for mismatch
3530
 */
3531
3532
static const xmlChar *
3533
3.43M
xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
3534
3.43M
    register const xmlChar *cmp = other;
3535
3.43M
    register const xmlChar *in;
3536
3.43M
    const xmlChar *ret;
3537
3538
3.43M
    GROW;
3539
3.43M
    if (ctxt->instate == XML_PARSER_EOF)
3540
0
        return(NULL);
3541
3542
3.43M
    in = ctxt->input->cur;
3543
19.9M
    while (*in != 0 && *in == *cmp) {
3544
16.5M
  ++in;
3545
16.5M
  ++cmp;
3546
16.5M
    }
3547
3.43M
    if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
3548
  /* success */
3549
3.21M
  ctxt->input->col += in - ctxt->input->cur;
3550
3.21M
  ctxt->input->cur = in;
3551
3.21M
  return (const xmlChar*) 1;
3552
3.21M
    }
3553
    /* failure (or end of input buffer), check with full function */
3554
226k
    ret = xmlParseName (ctxt);
3555
    /* strings coming from the dictionary direct compare possible */
3556
226k
    if (ret == other) {
3557
19.2k
  return (const xmlChar*) 1;
3558
19.2k
    }
3559
207k
    return ret;
3560
226k
}
3561
3562
/**
3563
 * xmlParseStringName:
3564
 * @ctxt:  an XML parser context
3565
 * @str:  a pointer to the string pointer (IN/OUT)
3566
 *
3567
 * parse an XML name.
3568
 *
3569
 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3570
 *                  CombiningChar | Extender
3571
 *
3572
 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3573
 *
3574
 * [6] Names ::= Name (#x20 Name)*
3575
 *
3576
 * Returns the Name parsed or NULL. The @str pointer
3577
 * is updated to the current location in the string.
3578
 */
3579
3580
static xmlChar *
3581
1.49M
xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3582
1.49M
    xmlChar buf[XML_MAX_NAMELEN + 5];
3583
1.49M
    const xmlChar *cur = *str;
3584
1.49M
    int len = 0, l;
3585
1.49M
    int c;
3586
1.49M
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3587
803k
                    XML_MAX_TEXT_LENGTH :
3588
1.49M
                    XML_MAX_NAME_LENGTH;
3589
3590
#ifdef DEBUG
3591
    nbParseStringName++;
3592
#endif
3593
3594
1.49M
    c = CUR_SCHAR(cur, l);
3595
1.49M
    if (!xmlIsNameStartChar(ctxt, c)) {
3596
196k
  return(NULL);
3597
196k
    }
3598
3599
1.29M
    COPY_BUF(l,buf,len,c);
3600
1.29M
    cur += l;
3601
1.29M
    c = CUR_SCHAR(cur, l);
3602
13.2M
    while (xmlIsNameChar(ctxt, c)) {
3603
11.9M
  COPY_BUF(l,buf,len,c);
3604
11.9M
  cur += l;
3605
11.9M
  c = CUR_SCHAR(cur, l);
3606
11.9M
  if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3607
      /*
3608
       * Okay someone managed to make a huge name, so he's ready to pay
3609
       * for the processing speed.
3610
       */
3611
1.54k
      xmlChar *buffer;
3612
1.54k
      int max = len * 2;
3613
3614
1.54k
      buffer = (xmlChar *) xmlMallocAtomic(max);
3615
1.54k
      if (buffer == NULL) {
3616
0
          xmlErrMemory(ctxt, NULL);
3617
0
    return(NULL);
3618
0
      }
3619
1.54k
      memcpy(buffer, buf, len);
3620
3.07M
      while (xmlIsNameChar(ctxt, c)) {
3621
3.06M
    if (len + 10 > max) {
3622
4.82k
        xmlChar *tmp;
3623
3624
4.82k
        max *= 2;
3625
4.82k
        tmp = (xmlChar *) xmlRealloc(buffer, max);
3626
4.82k
        if (tmp == NULL) {
3627
0
      xmlErrMemory(ctxt, NULL);
3628
0
      xmlFree(buffer);
3629
0
      return(NULL);
3630
0
        }
3631
4.82k
        buffer = tmp;
3632
4.82k
    }
3633
3.06M
    COPY_BUF(l,buffer,len,c);
3634
3.06M
    cur += l;
3635
3.06M
    c = CUR_SCHAR(cur, l);
3636
3.06M
                if (len > maxLength) {
3637
0
                    xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3638
0
                    xmlFree(buffer);
3639
0
                    return(NULL);
3640
0
                }
3641
3.06M
      }
3642
1.54k
      buffer[len] = 0;
3643
1.54k
      *str = cur;
3644
1.54k
      return(buffer);
3645
1.54k
  }
3646
11.9M
    }
3647
1.29M
    if (len > maxLength) {
3648
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3649
0
        return(NULL);
3650
0
    }
3651
1.29M
    *str = cur;
3652
1.29M
    return(xmlStrndup(buf, len));
3653
1.29M
}
3654
3655
/**
3656
 * xmlParseNmtoken:
3657
 * @ctxt:  an XML parser context
3658
 *
3659
 * DEPRECATED: Internal function, don't use.
3660
 *
3661
 * parse an XML Nmtoken.
3662
 *
3663
 * [7] Nmtoken ::= (NameChar)+
3664
 *
3665
 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
3666
 *
3667
 * Returns the Nmtoken parsed or NULL
3668
 */
3669
3670
xmlChar *
3671
326k
xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3672
326k
    xmlChar buf[XML_MAX_NAMELEN + 5];
3673
326k
    int len = 0, l;
3674
326k
    int c;
3675
326k
    int count = 0;
3676
326k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3677
116k
                    XML_MAX_TEXT_LENGTH :
3678
326k
                    XML_MAX_NAME_LENGTH;
3679
3680
#ifdef DEBUG
3681
    nbParseNmToken++;
3682
#endif
3683
3684
326k
    GROW;
3685
326k
    if (ctxt->instate == XML_PARSER_EOF)
3686
0
        return(NULL);
3687
326k
    c = CUR_CHAR(l);
3688
3689
2.30M
    while (xmlIsNameChar(ctxt, c)) {
3690
1.98M
  if (count++ > XML_PARSER_CHUNK_SIZE) {
3691
0
      count = 0;
3692
0
      GROW;
3693
0
  }
3694
1.98M
  COPY_BUF(l,buf,len,c);
3695
1.98M
  NEXTL(l);
3696
1.98M
  c = CUR_CHAR(l);
3697
1.98M
  if (c == 0) {
3698
973
      count = 0;
3699
973
      GROW;
3700
973
      if (ctxt->instate == XML_PARSER_EOF)
3701
0
    return(NULL);
3702
973
            c = CUR_CHAR(l);
3703
973
  }
3704
1.98M
  if (len >= XML_MAX_NAMELEN) {
3705
      /*
3706
       * Okay someone managed to make a huge token, so he's ready to pay
3707
       * for the processing speed.
3708
       */
3709
3.53k
      xmlChar *buffer;
3710
3.53k
      int max = len * 2;
3711
3712
3.53k
      buffer = (xmlChar *) xmlMallocAtomic(max);
3713
3.53k
      if (buffer == NULL) {
3714
0
          xmlErrMemory(ctxt, NULL);
3715
0
    return(NULL);
3716
0
      }
3717
3.53k
      memcpy(buffer, buf, len);
3718
11.8M
      while (xmlIsNameChar(ctxt, c)) {
3719
11.8M
    if (count++ > XML_PARSER_CHUNK_SIZE) {
3720
117k
        count = 0;
3721
117k
        GROW;
3722
117k
                    if (ctxt->instate == XML_PARSER_EOF) {
3723
0
                        xmlFree(buffer);
3724
0
                        return(NULL);
3725
0
                    }
3726
117k
    }
3727
11.8M
    if (len + 10 > max) {
3728
10.8k
        xmlChar *tmp;
3729
3730
10.8k
        max *= 2;
3731
10.8k
        tmp = (xmlChar *) xmlRealloc(buffer, max);
3732
10.8k
        if (tmp == NULL) {
3733
0
      xmlErrMemory(ctxt, NULL);
3734
0
      xmlFree(buffer);
3735
0
      return(NULL);
3736
0
        }
3737
10.8k
        buffer = tmp;
3738
10.8k
    }
3739
11.8M
    COPY_BUF(l,buffer,len,c);
3740
11.8M
    NEXTL(l);
3741
11.8M
    c = CUR_CHAR(l);
3742
11.8M
                if (len > maxLength) {
3743
0
                    xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3744
0
                    xmlFree(buffer);
3745
0
                    return(NULL);
3746
0
                }
3747
11.8M
      }
3748
3.53k
      buffer[len] = 0;
3749
3.53k
      return(buffer);
3750
3.53k
  }
3751
1.98M
    }
3752
322k
    if (len == 0)
3753
12.9k
        return(NULL);
3754
309k
    if (len > maxLength) {
3755
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3756
0
        return(NULL);
3757
0
    }
3758
309k
    return(xmlStrndup(buf, len));
3759
309k
}
3760
3761
/**
3762
 * xmlParseEntityValue:
3763
 * @ctxt:  an XML parser context
3764
 * @orig:  if non-NULL store a copy of the original entity value
3765
 *
3766
 * DEPRECATED: Internal function, don't use.
3767
 *
3768
 * parse a value for ENTITY declarations
3769
 *
3770
 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3771
 *                 "'" ([^%&'] | PEReference | Reference)* "'"
3772
 *
3773
 * Returns the EntityValue parsed with reference substituted or NULL
3774
 */
3775
3776
xmlChar *
3777
1.17M
xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3778
1.17M
    xmlChar *buf = NULL;
3779
1.17M
    int len = 0;
3780
1.17M
    int size = XML_PARSER_BUFFER_SIZE;
3781
1.17M
    int c, l;
3782
1.17M
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3783
398k
                    XML_MAX_HUGE_LENGTH :
3784
1.17M
                    XML_MAX_TEXT_LENGTH;
3785
1.17M
    xmlChar stop;
3786
1.17M
    xmlChar *ret = NULL;
3787
1.17M
    const xmlChar *cur = NULL;
3788
1.17M
    xmlParserInputPtr input;
3789
3790
1.17M
    if (RAW == '"') stop = '"';
3791
262k
    else if (RAW == '\'') stop = '\'';
3792
0
    else {
3793
0
  xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
3794
0
  return(NULL);
3795
0
    }
3796
1.17M
    buf = (xmlChar *) xmlMallocAtomic(size);
3797
1.17M
    if (buf == NULL) {
3798
0
  xmlErrMemory(ctxt, NULL);
3799
0
  return(NULL);
3800
0
    }
3801
3802
    /*
3803
     * The content of the entity definition is copied in a buffer.
3804
     */
3805
3806
1.17M
    ctxt->instate = XML_PARSER_ENTITY_VALUE;
3807
1.17M
    input = ctxt->input;
3808
1.17M
    GROW;
3809
1.17M
    if (ctxt->instate == XML_PARSER_EOF)
3810
0
        goto error;
3811
1.17M
    NEXT;
3812
1.17M
    c = CUR_CHAR(l);
3813
    /*
3814
     * NOTE: 4.4.5 Included in Literal
3815
     * When a parameter entity reference appears in a literal entity
3816
     * value, ... a single or double quote character in the replacement
3817
     * text is always treated as a normal data character and will not
3818
     * terminate the literal.
3819
     * In practice it means we stop the loop only when back at parsing
3820
     * the initial entity and the quote is found
3821
     */
3822
47.8M
    while (((IS_CHAR(c)) && ((c != stop) || /* checked */
3823
47.8M
      (ctxt->input != input))) && (ctxt->instate != XML_PARSER_EOF)) {
3824
46.6M
  if (len + 5 >= size) {
3825
128k
      xmlChar *tmp;
3826
3827
128k
      size *= 2;
3828
128k
      tmp = (xmlChar *) xmlRealloc(buf, size);
3829
128k
      if (tmp == NULL) {
3830
0
    xmlErrMemory(ctxt, NULL);
3831
0
                goto error;
3832
0
      }
3833
128k
      buf = tmp;
3834
128k
  }
3835
46.6M
  COPY_BUF(l,buf,len,c);
3836
46.6M
  NEXTL(l);
3837
3838
46.6M
  GROW;
3839
46.6M
  c = CUR_CHAR(l);
3840
46.6M
  if (c == 0) {
3841
2.04k
      GROW;
3842
2.04k
      c = CUR_CHAR(l);
3843
2.04k
  }
3844
3845
46.6M
        if (len > maxLength) {
3846
0
            xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
3847
0
                           "entity value too long\n");
3848
0
            goto error;
3849
0
        }
3850
46.6M
    }
3851
1.17M
    buf[len] = 0;
3852
1.17M
    if (ctxt->instate == XML_PARSER_EOF)
3853
0
        goto error;
3854
1.17M
    if (c != stop) {
3855
3.50k
        xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
3856
3.50k
        goto error;
3857
3.50k
    }
3858
1.17M
    NEXT;
3859
3860
    /*
3861
     * Raise problem w.r.t. '&' and '%' being used in non-entities
3862
     * reference constructs. Note Charref will be handled in
3863
     * xmlStringDecodeEntities()
3864
     */
3865
1.17M
    cur = buf;
3866
36.7M
    while (*cur != 0) { /* non input consuming */
3867
35.5M
  if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3868
474k
      xmlChar *name;
3869
474k
      xmlChar tmp = *cur;
3870
474k
            int nameOk = 0;
3871
3872
474k
      cur++;
3873
474k
      name = xmlParseStringName(ctxt, &cur);
3874
474k
            if (name != NULL) {
3875
471k
                nameOk = 1;
3876
471k
                xmlFree(name);
3877
471k
            }
3878
474k
            if ((nameOk == 0) || (*cur != ';')) {
3879
7.27k
    xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
3880
7.27k
      "EntityValue: '%c' forbidden except for entities references\n",
3881
7.27k
                            tmp);
3882
7.27k
                goto error;
3883
7.27k
      }
3884
467k
      if ((tmp == '%') && (ctxt->inSubset == 1) &&
3885
467k
    (ctxt->inputNr == 1)) {
3886
335
    xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
3887
335
                goto error;
3888
335
      }
3889
466k
      if (*cur == 0)
3890
0
          break;
3891
466k
  }
3892
35.5M
  cur++;
3893
35.5M
    }
3894
3895
    /*
3896
     * Then PEReference entities are substituted.
3897
     *
3898
     * NOTE: 4.4.7 Bypassed
3899
     * When a general entity reference appears in the EntityValue in
3900
     * an entity declaration, it is bypassed and left as is.
3901
     * so XML_SUBSTITUTE_REF is not set here.
3902
     */
3903
1.16M
    ++ctxt->depth;
3904
1.16M
    ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3905
1.16M
                                  0, 0, 0);
3906
1.16M
    --ctxt->depth;
3907
1.16M
    if (orig != NULL) {
3908
1.16M
        *orig = buf;
3909
1.16M
        buf = NULL;
3910
1.16M
    }
3911
3912
1.17M
error:
3913
1.17M
    if (buf != NULL)
3914
11.1k
        xmlFree(buf);
3915
1.17M
    return(ret);
3916
1.16M
}
3917
3918
/**
3919
 * xmlParseAttValueComplex:
3920
 * @ctxt:  an XML parser context
3921
 * @len:   the resulting attribute len
3922
 * @normalize:  whether to apply the inner normalization
3923
 *
3924
 * parse a value for an attribute, this is the fallback function
3925
 * of xmlParseAttValue() when the attribute parsing requires handling
3926
 * of non-ASCII characters, or normalization compaction.
3927
 *
3928
 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3929
 */
3930
static xmlChar *
3931
539k
xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
3932
539k
    xmlChar limit = 0;
3933
539k
    xmlChar *buf = NULL;
3934
539k
    xmlChar *rep = NULL;
3935
539k
    size_t len = 0;
3936
539k
    size_t buf_size = 0;
3937
539k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3938
196k
                       XML_MAX_HUGE_LENGTH :
3939
539k
                       XML_MAX_TEXT_LENGTH;
3940
539k
    int c, l, in_space = 0;
3941
539k
    xmlChar *current = NULL;
3942
539k
    xmlEntityPtr ent;
3943
3944
539k
    if (NXT(0) == '"') {
3945
330k
  ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3946
330k
  limit = '"';
3947
330k
        NEXT;
3948
330k
    } else if (NXT(0) == '\'') {
3949
208k
  limit = '\'';
3950
208k
  ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3951
208k
        NEXT;
3952
208k
    } else {
3953
0
  xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
3954
0
  return(NULL);
3955
0
    }
3956
3957
    /*
3958
     * allocate a translation buffer.
3959
     */
3960
539k
    buf_size = XML_PARSER_BUFFER_SIZE;
3961
539k
    buf = (xmlChar *) xmlMallocAtomic(buf_size);
3962
539k
    if (buf == NULL) goto mem_error;
3963
3964
    /*
3965
     * OK loop until we reach one of the ending char or a size limit.
3966
     */
3967
539k
    c = CUR_CHAR(l);
3968
21.8M
    while (((NXT(0) != limit) && /* checked */
3969
21.8M
            (IS_CHAR(c)) && (c != '<')) &&
3970
21.8M
            (ctxt->instate != XML_PARSER_EOF)) {
3971
21.3M
  if (c == '&') {
3972
655k
      in_space = 0;
3973
655k
      if (NXT(1) == '#') {
3974
294k
    int val = xmlParseCharRef(ctxt);
3975
3976
294k
    if (val == '&') {
3977
3.87k
        if (ctxt->replaceEntities) {
3978
1.71k
      if (len + 10 > buf_size) {
3979
156
          growBuffer(buf, 10);
3980
156
      }
3981
1.71k
      buf[len++] = '&';
3982
2.15k
        } else {
3983
      /*
3984
       * The reparsing will be done in xmlStringGetNodeList()
3985
       * called by the attribute() function in SAX.c
3986
       */
3987
2.15k
      if (len + 10 > buf_size) {
3988
158
          growBuffer(buf, 10);
3989
158
      }
3990
2.15k
      buf[len++] = '&';
3991
2.15k
      buf[len++] = '#';
3992
2.15k
      buf[len++] = '3';
3993
2.15k
      buf[len++] = '8';
3994
2.15k
      buf[len++] = ';';
3995
2.15k
        }
3996
290k
    } else if (val != 0) {
3997
270k
        if (len + 10 > buf_size) {
3998
4.14k
      growBuffer(buf, 10);
3999
4.14k
        }
4000
270k
        len += xmlCopyChar(0, &buf[len], val);
4001
270k
    }
4002
361k
      } else {
4003
361k
    ent = xmlParseEntityRef(ctxt);
4004
361k
    ctxt->nbentities++;
4005
361k
    if (ent != NULL)
4006
290k
        ctxt->nbentities += ent->owner;
4007
361k
    if ((ent != NULL) &&
4008
361k
        (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
4009
132k
        if (len + 10 > buf_size) {
4010
182
      growBuffer(buf, 10);
4011
182
        }
4012
132k
        if ((ctxt->replaceEntities == 0) &&
4013
132k
            (ent->content[0] == '&')) {
4014
32.6k
      buf[len++] = '&';
4015
32.6k
      buf[len++] = '#';
4016
32.6k
      buf[len++] = '3';
4017
32.6k
      buf[len++] = '8';
4018
32.6k
      buf[len++] = ';';
4019
100k
        } else {
4020
100k
      buf[len++] = ent->content[0];
4021
100k
        }
4022
228k
    } else if ((ent != NULL) &&
4023
228k
               (ctxt->replaceEntities != 0)) {
4024
69.2k
        if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
4025
69.2k
      ++ctxt->depth;
4026
69.2k
      rep = xmlStringDecodeEntities(ctxt, ent->content,
4027
69.2k
                  XML_SUBSTITUTE_REF,
4028
69.2k
                  0, 0, 0);
4029
69.2k
      --ctxt->depth;
4030
69.2k
      if (rep != NULL) {
4031
66.1k
          current = rep;
4032
476k
          while (*current != 0) { /* non input consuming */
4033
410k
                                if ((*current == 0xD) || (*current == 0xA) ||
4034
410k
                                    (*current == 0x9)) {
4035
61.2k
                                    buf[len++] = 0x20;
4036
61.2k
                                    current++;
4037
61.2k
                                } else
4038
349k
                                    buf[len++] = *current++;
4039
410k
        if (len + 10 > buf_size) {
4040
2.70k
            growBuffer(buf, 10);
4041
2.70k
        }
4042
410k
          }
4043
66.1k
          xmlFree(rep);
4044
66.1k
          rep = NULL;
4045
66.1k
      }
4046
69.2k
        } else {
4047
0
      if (len + 10 > buf_size) {
4048
0
          growBuffer(buf, 10);
4049
0
      }
4050
0
      if (ent->content != NULL)
4051
0
          buf[len++] = ent->content[0];
4052
0
        }
4053
159k
    } else if (ent != NULL) {
4054
88.1k
        int i = xmlStrlen(ent->name);
4055
88.1k
        const xmlChar *cur = ent->name;
4056
4057
        /*
4058
         * This may look absurd but is needed to detect
4059
         * entities problems
4060
         */
4061
88.1k
        if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
4062
88.1k
      (ent->content != NULL) && (ent->checked == 0)) {
4063
19.8k
      unsigned long oldnbent = ctxt->nbentities, diff;
4064
4065
19.8k
      ++ctxt->depth;
4066
19.8k
      rep = xmlStringDecodeEntities(ctxt, ent->content,
4067
19.8k
              XML_SUBSTITUTE_REF, 0, 0, 0);
4068
19.8k
      --ctxt->depth;
4069
4070
19.8k
                        diff = ctxt->nbentities - oldnbent + 1;
4071
19.8k
                        if (diff > INT_MAX / 2)
4072
0
                            diff = INT_MAX / 2;
4073
19.8k
                        ent->checked = diff * 2;
4074
19.8k
      if (rep != NULL) {
4075
19.8k
          if (xmlStrchr(rep, '<'))
4076
762
              ent->checked |= 1;
4077
19.8k
          xmlFree(rep);
4078
19.8k
          rep = NULL;
4079
19.8k
      } else {
4080
63
                            ent->content[0] = 0;
4081
63
                        }
4082
19.8k
        }
4083
4084
        /*
4085
         * Just output the reference
4086
         */
4087
88.1k
        buf[len++] = '&';
4088
88.5k
        while (len + i + 10 > buf_size) {
4089
760
      growBuffer(buf, i + 10);
4090
760
        }
4091
597k
        for (;i > 0;i--)
4092
509k
      buf[len++] = *cur++;
4093
88.1k
        buf[len++] = ';';
4094
88.1k
    }
4095
361k
      }
4096
20.6M
  } else {
4097
20.6M
      if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
4098
3.16M
          if ((len != 0) || (!normalize)) {
4099
3.11M
        if ((!normalize) || (!in_space)) {
4100
3.07M
      COPY_BUF(l,buf,len,0x20);
4101
3.07M
      while (len + 10 > buf_size) {
4102
8.56k
          growBuffer(buf, 10);
4103
8.56k
      }
4104
3.07M
        }
4105
3.11M
        in_space = 1;
4106
3.11M
    }
4107
17.5M
      } else {
4108
17.5M
          in_space = 0;
4109
17.5M
    COPY_BUF(l,buf,len,c);
4110
17.5M
    if (len + 10 > buf_size) {
4111
62.1k
        growBuffer(buf, 10);
4112
62.1k
    }
4113
17.5M
      }
4114
20.6M
      NEXTL(l);
4115
20.6M
  }
4116
21.3M
  GROW;
4117
21.3M
  c = CUR_CHAR(l);
4118
21.3M
        if (len > maxLength) {
4119
0
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4120
0
                           "AttValue length too long\n");
4121
0
            goto mem_error;
4122
0
        }
4123
21.3M
    }
4124
539k
    if (ctxt->instate == XML_PARSER_EOF)
4125
0
        goto error;
4126
4127
539k
    if ((in_space) && (normalize)) {
4128
47.5k
        while ((len > 0) && (buf[len - 1] == 0x20)) len--;
4129
22.9k
    }
4130
539k
    buf[len] = 0;
4131
539k
    if (RAW == '<') {
4132
65.4k
  xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
4133
474k
    } else if (RAW != limit) {
4134
89.6k
  if ((c != 0) && (!IS_CHAR(c))) {
4135
44.3k
      xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
4136
44.3k
         "invalid character in attribute value\n");
4137
45.3k
  } else {
4138
45.3k
      xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4139
45.3k
         "AttValue: ' expected\n");
4140
45.3k
        }
4141
89.6k
    } else
4142
384k
  NEXT;
4143
4144
539k
    if (attlen != NULL) *attlen = len;
4145
539k
    return(buf);
4146
4147
0
mem_error:
4148
0
    xmlErrMemory(ctxt, NULL);
4149
0
error:
4150
0
    if (buf != NULL)
4151
0
        xmlFree(buf);
4152
0
    if (rep != NULL)
4153
0
        xmlFree(rep);
4154
0
    return(NULL);
4155
0
}
4156
4157
/**
4158
 * xmlParseAttValue:
4159
 * @ctxt:  an XML parser context
4160
 *
4161
 * DEPRECATED: Internal function, don't use.
4162
 *
4163
 * parse a value for an attribute
4164
 * Note: the parser won't do substitution of entities here, this
4165
 * will be handled later in xmlStringGetNodeList
4166
 *
4167
 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
4168
 *                   "'" ([^<&'] | Reference)* "'"
4169
 *
4170
 * 3.3.3 Attribute-Value Normalization:
4171
 * Before the value of an attribute is passed to the application or
4172
 * checked for validity, the XML processor must normalize it as follows:
4173
 * - a character reference is processed by appending the referenced
4174
 *   character to the attribute value
4175
 * - an entity reference is processed by recursively processing the
4176
 *   replacement text of the entity
4177
 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4178
 *   appending #x20 to the normalized value, except that only a single
4179
 *   #x20 is appended for a "#xD#xA" sequence that is part of an external
4180
 *   parsed entity or the literal entity value of an internal parsed entity
4181
 * - other characters are processed by appending them to the normalized value
4182
 * If the declared value is not CDATA, then the XML processor must further
4183
 * process the normalized attribute value by discarding any leading and
4184
 * trailing space (#x20) characters, and by replacing sequences of space
4185
 * (#x20) characters by a single space (#x20) character.
4186
 * All attributes for which no declaration has been read should be treated
4187
 * by a non-validating parser as if declared CDATA.
4188
 *
4189
 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
4190
 */
4191
4192
4193
xmlChar *
4194
3.40M
xmlParseAttValue(xmlParserCtxtPtr ctxt) {
4195
3.40M
    if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
4196
3.40M
    return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
4197
3.40M
}
4198
4199
/**
4200
 * xmlParseSystemLiteral:
4201
 * @ctxt:  an XML parser context
4202
 *
4203
 * DEPRECATED: Internal function, don't use.
4204
 *
4205
 * parse an XML Literal
4206
 *
4207
 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4208
 *
4209
 * Returns the SystemLiteral parsed or NULL
4210
 */
4211
4212
xmlChar *
4213
249k
xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
4214
249k
    xmlChar *buf = NULL;
4215
249k
    int len = 0;
4216
249k
    int size = XML_PARSER_BUFFER_SIZE;
4217
249k
    int cur, l;
4218
249k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4219
80.9k
                    XML_MAX_TEXT_LENGTH :
4220
249k
                    XML_MAX_NAME_LENGTH;
4221
249k
    xmlChar stop;
4222
249k
    int state = ctxt->instate;
4223
249k
    int count = 0;
4224
4225
249k
    SHRINK;
4226
249k
    if (RAW == '"') {
4227
227k
        NEXT;
4228
227k
  stop = '"';
4229
227k
    } else if (RAW == '\'') {
4230
15.4k
        NEXT;
4231
15.4k
  stop = '\'';
4232
15.4k
    } else {
4233
6.56k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4234
6.56k
  return(NULL);
4235
6.56k
    }
4236
4237
242k
    buf = (xmlChar *) xmlMallocAtomic(size);
4238
242k
    if (buf == NULL) {
4239
0
        xmlErrMemory(ctxt, NULL);
4240
0
  return(NULL);
4241
0
    }
4242
242k
    ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
4243
242k
    cur = CUR_CHAR(l);
4244
7.16M
    while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
4245
6.91M
  if (len + 5 >= size) {
4246
6.50k
      xmlChar *tmp;
4247
4248
6.50k
      size *= 2;
4249
6.50k
      tmp = (xmlChar *) xmlRealloc(buf, size);
4250
6.50k
      if (tmp == NULL) {
4251
0
          xmlFree(buf);
4252
0
    xmlErrMemory(ctxt, NULL);
4253
0
    ctxt->instate = (xmlParserInputState) state;
4254
0
    return(NULL);
4255
0
      }
4256
6.50k
      buf = tmp;
4257
6.50k
  }
4258
6.91M
  count++;
4259
6.91M
  if (count > 50) {
4260
47.0k
      SHRINK;
4261
47.0k
      GROW;
4262
47.0k
      count = 0;
4263
47.0k
            if (ctxt->instate == XML_PARSER_EOF) {
4264
0
          xmlFree(buf);
4265
0
    return(NULL);
4266
0
            }
4267
47.0k
  }
4268
6.91M
  COPY_BUF(l,buf,len,cur);
4269
6.91M
  NEXTL(l);
4270
6.91M
  cur = CUR_CHAR(l);
4271
6.91M
  if (cur == 0) {
4272
3.81k
      GROW;
4273
3.81k
      SHRINK;
4274
3.81k
      cur = CUR_CHAR(l);
4275
3.81k
  }
4276
6.91M
        if (len > maxLength) {
4277
0
            xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
4278
0
            xmlFree(buf);
4279
0
            ctxt->instate = (xmlParserInputState) state;
4280
0
            return(NULL);
4281
0
        }
4282
6.91M
    }
4283
242k
    buf[len] = 0;
4284
242k
    ctxt->instate = (xmlParserInputState) state;
4285
242k
    if (!IS_CHAR(cur)) {
4286
5.39k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4287
237k
    } else {
4288
237k
  NEXT;
4289
237k
    }
4290
242k
    return(buf);
4291
242k
}
4292
4293
/**
4294
 * xmlParsePubidLiteral:
4295
 * @ctxt:  an XML parser context
4296
 *
4297
 * DEPRECATED: Internal function, don't use.
4298
 *
4299
 * parse an XML public literal
4300
 *
4301
 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4302
 *
4303
 * Returns the PubidLiteral parsed or NULL.
4304
 */
4305
4306
xmlChar *
4307
65.4k
xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
4308
65.4k
    xmlChar *buf = NULL;
4309
65.4k
    int len = 0;
4310
65.4k
    int size = XML_PARSER_BUFFER_SIZE;
4311
65.4k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4312
21.0k
                    XML_MAX_TEXT_LENGTH :
4313
65.4k
                    XML_MAX_NAME_LENGTH;
4314
65.4k
    xmlChar cur;
4315
65.4k
    xmlChar stop;
4316
65.4k
    int count = 0;
4317
65.4k
    xmlParserInputState oldstate = ctxt->instate;
4318
4319
65.4k
    SHRINK;
4320
65.4k
    if (RAW == '"') {
4321
56.8k
        NEXT;
4322
56.8k
  stop = '"';
4323
56.8k
    } else if (RAW == '\'') {
4324
7.69k
        NEXT;
4325
7.69k
  stop = '\'';
4326
7.69k
    } else {
4327
893
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4328
893
  return(NULL);
4329
893
    }
4330
64.5k
    buf = (xmlChar *) xmlMallocAtomic(size);
4331
64.5k
    if (buf == NULL) {
4332
0
  xmlErrMemory(ctxt, NULL);
4333
0
  return(NULL);
4334
0
    }
4335
64.5k
    ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
4336
64.5k
    cur = CUR;
4337
3.61M
    while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
4338
3.55M
  if (len + 1 >= size) {
4339
4.04k
      xmlChar *tmp;
4340
4341
4.04k
      size *= 2;
4342
4.04k
      tmp = (xmlChar *) xmlRealloc(buf, size);
4343
4.04k
      if (tmp == NULL) {
4344
0
    xmlErrMemory(ctxt, NULL);
4345
0
    xmlFree(buf);
4346
0
    return(NULL);
4347
0
      }
4348
4.04k
      buf = tmp;
4349
4.04k
  }
4350
3.55M
  buf[len++] = cur;
4351
3.55M
  count++;
4352
3.55M
  if (count > 50) {
4353
38.7k
      SHRINK;
4354
38.7k
      GROW;
4355
38.7k
      count = 0;
4356
38.7k
            if (ctxt->instate == XML_PARSER_EOF) {
4357
0
    xmlFree(buf);
4358
0
    return(NULL);
4359
0
            }
4360
38.7k
  }
4361
3.55M
  NEXT;
4362
3.55M
  cur = CUR;
4363
3.55M
  if (cur == 0) {
4364
1.08k
      GROW;
4365
1.08k
      SHRINK;
4366
1.08k
      cur = CUR;
4367
1.08k
  }
4368
3.55M
        if (len > maxLength) {
4369
0
            xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
4370
0
            xmlFree(buf);
4371
0
            return(NULL);
4372
0
        }
4373
3.55M
    }
4374
64.5k
    buf[len] = 0;
4375
64.5k
    if (cur != stop) {
4376
3.69k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4377
60.8k
    } else {
4378
60.8k
  NEXT;
4379
60.8k
    }
4380
64.5k
    ctxt->instate = oldstate;
4381
64.5k
    return(buf);
4382
64.5k
}
4383
4384
/* Forward declaration so the function can be referenced before its body. */
static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
4385
4386
/*
 * Lookup table used for the test in the inner loop of the char data
 * testing. Entry N holds N itself for 0x09 and for 0x20-0x7F except
 * '&' (0x26), '<' (0x3C) and ']' (0x5D); every other entry is 0x00.
 * The non-ASCII half (0x80-0xFF) is not spelled out: elements without an
 * initializer are zero.
 */
static const unsigned char test_char_data[256] = {
    /* 0x00 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x08 */ 0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* only 0x9; CR and LF are zero */
    /* 0x10 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x18 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x20 */ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* & */
    /* 0x28 */ 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    /* 0x30 */ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    /* 0x38 */ 0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* < */
    /* 0x40 */ 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    /* 0x48 */ 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    /* 0x50 */ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    /* 0x58 */ 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* ] */
    /* 0x60 */ 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    /* 0x68 */ 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    /* 0x70 */ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    /* 0x78 */ 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F
    /* 0x80 - 0xFF: non-ascii, implicitly 0x00 */
};
4423
4424
/**
4425
 * xmlParseCharData:
4426
 * @ctxt:  an XML parser context
4427
 * @cdata:  int indicating whether we are within a CDATA section
4428
 *
4429
 * DEPRECATED: Internal function, don't use.
4430
 *
4431
 * parse a CharData section.
4432
 * if we are within a CDATA section ']]>' marks an end of section.
4433
 *
4434
 * The right angle bracket (>) may be represented using the string "&gt;",
4435
 * and must, for compatibility, be escaped using "&gt;" or a character
4436
 * reference when it appears in the string "]]>" in content, when that
4437
 * string is not marking the end of a CDATA section.
4438
 *
4439
 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4440
 */
4441
4442
void
4443
14.5M
xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
4444
14.5M
    const xmlChar *in;
4445
14.5M
    int nbchar = 0;
4446
14.5M
    int line = ctxt->input->line;
4447
14.5M
    int col = ctxt->input->col;
4448
14.5M
    int ccol;
4449
4450
14.5M
    SHRINK;
4451
14.5M
    GROW;
4452
    /*
4453
     * Accelerated common case where input don't need to be
4454
     * modified before passing it to the handler.
4455
     */
4456
14.5M
    if (!cdata) {
4457
14.5M
  in = ctxt->input->cur;
4458
17.5M
  do {
4459
24.5M
get_more_space:
4460
62.9M
      while (*in == 0x20) { in++; ctxt->input->col++; }
4461
24.5M
      if (*in == 0xA) {
4462
7.12M
    do {
4463
7.12M
        ctxt->input->line++; ctxt->input->col = 1;
4464
7.12M
        in++;
4465
7.12M
    } while (*in == 0xA);
4466
6.99M
    goto get_more_space;
4467
6.99M
      }
4468
17.5M
      if (*in == '<') {
4469
6.14M
    nbchar = in - ctxt->input->cur;
4470
6.14M
    if (nbchar > 0) {
4471
6.10M
        const xmlChar *tmp = ctxt->input->cur;
4472
6.10M
        ctxt->input->cur = in;
4473
4474
6.10M
        if ((ctxt->sax != NULL) &&
4475
6.10M
            (ctxt->sax->ignorableWhitespace !=
4476
6.10M
             ctxt->sax->characters)) {
4477
2.32M
      if (areBlanks(ctxt, tmp, nbchar, 1)) {
4478
1.84M
          if (ctxt->sax->ignorableWhitespace != NULL)
4479
1.84M
        ctxt->sax->ignorableWhitespace(ctxt->userData,
4480
1.84M
                   tmp, nbchar);
4481
1.84M
      } else {
4482
477k
          if (ctxt->sax->characters != NULL)
4483
477k
        ctxt->sax->characters(ctxt->userData,
4484
477k
                  tmp, nbchar);
4485
477k
          if (*ctxt->space == -1)
4486
130k
              *ctxt->space = -2;
4487
477k
      }
4488
3.78M
        } else if ((ctxt->sax != NULL) &&
4489
3.78M
                   (ctxt->sax->characters != NULL)) {
4490
3.78M
      ctxt->sax->characters(ctxt->userData,
4491
3.78M
                tmp, nbchar);
4492
3.78M
        }
4493
6.10M
    }
4494
6.14M
    return;
4495
6.14M
      }
4496
4497
14.0M
get_more:
4498
14.0M
            ccol = ctxt->input->col;
4499
210M
      while (test_char_data[*in]) {
4500
196M
    in++;
4501
196M
    ccol++;
4502
196M
      }
4503
14.0M
      ctxt->input->col = ccol;
4504
14.0M
      if (*in == 0xA) {
4505
2.54M
    do {
4506
2.54M
        ctxt->input->line++; ctxt->input->col = 1;
4507
2.54M
        in++;
4508
2.54M
    } while (*in == 0xA);
4509
2.45M
    goto get_more;
4510
2.45M
      }
4511
11.5M
      if (*in == ']') {
4512
145k
    if ((in[1] == ']') && (in[2] == '>')) {
4513
6.22k
        xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4514
6.22k
        ctxt->input->cur = in + 1;
4515
6.22k
        return;
4516
6.22k
    }
4517
139k
    in++;
4518
139k
    ctxt->input->col++;
4519
139k
    goto get_more;
4520
145k
      }
4521
11.4M
      nbchar = in - ctxt->input->cur;
4522
11.4M
      if (nbchar > 0) {
4523
7.59M
    if ((ctxt->sax != NULL) &&
4524
7.59M
        (ctxt->sax->ignorableWhitespace !=
4525
7.59M
         ctxt->sax->characters) &&
4526
7.59M
        (IS_BLANK_CH(*ctxt->input->cur))) {
4527
1.35M
        const xmlChar *tmp = ctxt->input->cur;
4528
1.35M
        ctxt->input->cur = in;
4529
4530
1.35M
        if (areBlanks(ctxt, tmp, nbchar, 0)) {
4531
647k
            if (ctxt->sax->ignorableWhitespace != NULL)
4532
647k
          ctxt->sax->ignorableWhitespace(ctxt->userData,
4533
647k
                 tmp, nbchar);
4534
711k
        } else {
4535
711k
            if (ctxt->sax->characters != NULL)
4536
711k
          ctxt->sax->characters(ctxt->userData,
4537
711k
              tmp, nbchar);
4538
711k
      if (*ctxt->space == -1)
4539
301k
          *ctxt->space = -2;
4540
711k
        }
4541
1.35M
                    line = ctxt->input->line;
4542
1.35M
                    col = ctxt->input->col;
4543
6.23M
    } else if (ctxt->sax != NULL) {
4544
6.23M
        if (ctxt->sax->characters != NULL)
4545
6.23M
      ctxt->sax->characters(ctxt->userData,
4546
6.23M
                ctxt->input->cur, nbchar);
4547
6.23M
                    line = ctxt->input->line;
4548
6.23M
                    col = ctxt->input->col;
4549
6.23M
    }
4550
                /* something really bad happened in the SAX callback */
4551
7.59M
                if (ctxt->instate != XML_PARSER_CONTENT)
4552
0
                    return;
4553
7.59M
      }
4554
11.4M
      ctxt->input->cur = in;
4555
11.4M
      if (*in == 0xD) {
4556
3.08M
    in++;
4557
3.08M
    if (*in == 0xA) {
4558
3.02M
        ctxt->input->cur = in;
4559
3.02M
        in++;
4560
3.02M
        ctxt->input->line++; ctxt->input->col = 1;
4561
3.02M
        continue; /* while */
4562
3.02M
    }
4563
62.6k
    in--;
4564
62.6k
      }
4565
8.38M
      if (*in == '<') {
4566
6.31M
    return;
4567
6.31M
      }
4568
2.07M
      if (*in == '&') {
4569
676k
    return;
4570
676k
      }
4571
1.39M
      SHRINK;
4572
1.39M
      GROW;
4573
1.39M
            if (ctxt->instate == XML_PARSER_EOF)
4574
0
    return;
4575
1.39M
      in = ctxt->input->cur;
4576
4.41M
  } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09) || (*in == 0x0a));
4577
1.40M
  nbchar = 0;
4578
1.40M
    }
4579
1.40M
    ctxt->input->line = line;
4580
1.40M
    ctxt->input->col = col;
4581
1.40M
    xmlParseCharDataComplex(ctxt, cdata);
4582
1.40M
}
4583
4584
/**
4585
 * xmlParseCharDataComplex:
4586
 * @ctxt:  an XML parser context
4587
 * @cdata:  int indicating whether we are within a CDATA section
4588
 *
4589
 * parse a CharData section.this is the fallback function
4590
 * of xmlParseCharData() when the parsing requires handling
4591
 * of non-ASCII characters.
4592
 */
4593
static void
4594
1.40M
xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
4595
1.40M
    xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4596
1.40M
    int nbchar = 0;
4597
1.40M
    int cur, l;
4598
1.40M
    int count = 0;
4599
4600
1.40M
    SHRINK;
4601
1.40M
    GROW;
4602
1.40M
    cur = CUR_CHAR(l);
4603
32.2M
    while ((cur != '<') && /* checked */
4604
32.2M
           (cur != '&') &&
4605
32.2M
     (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
4606
30.8M
  if ((cur == ']') && (NXT(1) == ']') &&
4607
30.8M
      (NXT(2) == '>')) {
4608
4.43k
      if (cdata) break;
4609
4.43k
      else {
4610
4.43k
    xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4611
4.43k
      }
4612
4.43k
  }
4613
30.8M
  COPY_BUF(l,buf,nbchar,cur);
4614
  /* move current position before possible calling of ctxt->sax->characters */
4615
30.8M
  NEXTL(l);
4616
30.8M
  cur = CUR_CHAR(l);
4617
30.8M
  if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
4618
80.7k
      buf[nbchar] = 0;
4619
4620
      /*
4621
       * OK the segment is to be consumed as chars.
4622
       */
4623
80.7k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4624
62.1k
    if (areBlanks(ctxt, buf, nbchar, 0)) {
4625
65
        if (ctxt->sax->ignorableWhitespace != NULL)
4626
65
      ctxt->sax->ignorableWhitespace(ctxt->userData,
4627
65
                                     buf, nbchar);
4628
62.0k
    } else {
4629
62.0k
        if (ctxt->sax->characters != NULL)
4630
62.0k
      ctxt->sax->characters(ctxt->userData, buf, nbchar);
4631
62.0k
        if ((ctxt->sax->characters !=
4632
62.0k
             ctxt->sax->ignorableWhitespace) &&
4633
62.0k
      (*ctxt->space == -1))
4634
2.65k
      *ctxt->space = -2;
4635
62.0k
    }
4636
62.1k
      }
4637
80.7k
      nbchar = 0;
4638
            /* something really bad happened in the SAX callback */
4639
80.7k
            if (ctxt->instate != XML_PARSER_CONTENT)
4640
0
                return;
4641
80.7k
  }
4642
30.8M
  count++;
4643
30.8M
  if (count > 50) {
4644
495k
      SHRINK;
4645
495k
      GROW;
4646
495k
      count = 0;
4647
495k
            if (ctxt->instate == XML_PARSER_EOF)
4648
0
    return;
4649
495k
  }
4650
30.8M
    }
4651
1.40M
    if (nbchar != 0) {
4652
402k
        buf[nbchar] = 0;
4653
  /*
4654
   * OK the segment is to be consumed as chars.
4655
   */
4656
402k
  if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4657
351k
      if (areBlanks(ctxt, buf, nbchar, 0)) {
4658
2.57k
    if (ctxt->sax->ignorableWhitespace != NULL)
4659
2.57k
        ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4660
349k
      } else {
4661
349k
    if (ctxt->sax->characters != NULL)
4662
349k
        ctxt->sax->characters(ctxt->userData, buf, nbchar);
4663
349k
    if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4664
349k
        (*ctxt->space == -1))
4665
81.1k
        *ctxt->space = -2;
4666
349k
      }
4667
351k
  }
4668
402k
    }
4669
1.40M
    if ((cur != 0) && (!IS_CHAR(cur))) {
4670
  /* Generate the error and skip the offending character */
4671
809k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4672
809k
                          "PCDATA invalid Char value %d\n",
4673
809k
                    cur);
4674
809k
  NEXTL(l);
4675
809k
    }
4676
1.40M
}
4677
4678
/**
4679
 * xmlParseExternalID:
4680
 * @ctxt:  an XML parser context
4681
 * @publicID:  a xmlChar** receiving PubidLiteral
4682
 * @strict: indicate whether we should restrict parsing to only
4683
 *          production [75], see NOTE below
4684
 *
4685
 * DEPRECATED: Internal function, don't use.
4686
 *
4687
 * Parse an External ID or a Public ID
4688
 *
4689
 * NOTE: Productions [75] and [83] interact badly since [75] can generate
4690
 *       'PUBLIC' S PubidLiteral S SystemLiteral
4691
 *
4692
 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4693
 *                   | 'PUBLIC' S PubidLiteral S SystemLiteral
4694
 *
4695
 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4696
 *
4697
 * Returns the function returns SystemLiteral and in the second
4698
 *                case publicID receives PubidLiteral, is strict is off
4699
 *                it is possible to return NULL and have publicID set.
4700
 */
4701
4702
xmlChar *
4703
546k
xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4704
546k
    xmlChar *URI = NULL;
4705
4706
546k
    SHRINK;
4707
4708
546k
    *publicID = NULL;
4709
546k
    if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
4710
187k
        SKIP(6);
4711
187k
  if (SKIP_BLANKS == 0) {
4712
604
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4713
604
                     "Space required after 'SYSTEM'\n");
4714
604
  }
4715
187k
  URI = xmlParseSystemLiteral(ctxt);
4716
187k
  if (URI == NULL) {
4717
938
      xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4718
938
        }
4719
359k
    } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
4720
65.4k
        SKIP(6);
4721
65.4k
  if (SKIP_BLANKS == 0) {
4722
611
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4723
611
        "Space required after 'PUBLIC'\n");
4724
611
  }
4725
65.4k
  *publicID = xmlParsePubidLiteral(ctxt);
4726
65.4k
  if (*publicID == NULL) {
4727
893
      xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
4728
893
  }
4729
65.4k
  if (strict) {
4730
      /*
4731
       * We don't handle [83] so "S SystemLiteral" is required.
4732
       */
4733
62.2k
      if (SKIP_BLANKS == 0) {
4734
5.14k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4735
5.14k
      "Space required after the Public Identifier\n");
4736
5.14k
      }
4737
62.2k
  } else {
4738
      /*
4739
       * We handle [83] so we return immediately, if
4740
       * "S SystemLiteral" is not detected. We skip blanks if no
4741
             * system literal was found, but this is harmless since we must
4742
             * be at the end of a NotationDecl.
4743
       */
4744
3.25k
      if (SKIP_BLANKS == 0) return(NULL);
4745
353
      if ((CUR != '\'') && (CUR != '"')) return(NULL);
4746
353
  }
4747
62.3k
  URI = xmlParseSystemLiteral(ctxt);
4748
62.3k
  if (URI == NULL) {
4749
5.62k
      xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4750
5.62k
        }
4751
62.3k
    }
4752
542k
    return(URI);
4753
546k
}
4754
4755
/**
4756
 * xmlParseCommentComplex:
4757
 * @ctxt:  an XML parser context
4758
 * @buf:  the already parsed part of the buffer
4759
 * @len:  number of bytes in the buffer
4760
 * @size:  allocated size of the buffer
4761
 *
4762
 * Skip an XML (SGML) comment <!-- .... -->
4763
 *  The spec says that "For compatibility, the string "--" (double-hyphen)
4764
 *  must not occur within comments. "
4765
 * This is the slow routine in case the accelerator for ascii didn't work
4766
 *
4767
 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4768
 */
4769
static void
4770
xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf,
4771
71.6k
                       size_t len, size_t size) {
4772
71.6k
    int q, ql;
4773
71.6k
    int r, rl;
4774
71.6k
    int cur, l;
4775
71.6k
    size_t count = 0;
4776
71.6k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4777
22.8k
                       XML_MAX_HUGE_LENGTH :
4778
71.6k
                       XML_MAX_TEXT_LENGTH;
4779
71.6k
    int inputid;
4780
4781
71.6k
    inputid = ctxt->input->id;
4782
4783
71.6k
    if (buf == NULL) {
4784
3.28k
        len = 0;
4785
3.28k
  size = XML_PARSER_BUFFER_SIZE;
4786
3.28k
  buf = (xmlChar *) xmlMallocAtomic(size);
4787
3.28k
  if (buf == NULL) {
4788
0
      xmlErrMemory(ctxt, NULL);
4789
0
      return;
4790
0
  }
4791
3.28k
    }
4792
71.6k
    GROW; /* Assure there's enough input data */
4793
71.6k
    q = CUR_CHAR(ql);
4794
71.6k
    if (q == 0)
4795
8.25k
        goto not_terminated;
4796
63.3k
    if (!IS_CHAR(q)) {
4797
8.59k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4798
8.59k
                          "xmlParseComment: invalid xmlChar value %d\n",
4799
8.59k
                    q);
4800
8.59k
  xmlFree (buf);
4801
8.59k
  return;
4802
8.59k
    }
4803
54.7k
    NEXTL(ql);
4804
54.7k
    r = CUR_CHAR(rl);
4805
54.7k
    if (r == 0)
4806
783
        goto not_terminated;
4807
53.9k
    if (!IS_CHAR(r)) {
4808
742
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4809
742
                          "xmlParseComment: invalid xmlChar value %d\n",
4810
742
                    q);
4811
742
  xmlFree (buf);
4812
742
  return;
4813
742
    }
4814
53.2k
    NEXTL(rl);
4815
53.2k
    cur = CUR_CHAR(l);
4816
53.2k
    if (cur == 0)
4817
304
        goto not_terminated;
4818
12.1M
    while (IS_CHAR(cur) && /* checked */
4819
12.1M
           ((cur != '>') ||
4820
12.1M
      (r != '-') || (q != '-'))) {
4821
12.0M
  if ((r == '-') && (q == '-')) {
4822
19.6k
      xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
4823
19.6k
  }
4824
12.0M
  if (len + 5 >= size) {
4825
25.1k
      xmlChar *new_buf;
4826
25.1k
            size_t new_size;
4827
4828
25.1k
      new_size = size * 2;
4829
25.1k
      new_buf = (xmlChar *) xmlRealloc(buf, new_size);
4830
25.1k
      if (new_buf == NULL) {
4831
0
    xmlFree (buf);
4832
0
    xmlErrMemory(ctxt, NULL);
4833
0
    return;
4834
0
      }
4835
25.1k
      buf = new_buf;
4836
25.1k
            size = new_size;
4837
25.1k
  }
4838
12.0M
  COPY_BUF(ql,buf,len,q);
4839
12.0M
  q = r;
4840
12.0M
  ql = rl;
4841
12.0M
  r = cur;
4842
12.0M
  rl = l;
4843
4844
12.0M
  count++;
4845
12.0M
  if (count > 50) {
4846
219k
      SHRINK;
4847
219k
      GROW;
4848
219k
      count = 0;
4849
219k
            if (ctxt->instate == XML_PARSER_EOF) {
4850
0
    xmlFree(buf);
4851
0
    return;
4852
0
            }
4853
219k
  }
4854
12.0M
  NEXTL(l);
4855
12.0M
  cur = CUR_CHAR(l);
4856
12.0M
  if (cur == 0) {
4857
4.09k
      SHRINK;
4858
4.09k
      GROW;
4859
4.09k
      cur = CUR_CHAR(l);
4860
4.09k
  }
4861
4862
12.0M
        if (len > maxLength) {
4863
0
            xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4864
0
                         "Comment too big found", NULL);
4865
0
            xmlFree (buf);
4866
0
            return;
4867
0
        }
4868
12.0M
    }
4869
52.9k
    buf[len] = 0;
4870
52.9k
    if (cur == 0) {
4871
4.09k
  xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4872
4.09k
                       "Comment not terminated \n<!--%.50s\n", buf);
4873
48.8k
    } else if (!IS_CHAR(cur)) {
4874
3.95k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4875
3.95k
                          "xmlParseComment: invalid xmlChar value %d\n",
4876
3.95k
                    cur);
4877
44.8k
    } else {
4878
44.8k
  if (inputid != ctxt->input->id) {
4879
0
      xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4880
0
               "Comment doesn't start and stop in the same"
4881
0
                           " entity\n");
4882
0
  }
4883
44.8k
        NEXT;
4884
44.8k
  if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4885
44.8k
      (!ctxt->disableSAX))
4886
35.2k
      ctxt->sax->comment(ctxt->userData, buf);
4887
44.8k
    }
4888
52.9k
    xmlFree(buf);
4889
52.9k
    return;
4890
9.34k
not_terminated:
4891
9.34k
    xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4892
9.34k
       "Comment not terminated\n", NULL);
4893
9.34k
    xmlFree(buf);
4894
9.34k
    return;
4895
52.9k
}
4896
4897
/**
4898
 * xmlParseComment:
4899
 * @ctxt:  an XML parser context
4900
 *
4901
 * DEPRECATED: Internal function, don't use.
4902
 *
4903
 * Skip an XML (SGML) comment <!-- .... -->
4904
 *  The spec says that "For compatibility, the string "--" (double-hyphen)
4905
 *  must not occur within comments. "
4906
 *
4907
 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4908
 */
4909
void
4910
875k
xmlParseComment(xmlParserCtxtPtr ctxt) {
4911
875k
    xmlChar *buf = NULL;
4912
875k
    size_t size = XML_PARSER_BUFFER_SIZE;
4913
875k
    size_t len = 0;
4914
875k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4915
280k
                       XML_MAX_HUGE_LENGTH :
4916
875k
                       XML_MAX_TEXT_LENGTH;
4917
875k
    xmlParserInputState state;
4918
875k
    const xmlChar *in;
4919
875k
    size_t nbchar = 0;
4920
875k
    int ccol;
4921
875k
    int inputid;
4922
4923
    /*
4924
     * Check that there is a comment right here.
4925
     */
4926
875k
    if ((RAW != '<') || (NXT(1) != '!') ||
4927
875k
        (NXT(2) != '-') || (NXT(3) != '-')) return;
4928
874k
    state = ctxt->instate;
4929
874k
    ctxt->instate = XML_PARSER_COMMENT;
4930
874k
    inputid = ctxt->input->id;
4931
874k
    SKIP(4);
4932
874k
    SHRINK;
4933
874k
    GROW;
4934
4935
    /*
4936
     * Accelerated common case where input don't need to be
4937
     * modified before passing it to the handler.
4938
     */
4939
874k
    in = ctxt->input->cur;
4940
874k
    do {
4941
874k
  if (*in == 0xA) {
4942
115k
      do {
4943
115k
    ctxt->input->line++; ctxt->input->col = 1;
4944
115k
    in++;
4945
115k
      } while (*in == 0xA);
4946
105k
  }
4947
4.23M
get_more:
4948
4.23M
        ccol = ctxt->input->col;
4949
135M
  while (((*in > '-') && (*in <= 0x7F)) ||
4950
135M
         ((*in >= 0x20) && (*in < '-')) ||
4951
135M
         (*in == 0x09)) {
4952
130M
        in++;
4953
130M
        ccol++;
4954
130M
  }
4955
4.23M
  ctxt->input->col = ccol;
4956
4.23M
  if (*in == 0xA) {
4957
1.37M
      do {
4958
1.37M
    ctxt->input->line++; ctxt->input->col = 1;
4959
1.37M
    in++;
4960
1.37M
      } while (*in == 0xA);
4961
1.28M
      goto get_more;
4962
1.28M
  }
4963
2.94M
  nbchar = in - ctxt->input->cur;
4964
  /*
4965
   * save current set of data
4966
   */
4967
2.94M
  if (nbchar > 0) {
4968
2.92M
      if ((ctxt->sax != NULL) &&
4969
2.92M
    (ctxt->sax->comment != NULL)) {
4970
2.92M
    if (buf == NULL) {
4971
869k
        if ((*in == '-') && (in[1] == '-'))
4972
531k
            size = nbchar + 1;
4973
337k
        else
4974
337k
            size = XML_PARSER_BUFFER_SIZE + nbchar;
4975
869k
        buf = (xmlChar *) xmlMallocAtomic(size);
4976
869k
        if (buf == NULL) {
4977
0
            xmlErrMemory(ctxt, NULL);
4978
0
      ctxt->instate = state;
4979
0
      return;
4980
0
        }
4981
869k
        len = 0;
4982
2.05M
    } else if (len + nbchar + 1 >= size) {
4983
272k
        xmlChar *new_buf;
4984
272k
        size  += len + nbchar + XML_PARSER_BUFFER_SIZE;
4985
272k
        new_buf = (xmlChar *) xmlRealloc(buf, size);
4986
272k
        if (new_buf == NULL) {
4987
0
            xmlFree (buf);
4988
0
      xmlErrMemory(ctxt, NULL);
4989
0
      ctxt->instate = state;
4990
0
      return;
4991
0
        }
4992
272k
        buf = new_buf;
4993
272k
    }
4994
2.92M
    memcpy(&buf[len], ctxt->input->cur, nbchar);
4995
2.92M
    len += nbchar;
4996
2.92M
    buf[len] = 0;
4997
2.92M
      }
4998
2.92M
  }
4999
2.94M
        if (len > maxLength) {
5000
0
            xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
5001
0
                         "Comment too big found", NULL);
5002
0
            xmlFree (buf);
5003
0
            return;
5004
0
        }
5005
2.94M
  ctxt->input->cur = in;
5006
2.94M
  if (*in == 0xA) {
5007
0
      in++;
5008
0
      ctxt->input->line++; ctxt->input->col = 1;
5009
0
  }
5010
2.94M
  if (*in == 0xD) {
5011
1.03M
      in++;
5012
1.03M
      if (*in == 0xA) {
5013
1.03M
    ctxt->input->cur = in;
5014
1.03M
    in++;
5015
1.03M
    ctxt->input->line++; ctxt->input->col = 1;
5016
1.03M
    goto get_more;
5017
1.03M
      }
5018
2.54k
      in--;
5019
2.54k
  }
5020
1.91M
  SHRINK;
5021
1.91M
  GROW;
5022
1.91M
        if (ctxt->instate == XML_PARSER_EOF) {
5023
0
            xmlFree(buf);
5024
0
            return;
5025
0
        }
5026
1.91M
  in = ctxt->input->cur;
5027
1.91M
  if (*in == '-') {
5028
1.84M
      if (in[1] == '-') {
5029
826k
          if (in[2] == '>') {
5030
802k
        if (ctxt->input->id != inputid) {
5031
0
      xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5032
0
                     "comment doesn't start and stop in the"
5033
0
                                       " same entity\n");
5034
0
        }
5035
802k
        SKIP(3);
5036
802k
        if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
5037
802k
            (!ctxt->disableSAX)) {
5038
676k
      if (buf != NULL)
5039
674k
          ctxt->sax->comment(ctxt->userData, buf);
5040
1.58k
      else
5041
1.58k
          ctxt->sax->comment(ctxt->userData, BAD_CAST "");
5042
676k
        }
5043
802k
        if (buf != NULL)
5044
800k
            xmlFree(buf);
5045
802k
        if (ctxt->instate != XML_PARSER_EOF)
5046
802k
      ctxt->instate = state;
5047
802k
        return;
5048
802k
    }
5049
24.1k
    if (buf != NULL) {
5050
23.5k
        xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5051
23.5k
                          "Double hyphen within comment: "
5052
23.5k
                                      "<!--%.50s\n",
5053
23.5k
              buf);
5054
23.5k
    } else
5055
596
        xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5056
596
                          "Double hyphen within comment\n", NULL);
5057
24.1k
                if (ctxt->instate == XML_PARSER_EOF) {
5058
0
                    xmlFree(buf);
5059
0
                    return;
5060
0
                }
5061
24.1k
    in++;
5062
24.1k
    ctxt->input->col++;
5063
24.1k
      }
5064
1.04M
      in++;
5065
1.04M
      ctxt->input->col++;
5066
1.04M
      goto get_more;
5067
1.84M
  }
5068
1.91M
    } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09) || (*in == 0x0a));
5069
71.6k
    xmlParseCommentComplex(ctxt, buf, len, size);
5070
71.6k
    ctxt->instate = state;
5071
71.6k
    return;
5072
874k
}
5073
5074
5075
/**
5076
 * xmlParsePITarget:
5077
 * @ctxt:  an XML parser context
5078
 *
5079
 * DEPRECATED: Internal function, don't use.
5080
 *
5081
 * parse the name of a PI
5082
 *
5083
 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
5084
 *
5085
 * Returns the PITarget name or NULL
5086
 */
5087
5088
const xmlChar *
5089
142k
xmlParsePITarget(xmlParserCtxtPtr ctxt) {
5090
142k
    const xmlChar *name;
5091
5092
142k
    name = xmlParseName(ctxt);
5093
142k
    if ((name != NULL) &&
5094
142k
        ((name[0] == 'x') || (name[0] == 'X')) &&
5095
142k
        ((name[1] == 'm') || (name[1] == 'M')) &&
5096
142k
        ((name[2] == 'l') || (name[2] == 'L'))) {
5097
26.0k
  int i;
5098
26.0k
  if ((name[0] == 'x') && (name[1] == 'm') &&
5099
26.0k
      (name[2] == 'l') && (name[3] == 0)) {
5100
16.7k
      xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5101
16.7k
     "XML declaration allowed only at the start of the document\n");
5102
16.7k
      return(name);
5103
16.7k
  } else if (name[3] == 0) {
5104
1.04k
      xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
5105
1.04k
      return(name);
5106
1.04k
  }
5107
16.1k
  for (i = 0;;i++) {
5108
16.1k
      if (xmlW3CPIs[i] == NULL) break;
5109
12.2k
      if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
5110
4.42k
          return(name);
5111
12.2k
  }
5112
3.89k
  xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5113
3.89k
          "xmlParsePITarget: invalid name prefix 'xml'\n",
5114
3.89k
          NULL, NULL);
5115
3.89k
    }
5116
120k
    if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
5117
1.78k
  xmlNsErr(ctxt, XML_NS_ERR_COLON,
5118
1.78k
     "colons are forbidden from PI names '%s'\n", name, NULL, NULL);
5119
1.78k
    }
5120
120k
    return(name);
5121
142k
}
5122
5123
#ifdef LIBXML_CATALOG_ENABLED
5124
/**
5125
 * xmlParseCatalogPI:
5126
 * @ctxt:  an XML parser context
5127
 * @catalog:  the PI value string
5128
 *
5129
 * parse an XML Catalog Processing Instruction.
5130
 *
5131
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
5132
 *
5133
 * Occurs only if allowed by the user and if happening in the Misc
5134
 * part of the document before any doctype information
5135
 * This will add the given catalog to the parsing context in order
5136
 * to be used if there is a resolution need further down in the document
5137
 */
5138
5139
static void
5140
412
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
5141
412
    xmlChar *URL = NULL;
5142
412
    const xmlChar *tmp, *base;
5143
412
    xmlChar marker;
5144
5145
412
    tmp = catalog;
5146
412
    while (IS_BLANK_CH(*tmp)) tmp++;
5147
412
    if (xmlStrncmp(tmp, BAD_CAST"catalog", 7))
5148
64
  goto error;
5149
348
    tmp += 7;
5150
959
    while (IS_BLANK_CH(*tmp)) tmp++;
5151
348
    if (*tmp != '=') {
5152
130
  return;
5153
130
    }
5154
218
    tmp++;
5155
2.01k
    while (IS_BLANK_CH(*tmp)) tmp++;
5156
218
    marker = *tmp;
5157
218
    if ((marker != '\'') && (marker != '"'))
5158
69
  goto error;
5159
149
    tmp++;
5160
149
    base = tmp;
5161
4.13k
    while ((*tmp != 0) && (*tmp != marker)) tmp++;
5162
149
    if (*tmp == 0)
5163
27
  goto error;
5164
122
    URL = xmlStrndup(base, tmp - base);
5165
122
    tmp++;
5166
603
    while (IS_BLANK_CH(*tmp)) tmp++;
5167
122
    if (*tmp != 0)
5168
80
  goto error;
5169
5170
42
    if (URL != NULL) {
5171
42
  ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
5172
42
  xmlFree(URL);
5173
42
    }
5174
42
    return;
5175
5176
240
error:
5177
240
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
5178
240
            "Catalog PI syntax error: %s\n",
5179
240
      catalog, NULL);
5180
240
    if (URL != NULL)
5181
80
  xmlFree(URL);
5182
240
}
5183
#endif
5184
5185
/**
5186
 * xmlParsePI:
5187
 * @ctxt:  an XML parser context
5188
 *
5189
 * DEPRECATED: Internal function, don't use.
5190
 *
5191
 * parse an XML Processing Instruction.
5192
 *
5193
 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
5194
 *
5195
 * The processing is transferred to SAX once parsed.
5196
 */
5197
5198
void
5199
142k
xmlParsePI(xmlParserCtxtPtr ctxt) {
5200
142k
    xmlChar *buf = NULL;
5201
142k
    size_t len = 0;
5202
142k
    size_t size = XML_PARSER_BUFFER_SIZE;
5203
142k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
5204
62.3k
                       XML_MAX_HUGE_LENGTH :
5205
142k
                       XML_MAX_TEXT_LENGTH;
5206
142k
    int cur, l;
5207
142k
    const xmlChar *target;
5208
142k
    xmlParserInputState state;
5209
142k
    int count = 0;
5210
5211
142k
    if ((RAW == '<') && (NXT(1) == '?')) {
5212
142k
  int inputid = ctxt->input->id;
5213
142k
  state = ctxt->instate;
5214
142k
        ctxt->instate = XML_PARSER_PI;
5215
  /*
5216
   * this is a Processing Instruction.
5217
   */
5218
142k
  SKIP(2);
5219
142k
  SHRINK;
5220
5221
  /*
5222
   * Parse the target name and check for special support like
5223
   * namespace.
5224
   */
5225
142k
        target = xmlParsePITarget(ctxt);
5226
142k
  if (target != NULL) {
5227
136k
      if ((RAW == '?') && (NXT(1) == '>')) {
5228
22.1k
    if (inputid != ctxt->input->id) {
5229
0
        xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5230
0
                             "PI declaration doesn't start and stop in"
5231
0
                                   " the same entity\n");
5232
0
    }
5233
22.1k
    SKIP(2);
5234
5235
    /*
5236
     * SAX: PI detected.
5237
     */
5238
22.1k
    if ((ctxt->sax) && (!ctxt->disableSAX) &&
5239
22.1k
        (ctxt->sax->processingInstruction != NULL))
5240
21.2k
        ctxt->sax->processingInstruction(ctxt->userData,
5241
21.2k
                                         target, NULL);
5242
22.1k
    if (ctxt->instate != XML_PARSER_EOF)
5243
22.1k
        ctxt->instate = state;
5244
22.1k
    return;
5245
22.1k
      }
5246
114k
      buf = (xmlChar *) xmlMallocAtomic(size);
5247
114k
      if (buf == NULL) {
5248
0
    xmlErrMemory(ctxt, NULL);
5249
0
    ctxt->instate = state;
5250
0
    return;
5251
0
      }
5252
114k
      if (SKIP_BLANKS == 0) {
5253
26.5k
    xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
5254
26.5k
        "ParsePI: PI %s space expected\n", target);
5255
26.5k
      }
5256
114k
      cur = CUR_CHAR(l);
5257
10.6M
      while (IS_CHAR(cur) && /* checked */
5258
10.6M
       ((cur != '?') || (NXT(1) != '>'))) {
5259
10.5M
    if (len + 5 >= size) {
5260
19.2k
        xmlChar *tmp;
5261
19.2k
                    size_t new_size = size * 2;
5262
19.2k
        tmp = (xmlChar *) xmlRealloc(buf, new_size);
5263
19.2k
        if (tmp == NULL) {
5264
0
      xmlErrMemory(ctxt, NULL);
5265
0
      xmlFree(buf);
5266
0
      ctxt->instate = state;
5267
0
      return;
5268
0
        }
5269
19.2k
        buf = tmp;
5270
19.2k
                    size = new_size;
5271
19.2k
    }
5272
10.5M
    count++;
5273
10.5M
    if (count > 50) {
5274
172k
        SHRINK;
5275
172k
        GROW;
5276
172k
                    if (ctxt->instate == XML_PARSER_EOF) {
5277
0
                        xmlFree(buf);
5278
0
                        return;
5279
0
                    }
5280
172k
        count = 0;
5281
172k
    }
5282
10.5M
    COPY_BUF(l,buf,len,cur);
5283
10.5M
    NEXTL(l);
5284
10.5M
    cur = CUR_CHAR(l);
5285
10.5M
    if (cur == 0) {
5286
8.59k
        SHRINK;
5287
8.59k
        GROW;
5288
8.59k
        cur = CUR_CHAR(l);
5289
8.59k
    }
5290
10.5M
                if (len > maxLength) {
5291
0
                    xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5292
0
                                      "PI %s too big found", target);
5293
0
                    xmlFree(buf);
5294
0
                    ctxt->instate = state;
5295
0
                    return;
5296
0
                }
5297
10.5M
      }
5298
114k
      buf[len] = 0;
5299
114k
      if (cur != '?') {
5300
18.3k
    xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5301
18.3k
          "ParsePI: PI %s never end ...\n", target);
5302
96.2k
      } else {
5303
96.2k
    if (inputid != ctxt->input->id) {
5304
7
        xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5305
7
                             "PI declaration doesn't start and stop in"
5306
7
                                   " the same entity\n");
5307
7
    }
5308
96.2k
    SKIP(2);
5309
5310
96.2k
#ifdef LIBXML_CATALOG_ENABLED
5311
96.2k
    if (((state == XML_PARSER_MISC) ||
5312
96.2k
               (state == XML_PARSER_START)) &&
5313
96.2k
        (xmlStrEqual(target, XML_CATALOG_PI))) {
5314
412
        xmlCatalogAllow allow = xmlCatalogGetDefaults();
5315
412
        if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5316
412
      (allow == XML_CATA_ALLOW_ALL))
5317
412
      xmlParseCatalogPI(ctxt, buf);
5318
412
    }
5319
96.2k
#endif
5320
5321
5322
    /*
5323
     * SAX: PI detected.
5324
     */
5325
96.2k
    if ((ctxt->sax) && (!ctxt->disableSAX) &&
5326
96.2k
        (ctxt->sax->processingInstruction != NULL))
5327
82.2k
        ctxt->sax->processingInstruction(ctxt->userData,
5328
82.2k
                                         target, buf);
5329
96.2k
      }
5330
114k
      xmlFree(buf);
5331
114k
  } else {
5332
5.91k
      xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
5333
5.91k
  }
5334
120k
  if (ctxt->instate != XML_PARSER_EOF)
5335
120k
      ctxt->instate = state;
5336
120k
    }
5337
142k
}
5338
5339
/**
5340
 * xmlParseNotationDecl:
5341
 * @ctxt:  an XML parser context
5342
 *
5343
 * DEPRECATED: Internal function, don't use.
5344
 *
5345
 * parse a notation declaration
5346
 *
5347
 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID |  PublicID) S? '>'
5348
 *
5349
 * Hence there is actually 3 choices:
5350
 *     'PUBLIC' S PubidLiteral
5351
 *     'PUBLIC' S PubidLiteral S SystemLiteral
5352
 * and 'SYSTEM' S SystemLiteral
5353
 *
5354
 * See the NOTE on xmlParseExternalID().
5355
 */
5356
5357
void
5358
9.09k
xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
5359
9.09k
    const xmlChar *name;
5360
9.09k
    xmlChar *Pubid;
5361
9.09k
    xmlChar *Systemid;
5362
5363
9.09k
    if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5364
6.62k
  int inputid = ctxt->input->id;
5365
6.62k
  SHRINK;
5366
6.62k
  SKIP(10);
5367
6.62k
  if (SKIP_BLANKS == 0) {
5368
338
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5369
338
         "Space required after '<!NOTATION'\n");
5370
338
      return;
5371
338
  }
5372
5373
6.28k
        name = xmlParseName(ctxt);
5374
6.28k
  if (name == NULL) {
5375
220
      xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5376
220
      return;
5377
220
  }
5378
6.06k
  if (xmlStrchr(name, ':') != NULL) {
5379
161
      xmlNsErr(ctxt, XML_NS_ERR_COLON,
5380
161
         "colons are forbidden from notation names '%s'\n",
5381
161
         name, NULL, NULL);
5382
161
  }
5383
6.06k
  if (SKIP_BLANKS == 0) {
5384
327
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5385
327
         "Space required after the NOTATION name'\n");
5386
327
      return;
5387
327
  }
5388
5389
  /*
5390
   * Parse the IDs.
5391
   */
5392
5.73k
  Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
5393
5.73k
  SKIP_BLANKS;
5394
5395
5.73k
  if (RAW == '>') {
5396
4.60k
      if (inputid != ctxt->input->id) {
5397
0
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5398
0
                         "Notation declaration doesn't start and stop"
5399
0
                               " in the same entity\n");
5400
0
      }
5401
4.60k
      NEXT;
5402
4.60k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5403
4.60k
    (ctxt->sax->notationDecl != NULL))
5404
3.87k
    ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5405
4.60k
  } else {
5406
1.13k
      xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5407
1.13k
  }
5408
5.73k
  if (Systemid != NULL) xmlFree(Systemid);
5409
5.73k
  if (Pubid != NULL) xmlFree(Pubid);
5410
5.73k
    }
5411
9.09k
}
5412
5413
/**
5414
 * xmlParseEntityDecl:
5415
 * @ctxt:  an XML parser context
5416
 *
5417
 * DEPRECATED: Internal function, don't use.
5418
 *
5419
 * parse <!ENTITY declarations
5420
 *
5421
 * [70] EntityDecl ::= GEDecl | PEDecl
5422
 *
5423
 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5424
 *
5425
 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5426
 *
5427
 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5428
 *
5429
 * [74] PEDef ::= EntityValue | ExternalID
5430
 *
5431
 * [76] NDataDecl ::= S 'NDATA' S Name
5432
 *
5433
 * [ VC: Notation Declared ]
5434
 * The Name must match the declared name of a notation.
5435
 */
5436
5437
void
5438
1.27M
xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
5439
1.27M
    const xmlChar *name = NULL;
5440
1.27M
    xmlChar *value = NULL;
5441
1.27M
    xmlChar *URI = NULL, *literal = NULL;
5442
1.27M
    const xmlChar *ndata = NULL;
5443
1.27M
    int isParameter = 0;
5444
1.27M
    xmlChar *orig = NULL;
5445
5446
    /* GROW; done in the caller */
5447
1.27M
    if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
5448
1.26M
  int inputid = ctxt->input->id;
5449
1.26M
  SHRINK;
5450
1.26M
  SKIP(8);
5451
1.26M
  if (SKIP_BLANKS == 0) {
5452
2.35k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5453
2.35k
         "Space required after '<!ENTITY'\n");
5454
2.35k
  }
5455
5456
1.26M
  if (RAW == '%') {
5457
530k
      NEXT;
5458
530k
      if (SKIP_BLANKS == 0) {
5459
566
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5460
566
             "Space required after '%%'\n");
5461
566
      }
5462
530k
      isParameter = 1;
5463
530k
  }
5464
5465
1.26M
        name = xmlParseName(ctxt);
5466
1.26M
  if (name == NULL) {
5467
5.78k
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5468
5.78k
                     "xmlParseEntityDecl: no name\n");
5469
5.78k
            return;
5470
5.78k
  }
5471
1.26M
  if (xmlStrchr(name, ':') != NULL) {
5472
628
      xmlNsErr(ctxt, XML_NS_ERR_COLON,
5473
628
         "colons are forbidden from entities names '%s'\n",
5474
628
         name, NULL, NULL);
5475
628
  }
5476
1.26M
  if (SKIP_BLANKS == 0) {
5477
5.59k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5478
5.59k
         "Space required after the entity name\n");
5479
5.59k
  }
5480
5481
1.26M
  ctxt->instate = XML_PARSER_ENTITY_DECL;
5482
  /*
5483
   * handle the various case of definitions...
5484
   */
5485
1.26M
  if (isParameter) {
5486
529k
      if ((RAW == '"') || (RAW == '\'')) {
5487
499k
          value = xmlParseEntityValue(ctxt, &orig);
5488
499k
    if (value) {
5489
493k
        if ((ctxt->sax != NULL) &&
5490
493k
      (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5491
426k
      ctxt->sax->entityDecl(ctxt->userData, name,
5492
426k
                        XML_INTERNAL_PARAMETER_ENTITY,
5493
426k
            NULL, NULL, value);
5494
493k
    }
5495
499k
      } else {
5496
29.8k
          URI = xmlParseExternalID(ctxt, &literal, 1);
5497
29.8k
    if ((URI == NULL) && (literal == NULL)) {
5498
1.63k
        xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5499
1.63k
    }
5500
29.8k
    if (URI) {
5501
28.0k
        xmlURIPtr uri;
5502
5503
28.0k
        uri = xmlParseURI((const char *) URI);
5504
28.0k
        if (uri == NULL) {
5505
925
            xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5506
925
             "Invalid URI: %s\n", URI);
5507
      /*
5508
       * This really ought to be a well formedness error
5509
       * but the XML Core WG decided otherwise c.f. issue
5510
       * E26 of the XML erratas.
5511
       */
5512
27.0k
        } else {
5513
27.0k
      if (uri->fragment != NULL) {
5514
          /*
5515
           * Okay this is foolish to block those but not
5516
           * invalid URIs.
5517
           */
5518
185
          xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5519
26.9k
      } else {
5520
26.9k
          if ((ctxt->sax != NULL) &&
5521
26.9k
        (!ctxt->disableSAX) &&
5522
26.9k
        (ctxt->sax->entityDecl != NULL))
5523
23.1k
        ctxt->sax->entityDecl(ctxt->userData, name,
5524
23.1k
              XML_EXTERNAL_PARAMETER_ENTITY,
5525
23.1k
              literal, URI, NULL);
5526
26.9k
      }
5527
27.0k
      xmlFreeURI(uri);
5528
27.0k
        }
5529
28.0k
    }
5530
29.8k
      }
5531
733k
  } else {
5532
733k
      if ((RAW == '"') || (RAW == '\'')) {
5533
680k
          value = xmlParseEntityValue(ctxt, &orig);
5534
680k
    if ((ctxt->sax != NULL) &&
5535
680k
        (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5536
607k
        ctxt->sax->entityDecl(ctxt->userData, name,
5537
607k
        XML_INTERNAL_GENERAL_ENTITY,
5538
607k
        NULL, NULL, value);
5539
    /*
5540
     * For expat compatibility in SAX mode.
5541
     */
5542
680k
    if ((ctxt->myDoc == NULL) ||
5543
680k
        (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5544
8.60k
        if (ctxt->myDoc == NULL) {
5545
1.89k
      ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5546
1.89k
      if (ctxt->myDoc == NULL) {
5547
0
          xmlErrMemory(ctxt, "New Doc failed");
5548
0
          return;
5549
0
      }
5550
1.89k
      ctxt->myDoc->properties = XML_DOC_INTERNAL;
5551
1.89k
        }
5552
8.60k
        if (ctxt->myDoc->intSubset == NULL)
5553
1.89k
      ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5554
1.89k
              BAD_CAST "fake", NULL, NULL);
5555
5556
8.60k
        xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5557
8.60k
                    NULL, NULL, value);
5558
8.60k
    }
5559
680k
      } else {
5560
52.7k
          URI = xmlParseExternalID(ctxt, &literal, 1);
5561
52.7k
    if ((URI == NULL) && (literal == NULL)) {
5562
7.06k
        xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5563
7.06k
    }
5564
52.7k
    if (URI) {
5565
44.4k
        xmlURIPtr uri;
5566
5567
44.4k
        uri = xmlParseURI((const char *)URI);
5568
44.4k
        if (uri == NULL) {
5569
2.09k
            xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5570
2.09k
             "Invalid URI: %s\n", URI);
5571
      /*
5572
       * This really ought to be a well formedness error
5573
       * but the XML Core WG decided otherwise c.f. issue
5574
       * E26 of the XML erratas.
5575
       */
5576
42.3k
        } else {
5577
42.3k
      if (uri->fragment != NULL) {
5578
          /*
5579
           * Okay this is foolish to block those but not
5580
           * invalid URIs.
5581
           */
5582
356
          xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5583
356
      }
5584
42.3k
      xmlFreeURI(uri);
5585
42.3k
        }
5586
44.4k
    }
5587
52.7k
    if ((RAW != '>') && (SKIP_BLANKS == 0)) {
5588
6.79k
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5589
6.79k
           "Space required before 'NDATA'\n");
5590
6.79k
    }
5591
52.7k
    if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
5592
11.3k
        SKIP(5);
5593
11.3k
        if (SKIP_BLANKS == 0) {
5594
395
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5595
395
               "Space required after 'NDATA'\n");
5596
395
        }
5597
11.3k
        ndata = xmlParseName(ctxt);
5598
11.3k
        if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5599
11.3k
            (ctxt->sax->unparsedEntityDecl != NULL))
5600
10.5k
      ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5601
10.5k
            literal, URI, ndata);
5602
41.3k
    } else {
5603
41.3k
        if ((ctxt->sax != NULL) &&
5604
41.3k
            (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5605
34.5k
      ctxt->sax->entityDecl(ctxt->userData, name,
5606
34.5k
            XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5607
34.5k
            literal, URI, NULL);
5608
        /*
5609
         * For expat compatibility in SAX mode.
5610
         * assuming the entity replacement was asked for
5611
         */
5612
41.3k
        if ((ctxt->replaceEntities != 0) &&
5613
41.3k
      ((ctxt->myDoc == NULL) ||
5614
25.7k
      (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5615
728
      if (ctxt->myDoc == NULL) {
5616
334
          ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5617
334
          if (ctxt->myDoc == NULL) {
5618
0
              xmlErrMemory(ctxt, "New Doc failed");
5619
0
        return;
5620
0
          }
5621
334
          ctxt->myDoc->properties = XML_DOC_INTERNAL;
5622
334
      }
5623
5624
728
      if (ctxt->myDoc->intSubset == NULL)
5625
334
          ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5626
334
            BAD_CAST "fake", NULL, NULL);
5627
728
      xmlSAX2EntityDecl(ctxt, name,
5628
728
                  XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5629
728
                  literal, URI, NULL);
5630
728
        }
5631
41.3k
    }
5632
52.7k
      }
5633
733k
  }
5634
1.26M
  if (ctxt->instate == XML_PARSER_EOF)
5635
0
      goto done;
5636
1.26M
  SKIP_BLANKS;
5637
1.26M
  if (RAW != '>') {
5638
18.9k
      xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
5639
18.9k
              "xmlParseEntityDecl: entity %s not terminated\n", name);
5640
18.9k
      xmlHaltParser(ctxt);
5641
1.24M
  } else {
5642
1.24M
      if (inputid != ctxt->input->id) {
5643
135
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5644
135
                         "Entity declaration doesn't start and stop in"
5645
135
                               " the same entity\n");
5646
135
      }
5647
1.24M
      NEXT;
5648
1.24M
  }
5649
1.26M
  if (orig != NULL) {
5650
      /*
5651
       * Ugly mechanism to save the raw entity value.
5652
       */
5653
1.16M
      xmlEntityPtr cur = NULL;
5654
5655
1.16M
      if (isParameter) {
5656
494k
          if ((ctxt->sax != NULL) &&
5657
494k
        (ctxt->sax->getParameterEntity != NULL))
5658
494k
        cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5659
673k
      } else {
5660
673k
          if ((ctxt->sax != NULL) &&
5661
673k
        (ctxt->sax->getEntity != NULL))
5662
673k
        cur = ctxt->sax->getEntity(ctxt->userData, name);
5663
673k
    if ((cur == NULL) && (ctxt->userData==ctxt)) {
5664
56.3k
        cur = xmlSAX2GetEntity(ctxt, name);
5665
56.3k
    }
5666
673k
      }
5667
1.16M
            if ((cur != NULL) && (cur->orig == NULL)) {
5668
971k
    cur->orig = orig;
5669
971k
                orig = NULL;
5670
971k
      }
5671
1.16M
  }
5672
5673
1.26M
done:
5674
1.26M
  if (value != NULL) xmlFree(value);
5675
1.26M
  if (URI != NULL) xmlFree(URI);
5676
1.26M
  if (literal != NULL) xmlFree(literal);
5677
1.26M
        if (orig != NULL) xmlFree(orig);
5678
1.26M
    }
5679
1.27M
}
5680
5681
/**
5682
 * xmlParseDefaultDecl:
5683
 * @ctxt:  an XML parser context
5684
 * @value:  Receive a possible fixed default value for the attribute
5685
 *
5686
 * DEPRECATED: Internal function, don't use.
5687
 *
5688
 * Parse an attribute default declaration
5689
 *
5690
 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5691
 *
5692
 * [ VC: Required Attribute ]
5693
 * if the default declaration is the keyword #REQUIRED, then the
5694
 * attribute must be specified for all elements of the type in the
5695
 * attribute-list declaration.
5696
 *
5697
 * [ VC: Attribute Default Legal ]
5698
 * The declared default value must meet the lexical constraints of
5699
 * the declared attribute type c.f. xmlValidateAttributeDecl()
5700
 *
5701
 * [ VC: Fixed Attribute Default ]
5702
 * if an attribute has a default value declared with the #FIXED
5703
 * keyword, instances of that attribute must match the default value.
5704
 *
5705
 * [ WFC: No < in Attribute Values ]
5706
 * handled in xmlParseAttValue()
5707
 *
5708
 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5709
 *          or XML_ATTRIBUTE_FIXED.
5710
 */
5711
5712
int
5713
1.72M
xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5714
1.72M
    int val;
5715
1.72M
    xmlChar *ret;
5716
5717
1.72M
    *value = NULL;
5718
1.72M
    if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
5719
515k
  SKIP(9);
5720
515k
  return(XML_ATTRIBUTE_REQUIRED);
5721
515k
    }
5722
1.21M
    if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
5723
1.01M
  SKIP(8);
5724
1.01M
  return(XML_ATTRIBUTE_IMPLIED);
5725
1.01M
    }
5726
199k
    val = XML_ATTRIBUTE_NONE;
5727
199k
    if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
5728
97.1k
  SKIP(6);
5729
97.1k
  val = XML_ATTRIBUTE_FIXED;
5730
97.1k
  if (SKIP_BLANKS == 0) {
5731
413
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5732
413
         "Space required after '#FIXED'\n");
5733
413
  }
5734
97.1k
    }
5735
199k
    ret = xmlParseAttValue(ctxt);
5736
199k
    ctxt->instate = XML_PARSER_DTD;
5737
199k
    if (ret == NULL) {
5738
7.03k
  xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
5739
7.03k
           "Attribute default value declaration error\n");
5740
7.03k
    } else
5741
192k
        *value = ret;
5742
199k
    return(val);
5743
1.21M
}
5744
5745
/**
5746
 * xmlParseNotationType:
5747
 * @ctxt:  an XML parser context
5748
 *
5749
 * DEPRECATED: Internal function, don't use.
5750
 *
5751
 * parse an Notation attribute type.
5752
 *
5753
 * Note: the leading 'NOTATION' S part has already being parsed...
5754
 *
5755
 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5756
 *
5757
 * [ VC: Notation Attributes ]
5758
 * Values of this type must match one of the notation names included
5759
 * in the declaration; all notation names in the declaration must be declared.
5760
 *
5761
 * Returns: the notation attribute tree built while parsing
5762
 */
5763
5764
xmlEnumerationPtr
5765
2.66k
xmlParseNotationType(xmlParserCtxtPtr ctxt) {
5766
2.66k
    const xmlChar *name;
5767
2.66k
    xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5768
5769
2.66k
    if (RAW != '(') {
5770
191
  xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5771
191
  return(NULL);
5772
191
    }
5773
2.47k
    SHRINK;
5774
3.04k
    do {
5775
3.04k
        NEXT;
5776
3.04k
  SKIP_BLANKS;
5777
3.04k
        name = xmlParseName(ctxt);
5778
3.04k
  if (name == NULL) {
5779
266
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5780
266
         "Name expected in NOTATION declaration\n");
5781
266
            xmlFreeEnumeration(ret);
5782
266
      return(NULL);
5783
266
  }
5784
2.77k
  tmp = ret;
5785
3.84k
  while (tmp != NULL) {
5786
1.22k
      if (xmlStrEqual(name, tmp->name)) {
5787
150
    xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5788
150
    "standalone: attribute notation value token %s duplicated\n",
5789
150
         name, NULL);
5790
150
    if (!xmlDictOwns(ctxt->dict, name))
5791
0
        xmlFree((xmlChar *) name);
5792
150
    break;
5793
150
      }
5794
1.07k
      tmp = tmp->next;
5795
1.07k
  }
5796
2.77k
  if (tmp == NULL) {
5797
2.62k
      cur = xmlCreateEnumeration(name);
5798
2.62k
      if (cur == NULL) {
5799
0
                xmlFreeEnumeration(ret);
5800
0
                return(NULL);
5801
0
            }
5802
2.62k
      if (last == NULL) ret = last = cur;
5803
362
      else {
5804
362
    last->next = cur;
5805
362
    last = cur;
5806
362
      }
5807
2.62k
  }
5808
2.77k
  SKIP_BLANKS;
5809
2.77k
    } while (RAW == '|');
5810
2.20k
    if (RAW != ')') {
5811
339
  xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5812
339
        xmlFreeEnumeration(ret);
5813
339
  return(NULL);
5814
339
    }
5815
1.86k
    NEXT;
5816
1.86k
    return(ret);
5817
2.20k
}
5818
5819
/**
5820
 * xmlParseEnumerationType:
5821
 * @ctxt:  an XML parser context
5822
 *
5823
 * DEPRECATED: Internal function, don't use.
5824
 *
5825
 * parse an Enumeration attribute type.
5826
 *
5827
 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5828
 *
5829
 * [ VC: Enumeration ]
5830
 * Values of this type must match one of the Nmtoken tokens in
5831
 * the declaration
5832
 *
5833
 * Returns: the enumeration attribute tree built while parsing
5834
 */
5835
5836
xmlEnumerationPtr
5837
112k
xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
5838
112k
    xmlChar *name;
5839
112k
    xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5840
5841
112k
    if (RAW != '(') {
5842
8.08k
  xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
5843
8.08k
  return(NULL);
5844
8.08k
    }
5845
104k
    SHRINK;
5846
307k
    do {
5847
307k
        NEXT;
5848
307k
  SKIP_BLANKS;
5849
307k
        name = xmlParseNmtoken(ctxt);
5850
307k
  if (name == NULL) {
5851
458
      xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
5852
458
      return(ret);
5853
458
  }
5854
306k
  tmp = ret;
5855
827k
  while (tmp != NULL) {
5856
521k
      if (xmlStrEqual(name, tmp->name)) {
5857
560
    xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5858
560
    "standalone: attribute enumeration value token %s duplicated\n",
5859
560
         name, NULL);
5860
560
    if (!xmlDictOwns(ctxt->dict, name))
5861
560
        xmlFree(name);
5862
560
    break;
5863
560
      }
5864
521k
      tmp = tmp->next;
5865
521k
  }
5866
306k
  if (tmp == NULL) {
5867
306k
      cur = xmlCreateEnumeration(name);
5868
306k
      if (!xmlDictOwns(ctxt->dict, name))
5869
306k
    xmlFree(name);
5870
306k
      if (cur == NULL) {
5871
0
                xmlFreeEnumeration(ret);
5872
0
                return(NULL);
5873
0
            }
5874
306k
      if (last == NULL) ret = last = cur;
5875
202k
      else {
5876
202k
    last->next = cur;
5877
202k
    last = cur;
5878
202k
      }
5879
306k
  }
5880
306k
  SKIP_BLANKS;
5881
306k
    } while (RAW == '|');
5882
103k
    if (RAW != ')') {
5883
1.81k
  xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
5884
1.81k
  return(ret);
5885
1.81k
    }
5886
101k
    NEXT;
5887
101k
    return(ret);
5888
103k
}
5889
5890
/**
5891
 * xmlParseEnumeratedType:
5892
 * @ctxt:  an XML parser context
5893
 * @tree:  the enumeration tree built while parsing
5894
 *
5895
 * DEPRECATED: Internal function, don't use.
5896
 *
5897
 * parse an Enumerated attribute type.
5898
 *
5899
 * [57] EnumeratedType ::= NotationType | Enumeration
5900
 *
5901
 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5902
 *
5903
 *
5904
 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5905
 */
5906
5907
int
5908
115k
xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5909
115k
    if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5910
2.93k
  SKIP(8);
5911
2.93k
  if (SKIP_BLANKS == 0) {
5912
269
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5913
269
         "Space required after 'NOTATION'\n");
5914
269
      return(0);
5915
269
  }
5916
2.66k
  *tree = xmlParseNotationType(ctxt);
5917
2.66k
  if (*tree == NULL) return(0);
5918
1.86k
  return(XML_ATTRIBUTE_NOTATION);
5919
2.66k
    }
5920
112k
    *tree = xmlParseEnumerationType(ctxt);
5921
112k
    if (*tree == NULL) return(0);
5922
103k
    return(XML_ATTRIBUTE_ENUMERATION);
5923
112k
}
5924
5925
/**
5926
 * xmlParseAttributeType:
5927
 * @ctxt:  an XML parser context
5928
 * @tree:  the enumeration tree built while parsing
5929
 *
5930
 * DEPRECATED: Internal function, don't use.
5931
 *
5932
 * parse the Attribute list def for an element
5933
 *
5934
 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5935
 *
5936
 * [55] StringType ::= 'CDATA'
5937
 *
5938
 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5939
 *                        'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5940
 *
5941
 * Validity constraints for attribute values syntax are checked in
5942
 * xmlValidateAttributeValue()
5943
 *
5944
 * [ VC: ID ]
5945
 * Values of type ID must match the Name production. A name must not
5946
 * appear more than once in an XML document as a value of this type;
5947
 * i.e., ID values must uniquely identify the elements which bear them.
5948
 *
5949
 * [ VC: One ID per Element Type ]
5950
 * No element type may have more than one ID attribute specified.
5951
 *
5952
 * [ VC: ID Attribute Default ]
5953
 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
5954
 *
5955
 * [ VC: IDREF ]
5956
 * Values of type IDREF must match the Name production, and values
5957
 * of type IDREFS must match Names; each IDREF Name must match the value
5958
 * of an ID attribute on some element in the XML document; i.e. IDREF
5959
 * values must match the value of some ID attribute.
5960
 *
5961
 * [ VC: Entity Name ]
5962
 * Values of type ENTITY must match the Name production, values
5963
 * of type ENTITIES must match Names; each Entity Name must match the
5964
 * name of an unparsed entity declared in the DTD.
5965
 *
5966
 * [ VC: Name Token ]
5967
 * Values of type NMTOKEN must match the Nmtoken production; values
5968
 * of type NMTOKENS must match Nmtokens.
5969
 *
5970
 * Returns the attribute type
5971
 */
5972
int
5973
1.74M
xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5974
1.74M
    SHRINK;
5975
1.74M
    if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
5976
809k
  SKIP(5);
5977
809k
  return(XML_ATTRIBUTE_CDATA);
5978
931k
     } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
5979
9.62k
  SKIP(6);
5980
9.62k
  return(XML_ATTRIBUTE_IDREFS);
5981
921k
     } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
5982
45.7k
  SKIP(5);
5983
45.7k
  return(XML_ATTRIBUTE_IDREF);
5984
876k
     } else if ((RAW == 'I') && (NXT(1) == 'D')) {
5985
362k
        SKIP(2);
5986
362k
  return(XML_ATTRIBUTE_ID);
5987
513k
     } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
5988
1.24k
  SKIP(6);
5989
1.24k
  return(XML_ATTRIBUTE_ENTITY);
5990
512k
     } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
5991
3.20k
  SKIP(8);
5992
3.20k
  return(XML_ATTRIBUTE_ENTITIES);
5993
508k
     } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
5994
68.3k
  SKIP(8);
5995
68.3k
  return(XML_ATTRIBUTE_NMTOKENS);
5996
440k
     } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
5997
325k
  SKIP(7);
5998
325k
  return(XML_ATTRIBUTE_NMTOKEN);
5999
325k
     }
6000
115k
     return(xmlParseEnumeratedType(ctxt, tree));
6001
1.74M
}
6002
6003
/**
6004
 * xmlParseAttributeListDecl:
6005
 * @ctxt:  an XML parser context
6006
 *
6007
 * DEPRECATED: Internal function, don't use.
6008
 *
6009
 * : parse the Attribute list def for an element
6010
 *
6011
 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
6012
 *
6013
 * [53] AttDef ::= S Name S AttType S DefaultDecl
6014
 *
6015
 */
6016
void
6017
936k
xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
6018
936k
    const xmlChar *elemName;
6019
936k
    const xmlChar *attrName;
6020
936k
    xmlEnumerationPtr tree;
6021
6022
936k
    if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
6023
930k
  int inputid = ctxt->input->id;
6024
6025
930k
  SKIP(9);
6026
930k
  if (SKIP_BLANKS == 0) {
6027
1.66k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6028
1.66k
                     "Space required after '<!ATTLIST'\n");
6029
1.66k
  }
6030
930k
        elemName = xmlParseName(ctxt);
6031
930k
  if (elemName == NULL) {
6032
1.79k
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6033
1.79k
         "ATTLIST: no name for Element\n");
6034
1.79k
      return;
6035
1.79k
  }
6036
928k
  SKIP_BLANKS;
6037
928k
  GROW;
6038
2.64M
  while ((RAW != '>') && (ctxt->instate != XML_PARSER_EOF)) {
6039
1.75M
      int type;
6040
1.75M
      int def;
6041
1.75M
      xmlChar *defaultValue = NULL;
6042
6043
1.75M
      GROW;
6044
1.75M
            tree = NULL;
6045
1.75M
      attrName = xmlParseName(ctxt);
6046
1.75M
      if (attrName == NULL) {
6047
10.6k
    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6048
10.6k
             "ATTLIST: no name for Attribute\n");
6049
10.6k
    break;
6050
10.6k
      }
6051
1.74M
      GROW;
6052
1.74M
      if (SKIP_BLANKS == 0) {
6053
5.76k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6054
5.76k
            "Space required after the attribute name\n");
6055
5.76k
    break;
6056
5.76k
      }
6057
6058
1.74M
      type = xmlParseAttributeType(ctxt, &tree);
6059
1.74M
      if (type <= 0) {
6060
9.41k
          break;
6061
9.41k
      }
6062
6063
1.73M
      GROW;
6064
1.73M
      if (SKIP_BLANKS == 0) {
6065
5.15k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6066
5.15k
             "Space required after the attribute type\n");
6067
5.15k
          if (tree != NULL)
6068
2.36k
        xmlFreeEnumeration(tree);
6069
5.15k
    break;
6070
5.15k
      }
6071
6072
1.72M
      def = xmlParseDefaultDecl(ctxt, &defaultValue);
6073
1.72M
      if (def <= 0) {
6074
0
                if (defaultValue != NULL)
6075
0
        xmlFree(defaultValue);
6076
0
          if (tree != NULL)
6077
0
        xmlFreeEnumeration(tree);
6078
0
          break;
6079
0
      }
6080
1.72M
      if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
6081
70.5k
          xmlAttrNormalizeSpace(defaultValue, defaultValue);
6082
6083
1.72M
      GROW;
6084
1.72M
            if (RAW != '>') {
6085
1.26M
    if (SKIP_BLANKS == 0) {
6086
11.4k
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6087
11.4k
      "Space required after the attribute default value\n");
6088
11.4k
        if (defaultValue != NULL)
6089
3.80k
      xmlFree(defaultValue);
6090
11.4k
        if (tree != NULL)
6091
1.30k
      xmlFreeEnumeration(tree);
6092
11.4k
        break;
6093
11.4k
    }
6094
1.26M
      }
6095
1.71M
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6096
1.71M
    (ctxt->sax->attributeDecl != NULL))
6097
1.35M
    ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
6098
1.35M
                          type, def, defaultValue, tree);
6099
360k
      else if (tree != NULL)
6100
12.6k
    xmlFreeEnumeration(tree);
6101
6102
1.71M
      if ((ctxt->sax2) && (defaultValue != NULL) &&
6103
1.71M
          (def != XML_ATTRIBUTE_IMPLIED) &&
6104
1.71M
    (def != XML_ATTRIBUTE_REQUIRED)) {
6105
122k
    xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
6106
122k
      }
6107
1.71M
      if (ctxt->sax2) {
6108
1.09M
    xmlAddSpecialAttr(ctxt, elemName, attrName, type);
6109
1.09M
      }
6110
1.71M
      if (defaultValue != NULL)
6111
189k
          xmlFree(defaultValue);
6112
1.71M
      GROW;
6113
1.71M
  }
6114
928k
  if (RAW == '>') {
6115
888k
      if (inputid != ctxt->input->id) {
6116
4.71k
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6117
4.71k
                               "Attribute list declaration doesn't start and"
6118
4.71k
                               " stop in the same entity\n");
6119
4.71k
      }
6120
888k
      NEXT;
6121
888k
  }
6122
928k
    }
6123
936k
}
6124
6125
/**
6126
 * xmlParseElementMixedContentDecl:
6127
 * @ctxt:  an XML parser context
6128
 * @inputchk:  the input used for the current entity, needed for boundary checks
6129
 *
6130
 * DEPRECATED: Internal function, don't use.
6131
 *
6132
 * parse the declaration for a Mixed Element content
6133
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6134
 *
6135
 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
6136
 *                '(' S? '#PCDATA' S? ')'
6137
 *
6138
 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
6139
 *
6140
 * [ VC: No Duplicate Types ]
6141
 * The same name must not appear more than once in a single
6142
 * mixed-content declaration.
6143
 *
6144
 * returns: the list of the xmlElementContentPtr describing the element choices
6145
 */
6146
xmlElementContentPtr
6147
249k
xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6148
249k
    xmlElementContentPtr ret = NULL, cur = NULL, n;
6149
249k
    const xmlChar *elem = NULL;
6150
6151
249k
    GROW;
6152
249k
    if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6153
249k
  SKIP(7);
6154
249k
  SKIP_BLANKS;
6155
249k
  SHRINK;
6156
249k
  if (RAW == ')') {
6157
213k
      if (ctxt->input->id != inputchk) {
6158
0
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6159
0
                               "Element content declaration doesn't start and"
6160
0
                               " stop in the same entity\n");
6161
0
      }
6162
213k
      NEXT;
6163
213k
      ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6164
213k
      if (ret == NULL)
6165
0
          return(NULL);
6166
213k
      if (RAW == '*') {
6167
193
    ret->ocur = XML_ELEMENT_CONTENT_MULT;
6168
193
    NEXT;
6169
193
      }
6170
213k
      return(ret);
6171
213k
  }
6172
35.4k
  if ((RAW == '(') || (RAW == '|')) {
6173
34.5k
      ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6174
34.5k
      if (ret == NULL) return(NULL);
6175
34.5k
  }
6176
266k
  while ((RAW == '|') && (ctxt->instate != XML_PARSER_EOF)) {
6177
231k
      NEXT;
6178
231k
      if (elem == NULL) {
6179
34.3k
          ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6180
34.3k
    if (ret == NULL) {
6181
0
        xmlFreeDocElementContent(ctxt->myDoc, cur);
6182
0
                    return(NULL);
6183
0
                }
6184
34.3k
    ret->c1 = cur;
6185
34.3k
    if (cur != NULL)
6186
34.3k
        cur->parent = ret;
6187
34.3k
    cur = ret;
6188
196k
      } else {
6189
196k
          n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6190
196k
    if (n == NULL) {
6191
0
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6192
0
                    return(NULL);
6193
0
                }
6194
196k
    n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6195
196k
    if (n->c1 != NULL)
6196
196k
        n->c1->parent = n;
6197
196k
          cur->c2 = n;
6198
196k
    if (n != NULL)
6199
196k
        n->parent = cur;
6200
196k
    cur = n;
6201
196k
      }
6202
231k
      SKIP_BLANKS;
6203
231k
      elem = xmlParseName(ctxt);
6204
231k
      if (elem == NULL) {
6205
458
    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6206
458
      "xmlParseElementMixedContentDecl : Name expected\n");
6207
458
    xmlFreeDocElementContent(ctxt->myDoc, ret);
6208
458
    return(NULL);
6209
458
      }
6210
230k
      SKIP_BLANKS;
6211
230k
      GROW;
6212
230k
  }
6213
34.9k
  if ((RAW == ')') && (NXT(1) == '*')) {
6214
32.7k
      if (elem != NULL) {
6215
32.7k
    cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
6216
32.7k
                                   XML_ELEMENT_CONTENT_ELEMENT);
6217
32.7k
    if (cur->c2 != NULL)
6218
32.7k
        cur->c2->parent = cur;
6219
32.7k
            }
6220
32.7k
            if (ret != NULL)
6221
32.7k
                ret->ocur = XML_ELEMENT_CONTENT_MULT;
6222
32.7k
      if (ctxt->input->id != inputchk) {
6223
3
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6224
3
                               "Element content declaration doesn't start and"
6225
3
                               " stop in the same entity\n");
6226
3
      }
6227
32.7k
      SKIP(2);
6228
32.7k
  } else {
6229
2.28k
      xmlFreeDocElementContent(ctxt->myDoc, ret);
6230
2.28k
      xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
6231
2.28k
      return(NULL);
6232
2.28k
  }
6233
6234
34.9k
    } else {
6235
0
  xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
6236
0
    }
6237
32.7k
    return(ret);
6238
249k
}
6239
6240
/**
6241
 * xmlParseElementChildrenContentDeclPriv:
6242
 * @ctxt:  an XML parser context
6243
 * @inputchk:  the input used for the current entity, needed for boundary checks
6244
 * @depth: the level of recursion
6245
 *
6246
 * parse the declaration for a Mixed Element content
6247
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6248
 *
6249
 *
6250
 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6251
 *
6252
 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6253
 *
6254
 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6255
 *
6256
 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6257
 *
6258
 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6259
 * TODO Parameter-entity replacement text must be properly nested
6260
 *  with parenthesized groups. That is to say, if either of the
6261
 *  opening or closing parentheses in a choice, seq, or Mixed
6262
 *  construct is contained in the replacement text for a parameter
6263
 *  entity, both must be contained in the same replacement text. For
6264
 *  interoperability, if a parameter-entity reference appears in a
6265
 *  choice, seq, or Mixed construct, its replacement text should not
6266
 *  be empty, and neither the first nor last non-blank character of
6267
 *  the replacement text should be a connector (| or ,).
6268
 *
6269
 * Returns the tree of xmlElementContentPtr describing the element
6270
 *          hierarchy.
6271
 */
6272
static xmlElementContentPtr
6273
xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
6274
660k
                                       int depth) {
6275
660k
    xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
6276
660k
    const xmlChar *elem;
6277
660k
    xmlChar type = 0;
6278
6279
660k
    if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
6280
660k
        (depth >  2048)) {
6281
28
        xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED,
6282
28
"xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n",
6283
28
                          depth);
6284
28
  return(NULL);
6285
28
    }
6286
660k
    SKIP_BLANKS;
6287
660k
    GROW;
6288
660k
    if (RAW == '(') {
6289
71.4k
  int inputid = ctxt->input->id;
6290
6291
        /* Recurse on first child */
6292
71.4k
  NEXT;
6293
71.4k
  SKIP_BLANKS;
6294
71.4k
        cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6295
71.4k
                                                           depth + 1);
6296
71.4k
        if (cur == NULL)
6297
26.4k
            return(NULL);
6298
45.0k
  SKIP_BLANKS;
6299
45.0k
  GROW;
6300
588k
    } else {
6301
588k
  elem = xmlParseName(ctxt);
6302
588k
  if (elem == NULL) {
6303
5.02k
      xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6304
5.02k
      return(NULL);
6305
5.02k
  }
6306
583k
        cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6307
583k
  if (cur == NULL) {
6308
0
      xmlErrMemory(ctxt, NULL);
6309
0
      return(NULL);
6310
0
  }
6311
583k
  GROW;
6312
583k
  if (RAW == '?') {
6313
28.4k
      cur->ocur = XML_ELEMENT_CONTENT_OPT;
6314
28.4k
      NEXT;
6315
555k
  } else if (RAW == '*') {
6316
63.6k
      cur->ocur = XML_ELEMENT_CONTENT_MULT;
6317
63.6k
      NEXT;
6318
491k
  } else if (RAW == '+') {
6319
34.6k
      cur->ocur = XML_ELEMENT_CONTENT_PLUS;
6320
34.6k
      NEXT;
6321
457k
  } else {
6322
457k
      cur->ocur = XML_ELEMENT_CONTENT_ONCE;
6323
457k
  }
6324
583k
  GROW;
6325
583k
    }
6326
628k
    SKIP_BLANKS;
6327
628k
    SHRINK;
6328
1.88M
    while ((RAW != ')') && (ctxt->instate != XML_PARSER_EOF)) {
6329
        /*
6330
   * Each loop we parse one separator and one element.
6331
   */
6332
1.27M
        if (RAW == ',') {
6333
433k
      if (type == 0) type = CUR;
6334
6335
      /*
6336
       * Detect "Name | Name , Name" error
6337
       */
6338
241k
      else if (type != CUR) {
6339
221
    xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6340
221
        "xmlParseElementChildrenContentDecl : '%c' expected\n",
6341
221
                      type);
6342
221
    if ((last != NULL) && (last != ret))
6343
221
        xmlFreeDocElementContent(ctxt->myDoc, last);
6344
221
    if (ret != NULL)
6345
221
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6346
221
    return(NULL);
6347
221
      }
6348
433k
      NEXT;
6349
6350
433k
      op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
6351
433k
      if (op == NULL) {
6352
0
    if ((last != NULL) && (last != ret))
6353
0
        xmlFreeDocElementContent(ctxt->myDoc, last);
6354
0
          xmlFreeDocElementContent(ctxt->myDoc, ret);
6355
0
    return(NULL);
6356
0
      }
6357
433k
      if (last == NULL) {
6358
192k
    op->c1 = ret;
6359
192k
    if (ret != NULL)
6360
192k
        ret->parent = op;
6361
192k
    ret = cur = op;
6362
241k
      } else {
6363
241k
          cur->c2 = op;
6364
241k
    if (op != NULL)
6365
241k
        op->parent = cur;
6366
241k
    op->c1 = last;
6367
241k
    if (last != NULL)
6368
241k
        last->parent = op;
6369
241k
    cur =op;
6370
241k
    last = NULL;
6371
241k
      }
6372
841k
  } else if (RAW == '|') {
6373
823k
      if (type == 0) type = CUR;
6374
6375
      /*
6376
       * Detect "Name , Name | Name" error
6377
       */
6378
666k
      else if (type != CUR) {
6379
292
    xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6380
292
        "xmlParseElementChildrenContentDecl : '%c' expected\n",
6381
292
          type);
6382
292
    if ((last != NULL) && (last != ret))
6383
292
        xmlFreeDocElementContent(ctxt->myDoc, last);
6384
292
    if (ret != NULL)
6385
292
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6386
292
    return(NULL);
6387
292
      }
6388
823k
      NEXT;
6389
6390
823k
      op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6391
823k
      if (op == NULL) {
6392
0
    if ((last != NULL) && (last != ret))
6393
0
        xmlFreeDocElementContent(ctxt->myDoc, last);
6394
0
    if (ret != NULL)
6395
0
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6396
0
    return(NULL);
6397
0
      }
6398
823k
      if (last == NULL) {
6399
157k
    op->c1 = ret;
6400
157k
    if (ret != NULL)
6401
157k
        ret->parent = op;
6402
157k
    ret = cur = op;
6403
665k
      } else {
6404
665k
          cur->c2 = op;
6405
665k
    if (op != NULL)
6406
665k
        op->parent = cur;
6407
665k
    op->c1 = last;
6408
665k
    if (last != NULL)
6409
665k
        last->parent = op;
6410
665k
    cur =op;
6411
665k
    last = NULL;
6412
665k
      }
6413
823k
  } else {
6414
17.9k
      xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
6415
17.9k
      if ((last != NULL) && (last != ret))
6416
8.51k
          xmlFreeDocElementContent(ctxt->myDoc, last);
6417
17.9k
      if (ret != NULL)
6418
17.9k
    xmlFreeDocElementContent(ctxt->myDoc, ret);
6419
17.9k
      return(NULL);
6420
17.9k
  }
6421
1.25M
  GROW;
6422
1.25M
  SKIP_BLANKS;
6423
1.25M
  GROW;
6424
1.25M
  if (RAW == '(') {
6425
62.5k
      int inputid = ctxt->input->id;
6426
      /* Recurse on second child */
6427
62.5k
      NEXT;
6428
62.5k
      SKIP_BLANKS;
6429
62.5k
      last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6430
62.5k
                                                          depth + 1);
6431
62.5k
            if (last == NULL) {
6432
1.36k
    if (ret != NULL)
6433
1.36k
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6434
1.36k
    return(NULL);
6435
1.36k
            }
6436
61.1k
      SKIP_BLANKS;
6437
1.19M
  } else {
6438
1.19M
      elem = xmlParseName(ctxt);
6439
1.19M
      if (elem == NULL) {
6440
1.52k
    xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6441
1.52k
    if (ret != NULL)
6442
1.52k
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6443
1.52k
    return(NULL);
6444
1.52k
      }
6445
1.19M
      last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6446
1.19M
      if (last == NULL) {
6447
0
    if (ret != NULL)
6448
0
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6449
0
    return(NULL);
6450
0
      }
6451
1.19M
      if (RAW == '?') {
6452
178k
    last->ocur = XML_ELEMENT_CONTENT_OPT;
6453
178k
    NEXT;
6454
1.01M
      } else if (RAW == '*') {
6455
99.2k
    last->ocur = XML_ELEMENT_CONTENT_MULT;
6456
99.2k
    NEXT;
6457
915k
      } else if (RAW == '+') {
6458
14.5k
    last->ocur = XML_ELEMENT_CONTENT_PLUS;
6459
14.5k
    NEXT;
6460
900k
      } else {
6461
900k
    last->ocur = XML_ELEMENT_CONTENT_ONCE;
6462
900k
      }
6463
1.19M
  }
6464
1.25M
  SKIP_BLANKS;
6465
1.25M
  GROW;
6466
1.25M
    }
6467
607k
    if ((cur != NULL) && (last != NULL)) {
6468
337k
        cur->c2 = last;
6469
337k
  if (last != NULL)
6470
337k
      last->parent = cur;
6471
337k
    }
6472
607k
    if (ctxt->input->id != inputchk) {
6473
128
  xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6474
128
                       "Element content declaration doesn't start and stop in"
6475
128
                       " the same entity\n");
6476
128
    }
6477
607k
    NEXT;
6478
607k
    if (RAW == '?') {
6479
6.19k
  if (ret != NULL) {
6480
6.19k
      if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6481
6.19k
          (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6482
33
          ret->ocur = XML_ELEMENT_CONTENT_MULT;
6483
6.16k
      else
6484
6.16k
          ret->ocur = XML_ELEMENT_CONTENT_OPT;
6485
6.19k
  }
6486
6.19k
  NEXT;
6487
601k
    } else if (RAW == '*') {
6488
299k
  if (ret != NULL) {
6489
299k
      ret->ocur = XML_ELEMENT_CONTENT_MULT;
6490
299k
      cur = ret;
6491
      /*
6492
       * Some normalization:
6493
       * (a | b* | c?)* == (a | b | c)*
6494
       */
6495
1.01M
      while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6496
712k
    if ((cur->c1 != NULL) &&
6497
712k
              ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6498
712k
         (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6499
32.4k
        cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6500
712k
    if ((cur->c2 != NULL) &&
6501
712k
              ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6502
712k
         (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6503
5.28k
        cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6504
712k
    cur = cur->c2;
6505
712k
      }
6506
299k
  }
6507
299k
  NEXT;
6508
301k
    } else if (RAW == '+') {
6509
20.0k
  if (ret != NULL) {
6510
20.0k
      int found = 0;
6511
6512
20.0k
      if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6513
20.0k
          (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6514
80
          ret->ocur = XML_ELEMENT_CONTENT_MULT;
6515
19.9k
      else
6516
19.9k
          ret->ocur = XML_ELEMENT_CONTENT_PLUS;
6517
      /*
6518
       * Some normalization:
6519
       * (a | b*)+ == (a | b)*
6520
       * (a | b?)+ == (a | b)*
6521
       */
6522
32.4k
      while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6523
12.3k
    if ((cur->c1 != NULL) &&
6524
12.3k
              ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6525
12.3k
         (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6526
227
        cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6527
227
        found = 1;
6528
227
    }
6529
12.3k
    if ((cur->c2 != NULL) &&
6530
12.3k
              ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6531
12.3k
         (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6532
193
        cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6533
193
        found = 1;
6534
193
    }
6535
12.3k
    cur = cur->c2;
6536
12.3k
      }
6537
20.0k
      if (found)
6538
350
    ret->ocur = XML_ELEMENT_CONTENT_MULT;
6539
20.0k
  }
6540
20.0k
  NEXT;
6541
20.0k
    }
6542
607k
    return(ret);
6543
628k
}
6544
6545
/**
6546
 * xmlParseElementChildrenContentDecl:
6547
 * @ctxt:  an XML parser context
6548
 * @inputchk:  the input used for the current entity, needed for boundary checks
6549
 *
6550
 * DEPRECATED: Internal function, don't use.
6551
 *
6552
 * parse the declaration for a Mixed Element content
6553
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6554
 *
6555
 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6556
 *
6557
 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6558
 *
6559
 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6560
 *
6561
 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6562
 *
6563
 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6564
 * TODO Parameter-entity replacement text must be properly nested
6565
 *  with parenthesized groups. That is to say, if either of the
6566
 *  opening or closing parentheses in a choice, seq, or Mixed
6567
 *  construct is contained in the replacement text for a parameter
6568
 *  entity, both must be contained in the same replacement text. For
6569
 *  interoperability, if a parameter-entity reference appears in a
6570
 *  choice, seq, or Mixed construct, its replacement text should not
6571
 *  be empty, and neither the first nor last non-blank character of
6572
 *  the replacement text should be a connector (| or ,).
6573
 *
6574
 * Returns the tree of xmlElementContentPtr describing the element
6575
 *          hierarchy.
6576
 */
6577
xmlElementContentPtr
6578
0
xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6579
    /* stub left for API/ABI compat */
6580
0
    return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6581
0
}
6582
6583
/**
6584
 * xmlParseElementContentDecl:
6585
 * @ctxt:  an XML parser context
6586
 * @name:  the name of the element being defined.
6587
 * @result:  the Element Content pointer will be stored here if any
6588
 *
6589
 * DEPRECATED: Internal function, don't use.
6590
 *
6591
 * parse the declaration for an Element content either Mixed or Children,
6592
 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
6593
 *
6594
 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6595
 *
6596
 * returns: the type of element content XML_ELEMENT_TYPE_xxx
6597
 */
6598
6599
int
6600
xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
6601
775k
                           xmlElementContentPtr *result) {
6602
6603
775k
    xmlElementContentPtr tree = NULL;
6604
775k
    int inputid = ctxt->input->id;
6605
775k
    int res;
6606
6607
775k
    *result = NULL;
6608
6609
775k
    if (RAW != '(') {
6610
0
  xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6611
0
    "xmlParseElementContentDecl : %s '(' expected\n", name);
6612
0
  return(-1);
6613
0
    }
6614
775k
    NEXT;
6615
775k
    GROW;
6616
775k
    if (ctxt->instate == XML_PARSER_EOF)
6617
0
        return(-1);
6618
775k
    SKIP_BLANKS;
6619
775k
    if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6620
249k
        tree = xmlParseElementMixedContentDecl(ctxt, inputid);
6621
249k
  res = XML_ELEMENT_TYPE_MIXED;
6622
526k
    } else {
6623
526k
        tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
6624
526k
  res = XML_ELEMENT_TYPE_ELEMENT;
6625
526k
    }
6626
775k
    SKIP_BLANKS;
6627
775k
    *result = tree;
6628
775k
    return(res);
6629
775k
}
6630
6631
/**
6632
 * xmlParseElementDecl:
6633
 * @ctxt:  an XML parser context
6634
 *
6635
 * DEPRECATED: Internal function, don't use.
6636
 *
6637
 * parse an Element declaration.
6638
 *
6639
 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6640
 *
6641
 * [ VC: Unique Element Type Declaration ]
6642
 * No element type may be declared more than once
6643
 *
6644
 * Returns the type of the element, or -1 in case of error
6645
 */
6646
int
6647
1.25M
xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
6648
1.25M
    const xmlChar *name;
6649
1.25M
    int ret = -1;
6650
1.25M
    xmlElementContentPtr content  = NULL;
6651
6652
    /* GROW; done in the caller */
6653
1.25M
    if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
6654
1.24M
  int inputid = ctxt->input->id;
6655
6656
1.24M
  SKIP(9);
6657
1.24M
  if (SKIP_BLANKS == 0) {
6658
2.47k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6659
2.47k
               "Space required after 'ELEMENT'\n");
6660
2.47k
      return(-1);
6661
2.47k
  }
6662
1.24M
        name = xmlParseName(ctxt);
6663
1.24M
  if (name == NULL) {
6664
1.70k
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6665
1.70k
         "xmlParseElementDecl: no name for Element\n");
6666
1.70k
      return(-1);
6667
1.70k
  }
6668
1.24M
  if (SKIP_BLANKS == 0) {
6669
6.73k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6670
6.73k
         "Space required after the element name\n");
6671
6.73k
  }
6672
1.24M
  if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
6673
454k
      SKIP(5);
6674
      /*
6675
       * Element must always be empty.
6676
       */
6677
454k
      ret = XML_ELEMENT_TYPE_EMPTY;
6678
790k
  } else if ((RAW == 'A') && (NXT(1) == 'N') &&
6679
790k
             (NXT(2) == 'Y')) {
6680
3.84k
      SKIP(3);
6681
      /*
6682
       * Element is a generic container.
6683
       */
6684
3.84k
      ret = XML_ELEMENT_TYPE_ANY;
6685
786k
  } else if (RAW == '(') {
6686
775k
      ret = xmlParseElementContentDecl(ctxt, name, &content);
6687
775k
  } else {
6688
      /*
6689
       * [ WFC: PEs in Internal Subset ] error handling.
6690
       */
6691
10.8k
      if ((RAW == '%') && (ctxt->external == 0) &&
6692
10.8k
          (ctxt->inputNr == 1)) {
6693
333
    xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
6694
333
    "PEReference: forbidden within markup decl in internal subset\n");
6695
10.4k
      } else {
6696
10.4k
    xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6697
10.4k
          "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6698
10.4k
            }
6699
10.8k
      return(-1);
6700
10.8k
  }
6701
6702
1.23M
  SKIP_BLANKS;
6703
6704
1.23M
  if (RAW != '>') {
6705
27.1k
      xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
6706
27.1k
      if (content != NULL) {
6707
2.00k
    xmlFreeDocElementContent(ctxt->myDoc, content);
6708
2.00k
      }
6709
1.20M
  } else {
6710
1.20M
      if (inputid != ctxt->input->id) {
6711
210
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6712
210
                               "Element declaration doesn't start and stop in"
6713
210
                               " the same entity\n");
6714
210
      }
6715
6716
1.20M
      NEXT;
6717
1.20M
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6718
1.20M
    (ctxt->sax->elementDecl != NULL)) {
6719
1.03M
    if (content != NULL)
6720
655k
        content->parent = NULL;
6721
1.03M
          ctxt->sax->elementDecl(ctxt->userData, name, ret,
6722
1.03M
                           content);
6723
1.03M
    if ((content != NULL) && (content->parent == NULL)) {
6724
        /*
6725
         * this is a trick: if xmlAddElementDecl is called,
6726
         * instead of copying the full tree it is plugged directly
6727
         * if called from the parser. Avoid duplicating the
6728
         * interfaces or change the API/ABI
6729
         */
6730
8.86k
        xmlFreeDocElementContent(ctxt->myDoc, content);
6731
8.86k
    }
6732
1.03M
      } else if (content != NULL) {
6733
90.4k
    xmlFreeDocElementContent(ctxt->myDoc, content);
6734
90.4k
      }
6735
1.20M
  }
6736
1.23M
    }
6737
1.24M
    return(ret);
6738
1.25M
}
6739
6740
/**
6741
 * xmlParseConditionalSections
6742
 * @ctxt:  an XML parser context
6743
 *
6744
 * [61] conditionalSect ::= includeSect | ignoreSect
6745
 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6746
 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6747
 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6748
 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6749
 */
6750
6751
static void
6752
15.2k
xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
6753
15.2k
    int *inputIds = NULL;
6754
15.2k
    size_t inputIdsSize = 0;
6755
15.2k
    size_t depth = 0;
6756
6757
77.6k
    while (ctxt->instate != XML_PARSER_EOF) {
6758
77.1k
        if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6759
45.5k
            int id = ctxt->input->id;
6760
6761
45.5k
            SKIP(3);
6762
45.5k
            SKIP_BLANKS;
6763
6764
45.5k
            if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
6765
37.5k
                SKIP(7);
6766
37.5k
                SKIP_BLANKS;
6767
37.5k
                if (RAW != '[') {
6768
345
                    xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6769
345
                    xmlHaltParser(ctxt);
6770
345
                    goto error;
6771
345
                }
6772
37.2k
                if (ctxt->input->id != id) {
6773
6
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6774
6
                                   "All markup of the conditional section is"
6775
6
                                   " not in the same entity\n");
6776
6
                }
6777
37.2k
                NEXT;
6778
6779
37.2k
                if (inputIdsSize <= depth) {
6780
11.8k
                    int *tmp;
6781
6782
11.8k
                    inputIdsSize = (inputIdsSize == 0 ? 4 : inputIdsSize * 2);
6783
11.8k
                    tmp = (int *) xmlRealloc(inputIds,
6784
11.8k
                            inputIdsSize * sizeof(int));
6785
11.8k
                    if (tmp == NULL) {
6786
0
                        xmlErrMemory(ctxt, NULL);
6787
0
                        goto error;
6788
0
                    }
6789
11.8k
                    inputIds = tmp;
6790
11.8k
                }
6791
37.2k
                inputIds[depth] = id;
6792
37.2k
                depth++;
6793
37.2k
            } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
6794
4.82k
                int state;
6795
4.82k
                xmlParserInputState instate;
6796
4.82k
                size_t ignoreDepth = 0;
6797
6798
4.82k
                SKIP(6);
6799
4.82k
                SKIP_BLANKS;
6800
4.82k
                if (RAW != '[') {
6801
200
                    xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6802
200
                    xmlHaltParser(ctxt);
6803
200
                    goto error;
6804
200
                }
6805
4.62k
                if (ctxt->input->id != id) {
6806
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6807
0
                                   "All markup of the conditional section is"
6808
0
                                   " not in the same entity\n");
6809
0
                }
6810
4.62k
                NEXT;
6811
6812
                /*
6813
                 * Parse up to the end of the conditional section but disable
6814
                 * SAX event generating DTD building in the meantime
6815
                 */
6816
4.62k
                state = ctxt->disableSAX;
6817
4.62k
                instate = ctxt->instate;
6818
4.62k
                if (ctxt->recovery == 0) ctxt->disableSAX = 1;
6819
4.62k
                ctxt->instate = XML_PARSER_IGNORE;
6820
6821
832k
                while (RAW != 0) {
6822
830k
                    if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6823
8.54k
                        SKIP(3);
6824
8.54k
                        ignoreDepth++;
6825
                        /* Check for integer overflow */
6826
8.54k
                        if (ignoreDepth == 0) {
6827
0
                            xmlErrMemory(ctxt, NULL);
6828
0
                            goto error;
6829
0
                        }
6830
821k
                    } else if ((RAW == ']') && (NXT(1) == ']') &&
6831
821k
                               (NXT(2) == '>')) {
6832
6.28k
                        if (ignoreDepth == 0)
6833
1.82k
                            break;
6834
4.46k
                        SKIP(3);
6835
4.46k
                        ignoreDepth--;
6836
815k
                    } else {
6837
815k
                        NEXT;
6838
815k
                    }
6839
830k
                }
6840
6841
4.62k
                ctxt->disableSAX = state;
6842
4.62k
                ctxt->instate = instate;
6843
6844
4.62k
    if (RAW == 0) {
6845
2.79k
        xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
6846
2.79k
                    goto error;
6847
2.79k
    }
6848
1.82k
                if (ctxt->input->id != id) {
6849
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6850
0
                                   "All markup of the conditional section is"
6851
0
                                   " not in the same entity\n");
6852
0
                }
6853
1.82k
                SKIP(3);
6854
3.10k
            } else {
6855
3.10k
                xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
6856
3.10k
                xmlHaltParser(ctxt);
6857
3.10k
                goto error;
6858
3.10k
            }
6859
45.5k
        } else if ((depth > 0) &&
6860
31.5k
                   (RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6861
15.9k
            depth--;
6862
15.9k
            if (ctxt->input->id != inputIds[depth]) {
6863
303
                xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6864
303
                               "All markup of the conditional section is not"
6865
303
                               " in the same entity\n");
6866
303
            }
6867
15.9k
            SKIP(3);
6868
15.9k
        } else {
6869
15.5k
            int id = ctxt->input->id;
6870
15.5k
            unsigned long cons = CUR_CONSUMED;
6871
6872
15.5k
            xmlParseMarkupDecl(ctxt);
6873
6874
15.5k
            if ((id == ctxt->input->id) && (cons == CUR_CONSUMED)) {
6875
4.03k
                xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6876
4.03k
                xmlHaltParser(ctxt);
6877
4.03k
                goto error;
6878
4.03k
            }
6879
15.5k
        }
6880
6881
66.6k
        if (depth == 0)
6882
4.20k
            break;
6883
6884
62.4k
        SKIP_BLANKS;
6885
62.4k
        GROW;
6886
62.4k
    }
6887
6888
15.2k
error:
6889
15.2k
    xmlFree(inputIds);
6890
15.2k
}
6891
6892
/**
6893
 * xmlParseMarkupDecl:
6894
 * @ctxt:  an XML parser context
6895
 *
6896
 * DEPRECATED: Internal function, don't use.
6897
 *
6898
 * parse Markup declarations
6899
 *
6900
 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6901
 *                     NotationDecl | PI | Comment
6902
 *
6903
 * [ VC: Proper Declaration/PE Nesting ]
6904
 * Parameter-entity replacement text must be properly nested with
6905
 * markup declarations. That is to say, if either the first character
6906
 * or the last character of a markup declaration (markupdecl above) is
6907
 * contained in the replacement text for a parameter-entity reference,
6908
 * both must be contained in the same replacement text.
6909
 *
6910
 * [ WFC: PEs in Internal Subset ]
6911
 * In the internal DTD subset, parameter-entity references can occur
6912
 * only where markup declarations can occur, not within markup declarations.
6913
 * (This does not apply to references that occur in external parameter
6914
 * entities or to the external subset.)
6915
 */
6916
void
6917
4.37M
xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
6918
4.37M
    GROW;
6919
4.37M
    if (CUR == '<') {
6920
4.10M
        if (NXT(1) == '!') {
6921
4.08M
      switch (NXT(2)) {
6922
2.53M
          case 'E':
6923
2.53M
        if (NXT(3) == 'L')
6924
1.25M
      xmlParseElementDecl(ctxt);
6925
1.27M
        else if (NXT(3) == 'N')
6926
1.27M
      xmlParseEntityDecl(ctxt);
6927
2.53M
        break;
6928
936k
          case 'A':
6929
936k
        xmlParseAttributeListDecl(ctxt);
6930
936k
        break;
6931
9.09k
          case 'N':
6932
9.09k
        xmlParseNotationDecl(ctxt);
6933
9.09k
        break;
6934
599k
          case '-':
6935
599k
        xmlParseComment(ctxt);
6936
599k
        break;
6937
5.29k
    default:
6938
        /* there is an error but it will be detected later */
6939
5.29k
        break;
6940
4.08M
      }
6941
4.08M
  } else if (NXT(1) == '?') {
6942
3.60k
      xmlParsePI(ctxt);
6943
3.60k
  }
6944
4.10M
    }
6945
6946
    /*
6947
     * detect requirement to exit there and act accordingly
6948
     * and avoid having instate overridden later on
6949
     */
6950
4.37M
    if (ctxt->instate == XML_PARSER_EOF)
6951
18.9k
        return;
6952
6953
4.35M
    ctxt->instate = XML_PARSER_DTD;
6954
4.35M
}
6955
6956
/**
6957
 * xmlParseTextDecl:
6958
 * @ctxt:  an XML parser context
6959
 *
6960
 * DEPRECATED: Internal function, don't use.
6961
 *
6962
 * parse an XML declaration header for external entities
6963
 *
6964
 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
6965
 */
6966
6967
void
6968
15.4k
xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
6969
15.4k
    xmlChar *version;
6970
15.4k
    const xmlChar *encoding;
6971
15.4k
    int oldstate;
6972
6973
    /*
6974
     * We know that '<?xml' is here.
6975
     */
6976
15.4k
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
6977
15.2k
  SKIP(5);
6978
15.2k
    } else {
6979
215
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
6980
215
  return;
6981
215
    }
6982
6983
    /* Avoid expansion of parameter entities when skipping blanks. */
6984
15.2k
    oldstate = ctxt->instate;
6985
15.2k
    ctxt->instate = XML_PARSER_START;
6986
6987
15.2k
    if (SKIP_BLANKS == 0) {
6988
0
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6989
0
           "Space needed after '<?xml'\n");
6990
0
    }
6991
6992
    /*
6993
     * We may have the VersionInfo here.
6994
     */
6995
15.2k
    version = xmlParseVersionInfo(ctxt);
6996
15.2k
    if (version == NULL)
6997
4.00k
  version = xmlCharStrdup(XML_DEFAULT_VERSION);
6998
11.2k
    else {
6999
11.2k
  if (SKIP_BLANKS == 0) {
7000
814
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7001
814
               "Space needed here\n");
7002
814
  }
7003
11.2k
    }
7004
15.2k
    ctxt->input->version = version;
7005
7006
    /*
7007
     * We must have the encoding declaration
7008
     */
7009
15.2k
    encoding = xmlParseEncodingDecl(ctxt);
7010
15.2k
    if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7011
  /*
7012
   * The XML REC instructs us to stop parsing right here
7013
   */
7014
281
        ctxt->instate = oldstate;
7015
281
        return;
7016
281
    }
7017
14.9k
    if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
7018
4.54k
  xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
7019
4.54k
           "Missing encoding in text declaration\n");
7020
4.54k
    }
7021
7022
14.9k
    SKIP_BLANKS;
7023
14.9k
    if ((RAW == '?') && (NXT(1) == '>')) {
7024
8.45k
        SKIP(2);
7025
8.45k
    } else if (RAW == '>') {
7026
        /* Deprecated old WD ... */
7027
200
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
7028
200
  NEXT;
7029
6.32k
    } else {
7030
6.32k
        int c;
7031
7032
6.32k
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
7033
219k
        while ((c = CUR) != 0) {
7034
218k
            NEXT;
7035
218k
            if (c == '>')
7036
4.85k
                break;
7037
218k
        }
7038
6.32k
    }
7039
7040
14.9k
    ctxt->instate = oldstate;
7041
14.9k
}
7042
7043
/**
7044
 * xmlParseExternalSubset:
7045
 * @ctxt:  an XML parser context
7046
 * @ExternalID: the external identifier
7047
 * @SystemID: the system identifier (or URL)
7048
 *
7049
 * parse Markup declarations from an external subset
7050
 *
7051
 * [30] extSubset ::= textDecl? extSubsetDecl
7052
 *
7053
 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
7054
 */
7055
void
7056
xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
7057
55.1k
                       const xmlChar *SystemID) {
7058
55.1k
    xmlDetectSAX2(ctxt);
7059
55.1k
    GROW;
7060
7061
55.1k
    if ((ctxt->encoding == NULL) &&
7062
55.1k
        (ctxt->input->end - ctxt->input->cur >= 4)) {
7063
54.9k
        xmlChar start[4];
7064
54.9k
  xmlCharEncoding enc;
7065
7066
54.9k
  start[0] = RAW;
7067
54.9k
  start[1] = NXT(1);
7068
54.9k
  start[2] = NXT(2);
7069
54.9k
  start[3] = NXT(3);
7070
54.9k
  enc = xmlDetectCharEncoding(start, 4);
7071
54.9k
  if (enc != XML_CHAR_ENCODING_NONE)
7072
11.6k
      xmlSwitchEncoding(ctxt, enc);
7073
54.9k
    }
7074
7075
55.1k
    if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
7076
10.6k
  xmlParseTextDecl(ctxt);
7077
10.6k
  if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7078
      /*
7079
       * The XML REC instructs us to stop parsing right here
7080
       */
7081
224
      xmlHaltParser(ctxt);
7082
224
      return;
7083
224
  }
7084
10.6k
    }
7085
54.9k
    if (ctxt->myDoc == NULL) {
7086
0
        ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
7087
0
  if (ctxt->myDoc == NULL) {
7088
0
      xmlErrMemory(ctxt, "New Doc failed");
7089
0
      return;
7090
0
  }
7091
0
  ctxt->myDoc->properties = XML_DOC_INTERNAL;
7092
0
    }
7093
54.9k
    if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
7094
0
        xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
7095
7096
54.9k
    ctxt->instate = XML_PARSER_DTD;
7097
54.9k
    ctxt->external = 1;
7098
54.9k
    SKIP_BLANKS;
7099
1.06M
    while (((RAW == '<') && (NXT(1) == '?')) ||
7100
1.06M
           ((RAW == '<') && (NXT(1) == '!')) ||
7101
1.06M
     (RAW == '%')) {
7102
1.02M
  int id = ctxt->input->id;
7103
1.02M
  unsigned long cons = CUR_CONSUMED;
7104
7105
1.02M
  GROW;
7106
1.02M
        if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7107
15.2k
      xmlParseConditionalSections(ctxt);
7108
15.2k
  } else
7109
1.00M
      xmlParseMarkupDecl(ctxt);
7110
1.02M
        SKIP_BLANKS;
7111
7112
1.02M
  if ((id == ctxt->input->id) && (cons == CUR_CONSUMED)) {
7113
8.84k
      xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7114
8.84k
      break;
7115
8.84k
  }
7116
1.02M
    }
7117
7118
54.9k
    if (RAW != 0) {
7119
19.9k
  xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7120
19.9k
    }
7121
7122
54.9k
}
7123
7124
/**
7125
 * xmlParseReference:
7126
 * @ctxt:  an XML parser context
7127
 *
7128
 * DEPRECATED: Internal function, don't use.
7129
 *
7130
 * parse and handle entity references in content, depending on the SAX
7131
 * interface, this may end-up in a call to character() if this is a
7132
 * CharRef, a predefined entity, if there is no reference() callback.
7133
 * or if the parser was asked to switch to that mode.
7134
 *
7135
 * [67] Reference ::= EntityRef | CharRef
7136
 */
7137
void
7138
3.90M
xmlParseReference(xmlParserCtxtPtr ctxt) {
7139
3.90M
    xmlEntityPtr ent;
7140
3.90M
    xmlChar *val;
7141
3.90M
    int was_checked;
7142
3.90M
    xmlNodePtr list = NULL;
7143
3.90M
    xmlParserErrors ret = XML_ERR_OK;
7144
7145
7146
3.90M
    if (RAW != '&')
7147
0
        return;
7148
7149
    /*
7150
     * Simple case of a CharRef
7151
     */
7152
3.90M
    if (NXT(1) == '#') {
7153
177k
  int i = 0;
7154
177k
  xmlChar out[16];
7155
177k
  int hex = NXT(2);
7156
177k
  int value = xmlParseCharRef(ctxt);
7157
7158
177k
  if (value == 0)
7159
17.7k
      return;
7160
160k
  if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
7161
      /*
7162
       * So we are using non-UTF-8 buffers
7163
       * Check that the char fit on 8bits, if not
7164
       * generate a CharRef.
7165
       */
7166
64.3k
      if (value <= 0xFF) {
7167
54.2k
    out[0] = value;
7168
54.2k
    out[1] = 0;
7169
54.2k
    if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7170
54.2k
        (!ctxt->disableSAX))
7171
44.6k
        ctxt->sax->characters(ctxt->userData, out, 1);
7172
54.2k
      } else {
7173
10.1k
    if ((hex == 'x') || (hex == 'X'))
7174
47
        snprintf((char *)out, sizeof(out), "#x%X", value);
7175
10.0k
    else
7176
10.0k
        snprintf((char *)out, sizeof(out), "#%d", value);
7177
10.1k
    if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7178
10.1k
        (!ctxt->disableSAX))
7179
9.49k
        ctxt->sax->reference(ctxt->userData, out);
7180
10.1k
      }
7181
95.6k
  } else {
7182
      /*
7183
       * Just encode the value in UTF-8
7184
       */
7185
95.6k
      COPY_BUF(0 ,out, i, value);
7186
95.6k
      out[i] = 0;
7187
95.6k
      if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7188
95.6k
    (!ctxt->disableSAX))
7189
84.6k
    ctxt->sax->characters(ctxt->userData, out, i);
7190
95.6k
  }
7191
160k
  return;
7192
177k
    }
7193
7194
    /*
7195
     * We are seeing an entity reference
7196
     */
7197
3.72M
    ent = xmlParseEntityRef(ctxt);
7198
3.72M
    if (ent == NULL) return;
7199
3.62M
    if (!ctxt->wellFormed)
7200
222k
  return;
7201
3.40M
    was_checked = ent->checked;
7202
7203
    /* special case of predefined entities */
7204
3.40M
    if ((ent->name == NULL) ||
7205
3.40M
        (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
7206
44.3k
  val = ent->content;
7207
44.3k
  if (val == NULL) return;
7208
  /*
7209
   * inline the entity.
7210
   */
7211
44.3k
  if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7212
44.3k
      (!ctxt->disableSAX))
7213
44.3k
      ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
7214
44.3k
  return;
7215
44.3k
    }
7216
7217
    /*
7218
     * The first reference to the entity trigger a parsing phase
7219
     * where the ent->children is filled with the result from
7220
     * the parsing.
7221
     * Note: external parsed entities will not be loaded, it is not
7222
     * required for a non-validating parser, unless the parsing option
7223
     * of validating, or substituting entities were given. Doing so is
7224
     * far more secure as the parser will only process data coming from
7225
     * the document entity by default.
7226
     */
7227
3.35M
    if (((ent->checked == 0) ||
7228
3.35M
         ((ent->children == NULL) && (ctxt->options & XML_PARSE_NOENT))) &&
7229
3.35M
        ((ent->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY) ||
7230
3.29M
         (ctxt->options & (XML_PARSE_NOENT | XML_PARSE_DTDVALID)))) {
7231
3.29M
  unsigned long oldnbent = ctxt->nbentities, diff;
7232
7233
  /*
7234
   * This is a bit hackish but this seems the best
7235
   * way to make sure both SAX and DOM entity support
7236
   * behaves okay.
7237
   */
7238
3.29M
  void *user_data;
7239
3.29M
  if (ctxt->userData == ctxt)
7240
3.29M
      user_data = NULL;
7241
0
  else
7242
0
      user_data = ctxt->userData;
7243
7244
  /*
7245
   * Check that this entity is well formed
7246
   * 4.3.2: An internal general parsed entity is well-formed
7247
   * if its replacement text matches the production labeled
7248
   * content.
7249
   */
7250
3.29M
  if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7251
61.8k
      ctxt->depth++;
7252
61.8k
      ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content,
7253
61.8k
                                                user_data, &list);
7254
61.8k
      ctxt->depth--;
7255
7256
3.23M
  } else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7257
3.23M
      ctxt->depth++;
7258
3.23M
      ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax,
7259
3.23M
                                     user_data, ctxt->depth, ent->URI,
7260
3.23M
             ent->ExternalID, &list);
7261
3.23M
      ctxt->depth--;
7262
3.23M
  } else {
7263
0
      ret = XML_ERR_ENTITY_PE_INTERNAL;
7264
0
      xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7265
0
       "invalid entity type found\n", NULL);
7266
0
  }
7267
7268
  /*
7269
   * Store the number of entities needing parsing for this entity
7270
   * content and do checkings
7271
   */
7272
3.29M
        diff = ctxt->nbentities - oldnbent + 1;
7273
3.29M
        if (diff > INT_MAX / 2)
7274
0
            diff = INT_MAX / 2;
7275
3.29M
        ent->checked = diff * 2;
7276
3.29M
  if ((ent->content != NULL) && (xmlStrchr(ent->content, '<')))
7277
17.2k
      ent->checked |= 1;
7278
3.29M
  if (ret == XML_ERR_ENTITY_LOOP) {
7279
3.21M
      xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7280
3.21M
            xmlHaltParser(ctxt);
7281
3.21M
      xmlFreeNodeList(list);
7282
3.21M
      return;
7283
3.21M
  }
7284
77.8k
  if (xmlParserEntityCheck(ctxt, 0, ent, 0)) {
7285
0
      xmlFreeNodeList(list);
7286
0
      return;
7287
0
  }
7288
7289
77.8k
  if ((ret == XML_ERR_OK) && (list != NULL)) {
7290
43.1k
      if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
7291
43.1k
       (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
7292
43.1k
    (ent->children == NULL)) {
7293
42.7k
    ent->children = list;
7294
                /*
7295
                 * Prune it directly in the generated document
7296
                 * except for single text nodes.
7297
                 */
7298
42.7k
                if ((ctxt->replaceEntities == 0) ||
7299
42.7k
                    (ctxt->parseMode == XML_PARSE_READER) ||
7300
42.7k
                    ((list->type == XML_TEXT_NODE) &&
7301
40.4k
                     (list->next == NULL))) {
7302
40.4k
                    ent->owner = 1;
7303
88.8k
                    while (list != NULL) {
7304
48.3k
                        list->parent = (xmlNodePtr) ent;
7305
48.3k
                        if (list->doc != ent->doc)
7306
0
                            xmlSetTreeDoc(list, ent->doc);
7307
48.3k
                        if (list->next == NULL)
7308
40.4k
                            ent->last = list;
7309
48.3k
                        list = list->next;
7310
48.3k
                    }
7311
40.4k
                    list = NULL;
7312
40.4k
                } else {
7313
2.25k
                    ent->owner = 0;
7314
7.81k
                    while (list != NULL) {
7315
5.56k
                        list->parent = (xmlNodePtr) ctxt->node;
7316
5.56k
                        list->doc = ctxt->myDoc;
7317
5.56k
                        if (list->next == NULL)
7318
2.25k
                            ent->last = list;
7319
5.56k
                        list = list->next;
7320
5.56k
                    }
7321
2.25k
                    list = ent->children;
7322
#ifdef LIBXML_LEGACY_ENABLED
7323
                    if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7324
                        xmlAddEntityReference(ent, list, NULL);
7325
#endif /* LIBXML_LEGACY_ENABLED */
7326
2.25k
                }
7327
42.7k
      } else {
7328
360
    xmlFreeNodeList(list);
7329
360
    list = NULL;
7330
360
      }
7331
43.1k
  } else if ((ret != XML_ERR_OK) &&
7332
34.7k
       (ret != XML_WAR_UNDECLARED_ENTITY)) {
7333
14.0k
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7334
14.0k
         "Entity '%s' failed to parse\n", ent->name);
7335
14.0k
            if (ent->content != NULL)
7336
3.74k
                ent->content[0] = 0;
7337
14.0k
      xmlParserEntityCheck(ctxt, 0, ent, 0);
7338
20.7k
  } else if (list != NULL) {
7339
0
      xmlFreeNodeList(list);
7340
0
      list = NULL;
7341
0
  }
7342
77.8k
  if (ent->checked == 0)
7343
0
      ent->checked = 2;
7344
7345
        /* Prevent entity from being parsed and expanded twice (Bug 760367). */
7346
77.8k
        was_checked = 0;
7347
77.8k
    } else if (ent->checked != 1) {
7348
65.1k
  ctxt->nbentities += ent->checked / 2;
7349
65.1k
    }
7350
7351
    /*
7352
     * Now that the entity content has been gathered
7353
     * provide it to the application, this can take different forms based
7354
     * on the parsing modes.
7355
     */
7356
143k
    if (ent->children == NULL) {
7357
  /*
7358
   * Probably running in SAX mode and the callbacks don't
7359
   * build the entity content. So unless we already went
7360
   * though parsing for first checking go though the entity
7361
   * content to generate callbacks associated to the entity
7362
   */
7363
44.3k
  if (was_checked != 0) {
7364
6.83k
      void *user_data;
7365
      /*
7366
       * This is a bit hackish but this seems the best
7367
       * way to make sure both SAX and DOM entity support
7368
       * behaves okay.
7369
       */
7370
6.83k
      if (ctxt->userData == ctxt)
7371
6.83k
    user_data = NULL;
7372
0
      else
7373
0
    user_data = ctxt->userData;
7374
7375
6.83k
      if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7376
198
    ctxt->depth++;
7377
198
    ret = xmlParseBalancedChunkMemoryInternal(ctxt,
7378
198
           ent->content, user_data, NULL);
7379
198
    ctxt->depth--;
7380
6.63k
      } else if (ent->etype ==
7381
6.63k
           XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7382
6.63k
    ctxt->depth++;
7383
6.63k
    ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
7384
6.63k
         ctxt->sax, user_data, ctxt->depth,
7385
6.63k
         ent->URI, ent->ExternalID, NULL);
7386
6.63k
    ctxt->depth--;
7387
6.63k
      } else {
7388
0
    ret = XML_ERR_ENTITY_PE_INTERNAL;
7389
0
    xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7390
0
           "invalid entity type found\n", NULL);
7391
0
      }
7392
6.83k
      if (ret == XML_ERR_ENTITY_LOOP) {
7393
3
    xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7394
3
    return;
7395
3
      }
7396
6.83k
  }
7397
44.3k
  if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7398
44.3k
      (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7399
      /*
7400
       * Entity reference callback comes second, it's somewhat
7401
       * superfluous but a compatibility to historical behaviour
7402
       */
7403
15.3k
      ctxt->sax->reference(ctxt->userData, ent->name);
7404
15.3k
  }
7405
44.3k
  return;
7406
44.3k
    }
7407
7408
    /*
7409
     * If we didn't get any children for the entity being built
7410
     */
7411
98.7k
    if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7412
98.7k
  (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7413
  /*
7414
   * Create a node.
7415
   */
7416
58.8k
  ctxt->sax->reference(ctxt->userData, ent->name);
7417
58.8k
  return;
7418
58.8k
    }
7419
7420
39.8k
    if ((ctxt->replaceEntities) || (ent->children == NULL))  {
7421
  /*
7422
   * There is a problem on the handling of _private for entities
7423
   * (bug 155816): Should we copy the content of the field from
7424
   * the entity (possibly overwriting some value set by the user
7425
   * when a copy is created), should we leave it alone, or should
7426
   * we try to take care of different situations?  The problem
7427
   * is exacerbated by the usage of this field by the xmlReader.
7428
   * To fix this bug, we look at _private on the created node
7429
   * and, if it's NULL, we copy in whatever was in the entity.
7430
   * If it's not NULL we leave it alone.  This is somewhat of a
7431
   * hack - maybe we should have further tests to determine
7432
   * what to do.
7433
   */
7434
39.8k
  if ((ctxt->node != NULL) && (ent->children != NULL)) {
7435
      /*
7436
       * Seems we are generating the DOM content, do
7437
       * a simple tree copy for all references except the first
7438
       * In the first occurrence list contains the replacement.
7439
       */
7440
39.8k
      if (((list == NULL) && (ent->owner == 0)) ||
7441
39.8k
    (ctxt->parseMode == XML_PARSE_READER)) {
7442
11.8k
    xmlNodePtr nw = NULL, cur, firstChild = NULL;
7443
7444
    /*
7445
     * We are copying here, make sure there is no abuse
7446
     */
7447
11.8k
    ctxt->sizeentcopy += ent->length + 5;
7448
11.8k
    if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
7449
0
        return;
7450
7451
    /*
7452
     * when operating on a reader, the entities definitions
7453
     * are always owning the entities subtree.
7454
    if (ctxt->parseMode == XML_PARSE_READER)
7455
        ent->owner = 1;
7456
     */
7457
7458
11.8k
    cur = ent->children;
7459
14.7k
    while (cur != NULL) {
7460
14.7k
        nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7461
14.7k
        if (nw != NULL) {
7462
14.7k
      if (nw->_private == NULL)
7463
14.7k
          nw->_private = cur->_private;
7464
14.7k
      if (firstChild == NULL){
7465
11.8k
          firstChild = nw;
7466
11.8k
      }
7467
14.7k
      nw = xmlAddChild(ctxt->node, nw);
7468
14.7k
        }
7469
14.7k
        if (cur == ent->last) {
7470
      /*
7471
       * needed to detect some strange empty
7472
       * node cases in the reader tests
7473
       */
7474
11.8k
      if ((ctxt->parseMode == XML_PARSE_READER) &&
7475
11.8k
          (nw != NULL) &&
7476
11.8k
          (nw->type == XML_ELEMENT_NODE) &&
7477
11.8k
          (nw->children == NULL))
7478
516
          nw->extra = 1;
7479
7480
11.8k
      break;
7481
11.8k
        }
7482
2.90k
        cur = cur->next;
7483
2.90k
    }
7484
#ifdef LIBXML_LEGACY_ENABLED
7485
    if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7486
      xmlAddEntityReference(ent, firstChild, nw);
7487
#endif /* LIBXML_LEGACY_ENABLED */
7488
28.0k
      } else if ((list == NULL) || (ctxt->inputNr > 0)) {
7489
28.0k
    xmlNodePtr nw = NULL, cur, next, last,
7490
28.0k
         firstChild = NULL;
7491
7492
    /*
7493
     * We are copying here, make sure there is no abuse
7494
     */
7495
28.0k
    ctxt->sizeentcopy += ent->length + 5;
7496
28.0k
    if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
7497
0
        return;
7498
7499
    /*
7500
     * Copy the entity child list and make it the new
7501
     * entity child list. The goal is to make sure any
7502
     * ID or REF referenced will be the one from the
7503
     * document content and not the entity copy.
7504
     */
7505
28.0k
    cur = ent->children;
7506
28.0k
    ent->children = NULL;
7507
28.0k
    last = ent->last;
7508
28.0k
    ent->last = NULL;
7509
33.3k
    while (cur != NULL) {
7510
33.3k
        next = cur->next;
7511
33.3k
        cur->next = NULL;
7512
33.3k
        cur->parent = NULL;
7513
33.3k
        nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7514
33.3k
        if (nw != NULL) {
7515
33.3k
      if (nw->_private == NULL)
7516
33.3k
          nw->_private = cur->_private;
7517
33.3k
      if (firstChild == NULL){
7518
28.0k
          firstChild = cur;
7519
28.0k
      }
7520
33.3k
      xmlAddChild((xmlNodePtr) ent, nw);
7521
33.3k
      xmlAddChild(ctxt->node, cur);
7522
33.3k
        }
7523
33.3k
        if (cur == last)
7524
28.0k
      break;
7525
5.27k
        cur = next;
7526
5.27k
    }
7527
28.0k
    if (ent->owner == 0)
7528
2.25k
        ent->owner = 1;
7529
#ifdef LIBXML_LEGACY_ENABLED
7530
    if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7531
      xmlAddEntityReference(ent, firstChild, nw);
7532
#endif /* LIBXML_LEGACY_ENABLED */
7533
28.0k
      } else {
7534
0
    const xmlChar *nbktext;
7535
7536
    /*
7537
     * the name change is to avoid coalescing of the
7538
     * node with a possible previous text one which
7539
     * would make ent->children a dangling pointer
7540
     */
7541
0
    nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
7542
0
          -1);
7543
0
    if (ent->children->type == XML_TEXT_NODE)
7544
0
        ent->children->name = nbktext;
7545
0
    if ((ent->last != ent->children) &&
7546
0
        (ent->last->type == XML_TEXT_NODE))
7547
0
        ent->last->name = nbktext;
7548
0
    xmlAddChildList(ctxt->node, ent->children);
7549
0
      }
7550
7551
      /*
7552
       * This is to avoid a nasty side effect, see
7553
       * characters() in SAX.c
7554
       */
7555
39.8k
      ctxt->nodemem = 0;
7556
39.8k
      ctxt->nodelen = 0;
7557
39.8k
      return;
7558
39.8k
  }
7559
39.8k
    }
7560
39.8k
}
7561
7562
/**
7563
 * xmlParseEntityRef:
7564
 * @ctxt:  an XML parser context
7565
 *
7566
 * DEPRECATED: Internal function, don't use.
7567
 *
7568
 * parse ENTITY references declarations
7569
 *
7570
 * [68] EntityRef ::= '&' Name ';'
7571
 *
7572
 * [ WFC: Entity Declared ]
7573
 * In a document without any DTD, a document with only an internal DTD
7574
 * subset which contains no parameter entity references, or a document
7575
 * with "standalone='yes'", the Name given in the entity reference
7576
 * must match that in an entity declaration, except that well-formed
7577
 * documents need not declare any of the following entities: amp, lt,
7578
 * gt, apos, quot.  The declaration of a parameter entity must precede
7579
 * any reference to it.  Similarly, the declaration of a general entity
7580
 * must precede any reference to it which appears in a default value in an
7581
 * attribute-list declaration. Note that if entities are declared in the
7582
 * external subset or in external parameter entities, a non-validating
7583
 * processor is not obligated to read and process their declarations;
7584
 * for such documents, the rule that an entity must be declared is a
7585
 * well-formedness constraint only if standalone='yes'.
7586
 *
7587
 * [ WFC: Parsed Entity ]
7588
 * An entity reference must not contain the name of an unparsed entity
7589
 *
7590
 * Returns the xmlEntityPtr if found, or NULL otherwise.
7591
 */
7592
xmlEntityPtr
7593
4.08M
xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
7594
4.08M
    const xmlChar *name;
7595
4.08M
    xmlEntityPtr ent = NULL;
7596
7597
4.08M
    GROW;
7598
4.08M
    if (ctxt->instate == XML_PARSER_EOF)
7599
0
        return(NULL);
7600
7601
4.08M
    if (RAW != '&')
7602
0
        return(NULL);
7603
4.08M
    NEXT;
7604
4.08M
    name = xmlParseName(ctxt);
7605
4.08M
    if (name == NULL) {
7606
71.0k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7607
71.0k
           "xmlParseEntityRef: no name\n");
7608
71.0k
        return(NULL);
7609
71.0k
    }
7610
4.01M
    if (RAW != ';') {
7611
28.5k
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7612
28.5k
  return(NULL);
7613
28.5k
    }
7614
3.98M
    NEXT;
7615
7616
    /*
7617
     * Predefined entities override any extra definition
7618
     */
7619
3.98M
    if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7620
2.26M
        ent = xmlGetPredefinedEntity(name);
7621
2.26M
        if (ent != NULL)
7622
199k
            return(ent);
7623
2.26M
    }
7624
7625
    /*
7626
     * Increase the number of entity references parsed
7627
     */
7628
3.78M
    ctxt->nbentities++;
7629
7630
    /*
7631
     * Ask first SAX for entity resolution, otherwise try the
7632
     * entities which may have stored in the parser context.
7633
     */
7634
3.78M
    if (ctxt->sax != NULL) {
7635
3.78M
  if (ctxt->sax->getEntity != NULL)
7636
3.78M
      ent = ctxt->sax->getEntity(ctxt->userData, name);
7637
3.78M
  if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7638
3.78M
      (ctxt->options & XML_PARSE_OLDSAX))
7639
6.10k
      ent = xmlGetPredefinedEntity(name);
7640
3.78M
  if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7641
3.78M
      (ctxt->userData==ctxt)) {
7642
21.6k
      ent = xmlSAX2GetEntity(ctxt, name);
7643
21.6k
  }
7644
3.78M
    }
7645
3.78M
    if (ctxt->instate == XML_PARSER_EOF)
7646
0
  return(NULL);
7647
    /*
7648
     * [ WFC: Entity Declared ]
7649
     * In a document without any DTD, a document with only an
7650
     * internal DTD subset which contains no parameter entity
7651
     * references, or a document with "standalone='yes'", the
7652
     * Name given in the entity reference must match that in an
7653
     * entity declaration, except that well-formed documents
7654
     * need not declare any of the following entities: amp, lt,
7655
     * gt, apos, quot.
7656
     * The declaration of a parameter entity must precede any
7657
     * reference to it.
7658
     * Similarly, the declaration of a general entity must
7659
     * precede any reference to it which appears in a default
7660
     * value in an attribute-list declaration. Note that if
7661
     * entities are declared in the external subset or in
7662
     * external parameter entities, a non-validating processor
7663
     * is not obligated to read and process their declarations;
7664
     * for such documents, the rule that an entity must be
7665
     * declared is a well-formedness constraint only if
7666
     * standalone='yes'.
7667
     */
7668
3.78M
    if (ent == NULL) {
7669
73.7k
  if ((ctxt->standalone == 1) ||
7670
73.7k
      ((ctxt->hasExternalSubset == 0) &&
7671
71.9k
       (ctxt->hasPErefs == 0))) {
7672
28.2k
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7673
28.2k
         "Entity '%s' not defined\n", name);
7674
45.5k
  } else {
7675
45.5k
      xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7676
45.5k
         "Entity '%s' not defined\n", name);
7677
45.5k
      if ((ctxt->inSubset == 0) &&
7678
45.5k
    (ctxt->sax != NULL) &&
7679
45.5k
    (ctxt->sax->reference != NULL)) {
7680
45.2k
    ctxt->sax->reference(ctxt->userData, name);
7681
45.2k
      }
7682
45.5k
  }
7683
73.7k
  xmlParserEntityCheck(ctxt, 0, ent, 0);
7684
73.7k
  ctxt->valid = 0;
7685
73.7k
    }
7686
7687
    /*
7688
     * [ WFC: Parsed Entity ]
7689
     * An entity reference must not contain the name of an
7690
     * unparsed entity
7691
     */
7692
3.71M
    else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7693
897
  xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7694
897
     "Entity reference to unparsed entity %s\n", name);
7695
897
    }
7696
7697
    /*
7698
     * [ WFC: No External Entity References ]
7699
     * Attribute values cannot contain direct or indirect
7700
     * entity references to external entities.
7701
     */
7702
3.71M
    else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7703
3.71M
       (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7704
942
  xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7705
942
       "Attribute references external entity '%s'\n", name);
7706
942
    }
7707
    /*
7708
     * [ WFC: No < in Attribute Values ]
7709
     * The replacement text of any entity referred to directly or
7710
     * indirectly in an attribute value (other than "&lt;") must
7711
     * not contain a <.
7712
     */
7713
3.71M
    else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7714
3.71M
       (ent != NULL) && 
7715
3.71M
       (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
7716
155k
  if (((ent->checked & 1) || (ent->checked == 0)) &&
7717
155k
       (ent->content != NULL) && (xmlStrchr(ent->content, '<'))) {
7718
2.66k
      xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7719
2.66k
  "'<' in entity '%s' is not allowed in attributes values\n", name);
7720
2.66k
        }
7721
155k
    }
7722
7723
    /*
7724
     * Internal check, no parameter entities here ...
7725
     */
7726
3.55M
    else {
7727
3.55M
  switch (ent->etype) {
7728
0
      case XML_INTERNAL_PARAMETER_ENTITY:
7729
0
      case XML_EXTERNAL_PARAMETER_ENTITY:
7730
0
      xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7731
0
       "Attempt to reference the parameter entity '%s'\n",
7732
0
            name);
7733
0
      break;
7734
3.55M
      default:
7735
3.55M
      break;
7736
3.55M
  }
7737
3.55M
    }
7738
7739
    /*
7740
     * [ WFC: No Recursion ]
7741
     * A parsed entity must not contain a recursive reference
7742
     * to itself, either directly or indirectly.
7743
     * Done somewhere else
7744
     */
7745
3.78M
    return(ent);
7746
3.78M
}
7747
7748
/**
7749
 * xmlParseStringEntityRef:
7750
 * @ctxt:  an XML parser context
7751
 * @str:  a pointer to an index in the string
7752
 *
7753
 * parse ENTITY references declarations, but this version parses it from
7754
 * a string value.
7755
 *
7756
 * [68] EntityRef ::= '&' Name ';'
7757
 *
7758
 * [ WFC: Entity Declared ]
7759
 * In a document without any DTD, a document with only an internal DTD
7760
 * subset which contains no parameter entity references, or a document
7761
 * with "standalone='yes'", the Name given in the entity reference
7762
 * must match that in an entity declaration, except that well-formed
7763
 * documents need not declare any of the following entities: amp, lt,
7764
 * gt, apos, quot.  The declaration of a parameter entity must precede
7765
 * any reference to it.  Similarly, the declaration of a general entity
7766
 * must precede any reference to it which appears in a default value in an
7767
 * attribute-list declaration. Note that if entities are declared in the
7768
 * external subset or in external parameter entities, a non-validating
7769
 * processor is not obligated to read and process their declarations;
7770
 * for such documents, the rule that an entity must be declared is a
7771
 * well-formedness constraint only if standalone='yes'.
7772
 *
7773
 * [ WFC: Parsed Entity ]
7774
 * An entity reference must not contain the name of an unparsed entity
7775
 *
7776
 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7777
 * is updated to the current location in the string.
7778
 */
7779
static xmlEntityPtr
7780
193k
xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7781
193k
    xmlChar *name;
7782
193k
    const xmlChar *ptr;
7783
193k
    xmlChar cur;
7784
193k
    xmlEntityPtr ent = NULL;
7785
7786
193k
    if ((str == NULL) || (*str == NULL))
7787
0
        return(NULL);
7788
193k
    ptr = *str;
7789
193k
    cur = *ptr;
7790
193k
    if (cur != '&')
7791
0
  return(NULL);
7792
7793
193k
    ptr++;
7794
193k
    name = xmlParseStringName(ctxt, &ptr);
7795
193k
    if (name == NULL) {
7796
5.69k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7797
5.69k
           "xmlParseStringEntityRef: no name\n");
7798
5.69k
  *str = ptr;
7799
5.69k
  return(NULL);
7800
5.69k
    }
7801
187k
    if (*ptr != ';') {
7802
1.03k
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7803
1.03k
        xmlFree(name);
7804
1.03k
  *str = ptr;
7805
1.03k
  return(NULL);
7806
1.03k
    }
7807
186k
    ptr++;
7808
7809
7810
    /*
7811
     * Predefined entities override any extra definition
7812
     */
7813
186k
    if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7814
113k
        ent = xmlGetPredefinedEntity(name);
7815
113k
        if (ent != NULL) {
7816
5.95k
            xmlFree(name);
7817
5.95k
            *str = ptr;
7818
5.95k
            return(ent);
7819
5.95k
        }
7820
113k
    }
7821
7822
    /*
7823
     * Increase the number of entity references parsed
7824
     */
7825
180k
    ctxt->nbentities++;
7826
7827
    /*
7828
     * Ask first SAX for entity resolution, otherwise try the
7829
     * entities which may have stored in the parser context.
7830
     */
7831
180k
    if (ctxt->sax != NULL) {
7832
180k
  if (ctxt->sax->getEntity != NULL)
7833
180k
      ent = ctxt->sax->getEntity(ctxt->userData, name);
7834
180k
  if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX))
7835
9.32k
      ent = xmlGetPredefinedEntity(name);
7836
180k
  if ((ent == NULL) && (ctxt->userData==ctxt)) {
7837
28.0k
      ent = xmlSAX2GetEntity(ctxt, name);
7838
28.0k
  }
7839
180k
    }
7840
180k
    if (ctxt->instate == XML_PARSER_EOF) {
7841
0
  xmlFree(name);
7842
0
  return(NULL);
7843
0
    }
7844
7845
    /*
7846
     * [ WFC: Entity Declared ]
7847
     * In a document without any DTD, a document with only an
7848
     * internal DTD subset which contains no parameter entity
7849
     * references, or a document with "standalone='yes'", the
7850
     * Name given in the entity reference must match that in an
7851
     * entity declaration, except that well-formed documents
7852
     * need not declare any of the following entities: amp, lt,
7853
     * gt, apos, quot.
7854
     * The declaration of a parameter entity must precede any
7855
     * reference to it.
7856
     * Similarly, the declaration of a general entity must
7857
     * precede any reference to it which appears in a default
7858
     * value in an attribute-list declaration. Note that if
7859
     * entities are declared in the external subset or in
7860
     * external parameter entities, a non-validating processor
7861
     * is not obligated to read and process their declarations;
7862
     * for such documents, the rule that an entity must be
7863
     * declared is a well-formedness constraint only if
7864
     * standalone='yes'.
7865
     */
7866
180k
    if (ent == NULL) {
7867
28.0k
  if ((ctxt->standalone == 1) ||
7868
28.0k
      ((ctxt->hasExternalSubset == 0) &&
7869
27.2k
       (ctxt->hasPErefs == 0))) {
7870
25.9k
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7871
25.9k
         "Entity '%s' not defined\n", name);
7872
25.9k
  } else {
7873
2.11k
      xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7874
2.11k
        "Entity '%s' not defined\n",
7875
2.11k
        name);
7876
2.11k
  }
7877
28.0k
  xmlParserEntityCheck(ctxt, 0, ent, 0);
7878
  /* TODO ? check regressions ctxt->valid = 0; */
7879
28.0k
    }
7880
7881
    /*
7882
     * [ WFC: Parsed Entity ]
7883
     * An entity reference must not contain the name of an
7884
     * unparsed entity
7885
     */
7886
152k
    else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7887
105
  xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7888
105
     "Entity reference to unparsed entity %s\n", name);
7889
105
    }
7890
7891
    /*
7892
     * [ WFC: No External Entity References ]
7893
     * Attribute values cannot contain direct or indirect
7894
     * entity references to external entities.
7895
     */
7896
152k
    else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7897
152k
       (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7898
587
  xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7899
587
   "Attribute references external entity '%s'\n", name);
7900
587
    }
7901
    /*
7902
     * [ WFC: No < in Attribute Values ]
7903
     * The replacement text of any entity referred to directly or
7904
     * indirectly in an attribute value (other than "&lt;") must
7905
     * not contain a <.
7906
     */
7907
151k
    else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7908
151k
       (ent != NULL) && (ent->content != NULL) &&
7909
151k
       (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
7910
151k
       (xmlStrchr(ent->content, '<'))) {
7911
17.4k
  xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7912
17.4k
     "'<' in entity '%s' is not allowed in attributes values\n",
7913
17.4k
        name);
7914
17.4k
    }
7915
7916
    /*
7917
     * Internal check, no parameter entities here ...
7918
     */
7919
134k
    else {
7920
134k
  switch (ent->etype) {
7921
0
      case XML_INTERNAL_PARAMETER_ENTITY:
7922
0
      case XML_EXTERNAL_PARAMETER_ENTITY:
7923
0
    xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7924
0
       "Attempt to reference the parameter entity '%s'\n",
7925
0
          name);
7926
0
      break;
7927
134k
      default:
7928
134k
      break;
7929
134k
  }
7930
134k
    }
7931
7932
    /*
7933
     * [ WFC: No Recursion ]
7934
     * A parsed entity must not contain a recursive reference
7935
     * to itself, either directly or indirectly.
7936
     * Done somewhere else
7937
     */
7938
7939
180k
    xmlFree(name);
7940
180k
    *str = ptr;
7941
180k
    return(ent);
7942
180k
}
7943
7944
/**
7945
 * xmlParsePEReference:
7946
 * @ctxt:  an XML parser context
7947
 *
7948
 * DEPRECATED: Internal function, don't use.
7949
 *
7950
 * parse PEReference declarations
7951
 * The entity content is handled directly by pushing it's content as
7952
 * a new input stream.
7953
 *
7954
 * [69] PEReference ::= '%' Name ';'
7955
 *
7956
 * [ WFC: No Recursion ]
7957
 * A parsed entity must not contain a recursive
7958
 * reference to itself, either directly or indirectly.
7959
 *
7960
 * [ WFC: Entity Declared ]
7961
 * In a document without any DTD, a document with only an internal DTD
7962
 * subset which contains no parameter entity references, or a document
7963
 * with "standalone='yes'", ...  ... The declaration of a parameter
7964
 * entity must precede any reference to it...
7965
 *
7966
 * [ VC: Entity Declared ]
7967
 * In a document with an external subset or external parameter entities
7968
 * with "standalone='no'", ...  ... The declaration of a parameter entity
7969
 * must precede any reference to it...
7970
 *
7971
 * [ WFC: In DTD ]
7972
 * Parameter-entity references may only appear in the DTD.
7973
 * NOTE: misleading but this is handled.
7974
 */
7975
void
7976
xmlParsePEReference(xmlParserCtxtPtr ctxt)
7977
3.92M
{
7978
3.92M
    const xmlChar *name;
7979
3.92M
    xmlEntityPtr entity = NULL;
7980
3.92M
    xmlParserInputPtr input;
7981
7982
3.92M
    if (RAW != '%')
7983
3.32M
        return;
7984
598k
    NEXT;
7985
598k
    name = xmlParseName(ctxt);
7986
598k
    if (name == NULL) {
7987
10.5k
  xmlFatalErrMsg(ctxt, XML_ERR_PEREF_NO_NAME, "PEReference: no name\n");
7988
10.5k
  return;
7989
10.5k
    }
7990
587k
    if (xmlParserDebugEntities)
7991
0
  xmlGenericError(xmlGenericErrorContext,
7992
0
    "PEReference: %s\n", name);
7993
587k
    if (RAW != ';') {
7994
5.95k
  xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
7995
5.95k
        return;
7996
5.95k
    }
7997
7998
581k
    NEXT;
7999
8000
    /*
8001
     * Increase the number of entity references parsed
8002
     */
8003
581k
    ctxt->nbentities++;
8004
8005
    /*
8006
     * Request the entity from SAX
8007
     */
8008
581k
    if ((ctxt->sax != NULL) &&
8009
581k
  (ctxt->sax->getParameterEntity != NULL))
8010
581k
  entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8011
581k
    if (ctxt->instate == XML_PARSER_EOF)
8012
0
  return;
8013
581k
    if (entity == NULL) {
8014
  /*
8015
   * [ WFC: Entity Declared ]
8016
   * In a document without any DTD, a document with only an
8017
   * internal DTD subset which contains no parameter entity
8018
   * references, or a document with "standalone='yes'", ...
8019
   * ... The declaration of a parameter entity must precede
8020
   * any reference to it...
8021
   */
8022
39.3k
  if ((ctxt->standalone == 1) ||
8023
39.3k
      ((ctxt->hasExternalSubset == 0) &&
8024
39.2k
       (ctxt->hasPErefs == 0))) {
8025
1.98k
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8026
1.98k
            "PEReference: %%%s; not found\n",
8027
1.98k
            name);
8028
37.3k
  } else {
8029
      /*
8030
       * [ VC: Entity Declared ]
8031
       * In a document with an external subset or external
8032
       * parameter entities with "standalone='no'", ...
8033
       * ... The declaration of a parameter entity must
8034
       * precede any reference to it...
8035
       */
8036
37.3k
            if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
8037
16.8k
                xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
8038
16.8k
                                 "PEReference: %%%s; not found\n",
8039
16.8k
                                 name, NULL);
8040
16.8k
            } else
8041
20.5k
                xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8042
20.5k
                              "PEReference: %%%s; not found\n",
8043
20.5k
                              name, NULL);
8044
37.3k
            ctxt->valid = 0;
8045
37.3k
  }
8046
39.3k
  xmlParserEntityCheck(ctxt, 0, NULL, 0);
8047
542k
    } else {
8048
  /*
8049
   * Internal checking in case the entity quest barfed
8050
   */
8051
542k
  if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8052
542k
      (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8053
0
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8054
0
      "Internal: %%%s; is not a parameter entity\n",
8055
0
        name, NULL);
8056
542k
  } else {
8057
542k
            xmlChar start[4];
8058
542k
            xmlCharEncoding enc;
8059
8060
542k
      if (xmlParserEntityCheck(ctxt, 0, entity, 0))
8061
2
          return;
8062
8063
542k
      if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
8064
542k
          ((ctxt->options & XML_PARSE_NOENT) == 0) &&
8065
542k
    ((ctxt->options & XML_PARSE_DTDVALID) == 0) &&
8066
542k
    ((ctxt->options & XML_PARSE_DTDLOAD) == 0) &&
8067
542k
    ((ctxt->options & XML_PARSE_DTDATTR) == 0) &&
8068
542k
    (ctxt->replaceEntities == 0) &&
8069
542k
    (ctxt->validate == 0))
8070
145
    return;
8071
8072
542k
      input = xmlNewEntityInputStream(ctxt, entity);
8073
542k
      if (xmlPushInput(ctxt, input) < 0) {
8074
1.73k
                xmlFreeInputStream(input);
8075
1.73k
    return;
8076
1.73k
            }
8077
8078
540k
      if (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) {
8079
                /*
8080
                 * Get the 4 first bytes and decode the charset
8081
                 * if enc != XML_CHAR_ENCODING_NONE
8082
                 * plug some encoding conversion routines.
8083
                 * Note that, since we may have some non-UTF8
8084
                 * encoding (like UTF16, bug 135229), the 'length'
8085
                 * is not known, but we can calculate based upon
8086
                 * the amount of data in the buffer.
8087
                 */
8088
20.2k
                GROW
8089
20.2k
                if (ctxt->instate == XML_PARSER_EOF)
8090
0
                    return;
8091
20.2k
                if ((ctxt->input->end - ctxt->input->cur)>=4) {
8092
20.1k
                    start[0] = RAW;
8093
20.1k
                    start[1] = NXT(1);
8094
20.1k
                    start[2] = NXT(2);
8095
20.1k
                    start[3] = NXT(3);
8096
20.1k
                    enc = xmlDetectCharEncoding(start, 4);
8097
20.1k
                    if (enc != XML_CHAR_ENCODING_NONE) {
8098
2.71k
                        xmlSwitchEncoding(ctxt, enc);
8099
2.71k
                    }
8100
20.1k
                }
8101
8102
20.2k
                if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
8103
20.2k
                    (IS_BLANK_CH(NXT(5)))) {
8104
2.56k
                    xmlParseTextDecl(ctxt);
8105
2.56k
                }
8106
20.2k
            }
8107
540k
  }
8108
542k
    }
8109
579k
    ctxt->hasPErefs = 1;
8110
579k
}
8111
8112
/**
8113
 * xmlLoadEntityContent:
8114
 * @ctxt:  an XML parser context
8115
 * @entity: an unloaded system entity
8116
 *
8117
 * Load the original content of the given system entity from the
8118
 * ExternalID/SystemID given. This is to be used for Included in Literal
8119
 * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
8120
 *
8121
 * Returns 0 in case of success and -1 in case of failure
8122
 */
8123
static int
8124
5.63k
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
8125
5.63k
    xmlParserInputPtr input;
8126
5.63k
    xmlBufferPtr buf;
8127
5.63k
    int l, c;
8128
5.63k
    int count = 0;
8129
8130
5.63k
    if ((ctxt == NULL) || (entity == NULL) ||
8131
5.63k
        ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
8132
5.63k
   (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
8133
5.63k
  (entity->content != NULL)) {
8134
0
  xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8135
0
              "xmlLoadEntityContent parameter error");
8136
0
        return(-1);
8137
0
    }
8138
8139
5.63k
    if (xmlParserDebugEntities)
8140
0
  xmlGenericError(xmlGenericErrorContext,
8141
0
    "Reading %s entity content input\n", entity->name);
8142
8143
5.63k
    buf = xmlBufferCreate();
8144
5.63k
    if (buf == NULL) {
8145
0
  xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8146
0
              "xmlLoadEntityContent parameter error");
8147
0
        return(-1);
8148
0
    }
8149
5.63k
    xmlBufferSetAllocationScheme(buf, XML_BUFFER_ALLOC_DOUBLEIT);
8150
8151
5.63k
    input = xmlNewEntityInputStream(ctxt, entity);
8152
5.63k
    if (input == NULL) {
8153
274
  xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8154
274
              "xmlLoadEntityContent input error");
8155
274
  xmlBufferFree(buf);
8156
274
        return(-1);
8157
274
    }
8158
8159
    /*
8160
     * Push the entity as the current input, read char by char
8161
     * saving to the buffer until the end of the entity or an error
8162
     */
8163
5.36k
    if (xmlPushInput(ctxt, input) < 0) {
8164
0
        xmlBufferFree(buf);
8165
0
  xmlFreeInputStream(input);
8166
0
  return(-1);
8167
0
    }
8168
8169
5.36k
    GROW;
8170
5.36k
    c = CUR_CHAR(l);
8171
4.06M
    while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
8172
4.06M
           (IS_CHAR(c))) {
8173
4.06M
        xmlBufferAdd(buf, ctxt->input->cur, l);
8174
4.06M
  if (count++ > XML_PARSER_CHUNK_SIZE) {
8175
37.8k
      count = 0;
8176
37.8k
      GROW;
8177
37.8k
            if (ctxt->instate == XML_PARSER_EOF) {
8178
0
                xmlBufferFree(buf);
8179
0
                return(-1);
8180
0
            }
8181
37.8k
  }
8182
4.06M
  NEXTL(l);
8183
4.06M
  c = CUR_CHAR(l);
8184
4.06M
  if (c == 0) {
8185
4.25k
      count = 0;
8186
4.25k
      GROW;
8187
4.25k
            if (ctxt->instate == XML_PARSER_EOF) {
8188
0
                xmlBufferFree(buf);
8189
0
                return(-1);
8190
0
            }
8191
4.25k
      c = CUR_CHAR(l);
8192
4.25k
  }
8193
4.06M
    }
8194
8195
5.36k
    if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
8196
3.23k
        xmlPopInput(ctxt);
8197
3.23k
    } else if (!IS_CHAR(c)) {
8198
2.12k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
8199
2.12k
                          "xmlLoadEntityContent: invalid char value %d\n",
8200
2.12k
                    c);
8201
2.12k
  xmlBufferFree(buf);
8202
2.12k
  return(-1);
8203
2.12k
    }
8204
3.23k
    entity->content = buf->content;
8205
3.23k
    buf->content = NULL;
8206
3.23k
    xmlBufferFree(buf);
8207
8208
3.23k
    return(0);
8209
5.36k
}
8210
8211
/**
8212
 * xmlParseStringPEReference:
8213
 * @ctxt:  an XML parser context
8214
 * @str:  a pointer to an index in the string
8215
 *
8216
 * parse PEReference declarations
8217
 *
8218
 * [69] PEReference ::= '%' Name ';'
8219
 *
8220
 * [ WFC: No Recursion ]
8221
 * A parsed entity must not contain a recursive
8222
 * reference to itself, either directly or indirectly.
8223
 *
8224
 * [ WFC: Entity Declared ]
8225
 * In a document without any DTD, a document with only an internal DTD
8226
 * subset which contains no parameter entity references, or a document
8227
 * with "standalone='yes'", ...  ... The declaration of a parameter
8228
 * entity must precede any reference to it...
8229
 *
8230
 * [ VC: Entity Declared ]
8231
 * In a document with an external subset or external parameter entities
8232
 * with "standalone='no'", ...  ... The declaration of a parameter entity
8233
 * must precede any reference to it...
8234
 *
8235
 * [ WFC: In DTD ]
8236
 * Parameter-entity references may only appear in the DTD.
8237
 * NOTE: misleading but this is handled.
8238
 *
8239
 * Returns the string of the entity content.
8240
 *         str is updated to the current value of the index
8241
 */
8242
static xmlEntityPtr
8243
826k
xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
8244
826k
    const xmlChar *ptr;
8245
826k
    xmlChar cur;
8246
826k
    xmlChar *name;
8247
826k
    xmlEntityPtr entity = NULL;
8248
8249
826k
    if ((str == NULL) || (*str == NULL)) return(NULL);
8250
826k
    ptr = *str;
8251
826k
    cur = *ptr;
8252
826k
    if (cur != '%')
8253
0
        return(NULL);
8254
826k
    ptr++;
8255
826k
    name = xmlParseStringName(ctxt, &ptr);
8256
826k
    if (name == NULL) {
8257
188k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8258
188k
           "xmlParseStringPEReference: no name\n");
8259
188k
  *str = ptr;
8260
188k
  return(NULL);
8261
188k
    }
8262
638k
    cur = *ptr;
8263
638k
    if (cur != ';') {
8264
46.9k
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
8265
46.9k
  xmlFree(name);
8266
46.9k
  *str = ptr;
8267
46.9k
  return(NULL);
8268
46.9k
    }
8269
591k
    ptr++;
8270
8271
    /*
8272
     * Increase the number of entity references parsed
8273
     */
8274
591k
    ctxt->nbentities++;
8275
8276
    /*
8277
     * Request the entity from SAX
8278
     */
8279
591k
    if ((ctxt->sax != NULL) &&
8280
591k
  (ctxt->sax->getParameterEntity != NULL))
8281
591k
  entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8282
591k
    if (ctxt->instate == XML_PARSER_EOF) {
8283
0
  xmlFree(name);
8284
0
  *str = ptr;
8285
0
  return(NULL);
8286
0
    }
8287
591k
    if (entity == NULL) {
8288
  /*
8289
   * [ WFC: Entity Declared ]
8290
   * In a document without any DTD, a document with only an
8291
   * internal DTD subset which contains no parameter entity
8292
   * references, or a document with "standalone='yes'", ...
8293
   * ... The declaration of a parameter entity must precede
8294
   * any reference to it...
8295
   */
8296
42.2k
  if ((ctxt->standalone == 1) ||
8297
42.2k
      ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) {
8298
0
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8299
0
     "PEReference: %%%s; not found\n", name);
8300
42.2k
  } else {
8301
      /*
8302
       * [ VC: Entity Declared ]
8303
       * In a document with an external subset or external
8304
       * parameter entities with "standalone='no'", ...
8305
       * ... The declaration of a parameter entity must
8306
       * precede any reference to it...
8307
       */
8308
42.2k
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8309
42.2k
        "PEReference: %%%s; not found\n",
8310
42.2k
        name, NULL);
8311
42.2k
      ctxt->valid = 0;
8312
42.2k
  }
8313
42.2k
  xmlParserEntityCheck(ctxt, 0, NULL, 0);
8314
549k
    } else {
8315
  /*
8316
   * Internal checking in case the entity quest barfed
8317
   */
8318
549k
  if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8319
549k
      (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8320
0
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8321
0
        "%%%s; is not a parameter entity\n",
8322
0
        name, NULL);
8323
0
  }
8324
549k
    }
8325
591k
    ctxt->hasPErefs = 1;
8326
591k
    xmlFree(name);
8327
591k
    *str = ptr;
8328
591k
    return(entity);
8329
591k
}
8330
8331
/**
8332
 * xmlParseDocTypeDecl:
8333
 * @ctxt:  an XML parser context
8334
 *
8335
 * DEPRECATED: Internal function, don't use.
8336
 *
8337
 * parse a DOCTYPE declaration
8338
 *
8339
 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
8340
 *                      ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8341
 *
8342
 * [ VC: Root Element Type ]
8343
 * The Name in the document type declaration must match the element
8344
 * type of the root element.
8345
 */
8346
8347
void
8348
457k
xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
8349
457k
    const xmlChar *name = NULL;
8350
457k
    xmlChar *ExternalID = NULL;
8351
457k
    xmlChar *URI = NULL;
8352
8353
    /*
8354
     * We know that '<!DOCTYPE' has been detected.
8355
     */
8356
457k
    SKIP(9);
8357
8358
457k
    SKIP_BLANKS;
8359
8360
    /*
8361
     * Parse the DOCTYPE name.
8362
     */
8363
457k
    name = xmlParseName(ctxt);
8364
457k
    if (name == NULL) {
8365
2.93k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8366
2.93k
           "xmlParseDocTypeDecl : no DOCTYPE name !\n");
8367
2.93k
    }
8368
457k
    ctxt->intSubName = name;
8369
8370
457k
    SKIP_BLANKS;
8371
8372
    /*
8373
     * Check for SystemID and ExternalID
8374
     */
8375
457k
    URI = xmlParseExternalID(ctxt, &ExternalID, 1);
8376
8377
457k
    if ((URI != NULL) || (ExternalID != NULL)) {
8378
171k
        ctxt->hasExternalSubset = 1;
8379
171k
    }
8380
457k
    ctxt->extSubURI = URI;
8381
457k
    ctxt->extSubSystem = ExternalID;
8382
8383
457k
    SKIP_BLANKS;
8384
8385
    /*
8386
     * Create and update the internal subset.
8387
     */
8388
457k
    if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
8389
457k
  (!ctxt->disableSAX))
8390
441k
  ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
8391
457k
    if (ctxt->instate == XML_PARSER_EOF)
8392
0
  return;
8393
8394
    /*
8395
     * Is there any internal subset declarations ?
8396
     * they are handled separately in xmlParseInternalSubset()
8397
     */
8398
457k
    if (RAW == '[')
8399
335k
  return;
8400
8401
    /*
8402
     * We should be at the end of the DOCTYPE declaration.
8403
     */
8404
121k
    if (RAW != '>') {
8405
19.0k
  xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8406
19.0k
    }
8407
121k
    NEXT;
8408
121k
}
8409
8410
/**
8411
 * xmlParseInternalSubset:
8412
 * @ctxt:  an XML parser context
8413
 *
8414
 * parse the internal subset declaration
8415
 *
8416
 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8417
 */
8418
8419
static void
8420
280k
xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
8421
    /*
8422
     * Is there any DTD definition ?
8423
     */
8424
280k
    if (RAW == '[') {
8425
280k
        int baseInputNr = ctxt->inputNr;
8426
280k
        ctxt->instate = XML_PARSER_DTD;
8427
280k
        NEXT;
8428
  /*
8429
   * Parse the succession of Markup declarations and
8430
   * PEReferences.
8431
   * Subsequence (markupdecl | PEReference | S)*
8432
   */
8433
3.55M
  while (((RAW != ']') || (ctxt->inputNr > baseInputNr)) &&
8434
3.55M
               (ctxt->instate != XML_PARSER_EOF)) {
8435
3.35M
      int id = ctxt->input->id;
8436
3.35M
      unsigned long cons = CUR_CONSUMED;
8437
8438
3.35M
      SKIP_BLANKS;
8439
3.35M
      xmlParseMarkupDecl(ctxt);
8440
3.35M
      xmlParsePEReference(ctxt);
8441
8442
            /*
8443
             * Conditional sections are allowed from external entities included
8444
             * by PE References in the internal subset.
8445
             */
8446
3.35M
            if ((ctxt->inputNr > 1) && (ctxt->input->filename != NULL) &&
8447
3.35M
                (RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
8448
0
                xmlParseConditionalSections(ctxt);
8449
0
            }
8450
8451
3.35M
      if ((id == ctxt->input->id) && (cons == CUR_CONSUMED)) {
8452
85.3k
    xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8453
85.3k
       "xmlParseInternalSubset: error detected in Markup declaration\n");
8454
85.3k
                if (ctxt->inputNr > baseInputNr)
8455
8.27k
                    xmlPopInput(ctxt);
8456
77.0k
                else
8457
77.0k
        break;
8458
85.3k
      }
8459
3.35M
  }
8460
280k
  if (RAW == ']') {
8461
190k
      NEXT;
8462
190k
      SKIP_BLANKS;
8463
190k
  }
8464
280k
    }
8465
8466
    /*
8467
     * We should be at the end of the DOCTYPE declaration.
8468
     */
8469
280k
    if (RAW != '>') {
8470
90.0k
  xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8471
90.0k
  return;
8472
90.0k
    }
8473
190k
    NEXT;
8474
190k
}
8475
8476
#ifdef LIBXML_SAX1_ENABLED
8477
/**
8478
 * xmlParseAttribute:
8479
 * @ctxt:  an XML parser context
8480
 * @value:  a xmlChar ** used to store the value of the attribute
8481
 *
8482
 * DEPRECATED: Internal function, don't use.
8483
 *
8484
 * parse an attribute
8485
 *
8486
 * [41] Attribute ::= Name Eq AttValue
8487
 *
8488
 * [ WFC: No External Entity References ]
8489
 * Attribute values cannot contain direct or indirect entity references
8490
 * to external entities.
8491
 *
8492
 * [ WFC: No < in Attribute Values ]
8493
 * The replacement text of any entity referred to directly or indirectly in
8494
 * an attribute value (other than "&lt;") must not contain a <.
8495
 *
8496
 * [ VC: Attribute Value Type ]
8497
 * The attribute must have been declared; the value must be of the type
8498
 * declared for it.
8499
 *
8500
 * [25] Eq ::= S? '=' S?
8501
 *
8502
 * With namespace:
8503
 *
8504
 * [NS 11] Attribute ::= QName Eq AttValue
8505
 *
8506
 * Also the case QName == xmlns:??? is handled independently as a namespace
8507
 * definition.
8508
 *
8509
 * Returns the attribute name, and the value in *value.
8510
 */
8511
8512
const xmlChar *
8513
3.51M
xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
8514
3.51M
    const xmlChar *name;
8515
3.51M
    xmlChar *val;
8516
8517
3.51M
    *value = NULL;
8518
3.51M
    GROW;
8519
3.51M
    name = xmlParseName(ctxt);
8520
3.51M
    if (name == NULL) {
8521
211k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8522
211k
                 "error parsing attribute name\n");
8523
211k
        return(NULL);
8524
211k
    }
8525
8526
    /*
8527
     * read the value
8528
     */
8529
3.30M
    SKIP_BLANKS;
8530
3.30M
    if (RAW == '=') {
8531
3.20M
        NEXT;
8532
3.20M
  SKIP_BLANKS;
8533
3.20M
  val = xmlParseAttValue(ctxt);
8534
3.20M
  ctxt->instate = XML_PARSER_CONTENT;
8535
3.20M
    } else {
8536
99.3k
  xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8537
99.3k
         "Specification mandates value for attribute %s\n", name);
8538
99.3k
  return(NULL);
8539
99.3k
    }
8540
8541
    /*
8542
     * Check that xml:lang conforms to the specification
8543
     * No more registered as an error, just generate a warning now
8544
     * since this was deprecated in XML second edition
8545
     */
8546
3.20M
    if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8547
7.55k
  if (!xmlCheckLanguageID(val)) {
8548
3.29k
      xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8549
3.29k
              "Malformed value for xml:lang : %s\n",
8550
3.29k
        val, NULL);
8551
3.29k
  }
8552
7.55k
    }
8553
8554
    /*
8555
     * Check that xml:space conforms to the specification
8556
     */
8557
3.20M
    if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8558
1.10k
  if (xmlStrEqual(val, BAD_CAST "default"))
8559
109
      *(ctxt->space) = 0;
8560
999
  else if (xmlStrEqual(val, BAD_CAST "preserve"))
8561
476
      *(ctxt->space) = 1;
8562
523
  else {
8563
523
    xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8564
523
"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8565
523
                                 val, NULL);
8566
523
  }
8567
1.10k
    }
8568
8569
3.20M
    *value = val;
8570
3.20M
    return(name);
8571
3.30M
}
8572
8573
/**
8574
 * xmlParseStartTag:
8575
 * @ctxt:  an XML parser context
8576
 *
8577
 * DEPRECATED: Internal function, don't use.
8578
 *
8579
 * parse a start of tag either for rule element or
8580
 * EmptyElement. In both case we don't parse the tag closing chars.
8581
 *
8582
 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8583
 *
8584
 * [ WFC: Unique Att Spec ]
8585
 * No attribute name may appear more than once in the same start-tag or
8586
 * empty-element tag.
8587
 *
8588
 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8589
 *
8590
 * [ WFC: Unique Att Spec ]
8591
 * No attribute name may appear more than once in the same start-tag or
8592
 * empty-element tag.
8593
 *
8594
 * With namespace:
8595
 *
8596
 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8597
 *
8598
 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8599
 *
8600
 * Returns the element name parsed
8601
 */
8602
8603
const xmlChar *
8604
5.12M
xmlParseStartTag(xmlParserCtxtPtr ctxt) {
8605
5.12M
    const xmlChar *name;
8606
5.12M
    const xmlChar *attname;
8607
5.12M
    xmlChar *attvalue;
8608
5.12M
    const xmlChar **atts = ctxt->atts;
8609
5.12M
    int nbatts = 0;
8610
5.12M
    int maxatts = ctxt->maxatts;
8611
5.12M
    int i;
8612
8613
5.12M
    if (RAW != '<') return(NULL);
8614
5.12M
    NEXT1;
8615
8616
5.12M
    name = xmlParseName(ctxt);
8617
5.12M
    if (name == NULL) {
8618
106k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8619
106k
       "xmlParseStartTag: invalid element name\n");
8620
106k
        return(NULL);
8621
106k
    }
8622
8623
    /*
8624
     * Now parse the attributes, it ends up with the ending
8625
     *
8626
     * (S Attribute)* S?
8627
     */
8628
5.02M
    SKIP_BLANKS;
8629
5.02M
    GROW;
8630
8631
6.23M
    while (((RAW != '>') &&
8632
6.23M
     ((RAW != '/') || (NXT(1) != '>')) &&
8633
6.23M
     (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
8634
3.51M
        int id = ctxt->input->id;
8635
3.51M
  unsigned long cons = CUR_CONSUMED;
8636
8637
3.51M
  attname = xmlParseAttribute(ctxt, &attvalue);
8638
3.51M
        if ((attname != NULL) && (attvalue != NULL)) {
8639
      /*
8640
       * [ WFC: Unique Att Spec ]
8641
       * No attribute name may appear more than once in the same
8642
       * start-tag or empty-element tag.
8643
       */
8644
4.24M
      for (i = 0; i < nbatts;i += 2) {
8645
1.05M
          if (xmlStrEqual(atts[i], attname)) {
8646
2.30k
        xmlErrAttributeDup(ctxt, NULL, attname);
8647
2.30k
        xmlFree(attvalue);
8648
2.30k
        goto failed;
8649
2.30k
    }
8650
1.05M
      }
8651
      /*
8652
       * Add the pair to atts
8653
       */
8654
3.18M
      if (atts == NULL) {
8655
260k
          maxatts = 22; /* allow for 10 attrs by default */
8656
260k
          atts = (const xmlChar **)
8657
260k
           xmlMalloc(maxatts * sizeof(xmlChar *));
8658
260k
    if (atts == NULL) {
8659
0
        xmlErrMemory(ctxt, NULL);
8660
0
        if (attvalue != NULL)
8661
0
      xmlFree(attvalue);
8662
0
        goto failed;
8663
0
    }
8664
260k
    ctxt->atts = atts;
8665
260k
    ctxt->maxatts = maxatts;
8666
2.92M
      } else if (nbatts + 4 > maxatts) {
8667
234
          const xmlChar **n;
8668
8669
234
          maxatts *= 2;
8670
234
          n = (const xmlChar **) xmlRealloc((void *) atts,
8671
234
               maxatts * sizeof(const xmlChar *));
8672
234
    if (n == NULL) {
8673
0
        xmlErrMemory(ctxt, NULL);
8674
0
        if (attvalue != NULL)
8675
0
      xmlFree(attvalue);
8676
0
        goto failed;
8677
0
    }
8678
234
    atts = n;
8679
234
    ctxt->atts = atts;
8680
234
    ctxt->maxatts = maxatts;
8681
234
      }
8682
3.18M
      atts[nbatts++] = attname;
8683
3.18M
      atts[nbatts++] = attvalue;
8684
3.18M
      atts[nbatts] = NULL;
8685
3.18M
      atts[nbatts + 1] = NULL;
8686
3.18M
  } else {
8687
322k
      if (attvalue != NULL)
8688
0
    xmlFree(attvalue);
8689
322k
  }
8690
8691
3.51M
failed:
8692
8693
3.51M
  GROW
8694
3.51M
  if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8695
2.08M
      break;
8696
1.42M
  if (SKIP_BLANKS == 0) {
8697
424k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8698
424k
         "attributes construct error\n");
8699
424k
  }
8700
1.42M
        if ((cons == CUR_CONSUMED) && (id == ctxt->input->id) &&
8701
1.42M
            (attname == NULL) && (attvalue == NULL)) {
8702
211k
      xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
8703
211k
         "xmlParseStartTag: problem parsing attributes\n");
8704
211k
      break;
8705
211k
  }
8706
1.21M
  SHRINK;
8707
1.21M
        GROW;
8708
1.21M
    }
8709
8710
    /*
8711
     * SAX: Start of Element !
8712
     */
8713
5.02M
    if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
8714
5.02M
  (!ctxt->disableSAX)) {
8715
4.40M
  if (nbatts > 0)
8716
1.89M
      ctxt->sax->startElement(ctxt->userData, name, atts);
8717
2.51M
  else
8718
2.51M
      ctxt->sax->startElement(ctxt->userData, name, NULL);
8719
4.40M
    }
8720
8721
5.02M
    if (atts != NULL) {
8722
        /* Free only the content strings */
8723
6.65M
        for (i = 1;i < nbatts;i+=2)
8724
3.18M
      if (atts[i] != NULL)
8725
3.18M
         xmlFree((xmlChar *) atts[i]);
8726
3.46M
    }
8727
5.02M
    return(name);
8728
5.02M
}
8729
8730
/**
8731
 * xmlParseEndTag1:
8732
 * @ctxt:  an XML parser context
8733
 * @line:  line of the start tag
8734
 * @nsNr:  number of namespaces on the start tag
8735
 *
8736
 * parse an end of tag
8737
 *
8738
 * [42] ETag ::= '</' Name S? '>'
8739
 *
8740
 * With namespace
8741
 *
8742
 * [NS 9] ETag ::= '</' QName S? '>'
8743
 */
8744
8745
static void
8746
1.54M
xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
8747
1.54M
    const xmlChar *name;
8748
8749
1.54M
    GROW;
8750
1.54M
    if ((RAW != '<') || (NXT(1) != '/')) {
8751
0
  xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
8752
0
           "xmlParseEndTag: '</' not found\n");
8753
0
  return;
8754
0
    }
8755
1.54M
    SKIP(2);
8756
8757
1.54M
    name = xmlParseNameAndCompare(ctxt,ctxt->name);
8758
8759
    /*
8760
     * We should definitely be at the ending "S? '>'" part
8761
     */
8762
1.54M
    GROW;
8763
1.54M
    SKIP_BLANKS;
8764
1.54M
    if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
8765
52.8k
  xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
8766
52.8k
    } else
8767
1.48M
  NEXT1;
8768
8769
    /*
8770
     * [ WFC: Element Type Match ]
8771
     * The Name in an element's end-tag must match the element type in the
8772
     * start-tag.
8773
     *
8774
     */
8775
1.54M
    if (name != (xmlChar*)1) {
8776
133k
        if (name == NULL) name = BAD_CAST "unparsable";
8777
133k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
8778
133k
         "Opening and ending tag mismatch: %s line %d and %s\n",
8779
133k
                    ctxt->name, line, name);
8780
133k
    }
8781
8782
    /*
8783
     * SAX: End of Tag
8784
     */
8785
1.54M
    if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8786
1.54M
  (!ctxt->disableSAX))
8787
1.09M
        ctxt->sax->endElement(ctxt->userData, ctxt->name);
8788
8789
1.54M
    namePop(ctxt);
8790
1.54M
    spacePop(ctxt);
8791
1.54M
    return;
8792
1.54M
}
8793
8794
/**
8795
 * xmlParseEndTag:
8796
 * @ctxt:  an XML parser context
8797
 *
8798
 * DEPRECATED: Internal function, don't use.
8799
 *
8800
 * parse an end of tag
8801
 *
8802
 * [42] ETag ::= '</' Name S? '>'
8803
 *
8804
 * With namespace
8805
 *
8806
 * [NS 9] ETag ::= '</' QName S? '>'
8807
 */
8808
8809
void
8810
0
xmlParseEndTag(xmlParserCtxtPtr ctxt) {
8811
0
    xmlParseEndTag1(ctxt, 0);
8812
0
}
8813
#endif /* LIBXML_SAX1_ENABLED */
8814
8815
/************************************************************************
8816
 *                  *
8817
 *          SAX 2 specific operations       *
8818
 *                  *
8819
 ************************************************************************/
8820
8821
/*
8822
 * xmlGetNamespace:
8823
 * @ctxt:  an XML parser context
8824
 * @prefix:  the prefix to lookup
8825
 *
8826
 * Lookup the namespace name for the @prefix (which ca be NULL)
8827
 * The prefix must come from the @ctxt->dict dictionary
8828
 *
8829
 * Returns the namespace name or NULL if not bound
8830
 */
8831
static const xmlChar *
8832
8.57M
xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
8833
8.57M
    int i;
8834
8835
8.57M
    if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
8836
8.98M
    for (i = ctxt->nsNr - 2;i >= 0;i-=2)
8837
1.99M
        if (ctxt->nsTab[i] == prefix) {
8838
1.56M
      if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
8839
1.93k
          return(NULL);
8840
1.55M
      return(ctxt->nsTab[i + 1]);
8841
1.56M
  }
8842
6.98M
    return(NULL);
8843
8.54M
}
8844
8845
/**
8846
 * xmlParseQName:
8847
 * @ctxt:  an XML parser context
8848
 * @prefix:  pointer to store the prefix part
8849
 *
8850
 * parse an XML Namespace QName
8851
 *
8852
 * [6]  QName  ::= (Prefix ':')? LocalPart
8853
 * [7]  Prefix  ::= NCName
8854
 * [8]  LocalPart  ::= NCName
8855
 *
8856
 * Returns the Name parsed or NULL
8857
 */
8858
8859
static const xmlChar *
8860
14.9M
xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8861
14.9M
    const xmlChar *l, *p;
8862
8863
14.9M
    GROW;
8864
8865
14.9M
    l = xmlParseNCName(ctxt);
8866
14.9M
    if (l == NULL) {
8867
334k
        if (CUR == ':') {
8868
7.99k
      l = xmlParseName(ctxt);
8869
7.99k
      if (l != NULL) {
8870
7.99k
          xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8871
7.99k
             "Failed to parse QName '%s'\n", l, NULL, NULL);
8872
7.99k
    *prefix = NULL;
8873
7.99k
    return(l);
8874
7.99k
      }
8875
7.99k
  }
8876
326k
        return(NULL);
8877
334k
    }
8878
14.6M
    if (CUR == ':') {
8879
2.05M
        NEXT;
8880
2.05M
  p = l;
8881
2.05M
  l = xmlParseNCName(ctxt);
8882
2.05M
  if (l == NULL) {
8883
19.1k
      xmlChar *tmp;
8884
8885
19.1k
            if (ctxt->instate == XML_PARSER_EOF)
8886
0
                return(NULL);
8887
19.1k
            xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8888
19.1k
               "Failed to parse QName '%s:'\n", p, NULL, NULL);
8889
19.1k
      l = xmlParseNmtoken(ctxt);
8890
19.1k
      if (l == NULL) {
8891
12.5k
                if (ctxt->instate == XML_PARSER_EOF)
8892
0
                    return(NULL);
8893
12.5k
    tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
8894
12.5k
            } else {
8895
6.57k
    tmp = xmlBuildQName(l, p, NULL, 0);
8896
6.57k
    xmlFree((char *)l);
8897
6.57k
      }
8898
19.1k
      p = xmlDictLookup(ctxt->dict, tmp, -1);
8899
19.1k
      if (tmp != NULL) xmlFree(tmp);
8900
19.1k
      *prefix = NULL;
8901
19.1k
      return(p);
8902
19.1k
  }
8903
2.03M
  if (CUR == ':') {
8904
10.3k
      xmlChar *tmp;
8905
8906
10.3k
            xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8907
10.3k
               "Failed to parse QName '%s:%s:'\n", p, l, NULL);
8908
10.3k
      NEXT;
8909
10.3k
      tmp = (xmlChar *) xmlParseName(ctxt);
8910
10.3k
      if (tmp != NULL) {
8911
7.67k
          tmp = xmlBuildQName(tmp, l, NULL, 0);
8912
7.67k
    l = xmlDictLookup(ctxt->dict, tmp, -1);
8913
7.67k
    if (tmp != NULL) xmlFree(tmp);
8914
7.67k
    *prefix = p;
8915
7.67k
    return(l);
8916
7.67k
      }
8917
2.64k
            if (ctxt->instate == XML_PARSER_EOF)
8918
0
                return(NULL);
8919
2.64k
      tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
8920
2.64k
      l = xmlDictLookup(ctxt->dict, tmp, -1);
8921
2.64k
      if (tmp != NULL) xmlFree(tmp);
8922
2.64k
      *prefix = p;
8923
2.64k
      return(l);
8924
2.64k
  }
8925
2.02M
  *prefix = p;
8926
2.02M
    } else
8927
12.5M
        *prefix = NULL;
8928
14.6M
    return(l);
8929
14.6M
}
8930
8931
/**
8932
 * xmlParseQNameAndCompare:
8933
 * @ctxt:  an XML parser context
8934
 * @name:  the localname
8935
 * @prefix:  the prefix, if any.
8936
 *
8937
 * parse an XML name and compares for match
8938
 * (specialized for endtag parsing)
8939
 *
8940
 * Returns NULL for an illegal name, (xmlChar*) 1 for success
8941
 * and the name for mismatch
8942
 */
8943
8944
static const xmlChar *
8945
xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8946
462k
                        xmlChar const *prefix) {
8947
462k
    const xmlChar *cmp;
8948
462k
    const xmlChar *in;
8949
462k
    const xmlChar *ret;
8950
462k
    const xmlChar *prefix2;
8951
8952
462k
    if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8953
8954
462k
    GROW;
8955
462k
    in = ctxt->input->cur;
8956
8957
462k
    cmp = prefix;
8958
1.65M
    while (*in != 0 && *in == *cmp) {
8959
1.19M
  ++in;
8960
1.19M
  ++cmp;
8961
1.19M
    }
8962
462k
    if ((*cmp == 0) && (*in == ':')) {
8963
441k
        in++;
8964
441k
  cmp = name;
8965
3.69M
  while (*in != 0 && *in == *cmp) {
8966
3.25M
      ++in;
8967
3.25M
      ++cmp;
8968
3.25M
  }
8969
441k
  if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
8970
      /* success */
8971
377k
            ctxt->input->col += in - ctxt->input->cur;
8972
377k
      ctxt->input->cur = in;
8973
377k
      return((const xmlChar*) 1);
8974
377k
  }
8975
441k
    }
8976
    /*
8977
     * all strings coms from the dictionary, equality can be done directly
8978
     */
8979
84.9k
    ret = xmlParseQName (ctxt, &prefix2);
8980
84.9k
    if ((ret == name) && (prefix == prefix2))
8981
1.17k
  return((const xmlChar*) 1);
8982
83.7k
    return ret;
8983
84.9k
}
8984
8985
/**
8986
 * xmlParseAttValueInternal:
8987
 * @ctxt:  an XML parser context
8988
 * @len:  attribute len result
8989
 * @alloc:  whether the attribute was reallocated as a new string
8990
 * @normalize:  if 1 then further non-CDATA normalization must be done
8991
 *
8992
 * parse a value for an attribute.
8993
 * NOTE: if no normalization is needed, the routine will return pointers
8994
 *       directly from the data buffer.
8995
 *
8996
 * 3.3.3 Attribute-Value Normalization:
8997
 * Before the value of an attribute is passed to the application or
8998
 * checked for validity, the XML processor must normalize it as follows:
8999
 * - a character reference is processed by appending the referenced
9000
 *   character to the attribute value
9001
 * - an entity reference is processed by recursively processing the
9002
 *   replacement text of the entity
9003
 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
9004
 *   appending #x20 to the normalized value, except that only a single
9005
 *   #x20 is appended for a "#xD#xA" sequence that is part of an external
9006
 *   parsed entity or the literal entity value of an internal parsed entity
9007
 * - other characters are processed by appending them to the normalized value
9008
 * If the declared value is not CDATA, then the XML processor must further
9009
 * process the normalized attribute value by discarding any leading and
9010
 * trailing space (#x20) characters, and by replacing sequences of space
9011
 * (#x20) characters by a single space (#x20) character.
9012
 * All attributes for which no declaration has been read should be treated
9013
 * by a non-validating parser as if declared CDATA.
9014
 *
9015
 * Returns the AttValue parsed or NULL. The value has to be freed by the
9016
 *     caller if it was copied, this can be detected by val[*len] == 0.
9017
 */
9018
9019
#define GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end) \
9020
10.0k
    const xmlChar *oldbase = ctxt->input->base;\
9021
10.0k
    GROW;\
9022
10.0k
    if (ctxt->instate == XML_PARSER_EOF)\
9023
10.0k
        return(NULL);\
9024
10.0k
    if (oldbase != ctxt->input->base) {\
9025
0
        ptrdiff_t delta = ctxt->input->base - oldbase;\
9026
0
        start = start + delta;\
9027
0
        in = in + delta;\
9028
0
    }\
9029
10.0k
    end = ctxt->input->end;
9030
9031
static xmlChar *
9032
xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
9033
                         int normalize)
9034
9.90M
{
9035
9.90M
    xmlChar limit = 0;
9036
9.90M
    const xmlChar *in = NULL, *start, *end, *last;
9037
9.90M
    xmlChar *ret = NULL;
9038
9.90M
    int line, col;
9039
9.90M
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
9040
2.26M
                    XML_MAX_HUGE_LENGTH :
9041
9.90M
                    XML_MAX_TEXT_LENGTH;
9042
9043
9.90M
    GROW;
9044
9.90M
    in = (xmlChar *) CUR_PTR;
9045
9.90M
    line = ctxt->input->line;
9046
9.90M
    col = ctxt->input->col;
9047
9.90M
    if (*in != '"' && *in != '\'') {
9048
28.3k
        xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
9049
28.3k
        return (NULL);
9050
28.3k
    }
9051
9.87M
    ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
9052
9053
    /*
9054
     * try to handle in this routine the most common case where no
9055
     * allocation of a new string is required and where content is
9056
     * pure ASCII.
9057
     */
9058
9.87M
    limit = *in++;
9059
9.87M
    col++;
9060
9.87M
    end = ctxt->input->end;
9061
9.87M
    start = in;
9062
9.87M
    if (in >= end) {
9063
993
        GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9064
993
    }
9065
9.87M
    if (normalize) {
9066
        /*
9067
   * Skip any leading spaces
9068
   */
9069
291k
  while ((in < end) && (*in != limit) &&
9070
291k
         ((*in == 0x20) || (*in == 0x9) ||
9071
287k
          (*in == 0xA) || (*in == 0xD))) {
9072
96.0k
      if (*in == 0xA) {
9073
40.5k
          line++; col = 1;
9074
55.4k
      } else {
9075
55.4k
          col++;
9076
55.4k
      }
9077
96.0k
      in++;
9078
96.0k
      start = in;
9079
96.0k
      if (in >= end) {
9080
264
                GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9081
264
                if ((in - start) > maxLength) {
9082
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9083
0
                                   "AttValue length too long\n");
9084
0
                    return(NULL);
9085
0
                }
9086
264
      }
9087
96.0k
  }
9088
637k
  while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9089
637k
         (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
9090
447k
      col++;
9091
447k
      if ((*in++ == 0x20) && (*in == 0x20)) break;
9092
441k
      if (in >= end) {
9093
514
                GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9094
514
                if ((in - start) > maxLength) {
9095
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9096
0
                                   "AttValue length too long\n");
9097
0
                    return(NULL);
9098
0
                }
9099
514
      }
9100
441k
  }
9101
195k
  last = in;
9102
  /*
9103
   * skip the trailing blanks
9104
   */
9105
204k
  while ((last[-1] == 0x20) && (last > start)) last--;
9106
240k
  while ((in < end) && (*in != limit) &&
9107
240k
         ((*in == 0x20) || (*in == 0x9) ||
9108
97.5k
          (*in == 0xA) || (*in == 0xD))) {
9109
44.8k
      if (*in == 0xA) {
9110
23.0k
          line++, col = 1;
9111
23.0k
      } else {
9112
21.7k
          col++;
9113
21.7k
      }
9114
44.8k
      in++;
9115
44.8k
      if (in >= end) {
9116
273
    const xmlChar *oldbase = ctxt->input->base;
9117
273
    GROW;
9118
273
                if (ctxt->instate == XML_PARSER_EOF)
9119
0
                    return(NULL);
9120
273
    if (oldbase != ctxt->input->base) {
9121
0
        ptrdiff_t delta = ctxt->input->base - oldbase;
9122
0
        start = start + delta;
9123
0
        in = in + delta;
9124
0
        last = last + delta;
9125
0
    }
9126
273
    end = ctxt->input->end;
9127
273
                if ((in - start) > maxLength) {
9128
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9129
0
                                   "AttValue length too long\n");
9130
0
                    return(NULL);
9131
0
                }
9132
273
      }
9133
44.8k
  }
9134
195k
        if ((in - start) > maxLength) {
9135
0
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9136
0
                           "AttValue length too long\n");
9137
0
            return(NULL);
9138
0
        }
9139
195k
  if (*in != limit) goto need_complex;
9140
9.68M
    } else {
9141
190M
  while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9142
190M
         (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
9143
180M
      in++;
9144
180M
      col++;
9145
180M
      if (in >= end) {
9146
8.24k
                GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9147
8.24k
                if ((in - start) > maxLength) {
9148
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9149
0
                                   "AttValue length too long\n");
9150
0
                    return(NULL);
9151
0
                }
9152
8.24k
      }
9153
180M
  }
9154
9.68M
  last = in;
9155
9.68M
        if ((in - start) > maxLength) {
9156
0
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9157
0
                           "AttValue length too long\n");
9158
0
            return(NULL);
9159
0
        }
9160
9.68M
  if (*in != limit) goto need_complex;
9161
9.68M
    }
9162
9.33M
    in++;
9163
9.33M
    col++;
9164
9.33M
    if (len != NULL) {
9165
6.18M
        if (alloc) *alloc = 0;
9166
6.18M
        *len = last - start;
9167
6.18M
        ret = (xmlChar *) start;
9168
6.18M
    } else {
9169
3.15M
        if (alloc) *alloc = 1;
9170
3.15M
        ret = xmlStrndup(start, last - start);
9171
3.15M
    }
9172
9.33M
    CUR_PTR = in;
9173
9.33M
    ctxt->input->line = line;
9174
9.33M
    ctxt->input->col = col;
9175
9.33M
    return ret;
9176
539k
need_complex:
9177
539k
    if (alloc) *alloc = 1;
9178
539k
    return xmlParseAttValueComplex(ctxt, len, normalize);
9179
9.87M
}
9180
9181
/**
9182
 * xmlParseAttribute2:
9183
 * @ctxt:  an XML parser context
9184
 * @pref:  the element prefix
9185
 * @elem:  the element name
9186
 * @prefix:  a xmlChar ** used to store the value of the attribute prefix
9187
 * @value:  a xmlChar ** used to store the value of the attribute
9188
 * @len:  an int * to save the length of the attribute
9189
 * @alloc:  an int * to indicate if the attribute was allocated
9190
 *
9191
 * parse an attribute in the new SAX2 framework.
9192
 *
9193
 * Returns the attribute name, and the value in *value, .
9194
 */
9195
9196
static const xmlChar *
9197
xmlParseAttribute2(xmlParserCtxtPtr ctxt,
9198
                   const xmlChar * pref, const xmlChar * elem,
9199
                   const xmlChar ** prefix, xmlChar ** value,
9200
                   int *len, int *alloc)
9201
6.67M
{
9202
6.67M
    const xmlChar *name;
9203
6.67M
    xmlChar *val, *internal_val = NULL;
9204
6.67M
    int normalize = 0;
9205
9206
6.67M
    *value = NULL;
9207
6.67M
    GROW;
9208
6.67M
    name = xmlParseQName(ctxt, prefix);
9209
6.67M
    if (name == NULL) {
9210
114k
        xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9211
114k
                       "error parsing attribute name\n");
9212
114k
        return (NULL);
9213
114k
    }
9214
9215
    /*
9216
     * get the type if needed
9217
     */
9218
6.56M
    if (ctxt->attsSpecial != NULL) {
9219
850k
        int type;
9220
9221
850k
        type = (int) (ptrdiff_t) xmlHashQLookup2(ctxt->attsSpecial,
9222
850k
                                                 pref, elem, *prefix, name);
9223
850k
        if (type != 0)
9224
196k
            normalize = 1;
9225
850k
    }
9226
9227
    /*
9228
     * read the value
9229
     */
9230
6.56M
    SKIP_BLANKS;
9231
6.56M
    if (RAW == '=') {
9232
6.50M
        NEXT;
9233
6.50M
        SKIP_BLANKS;
9234
6.50M
        val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
9235
6.50M
  if (normalize) {
9236
      /*
9237
       * Sometimes a second normalisation pass for spaces is needed
9238
       * but that only happens if charrefs or entities references
9239
       * have been used in the attribute value, i.e. the attribute
9240
       * value have been extracted in an allocated string already.
9241
       */
9242
196k
      if (*alloc) {
9243
54.0k
          const xmlChar *val2;
9244
9245
54.0k
          val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
9246
54.0k
    if ((val2 != NULL) && (val2 != val)) {
9247
8.93k
        xmlFree(val);
9248
8.93k
        val = (xmlChar *) val2;
9249
8.93k
    }
9250
54.0k
      }
9251
196k
  }
9252
6.50M
        ctxt->instate = XML_PARSER_CONTENT;
9253
6.50M
    } else {
9254
60.0k
        xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
9255
60.0k
                          "Specification mandates value for attribute %s\n",
9256
60.0k
                          name);
9257
60.0k
        return (NULL);
9258
60.0k
    }
9259
9260
6.50M
    if (*prefix == ctxt->str_xml) {
9261
        /*
9262
         * Check that xml:lang conforms to the specification
9263
         * No more registered as an error, just generate a warning now
9264
         * since this was deprecated in XML second edition
9265
         */
9266
30.7k
        if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
9267
6.00k
            internal_val = xmlStrndup(val, *len);
9268
6.00k
            if (!xmlCheckLanguageID(internal_val)) {
9269
3.63k
                xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
9270
3.63k
                              "Malformed value for xml:lang : %s\n",
9271
3.63k
                              internal_val, NULL);
9272
3.63k
            }
9273
6.00k
        }
9274
9275
        /*
9276
         * Check that xml:space conforms to the specification
9277
         */
9278
30.7k
        if (xmlStrEqual(name, BAD_CAST "space")) {
9279
1.33k
            internal_val = xmlStrndup(val, *len);
9280
1.33k
            if (xmlStrEqual(internal_val, BAD_CAST "default"))
9281
87
                *(ctxt->space) = 0;
9282
1.25k
            else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
9283
563
                *(ctxt->space) = 1;
9284
688
            else {
9285
688
                xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
9286
688
                              "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
9287
688
                              internal_val, NULL);
9288
688
            }
9289
1.33k
        }
9290
30.7k
        if (internal_val) {
9291
6.95k
            xmlFree(internal_val);
9292
6.95k
        }
9293
30.7k
    }
9294
9295
6.50M
    *value = val;
9296
6.50M
    return (name);
9297
6.56M
}
9298
/**
9299
 * xmlParseStartTag2:
9300
 * @ctxt:  an XML parser context
9301
 *
9302
 * parse a start of tag either for rule element or
9303
 * EmptyElement. In both case we don't parse the tag closing chars.
9304
 * This routine is called when running SAX2 parsing
9305
 *
9306
 * [40] STag ::= '<' Name (S Attribute)* S? '>'
9307
 *
9308
 * [ WFC: Unique Att Spec ]
9309
 * No attribute name may appear more than once in the same start-tag or
9310
 * empty-element tag.
9311
 *
9312
 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
9313
 *
9314
 * [ WFC: Unique Att Spec ]
9315
 * No attribute name may appear more than once in the same start-tag or
9316
 * empty-element tag.
9317
 *
9318
 * With namespace:
9319
 *
9320
 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
9321
 *
9322
 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
9323
 *
9324
 * Returns the element name parsed
9325
 */
9326
9327
static const xmlChar *
9328
xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
9329
8.21M
                  const xmlChar **URI, int *tlen) {
9330
8.21M
    const xmlChar *localname;
9331
8.21M
    const xmlChar *prefix;
9332
8.21M
    const xmlChar *attname;
9333
8.21M
    const xmlChar *aprefix;
9334
8.21M
    const xmlChar *nsname;
9335
8.21M
    xmlChar *attvalue;
9336
8.21M
    const xmlChar **atts = ctxt->atts;
9337
8.21M
    int maxatts = ctxt->maxatts;
9338
8.21M
    int nratts, nbatts, nbdef, inputid;
9339
8.21M
    int i, j, nbNs, attval;
9340
8.21M
    unsigned long cur;
9341
8.21M
    int nsNr = ctxt->nsNr;
9342
9343
8.21M
    if (RAW != '<') return(NULL);
9344
8.21M
    NEXT1;
9345
9346
    /*
9347
     * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
9348
     *       point since the attribute values may be stored as pointers to
9349
     *       the buffer and calling SHRINK would destroy them !
9350
     *       The Shrinking is only possible once the full set of attribute
9351
     *       callbacks have been done.
9352
     */
9353
8.21M
    SHRINK;
9354
8.21M
    cur = ctxt->input->cur - ctxt->input->base;
9355
8.21M
    inputid = ctxt->input->id;
9356
8.21M
    nbatts = 0;
9357
8.21M
    nratts = 0;
9358
8.21M
    nbdef = 0;
9359
8.21M
    nbNs = 0;
9360
8.21M
    attval = 0;
9361
    /* Forget any namespaces added during an earlier parse of this element. */
9362
8.21M
    ctxt->nsNr = nsNr;
9363
9364
8.21M
    localname = xmlParseQName(ctxt, &prefix);
9365
8.21M
    if (localname == NULL) {
9366
210k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9367
210k
           "StartTag: invalid element name\n");
9368
210k
        return(NULL);
9369
210k
    }
9370
8.00M
    *tlen = ctxt->input->cur - ctxt->input->base - cur;
9371
9372
    /*
9373
     * Now parse the attributes, it ends up with the ending
9374
     *
9375
     * (S Attribute)* S?
9376
     */
9377
8.00M
    SKIP_BLANKS;
9378
8.00M
    GROW;
9379
9380
10.4M
    while (((RAW != '>') &&
9381
10.4M
     ((RAW != '/') || (NXT(1) != '>')) &&
9382
10.4M
     (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
9383
6.67M
  int id = ctxt->input->id;
9384
6.67M
  unsigned long cons = CUR_CONSUMED;
9385
6.67M
  int len = -1, alloc = 0;
9386
9387
6.67M
  attname = xmlParseAttribute2(ctxt, prefix, localname,
9388
6.67M
                               &aprefix, &attvalue, &len, &alloc);
9389
6.67M
        if ((attname == NULL) || (attvalue == NULL))
9390
184k
            goto next_attr;
9391
6.49M
  if (len < 0) len = xmlStrlen(attvalue);
9392
9393
6.49M
        if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9394
27.1k
            const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9395
27.1k
            xmlURIPtr uri;
9396
9397
27.1k
            if (URL == NULL) {
9398
0
                xmlErrMemory(ctxt, "dictionary allocation failure");
9399
0
                if ((attvalue != NULL) && (alloc != 0))
9400
0
                    xmlFree(attvalue);
9401
0
                localname = NULL;
9402
0
                goto done;
9403
0
            }
9404
27.1k
            if (*URL != 0) {
9405
26.3k
                uri = xmlParseURI((const char *) URL);
9406
26.3k
                if (uri == NULL) {
9407
4.33k
                    xmlNsErr(ctxt, XML_WAR_NS_URI,
9408
4.33k
                             "xmlns: '%s' is not a valid URI\n",
9409
4.33k
                                       URL, NULL, NULL);
9410
21.9k
                } else {
9411
21.9k
                    if (uri->scheme == NULL) {
9412
4.22k
                        xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9413
4.22k
                                  "xmlns: URI %s is not absolute\n",
9414
4.22k
                                  URL, NULL, NULL);
9415
4.22k
                    }
9416
21.9k
                    xmlFreeURI(uri);
9417
21.9k
                }
9418
26.3k
                if (URL == ctxt->str_xml_ns) {
9419
0
                    if (attname != ctxt->str_xml) {
9420
0
                        xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9421
0
                     "xml namespace URI cannot be the default namespace\n",
9422
0
                                 NULL, NULL, NULL);
9423
0
                    }
9424
0
                    goto next_attr;
9425
0
                }
9426
26.3k
                if ((len == 29) &&
9427
26.3k
                    (xmlStrEqual(URL,
9428
303
                             BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9429
4
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9430
4
                         "reuse of the xmlns namespace name is forbidden\n",
9431
4
                             NULL, NULL, NULL);
9432
4
                    goto next_attr;
9433
4
                }
9434
26.3k
            }
9435
            /*
9436
             * check that it's not a defined namespace
9437
             */
9438
37.2k
            for (j = 1;j <= nbNs;j++)
9439
11.5k
                if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9440
1.34k
                    break;
9441
27.1k
            if (j <= nbNs)
9442
1.34k
                xmlErrAttributeDup(ctxt, NULL, attname);
9443
25.7k
            else
9444
25.7k
                if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
9445
9446
6.46M
        } else if (aprefix == ctxt->str_xmlns) {
9447
190k
            const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9448
190k
            xmlURIPtr uri;
9449
9450
190k
            if (attname == ctxt->str_xml) {
9451
352
                if (URL != ctxt->str_xml_ns) {
9452
352
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9453
352
                             "xml namespace prefix mapped to wrong URI\n",
9454
352
                             NULL, NULL, NULL);
9455
352
                }
9456
                /*
9457
                 * Do not keep a namespace definition node
9458
                 */
9459
352
                goto next_attr;
9460
352
            }
9461
190k
            if (URL == ctxt->str_xml_ns) {
9462
0
                if (attname != ctxt->str_xml) {
9463
0
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9464
0
                             "xml namespace URI mapped to wrong prefix\n",
9465
0
                             NULL, NULL, NULL);
9466
0
                }
9467
0
                goto next_attr;
9468
0
            }
9469
190k
            if (attname == ctxt->str_xmlns) {
9470
194
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9471
194
                         "redefinition of the xmlns prefix is forbidden\n",
9472
194
                         NULL, NULL, NULL);
9473
194
                goto next_attr;
9474
194
            }
9475
190k
            if ((len == 29) &&
9476
190k
                (xmlStrEqual(URL,
9477
1.88k
                             BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9478
0
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9479
0
                         "reuse of the xmlns namespace name is forbidden\n",
9480
0
                         NULL, NULL, NULL);
9481
0
                goto next_attr;
9482
0
            }
9483
190k
            if ((URL == NULL) || (URL[0] == 0)) {
9484
807
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9485
807
                         "xmlns:%s: Empty XML namespace is not allowed\n",
9486
807
                              attname, NULL, NULL);
9487
807
                goto next_attr;
9488
189k
            } else {
9489
189k
                uri = xmlParseURI((const char *) URL);
9490
189k
                if (uri == NULL) {
9491
13.1k
                    xmlNsErr(ctxt, XML_WAR_NS_URI,
9492
13.1k
                         "xmlns:%s: '%s' is not a valid URI\n",
9493
13.1k
                                       attname, URL, NULL);
9494
176k
                } else {
9495
176k
                    if ((ctxt->pedantic) && (uri->scheme == NULL)) {
9496
2.02k
                        xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9497
2.02k
                                  "xmlns:%s: URI %s is not absolute\n",
9498
2.02k
                                  attname, URL, NULL);
9499
2.02k
                    }
9500
176k
                    xmlFreeURI(uri);
9501
176k
                }
9502
189k
            }
9503
9504
            /*
9505
             * check that it's not a defined namespace
9506
             */
9507
220k
            for (j = 1;j <= nbNs;j++)
9508
32.2k
                if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9509
1.37k
                    break;
9510
189k
            if (j <= nbNs)
9511
1.37k
                xmlErrAttributeDup(ctxt, aprefix, attname);
9512
188k
            else
9513
188k
                if (nsPush(ctxt, attname, URL) > 0) nbNs++;
9514
9515
6.27M
        } else {
9516
            /*
9517
             * Add the pair to atts
9518
             */
9519
6.27M
            if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9520
218k
                if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9521
0
                    goto next_attr;
9522
0
                }
9523
218k
                maxatts = ctxt->maxatts;
9524
218k
                atts = ctxt->atts;
9525
218k
            }
9526
6.27M
            ctxt->attallocs[nratts++] = alloc;
9527
6.27M
            atts[nbatts++] = attname;
9528
6.27M
            atts[nbatts++] = aprefix;
9529
            /*
9530
             * The namespace URI field is used temporarily to point at the
9531
             * base of the current input buffer for non-alloced attributes.
9532
             * When the input buffer is reallocated, all the pointers become
9533
             * invalid, but they can be reconstructed later.
9534
             */
9535
6.27M
            if (alloc)
9536
289k
                atts[nbatts++] = NULL;
9537
5.98M
            else
9538
5.98M
                atts[nbatts++] = ctxt->input->base;
9539
6.27M
            atts[nbatts++] = attvalue;
9540
6.27M
            attvalue += len;
9541
6.27M
            atts[nbatts++] = attvalue;
9542
            /*
9543
             * tag if some deallocation is needed
9544
             */
9545
6.27M
            if (alloc != 0) attval = 1;
9546
6.27M
            attvalue = NULL; /* moved into atts */
9547
6.27M
        }
9548
9549
6.67M
next_attr:
9550
6.67M
        if ((attvalue != NULL) && (alloc != 0)) {
9551
21.2k
            xmlFree(attvalue);
9552
21.2k
            attvalue = NULL;
9553
21.2k
        }
9554
9555
6.67M
  GROW
9556
6.67M
        if (ctxt->instate == XML_PARSER_EOF)
9557
0
            break;
9558
6.67M
  if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9559
3.99M
      break;
9560
2.68M
  if (SKIP_BLANKS == 0) {
9561
283k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9562
283k
         "attributes construct error\n");
9563
283k
      break;
9564
283k
  }
9565
2.40M
        if ((cons == CUR_CONSUMED) && (id == ctxt->input->id) &&
9566
2.40M
            (attname == NULL) && (attvalue == NULL)) {
9567
0
      xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9568
0
           "xmlParseStartTag: problem parsing attributes\n");
9569
0
      break;
9570
0
  }
9571
2.40M
        GROW;
9572
2.40M
    }
9573
9574
8.00M
    if (ctxt->input->id != inputid) {
9575
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9576
0
                    "Unexpected change of input\n");
9577
0
        localname = NULL;
9578
0
        goto done;
9579
0
    }
9580
9581
    /* Reconstruct attribute value pointers. */
9582
14.2M
    for (i = 0, j = 0; j < nratts; i += 5, j++) {
9583
6.27M
        if (atts[i+2] != NULL) {
9584
            /*
9585
             * Arithmetic on dangling pointers is technically undefined
9586
             * behavior, but well...
9587
             */
9588
5.98M
            ptrdiff_t offset = ctxt->input->base - atts[i+2];
9589
5.98M
            atts[i+2]  = NULL;    /* Reset repurposed namespace URI */
9590
5.98M
            atts[i+3] += offset;  /* value */
9591
5.98M
            atts[i+4] += offset;  /* valuend */
9592
5.98M
        }
9593
6.27M
    }
9594
9595
    /*
9596
     * The attributes defaulting
9597
     */
9598
8.00M
    if (ctxt->attsDefault != NULL) {
9599
670k
        xmlDefAttrsPtr defaults;
9600
9601
670k
  defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9602
670k
  if (defaults != NULL) {
9603
66.5k
      for (i = 0;i < defaults->nbAttrs;i++) {
9604
40.4k
          attname = defaults->values[5 * i];
9605
40.4k
    aprefix = defaults->values[5 * i + 1];
9606
9607
                /*
9608
     * special work for namespaces defaulted defs
9609
     */
9610
40.4k
    if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9611
        /*
9612
         * check that it's not a defined namespace
9613
         */
9614
2.33k
        for (j = 1;j <= nbNs;j++)
9615
1.23k
            if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9616
608
          break;
9617
1.70k
              if (j <= nbNs) continue;
9618
9619
1.09k
        nsname = xmlGetNamespace(ctxt, NULL);
9620
1.09k
        if (nsname != defaults->values[5 * i + 2]) {
9621
844
      if (nsPush(ctxt, NULL,
9622
844
                 defaults->values[5 * i + 2]) > 0)
9623
829
          nbNs++;
9624
844
        }
9625
38.7k
    } else if (aprefix == ctxt->str_xmlns) {
9626
        /*
9627
         * check that it's not a defined namespace
9628
         */
9629
16.7k
        for (j = 1;j <= nbNs;j++)
9630
14.1k
            if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9631
13.5k
          break;
9632
16.2k
              if (j <= nbNs) continue;
9633
9634
2.68k
        nsname = xmlGetNamespace(ctxt, attname);
9635
2.68k
        if (nsname != defaults->values[2]) {
9636
2.22k
      if (nsPush(ctxt, attname,
9637
2.22k
                 defaults->values[5 * i + 2]) > 0)
9638
2.19k
          nbNs++;
9639
2.22k
        }
9640
22.5k
    } else {
9641
        /*
9642
         * check that it's not a defined attribute
9643
         */
9644
66.8k
        for (j = 0;j < nbatts;j+=5) {
9645
45.3k
      if ((attname == atts[j]) && (aprefix == atts[j+1]))
9646
1.06k
          break;
9647
45.3k
        }
9648
22.5k
        if (j < nbatts) continue;
9649
9650
21.4k
        if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9651
3.10k
      if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9652
0
                            localname = NULL;
9653
0
                            goto done;
9654
0
      }
9655
3.10k
      maxatts = ctxt->maxatts;
9656
3.10k
      atts = ctxt->atts;
9657
3.10k
        }
9658
21.4k
        atts[nbatts++] = attname;
9659
21.4k
        atts[nbatts++] = aprefix;
9660
21.4k
        if (aprefix == NULL)
9661
18.8k
      atts[nbatts++] = NULL;
9662
2.64k
        else
9663
2.64k
            atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
9664
21.4k
        atts[nbatts++] = defaults->values[5 * i + 2];
9665
21.4k
        atts[nbatts++] = defaults->values[5 * i + 3];
9666
21.4k
        if ((ctxt->standalone == 1) &&
9667
21.4k
            (defaults->values[5 * i + 4] != NULL)) {
9668
0
      xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
9669
0
    "standalone: attribute %s on %s defaulted from external subset\n",
9670
0
                                   attname, localname);
9671
0
        }
9672
21.4k
        nbdef++;
9673
21.4k
    }
9674
40.4k
      }
9675
26.0k
  }
9676
670k
    }
9677
9678
    /*
9679
     * The attributes checkings
9680
     */
9681
14.3M
    for (i = 0; i < nbatts;i += 5) {
9682
        /*
9683
  * The default namespace does not apply to attribute names.
9684
  */
9685
6.29M
  if (atts[i + 1] != NULL) {
9686
564k
      nsname = xmlGetNamespace(ctxt, atts[i + 1]);
9687
564k
      if (nsname == NULL) {
9688
54.0k
    xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9689
54.0k
        "Namespace prefix %s for %s on %s is not defined\n",
9690
54.0k
        atts[i + 1], atts[i], localname);
9691
54.0k
      }
9692
564k
      atts[i + 2] = nsname;
9693
564k
  } else
9694
5.73M
      nsname = NULL;
9695
  /*
9696
   * [ WFC: Unique Att Spec ]
9697
   * No attribute name may appear more than once in the same
9698
   * start-tag or empty-element tag.
9699
   * As extended by the Namespace in XML REC.
9700
   */
9701
8.74M
        for (j = 0; j < i;j += 5) {
9702
2.45M
      if (atts[i] == atts[j]) {
9703
10.3k
          if (atts[i+1] == atts[j+1]) {
9704
2.95k
        xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
9705
2.95k
        break;
9706
2.95k
    }
9707
7.38k
    if ((nsname != NULL) && (atts[j + 2] == nsname)) {
9708
152
        xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9709
152
           "Namespaced Attribute %s in '%s' redefined\n",
9710
152
           atts[i], nsname, NULL);
9711
152
        break;
9712
152
    }
9713
7.38k
      }
9714
2.45M
  }
9715
6.29M
    }
9716
9717
8.00M
    nsname = xmlGetNamespace(ctxt, prefix);
9718
8.00M
    if ((prefix != NULL) && (nsname == NULL)) {
9719
183k
  xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9720
183k
           "Namespace prefix %s on %s is not defined\n",
9721
183k
     prefix, localname, NULL);
9722
183k
    }
9723
8.00M
    *pref = prefix;
9724
8.00M
    *URI = nsname;
9725
9726
    /*
9727
     * SAX: Start of Element !
9728
     */
9729
8.00M
    if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9730
8.00M
  (!ctxt->disableSAX)) {
9731
6.59M
  if (nbNs > 0)
9732
172k
      ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9733
172k
        nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
9734
172k
        nbatts / 5, nbdef, atts);
9735
6.42M
  else
9736
6.42M
      ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9737
6.42M
                    nsname, 0, NULL, nbatts / 5, nbdef, atts);
9738
6.59M
    }
9739
9740
8.00M
done:
9741
    /*
9742
     * Free up attribute allocated strings if needed
9743
     */
9744
8.00M
    if (attval != 0) {
9745
597k
  for (i = 3,j = 0; j < nratts;i += 5,j++)
9746
325k
      if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9747
289k
          xmlFree((xmlChar *) atts[i]);
9748
271k
    }
9749
9750
8.00M
    return(localname);
9751
8.00M
}
9752
9753
/**
9754
 * xmlParseEndTag2:
9755
 * @ctxt:  an XML parser context
9756
 * @line:  line of the start tag
9757
 * @nsNr:  number of namespaces on the start tag
9758
 *
9759
 * parse an end of tag
9760
 *
9761
 * [42] ETag ::= '</' Name S? '>'
9762
 *
9763
 * With namespace
9764
 *
9765
 * [NS 9] ETag ::= '</' QName S? '>'
9766
 */
9767
9768
static void
9769
2.35M
xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlStartTag *tag) {
9770
2.35M
    const xmlChar *name;
9771
9772
2.35M
    GROW;
9773
2.35M
    if ((RAW != '<') || (NXT(1) != '/')) {
9774
0
  xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
9775
0
  return;
9776
0
    }
9777
2.35M
    SKIP(2);
9778
9779
2.35M
    if (tag->prefix == NULL)
9780
1.89M
        name = xmlParseNameAndCompare(ctxt, ctxt->name);
9781
462k
    else
9782
462k
        name = xmlParseQNameAndCompare(ctxt, ctxt->name, tag->prefix);
9783
9784
    /*
9785
     * We should definitely be at the ending "S? '>'" part
9786
     */
9787
2.35M
    GROW;
9788
2.35M
    if (ctxt->instate == XML_PARSER_EOF)
9789
0
        return;
9790
2.35M
    SKIP_BLANKS;
9791
2.35M
    if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
9792
55.4k
  xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
9793
55.4k
    } else
9794
2.30M
  NEXT1;
9795
9796
    /*
9797
     * [ WFC: Element Type Match ]
9798
     * The Name in an element's end-tag must match the element type in the
9799
     * start-tag.
9800
     *
9801
     */
9802
2.35M
    if (name != (xmlChar*)1) {
9803
157k
        if (name == NULL) name = BAD_CAST "unparsable";
9804
157k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
9805
157k
         "Opening and ending tag mismatch: %s line %d and %s\n",
9806
157k
                    ctxt->name, tag->line, name);
9807
157k
    }
9808
9809
    /*
9810
     * SAX: End of Tag
9811
     */
9812
2.35M
    if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9813
2.35M
  (!ctxt->disableSAX))
9814
1.70M
  ctxt->sax->endElementNs(ctxt->userData, ctxt->name, tag->prefix,
9815
1.70M
                                tag->URI);
9816
9817
2.35M
    spacePop(ctxt);
9818
2.35M
    if (tag->nsNr != 0)
9819
43.3k
  nsPop(ctxt, tag->nsNr);
9820
2.35M
}
9821
9822
/**
9823
 * xmlParseCDSect:
9824
 * @ctxt:  an XML parser context
9825
 *
9826
 * DEPRECATED: Internal function, don't use.
9827
 *
9828
 * Parse escaped pure raw content.
9829
 *
9830
 * [18] CDSect ::= CDStart CData CDEnd
9831
 *
9832
 * [19] CDStart ::= '<![CDATA['
9833
 *
9834
 * [20] Data ::= (Char* - (Char* ']]>' Char*))
9835
 *
9836
 * [21] CDEnd ::= ']]>'
9837
 */
9838
void
9839
29.5k
xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9840
29.5k
    xmlChar *buf = NULL;
9841
29.5k
    int len = 0;
9842
29.5k
    int size = XML_PARSER_BUFFER_SIZE;
9843
29.5k
    int r, rl;
9844
29.5k
    int s, sl;
9845
29.5k
    int cur, l;
9846
29.5k
    int count = 0;
9847
29.5k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
9848
8.11k
                    XML_MAX_HUGE_LENGTH :
9849
29.5k
                    XML_MAX_TEXT_LENGTH;
9850
9851
    /* Check 2.6.0 was NXT(0) not RAW */
9852
29.5k
    if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9853
29.5k
  SKIP(9);
9854
29.5k
    } else
9855
0
        return;
9856
9857
29.5k
    ctxt->instate = XML_PARSER_CDATA_SECTION;
9858
29.5k
    r = CUR_CHAR(rl);
9859
29.5k
    if (!IS_CHAR(r)) {
9860
672
  xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9861
672
  ctxt->instate = XML_PARSER_CONTENT;
9862
672
        return;
9863
672
    }
9864
28.9k
    NEXTL(rl);
9865
28.9k
    s = CUR_CHAR(sl);
9866
28.9k
    if (!IS_CHAR(s)) {
9867
392
  xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9868
392
  ctxt->instate = XML_PARSER_CONTENT;
9869
392
        return;
9870
392
    }
9871
28.5k
    NEXTL(sl);
9872
28.5k
    cur = CUR_CHAR(l);
9873
28.5k
    buf = (xmlChar *) xmlMallocAtomic(size);
9874
28.5k
    if (buf == NULL) {
9875
0
  xmlErrMemory(ctxt, NULL);
9876
0
  return;
9877
0
    }
9878
15.3M
    while (IS_CHAR(cur) &&
9879
15.3M
           ((r != ']') || (s != ']') || (cur != '>'))) {
9880
15.3M
  if (len + 5 >= size) {
9881
55.6k
      xmlChar *tmp;
9882
9883
55.6k
      tmp = (xmlChar *) xmlRealloc(buf, size * 2);
9884
55.6k
      if (tmp == NULL) {
9885
0
          xmlFree(buf);
9886
0
    xmlErrMemory(ctxt, NULL);
9887
0
    return;
9888
0
      }
9889
55.6k
      buf = tmp;
9890
55.6k
      size *= 2;
9891
55.6k
  }
9892
15.3M
  COPY_BUF(rl,buf,len,r);
9893
15.3M
  r = s;
9894
15.3M
  rl = sl;
9895
15.3M
  s = cur;
9896
15.3M
  sl = l;
9897
15.3M
  count++;
9898
15.3M
  if (count > 50) {
9899
288k
      SHRINK;
9900
288k
      GROW;
9901
288k
            if (ctxt->instate == XML_PARSER_EOF) {
9902
0
    xmlFree(buf);
9903
0
    return;
9904
0
            }
9905
288k
      count = 0;
9906
288k
  }
9907
15.3M
  NEXTL(l);
9908
15.3M
  cur = CUR_CHAR(l);
9909
15.3M
        if (len > maxLength) {
9910
0
            xmlFatalErrMsg(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9911
0
                           "CData section too big found\n");
9912
0
            xmlFree(buf);
9913
0
            return;
9914
0
        }
9915
15.3M
    }
9916
28.5k
    buf[len] = 0;
9917
28.5k
    ctxt->instate = XML_PARSER_CONTENT;
9918
28.5k
    if (cur != '>') {
9919
8.10k
  xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9920
8.10k
                       "CData section not finished\n%.50s\n", buf);
9921
8.10k
  xmlFree(buf);
9922
8.10k
        return;
9923
8.10k
    }
9924
20.4k
    NEXTL(l);
9925
9926
    /*
9927
     * OK the buffer is to be consumed as cdata.
9928
     */
9929
20.4k
    if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9930
12.5k
  if (ctxt->sax->cdataBlock != NULL)
9931
8.30k
      ctxt->sax->cdataBlock(ctxt->userData, buf, len);
9932
4.23k
  else if (ctxt->sax->characters != NULL)
9933
4.23k
      ctxt->sax->characters(ctxt->userData, buf, len);
9934
12.5k
    }
9935
20.4k
    xmlFree(buf);
9936
20.4k
}
9937
9938
/**
9939
 * xmlParseContentInternal:
9940
 * @ctxt:  an XML parser context
9941
 *
9942
 * Parse a content sequence. Stops at EOF or '</'. Leaves checking of
9943
 * unexpected EOF to the caller.
9944
 */
9945
9946
static void
9947
3.47M
xmlParseContentInternal(xmlParserCtxtPtr ctxt) {
9948
3.47M
    int nameNr = ctxt->nameNr;
9949
9950
3.47M
    GROW;
9951
25.5M
    while ((RAW != 0) &&
9952
25.5M
     (ctxt->instate != XML_PARSER_EOF)) {
9953
22.1M
        int id = ctxt->input->id;
9954
22.1M
  unsigned long cons = CUR_CONSUMED;
9955
22.1M
  const xmlChar *cur = ctxt->input->cur;
9956
9957
  /*
9958
   * First case : a Processing Instruction.
9959
   */
9960
22.1M
  if ((*cur == '<') && (cur[1] == '?')) {
9961
36.0k
      xmlParsePI(ctxt);
9962
36.0k
  }
9963
9964
  /*
9965
   * Second case : a CDSection
9966
   */
9967
  /* 2.6.0 test was *cur not RAW */
9968
22.1M
  else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9969
29.5k
      xmlParseCDSect(ctxt);
9970
29.5k
  }
9971
9972
  /*
9973
   * Third case :  a comment
9974
   */
9975
22.1M
  else if ((*cur == '<') && (NXT(1) == '!') &&
9976
22.1M
     (NXT(2) == '-') && (NXT(3) == '-')) {
9977
101k
      xmlParseComment(ctxt);
9978
101k
      ctxt->instate = XML_PARSER_CONTENT;
9979
101k
  }
9980
9981
  /*
9982
   * Fourth case :  a sub-element.
9983
   */
9984
22.0M
  else if (*cur == '<') {
9985
10.4M
            if (NXT(1) == '/') {
9986
2.23M
                if (ctxt->nameNr <= nameNr)
9987
76.5k
                    break;
9988
2.15M
          xmlParseElementEnd(ctxt);
9989
8.19M
            } else {
9990
8.19M
          xmlParseElementStart(ctxt);
9991
8.19M
            }
9992
10.4M
  }
9993
9994
  /*
9995
   * Fifth case : a reference. If if has not been resolved,
9996
   *    parsing returns it's Name, create the node
9997
   */
9998
9999
11.5M
  else if (*cur == '&') {
10000
3.61M
      xmlParseReference(ctxt);
10001
3.61M
  }
10002
10003
  /*
10004
   * Last case, text. Note that References are handled directly.
10005
   */
10006
7.97M
  else {
10007
7.97M
      xmlParseCharData(ctxt, 0);
10008
7.97M
  }
10009
10010
22.1M
  GROW;
10011
22.1M
  SHRINK;
10012
10013
22.1M
  if ((cons == CUR_CONSUMED) && (id == ctxt->input->id)) {
10014
40.6k
      xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
10015
40.6k
                  "detected an error in element content\n");
10016
40.6k
      xmlHaltParser(ctxt);
10017
40.6k
            break;
10018
40.6k
  }
10019
22.1M
    }
10020
3.47M
}
10021
10022
/**
10023
 * xmlParseContent:
10024
 * @ctxt:  an XML parser context
10025
 *
10026
 * Parse a content sequence. Stops at EOF or '</'.
10027
 *
10028
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10029
 */
10030
10031
void
10032
3.27M
xmlParseContent(xmlParserCtxtPtr ctxt) {
10033
3.27M
    int nameNr = ctxt->nameNr;
10034
10035
3.27M
    xmlParseContentInternal(ctxt);
10036
10037
3.27M
    if ((ctxt->instate != XML_PARSER_EOF) && (ctxt->nameNr > nameNr)) {
10038
1.61k
        const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
10039
1.61k
        int line = ctxt->pushTab[ctxt->nameNr - 1].line;
10040
1.61k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
10041
1.61k
                "Premature end of data in tag %s line %d\n",
10042
1.61k
    name, line, NULL);
10043
1.61k
    }
10044
3.27M
}
10045
10046
/**
10047
 * xmlParseElement:
10048
 * @ctxt:  an XML parser context
10049
 *
10050
 * DEPRECATED: Internal function, don't use.
10051
 *
10052
 * parse an XML element
10053
 *
10054
 * [39] element ::= EmptyElemTag | STag content ETag
10055
 *
10056
 * [ WFC: Element Type Match ]
10057
 * The Name in an element's end-tag must match the element type in the
10058
 * start-tag.
10059
 *
10060
 */
10061
10062
void
10063
284k
xmlParseElement(xmlParserCtxtPtr ctxt) {
10064
284k
    if (xmlParseElementStart(ctxt) != 0)
10065
84.3k
        return;
10066
10067
199k
    xmlParseContentInternal(ctxt);
10068
199k
    if (ctxt->instate == XML_PARSER_EOF)
10069
3.02k
  return;
10070
10071
196k
    if (CUR == 0) {
10072
120k
        const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
10073
120k
        int line = ctxt->pushTab[ctxt->nameNr - 1].line;
10074
120k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
10075
120k
                "Premature end of data in tag %s line %d\n",
10076
120k
    name, line, NULL);
10077
120k
        return;
10078
120k
    }
10079
10080
75.7k
    xmlParseElementEnd(ctxt);
10081
75.7k
}
10082
10083
/**
10084
 * xmlParseElementStart:
10085
 * @ctxt:  an XML parser context
10086
 *
10087
 * Parse the start of an XML element. Returns -1 in case of error, 0 if an
10088
 * opening tag was parsed, 1 if an empty element was parsed.
10089
 */
10090
static int
10091
8.48M
xmlParseElementStart(xmlParserCtxtPtr ctxt) {
10092
8.48M
    const xmlChar *name;
10093
8.48M
    const xmlChar *prefix = NULL;
10094
8.48M
    const xmlChar *URI = NULL;
10095
8.48M
    xmlParserNodeInfo node_info;
10096
8.48M
    int line, tlen = 0;
10097
8.48M
    xmlNodePtr ret;
10098
8.48M
    int nsNr = ctxt->nsNr;
10099
10100
8.48M
    if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) &&
10101
8.48M
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
10102
0
  xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
10103
0
     "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
10104
0
        xmlParserMaxDepth);
10105
0
  xmlHaltParser(ctxt);
10106
0
  return(-1);
10107
0
    }
10108
10109
    /* Capture start position */
10110
8.48M
    if (ctxt->record_info) {
10111
0
        node_info.begin_pos = ctxt->input->consumed +
10112
0
                          (CUR_PTR - ctxt->input->base);
10113
0
  node_info.begin_line = ctxt->input->line;
10114
0
    }
10115
10116
8.48M
    if (ctxt->spaceNr == 0)
10117
0
  spacePush(ctxt, -1);
10118
8.48M
    else if (*ctxt->space == -2)
10119
770k
  spacePush(ctxt, -1);
10120
7.71M
    else
10121
7.71M
  spacePush(ctxt, *ctxt->space);
10122
10123
8.48M
    line = ctxt->input->line;
10124
8.48M
#ifdef LIBXML_SAX1_ENABLED
10125
8.48M
    if (ctxt->sax2)
10126
5.23M
#endif /* LIBXML_SAX1_ENABLED */
10127
5.23M
        name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
10128
3.25M
#ifdef LIBXML_SAX1_ENABLED
10129
3.25M
    else
10130
3.25M
  name = xmlParseStartTag(ctxt);
10131
8.48M
#endif /* LIBXML_SAX1_ENABLED */
10132
8.48M
    if (ctxt->instate == XML_PARSER_EOF)
10133
10
  return(-1);
10134
8.48M
    if (name == NULL) {
10135
263k
  spacePop(ctxt);
10136
263k
        return(-1);
10137
263k
    }
10138
8.21M
    nameNsPush(ctxt, name, prefix, URI, line, ctxt->nsNr - nsNr);
10139
8.21M
    ret = ctxt->node;
10140
10141
8.21M
#ifdef LIBXML_VALID_ENABLED
10142
    /*
10143
     * [ VC: Root Element Type ]
10144
     * The Name in the document type declaration must match the element
10145
     * type of the root element.
10146
     */
10147
8.21M
    if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
10148
8.21M
        ctxt->node && (ctxt->node == ctxt->myDoc->children))
10149
0
        ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
10150
8.21M
#endif /* LIBXML_VALID_ENABLED */
10151
10152
    /*
10153
     * Check for an Empty Element.
10154
     */
10155
8.21M
    if ((RAW == '/') && (NXT(1) == '>')) {
10156
2.31M
        SKIP(2);
10157
2.31M
  if (ctxt->sax2) {
10158
1.61M
      if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
10159
1.61M
    (!ctxt->disableSAX))
10160
924k
    ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
10161
1.61M
#ifdef LIBXML_SAX1_ENABLED
10162
1.61M
  } else {
10163
707k
      if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
10164
707k
    (!ctxt->disableSAX))
10165
562k
    ctxt->sax->endElement(ctxt->userData, name);
10166
707k
#endif /* LIBXML_SAX1_ENABLED */
10167
707k
  }
10168
2.31M
  namePop(ctxt);
10169
2.31M
  spacePop(ctxt);
10170
2.31M
  if (nsNr != ctxt->nsNr)
10171
8.00k
      nsPop(ctxt, ctxt->nsNr - nsNr);
10172
2.31M
  if ( ret != NULL && ctxt->record_info ) {
10173
0
     node_info.end_pos = ctxt->input->consumed +
10174
0
            (CUR_PTR - ctxt->input->base);
10175
0
     node_info.end_line = ctxt->input->line;
10176
0
     node_info.node = ret;
10177
0
     xmlParserAddNodeInfo(ctxt, &node_info);
10178
0
  }
10179
2.31M
  return(1);
10180
2.31M
    }
10181
5.89M
    if (RAW == '>') {
10182
5.64M
        NEXT1;
10183
5.64M
    } else {
10184
259k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
10185
259k
         "Couldn't find end of Start Tag %s line %d\n",
10186
259k
                    name, line, NULL);
10187
10188
  /*
10189
   * end of parsing of this node.
10190
   */
10191
259k
  nodePop(ctxt);
10192
259k
  namePop(ctxt);
10193
259k
  spacePop(ctxt);
10194
259k
  if (nsNr != ctxt->nsNr)
10195
8.79k
      nsPop(ctxt, ctxt->nsNr - nsNr);
10196
10197
  /*
10198
   * Capture end position and add node
10199
   */
10200
259k
  if ( ret != NULL && ctxt->record_info ) {
10201
0
     node_info.end_pos = ctxt->input->consumed +
10202
0
            (CUR_PTR - ctxt->input->base);
10203
0
     node_info.end_line = ctxt->input->line;
10204
0
     node_info.node = ret;
10205
0
     xmlParserAddNodeInfo(ctxt, &node_info);
10206
0
  }
10207
259k
  return(-1);
10208
259k
    }
10209
10210
5.64M
    return(0);
10211
5.89M
}
10212
10213
/**
10214
 * xmlParseElementEnd:
10215
 * @ctxt:  an XML parser context
10216
 *
10217
 * Parse the end of an XML element.
10218
 */
10219
static void
10220
2.23M
xmlParseElementEnd(xmlParserCtxtPtr ctxt) {
10221
2.23M
    xmlParserNodeInfo node_info;
10222
2.23M
    xmlNodePtr ret = ctxt->node;
10223
10224
2.23M
    if (ctxt->nameNr <= 0)
10225
0
        return;
10226
10227
    /*
10228
     * parse the end of tag: '</' should be here.
10229
     */
10230
2.23M
    if (ctxt->sax2) {
10231
1.31M
  xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
10232
1.31M
  namePop(ctxt);
10233
1.31M
    }
10234
914k
#ifdef LIBXML_SAX1_ENABLED
10235
914k
    else
10236
914k
  xmlParseEndTag1(ctxt, 0);
10237
2.23M
#endif /* LIBXML_SAX1_ENABLED */
10238
10239
    /*
10240
     * Capture end position and add node
10241
     */
10242
2.23M
    if ( ret != NULL && ctxt->record_info ) {
10243
0
       node_info.end_pos = ctxt->input->consumed +
10244
0
                          (CUR_PTR - ctxt->input->base);
10245
0
       node_info.end_line = ctxt->input->line;
10246
0
       node_info.node = ret;
10247
0
       xmlParserAddNodeInfo(ctxt, &node_info);
10248
0
    }
10249
2.23M
}
10250
10251
/**
10252
 * xmlParseVersionNum:
10253
 * @ctxt:  an XML parser context
10254
 *
10255
 * DEPRECATED: Internal function, don't use.
10256
 *
10257
 * parse the XML version value.
10258
 *
10259
 * [26] VersionNum ::= '1.' [0-9]+
10260
 *
10261
 * In practice allow [0-9].[0-9]+ at that level
10262
 *
10263
 * Returns the string giving the XML version number, or NULL
10264
 */
10265
xmlChar *
10266
373k
xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
10267
373k
    xmlChar *buf = NULL;
10268
373k
    int len = 0;
10269
373k
    int size = 10;
10270
373k
    xmlChar cur;
10271
10272
373k
    buf = (xmlChar *) xmlMallocAtomic(size);
10273
373k
    if (buf == NULL) {
10274
0
  xmlErrMemory(ctxt, NULL);
10275
0
  return(NULL);
10276
0
    }
10277
373k
    cur = CUR;
10278
373k
    if (!((cur >= '0') && (cur <= '9'))) {
10279
5.12k
  xmlFree(buf);
10280
5.12k
  return(NULL);
10281
5.12k
    }
10282
368k
    buf[len++] = cur;
10283
368k
    NEXT;
10284
368k
    cur=CUR;
10285
368k
    if (cur != '.') {
10286
4.59k
  xmlFree(buf);
10287
4.59k
  return(NULL);
10288
4.59k
    }
10289
363k
    buf[len++] = cur;
10290
363k
    NEXT;
10291
363k
    cur=CUR;
10292
798k
    while ((cur >= '0') && (cur <= '9')) {
10293
435k
  if (len + 1 >= size) {
10294
1.08k
      xmlChar *tmp;
10295
10296
1.08k
      size *= 2;
10297
1.08k
      tmp = (xmlChar *) xmlRealloc(buf, size);
10298
1.08k
      if (tmp == NULL) {
10299
0
          xmlFree(buf);
10300
0
    xmlErrMemory(ctxt, NULL);
10301
0
    return(NULL);
10302
0
      }
10303
1.08k
      buf = tmp;
10304
1.08k
  }
10305
435k
  buf[len++] = cur;
10306
435k
  NEXT;
10307
435k
  cur=CUR;
10308
435k
    }
10309
363k
    buf[len] = 0;
10310
363k
    return(buf);
10311
363k
}
10312
10313
/**
10314
 * xmlParseVersionInfo:
10315
 * @ctxt:  an XML parser context
10316
 *
10317
 * DEPRECATED: Internal function, don't use.
10318
 *
10319
 * parse the XML version.
10320
 *
10321
 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
10322
 *
10323
 * [25] Eq ::= S? '=' S?
10324
 *
10325
 * Returns the version string, e.g. "1.0"
10326
 */
10327
10328
xmlChar *
10329
430k
xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
10330
430k
    xmlChar *version = NULL;
10331
10332
430k
    if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
10333
383k
  SKIP(7);
10334
383k
  SKIP_BLANKS;
10335
383k
  if (RAW != '=') {
10336
5.28k
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10337
5.28k
      return(NULL);
10338
5.28k
        }
10339
378k
  NEXT;
10340
378k
  SKIP_BLANKS;
10341
378k
  if (RAW == '"') {
10342
329k
      NEXT;
10343
329k
      version = xmlParseVersionNum(ctxt);
10344
329k
      if (RAW != '"') {
10345
17.8k
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10346
17.8k
      } else
10347
311k
          NEXT;
10348
329k
  } else if (RAW == '\''){
10349
43.5k
      NEXT;
10350
43.5k
      version = xmlParseVersionNum(ctxt);
10351
43.5k
      if (RAW != '\'') {
10352
1.54k
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10353
1.54k
      } else
10354
41.9k
          NEXT;
10355
43.5k
  } else {
10356
5.44k
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10357
5.44k
  }
10358
378k
    }
10359
424k
    return(version);
10360
430k
}
10361
10362
/**
10363
 * xmlParseEncName:
10364
 * @ctxt:  an XML parser context
10365
 *
10366
 * DEPRECATED: Internal function, don't use.
10367
 *
10368
 * parse the XML encoding name
10369
 *
10370
 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
10371
 *
10372
 * Returns the encoding name value or NULL
10373
 */
10374
xmlChar *
10375
162k
xmlParseEncName(xmlParserCtxtPtr ctxt) {
10376
162k
    xmlChar *buf = NULL;
10377
162k
    int len = 0;
10378
162k
    int size = 10;
10379
162k
    xmlChar cur;
10380
10381
162k
    cur = CUR;
10382
162k
    if (((cur >= 'a') && (cur <= 'z')) ||
10383
162k
        ((cur >= 'A') && (cur <= 'Z'))) {
10384
162k
  buf = (xmlChar *) xmlMallocAtomic(size);
10385
162k
  if (buf == NULL) {
10386
0
      xmlErrMemory(ctxt, NULL);
10387
0
      return(NULL);
10388
0
  }
10389
10390
162k
  buf[len++] = cur;
10391
162k
  NEXT;
10392
162k
  cur = CUR;
10393
1.48M
  while (((cur >= 'a') && (cur <= 'z')) ||
10394
1.48M
         ((cur >= 'A') && (cur <= 'Z')) ||
10395
1.48M
         ((cur >= '0') && (cur <= '9')) ||
10396
1.48M
         (cur == '.') || (cur == '_') ||
10397
1.48M
         (cur == '-')) {
10398
1.31M
      if (len + 1 >= size) {
10399
63.4k
          xmlChar *tmp;
10400
10401
63.4k
    size *= 2;
10402
63.4k
    tmp = (xmlChar *) xmlRealloc(buf, size);
10403
63.4k
    if (tmp == NULL) {
10404
0
        xmlErrMemory(ctxt, NULL);
10405
0
        xmlFree(buf);
10406
0
        return(NULL);
10407
0
    }
10408
63.4k
    buf = tmp;
10409
63.4k
      }
10410
1.31M
      buf[len++] = cur;
10411
1.31M
      NEXT;
10412
1.31M
      cur = CUR;
10413
1.31M
      if (cur == 0) {
10414
948
          SHRINK;
10415
948
    GROW;
10416
948
    cur = CUR;
10417
948
      }
10418
1.31M
        }
10419
162k
  buf[len] = 0;
10420
162k
    } else {
10421
594
  xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
10422
594
    }
10423
162k
    return(buf);
10424
162k
}
10425
10426
/**
10427
 * xmlParseEncodingDecl:
10428
 * @ctxt:  an XML parser context
10429
 *
10430
 * DEPRECATED: Internal function, don't use.
10431
 *
10432
 * parse the XML encoding declaration
10433
 *
10434
 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' |  "'" EncName "'")
10435
 *
10436
 * this setups the conversion filters.
10437
 *
10438
 * Returns the encoding value or NULL
10439
 */
10440
10441
const xmlChar *
10442
297k
xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
10443
297k
    xmlChar *encoding = NULL;
10444
10445
297k
    SKIP_BLANKS;
10446
297k
    if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
10447
165k
  SKIP(8);
10448
165k
  SKIP_BLANKS;
10449
165k
  if (RAW != '=') {
10450
1.64k
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10451
1.64k
      return(NULL);
10452
1.64k
        }
10453
164k
  NEXT;
10454
164k
  SKIP_BLANKS;
10455
164k
  if (RAW == '"') {
10456
136k
      NEXT;
10457
136k
      encoding = xmlParseEncName(ctxt);
10458
136k
      if (RAW != '"') {
10459
6.42k
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10460
6.42k
    xmlFree((xmlChar *) encoding);
10461
6.42k
    return(NULL);
10462
6.42k
      } else
10463
130k
          NEXT;
10464
136k
  } else if (RAW == '\''){
10465
26.0k
      NEXT;
10466
26.0k
      encoding = xmlParseEncName(ctxt);
10467
26.0k
      if (RAW != '\'') {
10468
981
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10469
981
    xmlFree((xmlChar *) encoding);
10470
981
    return(NULL);
10471
981
      } else
10472
25.0k
          NEXT;
10473
26.0k
  } else {
10474
1.53k
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10475
1.53k
  }
10476
10477
        /*
10478
         * Non standard parsing, allowing the user to ignore encoding
10479
         */
10480
156k
        if (ctxt->options & XML_PARSE_IGNORE_ENC) {
10481
48.8k
      xmlFree((xmlChar *) encoding);
10482
48.8k
            return(NULL);
10483
48.8k
  }
10484
10485
  /*
10486
   * UTF-16 encoding switch has already taken place at this stage,
10487
   * more over the little-endian/big-endian selection is already done
10488
   */
10489
107k
        if ((encoding != NULL) &&
10490
107k
      ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
10491
106k
       (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
10492
      /*
10493
       * If no encoding was passed to the parser, that we are
10494
       * using UTF-16 and no decoder is present i.e. the
10495
       * document is apparently UTF-8 compatible, then raise an
10496
       * encoding mismatch fatal error
10497
       */
10498
426
      if ((ctxt->encoding == NULL) &&
10499
426
          (ctxt->input->buf != NULL) &&
10500
426
          (ctxt->input->buf->encoder == NULL)) {
10501
426
    xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING,
10502
426
      "Document labelled UTF-16 but has UTF-8 content\n");
10503
426
      }
10504
426
      if (ctxt->encoding != NULL)
10505
0
    xmlFree((xmlChar *) ctxt->encoding);
10506
426
      ctxt->encoding = encoding;
10507
426
  }
10508
  /*
10509
   * UTF-8 encoding is handled natively
10510
   */
10511
107k
        else if ((encoding != NULL) &&
10512
107k
      ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
10513
106k
       (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
10514
54.5k
      if (ctxt->encoding != NULL)
10515
0
    xmlFree((xmlChar *) ctxt->encoding);
10516
54.5k
      ctxt->encoding = encoding;
10517
54.5k
  }
10518
52.9k
  else if (encoding != NULL) {
10519
51.8k
      xmlCharEncodingHandlerPtr handler;
10520
10521
51.8k
      if (ctxt->input->encoding != NULL)
10522
0
    xmlFree((xmlChar *) ctxt->input->encoding);
10523
51.8k
      ctxt->input->encoding = encoding;
10524
10525
51.8k
            handler = xmlFindCharEncodingHandler((const char *) encoding);
10526
51.8k
      if (handler != NULL) {
10527
50.5k
    if (xmlSwitchToEncoding(ctxt, handler) < 0) {
10528
        /* failed to convert */
10529
194
        ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
10530
194
        return(NULL);
10531
194
    }
10532
50.5k
      } else {
10533
1.36k
    xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
10534
1.36k
      "Unsupported encoding %s\n", encoding);
10535
1.36k
    return(NULL);
10536
1.36k
      }
10537
51.8k
  }
10538
107k
    }
10539
237k
    return(encoding);
10540
297k
}
10541
10542
/**
10543
 * xmlParseSDDecl:
10544
 * @ctxt:  an XML parser context
10545
 *
10546
 * DEPRECATED: Internal function, don't use.
10547
 *
10548
 * parse the XML standalone declaration
10549
 *
10550
 * [32] SDDecl ::= S 'standalone' Eq
10551
 *                 (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
10552
 *
10553
 * [ VC: Standalone Document Declaration ]
10554
 * TODO The standalone document declaration must have the value "no"
10555
 * if any external markup declarations contain declarations of:
10556
 *  - attributes with default values, if elements to which these
10557
 *    attributes apply appear in the document without specifications
10558
 *    of values for these attributes, or
10559
 *  - entities (other than amp, lt, gt, apos, quot), if references
10560
 *    to those entities appear in the document, or
10561
 *  - attributes with values subject to normalization, where the
10562
 *    attribute appears in the document with a value which will change
10563
 *    as a result of normalization, or
10564
 *  - element types with element content, if white space occurs directly
10565
 *    within any instance of those types.
10566
 *
10567
 * Returns:
10568
 *   1 if standalone="yes"
10569
 *   0 if standalone="no"
10570
 *  -2 if standalone attribute is missing or invalid
10571
 *    (A standalone value of -2 means that the XML declaration was found,
10572
 *     but no value was specified for the standalone attribute).
10573
 */
10574
10575
int
10576
249k
xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
10577
249k
    int standalone = -2;
10578
10579
249k
    SKIP_BLANKS;
10580
249k
    if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
10581
49.9k
  SKIP(10);
10582
49.9k
        SKIP_BLANKS;
10583
49.9k
  if (RAW != '=') {
10584
533
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10585
533
      return(standalone);
10586
533
        }
10587
49.4k
  NEXT;
10588
49.4k
  SKIP_BLANKS;
10589
49.4k
        if (RAW == '\''){
10590
22.9k
      NEXT;
10591
22.9k
      if ((RAW == 'n') && (NXT(1) == 'o')) {
10592
18.8k
          standalone = 0;
10593
18.8k
                SKIP(2);
10594
18.8k
      } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10595
4.12k
                 (NXT(2) == 's')) {
10596
3.47k
          standalone = 1;
10597
3.47k
    SKIP(3);
10598
3.47k
            } else {
10599
647
    xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10600
647
      }
10601
22.9k
      if (RAW != '\'') {
10602
1.05k
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10603
1.05k
      } else
10604
21.9k
          NEXT;
10605
26.4k
  } else if (RAW == '"'){
10606
25.7k
      NEXT;
10607
25.7k
      if ((RAW == 'n') && (NXT(1) == 'o')) {
10608
11.6k
          standalone = 0;
10609
11.6k
    SKIP(2);
10610
14.1k
      } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10611
14.1k
                 (NXT(2) == 's')) {
10612
12.9k
          standalone = 1;
10613
12.9k
                SKIP(3);
10614
12.9k
            } else {
10615
1.20k
    xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10616
1.20k
      }
10617
25.7k
      if (RAW != '"') {
10618
1.76k
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10619
1.76k
      } else
10620
24.0k
          NEXT;
10621
25.7k
  } else {
10622
695
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10623
695
        }
10624
49.4k
    }
10625
248k
    return(standalone);
10626
249k
}
10627
10628
/**
10629
 * xmlParseXMLDecl:
10630
 * @ctxt:  an XML parser context
10631
 *
10632
 * DEPRECATED: Internal function, don't use.
10633
 *
10634
 * parse an XML declaration header
10635
 *
10636
 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10637
 */
10638
10639
void
10640
414k
xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
10641
414k
    xmlChar *version;
10642
10643
    /*
10644
     * This value for standalone indicates that the document has an
10645
     * XML declaration but it does not have a standalone attribute.
10646
     * It will be overwritten later if a standalone attribute is found.
10647
     */
10648
414k
    ctxt->input->standalone = -2;
10649
10650
    /*
10651
     * We know that '<?xml' is here.
10652
     */
10653
414k
    SKIP(5);
10654
10655
414k
    if (!IS_BLANK_CH(RAW)) {
10656
0
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10657
0
                 "Blank needed after '<?xml'\n");
10658
0
    }
10659
414k
    SKIP_BLANKS;
10660
10661
    /*
10662
     * We must have the VersionInfo here.
10663
     */
10664
414k
    version = xmlParseVersionInfo(ctxt);
10665
414k
    if (version == NULL) {
10666
62.6k
  xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
10667
352k
    } else {
10668
352k
  if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10669
      /*
10670
       * Changed here for XML-1.0 5th edition
10671
       */
10672
7.16k
      if (ctxt->options & XML_PARSE_OLD10) {
10673
2.00k
    xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10674
2.00k
                "Unsupported version '%s'\n",
10675
2.00k
                version);
10676
5.15k
      } else {
10677
5.15k
          if ((version[0] == '1') && ((version[1] == '.'))) {
10678
4.53k
        xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10679
4.53k
                      "Unsupported version '%s'\n",
10680
4.53k
          version, NULL);
10681
4.53k
    } else {
10682
621
        xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10683
621
              "Unsupported version '%s'\n",
10684
621
              version);
10685
621
    }
10686
5.15k
      }
10687
7.16k
  }
10688
352k
  if (ctxt->version != NULL)
10689
0
      xmlFree((void *) ctxt->version);
10690
352k
  ctxt->version = version;
10691
352k
    }
10692
10693
    /*
10694
     * We may have the encoding declaration
10695
     */
10696
414k
    if (!IS_BLANK_CH(RAW)) {
10697
205k
        if ((RAW == '?') && (NXT(1) == '>')) {
10698
132k
      SKIP(2);
10699
132k
      return;
10700
132k
  }
10701
72.7k
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10702
72.7k
    }
10703
281k
    xmlParseEncodingDecl(ctxt);
10704
281k
    if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10705
281k
         (ctxt->instate == XML_PARSER_EOF)) {
10706
  /*
10707
   * The XML REC instructs us to stop parsing right here
10708
   */
10709
1.27k
        return;
10710
1.27k
    }
10711
10712
    /*
10713
     * We may have the standalone status.
10714
     */
10715
280k
    if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
10716
32.9k
        if ((RAW == '?') && (NXT(1) == '>')) {
10717
31.3k
      SKIP(2);
10718
31.3k
      return;
10719
31.3k
  }
10720
1.58k
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10721
1.58k
    }
10722
10723
    /*
10724
     * We can grow the input buffer freely at that point
10725
     */
10726
249k
    GROW;
10727
10728
249k
    SKIP_BLANKS;
10729
249k
    ctxt->input->standalone = xmlParseSDDecl(ctxt);
10730
10731
249k
    SKIP_BLANKS;
10732
249k
    if ((RAW == '?') && (NXT(1) == '>')) {
10733
140k
        SKIP(2);
10734
140k
    } else if (RAW == '>') {
10735
        /* Deprecated old WD ... */
10736
1.10k
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10737
1.10k
  NEXT;
10738
108k
    } else {
10739
108k
        int c;
10740
10741
108k
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10742
4.43M
        while ((c = CUR) != 0) {
10743
4.42M
            NEXT;
10744
4.42M
            if (c == '>')
10745
96.6k
                break;
10746
4.42M
        }
10747
108k
    }
10748
249k
}
10749
10750
/**
10751
 * xmlParseMisc:
10752
 * @ctxt:  an XML parser context
10753
 *
10754
 * DEPRECATED: Internal function, don't use.
10755
 *
10756
 * parse an XML Misc* optional field.
10757
 *
10758
 * [27] Misc ::= Comment | PI |  S
10759
 */
10760
10761
void
10762
806k
xmlParseMisc(xmlParserCtxtPtr ctxt) {
10763
867k
    while (ctxt->instate != XML_PARSER_EOF) {
10764
867k
        SKIP_BLANKS;
10765
867k
        GROW;
10766
867k
        if ((RAW == '<') && (NXT(1) == '?')) {
10767
32.3k
      xmlParsePI(ctxt);
10768
834k
        } else if (CMP4(CUR_PTR, '<', '!', '-', '-')) {
10769
28.0k
      xmlParseComment(ctxt);
10770
806k
        } else {
10771
806k
            break;
10772
806k
        }
10773
867k
    }
10774
806k
}
10775
10776
/**
10777
 * xmlParseDocument:
10778
 * @ctxt:  an XML parser context
10779
 *
10780
 * parse an XML document (and build a tree if using the standard SAX
10781
 * interface).
10782
 *
10783
 * [1] document ::= prolog element Misc*
10784
 *
10785
 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
10786
 *
10787
 * Returns 0, -1 in case of error. the parser context is augmented
10788
 *                as a result of the parsing.
10789
 */
10790
10791
int
10792
377k
xmlParseDocument(xmlParserCtxtPtr ctxt) {
10793
377k
    xmlChar start[4];
10794
377k
    xmlCharEncoding enc;
10795
10796
377k
    xmlInitParser();
10797
10798
377k
    if ((ctxt == NULL) || (ctxt->input == NULL))
10799
0
        return(-1);
10800
10801
377k
    GROW;
10802
10803
    /*
10804
     * SAX: detecting the level.
10805
     */
10806
377k
    xmlDetectSAX2(ctxt);
10807
10808
    /*
10809
     * SAX: beginning of the document processing.
10810
     */
10811
377k
    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10812
377k
        ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10813
377k
    if (ctxt->instate == XML_PARSER_EOF)
10814
0
  return(-1);
10815
10816
377k
    if ((ctxt->encoding == NULL) &&
10817
377k
        ((ctxt->input->end - ctxt->input->cur) >= 4)) {
10818
  /*
10819
   * Get the 4 first bytes and decode the charset
10820
   * if enc != XML_CHAR_ENCODING_NONE
10821
   * plug some encoding conversion routines.
10822
   */
10823
365k
  start[0] = RAW;
10824
365k
  start[1] = NXT(1);
10825
365k
  start[2] = NXT(2);
10826
365k
  start[3] = NXT(3);
10827
365k
  enc = xmlDetectCharEncoding(&start[0], 4);
10828
365k
  if (enc != XML_CHAR_ENCODING_NONE) {
10829
155k
      xmlSwitchEncoding(ctxt, enc);
10830
155k
  }
10831
365k
    }
10832
10833
10834
377k
    if (CUR == 0) {
10835
3.28k
  xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10836
3.28k
  return(-1);
10837
3.28k
    }
10838
10839
    /*
10840
     * Check for the XMLDecl in the Prolog.
10841
     * do not GROW here to avoid the detected encoder to decode more
10842
     * than just the first line, unless the amount of data is really
10843
     * too small to hold "<?xml version="1.0" encoding="foo"
10844
     */
10845
374k
    if ((ctxt->input->end - ctxt->input->cur) < 35) {
10846
33.0k
       GROW;
10847
33.0k
    }
10848
374k
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10849
10850
  /*
10851
   * Note that we will switch encoding on the fly.
10852
   */
10853
142k
  xmlParseXMLDecl(ctxt);
10854
142k
  if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10855
142k
      (ctxt->instate == XML_PARSER_EOF)) {
10856
      /*
10857
       * The XML REC instructs us to stop parsing right here
10858
       */
10859
485
      return(-1);
10860
485
  }
10861
141k
  ctxt->standalone = ctxt->input->standalone;
10862
141k
  SKIP_BLANKS;
10863
231k
    } else {
10864
231k
  ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10865
231k
    }
10866
373k
    if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10867
353k
        ctxt->sax->startDocument(ctxt->userData);
10868
373k
    if (ctxt->instate == XML_PARSER_EOF)
10869
0
  return(-1);
10870
373k
    if ((ctxt->myDoc != NULL) && (ctxt->input != NULL) &&
10871
373k
        (ctxt->input->buf != NULL) && (ctxt->input->buf->compressed >= 0)) {
10872
0
  ctxt->myDoc->compression = ctxt->input->buf->compressed;
10873
0
    }
10874
10875
    /*
10876
     * The Misc part of the Prolog
10877
     */
10878
373k
    xmlParseMisc(ctxt);
10879
10880
    /*
10881
     * Then possibly doc type declaration(s) and more Misc
10882
     * (doctypedecl Misc*)?
10883
     */
10884
373k
    GROW;
10885
373k
    if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
10886
10887
162k
  ctxt->inSubset = 1;
10888
162k
  xmlParseDocTypeDecl(ctxt);
10889
162k
  if (RAW == '[') {
10890
119k
      ctxt->instate = XML_PARSER_DTD;
10891
119k
      xmlParseInternalSubset(ctxt);
10892
119k
      if (ctxt->instate == XML_PARSER_EOF)
10893
7.86k
    return(-1);
10894
119k
  }
10895
10896
  /*
10897
   * Create and update the external subset.
10898
   */
10899
154k
  ctxt->inSubset = 2;
10900
154k
  if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10901
154k
      (!ctxt->disableSAX))
10902
113k
      ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10903
113k
                                ctxt->extSubSystem, ctxt->extSubURI);
10904
154k
  if (ctxt->instate == XML_PARSER_EOF)
10905
4.81k
      return(-1);
10906
149k
  ctxt->inSubset = 0;
10907
10908
149k
        xmlCleanSpecialAttr(ctxt);
10909
10910
149k
  ctxt->instate = XML_PARSER_PROLOG;
10911
149k
  xmlParseMisc(ctxt);
10912
149k
    }
10913
10914
    /*
10915
     * Time to start parsing the tree itself
10916
     */
10917
360k
    GROW;
10918
360k
    if (RAW != '<') {
10919
76.8k
  xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10920
76.8k
           "Start tag expected, '<' not found\n");
10921
284k
    } else {
10922
284k
  ctxt->instate = XML_PARSER_CONTENT;
10923
284k
  xmlParseElement(ctxt);
10924
284k
  ctxt->instate = XML_PARSER_EPILOG;
10925
10926
10927
  /*
10928
   * The Misc part at the end
10929
   */
10930
284k
  xmlParseMisc(ctxt);
10931
10932
284k
  if (RAW != 0) {
10933
90.4k
      xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
10934
90.4k
  }
10935
284k
  ctxt->instate = XML_PARSER_EOF;
10936
284k
    }
10937
10938
    /*
10939
     * SAX: end of the document processing.
10940
     */
10941
360k
    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10942
360k
        ctxt->sax->endDocument(ctxt->userData);
10943
10944
    /*
10945
     * Remove locally kept entity definitions if the tree was not built
10946
     */
10947
360k
    if ((ctxt->myDoc != NULL) &&
10948
360k
  (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
10949
1.67k
  xmlFreeDoc(ctxt->myDoc);
10950
1.67k
  ctxt->myDoc = NULL;
10951
1.67k
    }
10952
10953
360k
    if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) {
10954
26.0k
        ctxt->myDoc->properties |= XML_DOC_WELLFORMED;
10955
26.0k
  if (ctxt->valid)
10956
17.2k
      ctxt->myDoc->properties |= XML_DOC_DTDVALID;
10957
26.0k
  if (ctxt->nsWellFormed)
10958
24.8k
      ctxt->myDoc->properties |= XML_DOC_NSVALID;
10959
26.0k
  if (ctxt->options & XML_PARSE_OLD10)
10960
7.45k
      ctxt->myDoc->properties |= XML_DOC_OLD10;
10961
26.0k
    }
10962
360k
    if (! ctxt->wellFormed) {
10963
334k
  ctxt->valid = 0;
10964
334k
  return(-1);
10965
334k
    }
10966
26.0k
    return(0);
10967
360k
}
10968
10969
/**
10970
 * xmlParseExtParsedEnt:
10971
 * @ctxt:  an XML parser context
10972
 *
10973
 * parse a general parsed entity
10974
 * An external general parsed entity is well-formed if it matches the
10975
 * production labeled extParsedEnt.
10976
 *
10977
 * [78] extParsedEnt ::= TextDecl? content
10978
 *
10979
 * Returns 0, -1 in case of error. the parser context is augmented
10980
 *                as a result of the parsing.
10981
 */
10982
10983
int
10984
0
xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
10985
0
    xmlChar start[4];
10986
0
    xmlCharEncoding enc;
10987
10988
0
    if ((ctxt == NULL) || (ctxt->input == NULL))
10989
0
        return(-1);
10990
10991
0
    xmlDetectSAX2(ctxt);
10992
10993
0
    GROW;
10994
10995
    /*
10996
     * SAX: beginning of the document processing.
10997
     */
10998
0
    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10999
0
        ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
11000
11001
    /*
11002
     * Get the 4 first bytes and decode the charset
11003
     * if enc != XML_CHAR_ENCODING_NONE
11004
     * plug some encoding conversion routines.
11005
     */
11006
0
    if ((ctxt->input->end - ctxt->input->cur) >= 4) {
11007
0
  start[0] = RAW;
11008
0
  start[1] = NXT(1);
11009
0
  start[2] = NXT(2);
11010
0
  start[3] = NXT(3);
11011
0
  enc = xmlDetectCharEncoding(start, 4);
11012
0
  if (enc != XML_CHAR_ENCODING_NONE) {
11013
0
      xmlSwitchEncoding(ctxt, enc);
11014
0
  }
11015
0
    }
11016
11017
11018
0
    if (CUR == 0) {
11019
0
  xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11020
0
    }
11021
11022
    /*
11023
     * Check for the XMLDecl in the Prolog.
11024
     */
11025
0
    GROW;
11026
0
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
11027
11028
  /*
11029
   * Note that we will switch encoding on the fly.
11030
   */
11031
0
  xmlParseXMLDecl(ctxt);
11032
0
  if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
11033
      /*
11034
       * The XML REC instructs us to stop parsing right here
11035
       */
11036
0
      return(-1);
11037
0
  }
11038
0
  SKIP_BLANKS;
11039
0
    } else {
11040
0
  ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11041
0
    }
11042
0
    if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
11043
0
        ctxt->sax->startDocument(ctxt->userData);
11044
0
    if (ctxt->instate == XML_PARSER_EOF)
11045
0
  return(-1);
11046
11047
    /*
11048
     * Doing validity checking on chunk doesn't make sense
11049
     */
11050
0
    ctxt->instate = XML_PARSER_CONTENT;
11051
0
    ctxt->validate = 0;
11052
0
    ctxt->loadsubset = 0;
11053
0
    ctxt->depth = 0;
11054
11055
0
    xmlParseContent(ctxt);
11056
0
    if (ctxt->instate == XML_PARSER_EOF)
11057
0
  return(-1);
11058
11059
0
    if ((RAW == '<') && (NXT(1) == '/')) {
11060
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11061
0
    } else if (RAW != 0) {
11062
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
11063
0
    }
11064
11065
    /*
11066
     * SAX: end of the document processing.
11067
     */
11068
0
    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11069
0
        ctxt->sax->endDocument(ctxt->userData);
11070
11071
0
    if (! ctxt->wellFormed) return(-1);
11072
0
    return(0);
11073
0
}
11074
11075
#ifdef LIBXML_PUSH_ENABLED
11076
/************************************************************************
11077
 *                  *
11078
 *    Progressive parsing interfaces        *
11079
 *                  *
11080
 ************************************************************************/
11081
11082
/**
11083
 * xmlParseLookupSequence:
11084
 * @ctxt:  an XML parser context
11085
 * @first:  the first char to lookup
11086
 * @next:  the next char to lookup or zero
11087
 * @third:  the next char to lookup or zero
11088
 *
11089
 * Try to find if a sequence (first, next, third) or  just (first next) or
11090
 * (first) is available in the input stream.
11091
 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
11092
 * to avoid rescanning sequences of bytes, it DOES change the state of the
11093
 * parser, do not use liberally.
11094
 *
11095
 * Returns the index to the current parsing point if the full sequence
11096
 *      is available, -1 otherwise.
11097
 */
11098
static int
11099
xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
11100
1.64M
                       xmlChar next, xmlChar third) {
11101
1.64M
    int base, len;
11102
1.64M
    xmlParserInputPtr in;
11103
1.64M
    const xmlChar *buf;
11104
11105
1.64M
    in = ctxt->input;
11106
1.64M
    if (in == NULL) return(-1);
11107
1.64M
    base = in->cur - in->base;
11108
1.64M
    if (base < 0) return(-1);
11109
1.64M
    if (ctxt->checkIndex > base)
11110
461k
        base = ctxt->checkIndex;
11111
1.64M
    if (in->buf == NULL) {
11112
0
  buf = in->base;
11113
0
  len = in->length;
11114
1.64M
    } else {
11115
1.64M
  buf = xmlBufContent(in->buf->buffer);
11116
1.64M
  len = xmlBufUse(in->buf->buffer);
11117
1.64M
    }
11118
    /* take into account the sequence length */
11119
1.64M
    if (third) len -= 2;
11120
1.26M
    else if (next) len --;
11121
1.41G
    for (;base < len;base++) {
11122
1.41G
        if (buf[base] == first) {
11123
2.01M
      if (third != 0) {
11124
732k
    if ((buf[base + 1] != next) ||
11125
732k
        (buf[base + 2] != third)) continue;
11126
1.28M
      } else if (next != 0) {
11127
745k
    if (buf[base + 1] != next) continue;
11128
745k
      }
11129
1.07M
      ctxt->checkIndex = 0;
11130
#ifdef DEBUG_PUSH
11131
      if (next == 0)
11132
    xmlGenericError(xmlGenericErrorContext,
11133
      "PP: lookup '%c' found at %d\n",
11134
      first, base);
11135
      else if (third == 0)
11136
    xmlGenericError(xmlGenericErrorContext,
11137
      "PP: lookup '%c%c' found at %d\n",
11138
      first, next, base);
11139
      else
11140
    xmlGenericError(xmlGenericErrorContext,
11141
      "PP: lookup '%c%c%c' found at %d\n",
11142
      first, next, third, base);
11143
#endif
11144
1.07M
      return(base - (in->cur - in->base));
11145
2.01M
  }
11146
1.41G
    }
11147
573k
    ctxt->checkIndex = base;
11148
#ifdef DEBUG_PUSH
11149
    if (next == 0)
11150
  xmlGenericError(xmlGenericErrorContext,
11151
    "PP: lookup '%c' failed\n", first);
11152
    else if (third == 0)
11153
  xmlGenericError(xmlGenericErrorContext,
11154
    "PP: lookup '%c%c' failed\n", first, next);
11155
    else
11156
  xmlGenericError(xmlGenericErrorContext,
11157
    "PP: lookup '%c%c%c' failed\n", first, next, third);
11158
#endif
11159
573k
    return(-1);
11160
1.64M
}
11161
11162
/**
11163
 * xmlParseGetLasts:
11164
 * @ctxt:  an XML parser context
11165
 * @lastlt:  pointer to store the last '<' from the input
11166
 * @lastgt:  pointer to store the last '>' from the input
11167
 *
11168
 * Lookup the last < and > in the current chunk
11169
 */
11170
static void
11171
xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
11172
4.30M
                 const xmlChar **lastgt) {
11173
4.30M
    const xmlChar *tmp;
11174
11175
4.30M
    if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
11176
0
  xmlGenericError(xmlGenericErrorContext,
11177
0
        "Internal error: xmlParseGetLasts\n");
11178
0
  return;
11179
0
    }
11180
4.30M
    if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
11181
2.54M
        tmp = ctxt->input->end;
11182
2.54M
  tmp--;
11183
1.15G
  while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
11184
2.54M
  if (tmp < ctxt->input->base) {
11185
128k
      *lastlt = NULL;
11186
128k
      *lastgt = NULL;
11187
2.41M
  } else {
11188
2.41M
      *lastlt = tmp;
11189
2.41M
      tmp++;
11190
341M
      while ((tmp < ctxt->input->end) && (*tmp != '>')) {
11191
339M
          if (*tmp == '\'') {
11192
120k
        tmp++;
11193
59.3M
        while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
11194
120k
        if (tmp < ctxt->input->end) tmp++;
11195
339M
    } else if (*tmp == '"') {
11196
1.10M
        tmp++;
11197
58.3M
        while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
11198
1.10M
        if (tmp < ctxt->input->end) tmp++;
11199
1.10M
    } else
11200
337M
        tmp++;
11201
339M
      }
11202
2.41M
      if (tmp < ctxt->input->end)
11203
1.07M
          *lastgt = tmp;
11204
1.34M
      else {
11205
1.34M
          tmp = *lastlt;
11206
1.34M
    tmp--;
11207
84.7M
    while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
11208
1.34M
    if (tmp >= ctxt->input->base)
11209
1.26M
        *lastgt = tmp;
11210
83.0k
    else
11211
83.0k
        *lastgt = NULL;
11212
1.34M
      }
11213
2.41M
  }
11214
2.54M
    } else {
11215
1.75M
        *lastlt = NULL;
11216
1.75M
  *lastgt = NULL;
11217
1.75M
    }
11218
4.30M
}
11219
/**
11220
 * xmlCheckCdataPush:
11221
 * @cur: pointer to the block of characters
11222
 * @len: length of the block in bytes
11223
 * @complete: 1 if complete CDATA block is passed in, 0 if partial block
11224
 *
11225
 * Check that the block of characters is okay as SCdata content [20]
11226
 *
11227
 * Returns the number of bytes to pass if okay, a negative index where an
11228
 *         UTF-8 error occurred otherwise
11229
 */
11230
static int
11231
110k
xmlCheckCdataPush(const xmlChar *utf, int len, int complete) {
11232
110k
    int ix;
11233
110k
    unsigned char c;
11234
110k
    int codepoint;
11235
11236
110k
    if ((utf == NULL) || (len <= 0))
11237
694
        return(0);
11238
11239
8.89M
    for (ix = 0; ix < len;) {      /* string is 0-terminated */
11240
8.86M
        c = utf[ix];
11241
8.86M
        if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
11242
5.19M
      if (c >= 0x20)
11243
5.05M
    ix++;
11244
140k
      else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
11245
114k
          ix++;
11246
26.2k
      else
11247
26.2k
          return(-ix);
11248
5.19M
  } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
11249
1.40M
      if (ix + 2 > len) return(complete ? -ix : ix);
11250
1.40M
      if ((utf[ix+1] & 0xc0 ) != 0x80)
11251
9.65k
          return(-ix);
11252
1.39M
      codepoint = (utf[ix] & 0x1f) << 6;
11253
1.39M
      codepoint |= utf[ix+1] & 0x3f;
11254
1.39M
      if (!xmlIsCharQ(codepoint))
11255
1.55k
          return(-ix);
11256
1.39M
      ix += 2;
11257
2.27M
  } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
11258
874k
      if (ix + 3 > len) return(complete ? -ix : ix);
11259
873k
      if (((utf[ix+1] & 0xc0) != 0x80) ||
11260
873k
          ((utf[ix+2] & 0xc0) != 0x80))
11261
10.3k
        return(-ix);
11262
862k
      codepoint = (utf[ix] & 0xf) << 12;
11263
862k
      codepoint |= (utf[ix+1] & 0x3f) << 6;
11264
862k
      codepoint |= utf[ix+2] & 0x3f;
11265
862k
      if (!xmlIsCharQ(codepoint))
11266
530
          return(-ix);
11267
862k
      ix += 3;
11268
1.39M
  } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
11269
1.38M
      if (ix + 4 > len) return(complete ? -ix : ix);
11270
1.38M
      if (((utf[ix+1] & 0xc0) != 0x80) ||
11271
1.38M
          ((utf[ix+2] & 0xc0) != 0x80) ||
11272
1.38M
    ((utf[ix+3] & 0xc0) != 0x80))
11273
13.1k
        return(-ix);
11274
1.37M
      codepoint = (utf[ix] & 0x7) << 18;
11275
1.37M
      codepoint |= (utf[ix+1] & 0x3f) << 12;
11276
1.37M
      codepoint |= (utf[ix+2] & 0x3f) << 6;
11277
1.37M
      codepoint |= utf[ix+3] & 0x3f;
11278
1.37M
      if (!xmlIsCharQ(codepoint))
11279
2.92k
          return(-ix);
11280
1.36M
      ix += 4;
11281
1.36M
  } else       /* unknown encoding */
11282
13.0k
      return(-ix);
11283
8.86M
      }
11284
27.5k
      return(ix);
11285
110k
}
11286
11287
/**
11288
 * xmlParseTryOrFinish:
11289
 * @ctxt:  an XML parser context
11290
 * @terminate:  last chunk indicator
11291
 *
11292
 * Try to progress on parsing
11293
 *
11294
 * Returns zero if no parsing was possible
11295
 */
11296
static int
11297
3.79M
xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
11298
3.79M
    int ret = 0;
11299
3.79M
    int avail, tlen;
11300
3.79M
    xmlChar cur, next;
11301
3.79M
    const xmlChar *lastlt, *lastgt;
11302
11303
3.79M
    if (ctxt->input == NULL)
11304
0
        return(0);
11305
11306
#ifdef DEBUG_PUSH
11307
    switch (ctxt->instate) {
11308
  case XML_PARSER_EOF:
11309
      xmlGenericError(xmlGenericErrorContext,
11310
        "PP: try EOF\n"); break;
11311
  case XML_PARSER_START:
11312
      xmlGenericError(xmlGenericErrorContext,
11313
        "PP: try START\n"); break;
11314
  case XML_PARSER_MISC:
11315
      xmlGenericError(xmlGenericErrorContext,
11316
        "PP: try MISC\n");break;
11317
  case XML_PARSER_COMMENT:
11318
      xmlGenericError(xmlGenericErrorContext,
11319
        "PP: try COMMENT\n");break;
11320
  case XML_PARSER_PROLOG:
11321
      xmlGenericError(xmlGenericErrorContext,
11322
        "PP: try PROLOG\n");break;
11323
  case XML_PARSER_START_TAG:
11324
      xmlGenericError(xmlGenericErrorContext,
11325
        "PP: try START_TAG\n");break;
11326
  case XML_PARSER_CONTENT:
11327
      xmlGenericError(xmlGenericErrorContext,
11328
        "PP: try CONTENT\n");break;
11329
  case XML_PARSER_CDATA_SECTION:
11330
      xmlGenericError(xmlGenericErrorContext,
11331
        "PP: try CDATA_SECTION\n");break;
11332
  case XML_PARSER_END_TAG:
11333
      xmlGenericError(xmlGenericErrorContext,
11334
        "PP: try END_TAG\n");break;
11335
  case XML_PARSER_ENTITY_DECL:
11336
      xmlGenericError(xmlGenericErrorContext,
11337
        "PP: try ENTITY_DECL\n");break;
11338
  case XML_PARSER_ENTITY_VALUE:
11339
      xmlGenericError(xmlGenericErrorContext,
11340
        "PP: try ENTITY_VALUE\n");break;
11341
  case XML_PARSER_ATTRIBUTE_VALUE:
11342
      xmlGenericError(xmlGenericErrorContext,
11343
        "PP: try ATTRIBUTE_VALUE\n");break;
11344
  case XML_PARSER_DTD:
11345
      xmlGenericError(xmlGenericErrorContext,
11346
        "PP: try DTD\n");break;
11347
  case XML_PARSER_EPILOG:
11348
      xmlGenericError(xmlGenericErrorContext,
11349
        "PP: try EPILOG\n");break;
11350
  case XML_PARSER_PI:
11351
      xmlGenericError(xmlGenericErrorContext,
11352
        "PP: try PI\n");break;
11353
        case XML_PARSER_IGNORE:
11354
            xmlGenericError(xmlGenericErrorContext,
11355
        "PP: try IGNORE\n");break;
11356
    }
11357
#endif
11358
11359
3.79M
    if ((ctxt->input != NULL) &&
11360
3.79M
        (ctxt->input->cur - ctxt->input->base > 4096)) {
11361
42.8k
  xmlSHRINK(ctxt);
11362
42.8k
  ctxt->checkIndex = 0;
11363
42.8k
    }
11364
3.79M
    xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11365
11366
25.4M
    while (ctxt->instate != XML_PARSER_EOF) {
11367
25.3M
  if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
11368
214k
      return(0);
11369
11370
25.1M
  if (ctxt->input == NULL) break;
11371
25.1M
  if (ctxt->input->buf == NULL)
11372
0
      avail = ctxt->input->length -
11373
0
              (ctxt->input->cur - ctxt->input->base);
11374
25.1M
  else {
11375
      /*
11376
       * If we are operating on converted input, try to flush
11377
       * remaining chars to avoid them stalling in the non-converted
11378
       * buffer. But do not do this in document start where
11379
       * encoding="..." may not have been read and we work on a
11380
       * guessed encoding.
11381
       */
11382
25.1M
      if ((ctxt->instate != XML_PARSER_START) &&
11383
25.1M
          (ctxt->input->buf->raw != NULL) &&
11384
25.1M
    (xmlBufIsEmpty(ctxt->input->buf->raw) == 0)) {
11385
59.9k
                size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
11386
59.9k
                                                 ctxt->input);
11387
59.9k
    size_t current = ctxt->input->cur - ctxt->input->base;
11388
11389
59.9k
    xmlParserInputBufferPush(ctxt->input->buf, 0, "");
11390
59.9k
                xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
11391
59.9k
                                      base, current);
11392
59.9k
      }
11393
25.1M
      avail = xmlBufUse(ctxt->input->buf->buffer) -
11394
25.1M
        (ctxt->input->cur - ctxt->input->base);
11395
25.1M
  }
11396
25.1M
        if (avail < 1)
11397
270k
      goto done;
11398
24.8M
        switch (ctxt->instate) {
11399
0
            case XML_PARSER_EOF:
11400
          /*
11401
     * Document parsing is done !
11402
     */
11403
0
          goto done;
11404
1.30M
            case XML_PARSER_START:
11405
1.30M
    if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
11406
406k
        xmlChar start[4];
11407
406k
        xmlCharEncoding enc;
11408
11409
        /*
11410
         * Very first chars read from the document flow.
11411
         */
11412
406k
        if (avail < 4)
11413
59.0k
      goto done;
11414
11415
        /*
11416
         * Get the 4 first bytes and decode the charset
11417
         * if enc != XML_CHAR_ENCODING_NONE
11418
         * plug some encoding conversion routines,
11419
         * else xmlSwitchEncoding will set to (default)
11420
         * UTF8.
11421
         */
11422
347k
        start[0] = RAW;
11423
347k
        start[1] = NXT(1);
11424
347k
        start[2] = NXT(2);
11425
347k
        start[3] = NXT(3);
11426
347k
        enc = xmlDetectCharEncoding(start, 4);
11427
347k
        xmlSwitchEncoding(ctxt, enc);
11428
347k
        break;
11429
406k
    }
11430
11431
899k
    if (avail < 2)
11432
466
        goto done;
11433
898k
    cur = ctxt->input->cur[0];
11434
898k
    next = ctxt->input->cur[1];
11435
898k
    if (cur == 0) {
11436
4.31k
        if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11437
4.31k
      ctxt->sax->setDocumentLocator(ctxt->userData,
11438
4.31k
                  &xmlDefaultSAXLocator);
11439
4.31k
        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11440
4.31k
        xmlHaltParser(ctxt);
11441
#ifdef DEBUG_PUSH
11442
        xmlGenericError(xmlGenericErrorContext,
11443
          "PP: entering EOF\n");
11444
#endif
11445
4.31k
        if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11446
4.31k
      ctxt->sax->endDocument(ctxt->userData);
11447
4.31k
        goto done;
11448
4.31k
    }
11449
894k
          if ((cur == '<') && (next == '?')) {
11450
        /* PI or XML decl */
11451
509k
        if (avail < 5) return(ret);
11452
509k
        if ((!terminate) &&
11453
509k
            (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11454
208k
      return(ret);
11455
301k
        if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11456
301k
      ctxt->sax->setDocumentLocator(ctxt->userData,
11457
301k
                  &xmlDefaultSAXLocator);
11458
301k
        if ((ctxt->input->cur[2] == 'x') &&
11459
301k
      (ctxt->input->cur[3] == 'm') &&
11460
301k
      (ctxt->input->cur[4] == 'l') &&
11461
301k
      (IS_BLANK_CH(ctxt->input->cur[5]))) {
11462
272k
      ret += 5;
11463
#ifdef DEBUG_PUSH
11464
      xmlGenericError(xmlGenericErrorContext,
11465
        "PP: Parsing XML Decl\n");
11466
#endif
11467
272k
      xmlParseXMLDecl(ctxt);
11468
272k
      if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
11469
          /*
11470
           * The XML REC instructs us to stop parsing right
11471
           * here
11472
           */
11473
789
          xmlHaltParser(ctxt);
11474
789
          return(0);
11475
789
      }
11476
271k
      ctxt->standalone = ctxt->input->standalone;
11477
271k
      if ((ctxt->encoding == NULL) &&
11478
271k
          (ctxt->input->encoding != NULL))
11479
33.1k
          ctxt->encoding = xmlStrdup(ctxt->input->encoding);
11480
271k
      if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11481
271k
          (!ctxt->disableSAX))
11482
236k
          ctxt->sax->startDocument(ctxt->userData);
11483
271k
      ctxt->instate = XML_PARSER_MISC;
11484
#ifdef DEBUG_PUSH
11485
      xmlGenericError(xmlGenericErrorContext,
11486
        "PP: entering MISC\n");
11487
#endif
11488
271k
        } else {
11489
28.7k
      ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11490
28.7k
      if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11491
28.7k
          (!ctxt->disableSAX))
11492
28.7k
          ctxt->sax->startDocument(ctxt->userData);
11493
28.7k
      ctxt->instate = XML_PARSER_MISC;
11494
#ifdef DEBUG_PUSH
11495
      xmlGenericError(xmlGenericErrorContext,
11496
        "PP: entering MISC\n");
11497
#endif
11498
28.7k
        }
11499
384k
    } else {
11500
384k
        if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11501
384k
      ctxt->sax->setDocumentLocator(ctxt->userData,
11502
384k
                  &xmlDefaultSAXLocator);
11503
384k
        ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11504
384k
        if (ctxt->version == NULL) {
11505
0
            xmlErrMemory(ctxt, NULL);
11506
0
      break;
11507
0
        }
11508
384k
        if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11509
384k
            (!ctxt->disableSAX))
11510
384k
      ctxt->sax->startDocument(ctxt->userData);
11511
384k
        ctxt->instate = XML_PARSER_MISC;
11512
#ifdef DEBUG_PUSH
11513
        xmlGenericError(xmlGenericErrorContext,
11514
          "PP: entering MISC\n");
11515
#endif
11516
384k
    }
11517
685k
    break;
11518
5.67M
            case XML_PARSER_START_TAG: {
11519
5.67M
          const xmlChar *name;
11520
5.67M
    const xmlChar *prefix = NULL;
11521
5.67M
    const xmlChar *URI = NULL;
11522
5.67M
                int line = ctxt->input->line;
11523
5.67M
    int nsNr = ctxt->nsNr;
11524
11525
5.67M
    if ((avail < 2) && (ctxt->inputNr == 1))
11526
0
        goto done;
11527
5.67M
    cur = ctxt->input->cur[0];
11528
5.67M
          if (cur != '<') {
11529
40.7k
        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11530
40.7k
        xmlHaltParser(ctxt);
11531
40.7k
        if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11532
40.7k
      ctxt->sax->endDocument(ctxt->userData);
11533
40.7k
        goto done;
11534
40.7k
    }
11535
5.63M
    if (!terminate) {
11536
5.46M
        if (ctxt->progressive) {
11537
            /* > can be found unescaped in attribute values */
11538
5.46M
            if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
11539
772k
          goto done;
11540
5.46M
        } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11541
0
      goto done;
11542
0
        }
11543
5.46M
    }
11544
4.86M
    if (ctxt->spaceNr == 0)
11545
23.1k
        spacePush(ctxt, -1);
11546
4.83M
    else if (*ctxt->space == -2)
11547
215k
        spacePush(ctxt, -1);
11548
4.62M
    else
11549
4.62M
        spacePush(ctxt, *ctxt->space);
11550
4.86M
#ifdef LIBXML_SAX1_ENABLED
11551
4.86M
    if (ctxt->sax2)
11552
2.98M
#endif /* LIBXML_SAX1_ENABLED */
11553
2.98M
        name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
11554
1.87M
#ifdef LIBXML_SAX1_ENABLED
11555
1.87M
    else
11556
1.87M
        name = xmlParseStartTag(ctxt);
11557
4.86M
#endif /* LIBXML_SAX1_ENABLED */
11558
4.86M
    if (ctxt->instate == XML_PARSER_EOF)
11559
0
        goto done;
11560
4.86M
    if (name == NULL) {
11561
53.2k
        spacePop(ctxt);
11562
53.2k
        xmlHaltParser(ctxt);
11563
53.2k
        if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11564
53.2k
      ctxt->sax->endDocument(ctxt->userData);
11565
53.2k
        goto done;
11566
53.2k
    }
11567
4.80M
#ifdef LIBXML_VALID_ENABLED
11568
    /*
11569
     * [ VC: Root Element Type ]
11570
     * The Name in the document type declaration must match
11571
     * the element type of the root element.
11572
     */
11573
4.80M
    if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
11574
4.80M
        ctxt->node && (ctxt->node == ctxt->myDoc->children))
11575
0
        ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
11576
4.80M
#endif /* LIBXML_VALID_ENABLED */
11577
11578
    /*
11579
     * Check for an Empty Element.
11580
     */
11581
4.80M
    if ((RAW == '/') && (NXT(1) == '>')) {
11582
2.07M
        SKIP(2);
11583
11584
2.07M
        if (ctxt->sax2) {
11585
1.36M
      if ((ctxt->sax != NULL) &&
11586
1.36M
          (ctxt->sax->endElementNs != NULL) &&
11587
1.36M
          (!ctxt->disableSAX))
11588
1.36M
          ctxt->sax->endElementNs(ctxt->userData, name,
11589
1.36M
                                  prefix, URI);
11590
1.36M
      if (ctxt->nsNr - nsNr > 0)
11591
7.30k
          nsPop(ctxt, ctxt->nsNr - nsNr);
11592
1.36M
#ifdef LIBXML_SAX1_ENABLED
11593
1.36M
        } else {
11594
711k
      if ((ctxt->sax != NULL) &&
11595
711k
          (ctxt->sax->endElement != NULL) &&
11596
711k
          (!ctxt->disableSAX))
11597
711k
          ctxt->sax->endElement(ctxt->userData, name);
11598
711k
#endif /* LIBXML_SAX1_ENABLED */
11599
711k
        }
11600
2.07M
        if (ctxt->instate == XML_PARSER_EOF)
11601
0
      goto done;
11602
2.07M
        spacePop(ctxt);
11603
2.07M
        if (ctxt->nameNr == 0) {
11604
13.7k
      ctxt->instate = XML_PARSER_EPILOG;
11605
2.06M
        } else {
11606
2.06M
      ctxt->instate = XML_PARSER_CONTENT;
11607
2.06M
        }
11608
2.07M
                    ctxt->progressive = 1;
11609
2.07M
        break;
11610
2.07M
    }
11611
2.72M
    if (RAW == '>') {
11612
2.39M
        NEXT;
11613
2.39M
    } else {
11614
338k
        xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
11615
338k
           "Couldn't find end of Start Tag %s\n",
11616
338k
           name);
11617
338k
        nodePop(ctxt);
11618
338k
        spacePop(ctxt);
11619
338k
    }
11620
2.72M
                nameNsPush(ctxt, name, prefix, URI, line, ctxt->nsNr - nsNr);
11621
11622
2.72M
    ctxt->instate = XML_PARSER_CONTENT;
11623
2.72M
                ctxt->progressive = 1;
11624
2.72M
                break;
11625
4.80M
      }
11626
13.9M
            case XML_PARSER_CONTENT: {
11627
13.9M
    int id;
11628
13.9M
    unsigned long cons;
11629
13.9M
    if ((avail < 2) && (ctxt->inputNr == 1))
11630
72.6k
        goto done;
11631
13.8M
    cur = ctxt->input->cur[0];
11632
13.8M
    next = ctxt->input->cur[1];
11633
11634
13.8M
    id = ctxt->input->id;
11635
13.8M
          cons = CUR_CONSUMED;
11636
13.8M
    if ((cur == '<') && (next == '/')) {
11637
1.66M
        ctxt->instate = XML_PARSER_END_TAG;
11638
1.66M
        break;
11639
12.1M
          } else if ((cur == '<') && (next == '?')) {
11640
32.3k
        if ((!terminate) &&
11641
32.3k
            (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11642
13.9k
                        ctxt->progressive = XML_PARSER_PI;
11643
13.9k
      goto done;
11644
13.9k
                    }
11645
18.3k
        xmlParsePI(ctxt);
11646
18.3k
        ctxt->instate = XML_PARSER_CONTENT;
11647
18.3k
                    ctxt->progressive = 1;
11648
12.1M
    } else if ((cur == '<') && (next != '!')) {
11649
4.41M
        ctxt->instate = XML_PARSER_START_TAG;
11650
4.41M
        break;
11651
7.73M
    } else if ((cur == '<') && (next == '!') &&
11652
7.73M
               (ctxt->input->cur[2] == '-') &&
11653
7.73M
         (ctxt->input->cur[3] == '-')) {
11654
159k
        int term;
11655
11656
159k
              if (avail < 4)
11657
0
            goto done;
11658
159k
        ctxt->input->cur += 4;
11659
159k
        term = xmlParseLookupSequence(ctxt, '-', '-', '>');
11660
159k
        ctxt->input->cur -= 4;
11661
159k
        if ((!terminate) && (term < 0)) {
11662
58.5k
                        ctxt->progressive = XML_PARSER_COMMENT;
11663
58.5k
      goto done;
11664
58.5k
                    }
11665
100k
        xmlParseComment(ctxt);
11666
100k
        ctxt->instate = XML_PARSER_CONTENT;
11667
100k
                    ctxt->progressive = 1;
11668
7.57M
    } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
11669
7.57M
        (ctxt->input->cur[2] == '[') &&
11670
7.57M
        (ctxt->input->cur[3] == 'C') &&
11671
7.57M
        (ctxt->input->cur[4] == 'D') &&
11672
7.57M
        (ctxt->input->cur[5] == 'A') &&
11673
7.57M
        (ctxt->input->cur[6] == 'T') &&
11674
7.57M
        (ctxt->input->cur[7] == 'A') &&
11675
7.57M
        (ctxt->input->cur[8] == '[')) {
11676
30.6k
        SKIP(9);
11677
30.6k
        ctxt->instate = XML_PARSER_CDATA_SECTION;
11678
30.6k
        break;
11679
7.54M
    } else if ((cur == '<') && (next == '!') &&
11680
7.54M
               (avail < 9)) {
11681
4.12k
        goto done;
11682
7.53M
    } else if (cur == '&') {
11683
394k
        if ((!terminate) &&
11684
394k
            (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
11685
104k
      goto done;
11686
290k
        xmlParseReference(ctxt);
11687
7.14M
    } else {
11688
        /* TODO Avoid the extra copy, handle directly !!! */
11689
        /*
11690
         * Goal of the following test is:
11691
         *  - minimize calls to the SAX 'character' callback
11692
         *    when they are mergeable
11693
         *  - handle an problem for isBlank when we only parse
11694
         *    a sequence of blank chars and the next one is
11695
         *    not available to check against '<' presence.
11696
         *  - tries to homogenize the differences in SAX
11697
         *    callbacks between the push and pull versions
11698
         *    of the parser.
11699
         */
11700
7.14M
        if ((ctxt->inputNr == 1) &&
11701
7.14M
            (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
11702
5.95M
      if (!terminate) {
11703
5.81M
          if (ctxt->progressive) {
11704
5.81M
        if ((lastlt == NULL) ||
11705
5.81M
            (ctxt->input->cur > lastlt))
11706
571k
            goto done;
11707
5.81M
          } else if (xmlParseLookupSequence(ctxt,
11708
0
                                            '<', 0, 0) < 0) {
11709
0
        goto done;
11710
0
          }
11711
5.81M
      }
11712
5.95M
                    }
11713
6.57M
        ctxt->checkIndex = 0;
11714
6.57M
        xmlParseCharData(ctxt, 0);
11715
6.57M
    }
11716
6.97M
    if ((cons == CUR_CONSUMED) && (id == ctxt->input->id)) {
11717
97.6k
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
11718
97.6k
                    "detected an error in element content\n");
11719
97.6k
        xmlHaltParser(ctxt);
11720
97.6k
        break;
11721
97.6k
    }
11722
6.88M
    break;
11723
6.97M
      }
11724
6.88M
            case XML_PARSER_END_TAG:
11725
1.75M
    if (avail < 2)
11726
0
        goto done;
11727
1.75M
    if (!terminate) {
11728
1.70M
        if (ctxt->progressive) {
11729
            /* > can be found unescaped in attribute values */
11730
1.70M
            if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
11731
85.9k
          goto done;
11732
1.70M
        } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11733
0
      goto done;
11734
0
        }
11735
1.70M
    }
11736
1.66M
    if (ctxt->sax2) {
11737
1.03M
              xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
11738
1.03M
        nameNsPop(ctxt);
11739
1.03M
    }
11740
627k
#ifdef LIBXML_SAX1_ENABLED
11741
627k
      else
11742
627k
        xmlParseEndTag1(ctxt, 0);
11743
1.66M
#endif /* LIBXML_SAX1_ENABLED */
11744
1.66M
    if (ctxt->instate == XML_PARSER_EOF) {
11745
        /* Nothing */
11746
1.66M
    } else if (ctxt->nameNr == 0) {
11747
78.1k
        ctxt->instate = XML_PARSER_EPILOG;
11748
1.58M
    } else {
11749
1.58M
        ctxt->instate = XML_PARSER_CONTENT;
11750
1.58M
    }
11751
1.66M
    break;
11752
136k
            case XML_PARSER_CDATA_SECTION: {
11753
          /*
11754
     * The Push mode need to have the SAX callback for
11755
     * cdataBlock merge back contiguous callbacks.
11756
     */
11757
136k
    int base;
11758
11759
136k
    base = xmlParseLookupSequence(ctxt, ']', ']', '>');
11760
136k
    if (base < 0) {
11761
82.9k
        if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
11762
57.2k
            int tmp;
11763
11764
57.2k
      tmp = xmlCheckCdataPush(ctxt->input->cur,
11765
57.2k
                              XML_PARSER_BIG_BUFFER_SIZE, 0);
11766
57.2k
      if (tmp < 0) {
11767
3.38k
          tmp = -tmp;
11768
3.38k
          ctxt->input->cur += tmp;
11769
3.38k
          goto encoding_error;
11770
3.38k
      }
11771
53.8k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
11772
53.8k
          if (ctxt->sax->cdataBlock != NULL)
11773
30.3k
        ctxt->sax->cdataBlock(ctxt->userData,
11774
30.3k
                              ctxt->input->cur, tmp);
11775
23.5k
          else if (ctxt->sax->characters != NULL)
11776
23.5k
        ctxt->sax->characters(ctxt->userData,
11777
23.5k
                              ctxt->input->cur, tmp);
11778
53.8k
      }
11779
53.8k
      if (ctxt->instate == XML_PARSER_EOF)
11780
0
          goto done;
11781
53.8k
      SKIPL(tmp);
11782
53.8k
      ctxt->checkIndex = 0;
11783
53.8k
        }
11784
79.5k
        goto done;
11785
82.9k
    } else {
11786
53.6k
        int tmp;
11787
11788
53.6k
        tmp = xmlCheckCdataPush(ctxt->input->cur, base, 1);
11789
53.6k
        if ((tmp < 0) || (tmp != base)) {
11790
39.3k
      tmp = -tmp;
11791
39.3k
      ctxt->input->cur += tmp;
11792
39.3k
      goto encoding_error;
11793
39.3k
        }
11794
14.3k
        if ((ctxt->sax != NULL) && (base == 0) &&
11795
14.3k
            (ctxt->sax->cdataBlock != NULL) &&
11796
14.3k
            (!ctxt->disableSAX)) {
11797
      /*
11798
       * Special case to provide identical behaviour
11799
       * between pull and push parsers on enpty CDATA
11800
       * sections
11801
       */
11802
443
       if ((ctxt->input->cur - ctxt->input->base >= 9) &&
11803
443
           (!strncmp((const char *)&ctxt->input->cur[-9],
11804
443
                     "<![CDATA[", 9)))
11805
443
           ctxt->sax->cdataBlock(ctxt->userData,
11806
443
                                 BAD_CAST "", 0);
11807
13.8k
        } else if ((ctxt->sax != NULL) && (base > 0) &&
11808
13.8k
      (!ctxt->disableSAX)) {
11809
13.6k
      if (ctxt->sax->cdataBlock != NULL)
11810
10.2k
          ctxt->sax->cdataBlock(ctxt->userData,
11811
10.2k
              ctxt->input->cur, base);
11812
3.40k
      else if (ctxt->sax->characters != NULL)
11813
3.40k
          ctxt->sax->characters(ctxt->userData,
11814
3.40k
              ctxt->input->cur, base);
11815
13.6k
        }
11816
14.3k
        if (ctxt->instate == XML_PARSER_EOF)
11817
0
      goto done;
11818
14.3k
        SKIPL(base + 3);
11819
14.3k
        ctxt->checkIndex = 0;
11820
14.3k
        ctxt->instate = XML_PARSER_CONTENT;
11821
#ifdef DEBUG_PUSH
11822
        xmlGenericError(xmlGenericErrorContext,
11823
          "PP: entering CONTENT\n");
11824
#endif
11825
14.3k
    }
11826
14.3k
    break;
11827
136k
      }
11828
785k
            case XML_PARSER_MISC:
11829
785k
    SKIP_BLANKS;
11830
785k
    if (ctxt->input->buf == NULL)
11831
0
        avail = ctxt->input->length -
11832
0
                (ctxt->input->cur - ctxt->input->base);
11833
785k
    else
11834
785k
        avail = xmlBufUse(ctxt->input->buf->buffer) -
11835
785k
                (ctxt->input->cur - ctxt->input->base);
11836
785k
    if (avail < 2)
11837
13.2k
        goto done;
11838
772k
    cur = ctxt->input->cur[0];
11839
772k
    next = ctxt->input->cur[1];
11840
772k
          if ((cur == '<') && (next == '?')) {
11841
38.4k
        if ((!terminate) &&
11842
38.4k
            (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11843
3.99k
                        ctxt->progressive = XML_PARSER_PI;
11844
3.99k
      goto done;
11845
3.99k
                    }
11846
#ifdef DEBUG_PUSH
11847
        xmlGenericError(xmlGenericErrorContext,
11848
          "PP: Parsing PI\n");
11849
#endif
11850
34.4k
        xmlParsePI(ctxt);
11851
34.4k
        if (ctxt->instate == XML_PARSER_EOF)
11852
0
      goto done;
11853
34.4k
        ctxt->instate = XML_PARSER_MISC;
11854
34.4k
                    ctxt->progressive = 1;
11855
34.4k
        ctxt->checkIndex = 0;
11856
733k
    } else if ((cur == '<') && (next == '!') &&
11857
733k
        (ctxt->input->cur[2] == '-') &&
11858
733k
        (ctxt->input->cur[3] == '-')) {
11859
54.6k
        if ((!terminate) &&
11860
54.6k
            (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11861
33.6k
                        ctxt->progressive = XML_PARSER_COMMENT;
11862
33.6k
      goto done;
11863
33.6k
                    }
11864
#ifdef DEBUG_PUSH
11865
        xmlGenericError(xmlGenericErrorContext,
11866
          "PP: Parsing Comment\n");
11867
#endif
11868
20.9k
        xmlParseComment(ctxt);
11869
20.9k
        if (ctxt->instate == XML_PARSER_EOF)
11870
0
      goto done;
11871
20.9k
        ctxt->instate = XML_PARSER_MISC;
11872
20.9k
                    ctxt->progressive = 1;
11873
20.9k
        ctxt->checkIndex = 0;
11874
679k
    } else if ((cur == '<') && (next == '!') &&
11875
679k
        (ctxt->input->cur[2] == 'D') &&
11876
679k
        (ctxt->input->cur[3] == 'O') &&
11877
679k
        (ctxt->input->cur[4] == 'C') &&
11878
679k
        (ctxt->input->cur[5] == 'T') &&
11879
679k
        (ctxt->input->cur[6] == 'Y') &&
11880
679k
        (ctxt->input->cur[7] == 'P') &&
11881
679k
        (ctxt->input->cur[8] == 'E')) {
11882
338k
        if ((!terminate) &&
11883
338k
            (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) {
11884
42.7k
                        ctxt->progressive = XML_PARSER_DTD;
11885
42.7k
      goto done;
11886
42.7k
                    }
11887
#ifdef DEBUG_PUSH
11888
        xmlGenericError(xmlGenericErrorContext,
11889
          "PP: Parsing internal subset\n");
11890
#endif
11891
295k
        ctxt->inSubset = 1;
11892
295k
                    ctxt->progressive = 0;
11893
295k
        ctxt->checkIndex = 0;
11894
295k
        xmlParseDocTypeDecl(ctxt);
11895
295k
        if (ctxt->instate == XML_PARSER_EOF)
11896
0
      goto done;
11897
295k
        if (RAW == '[') {
11898
218k
      ctxt->instate = XML_PARSER_DTD;
11899
#ifdef DEBUG_PUSH
11900
      xmlGenericError(xmlGenericErrorContext,
11901
        "PP: entering DTD\n");
11902
#endif
11903
218k
        } else {
11904
      /*
11905
       * Create and update the external subset.
11906
       */
11907
76.8k
      ctxt->inSubset = 2;
11908
76.8k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11909
76.8k
          (ctxt->sax->externalSubset != NULL))
11910
70.5k
          ctxt->sax->externalSubset(ctxt->userData,
11911
70.5k
            ctxt->intSubName, ctxt->extSubSystem,
11912
70.5k
            ctxt->extSubURI);
11913
76.8k
      ctxt->inSubset = 0;
11914
76.8k
      xmlCleanSpecialAttr(ctxt);
11915
76.8k
      ctxt->instate = XML_PARSER_PROLOG;
11916
#ifdef DEBUG_PUSH
11917
      xmlGenericError(xmlGenericErrorContext,
11918
        "PP: entering PROLOG\n");
11919
#endif
11920
76.8k
        }
11921
340k
    } else if ((cur == '<') && (next == '!') &&
11922
340k
               (avail < 9)) {
11923
3.92k
        goto done;
11924
336k
    } else {
11925
336k
        ctxt->instate = XML_PARSER_START_TAG;
11926
336k
        ctxt->progressive = XML_PARSER_START_TAG;
11927
336k
        xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11928
#ifdef DEBUG_PUSH
11929
        xmlGenericError(xmlGenericErrorContext,
11930
          "PP: entering START_TAG\n");
11931
#endif
11932
336k
    }
11933
687k
    break;
11934
687k
            case XML_PARSER_PROLOG:
11935
239k
    SKIP_BLANKS;
11936
239k
    if (ctxt->input->buf == NULL)
11937
0
        avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11938
239k
    else
11939
239k
        avail = xmlBufUse(ctxt->input->buf->buffer) -
11940
239k
                            (ctxt->input->cur - ctxt->input->base);
11941
239k
    if (avail < 2)
11942
12.5k
        goto done;
11943
226k
    cur = ctxt->input->cur[0];
11944
226k
    next = ctxt->input->cur[1];
11945
226k
          if ((cur == '<') && (next == '?')) {
11946
28.2k
        if ((!terminate) &&
11947
28.2k
            (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11948
11.8k
                        ctxt->progressive = XML_PARSER_PI;
11949
11.8k
      goto done;
11950
11.8k
                    }
11951
#ifdef DEBUG_PUSH
11952
        xmlGenericError(xmlGenericErrorContext,
11953
          "PP: Parsing PI\n");
11954
#endif
11955
16.3k
        xmlParsePI(ctxt);
11956
16.3k
        if (ctxt->instate == XML_PARSER_EOF)
11957
0
      goto done;
11958
16.3k
        ctxt->instate = XML_PARSER_PROLOG;
11959
16.3k
                    ctxt->progressive = 1;
11960
198k
    } else if ((cur == '<') && (next == '!') &&
11961
198k
        (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11962
26.5k
        if ((!terminate) &&
11963
26.5k
            (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11964
4.11k
                        ctxt->progressive = XML_PARSER_COMMENT;
11965
4.11k
      goto done;
11966
4.11k
                    }
11967
#ifdef DEBUG_PUSH
11968
        xmlGenericError(xmlGenericErrorContext,
11969
          "PP: Parsing Comment\n");
11970
#endif
11971
22.4k
        xmlParseComment(ctxt);
11972
22.4k
        if (ctxt->instate == XML_PARSER_EOF)
11973
0
      goto done;
11974
22.4k
        ctxt->instate = XML_PARSER_PROLOG;
11975
22.4k
                    ctxt->progressive = 1;
11976
171k
    } else if ((cur == '<') && (next == '!') &&
11977
171k
               (avail < 4)) {
11978
577
        goto done;
11979
171k
    } else {
11980
171k
        ctxt->instate = XML_PARSER_START_TAG;
11981
171k
        if (ctxt->progressive == 0)
11982
149k
      ctxt->progressive = XML_PARSER_START_TAG;
11983
171k
        xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11984
#ifdef DEBUG_PUSH
11985
        xmlGenericError(xmlGenericErrorContext,
11986
          "PP: entering START_TAG\n");
11987
#endif
11988
171k
    }
11989
210k
    break;
11990
210k
            case XML_PARSER_EPILOG:
11991
95.4k
    SKIP_BLANKS;
11992
95.4k
    if (ctxt->input->buf == NULL)
11993
0
        avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11994
95.4k
    else
11995
95.4k
        avail = xmlBufUse(ctxt->input->buf->buffer) -
11996
95.4k
                            (ctxt->input->cur - ctxt->input->base);
11997
95.4k
    if (avail < 2)
11998
70.5k
        goto done;
11999
24.9k
    cur = ctxt->input->cur[0];
12000
24.9k
    next = ctxt->input->cur[1];
12001
24.9k
          if ((cur == '<') && (next == '?')) {
12002
4.27k
        if ((!terminate) &&
12003
4.27k
            (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
12004
2.91k
                        ctxt->progressive = XML_PARSER_PI;
12005
2.91k
      goto done;
12006
2.91k
                    }
12007
#ifdef DEBUG_PUSH
12008
        xmlGenericError(xmlGenericErrorContext,
12009
          "PP: Parsing PI\n");
12010
#endif
12011
1.36k
        xmlParsePI(ctxt);
12012
1.36k
        if (ctxt->instate == XML_PARSER_EOF)
12013
0
      goto done;
12014
1.36k
        ctxt->instate = XML_PARSER_EPILOG;
12015
1.36k
                    ctxt->progressive = 1;
12016
20.6k
    } else if ((cur == '<') && (next == '!') &&
12017
20.6k
        (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
12018
4.26k
        if ((!terminate) &&
12019
4.26k
            (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
12020
2.93k
                        ctxt->progressive = XML_PARSER_COMMENT;
12021
2.93k
      goto done;
12022
2.93k
                    }
12023
#ifdef DEBUG_PUSH
12024
        xmlGenericError(xmlGenericErrorContext,
12025
          "PP: Parsing Comment\n");
12026
#endif
12027
1.32k
        xmlParseComment(ctxt);
12028
1.32k
        if (ctxt->instate == XML_PARSER_EOF)
12029
0
      goto done;
12030
1.32k
        ctxt->instate = XML_PARSER_EPILOG;
12031
1.32k
                    ctxt->progressive = 1;
12032
16.4k
    } else if ((cur == '<') && (next == '!') &&
12033
16.4k
               (avail < 4)) {
12034
562
        goto done;
12035
15.8k
    } else {
12036
15.8k
        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
12037
15.8k
        xmlHaltParser(ctxt);
12038
#ifdef DEBUG_PUSH
12039
        xmlGenericError(xmlGenericErrorContext,
12040
          "PP: entering EOF\n");
12041
#endif
12042
15.8k
        if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
12043
15.8k
      ctxt->sax->endDocument(ctxt->userData);
12044
15.8k
        goto done;
12045
15.8k
    }
12046
2.68k
    break;
12047
967k
            case XML_PARSER_DTD: {
12048
          /*
12049
     * Sorry but progressive parsing of the internal subset
12050
     * is not expected to be supported. We first check that
12051
     * the full content of the internal subset is available and
12052
     * the parsing is launched only at that point.
12053
     * Internal subset ends up with "']' S? '>'" in an unescaped
12054
     * section and not in a ']]>' sequence which are conditional
12055
     * sections (whoever argued to keep that crap in XML deserve
12056
     * a place in hell !).
12057
     */
12058
967k
    int base, i;
12059
967k
    xmlChar *buf;
12060
967k
          xmlChar quote = 0;
12061
967k
                size_t use;
12062
12063
967k
    base = ctxt->input->cur - ctxt->input->base;
12064
967k
    if (base < 0) return(0);
12065
967k
    if (ctxt->checkIndex > base)
12066
543k
        base = ctxt->checkIndex;
12067
967k
    buf = xmlBufContent(ctxt->input->buf->buffer);
12068
967k
                use = xmlBufUse(ctxt->input->buf->buffer);
12069
2.12G
    for (;(unsigned int) base < use; base++) {
12070
2.12G
        if (quote != 0) {
12071
1.23G
            if (buf[base] == quote)
12072
60.8M
          quote = 0;
12073
1.23G
      continue;
12074
1.23G
        }
12075
883M
        if ((quote == 0) && (buf[base] == '<')) {
12076
25.7M
            int found  = 0;
12077
      /* special handling of comments */
12078
25.7M
            if (((unsigned int) base + 4 < use) &&
12079
25.7M
          (buf[base + 1] == '!') &&
12080
25.7M
          (buf[base + 2] == '-') &&
12081
25.7M
          (buf[base + 3] == '-')) {
12082
162M
          for (;(unsigned int) base + 3 < use; base++) {
12083
162M
        if ((buf[base] == '-') &&
12084
162M
            (buf[base + 1] == '-') &&
12085
162M
            (buf[base + 2] == '>')) {
12086
1.69M
            found = 1;
12087
1.69M
            base += 2;
12088
1.69M
            break;
12089
1.69M
        }
12090
162M
                }
12091
1.79M
          if (!found) {
12092
#if 0
12093
              fprintf(stderr, "unfinished comment\n");
12094
#endif
12095
93.7k
              break; /* for */
12096
93.7k
                }
12097
1.69M
                continue;
12098
1.79M
      }
12099
25.7M
        }
12100
882M
        if (buf[base] == '"') {
12101
59.4M
            quote = '"';
12102
59.4M
      continue;
12103
59.4M
        }
12104
822M
        if (buf[base] == '\'') {
12105
1.60M
            quote = '\'';
12106
1.60M
      continue;
12107
1.60M
        }
12108
821M
        if (buf[base] == ']') {
12109
#if 0
12110
            fprintf(stderr, "%c%c%c%c: ", buf[base],
12111
              buf[base + 1], buf[base + 2], buf[base + 3]);
12112
#endif
12113
288k
            if ((unsigned int) base +1 >= use)
12114
1.97k
          break;
12115
286k
      if (buf[base + 1] == ']') {
12116
          /* conditional crap, skip both ']' ! */
12117
24.7k
          base++;
12118
24.7k
          continue;
12119
24.7k
      }
12120
1.30M
            for (i = 1; (unsigned int) base + i < use; i++) {
12121
1.30M
          if (buf[base + i] == '>') {
12122
#if 0
12123
              fprintf(stderr, "found\n");
12124
#endif
12125
161k
              goto found_end_int_subset;
12126
161k
          }
12127
1.14M
          if (!IS_BLANK_CH(buf[base + i])) {
12128
#if 0
12129
              fprintf(stderr, "not found\n");
12130
#endif
12131
99.2k
              goto not_end_of_int_subset;
12132
99.2k
          }
12133
1.14M
      }
12134
#if 0
12135
      fprintf(stderr, "end of stream\n");
12136
#endif
12137
961
            break;
12138
12139
261k
        }
12140
820M
not_end_of_int_subset:
12141
820M
                    continue; /* for */
12142
821M
    }
12143
    /*
12144
     * We didn't find the end of the internal subset
12145
     */
12146
805k
                if (quote == 0)
12147
575k
                    ctxt->checkIndex = base;
12148
229k
                else
12149
229k
                    ctxt->checkIndex = 0;
12150
#ifdef DEBUG_PUSH
12151
    if (next == 0)
12152
        xmlGenericError(xmlGenericErrorContext,
12153
          "PP: lookup of int subset end filed\n");
12154
#endif
12155
805k
          goto done;
12156
12157
161k
found_end_int_subset:
12158
161k
                ctxt->checkIndex = 0;
12159
161k
    xmlParseInternalSubset(ctxt);
12160
161k
    if (ctxt->instate == XML_PARSER_EOF)
12161
4.92k
        goto done;
12162
156k
    ctxt->inSubset = 2;
12163
156k
    if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
12164
156k
        (ctxt->sax->externalSubset != NULL))
12165
128k
        ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
12166
128k
          ctxt->extSubSystem, ctxt->extSubURI);
12167
156k
    ctxt->inSubset = 0;
12168
156k
    xmlCleanSpecialAttr(ctxt);
12169
156k
    if (ctxt->instate == XML_PARSER_EOF)
12170
3.67k
        goto done;
12171
152k
    ctxt->instate = XML_PARSER_PROLOG;
12172
152k
    ctxt->checkIndex = 0;
12173
#ifdef DEBUG_PUSH
12174
    xmlGenericError(xmlGenericErrorContext,
12175
      "PP: entering PROLOG\n");
12176
#endif
12177
152k
                break;
12178
156k
      }
12179
0
            case XML_PARSER_COMMENT:
12180
0
    xmlGenericError(xmlGenericErrorContext,
12181
0
      "PP: internal error, state == COMMENT\n");
12182
0
    ctxt->instate = XML_PARSER_CONTENT;
12183
#ifdef DEBUG_PUSH
12184
    xmlGenericError(xmlGenericErrorContext,
12185
      "PP: entering CONTENT\n");
12186
#endif
12187
0
    break;
12188
0
            case XML_PARSER_IGNORE:
12189
0
    xmlGenericError(xmlGenericErrorContext,
12190
0
      "PP: internal error, state == IGNORE");
12191
0
          ctxt->instate = XML_PARSER_DTD;
12192
#ifdef DEBUG_PUSH
12193
    xmlGenericError(xmlGenericErrorContext,
12194
      "PP: entering DTD\n");
12195
#endif
12196
0
          break;
12197
0
            case XML_PARSER_PI:
12198
0
    xmlGenericError(xmlGenericErrorContext,
12199
0
      "PP: internal error, state == PI\n");
12200
0
    ctxt->instate = XML_PARSER_CONTENT;
12201
#ifdef DEBUG_PUSH
12202
    xmlGenericError(xmlGenericErrorContext,
12203
      "PP: entering CONTENT\n");
12204
#endif
12205
0
    break;
12206
0
            case XML_PARSER_ENTITY_DECL:
12207
0
    xmlGenericError(xmlGenericErrorContext,
12208
0
      "PP: internal error, state == ENTITY_DECL\n");
12209
0
    ctxt->instate = XML_PARSER_DTD;
12210
#ifdef DEBUG_PUSH
12211
    xmlGenericError(xmlGenericErrorContext,
12212
      "PP: entering DTD\n");
12213
#endif
12214
0
    break;
12215
0
            case XML_PARSER_ENTITY_VALUE:
12216
0
    xmlGenericError(xmlGenericErrorContext,
12217
0
      "PP: internal error, state == ENTITY_VALUE\n");
12218
0
    ctxt->instate = XML_PARSER_CONTENT;
12219
#ifdef DEBUG_PUSH
12220
    xmlGenericError(xmlGenericErrorContext,
12221
      "PP: entering DTD\n");
12222
#endif
12223
0
    break;
12224
0
            case XML_PARSER_ATTRIBUTE_VALUE:
12225
0
    xmlGenericError(xmlGenericErrorContext,
12226
0
      "PP: internal error, state == ATTRIBUTE_VALUE\n");
12227
0
    ctxt->instate = XML_PARSER_START_TAG;
12228
#ifdef DEBUG_PUSH
12229
    xmlGenericError(xmlGenericErrorContext,
12230
      "PP: entering START_TAG\n");
12231
#endif
12232
0
    break;
12233
0
            case XML_PARSER_SYSTEM_LITERAL:
12234
0
    xmlGenericError(xmlGenericErrorContext,
12235
0
      "PP: internal error, state == SYSTEM_LITERAL\n");
12236
0
    ctxt->instate = XML_PARSER_START_TAG;
12237
#ifdef DEBUG_PUSH
12238
    xmlGenericError(xmlGenericErrorContext,
12239
      "PP: entering START_TAG\n");
12240
#endif
12241
0
    break;
12242
0
            case XML_PARSER_PUBLIC_LITERAL:
12243
0
    xmlGenericError(xmlGenericErrorContext,
12244
0
      "PP: internal error, state == PUBLIC_LITERAL\n");
12245
0
    ctxt->instate = XML_PARSER_START_TAG;
12246
#ifdef DEBUG_PUSH
12247
    xmlGenericError(xmlGenericErrorContext,
12248
      "PP: entering START_TAG\n");
12249
#endif
12250
0
    break;
12251
24.8M
  }
12252
24.8M
    }
12253
3.32M
done:
12254
#ifdef DEBUG_PUSH
12255
    xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
12256
#endif
12257
3.32M
    return(ret);
12258
42.6k
encoding_error:
12259
42.6k
    {
12260
42.6k
        char buffer[150];
12261
12262
42.6k
  snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
12263
42.6k
      ctxt->input->cur[0], ctxt->input->cur[1],
12264
42.6k
      ctxt->input->cur[2], ctxt->input->cur[3]);
12265
42.6k
  __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
12266
42.6k
         "Input is not proper UTF-8, indicate encoding !\n%s",
12267
42.6k
         BAD_CAST buffer, NULL);
12268
42.6k
    }
12269
42.6k
    return(0);
12270
3.79M
}
12271
12272
/**
12273
 * xmlParseCheckTransition:
12274
 * @ctxt:  an XML parser context
12275
 * @chunk:  a char array
12276
 * @size:  the size in byte of the chunk
12277
 *
12278
 * Check depending on the current parser state if the chunk given must be
12279
 * processed immediately or one need more data to advance on parsing.
12280
 *
12281
 * Returns -1 in case of error, 0 if the push is not needed and 1 if needed
12282
 */
12283
static int
12284
4.26M
xmlParseCheckTransition(xmlParserCtxtPtr ctxt, const char *chunk, int size) {
12285
4.26M
    if ((ctxt == NULL) || (chunk == NULL) || (size < 0))
12286
0
        return(-1);
12287
4.26M
    if (ctxt->instate == XML_PARSER_START_TAG) {
12288
1.45M
        if (memchr(chunk, '>', size) != NULL)
12289
666k
            return(1);
12290
783k
        return(0);
12291
1.45M
    }
12292
2.81M
    if (ctxt->progressive == XML_PARSER_COMMENT) {
12293
143k
        if (memchr(chunk, '>', size) != NULL)
12294
89.8k
            return(1);
12295
53.7k
        return(0);
12296
143k
    }
12297
2.67M
    if (ctxt->instate == XML_PARSER_CDATA_SECTION) {
12298
251k
        if (memchr(chunk, '>', size) != NULL)
12299
98.3k
            return(1);
12300
153k
        return(0);
12301
251k
    }
12302
2.42M
    if (ctxt->progressive == XML_PARSER_PI) {
12303
44.5k
        if (memchr(chunk, '>', size) != NULL)
12304
28.1k
            return(1);
12305
16.3k
        return(0);
12306
44.5k
    }
12307
2.37M
    if (ctxt->instate == XML_PARSER_END_TAG) {
12308
68.3k
        if (memchr(chunk, '>', size) != NULL)
12309
60.3k
            return(1);
12310
7.95k
        return(0);
12311
68.3k
    }
12312
2.30M
    if ((ctxt->progressive == XML_PARSER_DTD) ||
12313
2.30M
        (ctxt->instate == XML_PARSER_DTD)) {
12314
993k
        if (memchr(chunk, '>', size) != NULL)
12315
710k
            return(1);
12316
282k
        return(0);
12317
993k
    }
12318
1.31M
    return(1);
12319
2.30M
}
12320
12321
/**
12322
 * xmlParseChunk:
12323
 * @ctxt:  an XML parser context
12324
 * @chunk:  an char array
12325
 * @size:  the size in byte of the chunk
12326
 * @terminate:  last chunk indicator
12327
 *
12328
 * Parse a Chunk of memory
12329
 *
12330
 * Returns zero if no error, the xmlParserErrors otherwise.
12331
 */
12332
int
12333
xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
12334
10.4M
              int terminate) {
12335
10.4M
    int end_in_lf = 0;
12336
10.4M
    int remain = 0;
12337
10.4M
    size_t old_avail = 0;
12338
10.4M
    size_t avail = 0;
12339
12340
10.4M
    if (ctxt == NULL)
12341
0
        return(XML_ERR_INTERNAL_ERROR);
12342
10.4M
    if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12343
5.35M
        return(ctxt->errNo);
12344
5.07M
    if (ctxt->instate == XML_PARSER_EOF)
12345
1.55k
        return(-1);
12346
5.07M
    if (ctxt->instate == XML_PARSER_START)
12347
955k
        xmlDetectSAX2(ctxt);
12348
5.07M
    if ((size > 0) && (chunk != NULL) && (!terminate) &&
12349
5.07M
        (chunk[size - 1] == '\r')) {
12350
21.9k
  end_in_lf = 1;
12351
21.9k
  size--;
12352
21.9k
    }
12353
12354
5.09M
xmldecl_done:
12355
12356
5.09M
    if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
12357
5.09M
        (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF))  {
12358
4.66M
  size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12359
4.66M
  size_t cur = ctxt->input->cur - ctxt->input->base;
12360
4.66M
  int res;
12361
12362
4.66M
        old_avail = xmlBufUse(ctxt->input->buf->buffer);
12363
        /*
12364
         * Specific handling if we autodetected an encoding, we should not
12365
         * push more than the first line ... which depend on the encoding
12366
         * And only push the rest once the final encoding was detected
12367
         */
12368
4.66M
        if ((ctxt->instate == XML_PARSER_START) && (ctxt->input != NULL) &&
12369
4.66M
            (ctxt->input->buf != NULL) && (ctxt->input->buf->encoder != NULL)) {
12370
37.7k
            unsigned int len = 45;
12371
12372
37.7k
            if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12373
37.7k
                               BAD_CAST "UTF-16")) ||
12374
37.7k
                (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12375
9.27k
                               BAD_CAST "UTF16")))
12376
28.4k
                len = 90;
12377
9.27k
            else if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12378
9.27k
                                    BAD_CAST "UCS-4")) ||
12379
9.27k
                     (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12380
8.75k
                                    BAD_CAST "UCS4")))
12381
517
                len = 180;
12382
12383
37.7k
            if (ctxt->input->buf->rawconsumed < len)
12384
8.64k
                len -= ctxt->input->buf->rawconsumed;
12385
12386
            /*
12387
             * Change size for reading the initial declaration only
12388
             * if size is greater than len. Otherwise, memmove in xmlBufferAdd
12389
             * will blindly copy extra bytes from memory.
12390
             */
12391
37.7k
            if ((unsigned int) size > len) {
12392
23.5k
                remain = size - len;
12393
23.5k
                size = len;
12394
23.5k
            } else {
12395
14.2k
                remain = 0;
12396
14.2k
            }
12397
37.7k
        }
12398
4.66M
  res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12399
4.66M
        xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
12400
4.66M
  if (res < 0) {
12401
2.38k
      ctxt->errNo = XML_PARSER_EOF;
12402
2.38k
      xmlHaltParser(ctxt);
12403
2.38k
      return (XML_PARSER_EOF);
12404
2.38k
  }
12405
#ifdef DEBUG_PUSH
12406
  xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12407
#endif
12408
12409
4.66M
    } else if (ctxt->instate != XML_PARSER_EOF) {
12410
430k
  if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
12411
430k
      xmlParserInputBufferPtr in = ctxt->input->buf;
12412
430k
      if ((in->encoder != NULL) && (in->buffer != NULL) &&
12413
430k
        (in->raw != NULL)) {
12414
25.5k
    int nbchars;
12415
25.5k
    size_t base = xmlBufGetInputBase(in->buffer, ctxt->input);
12416
25.5k
    size_t current = ctxt->input->cur - ctxt->input->base;
12417
12418
25.5k
    nbchars = xmlCharEncInput(in, terminate);
12419
25.5k
    xmlBufSetInputBaseCur(in->buffer, ctxt->input, base, current);
12420
25.5k
    if (nbchars < 0) {
12421
        /* TODO 2.6.0 */
12422
3.30k
        xmlGenericError(xmlGenericErrorContext,
12423
3.30k
            "xmlParseChunk: encoder error\n");
12424
3.30k
                    xmlHaltParser(ctxt);
12425
3.30k
        return(XML_ERR_INVALID_ENCODING);
12426
3.30k
    }
12427
25.5k
      }
12428
430k
  }
12429
430k
    }
12430
5.09M
    if (remain != 0) {
12431
22.8k
        xmlParseTryOrFinish(ctxt, 0);
12432
5.06M
    } else {
12433
5.06M
        if ((ctxt->input != NULL) && (ctxt->input->buf != NULL))
12434
5.06M
            avail = xmlBufUse(ctxt->input->buf->buffer);
12435
        /*
12436
         * Depending on the current state it may not be such
12437
         * a good idea to try parsing if there is nothing in the chunk
12438
         * which would be worth doing a parser state transition and we
12439
         * need to wait for more data
12440
         */
12441
5.06M
        if ((terminate) || (avail > XML_MAX_TEXT_LENGTH) ||
12442
5.06M
            (old_avail == 0) || (avail == 0) ||
12443
5.06M
            (xmlParseCheckTransition(ctxt,
12444
4.26M
                       (const char *)&ctxt->input->base[old_avail],
12445
4.26M
                                     avail - old_avail)))
12446
3.76M
            xmlParseTryOrFinish(ctxt, terminate);
12447
5.06M
    }
12448
5.09M
    if (ctxt->instate == XML_PARSER_EOF)
12449
224k
        return(ctxt->errNo);
12450
12451
4.86M
    if ((ctxt->input != NULL) &&
12452
4.86M
         (((ctxt->input->end - ctxt->input->cur) > XML_MAX_LOOKUP_LIMIT) ||
12453
4.86M
         ((ctxt->input->cur - ctxt->input->base) > XML_MAX_LOOKUP_LIMIT)) &&
12454
4.86M
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
12455
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
12456
0
        xmlHaltParser(ctxt);
12457
0
    }
12458
4.86M
    if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12459
220k
        return(ctxt->errNo);
12460
12461
4.64M
    if (remain != 0) {
12462
21.3k
        chunk += size;
12463
21.3k
        size = remain;
12464
21.3k
        remain = 0;
12465
21.3k
        goto xmldecl_done;
12466
21.3k
    }
12467
4.62M
    if ((end_in_lf == 1) && (ctxt->input != NULL) &&
12468
4.62M
        (ctxt->input->buf != NULL)) {
12469
17.8k
  size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
12470
17.8k
           ctxt->input);
12471
17.8k
  size_t current = ctxt->input->cur - ctxt->input->base;
12472
12473
17.8k
  xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
12474
12475
17.8k
  xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
12476
17.8k
            base, current);
12477
17.8k
    }
12478
4.62M
    if (terminate) {
12479
  /*
12480
   * Check for termination
12481
   */
12482
210k
  int cur_avail = 0;
12483
12484
210k
  if (ctxt->input != NULL) {
12485
210k
      if (ctxt->input->buf == NULL)
12486
0
    cur_avail = ctxt->input->length -
12487
0
          (ctxt->input->cur - ctxt->input->base);
12488
210k
      else
12489
210k
    cur_avail = xmlBufUse(ctxt->input->buf->buffer) -
12490
210k
                    (ctxt->input->cur - ctxt->input->base);
12491
210k
  }
12492
12493
210k
  if ((ctxt->instate != XML_PARSER_EOF) &&
12494
210k
      (ctxt->instate != XML_PARSER_EPILOG)) {
12495
150k
      xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
12496
150k
  }
12497
210k
  if ((ctxt->instate == XML_PARSER_EPILOG) && (cur_avail > 0)) {
12498
3.70k
      xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
12499
3.70k
  }
12500
210k
  if (ctxt->instate != XML_PARSER_EOF) {
12501
210k
      if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
12502
210k
    ctxt->sax->endDocument(ctxt->userData);
12503
210k
  }
12504
210k
  ctxt->instate = XML_PARSER_EOF;
12505
210k
    }
12506
4.62M
    if (ctxt->wellFormed == 0)
12507
1.32M
  return((xmlParserErrors) ctxt->errNo);
12508
3.29M
    else
12509
3.29M
        return(0);
12510
4.62M
}
12511
12512
/************************************************************************
12513
 *                  *
12514
 *    I/O front end functions to the parser     *
12515
 *                  *
12516
 ************************************************************************/
12517
12518
/**
12519
 * xmlCreatePushParserCtxt:
12520
 * @sax:  a SAX handler
12521
 * @user_data:  The user data returned on SAX callbacks
12522
 * @chunk:  a pointer to an array of chars
12523
 * @size:  number of chars in the array
12524
 * @filename:  an optional file name or URI
12525
 *
12526
 * Create a parser context for using the XML parser in push mode.
12527
 * If @buffer and @size are non-NULL, the data is used to detect
12528
 * the encoding.  The remaining characters will be parsed so they
12529
 * don't need to be fed in again through xmlParseChunk.
12530
 * To allow content encoding detection, @size should be >= 4
12531
 * The value of @filename is used for fetching external entities
12532
 * and error/warning reports.
12533
 *
12534
 * Returns the new parser context or NULL
12535
 */
12536
12537
xmlParserCtxtPtr
12538
xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12539
727k
                        const char *chunk, int size, const char *filename) {
12540
727k
    xmlParserCtxtPtr ctxt;
12541
727k
    xmlParserInputPtr inputStream;
12542
727k
    xmlParserInputBufferPtr buf;
12543
727k
    xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
12544
12545
    /*
12546
     * plug some encoding conversion routines
12547
     */
12548
727k
    if ((chunk != NULL) && (size >= 4))
12549
347k
  enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
12550
12551
727k
    buf = xmlAllocParserInputBuffer(enc);
12552
727k
    if (buf == NULL) return(NULL);
12553
12554
727k
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
12555
727k
    if (ctxt == NULL) {
12556
0
        xmlErrMemory(NULL, "creating parser: out of memory\n");
12557
0
  xmlFreeParserInputBuffer(buf);
12558
0
  return(NULL);
12559
0
    }
12560
727k
    ctxt->dictNames = 1;
12561
727k
    if (filename == NULL) {
12562
363k
  ctxt->directory = NULL;
12563
363k
    } else {
12564
363k
        ctxt->directory = xmlParserGetDirectory(filename);
12565
363k
    }
12566
12567
727k
    inputStream = xmlNewInputStream(ctxt);
12568
727k
    if (inputStream == NULL) {
12569
0
  xmlFreeParserCtxt(ctxt);
12570
0
  xmlFreeParserInputBuffer(buf);
12571
0
  return(NULL);
12572
0
    }
12573
12574
727k
    if (filename == NULL)
12575
363k
  inputStream->filename = NULL;
12576
363k
    else {
12577
363k
  inputStream->filename = (char *)
12578
363k
      xmlCanonicPath((const xmlChar *) filename);
12579
363k
  if (inputStream->filename == NULL) {
12580
0
      xmlFreeParserCtxt(ctxt);
12581
0
      xmlFreeParserInputBuffer(buf);
12582
0
      return(NULL);
12583
0
  }
12584
363k
    }
12585
727k
    inputStream->buf = buf;
12586
727k
    xmlBufResetInput(inputStream->buf->buffer, inputStream);
12587
727k
    inputPush(ctxt, inputStream);
12588
12589
    /*
12590
     * If the caller didn't provide an initial 'chunk' for determining
12591
     * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
12592
     * that it can be automatically determined later
12593
     */
12594
727k
    if ((size == 0) || (chunk == NULL)) {
12595
380k
  ctxt->charset = XML_CHAR_ENCODING_NONE;
12596
380k
    } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
12597
347k
  size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12598
347k
  size_t cur = ctxt->input->cur - ctxt->input->base;
12599
12600
347k
  xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12601
12602
347k
        xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
12603
#ifdef DEBUG_PUSH
12604
  xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12605
#endif
12606
347k
    }
12607
12608
727k
    if (enc != XML_CHAR_ENCODING_NONE) {
12609
150k
        xmlSwitchEncoding(ctxt, enc);
12610
150k
    }
12611
12612
727k
    return(ctxt);
12613
727k
}
12614
#endif /* LIBXML_PUSH_ENABLED */
12615
12616
/**
12617
 * xmlHaltParser:
12618
 * @ctxt:  an XML parser context
12619
 *
12620
 * Blocks further parser processing don't override error
12621
 * for internal use
12622
 */
12623
static void
12624
3.86M
xmlHaltParser(xmlParserCtxtPtr ctxt) {
12625
3.86M
    if (ctxt == NULL)
12626
0
        return;
12627
3.86M
    ctxt->instate = XML_PARSER_EOF;
12628
3.86M
    ctxt->disableSAX = 1;
12629
3.86M
    while (ctxt->inputNr > 1)
12630
3.36k
        xmlFreeInputStream(inputPop(ctxt));
12631
3.86M
    if (ctxt->input != NULL) {
12632
        /*
12633
   * in case there was a specific allocation deallocate before
12634
   * overriding base
12635
   */
12636
3.86M
        if (ctxt->input->free != NULL) {
12637
0
      ctxt->input->free((xmlChar *) ctxt->input->base);
12638
0
      ctxt->input->free = NULL;
12639
0
  }
12640
3.86M
        if (ctxt->input->buf != NULL) {
12641
3.72M
            xmlFreeParserInputBuffer(ctxt->input->buf);
12642
3.72M
            ctxt->input->buf = NULL;
12643
3.72M
        }
12644
3.86M
  ctxt->input->cur = BAD_CAST"";
12645
3.86M
        ctxt->input->length = 0;
12646
3.86M
  ctxt->input->base = ctxt->input->cur;
12647
3.86M
        ctxt->input->end = ctxt->input->cur;
12648
3.86M
    }
12649
3.86M
}
12650
12651
/**
12652
 * xmlStopParser:
12653
 * @ctxt:  an XML parser context
12654
 *
12655
 * Blocks further parser processing
12656
 */
12657
void
12658
364k
xmlStopParser(xmlParserCtxtPtr ctxt) {
12659
364k
    if (ctxt == NULL)
12660
0
        return;
12661
364k
    xmlHaltParser(ctxt);
12662
364k
    ctxt->errNo = XML_ERR_USER_STOP;
12663
364k
}
12664
12665
/**
12666
 * xmlCreateIOParserCtxt:
12667
 * @sax:  a SAX handler
12668
 * @user_data:  The user data returned on SAX callbacks
12669
 * @ioread:  an I/O read function
12670
 * @ioclose:  an I/O close function
12671
 * @ioctx:  an I/O handler
12672
 * @enc:  the charset encoding if known
12673
 *
12674
 * Create a parser context for using the XML parser with an existing
12675
 * I/O stream
12676
 *
12677
 * Returns the new parser context or NULL
12678
 */
12679
xmlParserCtxtPtr
12680
xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12681
  xmlInputReadCallback   ioread, xmlInputCloseCallback  ioclose,
12682
0
  void *ioctx, xmlCharEncoding enc) {
12683
0
    xmlParserCtxtPtr ctxt;
12684
0
    xmlParserInputPtr inputStream;
12685
0
    xmlParserInputBufferPtr buf;
12686
12687
0
    if (ioread == NULL) return(NULL);
12688
12689
0
    buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
12690
0
    if (buf == NULL) {
12691
0
        if (ioclose != NULL)
12692
0
            ioclose(ioctx);
12693
0
        return (NULL);
12694
0
    }
12695
12696
0
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
12697
0
    if (ctxt == NULL) {
12698
0
  xmlFreeParserInputBuffer(buf);
12699
0
  return(NULL);
12700
0
    }
12701
12702
0
    inputStream = xmlNewIOInputStream(ctxt, buf, enc);
12703
0
    if (inputStream == NULL) {
12704
0
  xmlFreeParserCtxt(ctxt);
12705
0
  return(NULL);
12706
0
    }
12707
0
    inputPush(ctxt, inputStream);
12708
12709
0
    return(ctxt);
12710
0
}
12711
12712
#ifdef LIBXML_VALID_ENABLED
12713
/************************************************************************
12714
 *                  *
12715
 *    Front ends when parsing a DTD       *
12716
 *                  *
12717
 ************************************************************************/
12718
12719
/**
12720
 * xmlIOParseDTD:
12721
 * @sax:  the SAX handler block or NULL
12722
 * @input:  an Input Buffer
12723
 * @enc:  the charset encoding if known
12724
 *
12725
 * Load and parse a DTD
12726
 *
12727
 * Returns the resulting xmlDtdPtr or NULL in case of error.
12728
 * @input will be freed by the function in any case.
12729
 */
12730
12731
xmlDtdPtr
12732
xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
12733
0
        xmlCharEncoding enc) {
12734
0
    xmlDtdPtr ret = NULL;
12735
0
    xmlParserCtxtPtr ctxt;
12736
0
    xmlParserInputPtr pinput = NULL;
12737
0
    xmlChar start[4];
12738
12739
0
    if (input == NULL)
12740
0
  return(NULL);
12741
12742
0
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
12743
0
    if (ctxt == NULL) {
12744
0
        xmlFreeParserInputBuffer(input);
12745
0
  return(NULL);
12746
0
    }
12747
12748
    /* We are loading a DTD */
12749
0
    ctxt->options |= XML_PARSE_DTDLOAD;
12750
12751
0
    xmlDetectSAX2(ctxt);
12752
12753
    /*
12754
     * generate a parser input from the I/O handler
12755
     */
12756
12757
0
    pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12758
0
    if (pinput == NULL) {
12759
0
        xmlFreeParserInputBuffer(input);
12760
0
  xmlFreeParserCtxt(ctxt);
12761
0
  return(NULL);
12762
0
    }
12763
12764
    /*
12765
     * plug some encoding conversion routines here.
12766
     */
12767
0
    if (xmlPushInput(ctxt, pinput) < 0) {
12768
0
  xmlFreeParserCtxt(ctxt);
12769
0
  return(NULL);
12770
0
    }
12771
0
    if (enc != XML_CHAR_ENCODING_NONE) {
12772
0
        xmlSwitchEncoding(ctxt, enc);
12773
0
    }
12774
12775
0
    pinput->filename = NULL;
12776
0
    pinput->line = 1;
12777
0
    pinput->col = 1;
12778
0
    pinput->base = ctxt->input->cur;
12779
0
    pinput->cur = ctxt->input->cur;
12780
0
    pinput->free = NULL;
12781
12782
    /*
12783
     * let's parse that entity knowing it's an external subset.
12784
     */
12785
0
    ctxt->inSubset = 2;
12786
0
    ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12787
0
    if (ctxt->myDoc == NULL) {
12788
0
  xmlErrMemory(ctxt, "New Doc failed");
12789
0
  return(NULL);
12790
0
    }
12791
0
    ctxt->myDoc->properties = XML_DOC_INTERNAL;
12792
0
    ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12793
0
                                 BAD_CAST "none", BAD_CAST "none");
12794
12795
0
    if ((enc == XML_CHAR_ENCODING_NONE) &&
12796
0
        ((ctxt->input->end - ctxt->input->cur) >= 4)) {
12797
  /*
12798
   * Get the 4 first bytes and decode the charset
12799
   * if enc != XML_CHAR_ENCODING_NONE
12800
   * plug some encoding conversion routines.
12801
   */
12802
0
  start[0] = RAW;
12803
0
  start[1] = NXT(1);
12804
0
  start[2] = NXT(2);
12805
0
  start[3] = NXT(3);
12806
0
  enc = xmlDetectCharEncoding(start, 4);
12807
0
  if (enc != XML_CHAR_ENCODING_NONE) {
12808
0
      xmlSwitchEncoding(ctxt, enc);
12809
0
  }
12810
0
    }
12811
12812
0
    xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
12813
12814
0
    if (ctxt->myDoc != NULL) {
12815
0
  if (ctxt->wellFormed) {
12816
0
      ret = ctxt->myDoc->extSubset;
12817
0
      ctxt->myDoc->extSubset = NULL;
12818
0
      if (ret != NULL) {
12819
0
    xmlNodePtr tmp;
12820
12821
0
    ret->doc = NULL;
12822
0
    tmp = ret->children;
12823
0
    while (tmp != NULL) {
12824
0
        tmp->doc = NULL;
12825
0
        tmp = tmp->next;
12826
0
    }
12827
0
      }
12828
0
  } else {
12829
0
      ret = NULL;
12830
0
  }
12831
0
        xmlFreeDoc(ctxt->myDoc);
12832
0
        ctxt->myDoc = NULL;
12833
0
    }
12834
0
    xmlFreeParserCtxt(ctxt);
12835
12836
0
    return(ret);
12837
0
}
12838
12839
/**
12840
 * xmlSAXParseDTD:
12841
 * @sax:  the SAX handler block
12842
 * @ExternalID:  a NAME* containing the External ID of the DTD
12843
 * @SystemID:  a NAME* containing the URL to the DTD
12844
 *
12845
 * DEPRECATED: Don't use.
12846
 *
12847
 * Load and parse an external subset.
12848
 *
12849
 * Returns the resulting xmlDtdPtr or NULL in case of error.
12850
 */
12851
12852
xmlDtdPtr
12853
xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
12854
0
                          const xmlChar *SystemID) {
12855
0
    xmlDtdPtr ret = NULL;
12856
0
    xmlParserCtxtPtr ctxt;
12857
0
    xmlParserInputPtr input = NULL;
12858
0
    xmlCharEncoding enc;
12859
0
    xmlChar* systemIdCanonic;
12860
12861
0
    if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
12862
12863
0
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
12864
0
    if (ctxt == NULL) {
12865
0
  return(NULL);
12866
0
    }
12867
12868
    /* We are loading a DTD */
12869
0
    ctxt->options |= XML_PARSE_DTDLOAD;
12870
12871
    /*
12872
     * Canonicalise the system ID
12873
     */
12874
0
    systemIdCanonic = xmlCanonicPath(SystemID);
12875
0
    if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
12876
0
  xmlFreeParserCtxt(ctxt);
12877
0
  return(NULL);
12878
0
    }
12879
12880
    /*
12881
     * Ask the Entity resolver to load the damn thing
12882
     */
12883
12884
0
    if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
12885
0
  input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
12886
0
                                   systemIdCanonic);
12887
0
    if (input == NULL) {
12888
0
  xmlFreeParserCtxt(ctxt);
12889
0
  if (systemIdCanonic != NULL)
12890
0
      xmlFree(systemIdCanonic);
12891
0
  return(NULL);
12892
0
    }
12893
12894
    /*
12895
     * plug some encoding conversion routines here.
12896
     */
12897
0
    if (xmlPushInput(ctxt, input) < 0) {
12898
0
  xmlFreeParserCtxt(ctxt);
12899
0
  if (systemIdCanonic != NULL)
12900
0
      xmlFree(systemIdCanonic);
12901
0
  return(NULL);
12902
0
    }
12903
0
    if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12904
0
  enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
12905
0
  xmlSwitchEncoding(ctxt, enc);
12906
0
    }
12907
12908
0
    if (input->filename == NULL)
12909
0
  input->filename = (char *) systemIdCanonic;
12910
0
    else
12911
0
  xmlFree(systemIdCanonic);
12912
0
    input->line = 1;
12913
0
    input->col = 1;
12914
0
    input->base = ctxt->input->cur;
12915
0
    input->cur = ctxt->input->cur;
12916
0
    input->free = NULL;
12917
12918
    /*
12919
     * let's parse that entity knowing it's an external subset.
12920
     */
12921
0
    ctxt->inSubset = 2;
12922
0
    ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12923
0
    if (ctxt->myDoc == NULL) {
12924
0
  xmlErrMemory(ctxt, "New Doc failed");
12925
0
  xmlFreeParserCtxt(ctxt);
12926
0
  return(NULL);
12927
0
    }
12928
0
    ctxt->myDoc->properties = XML_DOC_INTERNAL;
12929
0
    ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12930
0
                                 ExternalID, SystemID);
12931
0
    xmlParseExternalSubset(ctxt, ExternalID, SystemID);
12932
12933
0
    if (ctxt->myDoc != NULL) {
12934
0
  if (ctxt->wellFormed) {
12935
0
      ret = ctxt->myDoc->extSubset;
12936
0
      ctxt->myDoc->extSubset = NULL;
12937
0
      if (ret != NULL) {
12938
0
    xmlNodePtr tmp;
12939
12940
0
    ret->doc = NULL;
12941
0
    tmp = ret->children;
12942
0
    while (tmp != NULL) {
12943
0
        tmp->doc = NULL;
12944
0
        tmp = tmp->next;
12945
0
    }
12946
0
      }
12947
0
  } else {
12948
0
      ret = NULL;
12949
0
  }
12950
0
        xmlFreeDoc(ctxt->myDoc);
12951
0
        ctxt->myDoc = NULL;
12952
0
    }
12953
0
    xmlFreeParserCtxt(ctxt);
12954
12955
0
    return(ret);
12956
0
}
12957
12958
12959
/**
12960
 * xmlParseDTD:
12961
 * @ExternalID:  a NAME* containing the External ID of the DTD
12962
 * @SystemID:  a NAME* containing the URL to the DTD
12963
 *
12964
 * Load and parse an external subset.
12965
 *
12966
 * Returns the resulting xmlDtdPtr or NULL in case of error.
12967
 */
12968
12969
xmlDtdPtr
12970
0
xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
12971
0
    return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
12972
0
}
12973
#endif /* LIBXML_VALID_ENABLED */
12974
12975
/************************************************************************
12976
 *                  *
12977
 *    Front ends when parsing an Entity     *
12978
 *                  *
12979
 ************************************************************************/
12980
12981
/**
12982
 * xmlParseCtxtExternalEntity:
12983
 * @ctx:  the existing parsing context
12984
 * @URL:  the URL for the entity to load
12985
 * @ID:  the System ID for the entity to load
12986
 * @lst:  the return value for the set of parsed nodes
12987
 *
12988
 * Parse an external general entity within an existing parsing context
12989
 * An external general parsed entity is well-formed if it matches the
12990
 * production labeled extParsedEnt.
12991
 *
12992
 * [78] extParsedEnt ::= TextDecl? content
12993
 *
12994
 * Returns 0 if the entity is well formed, -1 in case of args problem and
12995
 *    the parser error code otherwise
12996
 */
12997
12998
int
12999
xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
13000
0
                 const xmlChar *ID, xmlNodePtr *lst) {
13001
0
    void *userData;
13002
13003
0
    if (ctx == NULL) return(-1);
13004
    /*
13005
     * If the user provided their own SAX callbacks, then reuse the
13006
     * userData callback field, otherwise the expected setup in a
13007
     * DOM builder is to have userData == ctxt
13008
     */
13009
0
    if (ctx->userData == ctx)
13010
0
        userData = NULL;
13011
0
    else
13012
0
        userData = ctx->userData;
13013
0
    return xmlParseExternalEntityPrivate(ctx->myDoc, ctx, ctx->sax,
13014
0
                                         userData, ctx->depth + 1,
13015
0
                                         URL, ID, lst);
13016
0
}
13017
13018
/**
13019
 * xmlParseExternalEntityPrivate:
13020
 * @doc:  the document the chunk pertains to
13021
 * @oldctxt:  the previous parser context if available
13022
 * @sax:  the SAX handler block (possibly NULL)
13023
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
13024
 * @depth:  Used for loop detection, use 0
13025
 * @URL:  the URL for the entity to load
13026
 * @ID:  the System ID for the entity to load
13027
 * @list:  the return value for the set of parsed nodes
13028
 *
13029
 * Private version of xmlParseExternalEntity()
13030
 *
13031
 * Returns 0 if the entity is well formed, -1 in case of args problem and
13032
 *    the parser error code otherwise
13033
 */
13034
13035
static xmlParserErrors
13036
xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
13037
                xmlSAXHandlerPtr sax,
13038
          void *user_data, int depth, const xmlChar *URL,
13039
3.23M
          const xmlChar *ID, xmlNodePtr *list) {
13040
3.23M
    xmlParserCtxtPtr ctxt;
13041
3.23M
    xmlDocPtr newDoc;
13042
3.23M
    xmlNodePtr newRoot;
13043
3.23M
    xmlParserErrors ret = XML_ERR_OK;
13044
3.23M
    xmlChar start[4];
13045
3.23M
    xmlCharEncoding enc;
13046
13047
3.23M
    if (((depth > 40) &&
13048
3.23M
  ((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13049
3.23M
  (depth > 1024)) {
13050
4.47k
  return(XML_ERR_ENTITY_LOOP);
13051
4.47k
    }
13052
13053
3.23M
    if (list != NULL)
13054
3.22M
        *list = NULL;
13055
3.23M
    if ((URL == NULL) && (ID == NULL))
13056
323
  return(XML_ERR_INTERNAL_ERROR);
13057
3.23M
    if (doc == NULL)
13058
0
  return(XML_ERR_INTERNAL_ERROR);
13059
13060
3.23M
    ctxt = xmlCreateEntityParserCtxtInternal(sax, user_data, URL, ID, NULL,
13061
3.23M
                                             oldctxt);
13062
3.23M
    if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
13063
3.21M
    xmlDetectSAX2(ctxt);
13064
13065
3.21M
    newDoc = xmlNewDoc(BAD_CAST "1.0");
13066
3.21M
    if (newDoc == NULL) {
13067
0
  xmlFreeParserCtxt(ctxt);
13068
0
  return(XML_ERR_INTERNAL_ERROR);
13069
0
    }
13070
3.21M
    newDoc->properties = XML_DOC_INTERNAL;
13071
3.21M
    if (doc) {
13072
3.21M
        newDoc->intSubset = doc->intSubset;
13073
3.21M
        newDoc->extSubset = doc->extSubset;
13074
3.21M
        if (doc->dict) {
13075
2.22M
            newDoc->dict = doc->dict;
13076
2.22M
            xmlDictReference(newDoc->dict);
13077
2.22M
        }
13078
3.21M
        if (doc->URL != NULL) {
13079
2.00M
            newDoc->URL = xmlStrdup(doc->URL);
13080
2.00M
        }
13081
3.21M
    }
13082
3.21M
    newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13083
3.21M
    if (newRoot == NULL) {
13084
0
  if (sax != NULL)
13085
0
  xmlFreeParserCtxt(ctxt);
13086
0
  newDoc->intSubset = NULL;
13087
0
  newDoc->extSubset = NULL;
13088
0
        xmlFreeDoc(newDoc);
13089
0
  return(XML_ERR_INTERNAL_ERROR);
13090
0
    }
13091
3.21M
    xmlAddChild((xmlNodePtr) newDoc, newRoot);
13092
3.21M
    nodePush(ctxt, newDoc->children);
13093
3.21M
    if (doc == NULL) {
13094
0
        ctxt->myDoc = newDoc;
13095
3.21M
    } else {
13096
3.21M
        ctxt->myDoc = doc;
13097
3.21M
        newRoot->doc = doc;
13098
3.21M
    }
13099
13100
    /*
13101
     * Get the 4 first bytes and decode the charset
13102
     * if enc != XML_CHAR_ENCODING_NONE
13103
     * plug some encoding conversion routines.
13104
     */
13105
3.21M
    GROW;
13106
3.21M
    if ((ctxt->input->end - ctxt->input->cur) >= 4) {
13107
3.21M
  start[0] = RAW;
13108
3.21M
  start[1] = NXT(1);
13109
3.21M
  start[2] = NXT(2);
13110
3.21M
  start[3] = NXT(3);
13111
3.21M
  enc = xmlDetectCharEncoding(start, 4);
13112
3.21M
  if (enc != XML_CHAR_ENCODING_NONE) {
13113
3.34k
      xmlSwitchEncoding(ctxt, enc);
13114
3.34k
  }
13115
3.21M
    }
13116
13117
    /*
13118
     * Parse a possible text declaration first
13119
     */
13120
3.21M
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
13121
2.22k
  xmlParseTextDecl(ctxt);
13122
        /*
13123
         * An XML-1.0 document can't reference an entity not XML-1.0
13124
         */
13125
2.22k
        if ((xmlStrEqual(oldctxt->version, BAD_CAST "1.0")) &&
13126
2.22k
            (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
13127
217
            xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
13128
217
                           "Version mismatch between document and entity\n");
13129
217
        }
13130
2.22k
    }
13131
13132
3.21M
    ctxt->instate = XML_PARSER_CONTENT;
13133
3.21M
    ctxt->depth = depth;
13134
3.21M
    if (oldctxt != NULL) {
13135
3.21M
  ctxt->_private = oldctxt->_private;
13136
3.21M
  ctxt->loadsubset = oldctxt->loadsubset;
13137
3.21M
  ctxt->validate = oldctxt->validate;
13138
3.21M
  ctxt->valid = oldctxt->valid;
13139
3.21M
  ctxt->replaceEntities = oldctxt->replaceEntities;
13140
3.21M
        if (oldctxt->validate) {
13141
1.75M
            ctxt->vctxt.error = oldctxt->vctxt.error;
13142
1.75M
            ctxt->vctxt.warning = oldctxt->vctxt.warning;
13143
1.75M
            ctxt->vctxt.userData = oldctxt->vctxt.userData;
13144
1.75M
        }
13145
3.21M
  ctxt->external = oldctxt->external;
13146
3.21M
        if (ctxt->dict) xmlDictFree(ctxt->dict);
13147
3.21M
        ctxt->dict = oldctxt->dict;
13148
3.21M
        ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13149
3.21M
        ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13150
3.21M
        ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13151
3.21M
        ctxt->dictNames = oldctxt->dictNames;
13152
3.21M
        ctxt->attsDefault = oldctxt->attsDefault;
13153
3.21M
        ctxt->attsSpecial = oldctxt->attsSpecial;
13154
3.21M
        ctxt->linenumbers = oldctxt->linenumbers;
13155
3.21M
  ctxt->record_info = oldctxt->record_info;
13156
3.21M
  ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
13157
3.21M
  ctxt->node_seq.length = oldctxt->node_seq.length;
13158
3.21M
  ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
13159
3.21M
    } else {
13160
  /*
13161
   * Doing validity checking on chunk without context
13162
   * doesn't make sense
13163
   */
13164
0
  ctxt->_private = NULL;
13165
0
  ctxt->validate = 0;
13166
0
  ctxt->external = 2;
13167
0
  ctxt->loadsubset = 0;
13168
0
    }
13169
13170
3.21M
    xmlParseContent(ctxt);
13171
13172
3.21M
    if ((RAW == '<') && (NXT(1) == '/')) {
13173
542
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13174
3.21M
    } else if (RAW != 0) {
13175
4
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13176
4
    }
13177
3.21M
    if (ctxt->node != newDoc->children) {
13178
2.99M
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13179
2.99M
    }
13180
13181
3.21M
    if (!ctxt->wellFormed) {
13182
3.20M
        if (ctxt->errNo == 0)
13183
0
      ret = XML_ERR_INTERNAL_ERROR;
13184
3.20M
  else
13185
3.20M
      ret = (xmlParserErrors)ctxt->errNo;
13186
3.20M
    } else {
13187
8.59k
  if (list != NULL) {
13188
6.53k
      xmlNodePtr cur;
13189
13190
      /*
13191
       * Return the newly created nodeset after unlinking it from
13192
       * they pseudo parent.
13193
       */
13194
6.53k
      cur = newDoc->children->children;
13195
6.53k
      *list = cur;
13196
10.4k
      while (cur != NULL) {
13197
3.91k
    cur->parent = NULL;
13198
3.91k
    cur = cur->next;
13199
3.91k
      }
13200
6.53k
            newDoc->children->children = NULL;
13201
6.53k
  }
13202
8.59k
  ret = XML_ERR_OK;
13203
8.59k
    }
13204
13205
    /*
13206
     * Record in the parent context the number of entities replacement
13207
     * done when parsing that reference.
13208
     */
13209
3.21M
    if (oldctxt != NULL)
13210
3.21M
        oldctxt->nbentities += ctxt->nbentities;
13211
13212
    /*
13213
     * Also record the size of the entity parsed
13214
     */
13215
3.21M
    if (ctxt->input != NULL && oldctxt != NULL) {
13216
3.21M
  oldctxt->sizeentities += ctxt->input->consumed;
13217
3.21M
  oldctxt->sizeentities += (ctxt->input->cur - ctxt->input->base);
13218
3.21M
    }
13219
    /*
13220
     * And record the last error if any
13221
     */
13222
3.21M
    if ((oldctxt != NULL) && (ctxt->lastError.code != XML_ERR_OK))
13223
3.20M
        xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13224
13225
3.21M
    if (oldctxt != NULL) {
13226
3.21M
        ctxt->dict = NULL;
13227
3.21M
        ctxt->attsDefault = NULL;
13228
3.21M
        ctxt->attsSpecial = NULL;
13229
3.21M
        oldctxt->validate = ctxt->validate;
13230
3.21M
        oldctxt->valid = ctxt->valid;
13231
3.21M
        oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
13232
3.21M
        oldctxt->node_seq.length = ctxt->node_seq.length;
13233
3.21M
        oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
13234
3.21M
    }
13235
3.21M
    ctxt->node_seq.maximum = 0;
13236
3.21M
    ctxt->node_seq.length = 0;
13237
3.21M
    ctxt->node_seq.buffer = NULL;
13238
3.21M
    xmlFreeParserCtxt(ctxt);
13239
3.21M
    newDoc->intSubset = NULL;
13240
3.21M
    newDoc->extSubset = NULL;
13241
3.21M
    xmlFreeDoc(newDoc);
13242
13243
3.21M
    return(ret);
13244
3.21M
}
13245
13246
#ifdef LIBXML_SAX1_ENABLED
13247
/**
13248
 * xmlParseExternalEntity:
13249
 * @doc:  the document the chunk pertains to
13250
 * @sax:  the SAX handler block (possibly NULL)
13251
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
13252
 * @depth:  Used for loop detection, use 0
13253
 * @URL:  the URL for the entity to load
13254
 * @ID:  the System ID for the entity to load
13255
 * @lst:  the return value for the set of parsed nodes
13256
 *
13257
 * Parse an external general entity
13258
 * An external general parsed entity is well-formed if it matches the
13259
 * production labeled extParsedEnt.
13260
 *
13261
 * [78] extParsedEnt ::= TextDecl? content
13262
 *
13263
 * Returns 0 if the entity is well formed, -1 in case of args problem and
13264
 *    the parser error code otherwise
13265
 */
13266
13267
int
13268
xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
13269
0
    int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
13270
0
    return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
13271
0
                           ID, lst));
13272
0
}
13273
13274
/**
13275
 * xmlParseBalancedChunkMemory:
13276
 * @doc:  the document the chunk pertains to (must not be NULL)
13277
 * @sax:  the SAX handler block (possibly NULL)
13278
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
13279
 * @depth:  Used for loop detection, use 0
13280
 * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
13281
 * @lst:  the return value for the set of parsed nodes
13282
 *
13283
 * Parse a well-balanced chunk of an XML document
13284
 * called by the parser
13285
 * The allowed sequence for the Well Balanced Chunk is the one defined by
13286
 * the content production in the XML grammar:
13287
 *
13288
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13289
 *
13290
 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13291
 *    the parser error code otherwise
13292
 */
13293
13294
int
13295
xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13296
0
     void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
13297
0
    return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
13298
0
                                                depth, string, lst, 0 );
13299
0
}
13300
#endif /* LIBXML_SAX1_ENABLED */
13301
13302
/**
13303
 * xmlParseBalancedChunkMemoryInternal:
13304
 * @oldctxt:  the existing parsing context
13305
 * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
13306
 * @user_data:  the user data field for the parser context
13307
 * @lst:  the return value for the set of parsed nodes
13308
 *
13309
 *
13310
 * Parse a well-balanced chunk of an XML document
13311
 * called by the parser
13312
 * The allowed sequence for the Well Balanced Chunk is the one defined by
13313
 * the content production in the XML grammar:
13314
 *
13315
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13316
 *
13317
 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13318
 * error code otherwise
13319
 *
13320
 * In case recover is set to 1, the nodelist will not be empty even if
13321
 * the parsed chunk is not well balanced.
13322
 */
13323
static xmlParserErrors
13324
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
13325
62.0k
  const xmlChar *string, void *user_data, xmlNodePtr *lst) {
13326
62.0k
    xmlParserCtxtPtr ctxt;
13327
62.0k
    xmlDocPtr newDoc = NULL;
13328
62.0k
    xmlNodePtr newRoot;
13329
62.0k
    xmlSAXHandlerPtr oldsax = NULL;
13330
62.0k
    xmlNodePtr content = NULL;
13331
62.0k
    xmlNodePtr last = NULL;
13332
62.0k
    int size;
13333
62.0k
    xmlParserErrors ret = XML_ERR_OK;
13334
62.0k
#ifdef SAX2
13335
62.0k
    int i;
13336
62.0k
#endif
13337
13338
62.0k
    if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13339
62.0k
        (oldctxt->depth >  1024)) {
13340
482
  return(XML_ERR_ENTITY_LOOP);
13341
482
    }
13342
13343
13344
61.5k
    if (lst != NULL)
13345
61.3k
        *lst = NULL;
13346
61.5k
    if (string == NULL)
13347
154
        return(XML_ERR_INTERNAL_ERROR);
13348
13349
61.4k
    size = xmlStrlen(string);
13350
13351
61.4k
    ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13352
61.4k
    if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
13353
60.8k
    if (user_data != NULL)
13354
0
  ctxt->userData = user_data;
13355
60.8k
    else
13356
60.8k
  ctxt->userData = ctxt;
13357
60.8k
    if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
13358
60.8k
    ctxt->dict = oldctxt->dict;
13359
60.8k
    ctxt->input_id = oldctxt->input_id + 1;
13360
60.8k
    ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13361
60.8k
    ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13362
60.8k
    ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13363
13364
60.8k
#ifdef SAX2
13365
    /* propagate namespaces down the entity */
13366
459k
    for (i = 0;i < oldctxt->nsNr;i += 2) {
13367
398k
        nsPush(ctxt, oldctxt->nsTab[i], oldctxt->nsTab[i+1]);
13368
398k
    }
13369
60.8k
#endif
13370
13371
60.8k
    oldsax = ctxt->sax;
13372
60.8k
    ctxt->sax = oldctxt->sax;
13373
60.8k
    xmlDetectSAX2(ctxt);
13374
60.8k
    ctxt->replaceEntities = oldctxt->replaceEntities;
13375
60.8k
    ctxt->options = oldctxt->options;
13376
13377
60.8k
    ctxt->_private = oldctxt->_private;
13378
60.8k
    if (oldctxt->myDoc == NULL) {
13379
0
  newDoc = xmlNewDoc(BAD_CAST "1.0");
13380
0
  if (newDoc == NULL) {
13381
0
      ctxt->sax = oldsax;
13382
0
      ctxt->dict = NULL;
13383
0
      xmlFreeParserCtxt(ctxt);
13384
0
      return(XML_ERR_INTERNAL_ERROR);
13385
0
  }
13386
0
  newDoc->properties = XML_DOC_INTERNAL;
13387
0
  newDoc->dict = ctxt->dict;
13388
0
  xmlDictReference(newDoc->dict);
13389
0
  ctxt->myDoc = newDoc;
13390
60.8k
    } else {
13391
60.8k
  ctxt->myDoc = oldctxt->myDoc;
13392
60.8k
        content = ctxt->myDoc->children;
13393
60.8k
  last = ctxt->myDoc->last;
13394
60.8k
    }
13395
60.8k
    newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
13396
60.8k
    if (newRoot == NULL) {
13397
0
  ctxt->sax = oldsax;
13398
0
  ctxt->dict = NULL;
13399
0
  xmlFreeParserCtxt(ctxt);
13400
0
  if (newDoc != NULL) {
13401
0
      xmlFreeDoc(newDoc);
13402
0
  }
13403
0
  return(XML_ERR_INTERNAL_ERROR);
13404
0
    }
13405
60.8k
    ctxt->myDoc->children = NULL;
13406
60.8k
    ctxt->myDoc->last = NULL;
13407
60.8k
    xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
13408
60.8k
    nodePush(ctxt, ctxt->myDoc->children);
13409
60.8k
    ctxt->instate = XML_PARSER_CONTENT;
13410
60.8k
    ctxt->depth = oldctxt->depth + 1;
13411
13412
60.8k
    ctxt->validate = 0;
13413
60.8k
    ctxt->loadsubset = oldctxt->loadsubset;
13414
60.8k
    if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
13415
  /*
13416
   * ID/IDREF registration will be done in xmlValidateElement below
13417
   */
13418
45.0k
  ctxt->loadsubset |= XML_SKIP_IDS;
13419
45.0k
    }
13420
60.8k
    ctxt->dictNames = oldctxt->dictNames;
13421
60.8k
    ctxt->attsDefault = oldctxt->attsDefault;
13422
60.8k
    ctxt->attsSpecial = oldctxt->attsSpecial;
13423
13424
60.8k
    xmlParseContent(ctxt);
13425
60.8k
    if ((RAW == '<') && (NXT(1) == '/')) {
13426
254
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13427
60.6k
    } else if (RAW != 0) {
13428
6
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13429
6
    }
13430
60.8k
    if (ctxt->node != ctxt->myDoc->children) {
13431
11.7k
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13432
11.7k
    }
13433
13434
60.8k
    if (!ctxt->wellFormed) {
13435
19.7k
        if (ctxt->errNo == 0)
13436
0
      ret = XML_ERR_INTERNAL_ERROR;
13437
19.7k
  else
13438
19.7k
      ret = (xmlParserErrors)ctxt->errNo;
13439
41.1k
    } else {
13440
41.1k
      ret = XML_ERR_OK;
13441
41.1k
    }
13442
13443
60.8k
    if ((lst != NULL) && (ret == XML_ERR_OK)) {
13444
41.1k
  xmlNodePtr cur;
13445
13446
  /*
13447
   * Return the newly created nodeset after unlinking it from
13448
   * they pseudo parent.
13449
   */
13450
41.1k
  cur = ctxt->myDoc->children->children;
13451
41.1k
  *lst = cur;
13452
91.5k
  while (cur != NULL) {
13453
50.3k
#ifdef LIBXML_VALID_ENABLED
13454
50.3k
      if ((oldctxt->validate) && (oldctxt->wellFormed) &&
13455
50.3k
    (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
13456
50.3k
    (cur->type == XML_ELEMENT_NODE)) {
13457
4.09k
    oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
13458
4.09k
      oldctxt->myDoc, cur);
13459
4.09k
      }
13460
50.3k
#endif /* LIBXML_VALID_ENABLED */
13461
50.3k
      cur->parent = NULL;
13462
50.3k
      cur = cur->next;
13463
50.3k
  }
13464
41.1k
  ctxt->myDoc->children->children = NULL;
13465
41.1k
    }
13466
60.8k
    if (ctxt->myDoc != NULL) {
13467
60.8k
  xmlFreeNode(ctxt->myDoc->children);
13468
60.8k
        ctxt->myDoc->children = content;
13469
60.8k
        ctxt->myDoc->last = last;
13470
60.8k
    }
13471
13472
    /*
13473
     * Record in the parent context the number of entities replacement
13474
     * done when parsing that reference.
13475
     */
13476
60.8k
    if (oldctxt != NULL)
13477
60.8k
        oldctxt->nbentities += ctxt->nbentities;
13478
13479
    /*
13480
     * Also record the last error if any
13481
     */
13482
60.8k
    if (ctxt->lastError.code != XML_ERR_OK)
13483
19.9k
        xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13484
13485
60.8k
    ctxt->sax = oldsax;
13486
60.8k
    ctxt->dict = NULL;
13487
60.8k
    ctxt->attsDefault = NULL;
13488
60.8k
    ctxt->attsSpecial = NULL;
13489
60.8k
    xmlFreeParserCtxt(ctxt);
13490
60.8k
    if (newDoc != NULL) {
13491
0
  xmlFreeDoc(newDoc);
13492
0
    }
13493
13494
60.8k
    return(ret);
13495
60.8k
}
13496
13497
/**
13498
 * xmlParseInNodeContext:
13499
 * @node:  the context node
13500
 * @data:  the input string
13501
 * @datalen:  the input string length in bytes
13502
 * @options:  a combination of xmlParserOption
13503
 * @lst:  the return value for the set of parsed nodes
13504
 *
13505
 * Parse a well-balanced chunk of an XML document
13506
 * within the context (DTD, namespaces, etc ...) of the given node.
13507
 *
13508
 * The allowed sequence for the data is a Well Balanced Chunk defined by
13509
 * the content production in the XML grammar:
13510
 *
13511
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13512
 *
13513
 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13514
 * error code otherwise
13515
 */
13516
xmlParserErrors
13517
xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
13518
0
                      int options, xmlNodePtr *lst) {
13519
0
#ifdef SAX2
13520
0
    xmlParserCtxtPtr ctxt;
13521
0
    xmlDocPtr doc = NULL;
13522
0
    xmlNodePtr fake, cur;
13523
0
    int nsnr = 0;
13524
13525
0
    xmlParserErrors ret = XML_ERR_OK;
13526
13527
    /*
13528
     * check all input parameters, grab the document
13529
     */
13530
0
    if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
13531
0
        return(XML_ERR_INTERNAL_ERROR);
13532
0
    switch (node->type) {
13533
0
        case XML_ELEMENT_NODE:
13534
0
        case XML_ATTRIBUTE_NODE:
13535
0
        case XML_TEXT_NODE:
13536
0
        case XML_CDATA_SECTION_NODE:
13537
0
        case XML_ENTITY_REF_NODE:
13538
0
        case XML_PI_NODE:
13539
0
        case XML_COMMENT_NODE:
13540
0
        case XML_DOCUMENT_NODE:
13541
0
        case XML_HTML_DOCUMENT_NODE:
13542
0
      break;
13543
0
  default:
13544
0
      return(XML_ERR_INTERNAL_ERROR);
13545
13546
0
    }
13547
0
    while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
13548
0
           (node->type != XML_DOCUMENT_NODE) &&
13549
0
     (node->type != XML_HTML_DOCUMENT_NODE))
13550
0
  node = node->parent;
13551
0
    if (node == NULL)
13552
0
  return(XML_ERR_INTERNAL_ERROR);
13553
0
    if (node->type == XML_ELEMENT_NODE)
13554
0
  doc = node->doc;
13555
0
    else
13556
0
        doc = (xmlDocPtr) node;
13557
0
    if (doc == NULL)
13558
0
  return(XML_ERR_INTERNAL_ERROR);
13559
13560
    /*
13561
     * allocate a context and set-up everything not related to the
13562
     * node position in the tree
13563
     */
13564
0
    if (doc->type == XML_DOCUMENT_NODE)
13565
0
  ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
13566
0
#ifdef LIBXML_HTML_ENABLED
13567
0
    else if (doc->type == XML_HTML_DOCUMENT_NODE) {
13568
0
  ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
13569
        /*
13570
         * When parsing in context, it makes no sense to add implied
13571
         * elements like html/body/etc...
13572
         */
13573
0
        options |= HTML_PARSE_NOIMPLIED;
13574
0
    }
13575
0
#endif
13576
0
    else
13577
0
        return(XML_ERR_INTERNAL_ERROR);
13578
13579
0
    if (ctxt == NULL)
13580
0
        return(XML_ERR_NO_MEMORY);
13581
13582
    /*
13583
     * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
13584
     * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
13585
     * we must wait until the last moment to free the original one.
13586
     */
13587
0
    if (doc->dict != NULL) {
13588
0
        if (ctxt->dict != NULL)
13589
0
      xmlDictFree(ctxt->dict);
13590
0
  ctxt->dict = doc->dict;
13591
0
    } else
13592
0
        options |= XML_PARSE_NODICT;
13593
13594
0
    if (doc->encoding != NULL) {
13595
0
        xmlCharEncodingHandlerPtr hdlr;
13596
13597
0
        if (ctxt->encoding != NULL)
13598
0
      xmlFree((xmlChar *) ctxt->encoding);
13599
0
        ctxt->encoding = xmlStrdup((const xmlChar *) doc->encoding);
13600
13601
0
        hdlr = xmlFindCharEncodingHandler((const char *) doc->encoding);
13602
0
        if (hdlr != NULL) {
13603
0
            xmlSwitchToEncoding(ctxt, hdlr);
13604
0
  } else {
13605
0
            return(XML_ERR_UNSUPPORTED_ENCODING);
13606
0
        }
13607
0
    }
13608
13609
0
    xmlCtxtUseOptionsInternal(ctxt, options, NULL);
13610
0
    xmlDetectSAX2(ctxt);
13611
0
    ctxt->myDoc = doc;
13612
    /* parsing in context, i.e. as within existing content */
13613
0
    ctxt->input_id = 2;
13614
0
    ctxt->instate = XML_PARSER_CONTENT;
13615
13616
0
    fake = xmlNewDocComment(node->doc, NULL);
13617
0
    if (fake == NULL) {
13618
0
        xmlFreeParserCtxt(ctxt);
13619
0
  return(XML_ERR_NO_MEMORY);
13620
0
    }
13621
0
    xmlAddChild(node, fake);
13622
13623
0
    if (node->type == XML_ELEMENT_NODE) {
13624
0
  nodePush(ctxt, node);
13625
  /*
13626
   * initialize the SAX2 namespaces stack
13627
   */
13628
0
  cur = node;
13629
0
  while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
13630
0
      xmlNsPtr ns = cur->nsDef;
13631
0
      const xmlChar *iprefix, *ihref;
13632
13633
0
      while (ns != NULL) {
13634
0
    if (ctxt->dict) {
13635
0
        iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
13636
0
        ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
13637
0
    } else {
13638
0
        iprefix = ns->prefix;
13639
0
        ihref = ns->href;
13640
0
    }
13641
13642
0
          if (xmlGetNamespace(ctxt, iprefix) == NULL) {
13643
0
        nsPush(ctxt, iprefix, ihref);
13644
0
        nsnr++;
13645
0
    }
13646
0
    ns = ns->next;
13647
0
      }
13648
0
      cur = cur->parent;
13649
0
  }
13650
0
    }
13651
13652
0
    if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
13653
  /*
13654
   * ID/IDREF registration will be done in xmlValidateElement below
13655
   */
13656
0
  ctxt->loadsubset |= XML_SKIP_IDS;
13657
0
    }
13658
13659
0
#ifdef LIBXML_HTML_ENABLED
13660
0
    if (doc->type == XML_HTML_DOCUMENT_NODE)
13661
0
        __htmlParseContent(ctxt);
13662
0
    else
13663
0
#endif
13664
0
  xmlParseContent(ctxt);
13665
13666
0
    nsPop(ctxt, nsnr);
13667
0
    if ((RAW == '<') && (NXT(1) == '/')) {
13668
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13669
0
    } else if (RAW != 0) {
13670
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13671
0
    }
13672
0
    if ((ctxt->node != NULL) && (ctxt->node != node)) {
13673
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13674
0
  ctxt->wellFormed = 0;
13675
0
    }
13676
13677
0
    if (!ctxt->wellFormed) {
13678
0
        if (ctxt->errNo == 0)
13679
0
      ret = XML_ERR_INTERNAL_ERROR;
13680
0
  else
13681
0
      ret = (xmlParserErrors)ctxt->errNo;
13682
0
    } else {
13683
0
        ret = XML_ERR_OK;
13684
0
    }
13685
13686
    /*
13687
     * Return the newly created nodeset after unlinking it from
13688
     * the pseudo sibling.
13689
     */
13690
13691
0
    cur = fake->next;
13692
0
    fake->next = NULL;
13693
0
    node->last = fake;
13694
13695
0
    if (cur != NULL) {
13696
0
  cur->prev = NULL;
13697
0
    }
13698
13699
0
    *lst = cur;
13700
13701
0
    while (cur != NULL) {
13702
0
  cur->parent = NULL;
13703
0
  cur = cur->next;
13704
0
    }
13705
13706
0
    xmlUnlinkNode(fake);
13707
0
    xmlFreeNode(fake);
13708
13709
13710
0
    if (ret != XML_ERR_OK) {
13711
0
        xmlFreeNodeList(*lst);
13712
0
  *lst = NULL;
13713
0
    }
13714
13715
0
    if (doc->dict != NULL)
13716
0
        ctxt->dict = NULL;
13717
0
    xmlFreeParserCtxt(ctxt);
13718
13719
0
    return(ret);
13720
#else /* !SAX2 */
13721
    return(XML_ERR_INTERNAL_ERROR);
13722
#endif
13723
0
}
13724
13725
#ifdef LIBXML_SAX1_ENABLED
13726
/**
13727
 * xmlParseBalancedChunkMemoryRecover:
13728
 * @doc:  the document the chunk pertains to (must not be NULL)
13729
 * @sax:  the SAX handler block (possibly NULL)
13730
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
13731
 * @depth:  Used for loop detection, use 0
13732
 * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
13733
 * @lst:  the return value for the set of parsed nodes
13734
 * @recover: return nodes even if the data is broken (use 0)
13735
 *
13736
 *
13737
 * Parse a well-balanced chunk of an XML document
13738
 * called by the parser
13739
 * The allowed sequence for the Well Balanced Chunk is the one defined by
13740
 * the content production in the XML grammar:
13741
 *
13742
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13743
 *
13744
 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13745
 *    the parser error code otherwise
13746
 *
13747
 * In case recover is set to 1, the nodelist will not be empty even if
13748
 * the parsed chunk is not well balanced, assuming the parsing succeeded to
13749
 * some extent.
13750
 */
13751
int
13752
xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13753
     void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
13754
0
     int recover) {
13755
0
    xmlParserCtxtPtr ctxt;
13756
0
    xmlDocPtr newDoc;
13757
0
    xmlSAXHandlerPtr oldsax = NULL;
13758
0
    xmlNodePtr content, newRoot;
13759
0
    int size;
13760
0
    int ret = 0;
13761
13762
0
    if (depth > 40) {
13763
0
  return(XML_ERR_ENTITY_LOOP);
13764
0
    }
13765
13766
13767
0
    if (lst != NULL)
13768
0
        *lst = NULL;
13769
0
    if (string == NULL)
13770
0
        return(-1);
13771
13772
0
    size = xmlStrlen(string);
13773
13774
0
    ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13775
0
    if (ctxt == NULL) return(-1);
13776
0
    ctxt->userData = ctxt;
13777
0
    if (sax != NULL) {
13778
0
  oldsax = ctxt->sax;
13779
0
        ctxt->sax = sax;
13780
0
  if (user_data != NULL)
13781
0
      ctxt->userData = user_data;
13782
0
    }
13783
0
    newDoc = xmlNewDoc(BAD_CAST "1.0");
13784
0
    if (newDoc == NULL) {
13785
0
  xmlFreeParserCtxt(ctxt);
13786
0
  return(-1);
13787
0
    }
13788
0
    newDoc->properties = XML_DOC_INTERNAL;
13789
0
    if ((doc != NULL) && (doc->dict != NULL)) {
13790
0
        xmlDictFree(ctxt->dict);
13791
0
  ctxt->dict = doc->dict;
13792
0
  xmlDictReference(ctxt->dict);
13793
0
  ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13794
0
  ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13795
0
  ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13796
0
  ctxt->dictNames = 1;
13797
0
    } else {
13798
0
  xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL);
13799
0
    }
13800
    /* doc == NULL is only supported for historic reasons */
13801
0
    if (doc != NULL) {
13802
0
  newDoc->intSubset = doc->intSubset;
13803
0
  newDoc->extSubset = doc->extSubset;
13804
0
    }
13805
0
    newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13806
0
    if (newRoot == NULL) {
13807
0
  if (sax != NULL)
13808
0
      ctxt->sax = oldsax;
13809
0
  xmlFreeParserCtxt(ctxt);
13810
0
  newDoc->intSubset = NULL;
13811
0
  newDoc->extSubset = NULL;
13812
0
        xmlFreeDoc(newDoc);
13813
0
  return(-1);
13814
0
    }
13815
0
    xmlAddChild((xmlNodePtr) newDoc, newRoot);
13816
0
    nodePush(ctxt, newRoot);
13817
    /* doc == NULL is only supported for historic reasons */
13818
0
    if (doc == NULL) {
13819
0
  ctxt->myDoc = newDoc;
13820
0
    } else {
13821
0
  ctxt->myDoc = newDoc;
13822
0
  newDoc->children->doc = doc;
13823
  /* Ensure that doc has XML spec namespace */
13824
0
  xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
13825
0
  newDoc->oldNs = doc->oldNs;
13826
0
    }
13827
0
    ctxt->instate = XML_PARSER_CONTENT;
13828
0
    ctxt->input_id = 2;
13829
0
    ctxt->depth = depth;
13830
13831
    /*
13832
     * Doing validity checking on chunk doesn't make sense
13833
     */
13834
0
    ctxt->validate = 0;
13835
0
    ctxt->loadsubset = 0;
13836
0
    xmlDetectSAX2(ctxt);
13837
13838
0
    if ( doc != NULL ){
13839
0
        content = doc->children;
13840
0
        doc->children = NULL;
13841
0
        xmlParseContent(ctxt);
13842
0
        doc->children = content;
13843
0
    }
13844
0
    else {
13845
0
        xmlParseContent(ctxt);
13846
0
    }
13847
0
    if ((RAW == '<') && (NXT(1) == '/')) {
13848
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13849
0
    } else if (RAW != 0) {
13850
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13851
0
    }
13852
0
    if (ctxt->node != newDoc->children) {
13853
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13854
0
    }
13855
13856
0
    if (!ctxt->wellFormed) {
13857
0
        if (ctxt->errNo == 0)
13858
0
      ret = 1;
13859
0
  else
13860
0
      ret = ctxt->errNo;
13861
0
    } else {
13862
0
      ret = 0;
13863
0
    }
13864
13865
0
    if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
13866
0
  xmlNodePtr cur;
13867
13868
  /*
13869
   * Return the newly created nodeset after unlinking it from
13870
   * they pseudo parent.
13871
   */
13872
0
  cur = newDoc->children->children;
13873
0
  *lst = cur;
13874
0
  while (cur != NULL) {
13875
0
      xmlSetTreeDoc(cur, doc);
13876
0
      cur->parent = NULL;
13877
0
      cur = cur->next;
13878
0
  }
13879
0
  newDoc->children->children = NULL;
13880
0
    }
13881
13882
0
    if (sax != NULL)
13883
0
  ctxt->sax = oldsax;
13884
0
    xmlFreeParserCtxt(ctxt);
13885
0
    newDoc->intSubset = NULL;
13886
0
    newDoc->extSubset = NULL;
13887
    /* This leaks the namespace list if doc == NULL */
13888
0
    newDoc->oldNs = NULL;
13889
0
    xmlFreeDoc(newDoc);
13890
13891
0
    return(ret);
13892
0
}
13893
13894
/**
13895
 * xmlSAXParseEntity:
13896
 * @sax:  the SAX handler block
13897
 * @filename:  the filename
13898
 *
13899
 * DEPRECATED: Don't use.
13900
 *
13901
 * parse an XML external entity out of context and build a tree.
13902
 * It use the given SAX function block to handle the parsing callback.
13903
 * If sax is NULL, fallback to the default DOM tree building routines.
13904
 *
13905
 * [78] extParsedEnt ::= TextDecl? content
13906
 *
13907
 * This correspond to a "Well Balanced" chunk
13908
 *
13909
 * Returns the resulting document tree
13910
 */
13911
13912
xmlDocPtr
13913
0
xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
13914
0
    xmlDocPtr ret;
13915
0
    xmlParserCtxtPtr ctxt;
13916
13917
0
    ctxt = xmlCreateFileParserCtxt(filename);
13918
0
    if (ctxt == NULL) {
13919
0
  return(NULL);
13920
0
    }
13921
0
    if (sax != NULL) {
13922
0
  if (ctxt->sax != NULL)
13923
0
      xmlFree(ctxt->sax);
13924
0
        ctxt->sax = sax;
13925
0
        ctxt->userData = NULL;
13926
0
    }
13927
13928
0
    xmlParseExtParsedEnt(ctxt);
13929
13930
0
    if (ctxt->wellFormed)
13931
0
  ret = ctxt->myDoc;
13932
0
    else {
13933
0
        ret = NULL;
13934
0
        xmlFreeDoc(ctxt->myDoc);
13935
0
        ctxt->myDoc = NULL;
13936
0
    }
13937
0
    if (sax != NULL)
13938
0
        ctxt->sax = NULL;
13939
0
    xmlFreeParserCtxt(ctxt);
13940
13941
0
    return(ret);
13942
0
}
13943
13944
/**
13945
 * xmlParseEntity:
13946
 * @filename:  the filename
13947
 *
13948
 * parse an XML external entity out of context and build a tree.
13949
 *
13950
 * [78] extParsedEnt ::= TextDecl? content
13951
 *
13952
 * This correspond to a "Well Balanced" chunk
13953
 *
13954
 * Returns the resulting document tree
13955
 */
13956
13957
xmlDocPtr
13958
0
xmlParseEntity(const char *filename) {
13959
0
    return(xmlSAXParseEntity(NULL, filename));
13960
0
}
13961
#endif /* LIBXML_SAX1_ENABLED */
13962
13963
/**
13964
 * xmlCreateEntityParserCtxtInternal:
13965
 * @URL:  the entity URL
13966
 * @ID:  the entity PUBLIC ID
13967
 * @base:  a possible base for the target URI
13968
 * @pctx:  parser context used to set options on new context
13969
 *
13970
 * Create a parser context for an external entity
13971
 * Automatic support for ZLIB/Compress compressed document is provided
13972
 * by default if found at compile-time.
13973
 *
13974
 * Returns the new parser context or NULL
13975
 */
13976
static xmlParserCtxtPtr
13977
xmlCreateEntityParserCtxtInternal(xmlSAXHandlerPtr sax, void *userData,
13978
        const xmlChar *URL, const xmlChar *ID, const xmlChar *base,
13979
3.23M
        xmlParserCtxtPtr pctx) {
13980
3.23M
    xmlParserCtxtPtr ctxt;
13981
3.23M
    xmlParserInputPtr inputStream;
13982
3.23M
    char *directory = NULL;
13983
3.23M
    xmlChar *uri;
13984
13985
3.23M
    ctxt = xmlNewSAXParserCtxt(sax, userData);
13986
3.23M
    if (ctxt == NULL) {
13987
0
  return(NULL);
13988
0
    }
13989
13990
3.23M
    if (pctx != NULL) {
13991
3.23M
        ctxt->options = pctx->options;
13992
3.23M
        ctxt->_private = pctx->_private;
13993
  /*
13994
   * this is a subparser of pctx, so the input_id should be
13995
   * incremented to distinguish from main entity
13996
   */
13997
3.23M
  ctxt->input_id = pctx->input_id + 1;
13998
3.23M
    }
13999
14000
    /* Don't read from stdin. */
14001
3.23M
    if (xmlStrcmp(URL, BAD_CAST "-") == 0)
14002
4
        URL = BAD_CAST "./-";
14003
14004
3.23M
    uri = xmlBuildURI(URL, base);
14005
14006
3.23M
    if (uri == NULL) {
14007
5.63k
  inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
14008
5.63k
  if (inputStream == NULL) {
14009
5.58k
      xmlFreeParserCtxt(ctxt);
14010
5.58k
      return(NULL);
14011
5.58k
  }
14012
14013
52
  inputPush(ctxt, inputStream);
14014
14015
52
  if ((ctxt->directory == NULL) && (directory == NULL))
14016
52
      directory = xmlParserGetDirectory((char *)URL);
14017
52
  if ((ctxt->directory == NULL) && (directory != NULL))
14018
52
      ctxt->directory = directory;
14019
3.22M
    } else {
14020
3.22M
  inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
14021
3.22M
  if (inputStream == NULL) {
14022
14.8k
      xmlFree(uri);
14023
14.8k
      xmlFreeParserCtxt(ctxt);
14024
14.8k
      return(NULL);
14025
14.8k
  }
14026
14027
3.21M
  inputPush(ctxt, inputStream);
14028
14029
3.21M
  if ((ctxt->directory == NULL) && (directory == NULL))
14030
3.21M
      directory = xmlParserGetDirectory((char *)uri);
14031
3.21M
  if ((ctxt->directory == NULL) && (directory != NULL))
14032
3.21M
      ctxt->directory = directory;
14033
3.21M
  xmlFree(uri);
14034
3.21M
    }
14035
3.21M
    return(ctxt);
14036
3.23M
}
14037
14038
/**
14039
 * xmlCreateEntityParserCtxt:
14040
 * @URL:  the entity URL
14041
 * @ID:  the entity PUBLIC ID
14042
 * @base:  a possible base for the target URI
14043
 *
14044
 * Create a parser context for an external entity
14045
 * Automatic support for ZLIB/Compress compressed document is provided
14046
 * by default if found at compile-time.
14047
 *
14048
 * Returns the new parser context or NULL
14049
 */
14050
xmlParserCtxtPtr
14051
xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
14052
0
                    const xmlChar *base) {
14053
0
    return xmlCreateEntityParserCtxtInternal(NULL, NULL, URL, ID, base, NULL);
14054
14055
0
}
14056
14057
/************************************************************************
14058
 *                  *
14059
 *    Front ends when parsing from a file     *
14060
 *                  *
14061
 ************************************************************************/
14062
14063
/**
14064
 * xmlCreateURLParserCtxt:
14065
 * @filename:  the filename or URL
14066
 * @options:  a combination of xmlParserOption
14067
 *
14068
 * Create a parser context for a file or URL content.
14069
 * Automatic support for ZLIB/Compress compressed document is provided
14070
 * by default if found at compile-time and for file accesses
14071
 *
14072
 * Returns the new parser context or NULL
14073
 */
14074
xmlParserCtxtPtr
14075
xmlCreateURLParserCtxt(const char *filename, int options)
14076
0
{
14077
0
    xmlParserCtxtPtr ctxt;
14078
0
    xmlParserInputPtr inputStream;
14079
0
    char *directory = NULL;
14080
14081
0
    ctxt = xmlNewParserCtxt();
14082
0
    if (ctxt == NULL) {
14083
0
  xmlErrMemory(NULL, "cannot allocate parser context");
14084
0
  return(NULL);
14085
0
    }
14086
14087
0
    if (options)
14088
0
  xmlCtxtUseOptionsInternal(ctxt, options, NULL);
14089
0
    ctxt->linenumbers = 1;
14090
14091
0
    inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
14092
0
    if (inputStream == NULL) {
14093
0
  xmlFreeParserCtxt(ctxt);
14094
0
  return(NULL);
14095
0
    }
14096
14097
0
    inputPush(ctxt, inputStream);
14098
0
    if ((ctxt->directory == NULL) && (directory == NULL))
14099
0
        directory = xmlParserGetDirectory(filename);
14100
0
    if ((ctxt->directory == NULL) && (directory != NULL))
14101
0
        ctxt->directory = directory;
14102
14103
0
    return(ctxt);
14104
0
}
14105
14106
/**
14107
 * xmlCreateFileParserCtxt:
14108
 * @filename:  the filename
14109
 *
14110
 * Create a parser context for a file content.
14111
 * Automatic support for ZLIB/Compress compressed document is provided
14112
 * by default if found at compile-time.
14113
 *
14114
 * Returns the new parser context or NULL
14115
 */
14116
xmlParserCtxtPtr
14117
xmlCreateFileParserCtxt(const char *filename)
14118
0
{
14119
0
    return(xmlCreateURLParserCtxt(filename, 0));
14120
0
}
14121
14122
#ifdef LIBXML_SAX1_ENABLED
14123
/**
14124
 * xmlSAXParseFileWithData:
14125
 * @sax:  the SAX handler block
14126
 * @filename:  the filename
14127
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
14128
 *             documents
14129
 * @data:  the userdata
14130
 *
14131
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
14132
 *
14133
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14134
 * compressed document is provided by default if found at compile-time.
14135
 * It use the given SAX function block to handle the parsing callback.
14136
 * If sax is NULL, fallback to the default DOM tree building routines.
14137
 *
14138
 * User data (void *) is stored within the parser context in the
14139
 * context's _private member, so it is available nearly everywhere in libxml
14140
 *
14141
 * Returns the resulting document tree
14142
 */
14143
14144
xmlDocPtr
14145
xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
14146
0
                        int recovery, void *data) {
14147
0
    xmlDocPtr ret;
14148
0
    xmlParserCtxtPtr ctxt;
14149
14150
0
    xmlInitParser();
14151
14152
0
    ctxt = xmlCreateFileParserCtxt(filename);
14153
0
    if (ctxt == NULL) {
14154
0
  return(NULL);
14155
0
    }
14156
0
    if (sax != NULL) {
14157
0
  if (ctxt->sax != NULL)
14158
0
      xmlFree(ctxt->sax);
14159
0
        ctxt->sax = sax;
14160
0
    }
14161
0
    xmlDetectSAX2(ctxt);
14162
0
    if (data!=NULL) {
14163
0
  ctxt->_private = data;
14164
0
    }
14165
14166
0
    if (ctxt->directory == NULL)
14167
0
        ctxt->directory = xmlParserGetDirectory(filename);
14168
14169
0
    ctxt->recovery = recovery;
14170
14171
0
    xmlParseDocument(ctxt);
14172
14173
0
    if ((ctxt->wellFormed) || recovery) {
14174
0
        ret = ctxt->myDoc;
14175
0
  if ((ret != NULL) && (ctxt->input->buf != NULL)) {
14176
0
      if (ctxt->input->buf->compressed > 0)
14177
0
    ret->compression = 9;
14178
0
      else
14179
0
    ret->compression = ctxt->input->buf->compressed;
14180
0
  }
14181
0
    }
14182
0
    else {
14183
0
       ret = NULL;
14184
0
       xmlFreeDoc(ctxt->myDoc);
14185
0
       ctxt->myDoc = NULL;
14186
0
    }
14187
0
    if (sax != NULL)
14188
0
        ctxt->sax = NULL;
14189
0
    xmlFreeParserCtxt(ctxt);
14190
14191
0
    return(ret);
14192
0
}
14193
14194
/**
14195
 * xmlSAXParseFile:
14196
 * @sax:  the SAX handler block
14197
 * @filename:  the filename
14198
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
14199
 *             documents
14200
 *
14201
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
14202
 *
14203
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14204
 * compressed document is provided by default if found at compile-time.
14205
 * It use the given SAX function block to handle the parsing callback.
14206
 * If sax is NULL, fallback to the default DOM tree building routines.
14207
 *
14208
 * Returns the resulting document tree
14209
 */
14210
14211
xmlDocPtr
14212
xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
14213
0
                          int recovery) {
14214
0
    return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
14215
0
}
14216
14217
/**
14218
 * xmlRecoverDoc:
14219
 * @cur:  a pointer to an array of xmlChar
14220
 *
14221
 * DEPRECATED: Use xmlReadDoc with XML_PARSE_RECOVER.
14222
 *
14223
 * parse an XML in-memory document and build a tree.
14224
 * In the case the document is not Well Formed, a attempt to build a
14225
 * tree is tried anyway
14226
 *
14227
 * Returns the resulting document tree or NULL in case of failure
14228
 */
14229
14230
xmlDocPtr
14231
0
xmlRecoverDoc(const xmlChar *cur) {
14232
0
    return(xmlSAXParseDoc(NULL, cur, 1));
14233
0
}
14234
14235
/**
14236
 * xmlParseFile:
14237
 * @filename:  the filename
14238
 *
14239
 * DEPRECATED: Use xmlReadFile.
14240
 *
14241
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14242
 * compressed document is provided by default if found at compile-time.
14243
 *
14244
 * Returns the resulting document tree if the file was wellformed,
14245
 * NULL otherwise.
14246
 */
14247
14248
xmlDocPtr
14249
0
xmlParseFile(const char *filename) {
14250
0
    return(xmlSAXParseFile(NULL, filename, 0));
14251
0
}
14252
14253
/**
14254
 * xmlRecoverFile:
14255
 * @filename:  the filename
14256
 *
14257
 * DEPRECATED: Use xmlReadFile with XML_PARSE_RECOVER.
14258
 *
14259
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14260
 * compressed document is provided by default if found at compile-time.
14261
 * In the case the document is not Well Formed, it attempts to build
14262
 * a tree anyway
14263
 *
14264
 * Returns the resulting document tree or NULL in case of failure
14265
 */
14266
14267
xmlDocPtr
14268
0
xmlRecoverFile(const char *filename) {
14269
0
    return(xmlSAXParseFile(NULL, filename, 1));
14270
0
}
14271
14272
14273
/**
14274
 * xmlSetupParserForBuffer:
14275
 * @ctxt:  an XML parser context
14276
 * @buffer:  a xmlChar * buffer
14277
 * @filename:  a file name
14278
 *
14279
 * DEPRECATED: Don't use.
14280
 *
14281
 * Setup the parser context to parse a new buffer; Clears any prior
14282
 * contents from the parser context. The buffer parameter must not be
14283
 * NULL, but the filename parameter can be
14284
 */
14285
void
14286
xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
14287
                             const char* filename)
14288
0
{
14289
0
    xmlParserInputPtr input;
14290
14291
0
    if ((ctxt == NULL) || (buffer == NULL))
14292
0
        return;
14293
14294
0
    input = xmlNewInputStream(ctxt);
14295
0
    if (input == NULL) {
14296
0
        xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
14297
0
        xmlClearParserCtxt(ctxt);
14298
0
        return;
14299
0
    }
14300
14301
0
    xmlClearParserCtxt(ctxt);
14302
0
    if (filename != NULL)
14303
0
        input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
14304
0
    input->base = buffer;
14305
0
    input->cur = buffer;
14306
0
    input->end = &buffer[xmlStrlen(buffer)];
14307
0
    inputPush(ctxt, input);
14308
0
}
14309
14310
/**
14311
 * xmlSAXUserParseFile:
14312
 * @sax:  a SAX handler
14313
 * @user_data:  The user data returned on SAX callbacks
14314
 * @filename:  a file name
14315
 *
14316
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
14317
 *
14318
 * parse an XML file and call the given SAX handler routines.
14319
 * Automatic support for ZLIB/Compress compressed document is provided
14320
 *
14321
 * Returns 0 in case of success or a error number otherwise
14322
 */
14323
int
14324
xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
14325
0
                    const char *filename) {
14326
0
    int ret = 0;
14327
0
    xmlParserCtxtPtr ctxt;
14328
14329
0
    ctxt = xmlCreateFileParserCtxt(filename);
14330
0
    if (ctxt == NULL) return -1;
14331
0
    if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14332
0
  xmlFree(ctxt->sax);
14333
0
    ctxt->sax = sax;
14334
0
    xmlDetectSAX2(ctxt);
14335
14336
0
    if (user_data != NULL)
14337
0
  ctxt->userData = user_data;
14338
14339
0
    xmlParseDocument(ctxt);
14340
14341
0
    if (ctxt->wellFormed)
14342
0
  ret = 0;
14343
0
    else {
14344
0
        if (ctxt->errNo != 0)
14345
0
      ret = ctxt->errNo;
14346
0
  else
14347
0
      ret = -1;
14348
0
    }
14349
0
    if (sax != NULL)
14350
0
  ctxt->sax = NULL;
14351
0
    if (ctxt->myDoc != NULL) {
14352
0
        xmlFreeDoc(ctxt->myDoc);
14353
0
  ctxt->myDoc = NULL;
14354
0
    }
14355
0
    xmlFreeParserCtxt(ctxt);
14356
14357
0
    return ret;
14358
0
}
14359
#endif /* LIBXML_SAX1_ENABLED */
14360
14361
/************************************************************************
14362
 *                  *
14363
 *    Front ends when parsing from memory     *
14364
 *                  *
14365
 ************************************************************************/
14366
14367
/**
14368
 * xmlCreateMemoryParserCtxt:
14369
 * @buffer:  a pointer to a char array
14370
 * @size:  the size of the array
14371
 *
14372
 * Create a parser context for an XML in-memory document.
14373
 *
14374
 * Returns the new parser context or NULL
14375
 */
14376
xmlParserCtxtPtr
14377
426k
xmlCreateMemoryParserCtxt(const char *buffer, int size) {
14378
426k
    xmlParserCtxtPtr ctxt;
14379
426k
    xmlParserInputPtr input;
14380
426k
    xmlParserInputBufferPtr buf;
14381
14382
426k
    if (buffer == NULL)
14383
0
  return(NULL);
14384
426k
    if (size <= 0)
14385
5.01k
  return(NULL);
14386
14387
420k
    ctxt = xmlNewParserCtxt();
14388
420k
    if (ctxt == NULL)
14389
0
  return(NULL);
14390
14391
    /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
14392
420k
    buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
14393
420k
    if (buf == NULL) {
14394
0
  xmlFreeParserCtxt(ctxt);
14395
0
  return(NULL);
14396
0
    }
14397
14398
420k
    input = xmlNewInputStream(ctxt);
14399
420k
    if (input == NULL) {
14400
0
  xmlFreeParserInputBuffer(buf);
14401
0
  xmlFreeParserCtxt(ctxt);
14402
0
  return(NULL);
14403
0
    }
14404
14405
420k
    input->filename = NULL;
14406
420k
    input->buf = buf;
14407
420k
    xmlBufResetInput(input->buf->buffer, input);
14408
14409
420k
    inputPush(ctxt, input);
14410
420k
    return(ctxt);
14411
420k
}
14412
14413
#ifdef LIBXML_SAX1_ENABLED
14414
/**
14415
 * xmlSAXParseMemoryWithData:
14416
 * @sax:  the SAX handler block
14417
 * @buffer:  an pointer to a char array
14418
 * @size:  the size of the array
14419
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
14420
 *             documents
14421
 * @data:  the userdata
14422
 *
14423
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
14424
 *
14425
 * parse an XML in-memory block and use the given SAX function block
14426
 * to handle the parsing callback. If sax is NULL, fallback to the default
14427
 * DOM tree building routines.
14428
 *
14429
 * User data (void *) is stored within the parser context in the
14430
 * context's _private member, so it is available nearly everywhere in libxml
14431
 *
14432
 * Returns the resulting document tree
14433
 */
14434
14435
xmlDocPtr
14436
xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
14437
0
            int size, int recovery, void *data) {
14438
0
    xmlDocPtr ret;
14439
0
    xmlParserCtxtPtr ctxt;
14440
14441
0
    xmlInitParser();
14442
14443
0
    ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14444
0
    if (ctxt == NULL) return(NULL);
14445
0
    if (sax != NULL) {
14446
0
  if (ctxt->sax != NULL)
14447
0
      xmlFree(ctxt->sax);
14448
0
        ctxt->sax = sax;
14449
0
    }
14450
0
    xmlDetectSAX2(ctxt);
14451
0
    if (data!=NULL) {
14452
0
  ctxt->_private=data;
14453
0
    }
14454
14455
0
    ctxt->recovery = recovery;
14456
14457
0
    xmlParseDocument(ctxt);
14458
14459
0
    if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14460
0
    else {
14461
0
       ret = NULL;
14462
0
       xmlFreeDoc(ctxt->myDoc);
14463
0
       ctxt->myDoc = NULL;
14464
0
    }
14465
0
    if (sax != NULL)
14466
0
  ctxt->sax = NULL;
14467
0
    xmlFreeParserCtxt(ctxt);
14468
14469
0
    return(ret);
14470
0
}
14471
14472
/**
14473
 * xmlSAXParseMemory:
14474
 * @sax:  the SAX handler block
14475
 * @buffer:  an pointer to a char array
14476
 * @size:  the size of the array
14477
 * @recovery:  work in recovery mode, i.e. tries to read not Well Formed
14478
 *             documents
14479
 *
14480
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
14481
 *
14482
 * parse an XML in-memory block and use the given SAX function block
14483
 * to handle the parsing callback. If sax is NULL, fallback to the default
14484
 * DOM tree building routines.
14485
 *
14486
 * Returns the resulting document tree
14487
 */
14488
xmlDocPtr
14489
xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
14490
0
            int size, int recovery) {
14491
0
    return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
14492
0
}
14493
14494
/**
14495
 * xmlParseMemory:
14496
 * @buffer:  an pointer to a char array
14497
 * @size:  the size of the array
14498
 *
14499
 * DEPRECATED: Use xmlReadMemory.
14500
 *
14501
 * parse an XML in-memory block and build a tree.
14502
 *
14503
 * Returns the resulting document tree
14504
 */
14505
14506
0
xmlDocPtr xmlParseMemory(const char *buffer, int size) {
14507
0
   return(xmlSAXParseMemory(NULL, buffer, size, 0));
14508
0
}
14509
14510
/**
14511
 * xmlRecoverMemory:
14512
 * @buffer:  an pointer to a char array
14513
 * @size:  the size of the array
14514
 *
14515
 * DEPRECATED: Use xmlReadMemory with XML_PARSE_RECOVER.
14516
 *
14517
 * parse an XML in-memory block and build a tree.
14518
 * In the case the document is not Well Formed, an attempt to
14519
 * build a tree is tried anyway
14520
 *
14521
 * Returns the resulting document tree or NULL in case of error
14522
 */
14523
14524
0
xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
14525
0
   return(xmlSAXParseMemory(NULL, buffer, size, 1));
14526
0
}
14527
14528
/**
14529
 * xmlSAXUserParseMemory:
14530
 * @sax:  a SAX handler
14531
 * @user_data:  The user data returned on SAX callbacks
14532
 * @buffer:  an in-memory XML document input
14533
 * @size:  the length of the XML document in bytes
14534
 *
14535
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
14536
 *
14537
 * parse an XML in-memory buffer and call the given SAX handler routines.
14538
 *
14539
 * Returns 0 in case of success or a error number otherwise
14540
 */
14541
int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
14542
0
        const char *buffer, int size) {
14543
0
    int ret = 0;
14544
0
    xmlParserCtxtPtr ctxt;
14545
14546
0
    xmlInitParser();
14547
14548
0
    ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14549
0
    if (ctxt == NULL) return -1;
14550
0
    if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14551
0
        xmlFree(ctxt->sax);
14552
0
    ctxt->sax = sax;
14553
0
    xmlDetectSAX2(ctxt);
14554
14555
0
    if (user_data != NULL)
14556
0
  ctxt->userData = user_data;
14557
14558
0
    xmlParseDocument(ctxt);
14559
14560
0
    if (ctxt->wellFormed)
14561
0
  ret = 0;
14562
0
    else {
14563
0
        if (ctxt->errNo != 0)
14564
0
      ret = ctxt->errNo;
14565
0
  else
14566
0
      ret = -1;
14567
0
    }
14568
0
    if (sax != NULL)
14569
0
        ctxt->sax = NULL;
14570
0
    if (ctxt->myDoc != NULL) {
14571
0
        xmlFreeDoc(ctxt->myDoc);
14572
0
  ctxt->myDoc = NULL;
14573
0
    }
14574
0
    xmlFreeParserCtxt(ctxt);
14575
14576
0
    return ret;
14577
0
}
14578
#endif /* LIBXML_SAX1_ENABLED */
14579
14580
/**
14581
 * xmlCreateDocParserCtxt:
14582
 * @cur:  a pointer to an array of xmlChar
14583
 *
14584
 * Creates a parser context for an XML in-memory document.
14585
 *
14586
 * Returns the new parser context or NULL
14587
 */
14588
xmlParserCtxtPtr
14589
0
xmlCreateDocParserCtxt(const xmlChar *cur) {
14590
0
    int len;
14591
14592
0
    if (cur == NULL)
14593
0
  return(NULL);
14594
0
    len = xmlStrlen(cur);
14595
0
    return(xmlCreateMemoryParserCtxt((const char *)cur, len));
14596
0
}
14597
14598
#ifdef LIBXML_SAX1_ENABLED
14599
/**
14600
 * xmlSAXParseDoc:
14601
 * @sax:  the SAX handler block
14602
 * @cur:  a pointer to an array of xmlChar
14603
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
14604
 *             documents
14605
 *
14606
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadDoc.
14607
 *
14608
 * parse an XML in-memory document and build a tree.
14609
 * It use the given SAX function block to handle the parsing callback.
14610
 * If sax is NULL, fallback to the default DOM tree building routines.
14611
 *
14612
 * Returns the resulting document tree
14613
 */
14614
14615
xmlDocPtr
14616
0
xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
14617
0
    xmlDocPtr ret;
14618
0
    xmlParserCtxtPtr ctxt;
14619
0
    xmlSAXHandlerPtr oldsax = NULL;
14620
14621
0
    if (cur == NULL) return(NULL);
14622
14623
14624
0
    ctxt = xmlCreateDocParserCtxt(cur);
14625
0
    if (ctxt == NULL) return(NULL);
14626
0
    if (sax != NULL) {
14627
0
        oldsax = ctxt->sax;
14628
0
        ctxt->sax = sax;
14629
0
        ctxt->userData = NULL;
14630
0
    }
14631
0
    xmlDetectSAX2(ctxt);
14632
14633
0
    xmlParseDocument(ctxt);
14634
0
    if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14635
0
    else {
14636
0
       ret = NULL;
14637
0
       xmlFreeDoc(ctxt->myDoc);
14638
0
       ctxt->myDoc = NULL;
14639
0
    }
14640
0
    if (sax != NULL)
14641
0
  ctxt->sax = oldsax;
14642
0
    xmlFreeParserCtxt(ctxt);
14643
14644
0
    return(ret);
14645
0
}
14646
14647
/**
14648
 * xmlParseDoc:
14649
 * @cur:  a pointer to an array of xmlChar
14650
 *
14651
 * DEPRECATED: Use xmlReadDoc.
14652
 *
14653
 * parse an XML in-memory document and build a tree.
14654
 *
14655
 * Returns the resulting document tree
14656
 */
14657
14658
xmlDocPtr
14659
0
xmlParseDoc(const xmlChar *cur) {
14660
0
    return(xmlSAXParseDoc(NULL, cur, 0));
14661
0
}
14662
#endif /* LIBXML_SAX1_ENABLED */
14663
14664
#ifdef LIBXML_LEGACY_ENABLED
14665
/************************************************************************
14666
 *                  *
14667
 *  Specific function to keep track of entities references    *
14668
 *  and used by the XSLT debugger         *
14669
 *                  *
14670
 ************************************************************************/
14671
14672
static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
14673
14674
/**
14675
 * xmlAddEntityReference:
14676
 * @ent : A valid entity
14677
 * @firstNode : A valid first node for children of entity
14678
 * @lastNode : A valid last node of children entity
14679
 *
14680
 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
14681
 */
14682
static void
14683
xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
14684
                      xmlNodePtr lastNode)
14685
{
14686
    if (xmlEntityRefFunc != NULL) {
14687
        (*xmlEntityRefFunc) (ent, firstNode, lastNode);
14688
    }
14689
}
14690
14691
14692
/**
14693
 * xmlSetEntityReferenceFunc:
14694
 * @func: A valid function
14695
 *
14696
 * Set the function to call call back when a xml reference has been made
14697
 */
14698
void
14699
xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
14700
{
14701
    xmlEntityRefFunc = func;
14702
}
14703
#endif /* LIBXML_LEGACY_ENABLED */
14704
14705
/************************************************************************
14706
 *                  *
14707
 *        Miscellaneous       *
14708
 *                  *
14709
 ************************************************************************/
14710
14711
/* Non-zero once xmlInitParser() has completed its one-time setup. */
static int xmlParserInitialized = 0;

/**
 * xmlInitParserSubsystems:
 *
 * Run the one-time initialization of every library module, in order,
 * and mark the parser as initialized. In thread-enabled builds the
 * caller holds the global init mutex.
 */
static void
xmlInitParserSubsystems(void) {
    xmlInitThreads();
    xmlInitGlobals();
    xmlInitMemory();
    xmlInitializeDict();
    xmlInitCharEncodingHandlers();
    xmlDefaultSAXHandlerInit();
    xmlRegisterDefaultInputCallbacks();
#ifdef LIBXML_OUTPUT_ENABLED
    xmlRegisterDefaultOutputCallbacks();
#endif /* LIBXML_OUTPUT_ENABLED */
#ifdef LIBXML_HTML_ENABLED
    htmlInitAutoClose();
    htmlDefaultSAXHandlerInit();
#endif
#if defined(LIBXML_XPATH_ENABLED) || defined(LIBXML_SCHEMAS_ENABLED)
    xmlXPathInit();
#endif
    xmlParserInitialized = 1;
}

/**
 * xmlInitParser:
 *
 * Initialization function for the XML parser.
 * This is not reentrant. Call once before processing in case of
 * use in multithreaded programs.
 *
 * Calls made after the library has been initialized return immediately.
 */

void
xmlInitParser(void) {
    /* Fast path: nothing to do once initialization has completed. */
    if (xmlParserInitialized != 0)
        return;

#if defined(_WIN32) && (!defined(LIBXML_STATIC) || defined(LIBXML_STATIC_FOR_DLL))
    /* Register the cleanup at exit only when the stock free() is in use. */
    if (xmlFree == free)
        atexit(xmlCleanupParser);
#endif

#ifdef LIBXML_THREAD_ENABLED
    /* Re-test the flag under the global mutex before initializing. */
    __xmlGlobalInitMutexLock();
    if (xmlParserInitialized == 0)
        xmlInitParserSubsystems();
    __xmlGlobalInitMutexUnlock();
#else
    xmlInitParserSubsystems();
#endif
}
14758
14759
/**
14760
 * xmlCleanupParser:
14761
 *
14762
 * This function name is somewhat misleading. It does not clean up
14763
 * parser state, it cleans up memory allocated by the library itself.
14764
 * It is a cleanup function for the XML library. It tries to reclaim all
14765
 * related global memory allocated for the library processing.
14766
 * It doesn't deallocate any document related memory. One should
14767
 * call xmlCleanupParser() only when the process has finished using
14768
 * the library and all XML/HTML documents built with it.
14769
 * See also xmlInitParser() which has the opposite function of preparing
14770
 * the library for operations.
14771
 *
14772
 * WARNING: if your application is multithreaded or has plugin support
14773
 *          calling this may crash the application if another thread or
14774
 *          a plugin is still using libxml2. It's sometimes very hard to
14775
 *          guess if libxml2 is in use in the application, some libraries
14776
 *          or plugins may use it without notice. In case of doubt abstain
14777
 *          from calling this function or do it just before calling exit()
14778
 *          to avoid leak reports from valgrind !
14779
 */
14780
14781
void
14782
0
xmlCleanupParser(void) {
14783
0
    if (!xmlParserInitialized)
14784
0
  return;
14785
14786
0
    xmlCleanupCharEncodingHandlers();
14787
0
#ifdef LIBXML_CATALOG_ENABLED
14788
0
    xmlCatalogCleanup();
14789
0
#endif
14790
0
    xmlDictCleanup();
14791
0
    xmlCleanupInputCallbacks();
14792
0
#ifdef LIBXML_OUTPUT_ENABLED
14793
0
    xmlCleanupOutputCallbacks();
14794
0
#endif
14795
0
#ifdef LIBXML_SCHEMAS_ENABLED
14796
0
    xmlSchemaCleanupTypes();
14797
0
    xmlRelaxNGCleanupTypes();
14798
0
#endif
14799
0
    xmlCleanupGlobals();
14800
0
    xmlCleanupThreads(); /* must be last if called not from the main thread */
14801
0
    xmlCleanupMemory();
14802
0
    xmlParserInitialized = 0;
14803
0
}
14804
14805
#if defined(HAVE_ATTRIBUTE_DESTRUCTOR) && !defined(LIBXML_STATIC) && \
    !defined(_WIN32)
/**
 * xmlDestructor:
 *
 * Destructor hook (declared with ATTRIBUTE_DESTRUCTOR) that cleans up
 * the library, but only when the stock free() is the deallocator.
 */
static void
ATTRIBUTE_DESTRUCTOR
xmlDestructor(void) {
    /*
     * Calling custom deallocation functions in a destructor can cause
     * problems, for example with Nokogiri.
     */
    if (xmlFree != free)
        return;

    xmlCleanupParser();
}
#endif
14818
14819
/************************************************************************
14820
 *                  *
14821
 *  New set (2.6.0) of simpler and more flexible APIs   *
14822
 *                  *
14823
 ************************************************************************/
14824
14825
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A variable named "dict" must be visible where the
 * macro is expanded; a NULL "dict" means the string is always freed.
 *
 * The body is wrapped in do { } while (0) so the macro behaves as a
 * single statement (no dangling-else hazard) and must be followed by
 * a semicolon at the point of use.
 */
#define DICT_FREE(str)                                              \
    do {                                                            \
        if ((str) && ((!dict) ||                                    \
            (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))      \
            xmlFree((char *)(str));                                 \
    } while (0)
14836
14837
/**
14838
 * xmlCtxtReset:
14839
 * @ctxt: an XML parser context
14840
 *
14841
 * Reset a parser context
14842
 */
14843
void
14844
xmlCtxtReset(xmlParserCtxtPtr ctxt)
14845
0
{
14846
0
    xmlParserInputPtr input;
14847
0
    xmlDictPtr dict;
14848
14849
0
    if (ctxt == NULL)
14850
0
        return;
14851
14852
0
    dict = ctxt->dict;
14853
14854
0
    while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
14855
0
        xmlFreeInputStream(input);
14856
0
    }
14857
0
    ctxt->inputNr = 0;
14858
0
    ctxt->input = NULL;
14859
14860
0
    ctxt->spaceNr = 0;
14861
0
    if (ctxt->spaceTab != NULL) {
14862
0
  ctxt->spaceTab[0] = -1;
14863
0
  ctxt->space = &ctxt->spaceTab[0];
14864
0
    } else {
14865
0
        ctxt->space = NULL;
14866
0
    }
14867
14868
14869
0
    ctxt->nodeNr = 0;
14870
0
    ctxt->node = NULL;
14871
14872
0
    ctxt->nameNr = 0;
14873
0
    ctxt->name = NULL;
14874
14875
0
    ctxt->nsNr = 0;
14876
14877
0
    DICT_FREE(ctxt->version);
14878
0
    ctxt->version = NULL;
14879
0
    DICT_FREE(ctxt->encoding);
14880
0
    ctxt->encoding = NULL;
14881
0
    DICT_FREE(ctxt->directory);
14882
0
    ctxt->directory = NULL;
14883
0
    DICT_FREE(ctxt->extSubURI);
14884
0
    ctxt->extSubURI = NULL;
14885
0
    DICT_FREE(ctxt->extSubSystem);
14886
0
    ctxt->extSubSystem = NULL;
14887
0
    if (ctxt->myDoc != NULL)
14888
0
        xmlFreeDoc(ctxt->myDoc);
14889
0
    ctxt->myDoc = NULL;
14890
14891
0
    ctxt->standalone = -1;
14892
0
    ctxt->hasExternalSubset = 0;
14893
0
    ctxt->hasPErefs = 0;
14894
0
    ctxt->html = 0;
14895
0
    ctxt->external = 0;
14896
0
    ctxt->instate = XML_PARSER_START;
14897
0
    ctxt->token = 0;
14898
14899
0
    ctxt->wellFormed = 1;
14900
0
    ctxt->nsWellFormed = 1;
14901
0
    ctxt->disableSAX = 0;
14902
0
    ctxt->valid = 1;
14903
#if 0
14904
    ctxt->vctxt.userData = ctxt;
14905
    ctxt->vctxt.error = xmlParserValidityError;
14906
    ctxt->vctxt.warning = xmlParserValidityWarning;
14907
#endif
14908
0
    ctxt->record_info = 0;
14909
0
    ctxt->checkIndex = 0;
14910
0
    ctxt->inSubset = 0;
14911
0
    ctxt->errNo = XML_ERR_OK;
14912
0
    ctxt->depth = 0;
14913
0
    ctxt->charset = XML_CHAR_ENCODING_UTF8;
14914
0
    ctxt->catalogs = NULL;
14915
0
    ctxt->nbentities = 0;
14916
0
    ctxt->sizeentities = 0;
14917
0
    ctxt->sizeentcopy = 0;
14918
0
    xmlInitNodeInfoSeq(&ctxt->node_seq);
14919
14920
0
    if (ctxt->attsDefault != NULL) {
14921
0
        xmlHashFree(ctxt->attsDefault, xmlHashDefaultDeallocator);
14922
0
        ctxt->attsDefault = NULL;
14923
0
    }
14924
0
    if (ctxt->attsSpecial != NULL) {
14925
0
        xmlHashFree(ctxt->attsSpecial, NULL);
14926
0
        ctxt->attsSpecial = NULL;
14927
0
    }
14928
14929
0
#ifdef LIBXML_CATALOG_ENABLED
14930
0
    if (ctxt->catalogs != NULL)
14931
0
  xmlCatalogFreeLocal(ctxt->catalogs);
14932
0
#endif
14933
0
    if (ctxt->lastError.code != XML_ERR_OK)
14934
0
        xmlResetError(&ctxt->lastError);
14935
0
}
14936
14937
/**
14938
 * xmlCtxtResetPush:
14939
 * @ctxt: an XML parser context
14940
 * @chunk:  a pointer to an array of chars
14941
 * @size:  number of chars in the array
14942
 * @filename:  an optional file name or URI
14943
 * @encoding:  the document encoding, or NULL
14944
 *
14945
 * Reset a push parser context
14946
 *
14947
 * Returns 0 in case of success and 1 in case of error
14948
 */
14949
int
14950
xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
14951
                 int size, const char *filename, const char *encoding)
14952
0
{
14953
0
    xmlParserInputPtr inputStream;
14954
0
    xmlParserInputBufferPtr buf;
14955
0
    xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
14956
14957
0
    if (ctxt == NULL)
14958
0
        return(1);
14959
14960
0
    if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
14961
0
        enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
14962
14963
0
    buf = xmlAllocParserInputBuffer(enc);
14964
0
    if (buf == NULL)
14965
0
        return(1);
14966
14967
0
    if (ctxt == NULL) {
14968
0
        xmlFreeParserInputBuffer(buf);
14969
0
        return(1);
14970
0
    }
14971
14972
0
    xmlCtxtReset(ctxt);
14973
14974
0
    if (filename == NULL) {
14975
0
        ctxt->directory = NULL;
14976
0
    } else {
14977
0
        ctxt->directory = xmlParserGetDirectory(filename);
14978
0
    }
14979
14980
0
    inputStream = xmlNewInputStream(ctxt);
14981
0
    if (inputStream == NULL) {
14982
0
        xmlFreeParserInputBuffer(buf);
14983
0
        return(1);
14984
0
    }
14985
14986
0
    if (filename == NULL)
14987
0
        inputStream->filename = NULL;
14988
0
    else
14989
0
        inputStream->filename = (char *)
14990
0
            xmlCanonicPath((const xmlChar *) filename);
14991
0
    inputStream->buf = buf;
14992
0
    xmlBufResetInput(buf->buffer, inputStream);
14993
14994
0
    inputPush(ctxt, inputStream);
14995
14996
0
    if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
14997
0
        (ctxt->input->buf != NULL)) {
14998
0
  size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
14999
0
        size_t cur = ctxt->input->cur - ctxt->input->base;
15000
15001
0
        xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
15002
15003
0
        xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
15004
#ifdef DEBUG_PUSH
15005
        xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
15006
#endif
15007
0
    }
15008
15009
0
    if (encoding != NULL) {
15010
0
        xmlCharEncodingHandlerPtr hdlr;
15011
15012
0
        if (ctxt->encoding != NULL)
15013
0
      xmlFree((xmlChar *) ctxt->encoding);
15014
0
        ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
15015
15016
0
        hdlr = xmlFindCharEncodingHandler(encoding);
15017
0
        if (hdlr != NULL) {
15018
0
            xmlSwitchToEncoding(ctxt, hdlr);
15019
0
  } else {
15020
0
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
15021
0
            "Unsupported encoding %s\n", BAD_CAST encoding);
15022
0
        }
15023
0
    } else if (enc != XML_CHAR_ENCODING_NONE) {
15024
0
        xmlSwitchEncoding(ctxt, enc);
15025
0
    }
15026
15027
0
    return(0);
15028
0
}
15029
15030
15031
/**
15032
 * xmlCtxtUseOptionsInternal:
15033
 * @ctxt: an XML parser context
15034
 * @options:  a combination of xmlParserOption
15035
 * @encoding:  the user provided encoding to use
15036
 *
15037
 * Applies the options to the parser context
15038
 *
15039
 * Returns 0 in case of success, the set of unknown or unimplemented options
15040
 *         in case of error.
15041
 */
15042
static int
15043
xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding)
15044
1.23M
{
15045
1.23M
    if (ctxt == NULL)
15046
0
        return(-1);
15047
1.23M
    if (encoding != NULL) {
15048
0
        if (ctxt->encoding != NULL)
15049
0
      xmlFree((xmlChar *) ctxt->encoding);
15050
0
        ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
15051
0
    }
15052
1.23M
    if (options & XML_PARSE_RECOVER) {
15053
626k
        ctxt->recovery = 1;
15054
626k
        options -= XML_PARSE_RECOVER;
15055
626k
  ctxt->options |= XML_PARSE_RECOVER;
15056
626k
    } else
15057
611k
        ctxt->recovery = 0;
15058
1.23M
    if (options & XML_PARSE_DTDLOAD) {
15059
907k
        ctxt->loadsubset = XML_DETECT_IDS;
15060
907k
        options -= XML_PARSE_DTDLOAD;
15061
907k
  ctxt->options |= XML_PARSE_DTDLOAD;
15062
907k
    } else
15063
329k
        ctxt->loadsubset = 0;
15064
1.23M
    if (options & XML_PARSE_DTDATTR) {
15065
445k
        ctxt->loadsubset |= XML_COMPLETE_ATTRS;
15066
445k
        options -= XML_PARSE_DTDATTR;
15067
445k
  ctxt->options |= XML_PARSE_DTDATTR;
15068
445k
    }
15069
1.23M
    if (options & XML_PARSE_NOENT) {
15070
742k
        ctxt->replaceEntities = 1;
15071
        /* ctxt->loadsubset |= XML_DETECT_IDS; */
15072
742k
        options -= XML_PARSE_NOENT;
15073
742k
  ctxt->options |= XML_PARSE_NOENT;
15074
742k
    } else
15075
494k
        ctxt->replaceEntities = 0;
15076
1.23M
    if (options & XML_PARSE_PEDANTIC) {
15077
242k
        ctxt->pedantic = 1;
15078
242k
        options -= XML_PARSE_PEDANTIC;
15079
242k
  ctxt->options |= XML_PARSE_PEDANTIC;
15080
242k
    } else
15081
995k
        ctxt->pedantic = 0;
15082
1.23M
    if (options & XML_PARSE_NOBLANKS) {
15083
447k
        ctxt->keepBlanks = 0;
15084
447k
        ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
15085
447k
        options -= XML_PARSE_NOBLANKS;
15086
447k
  ctxt->options |= XML_PARSE_NOBLANKS;
15087
447k
    } else
15088
789k
        ctxt->keepBlanks = 1;
15089
1.23M
    if (options & XML_PARSE_DTDVALID) {
15090
511k
        ctxt->validate = 1;
15091
511k
        if (options & XML_PARSE_NOWARNING)
15092
282k
            ctxt->vctxt.warning = NULL;
15093
511k
        if (options & XML_PARSE_NOERROR)
15094
321k
            ctxt->vctxt.error = NULL;
15095
511k
        options -= XML_PARSE_DTDVALID;
15096
511k
  ctxt->options |= XML_PARSE_DTDVALID;
15097
511k
    } else
15098
725k
        ctxt->validate = 0;
15099
1.23M
    if (options & XML_PARSE_NOWARNING) {
15100
415k
        ctxt->sax->warning = NULL;
15101
415k
        options -= XML_PARSE_NOWARNING;
15102
415k
    }
15103
1.23M
    if (options & XML_PARSE_NOERROR) {
15104
486k
        ctxt->sax->error = NULL;
15105
486k
        ctxt->sax->fatalError = NULL;
15106
486k
        options -= XML_PARSE_NOERROR;
15107
486k
    }
15108
1.23M
#ifdef LIBXML_SAX1_ENABLED
15109
1.23M
    if (options & XML_PARSE_SAX1) {
15110
501k
        ctxt->sax->startElement = xmlSAX2StartElement;
15111
501k
        ctxt->sax->endElement = xmlSAX2EndElement;
15112
501k
        ctxt->sax->startElementNs = NULL;
15113
501k
        ctxt->sax->endElementNs = NULL;
15114
501k
        ctxt->sax->initialized = 1;
15115
501k
        options -= XML_PARSE_SAX1;
15116
501k
  ctxt->options |= XML_PARSE_SAX1;
15117
501k
    }
15118
1.23M
#endif /* LIBXML_SAX1_ENABLED */
15119
1.23M
    if (options & XML_PARSE_NODICT) {
15120
417k
        ctxt->dictNames = 0;
15121
417k
        options -= XML_PARSE_NODICT;
15122
417k
  ctxt->options |= XML_PARSE_NODICT;
15123
819k
    } else {
15124
819k
        ctxt->dictNames = 1;
15125
819k
    }
15126
1.23M
    if (options & XML_PARSE_NOCDATA) {
15127
491k
        ctxt->sax->cdataBlock = NULL;
15128
491k
        options -= XML_PARSE_NOCDATA;
15129
491k
  ctxt->options |= XML_PARSE_NOCDATA;
15130
491k
    }
15131
1.23M
    if (options & XML_PARSE_NSCLEAN) {
15132
599k
  ctxt->options |= XML_PARSE_NSCLEAN;
15133
599k
        options -= XML_PARSE_NSCLEAN;
15134
599k
    }
15135
1.23M
    if (options & XML_PARSE_NONET) {
15136
536k
  ctxt->options |= XML_PARSE_NONET;
15137
536k
        options -= XML_PARSE_NONET;
15138
536k
    }
15139
1.23M
    if (options & XML_PARSE_COMPACT) {
15140
665k
  ctxt->options |= XML_PARSE_COMPACT;
15141
665k
        options -= XML_PARSE_COMPACT;
15142
665k
    }
15143
1.23M
    if (options & XML_PARSE_OLD10) {
15144
459k
  ctxt->options |= XML_PARSE_OLD10;
15145
459k
        options -= XML_PARSE_OLD10;
15146
459k
    }
15147
1.23M
    if (options & XML_PARSE_NOBASEFIX) {
15148
521k
  ctxt->options |= XML_PARSE_NOBASEFIX;
15149
521k
        options -= XML_PARSE_NOBASEFIX;
15150
521k
    }
15151
1.23M
    if (options & XML_PARSE_HUGE) {
15152
447k
  ctxt->options |= XML_PARSE_HUGE;
15153
447k
        options -= XML_PARSE_HUGE;
15154
447k
        if (ctxt->dict != NULL)
15155
447k
            xmlDictSetLimit(ctxt->dict, 0);
15156
447k
    }
15157
1.23M
    if (options & XML_PARSE_OLDSAX) {
15158
369k
  ctxt->options |= XML_PARSE_OLDSAX;
15159
369k
        options -= XML_PARSE_OLDSAX;
15160
369k
    }
15161
1.23M
    if (options & XML_PARSE_IGNORE_ENC) {
15162
519k
  ctxt->options |= XML_PARSE_IGNORE_ENC;
15163
519k
        options -= XML_PARSE_IGNORE_ENC;
15164
519k
    }
15165
1.23M
    if (options & XML_PARSE_BIG_LINES) {
15166
416k
  ctxt->options |= XML_PARSE_BIG_LINES;
15167
416k
        options -= XML_PARSE_BIG_LINES;
15168
416k
    }
15169
1.23M
    ctxt->linenumbers = 1;
15170
1.23M
    return (options);
15171
1.23M
}
15172
15173
/**
15174
 * xmlCtxtUseOptions:
15175
 * @ctxt: an XML parser context
15176
 * @options:  a combination of xmlParserOption
15177
 *
15178
 * Applies the options to the parser context
15179
 *
15180
 * Returns 0 in case of success, the set of unknown or unimplemented options
15181
 *         in case of error.
15182
 */
15183
int
15184
xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
15185
877k
{
15186
877k
   return(xmlCtxtUseOptionsInternal(ctxt, options, NULL));
15187
877k
}
15188
15189
/**
15190
 * xmlDoRead:
15191
 * @ctxt:  an XML parser context
15192
 * @URL:  the base URL to use for the document
15193
 * @encoding:  the document encoding, or NULL
15194
 * @options:  a combination of xmlParserOption
15195
 * @reuse:  keep the context for reuse
15196
 *
15197
 * Common front-end for the xmlRead functions
15198
 *
15199
 * Returns the resulting document tree or NULL
15200
 */
15201
static xmlDocPtr
15202
xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
15203
          int options, int reuse)
15204
360k
{
15205
360k
    xmlDocPtr ret;
15206
15207
360k
    xmlCtxtUseOptionsInternal(ctxt, options, encoding);
15208
360k
    if (encoding != NULL) {
15209
0
        xmlCharEncodingHandlerPtr hdlr;
15210
15211
0
  hdlr = xmlFindCharEncodingHandler(encoding);
15212
0
  if (hdlr != NULL)
15213
0
      xmlSwitchToEncoding(ctxt, hdlr);
15214
0
    }
15215
360k
    if ((URL != NULL) && (ctxt->input != NULL) &&
15216
360k
        (ctxt->input->filename == NULL))
15217
360k
        ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
15218
360k
    xmlParseDocument(ctxt);
15219
360k
    if ((ctxt->wellFormed) || ctxt->recovery)
15220
186k
        ret = ctxt->myDoc;
15221
173k
    else {
15222
173k
        ret = NULL;
15223
173k
  if (ctxt->myDoc != NULL) {
15224
152k
      xmlFreeDoc(ctxt->myDoc);
15225
152k
  }
15226
173k
    }
15227
360k
    ctxt->myDoc = NULL;
15228
360k
    if (!reuse) {
15229
360k
  xmlFreeParserCtxt(ctxt);
15230
360k
    }
15231
15232
360k
    return (ret);
15233
360k
}
15234
15235
/**
15236
 * xmlReadDoc:
15237
 * @cur:  a pointer to a zero terminated string
15238
 * @URL:  the base URL to use for the document
15239
 * @encoding:  the document encoding, or NULL
15240
 * @options:  a combination of xmlParserOption
15241
 *
15242
 * parse an XML in-memory document and build a tree.
15243
 *
15244
 * Returns the resulting document tree
15245
 */
15246
xmlDocPtr
15247
xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
15248
0
{
15249
0
    xmlParserCtxtPtr ctxt;
15250
15251
0
    if (cur == NULL)
15252
0
        return (NULL);
15253
0
    xmlInitParser();
15254
15255
0
    ctxt = xmlCreateDocParserCtxt(cur);
15256
0
    if (ctxt == NULL)
15257
0
        return (NULL);
15258
0
    return (xmlDoRead(ctxt, URL, encoding, options, 0));
15259
0
}
15260
15261
/**
15262
 * xmlReadFile:
15263
 * @filename:  a file or URL
15264
 * @encoding:  the document encoding, or NULL
15265
 * @options:  a combination of xmlParserOption
15266
 *
15267
 * parse an XML file from the filesystem or the network.
15268
 *
15269
 * Returns the resulting document tree
15270
 */
15271
xmlDocPtr
15272
xmlReadFile(const char *filename, const char *encoding, int options)
15273
0
{
15274
0
    xmlParserCtxtPtr ctxt;
15275
15276
0
    xmlInitParser();
15277
0
    ctxt = xmlCreateURLParserCtxt(filename, options);
15278
0
    if (ctxt == NULL)
15279
0
        return (NULL);
15280
0
    return (xmlDoRead(ctxt, NULL, encoding, options, 0));
15281
0
}
15282
15283
/**
15284
 * xmlReadMemory:
15285
 * @buffer:  a pointer to a char array
15286
 * @size:  the size of the array
15287
 * @URL:  the base URL to use for the document
15288
 * @encoding:  the document encoding, or NULL
15289
 * @options:  a combination of xmlParserOption
15290
 *
15291
 * parse an XML in-memory document and build a tree.
15292
 *
15293
 * Returns the resulting document tree
15294
 */
15295
xmlDocPtr
15296
xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
15297
364k
{
15298
364k
    xmlParserCtxtPtr ctxt;
15299
15300
364k
    xmlInitParser();
15301
364k
    ctxt = xmlCreateMemoryParserCtxt(buffer, size);
15302
364k
    if (ctxt == NULL)
15303
4.48k
        return (NULL);
15304
360k
    return (xmlDoRead(ctxt, URL, encoding, options, 0));
15305
364k
}
15306
15307
/**
15308
 * xmlReadFd:
15309
 * @fd:  an open file descriptor
15310
 * @URL:  the base URL to use for the document
15311
 * @encoding:  the document encoding, or NULL
15312
 * @options:  a combination of xmlParserOption
15313
 *
15314
 * parse an XML from a file descriptor and build a tree.
15315
 * NOTE that the file descriptor will not be closed when the
15316
 *      reader is closed or reset.
15317
 *
15318
 * Returns the resulting document tree
15319
 */
15320
xmlDocPtr
15321
xmlReadFd(int fd, const char *URL, const char *encoding, int options)
15322
0
{
15323
0
    xmlParserCtxtPtr ctxt;
15324
0
    xmlParserInputBufferPtr input;
15325
0
    xmlParserInputPtr stream;
15326
15327
0
    if (fd < 0)
15328
0
        return (NULL);
15329
0
    xmlInitParser();
15330
15331
0
    input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15332
0
    if (input == NULL)
15333
0
        return (NULL);
15334
0
    input->closecallback = NULL;
15335
0
    ctxt = xmlNewParserCtxt();
15336
0
    if (ctxt == NULL) {
15337
0
        xmlFreeParserInputBuffer(input);
15338
0
        return (NULL);
15339
0
    }
15340
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15341
0
    if (stream == NULL) {
15342
0
        xmlFreeParserInputBuffer(input);
15343
0
  xmlFreeParserCtxt(ctxt);
15344
0
        return (NULL);
15345
0
    }
15346
0
    inputPush(ctxt, stream);
15347
0
    return (xmlDoRead(ctxt, URL, encoding, options, 0));
15348
0
}
15349
15350
/**
15351
 * xmlReadIO:
15352
 * @ioread:  an I/O read function
15353
 * @ioclose:  an I/O close function
15354
 * @ioctx:  an I/O handler
15355
 * @URL:  the base URL to use for the document
15356
 * @encoding:  the document encoding, or NULL
15357
 * @options:  a combination of xmlParserOption
15358
 *
15359
 * parse an XML document from I/O functions and source and build a tree.
15360
 *
15361
 * Returns the resulting document tree
15362
 */
15363
xmlDocPtr
15364
xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
15365
          void *ioctx, const char *URL, const char *encoding, int options)
15366
0
{
15367
0
    xmlParserCtxtPtr ctxt;
15368
0
    xmlParserInputBufferPtr input;
15369
0
    xmlParserInputPtr stream;
15370
15371
0
    if (ioread == NULL)
15372
0
        return (NULL);
15373
0
    xmlInitParser();
15374
15375
0
    input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15376
0
                                         XML_CHAR_ENCODING_NONE);
15377
0
    if (input == NULL) {
15378
0
        if (ioclose != NULL)
15379
0
            ioclose(ioctx);
15380
0
        return (NULL);
15381
0
    }
15382
0
    ctxt = xmlNewParserCtxt();
15383
0
    if (ctxt == NULL) {
15384
0
        xmlFreeParserInputBuffer(input);
15385
0
        return (NULL);
15386
0
    }
15387
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15388
0
    if (stream == NULL) {
15389
0
        xmlFreeParserInputBuffer(input);
15390
0
  xmlFreeParserCtxt(ctxt);
15391
0
        return (NULL);
15392
0
    }
15393
0
    inputPush(ctxt, stream);
15394
0
    return (xmlDoRead(ctxt, URL, encoding, options, 0));
15395
0
}
15396
15397
/**
15398
 * xmlCtxtReadDoc:
15399
 * @ctxt:  an XML parser context
15400
 * @cur:  a pointer to a zero terminated string
15401
 * @URL:  the base URL to use for the document
15402
 * @encoding:  the document encoding, or NULL
15403
 * @options:  a combination of xmlParserOption
15404
 *
15405
 * parse an XML in-memory document and build a tree.
15406
 * This reuses the existing @ctxt parser context
15407
 *
15408
 * Returns the resulting document tree
15409
 */
15410
xmlDocPtr
15411
xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
15412
               const char *URL, const char *encoding, int options)
15413
0
{
15414
0
    if (cur == NULL)
15415
0
        return (NULL);
15416
0
    return (xmlCtxtReadMemory(ctxt, (const char *) cur, xmlStrlen(cur), URL,
15417
0
                              encoding, options));
15418
0
}
15419
15420
/**
15421
 * xmlCtxtReadFile:
15422
 * @ctxt:  an XML parser context
15423
 * @filename:  a file or URL
15424
 * @encoding:  the document encoding, or NULL
15425
 * @options:  a combination of xmlParserOption
15426
 *
15427
 * parse an XML file from the filesystem or the network.
15428
 * This reuses the existing @ctxt parser context
15429
 *
15430
 * Returns the resulting document tree
15431
 */
15432
xmlDocPtr
15433
xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
15434
                const char *encoding, int options)
15435
0
{
15436
0
    xmlParserInputPtr stream;
15437
15438
0
    if (filename == NULL)
15439
0
        return (NULL);
15440
0
    if (ctxt == NULL)
15441
0
        return (NULL);
15442
0
    xmlInitParser();
15443
15444
0
    xmlCtxtReset(ctxt);
15445
15446
0
    stream = xmlLoadExternalEntity(filename, NULL, ctxt);
15447
0
    if (stream == NULL) {
15448
0
        return (NULL);
15449
0
    }
15450
0
    inputPush(ctxt, stream);
15451
0
    return (xmlDoRead(ctxt, NULL, encoding, options, 1));
15452
0
}
15453
15454
/**
15455
 * xmlCtxtReadMemory:
15456
 * @ctxt:  an XML parser context
15457
 * @buffer:  a pointer to a char array
15458
 * @size:  the size of the array
15459
 * @URL:  the base URL to use for the document
15460
 * @encoding:  the document encoding, or NULL
15461
 * @options:  a combination of xmlParserOption
15462
 *
15463
 * parse an XML in-memory document and build a tree.
15464
 * This reuses the existing @ctxt parser context
15465
 *
15466
 * Returns the resulting document tree
15467
 */
15468
xmlDocPtr
15469
xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
15470
                  const char *URL, const char *encoding, int options)
15471
0
{
15472
0
    xmlParserInputBufferPtr input;
15473
0
    xmlParserInputPtr stream;
15474
15475
0
    if (ctxt == NULL)
15476
0
        return (NULL);
15477
0
    if (buffer == NULL)
15478
0
        return (NULL);
15479
0
    xmlInitParser();
15480
15481
0
    xmlCtxtReset(ctxt);
15482
15483
0
    input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
15484
0
    if (input == NULL) {
15485
0
  return(NULL);
15486
0
    }
15487
15488
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15489
0
    if (stream == NULL) {
15490
0
  xmlFreeParserInputBuffer(input);
15491
0
  return(NULL);
15492
0
    }
15493
15494
0
    inputPush(ctxt, stream);
15495
0
    return (xmlDoRead(ctxt, URL, encoding, options, 1));
15496
0
}
15497
15498
/**
15499
 * xmlCtxtReadFd:
15500
 * @ctxt:  an XML parser context
15501
 * @fd:  an open file descriptor
15502
 * @URL:  the base URL to use for the document
15503
 * @encoding:  the document encoding, or NULL
15504
 * @options:  a combination of xmlParserOption
15505
 *
15506
 * parse an XML from a file descriptor and build a tree.
15507
 * This reuses the existing @ctxt parser context
15508
 * NOTE that the file descriptor will not be closed when the
15509
 *      reader is closed or reset.
15510
 *
15511
 * Returns the resulting document tree
15512
 */
15513
xmlDocPtr
15514
xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
15515
              const char *URL, const char *encoding, int options)
15516
0
{
15517
0
    xmlParserInputBufferPtr input;
15518
0
    xmlParserInputPtr stream;
15519
15520
0
    if (fd < 0)
15521
0
        return (NULL);
15522
0
    if (ctxt == NULL)
15523
0
        return (NULL);
15524
0
    xmlInitParser();
15525
15526
0
    xmlCtxtReset(ctxt);
15527
15528
15529
0
    input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15530
0
    if (input == NULL)
15531
0
        return (NULL);
15532
0
    input->closecallback = NULL;
15533
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15534
0
    if (stream == NULL) {
15535
0
        xmlFreeParserInputBuffer(input);
15536
0
        return (NULL);
15537
0
    }
15538
0
    inputPush(ctxt, stream);
15539
0
    return (xmlDoRead(ctxt, URL, encoding, options, 1));
15540
0
}
15541
15542
/**
15543
 * xmlCtxtReadIO:
15544
 * @ctxt:  an XML parser context
15545
 * @ioread:  an I/O read function
15546
 * @ioclose:  an I/O close function
15547
 * @ioctx:  an I/O handler
15548
 * @URL:  the base URL to use for the document
15549
 * @encoding:  the document encoding, or NULL
15550
 * @options:  a combination of xmlParserOption
15551
 *
15552
 * parse an XML document from I/O functions and source and build a tree.
15553
 * This reuses the existing @ctxt parser context
15554
 *
15555
 * Returns the resulting document tree
15556
 */
15557
xmlDocPtr
15558
xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
15559
              xmlInputCloseCallback ioclose, void *ioctx,
15560
        const char *URL,
15561
              const char *encoding, int options)
15562
0
{
15563
0
    xmlParserInputBufferPtr input;
15564
0
    xmlParserInputPtr stream;
15565
15566
0
    if (ioread == NULL)
15567
0
        return (NULL);
15568
0
    if (ctxt == NULL)
15569
0
        return (NULL);
15570
0
    xmlInitParser();
15571
15572
0
    xmlCtxtReset(ctxt);
15573
15574
0
    input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15575
0
                                         XML_CHAR_ENCODING_NONE);
15576
0
    if (input == NULL) {
15577
0
        if (ioclose != NULL)
15578
0
            ioclose(ioctx);
15579
0
        return (NULL);
15580
0
    }
15581
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15582
0
    if (stream == NULL) {
15583
0
        xmlFreeParserInputBuffer(input);
15584
0
        return (NULL);
15585
0
    }
15586
0
    inputPush(ctxt, stream);
15587
0
    return (xmlDoRead(ctxt, URL, encoding, options, 1));
15588
0
}
15589