Coverage Report

Created: 2024-05-18 02:13

/src/libxml2/parser.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3
 *            implemented on top of the SAX interfaces
4
 *
5
 * References:
6
 *   The XML specification:
7
 *     http://www.w3.org/TR/REC-xml
8
 *   Original 1.0 version:
9
 *     http://www.w3.org/TR/1998/REC-xml-19980210
10
 *   XML second edition working draft
11
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
12
 *
13
 * Okay this is a big file, the parser core is around 7000 lines, then it
14
 * is followed by the progressive parser top routines, then the various
15
 * high level APIs to call the parser and a few miscellaneous functions.
16
 * A number of helper functions and deprecated ones have been moved to
17
 * parserInternals.c to reduce this file size.
18
 * As much as possible the functions are associated with their relative
19
 * production in the XML specification. A few productions defining the
20
 * different ranges of character are actually implemented either in
21
 * parserInternals.h or parserInternals.c
22
 * The DOM tree build is realized from the default SAX callbacks in
23
 * the module SAX.c.
24
 * The routines doing the validation checks are in valid.c and called either
25
 * from the SAX callbacks or as standalone functions using a preparsed
26
 * document.
27
 *
28
 * See Copyright for the status of this software.
29
 *
30
 * daniel@veillard.com
31
 */
32
33
/* To avoid EBCDIC trouble when parsing on zOS */
34
#if defined(__MVS__)
35
#pragma convert("ISO8859-1")
36
#endif
37
38
#define IN_LIBXML
39
#include "libxml.h"
40
41
#if defined(_WIN32)
42
#define XML_DIR_SEP '\\'
43
#else
44
#define XML_DIR_SEP '/'
45
#endif
46
47
#include <stdlib.h>
48
#include <limits.h>
49
#include <string.h>
50
#include <stdarg.h>
51
#include <stddef.h>
52
#include <ctype.h>
53
#include <stdlib.h>
54
#include <libxml/xmlmemory.h>
55
#include <libxml/threads.h>
56
#include <libxml/globals.h>
57
#include <libxml/tree.h>
58
#include <libxml/parser.h>
59
#include <libxml/parserInternals.h>
60
#include <libxml/HTMLparser.h>
61
#include <libxml/valid.h>
62
#include <libxml/entities.h>
63
#include <libxml/xmlerror.h>
64
#include <libxml/encoding.h>
65
#include <libxml/xmlIO.h>
66
#include <libxml/uri.h>
67
#ifdef LIBXML_CATALOG_ENABLED
68
#include <libxml/catalog.h>
69
#endif
70
#ifdef LIBXML_SCHEMAS_ENABLED
71
#include <libxml/xmlschemastypes.h>
72
#include <libxml/relaxng.h>
73
#endif
74
#if defined(LIBXML_XPATH_ENABLED) || defined(LIBXML_SCHEMAS_ENABLED)
75
#include <libxml/xpath.h>
76
#endif
77
78
#include "private/buf.h"
79
#include "private/enc.h"
80
#include "private/error.h"
81
#include "private/html.h"
82
#include "private/io.h"
83
#include "private/parser.h"
84
#include "private/threads.h"
85
86
/*
 * Record grouping two xmlChar strings (prefix, URI) and two integers
 * (line, nsNr) under the name "start tag".
 * NOTE(review): the code that fills and reads these fields is not visible
 * in this part of the file; the per-field notes below are assumptions
 * derived from the field names only and should be confirmed.
 */
struct _xmlStartTag {
87
    const xmlChar *prefix; /* presumably the tag's namespace prefix -- TODO confirm */
88
    const xmlChar *URI; /* presumably the tag's namespace URI -- TODO confirm */
89
    int line; /* presumably the input line of the tag -- TODO confirm */
90
    int nsNr; /* presumably a namespace count for the tag -- TODO confirm */
91
};
92
93
static void
94
xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info);
95
96
static xmlParserCtxtPtr
97
xmlCreateEntityParserCtxtInternal(xmlSAXHandlerPtr sax, void *userData,
98
        const xmlChar *URL, const xmlChar *ID, const xmlChar *base,
99
        xmlParserCtxtPtr pctx);
100
101
static void xmlHaltParser(xmlParserCtxtPtr ctxt);
102
103
static int
104
xmlParseElementStart(xmlParserCtxtPtr ctxt);
105
106
static void
107
xmlParseElementEnd(xmlParserCtxtPtr ctxt);
108
109
/************************************************************************
110
 *                  *
111
 *  Arbitrary limits set in the parser. See XML_PARSE_HUGE    *
112
 *                  *
113
 ************************************************************************/
114
115
163M
#define XML_MAX_HUGE_LENGTH 1000000000
116
117
77.7k
#define XML_PARSER_BIG_ENTITY 1000
118
#define XML_PARSER_LOT_ENTITY 5000
119
120
/*
121
 * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity
122
 *    replacement over the size in byte of the input indicates that you have
123
 *    an exponential behaviour. A value of 10 corresponds to at least 3 entity
124
 *    replacement per byte of input.
125
 */
126
2.60M
#define XML_PARSER_NON_LINEAR 10
127
128
/*
129
 * xmlParserEntityCheck
130
 *
131
 * Function to check non-linear entity expansion behaviour
132
 * This is here to detect and stop exponential linear entity expansion
133
 * This is not a limitation of the parser but a safety
134
 * boundary feature. It can be disabled with the XML_PARSE_HUGE
135
 * parser option.
136
 */
137
static int
138
xmlParserEntityCheck(xmlParserCtxtPtr ctxt, size_t size,
139
                     xmlEntityPtr ent, size_t replacement)
140
25.6M
{
141
25.6M
    size_t consumed = 0;
142
25.6M
    int i;
143
144
25.6M
    if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE))
145
20.4M
        return (0);
146
5.23M
    if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
147
4.97k
        return (1);
148
149
    /*
150
     * This may look absurd but is needed to detect
151
     * entities problems
152
     */
153
5.23M
    if ((ent != NULL) && (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
154
5.23M
  (ent->content != NULL) && (ent->checked == 0) &&
155
5.23M
  (ctxt->errNo != XML_ERR_ENTITY_LOOP)) {
156
433k
  unsigned long oldnbent = ctxt->nbentities, diff;
157
433k
  xmlChar *rep;
158
159
433k
  ent->checked = 1;
160
161
433k
        ++ctxt->depth;
162
433k
  rep = xmlStringDecodeEntities(ctxt, ent->content,
163
433k
          XML_SUBSTITUTE_REF, 0, 0, 0);
164
433k
        --ctxt->depth;
165
433k
  if ((rep == NULL) || (ctxt->errNo == XML_ERR_ENTITY_LOOP)) {
166
1.49k
      ent->content[0] = 0;
167
1.49k
  }
168
169
433k
        diff = ctxt->nbentities - oldnbent + 1;
170
433k
        if (diff > INT_MAX / 2)
171
0
            diff = INT_MAX / 2;
172
433k
  ent->checked = diff * 2;
173
433k
  if (rep != NULL) {
174
432k
      if (xmlStrchr(rep, '<'))
175
8.35k
    ent->checked |= 1;
176
432k
      xmlFree(rep);
177
432k
      rep = NULL;
178
432k
  }
179
433k
    }
180
181
    /*
182
     * Prevent entity exponential check, not just replacement while
183
     * parsing the DTD
184
     * The check is potentially costly so do that only once in a thousand
185
     */
186
5.23M
    if ((ctxt->instate == XML_PARSER_DTD) && (ctxt->nbentities > 10000) &&
187
5.23M
        (ctxt->nbentities % 1024 == 0)) {
188
0
  for (i = 0;i < ctxt->inputNr;i++) {
189
0
      consumed += ctxt->inputTab[i]->consumed +
190
0
                 (ctxt->inputTab[i]->cur - ctxt->inputTab[i]->base);
191
0
  }
192
0
  if (ctxt->nbentities > consumed * XML_PARSER_NON_LINEAR) {
193
0
      xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
194
0
      ctxt->instate = XML_PARSER_EOF;
195
0
      return (1);
196
0
  }
197
0
  consumed = 0;
198
0
    }
199
200
201
202
5.23M
    if (replacement != 0) {
203
50.4k
  if (replacement < XML_MAX_TEXT_LENGTH)
204
50.4k
      return(0);
205
206
        /*
207
   * If the volume of entity copy reaches 10 times the
208
   * amount of parsed data and over the large text threshold
209
   * then that's very likely to be an abuse.
210
   */
211
0
        if (ctxt->input != NULL) {
212
0
      consumed = ctxt->input->consumed +
213
0
                 (ctxt->input->cur - ctxt->input->base);
214
0
  }
215
0
        consumed += ctxt->sizeentities;
216
217
0
        if (replacement < XML_PARSER_NON_LINEAR * consumed)
218
0
      return(0);
219
5.18M
    } else if (size != 0) {
220
        /*
221
         * Do the check based on the replacement size of the entity
222
         */
223
77.7k
        if (size < XML_PARSER_BIG_ENTITY)
224
73.9k
      return(0);
225
226
        /*
227
         * A limit on the amount of text data reasonably used
228
         */
229
3.75k
        if (ctxt->input != NULL) {
230
3.75k
            consumed = ctxt->input->consumed +
231
3.75k
                (ctxt->input->cur - ctxt->input->base);
232
3.75k
        }
233
3.75k
        consumed += ctxt->sizeentities;
234
235
3.75k
        if ((size < XML_PARSER_NON_LINEAR * consumed) &&
236
3.75k
      (ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed))
237
2.88k
            return (0);
238
5.10M
    } else if (ent != NULL) {
239
        /*
240
         * use the number of parsed entities in the replacement
241
         */
242
2.59M
        size = ent->checked / 2;
243
244
        /*
245
         * The amount of data parsed counting entities size only once
246
         */
247
2.59M
        if (ctxt->input != NULL) {
248
2.59M
            consumed = ctxt->input->consumed +
249
2.59M
                (ctxt->input->cur - ctxt->input->base);
250
2.59M
        }
251
2.59M
        consumed += ctxt->sizeentities;
252
253
        /*
254
         * Check the density of entities for the amount of data
255
   * knowing an entity reference will take at least 3 bytes
256
         */
257
2.59M
        if (size * 3 < consumed * XML_PARSER_NON_LINEAR)
258
2.59M
            return (0);
259
2.59M
    } else {
260
        /*
261
         * strange we got no data for checking
262
         */
263
2.50M
  if (((ctxt->lastError.code != XML_ERR_UNDECLARED_ENTITY) &&
264
2.50M
       (ctxt->lastError.code != XML_WAR_UNDECLARED_ENTITY)) ||
265
2.50M
      (ctxt->nbentities <= 10000))
266
2.40M
      return (0);
267
2.50M
    }
268
104k
    xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
269
104k
    return (1);
270
5.23M
}
271
272
/**
273
 * xmlParserMaxDepth:
274
 *
275
 * arbitrary depth limit for the XML documents that we allow to
276
 * process. This is not a limitation of the parser but a safety
277
 * boundary feature. It can be disabled with the XML_PARSE_HUGE
278
 * parser option.
279
 */
280
unsigned int xmlParserMaxDepth = 256;
281
282
283
284
#define SAX2 1
285
1.38G
#define XML_PARSER_BIG_BUFFER_SIZE 300
286
851M
#define XML_PARSER_BUFFER_SIZE 100
287
1.47M
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
288
289
/**
290
 * XML_PARSER_CHUNK_SIZE
291
 *
292
 * When calling GROW that's the minimal amount of data
293
 * the parser expects to have received. It is not a hard
294
 * limit but an optimization when reading strings like Names
295
 * It is not strictly needed as long as inputs available characters
296
 * are followed by 0, which should be provided by the I/O level
297
 */
298
230M
#define XML_PARSER_CHUNK_SIZE 100
299
300
/*
301
 * List of XML prefixed PI allowed by W3C specs
302
 */
303
304
static const char* const xmlW3CPIs[] = {
305
    "xml-stylesheet",
306
    "xml-model",
307
    NULL
308
};
309
310
311
/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
312
static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
313
                                              const xmlChar **str);
314
315
static xmlParserErrors
316
xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
317
                xmlSAXHandlerPtr sax,
318
          void *user_data, int depth, const xmlChar *URL,
319
          const xmlChar *ID, xmlNodePtr *list);
320
321
static int
322
xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
323
                          const char *encoding);
324
#ifdef LIBXML_LEGACY_ENABLED
325
static void
326
xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
327
                      xmlNodePtr lastNode);
328
#endif /* LIBXML_LEGACY_ENABLED */
329
330
static xmlParserErrors
331
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
332
          const xmlChar *string, void *user_data, xmlNodePtr *lst);
333
334
static int
335
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
336
337
/************************************************************************
338
 *                  *
339
 *    Some factorized error routines        *
340
 *                  *
341
 ************************************************************************/
342
343
/**
344
 * xmlErrAttributeDup:
345
 * @ctxt:  an XML parser context
346
 * @prefix:  the attribute prefix
347
 * @localname:  the attribute localname
348
 *
349
 * Handle a redefinition of attribute error
350
 */
351
static void
352
xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
353
                   const xmlChar * localname)
354
63.7k
{
355
63.7k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
356
63.7k
        (ctxt->instate == XML_PARSER_EOF))
357
0
  return;
358
63.7k
    if (ctxt != NULL)
359
63.7k
  ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
360
361
63.7k
    if (prefix == NULL)
362
57.7k
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
363
57.7k
                        XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
364
57.7k
                        (const char *) localname, NULL, NULL, 0, 0,
365
57.7k
                        "Attribute %s redefined\n", localname);
366
6.03k
    else
367
6.03k
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
368
6.03k
                        XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
369
6.03k
                        (const char *) prefix, (const char *) localname,
370
6.03k
                        NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
371
6.03k
                        localname);
372
63.7k
    if (ctxt != NULL) {
373
63.7k
  ctxt->wellFormed = 0;
374
63.7k
  if (ctxt->recovery == 0)
375
50.5k
      ctxt->disableSAX = 1;
376
63.7k
    }
377
63.7k
}
378
379
/**
380
 * xmlFatalErr:
381
 * @ctxt:  an XML parser context
382
 * @error:  the error number
383
 * @extra:  extra information string
384
 *
385
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
386
 */
387
static void
388
xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
389
17.4M
{
390
17.4M
    const char *errmsg;
391
392
17.4M
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
393
17.4M
        (ctxt->instate == XML_PARSER_EOF))
394
757k
  return;
395
16.6M
    switch (error) {
396
1.13M
        case XML_ERR_INVALID_HEX_CHARREF:
397
1.13M
            errmsg = "CharRef: invalid hexadecimal value";
398
1.13M
            break;
399
1.35M
        case XML_ERR_INVALID_DEC_CHARREF:
400
1.35M
            errmsg = "CharRef: invalid decimal value";
401
1.35M
            break;
402
0
        case XML_ERR_INVALID_CHARREF:
403
0
            errmsg = "CharRef: invalid value";
404
0
            break;
405
354k
        case XML_ERR_INTERNAL_ERROR:
406
354k
            errmsg = "internal error";
407
354k
            break;
408
0
        case XML_ERR_PEREF_AT_EOF:
409
0
            errmsg = "PEReference at end of document";
410
0
            break;
411
0
        case XML_ERR_PEREF_IN_PROLOG:
412
0
            errmsg = "PEReference in prolog";
413
0
            break;
414
0
        case XML_ERR_PEREF_IN_EPILOG:
415
0
            errmsg = "PEReference in epilog";
416
0
            break;
417
0
        case XML_ERR_PEREF_NO_NAME:
418
0
            errmsg = "PEReference: no name";
419
0
            break;
420
52.6k
        case XML_ERR_PEREF_SEMICOL_MISSING:
421
52.6k
            errmsg = "PEReference: expecting ';'";
422
52.6k
            break;
423
956k
        case XML_ERR_ENTITY_LOOP:
424
956k
            errmsg = "Detected an entity reference loop";
425
956k
            break;
426
0
        case XML_ERR_ENTITY_NOT_STARTED:
427
0
            errmsg = "EntityValue: \" or ' expected";
428
0
            break;
429
1.15k
        case XML_ERR_ENTITY_PE_INTERNAL:
430
1.15k
            errmsg = "PEReferences forbidden in internal subset";
431
1.15k
            break;
432
3.79k
        case XML_ERR_ENTITY_NOT_FINISHED:
433
3.79k
            errmsg = "EntityValue: \" or ' expected";
434
3.79k
            break;
435
449k
        case XML_ERR_ATTRIBUTE_NOT_STARTED:
436
449k
            errmsg = "AttValue: \" or ' expected";
437
449k
            break;
438
1.70M
        case XML_ERR_LT_IN_ATTRIBUTE:
439
1.70M
            errmsg = "Unescaped '<' not allowed in attributes values";
440
1.70M
            break;
441
9.45k
        case XML_ERR_LITERAL_NOT_STARTED:
442
9.45k
            errmsg = "SystemLiteral \" or ' expected";
443
9.45k
            break;
444
15.1k
        case XML_ERR_LITERAL_NOT_FINISHED:
445
15.1k
            errmsg = "Unfinished System or Public ID \" or ' expected";
446
15.1k
            break;
447
1.06M
        case XML_ERR_MISPLACED_CDATA_END:
448
1.06M
            errmsg = "Sequence ']]>' not allowed in content";
449
1.06M
            break;
450
8.50k
        case XML_ERR_URI_REQUIRED:
451
8.50k
            errmsg = "SYSTEM or PUBLIC, the URI is missing";
452
8.50k
            break;
453
1.00k
        case XML_ERR_PUBID_REQUIRED:
454
1.00k
            errmsg = "PUBLIC, the Public Identifier is missing";
455
1.00k
            break;
456
757k
        case XML_ERR_HYPHEN_IN_COMMENT:
457
757k
            errmsg = "Comment must not contain '--' (double-hyphen)";
458
757k
            break;
459
133k
        case XML_ERR_PI_NOT_STARTED:
460
133k
            errmsg = "xmlParsePI : no target name";
461
133k
            break;
462
6.13k
        case XML_ERR_RESERVED_XML_NAME:
463
6.13k
            errmsg = "Invalid PI name";
464
6.13k
            break;
465
628
        case XML_ERR_NOTATION_NOT_STARTED:
466
628
            errmsg = "NOTATION: Name expected here";
467
628
            break;
468
4.60k
        case XML_ERR_NOTATION_NOT_FINISHED:
469
4.60k
            errmsg = "'>' required to close NOTATION declaration";
470
4.60k
            break;
471
12.0k
        case XML_ERR_VALUE_REQUIRED:
472
12.0k
            errmsg = "Entity value required";
473
12.0k
            break;
474
1.24k
        case XML_ERR_URI_FRAGMENT:
475
1.24k
            errmsg = "Fragment not allowed";
476
1.24k
            break;
477
17.3k
        case XML_ERR_ATTLIST_NOT_STARTED:
478
17.3k
            errmsg = "'(' required to start ATTLIST enumeration";
479
17.3k
            break;
480
730
        case XML_ERR_NMTOKEN_REQUIRED:
481
730
            errmsg = "NmToken expected in ATTLIST enumeration";
482
730
            break;
483
2.08k
        case XML_ERR_ATTLIST_NOT_FINISHED:
484
2.08k
            errmsg = "')' required to finish ATTLIST enumeration";
485
2.08k
            break;
486
3.40k
        case XML_ERR_MIXED_NOT_STARTED:
487
3.40k
            errmsg = "MixedContentDecl : '|' or ')*' expected";
488
3.40k
            break;
489
0
        case XML_ERR_PCDATA_REQUIRED:
490
0
            errmsg = "MixedContentDecl : '#PCDATA' expected";
491
0
            break;
492
8.72k
        case XML_ERR_ELEMCONTENT_NOT_STARTED:
493
8.72k
            errmsg = "ContentDecl : Name or '(' expected";
494
8.72k
            break;
495
15.5k
        case XML_ERR_ELEMCONTENT_NOT_FINISHED:
496
15.5k
            errmsg = "ContentDecl : ',' '|' or ')' expected";
497
15.5k
            break;
498
0
        case XML_ERR_PEREF_IN_INT_SUBSET:
499
0
            errmsg =
500
0
                "PEReference: forbidden within markup decl in internal subset";
501
0
            break;
502
1.77M
        case XML_ERR_GT_REQUIRED:
503
1.77M
            errmsg = "expected '>'";
504
1.77M
            break;
505
539
        case XML_ERR_CONDSEC_INVALID:
506
539
            errmsg = "XML conditional section '[' expected";
507
539
            break;
508
39.0k
        case XML_ERR_EXT_SUBSET_NOT_FINISHED:
509
39.0k
            errmsg = "Content error in the external subset";
510
39.0k
            break;
511
2.87k
        case XML_ERR_CONDSEC_INVALID_KEYWORD:
512
2.87k
            errmsg =
513
2.87k
                "conditional section INCLUDE or IGNORE keyword expected";
514
2.87k
            break;
515
3.00k
        case XML_ERR_CONDSEC_NOT_FINISHED:
516
3.00k
            errmsg = "XML conditional section not closed";
517
3.00k
            break;
518
275
        case XML_ERR_XMLDECL_NOT_STARTED:
519
275
            errmsg = "Text declaration '<?xml' required";
520
275
            break;
521
169k
        case XML_ERR_XMLDECL_NOT_FINISHED:
522
169k
            errmsg = "parsing XML declaration: '?>' expected";
523
169k
            break;
524
0
        case XML_ERR_EXT_ENTITY_STANDALONE:
525
0
            errmsg = "external parsed entities cannot be standalone";
526
0
            break;
527
1.97M
        case XML_ERR_ENTITYREF_SEMICOL_MISSING:
528
1.97M
            errmsg = "EntityRef: expecting ';'";
529
1.97M
            break;
530
124k
        case XML_ERR_DOCTYPE_NOT_FINISHED:
531
124k
            errmsg = "DOCTYPE improperly terminated";
532
124k
            break;
533
0
        case XML_ERR_LTSLASH_REQUIRED:
534
0
            errmsg = "EndTag: '</' not found";
535
0
            break;
536
11.0k
        case XML_ERR_EQUAL_REQUIRED:
537
11.0k
            errmsg = "expected '='";
538
11.0k
            break;
539
36.3k
        case XML_ERR_STRING_NOT_CLOSED:
540
36.3k
            errmsg = "String not closed expecting \" or '";
541
36.3k
            break;
542
11.3k
        case XML_ERR_STRING_NOT_STARTED:
543
11.3k
            errmsg = "String not started expecting ' or \"";
544
11.3k
            break;
545
1.37k
        case XML_ERR_ENCODING_NAME:
546
1.37k
            errmsg = "Invalid XML encoding name";
547
1.37k
            break;
548
1.38k
        case XML_ERR_STANDALONE_VALUE:
549
1.38k
            errmsg = "standalone accepts only 'yes' or 'no'";
550
1.38k
            break;
551
55.3k
        case XML_ERR_DOCUMENT_EMPTY:
552
55.3k
            errmsg = "Document is empty";
553
55.3k
            break;
554
260k
        case XML_ERR_DOCUMENT_END:
555
260k
            errmsg = "Extra content at the end of the document";
556
260k
            break;
557
3.90M
        case XML_ERR_NOT_WELL_BALANCED:
558
3.90M
            errmsg = "chunk is not well balanced";
559
3.90M
            break;
560
0
        case XML_ERR_EXTRA_CONTENT:
561
0
            errmsg = "extra content at the end of well balanced chunk";
562
0
            break;
563
87.9k
        case XML_ERR_VERSION_MISSING:
564
87.9k
            errmsg = "Malformed declaration expecting version";
565
87.9k
            break;
566
242
        case XML_ERR_NAME_TOO_LONG:
567
242
            errmsg = "Name too long";
568
242
            break;
569
#if 0
570
        case:
571
            errmsg = "";
572
            break;
573
#endif
574
107k
        default:
575
107k
            errmsg = "Unregistered error message";
576
16.6M
    }
577
16.6M
    if (ctxt != NULL)
578
16.6M
  ctxt->errNo = error;
579
16.6M
    if (info == NULL) {
580
16.2M
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
581
16.2M
                        XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s\n",
582
16.2M
                        errmsg);
583
16.2M
    } else {
584
354k
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
585
354k
                        XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s: %s\n",
586
354k
                        errmsg, info);
587
354k
    }
588
16.6M
    if (ctxt != NULL) {
589
16.6M
  ctxt->wellFormed = 0;
590
16.6M
  if (ctxt->recovery == 0)
591
14.5M
      ctxt->disableSAX = 1;
592
16.6M
    }
593
16.6M
}
594
595
/**
596
 * xmlFatalErrMsg:
597
 * @ctxt:  an XML parser context
598
 * @error:  the error number
599
 * @msg:  the error message
600
 *
601
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
602
 */
603
static void LIBXML_ATTR_FORMAT(3,0)
604
xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
605
               const char *msg)
606
162M
{
607
162M
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
608
162M
        (ctxt->instate == XML_PARSER_EOF))
609
0
  return;
610
162M
    if (ctxt != NULL)
611
162M
  ctxt->errNo = error;
612
162M
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
613
162M
                    XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
614
162M
    if (ctxt != NULL) {
615
162M
  ctxt->wellFormed = 0;
616
162M
  if (ctxt->recovery == 0)
617
141M
      ctxt->disableSAX = 1;
618
162M
    }
619
162M
}
620
621
/**
622
 * xmlWarningMsg:
623
 * @ctxt:  an XML parser context
624
 * @error:  the error number
625
 * @msg:  the error message
626
 * @str1:  extra data
627
 * @str2:  extra data
628
 *
629
 * Handle a warning.
630
 */
631
static void LIBXML_ATTR_FORMAT(3,0)
632
xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
633
              const char *msg, const xmlChar *str1, const xmlChar *str2)
634
228k
{
635
228k
    xmlStructuredErrorFunc schannel = NULL;
636
637
228k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
638
228k
        (ctxt->instate == XML_PARSER_EOF))
639
0
  return;
640
228k
    if ((ctxt != NULL) && (ctxt->sax != NULL) &&
641
228k
        (ctxt->sax->initialized == XML_SAX2_MAGIC))
642
155k
        schannel = ctxt->sax->serror;
643
228k
    if (ctxt != NULL) {
644
228k
        __xmlRaiseError(schannel,
645
228k
                    (ctxt->sax) ? ctxt->sax->warning : NULL,
646
228k
                    ctxt->userData,
647
228k
                    ctxt, NULL, XML_FROM_PARSER, error,
648
228k
                    XML_ERR_WARNING, NULL, 0,
649
228k
        (const char *) str1, (const char *) str2, NULL, 0, 0,
650
228k
        msg, (const char *) str1, (const char *) str2);
651
228k
    } else {
652
0
        __xmlRaiseError(schannel, NULL, NULL,
653
0
                    ctxt, NULL, XML_FROM_PARSER, error,
654
0
                    XML_ERR_WARNING, NULL, 0,
655
0
        (const char *) str1, (const char *) str2, NULL, 0, 0,
656
0
        msg, (const char *) str1, (const char *) str2);
657
0
    }
658
228k
}
659
660
/**
661
 * xmlValidityError:
662
 * @ctxt:  an XML parser context
663
 * @error:  the error number
664
 * @msg:  the error message
665
 * @str1:  extra data
666
 *
667
 * Handle a validity error.
668
 */
669
static void LIBXML_ATTR_FORMAT(3,0)
670
xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
671
              const char *msg, const xmlChar *str1, const xmlChar *str2)
672
17.9k
{
673
17.9k
    xmlStructuredErrorFunc schannel = NULL;
674
675
17.9k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
676
17.9k
        (ctxt->instate == XML_PARSER_EOF))
677
0
  return;
678
17.9k
    if (ctxt != NULL) {
679
17.9k
  ctxt->errNo = error;
680
17.9k
  if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
681
13.7k
      schannel = ctxt->sax->serror;
682
17.9k
    }
683
17.9k
    if (ctxt != NULL) {
684
17.9k
        __xmlRaiseError(schannel,
685
17.9k
                    ctxt->vctxt.error, ctxt->vctxt.userData,
686
17.9k
                    ctxt, NULL, XML_FROM_DTD, error,
687
17.9k
                    XML_ERR_ERROR, NULL, 0, (const char *) str1,
688
17.9k
        (const char *) str2, NULL, 0, 0,
689
17.9k
        msg, (const char *) str1, (const char *) str2);
690
17.9k
  ctxt->valid = 0;
691
17.9k
    } else {
692
0
        __xmlRaiseError(schannel, NULL, NULL,
693
0
                    ctxt, NULL, XML_FROM_DTD, error,
694
0
                    XML_ERR_ERROR, NULL, 0, (const char *) str1,
695
0
        (const char *) str2, NULL, 0, 0,
696
0
        msg, (const char *) str1, (const char *) str2);
697
0
    }
698
17.9k
}
699
700
/**
701
 * xmlFatalErrMsgInt:
702
 * @ctxt:  an XML parser context
703
 * @error:  the error number
704
 * @msg:  the error message
705
 * @val:  an integer value
706
 *
707
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
708
 */
709
static void LIBXML_ATTR_FORMAT(3,0)
710
xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
711
                  const char *msg, int val)
712
161M
{
713
161M
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
714
161M
        (ctxt->instate == XML_PARSER_EOF))
715
0
  return;
716
161M
    if (ctxt != NULL)
717
161M
  ctxt->errNo = error;
718
161M
    __xmlRaiseError(NULL, NULL, NULL,
719
161M
                    ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
720
161M
                    NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
721
161M
    if (ctxt != NULL) {
722
161M
  ctxt->wellFormed = 0;
723
161M
  if (ctxt->recovery == 0)
724
154M
      ctxt->disableSAX = 1;
725
161M
    }
726
161M
}
727
728
/**
729
 * xmlFatalErrMsgStrIntStr:
730
 * @ctxt:  an XML parser context
731
 * @error:  the error number
732
 * @msg:  the error message
733
 * @str1:  an string info
734
 * @val:  an integer value
735
 * @str2:  an string info
736
 *
737
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
738
 */
739
static void LIBXML_ATTR_FORMAT(3,0)
740
xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
741
                  const char *msg, const xmlChar *str1, int val,
742
      const xmlChar *str2)
743
23.5M
{
744
23.5M
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
745
23.5M
        (ctxt->instate == XML_PARSER_EOF))
746
0
  return;
747
23.5M
    if (ctxt != NULL)
748
23.5M
  ctxt->errNo = error;
749
23.5M
    __xmlRaiseError(NULL, NULL, NULL,
750
23.5M
                    ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
751
23.5M
                    NULL, 0, (const char *) str1, (const char *) str2,
752
23.5M
        NULL, val, 0, msg, str1, val, str2);
753
23.5M
    if (ctxt != NULL) {
754
23.5M
  ctxt->wellFormed = 0;
755
23.5M
  if (ctxt->recovery == 0)
756
21.2M
      ctxt->disableSAX = 1;
757
23.5M
    }
758
23.5M
}
759
760
/**
761
 * xmlFatalErrMsgStr:
762
 * @ctxt:  an XML parser context
763
 * @error:  the error number
764
 * @msg:  the error message
765
 * @val:  a string value
766
 *
767
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
768
 */
769
static void LIBXML_ATTR_FORMAT(3,0)
770
xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
771
                  const char *msg, const xmlChar * val)
772
32.3M
{
773
32.3M
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
774
32.3M
        (ctxt->instate == XML_PARSER_EOF))
775
0
  return;
776
32.3M
    if (ctxt != NULL)
777
32.3M
  ctxt->errNo = error;
778
32.3M
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
779
32.3M
                    XML_FROM_PARSER, error, XML_ERR_FATAL,
780
32.3M
                    NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
781
32.3M
                    val);
782
32.3M
    if (ctxt != NULL) {
783
32.3M
  ctxt->wellFormed = 0;
784
32.3M
  if (ctxt->recovery == 0)
785
28.4M
      ctxt->disableSAX = 1;
786
32.3M
    }
787
32.3M
}
788
789
/**
790
 * xmlErrMsgStr:
791
 * @ctxt:  an XML parser context
792
 * @error:  the error number
793
 * @msg:  the error message
794
 * @val:  a string value
795
 *
796
 * Handle a non fatal parser error
797
 */
798
static void LIBXML_ATTR_FORMAT(3,0)
799
xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
800
                  const char *msg, const xmlChar * val)
801
129k
{
802
129k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
803
129k
        (ctxt->instate == XML_PARSER_EOF))
804
0
  return;
805
129k
    if (ctxt != NULL)
806
129k
  ctxt->errNo = error;
807
129k
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
808
129k
                    XML_FROM_PARSER, error, XML_ERR_ERROR,
809
129k
                    NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
810
129k
                    val);
811
129k
}
812
813
/**
814
 * xmlNsErr:
815
 * @ctxt:  an XML parser context
816
 * @error:  the error number
817
 * @msg:  the message
818
 * @info1:  extra information string
819
 * @info2:  extra information string
820
 *
821
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
822
 */
823
static void LIBXML_ATTR_FORMAT(3,0)
824
xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
825
         const char *msg,
826
         const xmlChar * info1, const xmlChar * info2,
827
         const xmlChar * info3)
828
1.32M
{
829
1.32M
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
830
1.32M
        (ctxt->instate == XML_PARSER_EOF))
831
0
  return;
832
1.32M
    if (ctxt != NULL)
833
1.32M
  ctxt->errNo = error;
834
1.32M
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
835
1.32M
                    XML_ERR_ERROR, NULL, 0, (const char *) info1,
836
1.32M
                    (const char *) info2, (const char *) info3, 0, 0, msg,
837
1.32M
                    info1, info2, info3);
838
1.32M
    if (ctxt != NULL)
839
1.32M
  ctxt->nsWellFormed = 0;
840
1.32M
}
841
842
/**
843
 * xmlNsWarn
844
 * @ctxt:  an XML parser context
845
 * @error:  the error number
846
 * @msg:  the message
847
 * @info1:  extra information string
848
 * @info2:  extra information string
849
 *
850
 * Handle a namespace warning error
851
 */
852
static void LIBXML_ATTR_FORMAT(3,0)
853
xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
854
         const char *msg,
855
         const xmlChar * info1, const xmlChar * info2,
856
         const xmlChar * info3)
857
34.1k
{
858
34.1k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
859
34.1k
        (ctxt->instate == XML_PARSER_EOF))
860
0
  return;
861
34.1k
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
862
34.1k
                    XML_ERR_WARNING, NULL, 0, (const char *) info1,
863
34.1k
                    (const char *) info2, (const char *) info3, 0, 0, msg,
864
34.1k
                    info1, info2, info3);
865
34.1k
}
866
867
/************************************************************************
868
 *                  *
869
 *    Library wide options          *
870
 *                  *
871
 ************************************************************************/
872
873
/**
874
  * xmlHasFeature:
875
  * @feature: the feature to be examined
876
  *
877
  * Examines if the library has been compiled with a given feature.
878
  *
879
  * Returns a non-zero value if the feature exist, otherwise zero.
880
  * Returns zero (0) if the feature does not exist or an unknown
881
  * unknown feature is requested, non-zero otherwise.
882
  */
883
int
884
xmlHasFeature(xmlFeature feature)
885
0
{
886
0
    switch (feature) {
887
0
  case XML_WITH_THREAD:
888
0
#ifdef LIBXML_THREAD_ENABLED
889
0
      return(1);
890
#else
891
      return(0);
892
#endif
893
0
        case XML_WITH_TREE:
894
0
#ifdef LIBXML_TREE_ENABLED
895
0
            return(1);
896
#else
897
            return(0);
898
#endif
899
0
        case XML_WITH_OUTPUT:
900
0
#ifdef LIBXML_OUTPUT_ENABLED
901
0
            return(1);
902
#else
903
            return(0);
904
#endif
905
0
        case XML_WITH_PUSH:
906
0
#ifdef LIBXML_PUSH_ENABLED
907
0
            return(1);
908
#else
909
            return(0);
910
#endif
911
0
        case XML_WITH_READER:
912
0
#ifdef LIBXML_READER_ENABLED
913
0
            return(1);
914
#else
915
            return(0);
916
#endif
917
0
        case XML_WITH_PATTERN:
918
0
#ifdef LIBXML_PATTERN_ENABLED
919
0
            return(1);
920
#else
921
            return(0);
922
#endif
923
0
        case XML_WITH_WRITER:
924
0
#ifdef LIBXML_WRITER_ENABLED
925
0
            return(1);
926
#else
927
            return(0);
928
#endif
929
0
        case XML_WITH_SAX1:
930
0
#ifdef LIBXML_SAX1_ENABLED
931
0
            return(1);
932
#else
933
            return(0);
934
#endif
935
0
        case XML_WITH_FTP:
936
#ifdef LIBXML_FTP_ENABLED
937
            return(1);
938
#else
939
0
            return(0);
940
0
#endif
941
0
        case XML_WITH_HTTP:
942
#ifdef LIBXML_HTTP_ENABLED
943
            return(1);
944
#else
945
0
            return(0);
946
0
#endif
947
0
        case XML_WITH_VALID:
948
0
#ifdef LIBXML_VALID_ENABLED
949
0
            return(1);
950
#else
951
            return(0);
952
#endif
953
0
        case XML_WITH_HTML:
954
0
#ifdef LIBXML_HTML_ENABLED
955
0
            return(1);
956
#else
957
            return(0);
958
#endif
959
0
        case XML_WITH_LEGACY:
960
#ifdef LIBXML_LEGACY_ENABLED
961
            return(1);
962
#else
963
0
            return(0);
964
0
#endif
965
0
        case XML_WITH_C14N:
966
0
#ifdef LIBXML_C14N_ENABLED
967
0
            return(1);
968
#else
969
            return(0);
970
#endif
971
0
        case XML_WITH_CATALOG:
972
0
#ifdef LIBXML_CATALOG_ENABLED
973
0
            return(1);
974
#else
975
            return(0);
976
#endif
977
0
        case XML_WITH_XPATH:
978
0
#ifdef LIBXML_XPATH_ENABLED
979
0
            return(1);
980
#else
981
            return(0);
982
#endif
983
0
        case XML_WITH_XPTR:
984
0
#ifdef LIBXML_XPTR_ENABLED
985
0
            return(1);
986
#else
987
            return(0);
988
#endif
989
0
        case XML_WITH_XINCLUDE:
990
0
#ifdef LIBXML_XINCLUDE_ENABLED
991
0
            return(1);
992
#else
993
            return(0);
994
#endif
995
0
        case XML_WITH_ICONV:
996
0
#ifdef LIBXML_ICONV_ENABLED
997
0
            return(1);
998
#else
999
            return(0);
1000
#endif
1001
0
        case XML_WITH_ISO8859X:
1002
0
#ifdef LIBXML_ISO8859X_ENABLED
1003
0
            return(1);
1004
#else
1005
            return(0);
1006
#endif
1007
0
        case XML_WITH_UNICODE:
1008
0
#ifdef LIBXML_UNICODE_ENABLED
1009
0
            return(1);
1010
#else
1011
            return(0);
1012
#endif
1013
0
        case XML_WITH_REGEXP:
1014
0
#ifdef LIBXML_REGEXP_ENABLED
1015
0
            return(1);
1016
#else
1017
            return(0);
1018
#endif
1019
0
        case XML_WITH_AUTOMATA:
1020
0
#ifdef LIBXML_AUTOMATA_ENABLED
1021
0
            return(1);
1022
#else
1023
            return(0);
1024
#endif
1025
0
        case XML_WITH_EXPR:
1026
#ifdef LIBXML_EXPR_ENABLED
1027
            return(1);
1028
#else
1029
0
            return(0);
1030
0
#endif
1031
0
        case XML_WITH_SCHEMAS:
1032
0
#ifdef LIBXML_SCHEMAS_ENABLED
1033
0
            return(1);
1034
#else
1035
            return(0);
1036
#endif
1037
0
        case XML_WITH_SCHEMATRON:
1038
0
#ifdef LIBXML_SCHEMATRON_ENABLED
1039
0
            return(1);
1040
#else
1041
            return(0);
1042
#endif
1043
0
        case XML_WITH_MODULES:
1044
0
#ifdef LIBXML_MODULES_ENABLED
1045
0
            return(1);
1046
#else
1047
            return(0);
1048
#endif
1049
0
        case XML_WITH_DEBUG:
1050
#ifdef LIBXML_DEBUG_ENABLED
1051
            return(1);
1052
#else
1053
0
            return(0);
1054
0
#endif
1055
0
        case XML_WITH_DEBUG_MEM:
1056
#ifdef DEBUG_MEMORY_LOCATION
1057
            return(1);
1058
#else
1059
0
            return(0);
1060
0
#endif
1061
0
        case XML_WITH_DEBUG_RUN:
1062
0
            return(0);
1063
0
        case XML_WITH_ZLIB:
1064
0
#ifdef LIBXML_ZLIB_ENABLED
1065
0
            return(1);
1066
#else
1067
            return(0);
1068
#endif
1069
0
        case XML_WITH_LZMA:
1070
0
#ifdef LIBXML_LZMA_ENABLED
1071
0
            return(1);
1072
#else
1073
            return(0);
1074
#endif
1075
0
        case XML_WITH_ICU:
1076
#ifdef LIBXML_ICU_ENABLED
1077
            return(1);
1078
#else
1079
0
            return(0);
1080
0
#endif
1081
0
        default:
1082
0
      break;
1083
0
     }
1084
0
     return(0);
1085
0
}
1086
1087
/************************************************************************
1088
 *                  *
1089
 *    SAX2 defaulted attributes handling      *
1090
 *                  *
1091
 ************************************************************************/
1092
1093
/**
1094
 * xmlDetectSAX2:
1095
 * @ctxt:  an XML parser context
1096
 *
1097
 * Do the SAX2 detection and specific initialization
1098
 */
1099
static void
1100
8.12M
xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
1101
8.12M
    xmlSAXHandlerPtr sax;
1102
1103
    /* Avoid unused variable warning if features are disabled. */
1104
8.12M
    (void) sax;
1105
1106
8.12M
    if (ctxt == NULL) return;
1107
8.12M
    sax = ctxt->sax;
1108
8.12M
#ifdef LIBXML_SAX1_ENABLED
1109
8.12M
    if ((sax) &&  (sax->initialized == XML_SAX2_MAGIC) &&
1110
8.12M
        ((sax->startElementNs != NULL) ||
1111
1.80M
         (sax->endElementNs != NULL) ||
1112
1.80M
         ((sax->startElement == NULL) && (sax->endElement == NULL))))
1113
1.80M
        ctxt->sax2 = 1;
1114
#else
1115
    ctxt->sax2 = 1;
1116
#endif /* LIBXML_SAX1_ENABLED */
1117
1118
8.12M
    ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
1119
8.12M
    ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
1120
8.12M
    ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
1121
8.12M
    if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
1122
8.12M
    (ctxt->str_xml_ns == NULL)) {
1123
0
        xmlErrMemory(ctxt, NULL);
1124
0
    }
1125
8.12M
}
1126
1127
typedef struct _xmlDefAttrs xmlDefAttrs;
1128
typedef xmlDefAttrs *xmlDefAttrsPtr;
1129
struct _xmlDefAttrs {
1130
    int nbAttrs;  /* number of defaulted attributes on that element */
1131
    int maxAttrs;       /* the size of the array */
1132
#if __STDC_VERSION__ >= 199901L
1133
    /* Using a C99 flexible array member avoids UBSan errors. */
1134
    const xmlChar *values[]; /* array of localname/prefix/values/external */
1135
#else
1136
    const xmlChar *values[5];
1137
#endif
1138
};
1139
1140
/**
1141
 * xmlAttrNormalizeSpace:
1142
 * @src: the source string
1143
 * @dst: the target string
1144
 *
1145
 * Normalize the space in non CDATA attribute values:
1146
 * If the attribute type is not CDATA, then the XML processor MUST further
1147
 * process the normalized attribute value by discarding any leading and
1148
 * trailing space (#x20) characters, and by replacing sequences of space
1149
 * (#x20) characters by a single space (#x20) character.
1150
 * Note that the size of dst need to be at least src, and if one doesn't need
1151
 * to preserve dst (and it doesn't come from a dictionary or read-only) then
1152
 * passing src as dst is just fine.
1153
 *
1154
 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1155
 *         is needed.
1156
 */
1157
static xmlChar *
1158
xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
1159
126k
{
1160
126k
    if ((src == NULL) || (dst == NULL))
1161
0
        return(NULL);
1162
1163
219k
    while (*src == 0x20) src++;
1164
3.75M
    while (*src != 0) {
1165
3.62M
  if (*src == 0x20) {
1166
502k
      while (*src == 0x20) src++;
1167
87.4k
      if (*src != 0)
1168
74.1k
    *dst++ = 0x20;
1169
3.54M
  } else {
1170
3.54M
      *dst++ = *src++;
1171
3.54M
  }
1172
3.62M
    }
1173
126k
    *dst = 0;
1174
126k
    if (dst == src)
1175
101k
       return(NULL);
1176
24.9k
    return(dst);
1177
126k
}
1178
1179
/**
1180
 * xmlAttrNormalizeSpace2:
1181
 * @src: the source string
1182
 *
1183
 * Normalize the space in non CDATA attribute values, a slightly more complex
1184
 * front end to avoid allocation problems when running on attribute values
1185
 * coming from the input.
1186
 *
1187
 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1188
 *         is needed.
1189
 */
1190
static const xmlChar *
1191
xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
1192
83.9k
{
1193
83.9k
    int i;
1194
83.9k
    int remove_head = 0;
1195
83.9k
    int need_realloc = 0;
1196
83.9k
    const xmlChar *cur;
1197
1198
83.9k
    if ((ctxt == NULL) || (src == NULL) || (len == NULL))
1199
0
        return(NULL);
1200
83.9k
    i = *len;
1201
83.9k
    if (i <= 0)
1202
3.26k
        return(NULL);
1203
1204
80.7k
    cur = src;
1205
109k
    while (*cur == 0x20) {
1206
28.4k
        cur++;
1207
28.4k
  remove_head++;
1208
28.4k
    }
1209
1.54M
    while (*cur != 0) {
1210
1.47M
  if (*cur == 0x20) {
1211
78.4k
      cur++;
1212
78.4k
      if ((*cur == 0x20) || (*cur == 0)) {
1213
11.0k
          need_realloc = 1;
1214
11.0k
    break;
1215
11.0k
      }
1216
78.4k
  } else
1217
1.39M
      cur++;
1218
1.47M
    }
1219
80.7k
    if (need_realloc) {
1220
11.0k
        xmlChar *ret;
1221
1222
11.0k
  ret = xmlStrndup(src + remove_head, i - remove_head + 1);
1223
11.0k
  if (ret == NULL) {
1224
0
      xmlErrMemory(ctxt, NULL);
1225
0
      return(NULL);
1226
0
  }
1227
11.0k
  xmlAttrNormalizeSpace(ret, ret);
1228
11.0k
  *len = strlen((const char *)ret);
1229
11.0k
        return(ret);
1230
69.7k
    } else if (remove_head) {
1231
1.68k
        *len -= remove_head;
1232
1.68k
        memmove(src, src + remove_head, 1 + *len);
1233
1.68k
  return(src);
1234
1.68k
    }
1235
68.0k
    return(NULL);
1236
80.7k
}
1237
1238
/**
1239
 * xmlAddDefAttrs:
1240
 * @ctxt:  an XML parser context
1241
 * @fullname:  the element fullname
1242
 * @fullattr:  the attribute fullname
1243
 * @value:  the attribute value
1244
 *
1245
 * Add a defaulted attribute for an element
1246
 */
1247
static void
1248
xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1249
               const xmlChar *fullname,
1250
               const xmlChar *fullattr,
1251
304k
               const xmlChar *value) {
1252
304k
    xmlDefAttrsPtr defaults;
1253
304k
    int len;
1254
304k
    const xmlChar *name;
1255
304k
    const xmlChar *prefix;
1256
1257
    /*
1258
     * Allows to detect attribute redefinitions
1259
     */
1260
304k
    if (ctxt->attsSpecial != NULL) {
1261
254k
        if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1262
89.8k
      return;
1263
254k
    }
1264
1265
214k
    if (ctxt->attsDefault == NULL) {
1266
58.3k
        ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
1267
58.3k
  if (ctxt->attsDefault == NULL)
1268
0
      goto mem_error;
1269
58.3k
    }
1270
1271
    /*
1272
     * split the element name into prefix:localname , the string found
1273
     * are within the DTD and then not associated to namespace names.
1274
     */
1275
214k
    name = xmlSplitQName3(fullname, &len);
1276
214k
    if (name == NULL) {
1277
182k
        name = xmlDictLookup(ctxt->dict, fullname, -1);
1278
182k
  prefix = NULL;
1279
182k
    } else {
1280
32.1k
        name = xmlDictLookup(ctxt->dict, name, -1);
1281
32.1k
  prefix = xmlDictLookup(ctxt->dict, fullname, len);
1282
32.1k
    }
1283
1284
    /*
1285
     * make sure there is some storage
1286
     */
1287
214k
    defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1288
214k
    if (defaults == NULL) {
1289
119k
        defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
1290
119k
                     (4 * 5) * sizeof(const xmlChar *));
1291
119k
  if (defaults == NULL)
1292
0
      goto mem_error;
1293
119k
  defaults->nbAttrs = 0;
1294
119k
  defaults->maxAttrs = 4;
1295
119k
  if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1296
119k
                          defaults, NULL) < 0) {
1297
0
      xmlFree(defaults);
1298
0
      goto mem_error;
1299
0
  }
1300
119k
    } else if (defaults->nbAttrs >= defaults->maxAttrs) {
1301
2.79k
        xmlDefAttrsPtr temp;
1302
1303
2.79k
        temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
1304
2.79k
           (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
1305
2.79k
  if (temp == NULL)
1306
0
      goto mem_error;
1307
2.79k
  defaults = temp;
1308
2.79k
  defaults->maxAttrs *= 2;
1309
2.79k
  if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1310
2.79k
                          defaults, NULL) < 0) {
1311
0
      xmlFree(defaults);
1312
0
      goto mem_error;
1313
0
  }
1314
2.79k
    }
1315
1316
    /*
1317
     * Split the element name into prefix:localname , the string found
1318
     * are within the DTD and hen not associated to namespace names.
1319
     */
1320
214k
    name = xmlSplitQName3(fullattr, &len);
1321
214k
    if (name == NULL) {
1322
145k
        name = xmlDictLookup(ctxt->dict, fullattr, -1);
1323
145k
  prefix = NULL;
1324
145k
    } else {
1325
68.5k
        name = xmlDictLookup(ctxt->dict, name, -1);
1326
68.5k
  prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1327
68.5k
    }
1328
1329
214k
    defaults->values[5 * defaults->nbAttrs] = name;
1330
214k
    defaults->values[5 * defaults->nbAttrs + 1] = prefix;
1331
    /* intern the string and precompute the end */
1332
214k
    len = xmlStrlen(value);
1333
214k
    value = xmlDictLookup(ctxt->dict, value, len);
1334
214k
    defaults->values[5 * defaults->nbAttrs + 2] = value;
1335
214k
    defaults->values[5 * defaults->nbAttrs + 3] = value + len;
1336
214k
    if (ctxt->external)
1337
114k
        defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
1338
100k
    else
1339
100k
        defaults->values[5 * defaults->nbAttrs + 4] = NULL;
1340
214k
    defaults->nbAttrs++;
1341
1342
214k
    return;
1343
1344
0
mem_error:
1345
0
    xmlErrMemory(ctxt, NULL);
1346
0
    return;
1347
214k
}
1348
1349
/**
1350
 * xmlAddSpecialAttr:
1351
 * @ctxt:  an XML parser context
1352
 * @fullname:  the element fullname
1353
 * @fullattr:  the attribute fullname
1354
 * @type:  the attribute type
1355
 *
1356
 * Register this attribute type
1357
 */
1358
static void
1359
xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1360
      const xmlChar *fullname,
1361
      const xmlChar *fullattr,
1362
      int type)
1363
2.72M
{
1364
2.72M
    if (ctxt->attsSpecial == NULL) {
1365
123k
        ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
1366
123k
  if (ctxt->attsSpecial == NULL)
1367
0
      goto mem_error;
1368
123k
    }
1369
1370
2.72M
    if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1371
272k
        return;
1372
1373
2.45M
    xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1374
2.45M
                     (void *) (ptrdiff_t) type);
1375
2.45M
    return;
1376
1377
0
mem_error:
1378
0
    xmlErrMemory(ctxt, NULL);
1379
0
    return;
1380
2.72M
}
1381
1382
/**
1383
 * xmlCleanSpecialAttrCallback:
1384
 *
1385
 * Removes CDATA attributes from the special attribute table
1386
 */
1387
static void
1388
xmlCleanSpecialAttrCallback(void *payload, void *data,
1389
                            const xmlChar *fullname, const xmlChar *fullattr,
1390
2.44M
                            const xmlChar *unused ATTRIBUTE_UNUSED) {
1391
2.44M
    xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1392
1393
2.44M
    if (((ptrdiff_t) payload) == XML_ATTRIBUTE_CDATA) {
1394
846k
        xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1395
846k
    }
1396
2.44M
}
1397
1398
/**
1399
 * xmlCleanSpecialAttr:
1400
 * @ctxt:  an XML parser context
1401
 *
1402
 * Trim the list of attributes defined to remove all those of type
1403
 * CDATA as they are not special. This call should be done when finishing
1404
 * to parse the DTD and before starting to parse the document root.
1405
 */
1406
static void
1407
xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1408
476k
{
1409
476k
    if (ctxt->attsSpecial == NULL)
1410
356k
        return;
1411
1412
120k
    xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1413
1414
120k
    if (xmlHashSize(ctxt->attsSpecial) == 0) {
1415
19.4k
        xmlHashFree(ctxt->attsSpecial, NULL);
1416
19.4k
        ctxt->attsSpecial = NULL;
1417
19.4k
    }
1418
120k
    return;
1419
476k
}
1420
1421
/**
1422
 * xmlCheckLanguageID:
1423
 * @lang:  pointer to the string value
1424
 *
1425
 * Checks that the value conforms to the LanguageID production:
1426
 *
1427
 * NOTE: this is somewhat deprecated, those productions were removed from
1428
 *       the XML Second edition.
1429
 *
1430
 * [33] LanguageID ::= Langcode ('-' Subcode)*
1431
 * [34] Langcode ::= ISO639Code |  IanaCode |  UserCode
1432
 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1433
 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1434
 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1435
 * [38] Subcode ::= ([a-z] | [A-Z])+
1436
 *
1437
 * The current REC reference the successors of RFC 1766, currently 5646
1438
 *
1439
 * http://www.rfc-editor.org/rfc/rfc5646.txt
1440
 * langtag       = language
1441
 *                 ["-" script]
1442
 *                 ["-" region]
1443
 *                 *("-" variant)
1444
 *                 *("-" extension)
1445
 *                 ["-" privateuse]
1446
 * language      = 2*3ALPHA            ; shortest ISO 639 code
1447
 *                 ["-" extlang]       ; sometimes followed by
1448
 *                                     ; extended language subtags
1449
 *               / 4ALPHA              ; or reserved for future use
1450
 *               / 5*8ALPHA            ; or registered language subtag
1451
 *
1452
 * extlang       = 3ALPHA              ; selected ISO 639 codes
1453
 *                 *2("-" 3ALPHA)      ; permanently reserved
1454
 *
1455
 * script        = 4ALPHA              ; ISO 15924 code
1456
 *
1457
 * region        = 2ALPHA              ; ISO 3166-1 code
1458
 *               / 3DIGIT              ; UN M.49 code
1459
 *
1460
 * variant       = 5*8alphanum         ; registered variants
1461
 *               / (DIGIT 3alphanum)
1462
 *
1463
 * extension     = singleton 1*("-" (2*8alphanum))
1464
 *
1465
 *                                     ; Single alphanumerics
1466
 *                                     ; "x" reserved for private use
1467
 * singleton     = DIGIT               ; 0 - 9
1468
 *               / %x41-57             ; A - W
1469
 *               / %x59-5A             ; Y - Z
1470
 *               / %x61-77             ; a - w
1471
 *               / %x79-7A             ; y - z
1472
 *
1473
 * it sounds right to still allow Irregular i-xxx IANA and user codes too
1474
 * The parser below doesn't try to cope with extension or privateuse
1475
 * that could be added but that's not interoperable anyway
1476
 *
1477
 * Returns 1 if correct 0 otherwise
1478
 **/
1479
int
1480
xmlCheckLanguageID(const xmlChar * lang)
1481
38.2k
{
1482
38.2k
    const xmlChar *cur = lang, *nxt;
1483
1484
38.2k
    if (cur == NULL)
1485
1.02k
        return (0);
1486
37.2k
    if (((cur[0] == 'i') && (cur[1] == '-')) ||
1487
37.2k
        ((cur[0] == 'I') && (cur[1] == '-')) ||
1488
37.2k
        ((cur[0] == 'x') && (cur[1] == '-')) ||
1489
37.2k
        ((cur[0] == 'X') && (cur[1] == '-'))) {
1490
        /*
1491
         * Still allow IANA code and user code which were coming
1492
         * from the previous version of the XML-1.0 specification
1493
         * it's deprecated but we should not fail
1494
         */
1495
1.44k
        cur += 2;
1496
16.8k
        while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1497
16.8k
               ((cur[0] >= 'a') && (cur[0] <= 'z')))
1498
15.4k
            cur++;
1499
1.44k
        return(cur[0] == 0);
1500
1.44k
    }
1501
35.8k
    nxt = cur;
1502
142k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1503
142k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1504
106k
           nxt++;
1505
35.8k
    if (nxt - cur >= 4) {
1506
        /*
1507
         * Reserved
1508
         */
1509
1.84k
        if ((nxt - cur > 8) || (nxt[0] != 0))
1510
1.65k
            return(0);
1511
189
        return(1);
1512
1.84k
    }
1513
33.9k
    if (nxt - cur < 2)
1514
2.19k
        return(0);
1515
    /* we got an ISO 639 code */
1516
31.7k
    if (nxt[0] == 0)
1517
17.5k
        return(1);
1518
14.2k
    if (nxt[0] != '-')
1519
2.20k
        return(0);
1520
1521
11.9k
    nxt++;
1522
11.9k
    cur = nxt;
1523
    /* now we can have extlang or script or region or variant */
1524
11.9k
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1525
2.11k
        goto region_m49;
1526
1527
53.9k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1528
53.9k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1529
44.0k
           nxt++;
1530
9.88k
    if (nxt - cur == 4)
1531
2.59k
        goto script;
1532
7.28k
    if (nxt - cur == 2)
1533
3.38k
        goto region;
1534
3.90k
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1535
372
        goto variant;
1536
3.52k
    if (nxt - cur != 3)
1537
1.46k
        return(0);
1538
    /* we parsed an extlang */
1539
2.06k
    if (nxt[0] == 0)
1540
15
        return(1);
1541
2.05k
    if (nxt[0] != '-')
1542
359
        return(0);
1543
1544
1.69k
    nxt++;
1545
1.69k
    cur = nxt;
1546
    /* now we can have script or region or variant */
1547
1.69k
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1548
241
        goto region_m49;
1549
1550
27.1k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1551
27.1k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1552
25.6k
           nxt++;
1553
1.45k
    if (nxt - cur == 2)
1554
467
        goto region;
1555
985
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1556
375
        goto variant;
1557
610
    if (nxt - cur != 4)
1558
482
        return(0);
1559
    /* we parsed a script */
1560
2.72k
script:
1561
2.72k
    if (nxt[0] == 0)
1562
68
        return(1);
1563
2.65k
    if (nxt[0] != '-')
1564
861
        return(0);
1565
1566
1.79k
    nxt++;
1567
1.79k
    cur = nxt;
1568
    /* now we can have region or variant */
1569
1.79k
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1570
367
        goto region_m49;
1571
1572
18.9k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1573
18.9k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1574
17.5k
           nxt++;
1575
1576
1.42k
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1577
422
        goto variant;
1578
1.00k
    if (nxt - cur != 2)
1579
690
        return(0);
1580
    /* we parsed a region */
1581
5.70k
region:
1582
5.70k
    if (nxt[0] == 0)
1583
454
        return(1);
1584
5.24k
    if (nxt[0] != '-')
1585
2.75k
        return(0);
1586
1587
2.49k
    nxt++;
1588
2.49k
    cur = nxt;
1589
    /* now we can just have a variant */
1590
44.8k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1591
44.8k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1592
42.3k
           nxt++;
1593
1594
2.49k
    if ((nxt - cur < 5) || (nxt - cur > 8))
1595
1.34k
        return(0);
1596
1597
    /* we parsed a variant */
1598
2.31k
variant:
1599
2.31k
    if (nxt[0] == 0)
1600
214
        return(1);
1601
2.09k
    if (nxt[0] != '-')
1602
1.15k
        return(0);
1603
    /* extensions and private use subtags not checked */
1604
943
    return (1);
1605
1606
2.72k
region_m49:
1607
2.72k
    if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
1608
2.72k
        ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
1609
1.53k
        nxt += 3;
1610
1.53k
        goto region;
1611
1.53k
    }
1612
1.18k
    return(0);
1613
2.72k
}
1614
1615
/************************************************************************
1616
 *                  *
1617
 *    Parser stacks related functions and macros    *
1618
 *                  *
1619
 ************************************************************************/
1620
1621
static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1622
                                            const xmlChar ** str);
1623
1624
#ifdef SAX2
1625
/**
1626
 * nsPush:
1627
 * @ctxt:  an XML parser context
1628
 * @prefix:  the namespace prefix or NULL
1629
 * @URL:  the namespace name
1630
 *
1631
 * Pushes a new parser namespace on top of the ns stack
1632
 *
1633
 * Returns -1 in case of error, -2 if the namespace should be discarded
1634
 *     and the index in the stack otherwise.
1635
 */
1636
static int
1637
nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1638
3.31M
{
1639
3.31M
    if (ctxt->options & XML_PARSE_NSCLEAN) {
1640
258k
        int i;
1641
403k
  for (i = ctxt->nsNr - 2;i >= 0;i -= 2) {
1642
243k
      if (ctxt->nsTab[i] == prefix) {
1643
    /* in scope */
1644
98.7k
          if (ctxt->nsTab[i + 1] == URL)
1645
48.5k
        return(-2);
1646
    /* out of scope keep it */
1647
50.2k
    break;
1648
98.7k
      }
1649
243k
  }
1650
258k
    }
1651
3.26M
    if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1652
269k
  ctxt->nsMax = 10;
1653
269k
  ctxt->nsNr = 0;
1654
269k
  ctxt->nsTab = (const xmlChar **)
1655
269k
                xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1656
269k
  if (ctxt->nsTab == NULL) {
1657
0
      xmlErrMemory(ctxt, NULL);
1658
0
      ctxt->nsMax = 0;
1659
0
            return (-1);
1660
0
  }
1661
2.99M
    } else if (ctxt->nsNr >= ctxt->nsMax) {
1662
69.4k
        const xmlChar ** tmp;
1663
69.4k
        ctxt->nsMax *= 2;
1664
69.4k
        tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1665
69.4k
            ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1666
69.4k
        if (tmp == NULL) {
1667
0
            xmlErrMemory(ctxt, NULL);
1668
0
      ctxt->nsMax /= 2;
1669
0
            return (-1);
1670
0
        }
1671
69.4k
  ctxt->nsTab = tmp;
1672
69.4k
    }
1673
3.26M
    ctxt->nsTab[ctxt->nsNr++] = prefix;
1674
3.26M
    ctxt->nsTab[ctxt->nsNr++] = URL;
1675
3.26M
    return (ctxt->nsNr);
1676
3.26M
}
1677
/**
1678
 * nsPop:
1679
 * @ctxt: an XML parser context
1680
 * @nr:  the number to pop
1681
 *
1682
 * Pops the top @nr parser prefix/namespace from the ns stack
1683
 *
1684
 * Returns the number of namespaces removed
1685
 */
1686
static int
1687
nsPop(xmlParserCtxtPtr ctxt, int nr)
1688
242k
{
1689
242k
    int i;
1690
1691
242k
    if (ctxt->nsTab == NULL) return(0);
1692
242k
    if (ctxt->nsNr < nr) {
1693
0
        xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1694
0
        nr = ctxt->nsNr;
1695
0
    }
1696
242k
    if (ctxt->nsNr <= 0)
1697
0
        return (0);
1698
1699
771k
    for (i = 0;i < nr;i++) {
1700
528k
         ctxt->nsNr--;
1701
528k
   ctxt->nsTab[ctxt->nsNr] = NULL;
1702
528k
    }
1703
242k
    return(nr);
1704
242k
}
1705
#endif
1706
1707
static int
1708
360k
xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1709
360k
    const xmlChar **atts;
1710
360k
    int *attallocs;
1711
360k
    int maxatts;
1712
1713
360k
    if (ctxt->atts == NULL) {
1714
359k
  maxatts = 55; /* allow for 10 attrs by default */
1715
359k
  atts = (const xmlChar **)
1716
359k
         xmlMalloc(maxatts * sizeof(xmlChar *));
1717
359k
  if (atts == NULL) goto mem_error;
1718
359k
  ctxt->atts = atts;
1719
359k
  attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1720
359k
  if (attallocs == NULL) goto mem_error;
1721
359k
  ctxt->attallocs = attallocs;
1722
359k
  ctxt->maxatts = maxatts;
1723
359k
    } else if (nr + 5 > ctxt->maxatts) {
1724
621
  maxatts = (nr + 5) * 2;
1725
621
  atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1726
621
             maxatts * sizeof(const xmlChar *));
1727
621
  if (atts == NULL) goto mem_error;
1728
621
  ctxt->atts = atts;
1729
621
  attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1730
621
                               (maxatts / 5) * sizeof(int));
1731
621
  if (attallocs == NULL) goto mem_error;
1732
621
  ctxt->attallocs = attallocs;
1733
621
  ctxt->maxatts = maxatts;
1734
621
    }
1735
360k
    return(ctxt->maxatts);
1736
0
mem_error:
1737
0
    xmlErrMemory(ctxt, NULL);
1738
0
    return(-1);
1739
360k
}
1740
1741
/**
1742
 * inputPush:
1743
 * @ctxt:  an XML parser context
1744
 * @value:  the parser input
1745
 *
1746
 * Pushes a new parser input on top of the input stack
1747
 *
1748
 * Returns -1 in case of error, the index in the stack otherwise
1749
 */
1750
int
1751
inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1752
9.07M
{
1753
9.07M
    if ((ctxt == NULL) || (value == NULL))
1754
0
        return(-1);
1755
9.07M
    if (ctxt->inputNr >= ctxt->inputMax) {
1756
6.73k
        ctxt->inputMax *= 2;
1757
6.73k
        ctxt->inputTab =
1758
6.73k
            (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1759
6.73k
                                             ctxt->inputMax *
1760
6.73k
                                             sizeof(ctxt->inputTab[0]));
1761
6.73k
        if (ctxt->inputTab == NULL) {
1762
0
            xmlErrMemory(ctxt, NULL);
1763
0
      ctxt->inputMax /= 2;
1764
0
            return (-1);
1765
0
        }
1766
6.73k
    }
1767
9.07M
    ctxt->inputTab[ctxt->inputNr] = value;
1768
9.07M
    ctxt->input = value;
1769
9.07M
    return (ctxt->inputNr++);
1770
9.07M
}
1771
/**
1772
 * inputPop:
1773
 * @ctxt: an XML parser context
1774
 *
1775
 * Pops the top parser input from the input stack
1776
 *
1777
 * Returns the input just removed
1778
 */
1779
xmlParserInputPtr
1780
inputPop(xmlParserCtxtPtr ctxt)
1781
24.0M
{
1782
24.0M
    xmlParserInputPtr ret;
1783
1784
24.0M
    if (ctxt == NULL)
1785
0
        return(NULL);
1786
24.0M
    if (ctxt->inputNr <= 0)
1787
15.1M
        return (NULL);
1788
8.98M
    ctxt->inputNr--;
1789
8.98M
    if (ctxt->inputNr > 0)
1790
1.70M
        ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1791
7.27M
    else
1792
7.27M
        ctxt->input = NULL;
1793
8.98M
    ret = ctxt->inputTab[ctxt->inputNr];
1794
8.98M
    ctxt->inputTab[ctxt->inputNr] = NULL;
1795
8.98M
    return (ret);
1796
24.0M
}
1797
/**
1798
 * nodePush:
1799
 * @ctxt:  an XML parser context
1800
 * @value:  the element node
1801
 *
1802
 * Pushes a new element node on top of the node stack
1803
 *
1804
 * Returns -1 in case of error, the index in the stack otherwise
1805
 */
1806
int
1807
nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1808
35.9M
{
1809
35.9M
    if (ctxt == NULL) return(0);
1810
35.9M
    if (ctxt->nodeNr >= ctxt->nodeMax) {
1811
106k
        xmlNodePtr *tmp;
1812
1813
106k
  tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1814
106k
                                      ctxt->nodeMax * 2 *
1815
106k
                                      sizeof(ctxt->nodeTab[0]));
1816
106k
        if (tmp == NULL) {
1817
0
            xmlErrMemory(ctxt, NULL);
1818
0
            return (-1);
1819
0
        }
1820
106k
        ctxt->nodeTab = tmp;
1821
106k
  ctxt->nodeMax *= 2;
1822
106k
    }
1823
35.9M
    if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1824
35.9M
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
1825
30
  xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
1826
30
     "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
1827
30
        xmlParserMaxDepth);
1828
30
  xmlHaltParser(ctxt);
1829
30
  return(-1);
1830
30
    }
1831
35.9M
    ctxt->nodeTab[ctxt->nodeNr] = value;
1832
35.9M
    ctxt->node = value;
1833
35.9M
    return (ctxt->nodeNr++);
1834
35.9M
}
1835
1836
/**
1837
 * nodePop:
1838
 * @ctxt: an XML parser context
1839
 *
1840
 * Pops the top element node from the node stack
1841
 *
1842
 * Returns the node just removed
1843
 */
1844
xmlNodePtr
1845
nodePop(xmlParserCtxtPtr ctxt)
1846
34.8M
{
1847
34.8M
    xmlNodePtr ret;
1848
1849
34.8M
    if (ctxt == NULL) return(NULL);
1850
34.8M
    if (ctxt->nodeNr <= 0)
1851
7.51M
        return (NULL);
1852
27.3M
    ctxt->nodeNr--;
1853
27.3M
    if (ctxt->nodeNr > 0)
1854
24.5M
        ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1855
2.74M
    else
1856
2.74M
        ctxt->node = NULL;
1857
27.3M
    ret = ctxt->nodeTab[ctxt->nodeNr];
1858
27.3M
    ctxt->nodeTab[ctxt->nodeNr] = NULL;
1859
27.3M
    return (ret);
1860
34.8M
}
1861
1862
/**
1863
 * nameNsPush:
1864
 * @ctxt:  an XML parser context
1865
 * @value:  the element name
1866
 * @prefix:  the element prefix
1867
 * @URI:  the element namespace name
1868
 * @line:  the current line number for error messages
1869
 * @nsNr:  the number of namespaces pushed on the namespace table
1870
 *
1871
 * Pushes a new element name/prefix/URL on top of the name stack
1872
 *
1873
 * Returns -1 in case of error, the index in the stack otherwise
1874
 */
1875
static int
1876
nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1877
           const xmlChar *prefix, const xmlChar *URI, int line, int nsNr)
1878
183M
{
1879
183M
    xmlStartTag *tag;
1880
1881
183M
    if (ctxt->nameNr >= ctxt->nameMax) {
1882
642k
        const xmlChar * *tmp;
1883
642k
        xmlStartTag *tmp2;
1884
642k
        ctxt->nameMax *= 2;
1885
642k
        tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1886
642k
                                    ctxt->nameMax *
1887
642k
                                    sizeof(ctxt->nameTab[0]));
1888
642k
        if (tmp == NULL) {
1889
0
      ctxt->nameMax /= 2;
1890
0
      goto mem_error;
1891
0
        }
1892
642k
  ctxt->nameTab = tmp;
1893
642k
        tmp2 = (xmlStartTag *) xmlRealloc((void * *)ctxt->pushTab,
1894
642k
                                    ctxt->nameMax *
1895
642k
                                    sizeof(ctxt->pushTab[0]));
1896
642k
        if (tmp2 == NULL) {
1897
0
      ctxt->nameMax /= 2;
1898
0
      goto mem_error;
1899
0
        }
1900
642k
  ctxt->pushTab = tmp2;
1901
183M
    } else if (ctxt->pushTab == NULL) {
1902
6.61M
        ctxt->pushTab = (xmlStartTag *) xmlMalloc(ctxt->nameMax *
1903
6.61M
                                            sizeof(ctxt->pushTab[0]));
1904
6.61M
        if (ctxt->pushTab == NULL)
1905
0
            goto mem_error;
1906
6.61M
    }
1907
183M
    ctxt->nameTab[ctxt->nameNr] = value;
1908
183M
    ctxt->name = value;
1909
183M
    tag = &ctxt->pushTab[ctxt->nameNr];
1910
183M
    tag->prefix = prefix;
1911
183M
    tag->URI = URI;
1912
183M
    tag->line = line;
1913
183M
    tag->nsNr = nsNr;
1914
183M
    return (ctxt->nameNr++);
1915
0
mem_error:
1916
0
    xmlErrMemory(ctxt, NULL);
1917
0
    return (-1);
1918
183M
}
1919
#ifdef LIBXML_PUSH_ENABLED
1920
/**
1921
 * nameNsPop:
1922
 * @ctxt: an XML parser context
1923
 *
1924
 * Pops the top element/prefix/URI name from the name stack
1925
 *
1926
 * Returns the name just removed
1927
 */
1928
static const xmlChar *
1929
nameNsPop(xmlParserCtxtPtr ctxt)
1930
1.51M
{
1931
1.51M
    const xmlChar *ret;
1932
1933
1.51M
    if (ctxt->nameNr <= 0)
1934
0
        return (NULL);
1935
1.51M
    ctxt->nameNr--;
1936
1.51M
    if (ctxt->nameNr > 0)
1937
1.47M
        ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1938
45.0k
    else
1939
45.0k
        ctxt->name = NULL;
1940
1.51M
    ret = ctxt->nameTab[ctxt->nameNr];
1941
1.51M
    ctxt->nameTab[ctxt->nameNr] = NULL;
1942
1.51M
    return (ret);
1943
1.51M
}
1944
#endif /* LIBXML_PUSH_ENABLED */
1945
1946
/**
1947
 * namePush:
1948
 * @ctxt:  an XML parser context
1949
 * @value:  the element name
1950
 *
1951
 * Pushes a new element name on top of the name stack
1952
 *
1953
 * Returns -1 in case of error, the index in the stack otherwise
1954
 */
1955
int
1956
namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
1957
0
{
1958
0
    if (ctxt == NULL) return (-1);
1959
1960
0
    if (ctxt->nameNr >= ctxt->nameMax) {
1961
0
        const xmlChar * *tmp;
1962
0
        tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1963
0
                                    ctxt->nameMax * 2 *
1964
0
                                    sizeof(ctxt->nameTab[0]));
1965
0
        if (tmp == NULL) {
1966
0
      goto mem_error;
1967
0
        }
1968
0
  ctxt->nameTab = tmp;
1969
0
        ctxt->nameMax *= 2;
1970
0
    }
1971
0
    ctxt->nameTab[ctxt->nameNr] = value;
1972
0
    ctxt->name = value;
1973
0
    return (ctxt->nameNr++);
1974
0
mem_error:
1975
0
    xmlErrMemory(ctxt, NULL);
1976
0
    return (-1);
1977
0
}
1978
/**
1979
 * namePop:
1980
 * @ctxt: an XML parser context
1981
 *
1982
 * Pops the top element name from the name stack
1983
 *
1984
 * Returns the name just removed
1985
 */
1986
const xmlChar *
1987
namePop(xmlParserCtxtPtr ctxt)
1988
166M
{
1989
166M
    const xmlChar *ret;
1990
1991
166M
    if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1992
0
        return (NULL);
1993
166M
    ctxt->nameNr--;
1994
166M
    if (ctxt->nameNr > 0)
1995
156M
        ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1996
10.0M
    else
1997
10.0M
        ctxt->name = NULL;
1998
166M
    ret = ctxt->nameTab[ctxt->nameNr];
1999
166M
    ctxt->nameTab[ctxt->nameNr] = NULL;
2000
166M
    return (ret);
2001
166M
}
2002
2003
276M
/*
 * spacePush:
 * @ctxt:  an XML parser context
 * @val:  the value to store
 *
 * Pushes @val on top of the ctxt->spaceTab stack, doubling the array when
 * it is full, and points ctxt->space at the new top entry.
 *
 * Returns -1 in case of allocation failure, the index in the stack otherwise
 */
static int spacePush(xmlParserCtxtPtr ctxt, int val) {
    int idx;

    if (ctxt->spaceNr >= ctxt->spaceMax) {
        int *grown;

        ctxt->spaceMax *= 2;
        grown = (int *) xmlRealloc(ctxt->spaceTab,
                                   ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
        if (grown == NULL) {
            xmlErrMemory(ctxt, NULL);
            /* Undo the doubling: the old table is still the live one. */
            ctxt->spaceMax /= 2;
            return(-1);
        }
        ctxt->spaceTab = grown;
    }

    idx = ctxt->spaceNr;
    ctxt->spaceTab[idx] = val;
    ctxt->space = &ctxt->spaceTab[idx];
    ctxt->spaceNr = idx + 1;
    return(idx);
}
2021
2022
261M
/*
 * spacePop:
 * @ctxt:  an XML parser context
 *
 * Pops the top value of the ctxt->spaceTab stack and overwrites the vacated
 * slot with -1. ctxt->space is repointed at the entry just below, or at
 * slot 0 when the stack becomes empty (it is never set to NULL here).
 *
 * Returns the value removed, 0 if the stack is empty
 */
static int spacePop(xmlParserCtxtPtr ctxt) {
    int top;
    int below;
    int popped;

    if (ctxt->spaceNr <= 0)
        return(0);

    top = --ctxt->spaceNr;
    below = (top > 0) ? (top - 1) : 0;
    ctxt->space = &ctxt->spaceTab[below];

    popped = ctxt->spaceTab[top];
    ctxt->spaceTab[top] = -1;
    return(popped);
}
2034
2035
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported. All of them expect a parser context variable named
 * "ctxt" to be in scope at the point of use.
 *
 * Dirty macros, i.e. one often needs to make assumptions about the context
 * to use them
 *
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   BASE_PTR return the base pointer of the current input buffer.
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     same as CUR but in the input buffer, bypass any token
 *           extraction that may have been done (both currently expand to
 *           the same expression)
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR it should be used
 *           only to compare on ASCII based substring.
 *   CMPn(s, c1, ..., cn) true when the first n bytes at s equal c1..cn.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings without newlines within the parser.
 *   SKIPL(n) Skip n xmlChar one at a time, keeping line/col up to date
 *           when newlines are crossed.
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline
 *           ASCII defined char within the parser.
 * Clean macros, not dependent on an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skip to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pops up unfinished entities on the fly.
 *   NEXTL(l) Skip the current unicode character of l xmlChars long.
 *   CUR_CHAR(l) returns the current unicode character (int), set l
 *           to the number of xmlChars used for the encoding [0-5].
 *   CUR_SCHAR  same but operate on a string instead of the context
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
 *            the index
 *   GROW, SHRINK  handling of input buffers
 */

#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur
#define BASE_PTR ctxt->input->base

/*
 * Every macro argument is parenthesized so that an expression argument
 * (pointer arithmetic, a conditional, ...) is taken as a whole.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((const unsigned char *) (s))[ 0 ] == (c1) && \
    ((const unsigned char *) (s))[ 1 ] == (c2) && \
    ((const unsigned char *) (s))[ 2 ] == (c3) && \
    ((const unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && \
    ((const unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && \
    ((const unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && \
    ((const unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && \
    ((const unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((const unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((const unsigned char *) (s))[ 9 ] == (c10) )

/* Note: val is evaluated twice, keep it free of side effects. */
#define SKIP(val) do {                                                  \
    ctxt->input->cur += (val);                                          \
    ctxt->input->col += (val);                                          \
    if (*ctxt->input->cur == 0)                                         \
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);                   \
  } while (0)

#define SKIPL(val) do {                                                 \
    int skipl;                                                          \
    for(skipl=0; skipl<(val); skipl++) {                                \
        if (*(ctxt->input->cur) == '\n') {                              \
            ctxt->input->line++; ctxt->input->col = 1;                  \
        } else ctxt->input->col++;                                      \
        ctxt->input->cur++;                                             \
    }                                                                   \
    if (*ctxt->input->cur == 0)                                         \
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);                   \
  } while (0)

/*
 * SHRINK expands to a complete "if" statement including its terminating
 * semicolon, so it must only be used as a statement of its own and never
 * as the unbraced body of an if that has an else branch.
 */
#define SHRINK if ((ctxt->progressive == 0) &&                          \
                   (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
                   (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
        xmlSHRINK (ctxt);
2116
2117
2.23M
/*
 * xmlSHRINK:
 * @ctxt:  an XML parser context
 *
 * Out-of-line part of the SHRINK macro: shrinks the current input and, if
 * the current position then sits on a zero byte, tries to read more data.
 */
static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
    xmlParserInputShrink(ctxt->input);

    if (*ctxt->input->cur != 0)
        return;
    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
}
2122
2123
4.33G
#define GROW if ((ctxt->progressive == 0) &&       \
2124
4.33G
     (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
2125
4.33G
  xmlGROW (ctxt);
2126
2127
215M
static void xmlGROW (xmlParserCtxtPtr ctxt) {
2128
215M
    ptrdiff_t curEnd = ctxt->input->end - ctxt->input->cur;
2129
215M
    ptrdiff_t curBase = ctxt->input->cur - ctxt->input->base;
2130
2131
215M
    if (((curEnd > XML_MAX_LOOKUP_LIMIT) ||
2132
215M
         (curBase > XML_MAX_LOOKUP_LIMIT)) &&
2133
215M
         ((ctxt->input->buf) &&
2134
0
          (ctxt->input->buf->readcallback != xmlInputReadCallbackNop)) &&
2135
215M
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
2136
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
2137
0
        xmlHaltParser(ctxt);
2138
0
  return;
2139
0
    }
2140
215M
    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2141
215M
    if ((ctxt->input->cur > ctxt->input->end) ||
2142
215M
        (ctxt->input->cur < ctxt->input->base)) {
2143
0
        xmlHaltParser(ctxt);
2144
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "cur index out of bound");
2145
0
  return;
2146
0
    }
2147
215M
    if ((ctxt->input->cur != NULL) && (*ctxt->input->cur == 0))
2148
7.71M
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2149
215M
}
2150
2151
720M
/*
 * Character consuming helpers. Like the accessor macros they expect a
 * parser context variable named "ctxt" in scope. Macro arguments are
 * parenthesized so that expression arguments are taken as a whole.
 */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1 expands to a brace block, not a do/while(0): use it as a statement
 * of its own and not as the unbraced body of an if that has an else branch.
 */
#define NEXT1 {                                                         \
        ctxt->input->col++;                                             \
        ctxt->input->cur++;                                             \
        if (*ctxt->input->cur == 0)                                     \
            xmlParserInputGrow(ctxt->input, INPUT_CHUNK);               \
    }

/* Advance by one character of l bytes, keeping line/col up to date. */
#define NEXTL(l) do {                                                   \
    if (*(ctxt->input->cur) == '\n') {                                  \
        ctxt->input->line++; ctxt->input->col = 1;                      \
    } else ctxt->input->col++;                                          \
    ctxt->input->cur += (l);                                            \
  } while (0)

#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * COPY_BUF expands to an if/else statement without its final semicolon;
 * the caller supplies it. b and i are evaluated more than once.
 */
#define COPY_BUF(l,b,i,v)                                               \
    if ((l) == 1) (b)[(i)++] = (v);                                     \
    else (i) += xmlCopyCharMultiByte(&(b)[(i)], (v))

/* Total number of bytes consumed so far on the current input. */
#define CUR_CONSUMED \
    (ctxt->input->consumed + (ctxt->input->cur - ctxt->input->base))
2178
2179
/**
2180
 * xmlSkipBlankChars:
2181
 * @ctxt:  the XML parser context
2182
 *
2183
 * skip all blanks character found at that point in the input streams.
2184
 * It pops up finished entities in the process if allowable at that point.
2185
 *
2186
 * Returns the number of space chars skipped
2187
 */
2188
2189
int
2190
720M
xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
2191
720M
    int res = 0;
2192
2193
    /*
2194
     * It's Okay to use CUR/NEXT here since all the blanks are on
2195
     * the ASCII range.
2196
     */
2197
720M
    if (((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) ||
2198
720M
        (ctxt->instate == XML_PARSER_START)) {
2199
664M
  const xmlChar *cur;
2200
  /*
2201
   * if we are in the document content, go really fast
2202
   */
2203
664M
  cur = ctxt->input->cur;
2204
664M
  while (IS_BLANK_CH(*cur)) {
2205
184M
      if (*cur == '\n') {
2206
6.80M
    ctxt->input->line++; ctxt->input->col = 1;
2207
177M
      } else {
2208
177M
    ctxt->input->col++;
2209
177M
      }
2210
184M
      cur++;
2211
184M
      if (res < INT_MAX)
2212
184M
    res++;
2213
184M
      if (*cur == 0) {
2214
214k
    ctxt->input->cur = cur;
2215
214k
    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2216
214k
    cur = ctxt->input->cur;
2217
214k
      }
2218
184M
  }
2219
664M
  ctxt->input->cur = cur;
2220
664M
    } else {
2221
56.3M
        int expandPE = ((ctxt->external != 0) || (ctxt->inputNr != 1));
2222
2223
233M
  while (1) {
2224
233M
            if (IS_BLANK_CH(CUR)) { /* CHECKED tstblanks.xml */
2225
174M
    NEXT;
2226
174M
      } else if (CUR == '%') {
2227
                /*
2228
                 * Need to handle support of entities branching here
2229
                 */
2230
3.35M
          if ((expandPE == 0) || (IS_BLANK_CH(NXT(1))) || (NXT(1) == 0))
2231
1.52M
                    break;
2232
1.82M
          xmlParsePEReference(ctxt);
2233
56.2M
            } else if (CUR == 0) {
2234
1.51M
                if (ctxt->inputNr <= 1)
2235
55.1k
                    break;
2236
1.46M
                xmlPopInput(ctxt);
2237
54.7M
            } else {
2238
54.7M
                break;
2239
54.7M
            }
2240
2241
            /*
2242
             * Also increase the counter when entering or exiting a PERef.
2243
             * The spec says: "When a parameter-entity reference is recognized
2244
             * in the DTD and included, its replacement text MUST be enlarged
2245
             * by the attachment of one leading and one following space (#x20)
2246
             * character."
2247
             */
2248
177M
      if (res < INT_MAX)
2249
177M
    res++;
2250
177M
        }
2251
56.3M
    }
2252
720M
    return(res);
2253
720M
}
2254
2255
/************************************************************************
2256
 *                  *
2257
 *    Commodity functions to handle entities      *
2258
 *                  *
2259
 ************************************************************************/
2260
2261
/**
2262
 * xmlPopInput:
2263
 * @ctxt:  an XML parser context
2264
 *
2265
 * xmlPopInput: the current input pointed by ctxt->input came to an end
2266
 *          pop it and return the next char.
2267
 *
2268
 * Returns the current xmlChar in the parser context
2269
 */
2270
xmlChar
2271
1.47M
xmlPopInput(xmlParserCtxtPtr ctxt) {
2272
1.47M
    if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
2273
1.47M
    if (xmlParserDebugEntities)
2274
0
  xmlGenericError(xmlGenericErrorContext,
2275
0
    "Popping input %d\n", ctxt->inputNr);
2276
1.47M
    if ((ctxt->inputNr > 1) && (ctxt->inSubset == 0) &&
2277
1.47M
        (ctxt->instate != XML_PARSER_EOF))
2278
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2279
0
                    "Unfinished entity outside the DTD");
2280
1.47M
    xmlFreeInputStream(inputPop(ctxt));
2281
1.47M
    if (*ctxt->input->cur == 0)
2282
343
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2283
1.47M
    return(CUR);
2284
1.47M
}
2285
2286
/**
2287
 * xmlPushInput:
2288
 * @ctxt:  an XML parser context
2289
 * @input:  an XML parser input fragment (entity, XML fragment ...).
2290
 *
2291
 * xmlPushInput: switch to a new input stream which is stacked on top
2292
 *               of the previous one(s).
2293
 * Returns -1 in case of error or the index in the input stack
2294
 */
2295
int
2296
1.78M
xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
2297
1.78M
    int ret;
2298
1.78M
    if (input == NULL) return(-1);
2299
2300
1.78M
    if (xmlParserDebugEntities) {
2301
0
  if ((ctxt->input != NULL) && (ctxt->input->filename))
2302
0
      xmlGenericError(xmlGenericErrorContext,
2303
0
        "%s(%d): ", ctxt->input->filename,
2304
0
        ctxt->input->line);
2305
0
  xmlGenericError(xmlGenericErrorContext,
2306
0
    "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
2307
0
    }
2308
1.78M
    if (((ctxt->inputNr > 40) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2309
1.78M
        (ctxt->inputNr > 1024)) {
2310
1.55k
        xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2311
228k
        while (ctxt->inputNr > 1)
2312
226k
            xmlFreeInputStream(inputPop(ctxt));
2313
1.55k
  return(-1);
2314
1.55k
    }
2315
1.78M
    ret = inputPush(ctxt, input);
2316
1.78M
    if (ctxt->instate == XML_PARSER_EOF)
2317
0
        return(-1);
2318
1.78M
    GROW;
2319
1.78M
    return(ret);
2320
1.78M
}
2321
2322
/**
2323
 * xmlParseCharRef:
2324
 * @ctxt:  an XML parser context
2325
 *
2326
 * DEPRECATED: Internal function, don't use.
2327
 *
2328
 * parse Reference declarations
2329
 *
2330
 * [66] CharRef ::= '&#' [0-9]+ ';' |
2331
 *                  '&#x' [0-9a-fA-F]+ ';'
2332
 *
2333
 * [ WFC: Legal Character ]
2334
 * Characters referred to using character references must match the
2335
 * production for Char.
2336
 *
2337
 * Returns the value parsed (as an int), 0 in case of error
2338
 */
2339
int
2340
36.1M
xmlParseCharRef(xmlParserCtxtPtr ctxt) {
2341
36.1M
    int val = 0;
2342
36.1M
    int count = 0;
2343
2344
    /*
2345
     * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2346
     */
2347
36.1M
    if ((RAW == '&') && (NXT(1) == '#') &&
2348
36.1M
        (NXT(2) == 'x')) {
2349
9.89M
  SKIP(3);
2350
9.89M
  GROW;
2351
23.2M
  while (RAW != ';') { /* loop blocked by count */
2352
14.4M
      if (count++ > 20) {
2353
96.3k
    count = 0;
2354
96.3k
    GROW;
2355
96.3k
                if (ctxt->instate == XML_PARSER_EOF)
2356
0
                    return(0);
2357
96.3k
      }
2358
14.4M
      if ((RAW >= '0') && (RAW <= '9'))
2359
4.24M
          val = val * 16 + (CUR - '0');
2360
10.2M
      else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2361
5.71M
          val = val * 16 + (CUR - 'a') + 10;
2362
4.52M
      else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2363
3.39M
          val = val * 16 + (CUR - 'A') + 10;
2364
1.13M
      else {
2365
1.13M
    xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2366
1.13M
    val = 0;
2367
1.13M
    break;
2368
1.13M
      }
2369
13.3M
      if (val > 0x110000)
2370
1.67M
          val = 0x110000;
2371
2372
13.3M
      NEXT;
2373
13.3M
      count++;
2374
13.3M
  }
2375
9.89M
  if (RAW == ';') {
2376
      /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2377
8.75M
      ctxt->input->col++;
2378
8.75M
      ctxt->input->cur++;
2379
8.75M
  }
2380
26.2M
    } else if  ((RAW == '&') && (NXT(1) == '#')) {
2381
26.2M
  SKIP(2);
2382
26.2M
  GROW;
2383
100M
  while (RAW != ';') { /* loop blocked by count */
2384
75.2M
      if (count++ > 20) {
2385
164k
    count = 0;
2386
164k
    GROW;
2387
164k
                if (ctxt->instate == XML_PARSER_EOF)
2388
0
                    return(0);
2389
164k
      }
2390
75.2M
      if ((RAW >= '0') && (RAW <= '9'))
2391
73.9M
          val = val * 10 + (CUR - '0');
2392
1.35M
      else {
2393
1.35M
    xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2394
1.35M
    val = 0;
2395
1.35M
    break;
2396
1.35M
      }
2397
73.9M
      if (val > 0x110000)
2398
1.79M
          val = 0x110000;
2399
2400
73.9M
      NEXT;
2401
73.9M
      count++;
2402
73.9M
  }
2403
26.2M
  if (RAW == ';') {
2404
      /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2405
24.9M
      ctxt->input->col++;
2406
24.9M
      ctxt->input->cur++;
2407
24.9M
  }
2408
26.2M
    } else {
2409
0
        xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2410
0
    }
2411
2412
    /*
2413
     * [ WFC: Legal Character ]
2414
     * Characters referred to using character references must match the
2415
     * production for Char.
2416
     */
2417
36.1M
    if (val >= 0x110000) {
2418
25.3k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2419
25.3k
                "xmlParseCharRef: character reference out of bounds\n",
2420
25.3k
          val);
2421
36.1M
    } else if (IS_CHAR(val)) {
2422
33.5M
        return(val);
2423
33.5M
    } else {
2424
2.56M
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2425
2.56M
                          "xmlParseCharRef: invalid xmlChar value %d\n",
2426
2.56M
                    val);
2427
2.56M
    }
2428
2.59M
    return(0);
2429
36.1M
}
2430
2431
/**
2432
 * xmlParseStringCharRef:
2433
 * @ctxt:  an XML parser context
2434
 * @str:  a pointer to an index in the string
2435
 *
2436
 * parse Reference declarations, variant parsing from a string rather
2437
 * than an an input flow.
2438
 *
2439
 * [66] CharRef ::= '&#' [0-9]+ ';' |
2440
 *                  '&#x' [0-9a-fA-F]+ ';'
2441
 *
2442
 * [ WFC: Legal Character ]
2443
 * Characters referred to using character references must match the
2444
 * production for Char.
2445
 *
2446
 * Returns the value parsed (as an int), 0 in case of error, str will be
2447
 *         updated to the current value of the index
2448
 */
2449
static int
2450
1.40M
xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2451
1.40M
    const xmlChar *ptr;
2452
1.40M
    xmlChar cur;
2453
1.40M
    int val = 0;
2454
2455
1.40M
    if ((str == NULL) || (*str == NULL)) return(0);
2456
1.40M
    ptr = *str;
2457
1.40M
    cur = *ptr;
2458
1.40M
    if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2459
390k
  ptr += 3;
2460
390k
  cur = *ptr;
2461
935k
  while (cur != ';') { /* Non input consuming loop */
2462
547k
      if ((cur >= '0') && (cur <= '9'))
2463
163k
          val = val * 16 + (cur - '0');
2464
384k
      else if ((cur >= 'a') && (cur <= 'f'))
2465
278k
          val = val * 16 + (cur - 'a') + 10;
2466
105k
      else if ((cur >= 'A') && (cur <= 'F'))
2467
102k
          val = val * 16 + (cur - 'A') + 10;
2468
2.58k
      else {
2469
2.58k
    xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2470
2.58k
    val = 0;
2471
2.58k
    break;
2472
2.58k
      }
2473
544k
      if (val > 0x110000)
2474
39.2k
          val = 0x110000;
2475
2476
544k
      ptr++;
2477
544k
      cur = *ptr;
2478
544k
  }
2479
390k
  if (cur == ';')
2480
387k
      ptr++;
2481
1.01M
    } else if  ((cur == '&') && (ptr[1] == '#')){
2482
1.01M
  ptr += 2;
2483
1.01M
  cur = *ptr;
2484
3.76M
  while (cur != ';') { /* Non input consuming loops */
2485
2.74M
      if ((cur >= '0') && (cur <= '9'))
2486
2.74M
          val = val * 10 + (cur - '0');
2487
4.08k
      else {
2488
4.08k
    xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2489
4.08k
    val = 0;
2490
4.08k
    break;
2491
4.08k
      }
2492
2.74M
      if (val > 0x110000)
2493
54.3k
          val = 0x110000;
2494
2495
2.74M
      ptr++;
2496
2.74M
      cur = *ptr;
2497
2.74M
  }
2498
1.01M
  if (cur == ';')
2499
1.01M
      ptr++;
2500
1.01M
    } else {
2501
0
  xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2502
0
  return(0);
2503
0
    }
2504
1.40M
    *str = ptr;
2505
2506
    /*
2507
     * [ WFC: Legal Character ]
2508
     * Characters referred to using character references must match the
2509
     * production for Char.
2510
     */
2511
1.40M
    if (val >= 0x110000) {
2512
614
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2513
614
                "xmlParseStringCharRef: character reference out of bounds\n",
2514
614
                val);
2515
1.40M
    } else if (IS_CHAR(val)) {
2516
1.40M
        return(val);
2517
1.40M
    } else {
2518
7.46k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2519
7.46k
        "xmlParseStringCharRef: invalid xmlChar value %d\n",
2520
7.46k
        val);
2521
7.46k
    }
2522
8.08k
    return(0);
2523
1.40M
}
2524
2525
/**
2526
 * xmlParserHandlePEReference:
2527
 * @ctxt:  the parser context
2528
 *
2529
 * [69] PEReference ::= '%' Name ';'
2530
 *
2531
 * [ WFC: No Recursion ]
2532
 * A parsed entity must not contain a recursive
2533
 * reference to itself, either directly or indirectly.
2534
 *
2535
 * [ WFC: Entity Declared ]
2536
 * In a document without any DTD, a document with only an internal DTD
2537
 * subset which contains no parameter entity references, or a document
2538
 * with "standalone='yes'", ...  ... The declaration of a parameter
2539
 * entity must precede any reference to it...
2540
 *
2541
 * [ VC: Entity Declared ]
2542
 * In a document with an external subset or external parameter entities
2543
 * with "standalone='no'", ...  ... The declaration of a parameter entity
2544
 * must precede any reference to it...
2545
 *
2546
 * [ WFC: In DTD ]
2547
 * Parameter-entity references may only appear in the DTD.
2548
 * NOTE: misleading but this is handled.
2549
 *
2550
 * A PEReference may have been detected in the current input stream
2551
 * the handling is done accordingly to
2552
 *      http://www.w3.org/TR/REC-xml#entproc
2553
 * i.e.
2554
 *   - Included in literal in entity values
2555
 *   - Included as Parameter Entity reference within DTDs
2556
 */
2557
void
2558
0
xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
2559
0
    switch(ctxt->instate) {
2560
0
  case XML_PARSER_CDATA_SECTION:
2561
0
      return;
2562
0
        case XML_PARSER_COMMENT:
2563
0
      return;
2564
0
  case XML_PARSER_START_TAG:
2565
0
      return;
2566
0
  case XML_PARSER_END_TAG:
2567
0
      return;
2568
0
        case XML_PARSER_EOF:
2569
0
      xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
2570
0
      return;
2571
0
        case XML_PARSER_PROLOG:
2572
0
  case XML_PARSER_START:
2573
0
  case XML_PARSER_MISC:
2574
0
      xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
2575
0
      return;
2576
0
  case XML_PARSER_ENTITY_DECL:
2577
0
        case XML_PARSER_CONTENT:
2578
0
        case XML_PARSER_ATTRIBUTE_VALUE:
2579
0
        case XML_PARSER_PI:
2580
0
  case XML_PARSER_SYSTEM_LITERAL:
2581
0
  case XML_PARSER_PUBLIC_LITERAL:
2582
      /* we just ignore it there */
2583
0
      return;
2584
0
        case XML_PARSER_EPILOG:
2585
0
      xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
2586
0
      return;
2587
0
  case XML_PARSER_ENTITY_VALUE:
2588
      /*
2589
       * NOTE: in the case of entity values, we don't do the
2590
       *       substitution here since we need the literal
2591
       *       entity value to be able to save the internal
2592
       *       subset of the document.
2593
       *       This will be handled by xmlStringDecodeEntities
2594
       */
2595
0
      return;
2596
0
        case XML_PARSER_DTD:
2597
      /*
2598
       * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2599
       * In the internal DTD subset, parameter-entity references
2600
       * can occur only where markup declarations can occur, not
2601
       * within markup declarations.
2602
       * In that case this is handled in xmlParseMarkupDecl
2603
       */
2604
0
      if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2605
0
    return;
2606
0
      if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
2607
0
    return;
2608
0
            break;
2609
0
        case XML_PARSER_IGNORE:
2610
0
            return;
2611
0
    }
2612
2613
0
    xmlParsePEReference(ctxt);
2614
0
}
2615
2616
/*
 * Macro used to grow the current buffer to twice its size plus n bytes.
 * buffer##_size is expected to be a size_t
 * mem_error: is expected to handle memory allocation failures; it is also
 * jumped to, without touching the buffer, when the new size would not fit
 * in a size_t.
 * n is evaluated exactly once. The macro expands to a brace block, so use
 * it as a statement of its own.
 */
#define growBuffer(buffer, n) {                                         \
    xmlChar *tmp;                                                       \
    size_t grow_extra_ = (size_t) (n);                                  \
    size_t new_size;                                                    \
    if (buffer##_size > (((size_t) -1) - grow_extra_) / 2)              \
        goto mem_error;                                                 \
    new_size = buffer##_size * 2 + grow_extra_;                         \
    tmp = (xmlChar *) xmlRealloc(buffer, new_size);                     \
    if (tmp == NULL) goto mem_error;                                    \
    buffer = tmp;                                                       \
    buffer##_size = new_size;                                           \
}
2630
2631
/**
2632
 * xmlStringLenDecodeEntities:
2633
 * @ctxt:  the parser context
2634
 * @str:  the input string
2635
 * @len: the string length
2636
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2637
 * @end:  an end marker xmlChar, 0 if none
2638
 * @end2:  an end marker xmlChar, 0 if none
2639
 * @end3:  an end marker xmlChar, 0 if none
2640
 *
2641
 * Takes an entity string content and processes it to perform the adequate substitutions.
2642
 *
2643
 * [67] Reference ::= EntityRef | CharRef
2644
 *
2645
 * [69] PEReference ::= '%' Name ';'
2646
 *
2647
 * Returns A newly allocated string with the substitution done. The caller
2648
 *      must deallocate it !
2649
 */
2650
xmlChar *
2651
xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2652
10.7M
          int what, xmlChar end, xmlChar  end2, xmlChar end3) {
2653
10.7M
    xmlChar *buffer = NULL;
2654
10.7M
    size_t buffer_size = 0;
2655
10.7M
    size_t nbchars = 0;
2656
2657
10.7M
    xmlChar *current = NULL;
2658
10.7M
    xmlChar *rep = NULL;
2659
10.7M
    const xmlChar *last;
2660
10.7M
    xmlEntityPtr ent;
2661
10.7M
    int c,l;
2662
2663
10.7M
    if ((ctxt == NULL) || (str == NULL) || (len < 0))
2664
0
  return(NULL);
2665
10.7M
    last = str + len;
2666
2667
10.7M
    if (((ctxt->depth > 40) &&
2668
10.7M
         ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2669
10.7M
  (ctxt->depth > 1024)) {
2670
14.7k
  xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2671
14.7k
  return(NULL);
2672
14.7k
    }
2673
2674
    /*
2675
     * allocate a translation buffer.
2676
     */
2677
10.6M
    buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
2678
10.6M
    buffer = (xmlChar *) xmlMallocAtomic(buffer_size);
2679
10.6M
    if (buffer == NULL) goto mem_error;
2680
2681
    /*
2682
     * OK loop until we reach one of the ending chars or a size limit.
2683
     * we are operating on already parsed values.
2684
     */
2685
10.6M
    if (str < last)
2686
10.1M
  c = CUR_SCHAR(str, l);
2687
495k
    else
2688
495k
        c = 0;
2689
752M
    while ((c != 0) && (c != end) && /* non input consuming loop */
2690
752M
           (c != end2) && (c != end3) &&
2691
752M
           (ctxt->instate != XML_PARSER_EOF)) {
2692
2693
742M
  if (c == 0) break;
2694
742M
        if ((c == '&') && (str[1] == '#')) {
2695
1.40M
      int val = xmlParseStringCharRef(ctxt, &str);
2696
1.40M
      if (val == 0)
2697
8.08k
                goto int_error;
2698
1.40M
      COPY_BUF(0,buffer,nbchars,val);
2699
1.40M
      if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2700
58.1k
          growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2701
58.1k
      }
2702
741M
  } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2703
831k
      if (xmlParserDebugEntities)
2704
0
    xmlGenericError(xmlGenericErrorContext,
2705
0
      "String decoding Entity Reference: %.30s\n",
2706
0
      str);
2707
831k
      ent = xmlParseStringEntityRef(ctxt, &str);
2708
831k
      xmlParserEntityCheck(ctxt, 0, ent, 0);
2709
831k
      if (ent != NULL)
2710
702k
          ctxt->nbentities += ent->checked / 2;
2711
831k
      if ((ent != NULL) &&
2712
831k
    (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2713
60.5k
    if (ent->content != NULL) {
2714
60.5k
        COPY_BUF(0,buffer,nbchars,ent->content[0]);
2715
60.5k
        if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2716
536
      growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2717
536
        }
2718
60.5k
    } else {
2719
0
        xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2720
0
          "predefined entity has no content\n");
2721
0
                    goto int_error;
2722
0
    }
2723
771k
      } else if ((ent != NULL) && (ent->content != NULL)) {
2724
626k
    ctxt->depth++;
2725
626k
    rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2726
626k
                            0, 0, 0);
2727
626k
    ctxt->depth--;
2728
626k
    if (rep == NULL) {
2729
362k
                    ent->content[0] = 0;
2730
362k
                    goto int_error;
2731
362k
                }
2732
2733
263k
                current = rep;
2734
9.27M
                while (*current != 0) { /* non input consuming loop */
2735
9.00M
                    buffer[nbchars++] = *current++;
2736
9.00M
                    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2737
7.92k
                        if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
2738
0
                            goto int_error;
2739
23.7k
                        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2740
23.7k
                    }
2741
9.00M
                }
2742
263k
                xmlFree(rep);
2743
263k
                rep = NULL;
2744
263k
      } else if (ent != NULL) {
2745
15.9k
    int i = xmlStrlen(ent->name);
2746
15.9k
    const xmlChar *cur = ent->name;
2747
2748
15.9k
    buffer[nbchars++] = '&';
2749
15.9k
    if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) {
2750
32
        growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE);
2751
32
    }
2752
35.7k
    for (;i > 0;i--)
2753
19.8k
        buffer[nbchars++] = *cur++;
2754
15.9k
    buffer[nbchars++] = ';';
2755
15.9k
      }
2756
740M
  } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2757
3.13M
      if (xmlParserDebugEntities)
2758
0
    xmlGenericError(xmlGenericErrorContext,
2759
0
      "String decoding PE Reference: %.30s\n", str);
2760
3.13M
      ent = xmlParseStringPEReference(ctxt, &str);
2761
3.13M
      xmlParserEntityCheck(ctxt, 0, ent, 0);
2762
3.13M
      if (ent != NULL)
2763
1.21M
          ctxt->nbentities += ent->checked / 2;
2764
3.13M
      if (ent != NULL) {
2765
1.21M
                if (ent->content == NULL) {
2766
        /*
2767
         * Note: external parsed entities will not be loaded,
2768
         * it is not required for a non-validating parser to
2769
         * complete external PEReferences coming from the
2770
         * internal subset
2771
         */
2772
16.6k
        if (((ctxt->options & XML_PARSE_NOENT) != 0) ||
2773
16.6k
      ((ctxt->options & XML_PARSE_DTDVALID) != 0) ||
2774
16.6k
      (ctxt->validate != 0)) {
2775
16.2k
      xmlLoadEntityContent(ctxt, ent);
2776
16.2k
        } else {
2777
381
      xmlWarningMsg(ctxt, XML_ERR_ENTITY_PROCESSING,
2778
381
      "not validating will not read content for PE entity %s\n",
2779
381
                          ent->name, NULL);
2780
381
        }
2781
16.6k
    }
2782
1.21M
    ctxt->depth++;
2783
1.21M
    rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2784
1.21M
                            0, 0, 0);
2785
1.21M
    ctxt->depth--;
2786
1.21M
    if (rep == NULL) {
2787
268k
                    if (ent->content != NULL)
2788
256k
                        ent->content[0] = 0;
2789
268k
                    goto int_error;
2790
268k
                }
2791
946k
                current = rep;
2792
45.0M
                while (*current != 0) { /* non input consuming loop */
2793
44.0M
                    buffer[nbchars++] = *current++;
2794
44.0M
                    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2795
97.4k
                        if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
2796
872
                            goto int_error;
2797
289k
                        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2798
289k
                    }
2799
44.0M
                }
2800
945k
                xmlFree(rep);
2801
945k
                rep = NULL;
2802
945k
      }
2803
737M
  } else {
2804
737M
      COPY_BUF(l,buffer,nbchars,c);
2805
737M
      str += l;
2806
737M
      if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2807
1.34M
          growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2808
1.34M
      }
2809
737M
  }
2810
741M
  if (str < last)
2811
732M
      c = CUR_SCHAR(str, l);
2812
9.55M
  else
2813
9.55M
      c = 0;
2814
741M
    }
2815
10.0M
    buffer[nbchars] = 0;
2816
10.0M
    return(buffer);
2817
2818
0
mem_error:
2819
0
    xmlErrMemory(ctxt, NULL);
2820
640k
int_error:
2821
640k
    if (rep != NULL)
2822
872
        xmlFree(rep);
2823
640k
    if (buffer != NULL)
2824
640k
        xmlFree(buffer);
2825
640k
    return(NULL);
2826
0
}
2827
2828
/**
2829
 * xmlStringDecodeEntities:
2830
 * @ctxt:  the parser context
2831
 * @str:  the input string
2832
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2833
 * @end:  an end marker xmlChar, 0 if none
2834
 * @end2:  an end marker xmlChar, 0 if none
2835
 * @end3:  an end marker xmlChar, 0 if none
2836
 *
2837
 * Takes an entity string content and processes it to perform the adequate substitutions.
2838
 *
2839
 * [67] Reference ::= EntityRef | CharRef
2840
 *
2841
 * [69] PEReference ::= '%' Name ';'
2842
 *
2843
 * Returns A newly allocated string with the substitution done. The caller
2844
 *      must deallocate it !
2845
 */
2846
xmlChar *
2847
xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2848
10.7M
            xmlChar end, xmlChar  end2, xmlChar end3) {
2849
10.7M
    if ((ctxt == NULL) || (str == NULL)) return(NULL);
2850
10.6M
    return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2851
10.6M
           end, end2, end3));
2852
10.7M
}
2853
2854
/************************************************************************
2855
 *                  *
2856
 *    Commodity functions, cleanup needed ?     *
2857
 *                  *
2858
 ************************************************************************/
2859
2860
/**
2861
 * areBlanks:
2862
 * @ctxt:  an XML parser context
2863
 * @str:  a xmlChar *
2864
 * @len:  the size of @str
2865
 * @blank_chars: we know the chars are blanks
2866
 *
2867
 * Is this a sequence of blank chars that one can ignore ?
2868
 *
2869
 * Returns 1 if ignorable 0 otherwise.
2870
 */
2871
2872
static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2873
142M
                     int blank_chars) {
2874
142M
    int i, ret;
2875
142M
    xmlNodePtr lastChild;
2876
2877
    /*
2878
     * Don't spend time trying to differentiate them, the same callback is
2879
     * used !
2880
     */
2881
142M
    if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
2882
969k
  return(0);
2883
2884
    /*
2885
     * Check for xml:space value.
2886
     */
2887
141M
    if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2888
141M
        (*(ctxt->space) == -2))
2889
94.9M
  return(0);
2890
2891
    /*
2892
     * Check that the string is made of blanks
2893
     */
2894
46.7M
    if (blank_chars == 0) {
2895
99.8M
  for (i = 0;i < len;i++)
2896
86.9M
      if (!(IS_BLANK_CH(str[i]))) return(0);
2897
26.6M
    }
2898
2899
    /*
2900
     * Look if the element is mixed content in the DTD if available
2901
     */
2902
32.9M
    if (ctxt->node == NULL) return(0);
2903
20.1M
    if (ctxt->myDoc != NULL) {
2904
20.1M
  ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2905
20.1M
        if (ret == 0) return(1);
2906
19.7M
        if (ret == 1) return(0);
2907
19.7M
    }
2908
2909
    /*
2910
     * Otherwise, heuristic :-\
2911
     */
2912
19.7M
    if ((RAW != '<') && (RAW != 0xD)) return(0);
2913
17.9M
    if ((ctxt->node->children == NULL) &&
2914
17.9M
  (RAW == '<') && (NXT(1) == '/')) return(0);
2915
2916
17.9M
    lastChild = xmlGetLastChild(ctxt->node);
2917
17.9M
    if (lastChild == NULL) {
2918
2.41M
        if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2919
2.41M
            (ctxt->node->content != NULL)) return(0);
2920
15.5M
    } else if (xmlNodeIsText(lastChild))
2921
11.4M
        return(0);
2922
4.01M
    else if ((ctxt->node->children != NULL) &&
2923
4.01M
             (xmlNodeIsText(ctxt->node->children)))
2924
77.1k
        return(0);
2925
6.35M
    return(1);
2926
17.9M
}
2927
2928
/************************************************************************
2929
 *                  *
2930
 *    Extra stuff for namespace support     *
2931
 *  Relates to http://www.w3.org/TR/WD-xml-names      *
2932
 *                  *
2933
 ************************************************************************/
2934
2935
/**
2936
 * xmlSplitQName:
2937
 * @ctxt:  an XML parser context
2938
 * @name:  the UTF-8 encoded XML qualified name to split
2939
 * @prefix:  a xmlChar **
2940
 *
2941
 * parse a UTF-8 encoded XML qualified name string
2942
 *
2943
 * [NS 5] QName ::= (Prefix ':')? LocalPart
2944
 *
2945
 * [NS 6] Prefix ::= NCName
2946
 *
2947
 * [NS 7] LocalPart ::= NCName
2948
 *
2949
 * Returns the local part, and prefix is updated
2950
 *   to get the Prefix if any.
2951
 */
2952
2953
xmlChar *
2954
39.7M
xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2955
39.7M
    xmlChar buf[XML_MAX_NAMELEN + 5];
2956
39.7M
    xmlChar *buffer = NULL;
2957
39.7M
    int len = 0;
2958
39.7M
    int max = XML_MAX_NAMELEN;
2959
39.7M
    xmlChar *ret = NULL;
2960
39.7M
    const xmlChar *cur = name;
2961
39.7M
    int c;
2962
2963
39.7M
    if (prefix == NULL) return(NULL);
2964
39.7M
    *prefix = NULL;
2965
2966
39.7M
    if (cur == NULL) return(NULL);
2967
2968
#ifndef XML_XML_NAMESPACE
2969
    /* xml: prefix is not really a namespace */
2970
    if ((cur[0] == 'x') && (cur[1] == 'm') &&
2971
        (cur[2] == 'l') && (cur[3] == ':'))
2972
  return(xmlStrdup(name));
2973
#endif
2974
2975
    /* nasty but well-formed */
2976
39.7M
    if (cur[0] == ':')
2977
9.30k
  return(xmlStrdup(name));
2978
2979
39.7M
    c = *cur++;
2980
162M
    while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2981
123M
  buf[len++] = c;
2982
123M
  c = *cur++;
2983
123M
    }
2984
39.7M
    if (len >= max) {
2985
  /*
2986
   * Okay someone managed to make a huge name, so he's ready to pay
2987
   * for the processing speed.
2988
   */
2989
24.0k
  max = len * 2;
2990
2991
24.0k
  buffer = (xmlChar *) xmlMallocAtomic(max);
2992
24.0k
  if (buffer == NULL) {
2993
0
      xmlErrMemory(ctxt, NULL);
2994
0
      return(NULL);
2995
0
  }
2996
24.0k
  memcpy(buffer, buf, len);
2997
92.1M
  while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2998
92.0M
      if (len + 10 > max) {
2999
65.0k
          xmlChar *tmp;
3000
3001
65.0k
    max *= 2;
3002
65.0k
    tmp = (xmlChar *) xmlRealloc(buffer, max);
3003
65.0k
    if (tmp == NULL) {
3004
0
        xmlFree(buffer);
3005
0
        xmlErrMemory(ctxt, NULL);
3006
0
        return(NULL);
3007
0
    }
3008
65.0k
    buffer = tmp;
3009
65.0k
      }
3010
92.0M
      buffer[len++] = c;
3011
92.0M
      c = *cur++;
3012
92.0M
  }
3013
24.0k
  buffer[len] = 0;
3014
24.0k
    }
3015
3016
39.7M
    if ((c == ':') && (*cur == 0)) {
3017
85.9k
        if (buffer != NULL)
3018
70
      xmlFree(buffer);
3019
85.9k
  *prefix = NULL;
3020
85.9k
  return(xmlStrdup(name));
3021
85.9k
    }
3022
3023
39.6M
    if (buffer == NULL)
3024
39.5M
  ret = xmlStrndup(buf, len);
3025
23.9k
    else {
3026
23.9k
  ret = buffer;
3027
23.9k
  buffer = NULL;
3028
23.9k
  max = XML_MAX_NAMELEN;
3029
23.9k
    }
3030
3031
3032
39.6M
    if (c == ':') {
3033
13.5M
  c = *cur;
3034
13.5M
        *prefix = ret;
3035
13.5M
  if (c == 0) {
3036
0
      return(xmlStrndup(BAD_CAST "", 0));
3037
0
  }
3038
13.5M
  len = 0;
3039
3040
  /*
3041
   * Check that the first character is proper to start
3042
   * a new name
3043
   */
3044
13.5M
  if (!(((c >= 0x61) && (c <= 0x7A)) ||
3045
13.5M
        ((c >= 0x41) && (c <= 0x5A)) ||
3046
13.5M
        (c == '_') || (c == ':'))) {
3047
11.5k
      int l;
3048
11.5k
      int first = CUR_SCHAR(cur, l);
3049
3050
11.5k
      if (!IS_LETTER(first) && (first != '_')) {
3051
4.81k
    xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
3052
4.81k
          "Name %s is not XML Namespace compliant\n",
3053
4.81k
          name);
3054
4.81k
      }
3055
11.5k
  }
3056
13.5M
  cur++;
3057
3058
47.5M
  while ((c != 0) && (len < max)) { /* tested bigname2.xml */
3059
34.0M
      buf[len++] = c;
3060
34.0M
      c = *cur++;
3061
34.0M
  }
3062
13.5M
  if (len >= max) {
3063
      /*
3064
       * Okay someone managed to make a huge name, so he's ready to pay
3065
       * for the processing speed.
3066
       */
3067
18.1k
      max = len * 2;
3068
3069
18.1k
      buffer = (xmlChar *) xmlMallocAtomic(max);
3070
18.1k
      if (buffer == NULL) {
3071
0
          xmlErrMemory(ctxt, NULL);
3072
0
    return(NULL);
3073
0
      }
3074
18.1k
      memcpy(buffer, buf, len);
3075
17.6M
      while (c != 0) { /* tested bigname2.xml */
3076
17.6M
    if (len + 10 > max) {
3077
32.3k
        xmlChar *tmp;
3078
3079
32.3k
        max *= 2;
3080
32.3k
        tmp = (xmlChar *) xmlRealloc(buffer, max);
3081
32.3k
        if (tmp == NULL) {
3082
0
      xmlErrMemory(ctxt, NULL);
3083
0
      xmlFree(buffer);
3084
0
      return(NULL);
3085
0
        }
3086
32.3k
        buffer = tmp;
3087
32.3k
    }
3088
17.6M
    buffer[len++] = c;
3089
17.6M
    c = *cur++;
3090
17.6M
      }
3091
18.1k
      buffer[len] = 0;
3092
18.1k
  }
3093
3094
13.5M
  if (buffer == NULL)
3095
13.4M
      ret = xmlStrndup(buf, len);
3096
18.1k
  else {
3097
18.1k
      ret = buffer;
3098
18.1k
  }
3099
13.5M
    }
3100
3101
39.6M
    return(ret);
3102
39.6M
}
3103
3104
/************************************************************************
3105
 *                  *
3106
 *      The parser itself       *
3107
 *  Relates to http://www.w3.org/TR/REC-xml       *
3108
 *                  *
3109
 ************************************************************************/
3110
3111
/************************************************************************
3112
 *                  *
3113
 *  Routines to parse Name, NCName and NmToken      *
3114
 *                  *
3115
 ************************************************************************/
3116
#ifdef DEBUG
3117
static unsigned long nbParseName = 0;
3118
static unsigned long nbParseNmToken = 0;
3119
static unsigned long nbParseNCName = 0;
3120
static unsigned long nbParseNCNameComplex = 0;
3121
static unsigned long nbParseNameComplex = 0;
3122
static unsigned long nbParseStringName = 0;
3123
#endif
3124
3125
/*
3126
 * The two following functions are related to the change of accepted
3127
 * characters for Name and NmToken in the Revision 5 of XML-1.0
3128
 * They correspond to the modified production [4] and the new production [4a]
3129
 * changes in that revision. Also note that the macros used for the
3130
 * productions Letter, Digit, CombiningChar and Extender are not needed
3131
 * anymore.
3132
 * We still keep compatibility to pre-revision5 parsing semantic if the
3133
 * new XML_PARSE_OLD10 option is given to the parser.
3134
 */
3135
static int
3136
7.95M
xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
3137
7.95M
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3138
        /*
3139
   * Use the new checks of production [4] [4a] and [5] of the
3140
   * Update 5 of XML-1.0
3141
   */
3142
5.79M
  if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3143
5.79M
      (((c >= 'a') && (c <= 'z')) ||
3144
5.69M
       ((c >= 'A') && (c <= 'Z')) ||
3145
5.69M
       (c == '_') || (c == ':') ||
3146
5.69M
       ((c >= 0xC0) && (c <= 0xD6)) ||
3147
5.69M
       ((c >= 0xD8) && (c <= 0xF6)) ||
3148
5.69M
       ((c >= 0xF8) && (c <= 0x2FF)) ||
3149
5.69M
       ((c >= 0x370) && (c <= 0x37D)) ||
3150
5.69M
       ((c >= 0x37F) && (c <= 0x1FFF)) ||
3151
5.69M
       ((c >= 0x200C) && (c <= 0x200D)) ||
3152
5.69M
       ((c >= 0x2070) && (c <= 0x218F)) ||
3153
5.69M
       ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3154
5.69M
       ((c >= 0x3001) && (c <= 0xD7FF)) ||
3155
5.69M
       ((c >= 0xF900) && (c <= 0xFDCF)) ||
3156
5.69M
       ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3157
5.69M
       ((c >= 0x10000) && (c <= 0xEFFFF))))
3158
2.42M
      return(1);
3159
5.79M
    } else {
3160
2.16M
        if (IS_LETTER(c) || (c == '_') || (c == ':'))
3161
1.14M
      return(1);
3162
2.16M
    }
3163
4.38M
    return(0);
3164
7.95M
}
3165
3166
static int
3167
76.4M
xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
3168
76.4M
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3169
        /*
3170
   * Use the new checks of production [4] [4a] and [5] of the
3171
   * Update 5 of XML-1.0
3172
   */
3173
57.2M
  if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3174
57.2M
      (((c >= 'a') && (c <= 'z')) ||
3175
57.1M
       ((c >= 'A') && (c <= 'Z')) ||
3176
57.1M
       ((c >= '0') && (c <= '9')) || /* !start */
3177
57.1M
       (c == '_') || (c == ':') ||
3178
57.1M
       (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3179
57.1M
       ((c >= 0xC0) && (c <= 0xD6)) ||
3180
57.1M
       ((c >= 0xD8) && (c <= 0xF6)) ||
3181
57.1M
       ((c >= 0xF8) && (c <= 0x2FF)) ||
3182
57.1M
       ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3183
57.1M
       ((c >= 0x370) && (c <= 0x37D)) ||
3184
57.1M
       ((c >= 0x37F) && (c <= 0x1FFF)) ||
3185
57.1M
       ((c >= 0x200C) && (c <= 0x200D)) ||
3186
57.1M
       ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3187
57.1M
       ((c >= 0x2070) && (c <= 0x218F)) ||
3188
57.1M
       ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3189
57.1M
       ((c >= 0x3001) && (c <= 0xD7FF)) ||
3190
57.1M
       ((c >= 0xF900) && (c <= 0xFDCF)) ||
3191
57.1M
       ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3192
57.1M
       ((c >= 0x10000) && (c <= 0xEFFFF))))
3193
54.4M
       return(1);
3194
57.2M
    } else {
3195
19.1M
        if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3196
19.1M
            (c == '.') || (c == '-') ||
3197
19.1M
      (c == '_') || (c == ':') ||
3198
19.1M
      (IS_COMBINING(c)) ||
3199
19.1M
      (IS_EXTENDER(c)))
3200
17.7M
      return(1);
3201
19.1M
    }
3202
4.25M
    return(0);
3203
76.4M
}
3204
3205
static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
3206
                                          int *len, int *alloc, int normalize);
3207
3208
static const xmlChar *
3209
129M
xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3210
129M
    int len = 0, l;
3211
129M
    int c;
3212
129M
    int count = 0;
3213
129M
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3214
98.3M
                    XML_MAX_TEXT_LENGTH :
3215
129M
                    XML_MAX_NAME_LENGTH;
3216
3217
#ifdef DEBUG
3218
    nbParseNameComplex++;
3219
#endif
3220
3221
    /*
3222
     * Handler for more complex cases
3223
     */
3224
129M
    GROW;
3225
129M
    if (ctxt->instate == XML_PARSER_EOF)
3226
0
        return(NULL);
3227
129M
    c = CUR_CHAR(l);
3228
129M
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3229
        /*
3230
   * Use the new checks of production [4] [4a] and [5] of the
3231
   * Update 5 of XML-1.0
3232
   */
3233
47.8M
  if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3234
47.8M
      (!(((c >= 'a') && (c <= 'z')) ||
3235
47.2M
         ((c >= 'A') && (c <= 'Z')) ||
3236
47.2M
         (c == '_') || (c == ':') ||
3237
47.2M
         ((c >= 0xC0) && (c <= 0xD6)) ||
3238
47.2M
         ((c >= 0xD8) && (c <= 0xF6)) ||
3239
47.2M
         ((c >= 0xF8) && (c <= 0x2FF)) ||
3240
47.2M
         ((c >= 0x370) && (c <= 0x37D)) ||
3241
47.2M
         ((c >= 0x37F) && (c <= 0x1FFF)) ||
3242
47.2M
         ((c >= 0x200C) && (c <= 0x200D)) ||
3243
47.2M
         ((c >= 0x2070) && (c <= 0x218F)) ||
3244
47.2M
         ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3245
47.2M
         ((c >= 0x3001) && (c <= 0xD7FF)) ||
3246
47.2M
         ((c >= 0xF900) && (c <= 0xFDCF)) ||
3247
47.2M
         ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3248
47.2M
         ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3249
46.3M
      return(NULL);
3250
46.3M
  }
3251
1.46M
  len += l;
3252
1.46M
  NEXTL(l);
3253
1.46M
  c = CUR_CHAR(l);
3254
101M
  while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3255
101M
         (((c >= 'a') && (c <= 'z')) ||
3256
101M
          ((c >= 'A') && (c <= 'Z')) ||
3257
101M
          ((c >= '0') && (c <= '9')) || /* !start */
3258
101M
          (c == '_') || (c == ':') ||
3259
101M
          (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3260
101M
          ((c >= 0xC0) && (c <= 0xD6)) ||
3261
101M
          ((c >= 0xD8) && (c <= 0xF6)) ||
3262
101M
          ((c >= 0xF8) && (c <= 0x2FF)) ||
3263
101M
          ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3264
101M
          ((c >= 0x370) && (c <= 0x37D)) ||
3265
101M
          ((c >= 0x37F) && (c <= 0x1FFF)) ||
3266
101M
          ((c >= 0x200C) && (c <= 0x200D)) ||
3267
101M
          ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3268
101M
          ((c >= 0x2070) && (c <= 0x218F)) ||
3269
101M
          ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3270
101M
          ((c >= 0x3001) && (c <= 0xD7FF)) ||
3271
101M
          ((c >= 0xF900) && (c <= 0xFDCF)) ||
3272
101M
          ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3273
101M
          ((c >= 0x10000) && (c <= 0xEFFFF))
3274
101M
    )) {
3275
100M
      if (count++ > XML_PARSER_CHUNK_SIZE) {
3276
864k
    count = 0;
3277
864k
    GROW;
3278
864k
                if (ctxt->instate == XML_PARSER_EOF)
3279
0
                    return(NULL);
3280
864k
      }
3281
100M
            if (len <= INT_MAX - l)
3282
100M
          len += l;
3283
100M
      NEXTL(l);
3284
100M
      c = CUR_CHAR(l);
3285
100M
  }
3286
81.5M
    } else {
3287
81.5M
  if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3288
81.5M
      (!IS_LETTER(c) && (c != '_') &&
3289
80.5M
       (c != ':'))) {
3290
78.2M
      return(NULL);
3291
78.2M
  }
3292
3.29M
  len += l;
3293
3.29M
  NEXTL(l);
3294
3.29M
  c = CUR_CHAR(l);
3295
3296
77.4M
  while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3297
77.4M
         ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3298
77.0M
    (c == '.') || (c == '-') ||
3299
77.0M
    (c == '_') || (c == ':') ||
3300
77.0M
    (IS_COMBINING(c)) ||
3301
77.0M
    (IS_EXTENDER(c)))) {
3302
74.1M
      if (count++ > XML_PARSER_CHUNK_SIZE) {
3303
567k
    count = 0;
3304
567k
    GROW;
3305
567k
                if (ctxt->instate == XML_PARSER_EOF)
3306
0
                    return(NULL);
3307
567k
      }
3308
74.1M
            if (len <= INT_MAX - l)
3309
74.1M
          len += l;
3310
74.1M
      NEXTL(l);
3311
74.1M
      c = CUR_CHAR(l);
3312
74.1M
  }
3313
3.29M
    }
3314
4.76M
    if (len > maxLength) {
3315
95
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3316
95
        return(NULL);
3317
95
    }
3318
4.76M
    if (ctxt->input->cur - ctxt->input->base < len) {
3319
        /*
3320
         * There were a couple of bugs where PERefs led to a change
3321
         * of the buffer. Check the buffer size to avoid passing an invalid
3322
         * pointer to xmlDictLookup.
3323
         */
3324
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
3325
0
                    "unexpected change of input buffer");
3326
0
        return (NULL);
3327
0
    }
3328
4.76M
    if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3329
7.11k
        return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
3330
4.75M
    return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3331
4.76M
}
3332
3333
/**
3334
 * xmlParseName:
3335
 * @ctxt:  an XML parser context
3336
 *
3337
 * DEPRECATED: Internal function, don't use.
3338
 *
3339
 * parse an XML name.
3340
 *
3341
 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3342
 *                  CombiningChar | Extender
3343
 *
3344
 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3345
 *
3346
 * [6] Names ::= Name (#x20 Name)*
3347
 *
3348
 * Returns the Name parsed or NULL
3349
 */
3350
3351
const xmlChar *
3352
498M
xmlParseName(xmlParserCtxtPtr ctxt) {
3353
498M
    const xmlChar *in;
3354
498M
    const xmlChar *ret;
3355
498M
    size_t count = 0;
3356
498M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3357
417M
                       XML_MAX_TEXT_LENGTH :
3358
498M
                       XML_MAX_NAME_LENGTH;
3359
3360
498M
    GROW;
3361
3362
#ifdef DEBUG
3363
    nbParseName++;
3364
#endif
3365
3366
    /*
3367
     * Accelerator for simple ASCII names
3368
     */
3369
498M
    in = ctxt->input->cur;
3370
498M
    if (((*in >= 0x61) && (*in <= 0x7A)) ||
3371
498M
  ((*in >= 0x41) && (*in <= 0x5A)) ||
3372
498M
  (*in == '_') || (*in == ':')) {
3373
373M
  in++;
3374
1.78G
  while (((*in >= 0x61) && (*in <= 0x7A)) ||
3375
1.78G
         ((*in >= 0x41) && (*in <= 0x5A)) ||
3376
1.78G
         ((*in >= 0x30) && (*in <= 0x39)) ||
3377
1.78G
         (*in == '_') || (*in == '-') ||
3378
1.78G
         (*in == ':') || (*in == '.'))
3379
1.41G
      in++;
3380
373M
  if ((*in > 0) && (*in < 0x80)) {
3381
368M
      count = in - ctxt->input->cur;
3382
368M
            if (count > maxLength) {
3383
10
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3384
10
                return(NULL);
3385
10
            }
3386
368M
      ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3387
368M
      ctxt->input->cur = in;
3388
368M
      ctxt->input->col += count;
3389
368M
      if (ret == NULL)
3390
0
          xmlErrMemory(ctxt, NULL);
3391
368M
      return(ret);
3392
368M
  }
3393
373M
    }
3394
    /* accelerator for special cases */
3395
129M
    return(xmlParseNameComplex(ctxt));
3396
498M
}
3397
3398
static const xmlChar *
3399
2.95M
xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3400
2.95M
    int len = 0, l;
3401
2.95M
    int c;
3402
2.95M
    int count = 0;
3403
2.95M
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3404
700k
                    XML_MAX_TEXT_LENGTH :
3405
2.95M
                    XML_MAX_NAME_LENGTH;
3406
2.95M
    size_t startPosition = 0;
3407
3408
#ifdef DEBUG
3409
    nbParseNCNameComplex++;
3410
#endif
3411
3412
    /*
3413
     * Handler for more complex cases
3414
     */
3415
2.95M
    GROW;
3416
2.95M
    startPosition = CUR_PTR - BASE_PTR;
3417
2.95M
    c = CUR_CHAR(l);
3418
2.95M
    if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3419
2.95M
  (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3420
2.68M
  return(NULL);
3421
2.68M
    }
3422
3423
14.1M
    while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3424
14.1M
     (xmlIsNameChar(ctxt, c) && (c != ':'))) {
3425
13.8M
  if (count++ > XML_PARSER_CHUNK_SIZE) {
3426
117k
      count = 0;
3427
117k
      GROW;
3428
117k
            if (ctxt->instate == XML_PARSER_EOF)
3429
0
                return(NULL);
3430
117k
  }
3431
13.8M
        if (len <= INT_MAX - l)
3432
13.8M
      len += l;
3433
13.8M
  NEXTL(l);
3434
13.8M
  c = CUR_CHAR(l);
3435
13.8M
  if (c == 0) {
3436
16.6k
      count = 0;
3437
      /*
3438
       * when shrinking to extend the buffer we really need to preserve
3439
       * the part of the name we already parsed. Hence rolling back
3440
       * by current length.
3441
       */
3442
16.6k
      ctxt->input->cur -= l;
3443
16.6k
      GROW;
3444
16.6k
            if (ctxt->instate == XML_PARSER_EOF)
3445
0
                return(NULL);
3446
16.6k
      ctxt->input->cur += l;
3447
16.6k
      c = CUR_CHAR(l);
3448
16.6k
  }
3449
13.8M
    }
3450
272k
    if (len > maxLength) {
3451
77
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3452
77
        return(NULL);
3453
77
    }
3454
272k
    return(xmlDictLookup(ctxt->dict, (BASE_PTR + startPosition), len));
3455
272k
}
3456
3457
/**
3458
 * xmlParseNCName:
3459
 * @ctxt:  an XML parser context
3460
 * @len:  length of the string parsed
3461
 *
3462
 * parse an XML name.
3463
 *
3464
 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3465
 *                      CombiningChar | Extender
3466
 *
3467
 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3468
 *
3469
 * Returns the Name parsed or NULL
3470
 */
3471
3472
static const xmlChar *
3473
31.2M
xmlParseNCName(xmlParserCtxtPtr ctxt) {
3474
31.2M
    const xmlChar *in, *e;
3475
31.2M
    const xmlChar *ret;
3476
31.2M
    size_t count = 0;
3477
31.2M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3478
10.6M
                       XML_MAX_TEXT_LENGTH :
3479
31.2M
                       XML_MAX_NAME_LENGTH;
3480
3481
#ifdef DEBUG
3482
    nbParseNCName++;
3483
#endif
3484
3485
    /*
3486
     * Accelerator for simple ASCII names
3487
     */
3488
31.2M
    in = ctxt->input->cur;
3489
31.2M
    e = ctxt->input->end;
3490
31.2M
    if ((((*in >= 0x61) && (*in <= 0x7A)) ||
3491
31.2M
   ((*in >= 0x41) && (*in <= 0x5A)) ||
3492
31.2M
   (*in == '_')) && (in < e)) {
3493
28.5M
  in++;
3494
122M
  while ((((*in >= 0x61) && (*in <= 0x7A)) ||
3495
122M
          ((*in >= 0x41) && (*in <= 0x5A)) ||
3496
122M
          ((*in >= 0x30) && (*in <= 0x39)) ||
3497
122M
          (*in == '_') || (*in == '-') ||
3498
122M
          (*in == '.')) && (in < e))
3499
93.4M
      in++;
3500
28.5M
  if (in >= e)
3501
2.27k
      goto complex;
3502
28.5M
  if ((*in > 0) && (*in < 0x80)) {
3503
28.3M
      count = in - ctxt->input->cur;
3504
28.3M
            if (count > maxLength) {
3505
1
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3506
1
                return(NULL);
3507
1
            }
3508
28.3M
      ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3509
28.3M
      ctxt->input->cur = in;
3510
28.3M
      ctxt->input->col += count;
3511
28.3M
      if (ret == NULL) {
3512
0
          xmlErrMemory(ctxt, NULL);
3513
0
      }
3514
28.3M
      return(ret);
3515
28.3M
  }
3516
28.5M
    }
3517
2.95M
complex:
3518
2.95M
    return(xmlParseNCNameComplex(ctxt));
3519
31.2M
}
3520
3521
/**
3522
 * xmlParseNameAndCompare:
3523
 * @ctxt:  an XML parser context
3524
 *
3525
 * parse an XML name and compares for match
3526
 * (specialized for endtag parsing)
3527
 *
3528
 * Returns NULL for an illegal name, (xmlChar*) 1 for success
3529
 * and the name for mismatch
3530
 */
3531
3532
static const xmlChar *
3533
99.3M
xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
3534
99.3M
    register const xmlChar *cmp = other;
3535
99.3M
    register const xmlChar *in;
3536
99.3M
    const xmlChar *ret;
3537
3538
99.3M
    GROW;
3539
99.3M
    if (ctxt->instate == XML_PARSER_EOF)
3540
0
        return(NULL);
3541
3542
99.3M
    in = ctxt->input->cur;
3543
540M
    while (*in != 0 && *in == *cmp) {
3544
441M
  ++in;
3545
441M
  ++cmp;
3546
441M
    }
3547
99.3M
    if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
3548
  /* success */
3549
91.1M
  ctxt->input->col += in - ctxt->input->cur;
3550
91.1M
  ctxt->input->cur = in;
3551
91.1M
  return (const xmlChar*) 1;
3552
91.1M
    }
3553
    /* failure (or end of input buffer), check with full function */
3554
8.25M
    ret = xmlParseName (ctxt);
3555
    /* strings coming from the dictionary direct compare possible */
3556
8.25M
    if (ret == other) {
3557
126k
  return (const xmlChar*) 1;
3558
126k
    }
3559
8.12M
    return ret;
3560
8.25M
}
3561
3562
/**
3563
 * xmlParseStringName:
3564
 * @ctxt:  an XML parser context
3565
 * @str:  a pointer to the string pointer (IN/OUT)
3566
 *
3567
 * parse an XML name.
3568
 *
3569
 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3570
 *                  CombiningChar | Extender
3571
 *
3572
 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3573
 *
3574
 * [6] Names ::= Name (#x20 Name)*
3575
 *
3576
 * Returns the Name parsed or NULL. The @str pointer
3577
 * is updated to the current location in the string.
3578
 */
3579
3580
static xmlChar *
3581
5.06M
xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3582
5.06M
    xmlChar buf[XML_MAX_NAMELEN + 5];
3583
5.06M
    const xmlChar *cur = *str;
3584
5.06M
    int len = 0, l;
3585
5.06M
    int c;
3586
5.06M
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3587
1.73M
                    XML_MAX_TEXT_LENGTH :
3588
5.06M
                    XML_MAX_NAME_LENGTH;
3589
3590
#ifdef DEBUG
3591
    nbParseStringName++;
3592
#endif
3593
3594
5.06M
    c = CUR_SCHAR(cur, l);
3595
5.06M
    if (!xmlIsNameStartChar(ctxt, c)) {
3596
1.78M
  return(NULL);
3597
1.78M
    }
3598
3599
3.28M
    COPY_BUF(l,buf,len,c);
3600
3.28M
    cur += l;
3601
3.28M
    c = CUR_SCHAR(cur, l);
3602
32.0M
    while (xmlIsNameChar(ctxt, c)) {
3603
28.7M
  COPY_BUF(l,buf,len,c);
3604
28.7M
  cur += l;
3605
28.7M
  c = CUR_SCHAR(cur, l);
3606
28.7M
  if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3607
      /*
3608
       * Okay someone managed to make a huge name, so he's ready to pay
3609
       * for the processing speed.
3610
       */
3611
24.6k
      xmlChar *buffer;
3612
24.6k
      int max = len * 2;
3613
3614
24.6k
      buffer = (xmlChar *) xmlMallocAtomic(max);
3615
24.6k
      if (buffer == NULL) {
3616
0
          xmlErrMemory(ctxt, NULL);
3617
0
    return(NULL);
3618
0
      }
3619
24.6k
      memcpy(buffer, buf, len);
3620
14.1M
      while (xmlIsNameChar(ctxt, c)) {
3621
14.1M
    if (len + 10 > max) {
3622
27.7k
        xmlChar *tmp;
3623
3624
27.7k
        max *= 2;
3625
27.7k
        tmp = (xmlChar *) xmlRealloc(buffer, max);
3626
27.7k
        if (tmp == NULL) {
3627
0
      xmlErrMemory(ctxt, NULL);
3628
0
      xmlFree(buffer);
3629
0
      return(NULL);
3630
0
        }
3631
27.7k
        buffer = tmp;
3632
27.7k
    }
3633
14.1M
    COPY_BUF(l,buffer,len,c);
3634
14.1M
    cur += l;
3635
14.1M
    c = CUR_SCHAR(cur, l);
3636
14.1M
                if (len > maxLength) {
3637
9
                    xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3638
9
                    xmlFree(buffer);
3639
9
                    return(NULL);
3640
9
                }
3641
14.1M
      }
3642
24.6k
      buffer[len] = 0;
3643
24.6k
      *str = cur;
3644
24.6k
      return(buffer);
3645
24.6k
  }
3646
28.7M
    }
3647
3.26M
    if (len > maxLength) {
3648
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3649
0
        return(NULL);
3650
0
    }
3651
3.26M
    *str = cur;
3652
3.26M
    return(xmlStrndup(buf, len));
3653
3.26M
}
3654
3655
/**
3656
 * xmlParseNmtoken:
3657
 * @ctxt:  an XML parser context
3658
 *
3659
 * DEPRECATED: Internal function, don't use.
3660
 *
3661
 * parse an XML Nmtoken.
3662
 *
3663
 * [7] Nmtoken ::= (NameChar)+
3664
 *
3665
 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
3666
 *
3667
 * Returns the Nmtoken parsed or NULL
3668
 */
3669
3670
xmlChar *
3671
840k
xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3672
840k
    xmlChar buf[XML_MAX_NAMELEN + 5];
3673
840k
    int len = 0, l;
3674
840k
    int c;
3675
840k
    int count = 0;
3676
840k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3677
231k
                    XML_MAX_TEXT_LENGTH :
3678
840k
                    XML_MAX_NAME_LENGTH;
3679
3680
#ifdef DEBUG
3681
    nbParseNmToken++;
3682
#endif
3683
3684
840k
    GROW;
3685
840k
    if (ctxt->instate == XML_PARSER_EOF)
3686
0
        return(NULL);
3687
840k
    c = CUR_CHAR(l);
3688
3689
5.17M
    while (xmlIsNameChar(ctxt, c)) {
3690
4.33M
  if (count++ > XML_PARSER_CHUNK_SIZE) {
3691
0
      count = 0;
3692
0
      GROW;
3693
0
  }
3694
4.33M
  COPY_BUF(l,buf,len,c);
3695
4.33M
  NEXTL(l);
3696
4.33M
  c = CUR_CHAR(l);
3697
4.33M
  if (c == 0) {
3698
742
      count = 0;
3699
742
      GROW;
3700
742
      if (ctxt->instate == XML_PARSER_EOF)
3701
0
    return(NULL);
3702
742
            c = CUR_CHAR(l);
3703
742
  }
3704
4.33M
  if (len >= XML_MAX_NAMELEN) {
3705
      /*
3706
       * Okay someone managed to make a huge token, so he's ready to pay
3707
       * for the processing speed.
3708
       */
3709
4.34k
      xmlChar *buffer;
3710
4.34k
      int max = len * 2;
3711
3712
4.34k
      buffer = (xmlChar *) xmlMallocAtomic(max);
3713
4.34k
      if (buffer == NULL) {
3714
0
          xmlErrMemory(ctxt, NULL);
3715
0
    return(NULL);
3716
0
      }
3717
4.34k
      memcpy(buffer, buf, len);
3718
11.1M
      while (xmlIsNameChar(ctxt, c)) {
3719
11.1M
    if (count++ > XML_PARSER_CHUNK_SIZE) {
3720
111k
        count = 0;
3721
111k
        GROW;
3722
111k
                    if (ctxt->instate == XML_PARSER_EOF) {
3723
0
                        xmlFree(buffer);
3724
0
                        return(NULL);
3725
0
                    }
3726
111k
    }
3727
11.1M
    if (len + 10 > max) {
3728
9.96k
        xmlChar *tmp;
3729
3730
9.96k
        max *= 2;
3731
9.96k
        tmp = (xmlChar *) xmlRealloc(buffer, max);
3732
9.96k
        if (tmp == NULL) {
3733
0
      xmlErrMemory(ctxt, NULL);
3734
0
      xmlFree(buffer);
3735
0
      return(NULL);
3736
0
        }
3737
9.96k
        buffer = tmp;
3738
9.96k
    }
3739
11.1M
    COPY_BUF(l,buffer,len,c);
3740
11.1M
    NEXTL(l);
3741
11.1M
    c = CUR_CHAR(l);
3742
11.1M
                if (len > maxLength) {
3743
0
                    xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3744
0
                    xmlFree(buffer);
3745
0
                    return(NULL);
3746
0
                }
3747
11.1M
      }
3748
4.34k
      buffer[len] = 0;
3749
4.34k
      return(buffer);
3750
4.34k
  }
3751
4.33M
    }
3752
835k
    if (len == 0)
3753
40.3k
        return(NULL);
3754
795k
    if (len > maxLength) {
3755
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3756
0
        return(NULL);
3757
0
    }
3758
795k
    return(xmlStrndup(buf, len));
3759
795k
}
3760
3761
/**
3762
 * xmlParseEntityValue:
3763
 * @ctxt:  an XML parser context
3764
 * @orig:  if non-NULL store a copy of the original entity value
3765
 *
3766
 * DEPRECATED: Internal function, don't use.
3767
 *
3768
 * parse a value for ENTITY declarations
3769
 *
3770
 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3771
 *                 "'" ([^%&'] | PEReference | Reference)* "'"
3772
 *
3773
 * Returns the EntityValue parsed with reference substituted or NULL
3774
 */
3775
3776
xmlChar *
3777
2.15M
xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3778
2.15M
    xmlChar *buf = NULL;
3779
2.15M
    int len = 0;
3780
2.15M
    int size = XML_PARSER_BUFFER_SIZE;
3781
2.15M
    int c, l;
3782
2.15M
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3783
687k
                    XML_MAX_HUGE_LENGTH :
3784
2.15M
                    XML_MAX_TEXT_LENGTH;
3785
2.15M
    xmlChar stop;
3786
2.15M
    xmlChar *ret = NULL;
3787
2.15M
    const xmlChar *cur = NULL;
3788
2.15M
    xmlParserInputPtr input;
3789
3790
2.15M
    if (RAW == '"') stop = '"';
3791
546k
    else if (RAW == '\'') stop = '\'';
3792
0
    else {
3793
0
  xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
3794
0
  return(NULL);
3795
0
    }
3796
2.15M
    buf = (xmlChar *) xmlMallocAtomic(size);
3797
2.15M
    if (buf == NULL) {
3798
0
  xmlErrMemory(ctxt, NULL);
3799
0
  return(NULL);
3800
0
    }
3801
3802
    /*
3803
     * The content of the entity definition is copied in a buffer.
3804
     */
3805
3806
2.15M
    ctxt->instate = XML_PARSER_ENTITY_VALUE;
3807
2.15M
    input = ctxt->input;
3808
2.15M
    GROW;
3809
2.15M
    if (ctxt->instate == XML_PARSER_EOF)
3810
0
        goto error;
3811
2.15M
    NEXT;
3812
2.15M
    c = CUR_CHAR(l);
3813
    /*
3814
     * NOTE: 4.4.5 Included in Literal
3815
     * When a parameter entity reference appears in a literal entity
3816
     * value, ... a single or double quote character in the replacement
3817
     * text is always treated as a normal data character and will not
3818
     * terminate the literal.
3819
     * In practice it means we stop the loop only when back at parsing
3820
     * the initial entity and the quote is found
3821
     */
3822
117M
    while (((IS_CHAR(c)) && ((c != stop) || /* checked */
3823
117M
      (ctxt->input != input))) && (ctxt->instate != XML_PARSER_EOF)) {
3824
115M
  if (len + 5 >= size) {
3825
378k
      xmlChar *tmp;
3826
3827
378k
      size *= 2;
3828
378k
      tmp = (xmlChar *) xmlRealloc(buf, size);
3829
378k
      if (tmp == NULL) {
3830
0
    xmlErrMemory(ctxt, NULL);
3831
0
                goto error;
3832
0
      }
3833
378k
      buf = tmp;
3834
378k
  }
3835
115M
  COPY_BUF(l,buf,len,c);
3836
115M
  NEXTL(l);
3837
3838
115M
  GROW;
3839
115M
  c = CUR_CHAR(l);
3840
115M
  if (c == 0) {
3841
2.06k
      GROW;
3842
2.06k
      c = CUR_CHAR(l);
3843
2.06k
  }
3844
3845
115M
        if (len > maxLength) {
3846
0
            xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
3847
0
                           "entity value too long\n");
3848
0
            goto error;
3849
0
        }
3850
115M
    }
3851
2.15M
    buf[len] = 0;
3852
2.15M
    if (ctxt->instate == XML_PARSER_EOF)
3853
0
        goto error;
3854
2.15M
    if (c != stop) {
3855
3.79k
        xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
3856
3.79k
        goto error;
3857
3.79k
    }
3858
2.15M
    NEXT;
3859
3860
    /*
3861
     * Raise problem w.r.t. '&' and '%' being used in non-entities
3862
     * reference constructs. Note Charref will be handled in
3863
     * xmlStringDecodeEntities()
3864
     */
3865
2.15M
    cur = buf;
3866
95.1M
    while (*cur != 0) { /* non input consuming */
3867
93.0M
  if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3868
1.09M
      xmlChar *name;
3869
1.09M
      xmlChar tmp = *cur;
3870
1.09M
            int nameOk = 0;
3871
3872
1.09M
      cur++;
3873
1.09M
      name = xmlParseStringName(ctxt, &cur);
3874
1.09M
            if (name != NULL) {
3875
1.09M
                nameOk = 1;
3876
1.09M
                xmlFree(name);
3877
1.09M
            }
3878
1.09M
            if ((nameOk == 0) || (*cur != ';')) {
3879
16.6k
    xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
3880
16.6k
      "EntityValue: '%c' forbidden except for entities references\n",
3881
16.6k
                            tmp);
3882
16.6k
                goto error;
3883
16.6k
      }
3884
1.08M
      if ((tmp == '%') && (ctxt->inSubset == 1) &&
3885
1.08M
    (ctxt->inputNr == 1)) {
3886
1.15k
    xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
3887
1.15k
                goto error;
3888
1.15k
      }
3889
1.07M
      if (*cur == 0)
3890
0
          break;
3891
1.07M
  }
3892
92.9M
  cur++;
3893
92.9M
    }
3894
3895
    /*
3896
     * Then PEReference entities are substituted.
3897
     *
3898
     * NOTE: 4.4.7 Bypassed
3899
     * When a general entity reference appears in the EntityValue in
3900
     * an entity declaration, it is bypassed and left as is.
3901
     * so XML_SUBSTITUTE_REF is not set here.
3902
     */
3903
2.13M
    ++ctxt->depth;
3904
2.13M
    ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3905
2.13M
                                  0, 0, 0);
3906
2.13M
    --ctxt->depth;
3907
2.13M
    if (orig != NULL) {
3908
2.13M
        *orig = buf;
3909
2.13M
        buf = NULL;
3910
2.13M
    }
3911
3912
2.15M
error:
3913
2.15M
    if (buf != NULL)
3914
21.6k
        xmlFree(buf);
3915
2.15M
    return(ret);
3916
2.13M
}
3917
3918
/**
3919
 * xmlParseAttValueComplex:
3920
 * @ctxt:  an XML parser context
3921
 * @len:   the resulting attribute len
3922
 * @normalize:  whether to apply the inner normalization
3923
 *
3924
 * parse a value for an attribute, this is the fallback function
3925
 * of xmlParseAttValue() when the attribute parsing requires handling
3926
 * of non-ASCII characters, or normalization compaction.
3927
 *
3928
 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3929
 */
3930
static xmlChar *
3931
9.33M
xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
3932
9.33M
    xmlChar limit = 0;
3933
9.33M
    xmlChar *buf = NULL;
3934
9.33M
    xmlChar *rep = NULL;
3935
9.33M
    size_t len = 0;
3936
9.33M
    size_t buf_size = 0;
3937
9.33M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3938
8.08M
                       XML_MAX_HUGE_LENGTH :
3939
9.33M
                       XML_MAX_TEXT_LENGTH;
3940
9.33M
    int c, l, in_space = 0;
3941
9.33M
    xmlChar *current = NULL;
3942
9.33M
    xmlEntityPtr ent;
3943
3944
9.33M
    if (NXT(0) == '"') {
3945
6.14M
  ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3946
6.14M
  limit = '"';
3947
6.14M
        NEXT;
3948
6.14M
    } else if (NXT(0) == '\'') {
3949
3.18M
  limit = '\'';
3950
3.18M
  ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3951
3.18M
        NEXT;
3952
3.18M
    } else {
3953
0
  xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
3954
0
  return(NULL);
3955
0
    }
3956
3957
    /*
3958
     * allocate a translation buffer.
3959
     */
3960
9.33M
    buf_size = XML_PARSER_BUFFER_SIZE;
3961
9.33M
    buf = (xmlChar *) xmlMallocAtomic(buf_size);
3962
9.33M
    if (buf == NULL) goto mem_error;
3963
3964
    /*
3965
     * OK loop until we reach one of the ending char or a size limit.
3966
     */
3967
9.33M
    c = CUR_CHAR(l);
3968
382M
    while (((NXT(0) != limit) && /* checked */
3969
382M
            (IS_CHAR(c)) && (c != '<')) &&
3970
382M
            (ctxt->instate != XML_PARSER_EOF)) {
3971
373M
  if (c == '&') {
3972
14.8M
      in_space = 0;
3973
14.8M
      if (NXT(1) == '#') {
3974
5.47M
    int val = xmlParseCharRef(ctxt);
3975
3976
5.47M
    if (val == '&') {
3977
89.2k
        if (ctxt->replaceEntities) {
3978
6.31k
      if (len + 10 > buf_size) {
3979
304
          growBuffer(buf, 10);
3980
304
      }
3981
6.31k
      buf[len++] = '&';
3982
82.9k
        } else {
3983
      /*
3984
       * The reparsing will be done in xmlStringGetNodeList()
3985
       * called by the attribute() function in SAX.c
3986
       */
3987
82.9k
      if (len + 10 > buf_size) {
3988
394
          growBuffer(buf, 10);
3989
394
      }
3990
82.9k
      buf[len++] = '&';
3991
82.9k
      buf[len++] = '#';
3992
82.9k
      buf[len++] = '3';
3993
82.9k
      buf[len++] = '8';
3994
82.9k
      buf[len++] = ';';
3995
82.9k
        }
3996
5.38M
    } else if (val != 0) {
3997
4.57M
        if (len + 10 > buf_size) {
3998
5.66k
      growBuffer(buf, 10);
3999
5.66k
        }
4000
4.57M
        len += xmlCopyChar(0, &buf[len], val);
4001
4.57M
    }
4002
9.35M
      } else {
4003
9.35M
    ent = xmlParseEntityRef(ctxt);
4004
9.35M
    ctxt->nbentities++;
4005
9.35M
    if (ent != NULL)
4006
2.13M
        ctxt->nbentities += ent->owner;
4007
9.35M
    if ((ent != NULL) &&
4008
9.35M
        (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
4009
1.88M
        if (len + 10 > buf_size) {
4010
296
      growBuffer(buf, 10);
4011
296
        }
4012
1.88M
        if ((ctxt->replaceEntities == 0) &&
4013
1.88M
            (ent->content[0] == '&')) {
4014
752k
      buf[len++] = '&';
4015
752k
      buf[len++] = '#';
4016
752k
      buf[len++] = '3';
4017
752k
      buf[len++] = '8';
4018
752k
      buf[len++] = ';';
4019
1.13M
        } else {
4020
1.13M
      buf[len++] = ent->content[0];
4021
1.13M
        }
4022
7.47M
    } else if ((ent != NULL) &&
4023
7.47M
               (ctxt->replaceEntities != 0)) {
4024
81.6k
        if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
4025
81.6k
      ++ctxt->depth;
4026
81.6k
      rep = xmlStringDecodeEntities(ctxt, ent->content,
4027
81.6k
                  XML_SUBSTITUTE_REF,
4028
81.6k
                  0, 0, 0);
4029
81.6k
      --ctxt->depth;
4030
81.6k
      if (rep != NULL) {
4031
77.0k
          current = rep;
4032
8.21M
          while (*current != 0) { /* non input consuming */
4033
8.13M
                                if ((*current == 0xD) || (*current == 0xA) ||
4034
8.13M
                                    (*current == 0x9)) {
4035
104k
                                    buf[len++] = 0x20;
4036
104k
                                    current++;
4037
104k
                                } else
4038
8.03M
                                    buf[len++] = *current++;
4039
8.13M
        if (len + 10 > buf_size) {
4040
9.91k
            growBuffer(buf, 10);
4041
9.91k
        }
4042
8.13M
          }
4043
77.0k
          xmlFree(rep);
4044
77.0k
          rep = NULL;
4045
77.0k
      }
4046
81.6k
        } else {
4047
0
      if (len + 10 > buf_size) {
4048
0
          growBuffer(buf, 10);
4049
0
      }
4050
0
      if (ent->content != NULL)
4051
0
          buf[len++] = ent->content[0];
4052
0
        }
4053
7.38M
    } else if (ent != NULL) {
4054
165k
        int i = xmlStrlen(ent->name);
4055
165k
        const xmlChar *cur = ent->name;
4056
4057
        /*
4058
         * This may look absurd but is needed to detect
4059
         * entities problems
4060
         */
4061
165k
        if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
4062
165k
      (ent->content != NULL) && (ent->checked == 0)) {
4063
21.7k
      unsigned long oldnbent = ctxt->nbentities, diff;
4064
4065
21.7k
      ++ctxt->depth;
4066
21.7k
      rep = xmlStringDecodeEntities(ctxt, ent->content,
4067
21.7k
              XML_SUBSTITUTE_REF, 0, 0, 0);
4068
21.7k
      --ctxt->depth;
4069
4070
21.7k
                        diff = ctxt->nbentities - oldnbent + 1;
4071
21.7k
                        if (diff > INT_MAX / 2)
4072
0
                            diff = INT_MAX / 2;
4073
21.7k
                        ent->checked = diff * 2;
4074
21.7k
      if (rep != NULL) {
4075
21.4k
          if (xmlStrchr(rep, '<'))
4076
1.49k
              ent->checked |= 1;
4077
21.4k
          xmlFree(rep);
4078
21.4k
          rep = NULL;
4079
21.4k
      } else {
4080
309
                            ent->content[0] = 0;
4081
309
                        }
4082
21.7k
        }
4083
4084
        /*
4085
         * Just output the reference
4086
         */
4087
165k
        buf[len++] = '&';
4088
166k
        while (len + i + 10 > buf_size) {
4089
1.33k
      growBuffer(buf, i + 10);
4090
1.33k
        }
4091
924k
        for (;i > 0;i--)
4092
758k
      buf[len++] = *cur++;
4093
165k
        buf[len++] = ';';
4094
165k
    }
4095
9.35M
      }
4096
358M
  } else {
4097
358M
      if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
4098
30.7M
          if ((len != 0) || (!normalize)) {
4099
30.6M
        if ((!normalize) || (!in_space)) {
4100
30.1M
      COPY_BUF(l,buf,len,0x20);
4101
30.1M
      while (len + 10 > buf_size) {
4102
114k
          growBuffer(buf, 10);
4103
114k
      }
4104
30.1M
        }
4105
30.6M
        in_space = 1;
4106
30.6M
    }
4107
327M
      } else {
4108
327M
          in_space = 0;
4109
327M
    COPY_BUF(l,buf,len,c);
4110
327M
    if (len + 10 > buf_size) {
4111
1.25M
        growBuffer(buf, 10);
4112
1.25M
    }
4113
327M
      }
4114
358M
      NEXTL(l);
4115
358M
  }
4116
373M
  GROW;
4117
373M
  c = CUR_CHAR(l);
4118
373M
        if (len > maxLength) {
4119
0
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4120
0
                           "AttValue length too long\n");
4121
0
            goto mem_error;
4122
0
        }
4123
373M
    }
4124
9.33M
    if (ctxt->instate == XML_PARSER_EOF)
4125
0
        goto error;
4126
4127
9.33M
    if ((in_space) && (normalize)) {
4128
81.3k
        while ((len > 0) && (buf[len - 1] == 0x20)) len--;
4129
39.1k
    }
4130
9.33M
    buf[len] = 0;
4131
9.33M
    if (RAW == '<') {
4132
1.70M
  xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
4133
7.62M
    } else if (RAW != limit) {
4134
873k
  if ((c != 0) && (!IS_CHAR(c))) {
4135
652k
      xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
4136
652k
         "invalid character in attribute value\n");
4137
652k
  } else {
4138
220k
      xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4139
220k
         "AttValue: ' expected\n");
4140
220k
        }
4141
873k
    } else
4142
6.75M
  NEXT;
4143
4144
9.33M
    if (attlen != NULL) *attlen = len;
4145
9.33M
    return(buf);
4146
4147
0
mem_error:
4148
0
    xmlErrMemory(ctxt, NULL);
4149
0
error:
4150
0
    if (buf != NULL)
4151
0
        xmlFree(buf);
4152
0
    if (rep != NULL)
4153
0
        xmlFree(rep);
4154
0
    return(NULL);
4155
0
}
4156
4157
/**
4158
 * xmlParseAttValue:
4159
 * @ctxt:  an XML parser context
4160
 *
4161
 * DEPRECATED: Internal function, don't use.
4162
 *
4163
 * parse a value for an attribute
4164
 * Note: the parser won't do substitution of entities here, this
4165
 * will be handled later in xmlStringGetNodeList
4166
 *
4167
 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
4168
 *                   "'" ([^<&'] | Reference)* "'"
4169
 *
4170
 * 3.3.3 Attribute-Value Normalization:
4171
 * Before the value of an attribute is passed to the application or
4172
 * checked for validity, the XML processor must normalize it as follows:
4173
 * - a character reference is processed by appending the referenced
4174
 *   character to the attribute value
4175
 * - an entity reference is processed by recursively processing the
4176
 *   replacement text of the entity
4177
 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4178
 *   appending #x20 to the normalized value, except that only a single
4179
 *   #x20 is appended for a "#xD#xA" sequence that is part of an external
4180
 *   parsed entity or the literal entity value of an internal parsed entity
4181
 * - other characters are processed by appending them to the normalized value
4182
 * If the declared value is not CDATA, then the XML processor must further
4183
 * process the normalized attribute value by discarding any leading and
4184
 * trailing space (#x20) characters, and by replacing sequences of space
4185
 * (#x20) characters by a single space (#x20) character.
4186
 * All attributes for which no declaration has been read should be treated
4187
 * by a non-validating parser as if declared CDATA.
4188
 *
4189
 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
4190
 */
4191
4192
4193
xmlChar *
4194
132M
xmlParseAttValue(xmlParserCtxtPtr ctxt) {
4195
132M
    if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
4196
132M
    return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
4197
132M
}
4198
4199
/**
4200
 * xmlParseSystemLiteral:
4201
 * @ctxt:  an XML parser context
4202
 *
4203
 * DEPRECATED: Internal function, don't use.
4204
 *
4205
 * parse an XML Literal
4206
 *
4207
 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4208
 *
4209
 * Returns the SystemLiteral parsed or NULL
4210
 */
4211
4212
xmlChar *
4213
335k
xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
4214
335k
    xmlChar *buf = NULL;
4215
335k
    int len = 0;
4216
335k
    int size = XML_PARSER_BUFFER_SIZE;
4217
335k
    int cur, l;
4218
335k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4219
72.6k
                    XML_MAX_TEXT_LENGTH :
4220
335k
                    XML_MAX_NAME_LENGTH;
4221
335k
    xmlChar stop;
4222
335k
    int state = ctxt->instate;
4223
335k
    int count = 0;
4224
4225
335k
    SHRINK;
4226
335k
    if (RAW == '"') {
4227
297k
        NEXT;
4228
297k
  stop = '"';
4229
297k
    } else if (RAW == '\'') {
4230
29.4k
        NEXT;
4231
29.4k
  stop = '\'';
4232
29.4k
    } else {
4233
8.45k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4234
8.45k
  return(NULL);
4235
8.45k
    }
4236
4237
326k
    buf = (xmlChar *) xmlMallocAtomic(size);
4238
326k
    if (buf == NULL) {
4239
0
        xmlErrMemory(ctxt, NULL);
4240
0
  return(NULL);
4241
0
    }
4242
326k
    ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
4243
326k
    cur = CUR_CHAR(l);
4244
13.3M
    while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
4245
12.9M
  if (len + 5 >= size) {
4246
16.7k
      xmlChar *tmp;
4247
4248
16.7k
      size *= 2;
4249
16.7k
      tmp = (xmlChar *) xmlRealloc(buf, size);
4250
16.7k
      if (tmp == NULL) {
4251
0
          xmlFree(buf);
4252
0
    xmlErrMemory(ctxt, NULL);
4253
0
    ctxt->instate = (xmlParserInputState) state;
4254
0
    return(NULL);
4255
0
      }
4256
16.7k
      buf = tmp;
4257
16.7k
  }
4258
12.9M
  count++;
4259
12.9M
  if (count > 50) {
4260
149k
      SHRINK;
4261
149k
      GROW;
4262
149k
      count = 0;
4263
149k
            if (ctxt->instate == XML_PARSER_EOF) {
4264
0
          xmlFree(buf);
4265
0
    return(NULL);
4266
0
            }
4267
149k
  }
4268
12.9M
  COPY_BUF(l,buf,len,cur);
4269
12.9M
  NEXTL(l);
4270
12.9M
  cur = CUR_CHAR(l);
4271
12.9M
  if (cur == 0) {
4272
5.79k
      GROW;
4273
5.79k
      SHRINK;
4274
5.79k
      cur = CUR_CHAR(l);
4275
5.79k
  }
4276
12.9M
        if (len > maxLength) {
4277
50
            xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
4278
50
            xmlFree(buf);
4279
50
            ctxt->instate = (xmlParserInputState) state;
4280
50
            return(NULL);
4281
50
        }
4282
12.9M
    }
4283
326k
    buf[len] = 0;
4284
326k
    ctxt->instate = (xmlParserInputState) state;
4285
326k
    if (!IS_CHAR(cur)) {
4286
7.42k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4287
319k
    } else {
4288
319k
  NEXT;
4289
319k
    }
4290
326k
    return(buf);
4291
326k
}
4292
4293
/**
4294
 * xmlParsePubidLiteral:
4295
 * @ctxt:  an XML parser context
4296
 *
4297
 * DEPRECATED: Internal function, don't use.
4298
 *
4299
 * parse an XML public literal
4300
 *
4301
 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4302
 *
4303
 * Returns the PubidLiteral parsed or NULL.
4304
 */
4305
4306
xmlChar *
4307
102k
xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
4308
102k
    xmlChar *buf = NULL;
4309
102k
    int len = 0;
4310
102k
    int size = XML_PARSER_BUFFER_SIZE;
4311
102k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4312
28.0k
                    XML_MAX_TEXT_LENGTH :
4313
102k
                    XML_MAX_NAME_LENGTH;
4314
102k
    xmlChar cur;
4315
102k
    xmlChar stop;
4316
102k
    int count = 0;
4317
102k
    xmlParserInputState oldstate = ctxt->instate;
4318
4319
102k
    SHRINK;
4320
102k
    if (RAW == '"') {
4321
80.8k
        NEXT;
4322
80.8k
  stop = '"';
4323
80.8k
    } else if (RAW == '\'') {
4324
21.0k
        NEXT;
4325
21.0k
  stop = '\'';
4326
21.0k
    } else {
4327
1.00k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4328
1.00k
  return(NULL);
4329
1.00k
    }
4330
101k
    buf = (xmlChar *) xmlMallocAtomic(size);
4331
101k
    if (buf == NULL) {
4332
0
  xmlErrMemory(ctxt, NULL);
4333
0
  return(NULL);
4334
0
    }
4335
101k
    ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
4336
101k
    cur = CUR;
4337
6.42M
    while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
4338
6.32M
  if (len + 1 >= size) {
4339
10.8k
      xmlChar *tmp;
4340
4341
10.8k
      size *= 2;
4342
10.8k
      tmp = (xmlChar *) xmlRealloc(buf, size);
4343
10.8k
      if (tmp == NULL) {
4344
0
    xmlErrMemory(ctxt, NULL);
4345
0
    xmlFree(buf);
4346
0
    return(NULL);
4347
0
      }
4348
10.8k
      buf = tmp;
4349
10.8k
  }
4350
6.32M
  buf[len++] = cur;
4351
6.32M
  count++;
4352
6.32M
  if (count > 50) {
4353
76.9k
      SHRINK;
4354
76.9k
      GROW;
4355
76.9k
      count = 0;
4356
76.9k
            if (ctxt->instate == XML_PARSER_EOF) {
4357
0
    xmlFree(buf);
4358
0
    return(NULL);
4359
0
            }
4360
76.9k
  }
4361
6.32M
  NEXT;
4362
6.32M
  cur = CUR;
4363
6.32M
  if (cur == 0) {
4364
1.36k
      GROW;
4365
1.36k
      SHRINK;
4366
1.36k
      cur = CUR;
4367
1.36k
  }
4368
6.32M
        if (len > maxLength) {
4369
0
            xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
4370
0
            xmlFree(buf);
4371
0
            return(NULL);
4372
0
        }
4373
6.32M
    }
4374
101k
    buf[len] = 0;
4375
101k
    if (cur != stop) {
4376
7.71k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4377
94.0k
    } else {
4378
94.0k
  NEXT;
4379
94.0k
    }
4380
101k
    ctxt->instate = oldstate;
4381
101k
    return(buf);
4382
101k
}
4383
4384
static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
4385
4386
/*
 * Lookup table used for the test in the inner loop of the char data
 * testing, indexed by byte value. An entry equals its own index for
 * 0x09 and for 0x20-0x7F other than '&', '<' and ']'; every other
 * entry, including CR, LF and all non-ASCII bytes, is 0x00.
 */
static const unsigned char test_char_data[256] = {
    /* 0x00-0x0F: only 0x9 is kept, CR/LF are handled separately */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x10-0x1F */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x20-0x2F: '&' (0x26) is excluded */
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27,
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    /* 0x30-0x3F: '<' (0x3C) is excluded */
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F,
    /* 0x40-0x4F */
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    /* 0x50-0x5F: ']' (0x5D) is excluded */
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F,
    /* 0x60-0x6F */
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    /* 0x70-0x7F */
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
    /* 0x80-0xFF: non-ascii */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
4423
4424
/**
4425
 * xmlParseCharData:
4426
 * @ctxt:  an XML parser context
4427
 * @cdata:  int indicating whether we are within a CDATA section
4428
 *
4429
 * DEPRECATED: Internal function, don't use.
4430
 *
4431
 * parse a CharData section.
4432
 * if we are within a CDATA section ']]>' marks an end of section.
4433
 *
4434
 * The right angle bracket (>) may be represented using the string "&gt;",
4435
 * and must, for compatibility, be escaped using "&gt;" or a character
4436
 * reference when it appears in the string "]]>" in content, when that
4437
 * string is not marking the end of a CDATA section.
4438
 *
4439
 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4440
 */
4441
4442
void
4443
533M
xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
4444
533M
    const xmlChar *in;
4445
533M
    int nbchar = 0;
4446
533M
    int line = ctxt->input->line;
4447
533M
    int col = ctxt->input->col;
4448
533M
    int ccol;
4449
4450
533M
    SHRINK;
4451
533M
    GROW;
4452
    /*
4453
     * Accelerated common case where input don't need to be
4454
     * modified before passing it to the handler.
4455
     */
4456
533M
    if (!cdata) {
4457
533M
  in = ctxt->input->cur;
4458
577M
  do {
4459
699M
get_more_space:
4460
1.22G
      while (*in == 0x20) { in++; ctxt->input->col++; }
4461
699M
      if (*in == 0xA) {
4462
163M
    do {
4463
163M
        ctxt->input->line++; ctxt->input->col = 1;
4464
163M
        in++;
4465
163M
    } while (*in == 0xA);
4466
122M
    goto get_more_space;
4467
122M
      }
4468
577M
      if (*in == '<') {
4469
76.1M
    nbchar = in - ctxt->input->cur;
4470
76.1M
    if (nbchar > 0) {
4471
76.0M
        const xmlChar *tmp = ctxt->input->cur;
4472
76.0M
        ctxt->input->cur = in;
4473
4474
76.0M
        if ((ctxt->sax != NULL) &&
4475
76.0M
            (ctxt->sax->ignorableWhitespace !=
4476
76.0M
             ctxt->sax->characters)) {
4477
55.4M
      if (areBlanks(ctxt, tmp, nbchar, 1)) {
4478
4.51M
          if (ctxt->sax->ignorableWhitespace != NULL)
4479
4.51M
        ctxt->sax->ignorableWhitespace(ctxt->userData,
4480
4.51M
                   tmp, nbchar);
4481
50.9M
      } else {
4482
50.9M
          if (ctxt->sax->characters != NULL)
4483
50.9M
        ctxt->sax->characters(ctxt->userData,
4484
50.9M
                  tmp, nbchar);
4485
50.9M
          if (*ctxt->space == -1)
4486
15.5M
              *ctxt->space = -2;
4487
50.9M
      }
4488
55.4M
        } else if ((ctxt->sax != NULL) &&
4489
20.5M
                   (ctxt->sax->characters != NULL)) {
4490
20.5M
      ctxt->sax->characters(ctxt->userData,
4491
20.5M
                tmp, nbchar);
4492
20.5M
        }
4493
76.0M
    }
4494
76.1M
    return;
4495
76.1M
      }
4496
4497
649M
get_more:
4498
649M
            ccol = ctxt->input->col;
4499
8.18G
      while (test_char_data[*in]) {
4500
7.53G
    in++;
4501
7.53G
    ccol++;
4502
7.53G
      }
4503
649M
      ctxt->input->col = ccol;
4504
649M
      if (*in == 0xA) {
4505
145M
    do {
4506
145M
        ctxt->input->line++; ctxt->input->col = 1;
4507
145M
        in++;
4508
145M
    } while (*in == 0xA);
4509
115M
    goto get_more;
4510
115M
      }
4511
533M
      if (*in == ']') {
4512
33.0M
    if ((in[1] == ']') && (in[2] == '>')) {
4513
785k
        xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4514
785k
        ctxt->input->cur = in + 1;
4515
785k
        return;
4516
785k
    }
4517
32.2M
    in++;
4518
32.2M
    ctxt->input->col++;
4519
32.2M
    goto get_more;
4520
33.0M
      }
4521
500M
      nbchar = in - ctxt->input->cur;
4522
500M
      if (nbchar > 0) {
4523
301M
    if ((ctxt->sax != NULL) &&
4524
301M
        (ctxt->sax->ignorableWhitespace !=
4525
301M
         ctxt->sax->characters) &&
4526
301M
        (IS_BLANK_CH(*ctxt->input->cur))) {
4527
83.9M
        const xmlChar *tmp = ctxt->input->cur;
4528
83.9M
        ctxt->input->cur = in;
4529
4530
83.9M
        if (areBlanks(ctxt, tmp, nbchar, 0)) {
4531
2.29M
            if (ctxt->sax->ignorableWhitespace != NULL)
4532
2.29M
          ctxt->sax->ignorableWhitespace(ctxt->userData,
4533
2.29M
                 tmp, nbchar);
4534
81.6M
        } else {
4535
81.6M
            if (ctxt->sax->characters != NULL)
4536
81.6M
          ctxt->sax->characters(ctxt->userData,
4537
81.6M
              tmp, nbchar);
4538
81.6M
      if (*ctxt->space == -1)
4539
23.0M
          *ctxt->space = -2;
4540
81.6M
        }
4541
83.9M
                    line = ctxt->input->line;
4542
83.9M
                    col = ctxt->input->col;
4543
217M
    } else if (ctxt->sax != NULL) {
4544
217M
        if (ctxt->sax->characters != NULL)
4545
217M
      ctxt->sax->characters(ctxt->userData,
4546
217M
                ctxt->input->cur, nbchar);
4547
217M
                    line = ctxt->input->line;
4548
217M
                    col = ctxt->input->col;
4549
217M
    }
4550
                /* something really bad happened in the SAX callback */
4551
301M
                if (ctxt->instate != XML_PARSER_CONTENT)
4552
0
                    return;
4553
301M
      }
4554
500M
      ctxt->input->cur = in;
4555
500M
      if (*in == 0xD) {
4556
45.8M
    in++;
4557
45.8M
    if (*in == 0xA) {
4558
44.2M
        ctxt->input->cur = in;
4559
44.2M
        in++;
4560
44.2M
        ctxt->input->line++; ctxt->input->col = 1;
4561
44.2M
        continue; /* while */
4562
44.2M
    }
4563
1.52M
    in--;
4564
1.52M
      }
4565
455M
      if (*in == '<') {
4566
242M
    return;
4567
242M
      }
4568
213M
      if (*in == '&') {
4569
41.9M
    return;
4570
41.9M
      }
4571
171M
      SHRINK;
4572
171M
      GROW;
4573
171M
            if (ctxt->instate == XML_PARSER_EOF)
4574
0
    return;
4575
171M
      in = ctxt->input->cur;
4576
216M
  } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09) || (*in == 0x0a));
4577
172M
  nbchar = 0;
4578
172M
    }
4579
172M
    ctxt->input->line = line;
4580
172M
    ctxt->input->col = col;
4581
172M
    xmlParseCharDataComplex(ctxt, cdata);
4582
172M
}
4583
4584
/**
4585
 * xmlParseCharDataComplex:
4586
 * @ctxt:  an XML parser context
4587
 * @cdata:  int indicating whether we are within a CDATA section
4588
 *
4589
 * parse a CharData section.this is the fallback function
4590
 * of xmlParseCharData() when the parsing requires handling
4591
 * of non-ASCII characters.
4592
 */
4593
static void
4594
172M
xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
4595
172M
    xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4596
172M
    int nbchar = 0;
4597
172M
    int cur, l;
4598
172M
    int count = 0;
4599
4600
172M
    SHRINK;
4601
172M
    GROW;
4602
172M
    cur = CUR_CHAR(l);
4603
1.52G
    while ((cur != '<') && /* checked */
4604
1.52G
           (cur != '&') &&
4605
1.52G
     (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
4606
1.35G
  if ((cur == ']') && (NXT(1) == ']') &&
4607
1.35G
      (NXT(2) == '>')) {
4608
277k
      if (cdata) break;
4609
277k
      else {
4610
277k
    xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4611
277k
      }
4612
277k
  }
4613
1.35G
  COPY_BUF(l,buf,nbchar,cur);
4614
  /* move current position before possible calling of ctxt->sax->characters */
4615
1.35G
  NEXTL(l);
4616
1.35G
  cur = CUR_CHAR(l);
4617
1.35G
  if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
4618
2.95M
      buf[nbchar] = 0;
4619
4620
      /*
4621
       * OK the segment is to be consumed as chars.
4622
       */
4623
2.95M
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4624
294k
    if (areBlanks(ctxt, buf, nbchar, 0)) {
4625
4.08k
        if (ctxt->sax->ignorableWhitespace != NULL)
4626
4.08k
      ctxt->sax->ignorableWhitespace(ctxt->userData,
4627
4.08k
                                     buf, nbchar);
4628
290k
    } else {
4629
290k
        if (ctxt->sax->characters != NULL)
4630
290k
      ctxt->sax->characters(ctxt->userData, buf, nbchar);
4631
290k
        if ((ctxt->sax->characters !=
4632
290k
             ctxt->sax->ignorableWhitespace) &&
4633
290k
      (*ctxt->space == -1))
4634
49.4k
      *ctxt->space = -2;
4635
290k
    }
4636
294k
      }
4637
2.95M
      nbchar = 0;
4638
            /* something really bad happened in the SAX callback */
4639
2.95M
            if (ctxt->instate != XML_PARSER_CONTENT)
4640
0
                return;
4641
2.95M
  }
4642
1.35G
  count++;
4643
1.35G
  if (count > 50) {
4644
22.2M
      SHRINK;
4645
22.2M
      GROW;
4646
22.2M
      count = 0;
4647
22.2M
            if (ctxt->instate == XML_PARSER_EOF)
4648
0
    return;
4649
22.2M
  }
4650
1.35G
    }
4651
172M
    if (nbchar != 0) {
4652
14.0M
        buf[nbchar] = 0;
4653
  /*
4654
   * OK the segment is to be consumed as chars.
4655
   */
4656
14.0M
  if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4657
2.92M
      if (areBlanks(ctxt, buf, nbchar, 0)) {
4658
7.98k
    if (ctxt->sax->ignorableWhitespace != NULL)
4659
7.98k
        ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4660
2.92M
      } else {
4661
2.92M
    if (ctxt->sax->characters != NULL)
4662
2.92M
        ctxt->sax->characters(ctxt->userData, buf, nbchar);
4663
2.92M
    if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4664
2.92M
        (*ctxt->space == -1))
4665
1.26M
        *ctxt->space = -2;
4666
2.92M
      }
4667
2.92M
  }
4668
14.0M
    }
4669
172M
    if ((cur != 0) && (!IS_CHAR(cur))) {
4670
  /* Generate the error and skip the offending character */
4671
157M
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4672
157M
                          "PCDATA invalid Char value %d\n",
4673
157M
                    cur);
4674
157M
  NEXTL(l);
4675
157M
    }
4676
172M
}
4677
4678
/**
4679
 * xmlParseExternalID:
4680
 * @ctxt:  an XML parser context
4681
 * @publicID:  a xmlChar** receiving PubidLiteral
4682
 * @strict: indicate whether we should restrict parsing to only
4683
 *          production [75], see NOTE below
4684
 *
4685
 * DEPRECATED: Internal function, don't use.
4686
 *
4687
 * Parse an External ID or a Public ID
4688
 *
4689
 * NOTE: Productions [75] and [83] interact badly since [75] can generate
4690
 *       'PUBLIC' S PubidLiteral S SystemLiteral
4691
 *
4692
 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4693
 *                   | 'PUBLIC' S PubidLiteral S SystemLiteral
4694
 *
4695
 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4696
 *
4697
 * Returns the function returns SystemLiteral and in the second
4698
 *                case publicID receives PubidLiteral, is strict is off
4699
 *                it is possible to return NULL and have publicID set.
4700
 */
4701
4702
xmlChar *
4703
708k
xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4704
708k
    xmlChar *URI = NULL;
4705
4706
708k
    SHRINK;
4707
4708
708k
    *publicID = NULL;
4709
708k
    if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
4710
240k
        SKIP(6);
4711
240k
  if (SKIP_BLANKS == 0) {
4712
900
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4713
900
                     "Space required after 'SYSTEM'\n");
4714
900
  }
4715
240k
  URI = xmlParseSystemLiteral(ctxt);
4716
240k
  if (URI == NULL) {
4717
1.23k
      xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4718
1.23k
        }
4719
468k
    } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
4720
102k
        SKIP(6);
4721
102k
  if (SKIP_BLANKS == 0) {
4722
716
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4723
716
        "Space required after 'PUBLIC'\n");
4724
716
  }
4725
102k
  *publicID = xmlParsePubidLiteral(ctxt);
4726
102k
  if (*publicID == NULL) {
4727
1.00k
      xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
4728
1.00k
  }
4729
102k
  if (strict) {
4730
      /*
4731
       * We don't handle [83] so "S SystemLiteral" is required.
4732
       */
4733
94.9k
      if (SKIP_BLANKS == 0) {
4734
6.47k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4735
6.47k
      "Space required after the Public Identifier\n");
4736
6.47k
      }
4737
94.9k
  } else {
4738
      /*
4739
       * We handle [83] so we return immediately, if
4740
       * "S SystemLiteral" is not detected. We skip blanks if no
4741
             * system literal was found, but this is harmless since we must
4742
             * be at the end of a NotationDecl.
4743
       */
4744
7.87k
      if (SKIP_BLANKS == 0) return(NULL);
4745
436
      if ((CUR != '\'') && (CUR != '"')) return(NULL);
4746
436
  }
4747
95.0k
  URI = xmlParseSystemLiteral(ctxt);
4748
95.0k
  if (URI == NULL) {
4749
7.26k
      xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4750
7.26k
        }
4751
95.0k
    }
4752
701k
    return(URI);
4753
708k
}
4754
4755
/**
4756
 * xmlParseCommentComplex:
4757
 * @ctxt:  an XML parser context
4758
 * @buf:  the already parsed part of the buffer
4759
 * @len:  number of bytes in the buffer
4760
 * @size:  allocated size of the buffer
4761
 *
4762
 * Skip an XML (SGML) comment <!-- .... -->
4763
 *  The spec says that "For compatibility, the string "--" (double-hyphen)
4764
 *  must not occur within comments. "
4765
 * This is the slow routine in case the accelerator for ascii didn't work
4766
 *
4767
 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4768
 */
4769
static void
4770
xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf,
4771
2.53M
                       size_t len, size_t size) {
4772
2.53M
    int q, ql;
4773
2.53M
    int r, rl;
4774
2.53M
    int cur, l;
4775
2.53M
    size_t count = 0;
4776
2.53M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4777
2.26M
                       XML_MAX_HUGE_LENGTH :
4778
2.53M
                       XML_MAX_TEXT_LENGTH;
4779
2.53M
    int inputid;
4780
4781
2.53M
    inputid = ctxt->input->id;
4782
4783
2.53M
    if (buf == NULL) {
4784
191k
        len = 0;
4785
191k
  size = XML_PARSER_BUFFER_SIZE;
4786
191k
  buf = (xmlChar *) xmlMallocAtomic(size);
4787
191k
  if (buf == NULL) {
4788
0
      xmlErrMemory(ctxt, NULL);
4789
0
      return;
4790
0
  }
4791
191k
    }
4792
2.53M
    GROW; /* Assure there's enough input data */
4793
2.53M
    q = CUR_CHAR(ql);
4794
2.53M
    if (q == 0)
4795
285k
        goto not_terminated;
4796
2.25M
    if (!IS_CHAR(q)) {
4797
543k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4798
543k
                          "xmlParseComment: invalid xmlChar value %d\n",
4799
543k
                    q);
4800
543k
  xmlFree (buf);
4801
543k
  return;
4802
543k
    }
4803
1.70M
    NEXTL(ql);
4804
1.70M
    r = CUR_CHAR(rl);
4805
1.70M
    if (r == 0)
4806
52.3k
        goto not_terminated;
4807
1.65M
    if (!IS_CHAR(r)) {
4808
110k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4809
110k
                          "xmlParseComment: invalid xmlChar value %d\n",
4810
110k
                    q);
4811
110k
  xmlFree (buf);
4812
110k
  return;
4813
110k
    }
4814
1.54M
    NEXTL(rl);
4815
1.54M
    cur = CUR_CHAR(l);
4816
1.54M
    if (cur == 0)
4817
29.1k
        goto not_terminated;
4818
337M
    while (IS_CHAR(cur) && /* checked */
4819
337M
           ((cur != '>') ||
4820
336M
      (r != '-') || (q != '-'))) {
4821
335M
  if ((r == '-') && (q == '-')) {
4822
757k
      xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
4823
757k
  }
4824
335M
  if (len + 5 >= size) {
4825
603k
      xmlChar *new_buf;
4826
603k
            size_t new_size;
4827
4828
603k
      new_size = size * 2;
4829
603k
      new_buf = (xmlChar *) xmlRealloc(buf, new_size);
4830
603k
      if (new_buf == NULL) {
4831
0
    xmlFree (buf);
4832
0
    xmlErrMemory(ctxt, NULL);
4833
0
    return;
4834
0
      }
4835
603k
      buf = new_buf;
4836
603k
            size = new_size;
4837
603k
  }
4838
335M
  COPY_BUF(ql,buf,len,q);
4839
335M
  q = r;
4840
335M
  ql = rl;
4841
335M
  r = cur;
4842
335M
  rl = l;
4843
4844
335M
  count++;
4845
335M
  if (count > 50) {
4846
6.08M
      SHRINK;
4847
6.08M
      GROW;
4848
6.08M
      count = 0;
4849
6.08M
            if (ctxt->instate == XML_PARSER_EOF) {
4850
0
    xmlFree(buf);
4851
0
    return;
4852
0
            }
4853
6.08M
  }
4854
335M
  NEXTL(l);
4855
335M
  cur = CUR_CHAR(l);
4856
335M
  if (cur == 0) {
4857
234k
      SHRINK;
4858
234k
      GROW;
4859
234k
      cur = CUR_CHAR(l);
4860
234k
  }
4861
4862
335M
        if (len > maxLength) {
4863
0
            xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4864
0
                         "Comment too big found", NULL);
4865
0
            xmlFree (buf);
4866
0
            return;
4867
0
        }
4868
335M
    }
4869
1.51M
    buf[len] = 0;
4870
1.51M
    if (cur == 0) {
4871
234k
  xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4872
234k
                       "Comment not terminated \n<!--%.50s\n", buf);
4873
1.28M
    } else if (!IS_CHAR(cur)) {
4874
254k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4875
254k
                          "xmlParseComment: invalid xmlChar value %d\n",
4876
254k
                    cur);
4877
1.02M
    } else {
4878
1.02M
  if (inputid != ctxt->input->id) {
4879
0
      xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4880
0
               "Comment doesn't start and stop in the same"
4881
0
                           " entity\n");
4882
0
  }
4883
1.02M
        NEXT;
4884
1.02M
  if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4885
1.02M
      (!ctxt->disableSAX))
4886
78.8k
      ctxt->sax->comment(ctxt->userData, buf);
4887
1.02M
    }
4888
1.51M
    xmlFree(buf);
4889
1.51M
    return;
4890
366k
not_terminated:
4891
366k
    xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4892
366k
       "Comment not terminated\n", NULL);
4893
366k
    xmlFree(buf);
4894
366k
    return;
4895
1.51M
}
4896
4897
/**
4898
 * xmlParseComment:
4899
 * @ctxt:  an XML parser context
4900
 *
4901
 * DEPRECATED: Internal function, don't use.
4902
 *
4903
 * Skip an XML (SGML) comment <!-- .... -->
4904
 *  The spec says that "For compatibility, the string "--" (double-hyphen)
4905
 *  must not occur within comments. "
4906
 *
4907
 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4908
 */
4909
void
4910
32.1M
xmlParseComment(xmlParserCtxtPtr ctxt) {
4911
32.1M
    xmlChar *buf = NULL;
4912
32.1M
    size_t size = XML_PARSER_BUFFER_SIZE;
4913
32.1M
    size_t len = 0;
4914
32.1M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4915
28.1M
                       XML_MAX_HUGE_LENGTH :
4916
32.1M
                       XML_MAX_TEXT_LENGTH;
4917
32.1M
    xmlParserInputState state;
4918
32.1M
    const xmlChar *in;
4919
32.1M
    size_t nbchar = 0;
4920
32.1M
    int ccol;
4921
32.1M
    int inputid;
4922
4923
    /*
4924
     * Check that there is a comment right here.
4925
     */
4926
32.1M
    if ((RAW != '<') || (NXT(1) != '!') ||
4927
32.1M
        (NXT(2) != '-') || (NXT(3) != '-')) return;
4928
32.1M
    state = ctxt->instate;
4929
32.1M
    ctxt->instate = XML_PARSER_COMMENT;
4930
32.1M
    inputid = ctxt->input->id;
4931
32.1M
    SKIP(4);
4932
32.1M
    SHRINK;
4933
32.1M
    GROW;
4934
4935
    /*
4936
     * Accelerated common case where input don't need to be
4937
     * modified before passing it to the handler.
4938
     */
4939
32.1M
    in = ctxt->input->cur;
4940
32.1M
    do {
4941
32.1M
  if (*in == 0xA) {
4942
1.81M
      do {
4943
1.81M
    ctxt->input->line++; ctxt->input->col = 1;
4944
1.81M
    in++;
4945
1.81M
      } while (*in == 0xA);
4946
1.56M
  }
4947
79.3M
get_more:
4948
79.3M
        ccol = ctxt->input->col;
4949
2.30G
  while (((*in > '-') && (*in <= 0x7F)) ||
4950
2.30G
         ((*in >= 0x20) && (*in < '-')) ||
4951
2.30G
         (*in == 0x09)) {
4952
2.22G
        in++;
4953
2.22G
        ccol++;
4954
2.22G
  }
4955
79.3M
  ctxt->input->col = ccol;
4956
79.3M
  if (*in == 0xA) {
4957
39.2M
      do {
4958
39.2M
    ctxt->input->line++; ctxt->input->col = 1;
4959
39.2M
    in++;
4960
39.2M
      } while (*in == 0xA);
4961
22.3M
      goto get_more;
4962
22.3M
  }
4963
57.0M
  nbchar = in - ctxt->input->cur;
4964
  /*
4965
   * save current set of data
4966
   */
4967
57.0M
  if (nbchar > 0) {
4968
56.7M
      if ((ctxt->sax != NULL) &&
4969
56.7M
    (ctxt->sax->comment != NULL)) {
4970
56.7M
    if (buf == NULL) {
4971
31.9M
        if ((*in == '-') && (in[1] == '-'))
4972
23.7M
            size = nbchar + 1;
4973
8.17M
        else
4974
8.17M
            size = XML_PARSER_BUFFER_SIZE + nbchar;
4975
31.9M
        buf = (xmlChar *) xmlMallocAtomic(size);
4976
31.9M
        if (buf == NULL) {
4977
0
            xmlErrMemory(ctxt, NULL);
4978
0
      ctxt->instate = state;
4979
0
      return;
4980
0
        }
4981
31.9M
        len = 0;
4982
31.9M
    } else if (len + nbchar + 1 >= size) {
4983
2.80M
        xmlChar *new_buf;
4984
2.80M
        size  += len + nbchar + XML_PARSER_BUFFER_SIZE;
4985
2.80M
        new_buf = (xmlChar *) xmlRealloc(buf, size);
4986
2.80M
        if (new_buf == NULL) {
4987
0
            xmlFree (buf);
4988
0
      xmlErrMemory(ctxt, NULL);
4989
0
      ctxt->instate = state;
4990
0
      return;
4991
0
        }
4992
2.80M
        buf = new_buf;
4993
2.80M
    }
4994
56.7M
    memcpy(&buf[len], ctxt->input->cur, nbchar);
4995
56.7M
    len += nbchar;
4996
56.7M
    buf[len] = 0;
4997
56.7M
      }
4998
56.7M
  }
4999
57.0M
        if (len > maxLength) {
5000
0
            xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
5001
0
                         "Comment too big found", NULL);
5002
0
            xmlFree (buf);
5003
0
            return;
5004
0
        }
5005
57.0M
  ctxt->input->cur = in;
5006
57.0M
  if (*in == 0xA) {
5007
0
      in++;
5008
0
      ctxt->input->line++; ctxt->input->col = 1;
5009
0
  }
5010
57.0M
  if (*in == 0xD) {
5011
1.99M
      in++;
5012
1.99M
      if (*in == 0xA) {
5013
1.90M
    ctxt->input->cur = in;
5014
1.90M
    in++;
5015
1.90M
    ctxt->input->line++; ctxt->input->col = 1;
5016
1.90M
    goto get_more;
5017
1.90M
      }
5018
86.7k
      in--;
5019
86.7k
  }
5020
55.1M
  SHRINK;
5021
55.1M
  GROW;
5022
55.1M
        if (ctxt->instate == XML_PARSER_EOF) {
5023
0
            xmlFree(buf);
5024
0
            return;
5025
0
        }
5026
55.1M
  in = ctxt->input->cur;
5027
55.1M
  if (*in == '-') {
5028
52.6M
      if (in[1] == '-') {
5029
37.1M
          if (in[2] == '>') {
5030
29.5M
        if (ctxt->input->id != inputid) {
5031
0
      xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5032
0
                     "comment doesn't start and stop in the"
5033
0
                                       " same entity\n");
5034
0
        }
5035
29.5M
        SKIP(3);
5036
29.5M
        if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
5037
29.5M
            (!ctxt->disableSAX)) {
5038
2.41M
      if (buf != NULL)
5039
2.41M
          ctxt->sax->comment(ctxt->userData, buf);
5040
1.53k
      else
5041
1.53k
          ctxt->sax->comment(ctxt->userData, BAD_CAST "");
5042
2.41M
        }
5043
29.5M
        if (buf != NULL)
5044
29.5M
            xmlFree(buf);
5045
29.5M
        if (ctxt->instate != XML_PARSER_EOF)
5046
29.5M
      ctxt->instate = state;
5047
29.5M
        return;
5048
29.5M
    }
5049
7.55M
    if (buf != NULL) {
5050
7.52M
        xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5051
7.52M
                          "Double hyphen within comment: "
5052
7.52M
                                      "<!--%.50s\n",
5053
7.52M
              buf);
5054
7.52M
    } else
5055
26.9k
        xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5056
26.9k
                          "Double hyphen within comment\n", NULL);
5057
7.55M
                if (ctxt->instate == XML_PARSER_EOF) {
5058
0
                    xmlFree(buf);
5059
0
                    return;
5060
0
                }
5061
7.55M
    in++;
5062
7.55M
    ctxt->input->col++;
5063
7.55M
      }
5064
23.0M
      in++;
5065
23.0M
      ctxt->input->col++;
5066
23.0M
      goto get_more;
5067
52.6M
  }
5068
55.1M
    } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09) || (*in == 0x0a));
5069
2.53M
    xmlParseCommentComplex(ctxt, buf, len, size);
5070
2.53M
    ctxt->instate = state;
5071
2.53M
    return;
5072
32.1M
}
5073
5074
5075
/**
5076
 * xmlParsePITarget:
5077
 * @ctxt:  an XML parser context
5078
 *
5079
 * DEPRECATED: Internal function, don't use.
5080
 *
5081
 * parse the name of a PI
5082
 *
5083
 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
5084
 *
5085
 * Returns the PITarget name or NULL
5086
 */
5087
5088
const xmlChar *
5089
2.76M
xmlParsePITarget(xmlParserCtxtPtr ctxt) {
5090
2.76M
    const xmlChar *name;
5091
5092
2.76M
    name = xmlParseName(ctxt);
5093
2.76M
    if ((name != NULL) &&
5094
2.76M
        ((name[0] == 'x') || (name[0] == 'X')) &&
5095
2.76M
        ((name[1] == 'm') || (name[1] == 'M')) &&
5096
2.76M
        ((name[2] == 'l') || (name[2] == 'L'))) {
5097
656k
  int i;
5098
656k
  if ((name[0] == 'x') && (name[1] == 'm') &&
5099
656k
      (name[2] == 'l') && (name[3] == 0)) {
5100
536k
      xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5101
536k
     "XML declaration allowed only at the start of the document\n");
5102
536k
      return(name);
5103
536k
  } else if (name[3] == 0) {
5104
6.13k
      xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
5105
6.13k
      return(name);
5106
6.13k
  }
5107
181k
  for (i = 0;;i++) {
5108
181k
      if (xmlW3CPIs[i] == NULL) break;
5109
147k
      if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
5110
79.7k
          return(name);
5111
147k
  }
5112
34.0k
  xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5113
34.0k
          "xmlParsePITarget: invalid name prefix 'xml'\n",
5114
34.0k
          NULL, NULL);
5115
34.0k
    }
5116
2.14M
    if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
5117
20.9k
  xmlNsErr(ctxt, XML_NS_ERR_COLON,
5118
20.9k
     "colons are forbidden from PI names '%s'\n", name, NULL, NULL);
5119
20.9k
    }
5120
2.14M
    return(name);
5121
2.76M
}
5122
5123
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * Parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype information.
 * This will add the given catalog to the parsing context in order
 * to be used if there is a resolution need further down in the document.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    xmlChar *URL = NULL;
    const xmlChar *p = catalog;
    const xmlChar *start;
    xmlChar quote;

    /* pseudo-attribute name */
    for (; IS_BLANK_CH(*p); p++)
        ;
    if (xmlStrncmp(p, BAD_CAST"catalog", 7))
        goto error;
    p += 7;

    for (; IS_BLANK_CH(*p); p++)
        ;
    /*
     * NOTE(review): a missing '=' is dropped silently, unlike the other
     * syntax errors below which emit a warning - confirm this is intended.
     */
    if (*p != '=')
        return;
    p++;

    /* quoted value */
    for (; IS_BLANK_CH(*p); p++)
        ;
    quote = *p;
    if ((quote != '\'') && (quote != '"'))
        goto error;
    p++;
    start = p;
    while ((*p != 0) && (*p != quote))
        p++;
    if (*p == 0)
        goto error;
    URL = xmlStrndup(start, p - start);
    p++;

    /* only blanks may follow the closing quote */
    for (; IS_BLANK_CH(*p); p++)
        ;
    if (*p != 0)
        goto error;

    if (URL != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
        xmlFree(URL);
    }
    return;

error:
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
                  "Catalog PI syntax error: %s\n",
                  catalog, NULL);
    if (URL != NULL)
        xmlFree(URL);
}
#endif
5184
5185
/**
5186
 * xmlParsePI:
5187
 * @ctxt:  an XML parser context
5188
 *
5189
 * DEPRECATED: Internal function, don't use.
5190
 *
5191
 * parse an XML Processing Instruction.
5192
 *
5193
 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
5194
 *
5195
 * The processing is transferred to SAX once parsed.
5196
 */
5197
5198
void
5199
2.76M
xmlParsePI(xmlParserCtxtPtr ctxt) {
5200
2.76M
    xmlChar *buf = NULL;
5201
2.76M
    size_t len = 0;
5202
2.76M
    size_t size = XML_PARSER_BUFFER_SIZE;
5203
2.76M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
5204
2.38M
                       XML_MAX_HUGE_LENGTH :
5205
2.76M
                       XML_MAX_TEXT_LENGTH;
5206
2.76M
    int cur, l;
5207
2.76M
    const xmlChar *target;
5208
2.76M
    xmlParserInputState state;
5209
2.76M
    int count = 0;
5210
5211
2.76M
    if ((RAW == '<') && (NXT(1) == '?')) {
5212
2.76M
  int inputid = ctxt->input->id;
5213
2.76M
  state = ctxt->instate;
5214
2.76M
        ctxt->instate = XML_PARSER_PI;
5215
  /*
5216
   * this is a Processing Instruction.
5217
   */
5218
2.76M
  SKIP(2);
5219
2.76M
  SHRINK;
5220
5221
  /*
5222
   * Parse the target name and check for special support like
5223
   * namespace.
5224
   */
5225
2.76M
        target = xmlParsePITarget(ctxt);
5226
2.76M
  if (target != NULL) {
5227
2.63M
      if ((RAW == '?') && (NXT(1) == '>')) {
5228
240k
    if (inputid != ctxt->input->id) {
5229
0
        xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5230
0
                             "PI declaration doesn't start and stop in"
5231
0
                                   " the same entity\n");
5232
0
    }
5233
240k
    SKIP(2);
5234
5235
    /*
5236
     * SAX: PI detected.
5237
     */
5238
240k
    if ((ctxt->sax) && (!ctxt->disableSAX) &&
5239
240k
        (ctxt->sax->processingInstruction != NULL))
5240
85.4k
        ctxt->sax->processingInstruction(ctxt->userData,
5241
85.4k
                                         target, NULL);
5242
240k
    if (ctxt->instate != XML_PARSER_EOF)
5243
240k
        ctxt->instate = state;
5244
240k
    return;
5245
240k
      }
5246
2.39M
      buf = (xmlChar *) xmlMallocAtomic(size);
5247
2.39M
      if (buf == NULL) {
5248
0
    xmlErrMemory(ctxt, NULL);
5249
0
    ctxt->instate = state;
5250
0
    return;
5251
0
      }
5252
2.39M
      if (SKIP_BLANKS == 0) {
5253
193k
    xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
5254
193k
        "ParsePI: PI %s space expected\n", target);
5255
193k
      }
5256
2.39M
      cur = CUR_CHAR(l);
5257
856M
      while (IS_CHAR(cur) && /* checked */
5258
856M
       ((cur != '?') || (NXT(1) != '>'))) {
5259
854M
    if (len + 5 >= size) {
5260
1.73M
        xmlChar *tmp;
5261
1.73M
                    size_t new_size = size * 2;
5262
1.73M
        tmp = (xmlChar *) xmlRealloc(buf, new_size);
5263
1.73M
        if (tmp == NULL) {
5264
0
      xmlErrMemory(ctxt, NULL);
5265
0
      xmlFree(buf);
5266
0
      ctxt->instate = state;
5267
0
      return;
5268
0
        }
5269
1.73M
        buf = tmp;
5270
1.73M
                    size = new_size;
5271
1.73M
    }
5272
854M
    count++;
5273
854M
    if (count > 50) {
5274
16.1M
        SHRINK;
5275
16.1M
        GROW;
5276
16.1M
                    if (ctxt->instate == XML_PARSER_EOF) {
5277
0
                        xmlFree(buf);
5278
0
                        return;
5279
0
                    }
5280
16.1M
        count = 0;
5281
16.1M
    }
5282
854M
    COPY_BUF(l,buf,len,cur);
5283
854M
    NEXTL(l);
5284
854M
    cur = CUR_CHAR(l);
5285
854M
    if (cur == 0) {
5286
308k
        SHRINK;
5287
308k
        GROW;
5288
308k
        cur = CUR_CHAR(l);
5289
308k
    }
5290
854M
                if (len > maxLength) {
5291
0
                    xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5292
0
                                      "PI %s too big found", target);
5293
0
                    xmlFree(buf);
5294
0
                    ctxt->instate = state;
5295
0
                    return;
5296
0
                }
5297
854M
      }
5298
2.39M
      buf[len] = 0;
5299
2.39M
      if (cur != '?') {
5300
510k
    xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5301
510k
          "ParsePI: PI %s never end ...\n", target);
5302
1.88M
      } else {
5303
1.88M
    if (inputid != ctxt->input->id) {
5304
9
        xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5305
9
                             "PI declaration doesn't start and stop in"
5306
9
                                   " the same entity\n");
5307
9
    }
5308
1.88M
    SKIP(2);
5309
5310
1.88M
#ifdef LIBXML_CATALOG_ENABLED
5311
1.88M
    if (((state == XML_PARSER_MISC) ||
5312
1.88M
               (state == XML_PARSER_START)) &&
5313
1.88M
        (xmlStrEqual(target, XML_CATALOG_PI))) {
5314
0
        xmlCatalogAllow allow = xmlCatalogGetDefaults();
5315
0
        if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5316
0
      (allow == XML_CATA_ALLOW_ALL))
5317
0
      xmlParseCatalogPI(ctxt, buf);
5318
0
    }
5319
1.88M
#endif
5320
5321
5322
    /*
5323
     * SAX: PI detected.
5324
     */
5325
1.88M
    if ((ctxt->sax) && (!ctxt->disableSAX) &&
5326
1.88M
        (ctxt->sax->processingInstruction != NULL))
5327
281k
        ctxt->sax->processingInstruction(ctxt->userData,
5328
281k
                                         target, buf);
5329
1.88M
      }
5330
2.39M
      xmlFree(buf);
5331
2.39M
  } else {
5332
133k
      xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
5333
133k
  }
5334
2.52M
  if (ctxt->instate != XML_PARSER_EOF)
5335
2.52M
      ctxt->instate = state;
5336
2.52M
    }
5337
2.76M
}
5338
5339
/**
5340
 * xmlParseNotationDecl:
5341
 * @ctxt:  an XML parser context
5342
 *
5343
 * DEPRECATED: Internal function, don't use.
5344
 *
5345
 * parse a notation declaration
5346
 *
5347
 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID |  PublicID) S? '>'
5348
 *
5349
 * Hence there is actually 3 choices:
5350
 *     'PUBLIC' S PubidLiteral
5351
 *     'PUBLIC' S PubidLiteral S SystemLiteral
5352
 * and 'SYSTEM' S SystemLiteral
5353
 *
5354
 * See the NOTE on xmlParseExternalID().
5355
 */
5356
5357
void
5358
20.0k
xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
5359
20.0k
    const xmlChar *name;
5360
20.0k
    xmlChar *Pubid;
5361
20.0k
    xmlChar *Systemid;
5362
5363
20.0k
    if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5364
16.3k
  int inputid = ctxt->input->id;
5365
16.3k
  SHRINK;
5366
16.3k
  SKIP(10);
5367
16.3k
  if (SKIP_BLANKS == 0) {
5368
605
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5369
605
         "Space required after '<!NOTATION'\n");
5370
605
      return;
5371
605
  }
5372
5373
15.7k
        name = xmlParseName(ctxt);
5374
15.7k
  if (name == NULL) {
5375
420
      xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5376
420
      return;
5377
420
  }
5378
15.3k
  if (xmlStrchr(name, ':') != NULL) {
5379
211
      xmlNsErr(ctxt, XML_NS_ERR_COLON,
5380
211
         "colons are forbidden from notation names '%s'\n",
5381
211
         name, NULL, NULL);
5382
211
  }
5383
15.3k
  if (SKIP_BLANKS == 0) {
5384
524
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5385
524
         "Space required after the NOTATION name'\n");
5386
524
      return;
5387
524
  }
5388
5389
  /*
5390
   * Parse the IDs.
5391
   */
5392
14.8k
  Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
5393
14.8k
  SKIP_BLANKS;
5394
5395
14.8k
  if (RAW == '>') {
5396
10.5k
      if (inputid != ctxt->input->id) {
5397
0
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5398
0
                         "Notation declaration doesn't start and stop"
5399
0
                               " in the same entity\n");
5400
0
      }
5401
10.5k
      NEXT;
5402
10.5k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5403
10.5k
    (ctxt->sax->notationDecl != NULL))
5404
9.19k
    ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5405
10.5k
  } else {
5406
4.24k
      xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5407
4.24k
  }
5408
14.8k
  if (Systemid != NULL) xmlFree(Systemid);
5409
14.8k
  if (Pubid != NULL) xmlFree(Pubid);
5410
14.8k
    }
5411
20.0k
}
5412
5413
/**
5414
 * xmlParseEntityDecl:
5415
 * @ctxt:  an XML parser context
5416
 *
5417
 * DEPRECATED: Internal function, don't use.
5418
 *
5419
 * parse <!ENTITY declarations
5420
 *
5421
 * [70] EntityDecl ::= GEDecl | PEDecl
5422
 *
5423
 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5424
 *
5425
 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5426
 *
5427
 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5428
 *
5429
 * [74] PEDef ::= EntityValue | ExternalID
5430
 *
5431
 * [76] NDataDecl ::= S 'NDATA' S Name
5432
 *
5433
 * [ VC: Notation Declared ]
5434
 * The Name must match the declared name of a notation.
5435
 */
5436
5437
void
5438
2.33M
xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
5439
2.33M
    const xmlChar *name = NULL;
5440
2.33M
    xmlChar *value = NULL;
5441
2.33M
    xmlChar *URI = NULL, *literal = NULL;
5442
2.33M
    const xmlChar *ndata = NULL;
5443
2.33M
    int isParameter = 0;
5444
2.33M
    xmlChar *orig = NULL;
5445
5446
    /* GROW; done in the caller */
5447
2.33M
    if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
5448
2.32M
  int inputid = ctxt->input->id;
5449
2.32M
  SHRINK;
5450
2.32M
  SKIP(8);
5451
2.32M
  if (SKIP_BLANKS == 0) {
5452
3.61k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5453
3.61k
         "Space required after '<!ENTITY'\n");
5454
3.61k
  }
5455
5456
2.32M
  if (RAW == '%') {
5457
1.24M
      NEXT;
5458
1.24M
      if (SKIP_BLANKS == 0) {
5459
642
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5460
642
             "Space required after '%%'\n");
5461
642
      }
5462
1.24M
      isParameter = 1;
5463
1.24M
  }
5464
5465
2.32M
        name = xmlParseName(ctxt);
5466
2.32M
  if (name == NULL) {
5467
31.8k
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5468
31.8k
                     "xmlParseEntityDecl: no name\n");
5469
31.8k
            return;
5470
31.8k
  }
5471
2.29M
  if (xmlStrchr(name, ':') != NULL) {
5472
909
      xmlNsErr(ctxt, XML_NS_ERR_COLON,
5473
909
         "colons are forbidden from entities names '%s'\n",
5474
909
         name, NULL, NULL);
5475
909
  }
5476
2.29M
  if (SKIP_BLANKS == 0) {
5477
7.46k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5478
7.46k
         "Space required after the entity name\n");
5479
7.46k
  }
5480
5481
2.29M
  ctxt->instate = XML_PARSER_ENTITY_DECL;
5482
  /*
5483
   * handle the various case of definitions...
5484
   */
5485
2.29M
  if (isParameter) {
5486
1.24M
      if ((RAW == '"') || (RAW == '\'')) {
5487
1.20M
          value = xmlParseEntityValue(ctxt, &orig);
5488
1.20M
    if (value) {
5489
1.19M
        if ((ctxt->sax != NULL) &&
5490
1.19M
      (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5491
1.14M
      ctxt->sax->entityDecl(ctxt->userData, name,
5492
1.14M
                        XML_INTERNAL_PARAMETER_ENTITY,
5493
1.14M
            NULL, NULL, value);
5494
1.19M
    }
5495
1.20M
      } else {
5496
39.2k
          URI = xmlParseExternalID(ctxt, &literal, 1);
5497
39.2k
    if ((URI == NULL) && (literal == NULL)) {
5498
2.38k
        xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5499
2.38k
    }
5500
39.2k
    if (URI) {
5501
36.4k
        xmlURIPtr uri;
5502
5503
36.4k
        uri = xmlParseURI((const char *) URI);
5504
36.4k
        if (uri == NULL) {
5505
2.07k
            xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5506
2.07k
             "Invalid URI: %s\n", URI);
5507
      /*
5508
       * This really ought to be a well formedness error
5509
       * but the XML Core WG decided otherwise c.f. issue
5510
       * E26 of the XML erratas.
5511
       */
5512
34.4k
        } else {
5513
34.4k
      if (uri->fragment != NULL) {
5514
          /*
5515
           * Okay this is foolish to block those but not
5516
           * invalid URIs.
5517
           */
5518
289
          xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5519
34.1k
      } else {
5520
34.1k
          if ((ctxt->sax != NULL) &&
5521
34.1k
        (!ctxt->disableSAX) &&
5522
34.1k
        (ctxt->sax->entityDecl != NULL))
5523
31.8k
        ctxt->sax->entityDecl(ctxt->userData, name,
5524
31.8k
              XML_EXTERNAL_PARAMETER_ENTITY,
5525
31.8k
              literal, URI, NULL);
5526
34.1k
      }
5527
34.4k
      xmlFreeURI(uri);
5528
34.4k
        }
5529
36.4k
    }
5530
39.2k
      }
5531
1.24M
  } else {
5532
1.04M
      if ((RAW == '"') || (RAW == '\'')) {
5533
952k
          value = xmlParseEntityValue(ctxt, &orig);
5534
952k
    if ((ctxt->sax != NULL) &&
5535
952k
        (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5536
817k
        ctxt->sax->entityDecl(ctxt->userData, name,
5537
817k
        XML_INTERNAL_GENERAL_ENTITY,
5538
817k
        NULL, NULL, value);
5539
    /*
5540
     * For expat compatibility in SAX mode.
5541
     */
5542
952k
    if ((ctxt->myDoc == NULL) ||
5543
952k
        (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5544
19.3k
        if (ctxt->myDoc == NULL) {
5545
2.41k
      ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5546
2.41k
      if (ctxt->myDoc == NULL) {
5547
0
          xmlErrMemory(ctxt, "New Doc failed");
5548
0
          return;
5549
0
      }
5550
2.41k
      ctxt->myDoc->properties = XML_DOC_INTERNAL;
5551
2.41k
        }
5552
19.3k
        if (ctxt->myDoc->intSubset == NULL)
5553
2.41k
      ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5554
2.41k
              BAD_CAST "fake", NULL, NULL);
5555
5556
19.3k
        xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5557
19.3k
                    NULL, NULL, value);
5558
19.3k
    }
5559
952k
      } else {
5560
97.2k
          URI = xmlParseExternalID(ctxt, &literal, 1);
5561
97.2k
    if ((URI == NULL) && (literal == NULL)) {
5562
9.68k
        xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5563
9.68k
    }
5564
97.2k
    if (URI) {
5565
85.1k
        xmlURIPtr uri;
5566
5567
85.1k
        uri = xmlParseURI((const char *)URI);
5568
85.1k
        if (uri == NULL) {
5569
5.37k
            xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5570
5.37k
             "Invalid URI: %s\n", URI);
5571
      /*
5572
       * This really ought to be a well formedness error
5573
       * but the XML Core WG decided otherwise c.f. issue
5574
       * E26 of the XML erratas.
5575
       */
5576
79.7k
        } else {
5577
79.7k
      if (uri->fragment != NULL) {
5578
          /*
5579
           * Okay this is foolish to block those but not
5580
           * invalid URIs.
5581
           */
5582
952
          xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5583
952
      }
5584
79.7k
      xmlFreeURI(uri);
5585
79.7k
        }
5586
85.1k
    }
5587
97.2k
    if ((RAW != '>') && (SKIP_BLANKS == 0)) {
5588
10.3k
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5589
10.3k
           "Space required before 'NDATA'\n");
5590
10.3k
    }
5591
97.2k
    if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
5592
21.6k
        SKIP(5);
5593
21.6k
        if (SKIP_BLANKS == 0) {
5594
353
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5595
353
               "Space required after 'NDATA'\n");
5596
353
        }
5597
21.6k
        ndata = xmlParseName(ctxt);
5598
21.6k
        if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5599
21.6k
            (ctxt->sax->unparsedEntityDecl != NULL))
5600
20.1k
      ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5601
20.1k
            literal, URI, ndata);
5602
75.5k
    } else {
5603
75.5k
        if ((ctxt->sax != NULL) &&
5604
75.5k
            (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5605
65.1k
      ctxt->sax->entityDecl(ctxt->userData, name,
5606
65.1k
            XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5607
65.1k
            literal, URI, NULL);
5608
        /*
5609
         * For expat compatibility in SAX mode.
5610
         * assuming the entity replacement was asked for
5611
         */
5612
75.5k
        if ((ctxt->replaceEntities != 0) &&
5613
75.5k
      ((ctxt->myDoc == NULL) ||
5614
40.5k
      (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5615
1.27k
      if (ctxt->myDoc == NULL) {
5616
600
          ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5617
600
          if (ctxt->myDoc == NULL) {
5618
0
              xmlErrMemory(ctxt, "New Doc failed");
5619
0
        return;
5620
0
          }
5621
600
          ctxt->myDoc->properties = XML_DOC_INTERNAL;
5622
600
      }
5623
5624
1.27k
      if (ctxt->myDoc->intSubset == NULL)
5625
600
          ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5626
600
            BAD_CAST "fake", NULL, NULL);
5627
1.27k
      xmlSAX2EntityDecl(ctxt, name,
5628
1.27k
                  XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5629
1.27k
                  literal, URI, NULL);
5630
1.27k
        }
5631
75.5k
    }
5632
97.2k
      }
5633
1.04M
  }
5634
2.29M
  if (ctxt->instate == XML_PARSER_EOF)
5635
0
      goto done;
5636
2.29M
  SKIP_BLANKS;
5637
2.29M
  if (RAW != '>') {
5638
24.8k
      xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
5639
24.8k
              "xmlParseEntityDecl: entity %s not terminated\n", name);
5640
24.8k
      xmlHaltParser(ctxt);
5641
2.26M
  } else {
5642
2.26M
      if (inputid != ctxt->input->id) {
5643
58
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5644
58
                         "Entity declaration doesn't start and stop in"
5645
58
                               " the same entity\n");
5646
58
      }
5647
2.26M
      NEXT;
5648
2.26M
  }
5649
2.29M
  if (orig != NULL) {
5650
      /*
5651
       * Ugly mechanism to save the raw entity value.
5652
       */
5653
2.13M
      xmlEntityPtr cur = NULL;
5654
5655
2.13M
      if (isParameter) {
5656
1.19M
          if ((ctxt->sax != NULL) &&
5657
1.19M
        (ctxt->sax->getParameterEntity != NULL))
5658
1.19M
        cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5659
1.19M
      } else {
5660
942k
          if ((ctxt->sax != NULL) &&
5661
942k
        (ctxt->sax->getEntity != NULL))
5662
942k
        cur = ctxt->sax->getEntity(ctxt->userData, name);
5663
942k
    if ((cur == NULL) && (ctxt->userData==ctxt)) {
5664
102k
        cur = xmlSAX2GetEntity(ctxt, name);
5665
102k
    }
5666
942k
      }
5667
2.13M
            if ((cur != NULL) && (cur->orig == NULL)) {
5668
1.56M
    cur->orig = orig;
5669
1.56M
                orig = NULL;
5670
1.56M
      }
5671
2.13M
  }
5672
5673
2.29M
done:
5674
2.29M
  if (value != NULL) xmlFree(value);
5675
2.29M
  if (URI != NULL) xmlFree(URI);
5676
2.29M
  if (literal != NULL) xmlFree(literal);
5677
2.29M
        if (orig != NULL) xmlFree(orig);
5678
2.29M
    }
5679
2.33M
}
5680
5681
/**
5682
 * xmlParseDefaultDecl:
5683
 * @ctxt:  an XML parser context
5684
 * @value:  Receive a possible fixed default value for the attribute
5685
 *
5686
 * DEPRECATED: Internal function, don't use.
5687
 *
5688
 * Parse an attribute default declaration
5689
 *
5690
 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5691
 *
5692
 * [ VC: Required Attribute ]
5693
 * if the default declaration is the keyword #REQUIRED, then the
5694
 * attribute must be specified for all elements of the type in the
5695
 * attribute-list declaration.
5696
 *
5697
 * [ VC: Attribute Default Legal ]
5698
 * The declared default value must meet the lexical constraints of
5699
 * the declared attribute type c.f. xmlValidateAttributeDecl()
5700
 *
5701
 * [ VC: Fixed Attribute Default ]
5702
 * if an attribute has a default value declared with the #FIXED
5703
 * keyword, instances of that attribute must match the default value.
5704
 *
5705
 * [ WFC: No < in Attribute Values ]
5706
 * handled in xmlParseAttValue()
5707
 *
5708
 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5709
 *          or XML_ATTRIBUTE_FIXED.
5710
 */
5711
5712
int
5713
3.67M
xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5714
3.67M
    int val;
5715
3.67M
    xmlChar *ret;
5716
5717
3.67M
    *value = NULL;
5718
3.67M
    if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
5719
676k
  SKIP(9);
5720
676k
  return(XML_ATTRIBUTE_REQUIRED);
5721
676k
    }
5722
3.00M
    if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
5723
2.56M
  SKIP(8);
5724
2.56M
  return(XML_ATTRIBUTE_IMPLIED);
5725
2.56M
    }
5726
435k
    val = XML_ATTRIBUTE_NONE;
5727
435k
    if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
5728
309k
  SKIP(6);
5729
309k
  val = XML_ATTRIBUTE_FIXED;
5730
309k
  if (SKIP_BLANKS == 0) {
5731
455
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5732
455
         "Space required after '#FIXED'\n");
5733
455
  }
5734
309k
    }
5735
435k
    ret = xmlParseAttValue(ctxt);
5736
435k
    ctxt->instate = XML_PARSER_DTD;
5737
435k
    if (ret == NULL) {
5738
11.3k
  xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
5739
11.3k
           "Attribute default value declaration error\n");
5740
11.3k
    } else
5741
424k
        *value = ret;
5742
435k
    return(val);
5743
3.00M
}
5744
5745
/**
5746
 * xmlParseNotationType:
5747
 * @ctxt:  an XML parser context
5748
 *
5749
 * DEPRECATED: Internal function, don't use.
5750
 *
5751
 * parse an Notation attribute type.
5752
 *
5753
 * Note: the leading 'NOTATION' S part has already being parsed...
5754
 *
5755
 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5756
 *
5757
 * [ VC: Notation Attributes ]
5758
 * Values of this type must match one of the notation names included
5759
 * in the declaration; all notation names in the declaration must be declared.
5760
 *
5761
 * Returns: the notation attribute tree built while parsing
5762
 */
5763
5764
xmlEnumerationPtr
5765
4.27k
xmlParseNotationType(xmlParserCtxtPtr ctxt) {
5766
4.27k
    const xmlChar *name;
5767
4.27k
    xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5768
5769
4.27k
    if (RAW != '(') {
5770
208
  xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5771
208
  return(NULL);
5772
208
    }
5773
4.06k
    SHRINK;
5774
4.71k
    do {
5775
4.71k
        NEXT;
5776
4.71k
  SKIP_BLANKS;
5777
4.71k
        name = xmlParseName(ctxt);
5778
4.71k
  if (name == NULL) {
5779
240
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5780
240
         "Name expected in NOTATION declaration\n");
5781
240
            xmlFreeEnumeration(ret);
5782
240
      return(NULL);
5783
240
  }
5784
4.47k
  tmp = ret;
5785
5.61k
  while (tmp != NULL) {
5786
1.17k
      if (xmlStrEqual(name, tmp->name)) {
5787
41
    xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5788
41
    "standalone: attribute notation value token %s duplicated\n",
5789
41
         name, NULL);
5790
41
    if (!xmlDictOwns(ctxt->dict, name))
5791
0
        xmlFree((xmlChar *) name);
5792
41
    break;
5793
41
      }
5794
1.13k
      tmp = tmp->next;
5795
1.13k
  }
5796
4.47k
  if (tmp == NULL) {
5797
4.43k
      cur = xmlCreateEnumeration(name);
5798
4.43k
      if (cur == NULL) {
5799
0
                xmlFreeEnumeration(ret);
5800
0
                return(NULL);
5801
0
            }
5802
4.43k
      if (last == NULL) ret = last = cur;
5803
548
      else {
5804
548
    last->next = cur;
5805
548
    last = cur;
5806
548
      }
5807
4.43k
  }
5808
4.47k
  SKIP_BLANKS;
5809
4.47k
    } while (RAW == '|');
5810
3.82k
    if (RAW != ')') {
5811
360
  xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5812
360
        xmlFreeEnumeration(ret);
5813
360
  return(NULL);
5814
360
    }
5815
3.46k
    NEXT;
5816
3.46k
    return(ret);
5817
3.82k
}
5818
5819
/**
5820
 * xmlParseEnumerationType:
5821
 * @ctxt:  an XML parser context
5822
 *
5823
 * DEPRECATED: Internal function, don't use.
5824
 *
5825
 * parse an Enumeration attribute type.
5826
 *
5827
 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5828
 *
5829
 * [ VC: Enumeration ]
5830
 * Values of this type must match one of the Nmtoken tokens in
5831
 * the declaration
5832
 *
5833
 * Returns: the enumeration attribute tree built while parsing
5834
 */
5835
5836
xmlEnumerationPtr
5837
292k
xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
5838
292k
    xmlChar *name;
5839
292k
    xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5840
5841
292k
    if (RAW != '(') {
5842
17.3k
  xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
5843
17.3k
  return(NULL);
5844
17.3k
    }
5845
274k
    SHRINK;
5846
785k
    do {
5847
785k
        NEXT;
5848
785k
  SKIP_BLANKS;
5849
785k
        name = xmlParseNmtoken(ctxt);
5850
785k
  if (name == NULL) {
5851
730
      xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
5852
730
      return(ret);
5853
730
  }
5854
784k
  tmp = ret;
5855
1.92M
  while (tmp != NULL) {
5856
1.14M
      if (xmlStrEqual(name, tmp->name)) {
5857
626
    xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5858
626
    "standalone: attribute enumeration value token %s duplicated\n",
5859
626
         name, NULL);
5860
626
    if (!xmlDictOwns(ctxt->dict, name))
5861
626
        xmlFree(name);
5862
626
    break;
5863
626
      }
5864
1.14M
      tmp = tmp->next;
5865
1.14M
  }
5866
784k
  if (tmp == NULL) {
5867
784k
      cur = xmlCreateEnumeration(name);
5868
784k
      if (!xmlDictOwns(ctxt->dict, name))
5869
784k
    xmlFree(name);
5870
784k
      if (cur == NULL) {
5871
0
                xmlFreeEnumeration(ret);
5872
0
                return(NULL);
5873
0
            }
5874
784k
      if (last == NULL) ret = last = cur;
5875
509k
      else {
5876
509k
    last->next = cur;
5877
509k
    last = cur;
5878
509k
      }
5879
784k
  }
5880
784k
  SKIP_BLANKS;
5881
784k
    } while (RAW == '|');
5882
274k
    if (RAW != ')') {
5883
2.08k
  xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
5884
2.08k
  return(ret);
5885
2.08k
    }
5886
272k
    NEXT;
5887
272k
    return(ret);
5888
274k
}
5889
5890
/**
5891
 * xmlParseEnumeratedType:
5892
 * @ctxt:  an XML parser context
5893
 * @tree:  the enumeration tree built while parsing
5894
 *
5895
 * DEPRECATED: Internal function, don't use.
5896
 *
5897
 * parse an Enumerated attribute type.
5898
 *
5899
 * [57] EnumeratedType ::= NotationType | Enumeration
5900
 *
5901
 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5902
 *
5903
 *
5904
 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5905
 */
5906
5907
int
5908
296k
xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5909
296k
    if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5910
4.60k
  SKIP(8);
5911
4.60k
  if (SKIP_BLANKS == 0) {
5912
338
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5913
338
         "Space required after 'NOTATION'\n");
5914
338
      return(0);
5915
338
  }
5916
4.27k
  *tree = xmlParseNotationType(ctxt);
5917
4.27k
  if (*tree == NULL) return(0);
5918
3.46k
  return(XML_ATTRIBUTE_NOTATION);
5919
4.27k
    }
5920
292k
    *tree = xmlParseEnumerationType(ctxt);
5921
292k
    if (*tree == NULL) return(0);
5922
274k
    return(XML_ATTRIBUTE_ENUMERATION);
5923
292k
}
5924
5925
/**
5926
 * xmlParseAttributeType:
5927
 * @ctxt:  an XML parser context
5928
 * @tree:  the enumeration tree built while parsing
5929
 *
5930
 * DEPRECATED: Internal function, don't use.
5931
 *
5932
 * parse the Attribute list def for an element
5933
 *
5934
 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5935
 *
5936
 * [55] StringType ::= 'CDATA'
5937
 *
5938
 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5939
 *                        'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5940
 *
5941
 * Validity constraints for attribute values syntax are checked in
5942
 * xmlValidateAttributeValue()
5943
 *
5944
 * [ VC: ID ]
5945
 * Values of type ID must match the Name production. A name must not
5946
 * appear more than once in an XML document as a value of this type;
5947
 * i.e., ID values must uniquely identify the elements which bear them.
5948
 *
5949
 * [ VC: One ID per Element Type ]
5950
 * No element type may have more than one ID attribute specified.
5951
 *
5952
 * [ VC: ID Attribute Default ]
5953
 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
5954
 *
5955
 * [ VC: IDREF ]
5956
 * Values of type IDREF must match the Name production, and values
5957
 * of type IDREFS must match Names; each IDREF Name must match the value
5958
 * of an ID attribute on some element in the XML document; i.e. IDREF
5959
 * values must match the value of some ID attribute.
5960
 *
5961
 * [ VC: Entity Name ]
5962
 * Values of type ENTITY must match the Name production, values
5963
 * of type ENTITIES must match Names; each Entity Name must match the
5964
 * name of an unparsed entity declared in the DTD.
5965
 *
5966
 * [ VC: Name Token ]
5967
 * Values of type NMTOKEN must match the Nmtoken production; values
5968
 * of type NMTOKENS must match Nmtokens.
5969
 *
5970
 * Returns the attribute type
5971
 */
5972
int
5973
3.70M
xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5974
3.70M
    SHRINK;
5975
3.70M
    if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
5976
1.29M
  SKIP(5);
5977
1.29M
  return(XML_ATTRIBUTE_CDATA);
5978
2.40M
     } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
5979
17.2k
  SKIP(6);
5980
17.2k
  return(XML_ATTRIBUTE_IDREFS);
5981
2.38M
     } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
5982
94.7k
  SKIP(5);
5983
94.7k
  return(XML_ATTRIBUTE_IDREF);
5984
2.29M
     } else if ((RAW == 'I') && (NXT(1) == 'D')) {
5985
933k
        SKIP(2);
5986
933k
  return(XML_ATTRIBUTE_ID);
5987
1.35M
     } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
5988
17.9k
  SKIP(6);
5989
17.9k
  return(XML_ATTRIBUTE_ENTITY);
5990
1.34M
     } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
5991
12.3k
  SKIP(8);
5992
12.3k
  return(XML_ATTRIBUTE_ENTITIES);
5993
1.32M
     } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
5994
129k
  SKIP(8);
5995
129k
  return(XML_ATTRIBUTE_NMTOKENS);
5996
1.19M
     } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
5997
902k
  SKIP(7);
5998
902k
  return(XML_ATTRIBUTE_NMTOKEN);
5999
902k
     }
6000
296k
     return(xmlParseEnumeratedType(ctxt, tree));
6001
3.70M
}
6002
6003
/**
6004
 * xmlParseAttributeListDecl:
6005
 * @ctxt:  an XML parser context
6006
 *
6007
 * DEPRECATED: Internal function, don't use.
6008
 *
6009
 * : parse the Attribute list def for an element
6010
 *
6011
 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
6012
 *
6013
 * [53] AttDef ::= S Name S AttType S DefaultDecl
6014
 *
6015
 */
6016
void
6017
1.72M
xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
6018
1.72M
    const xmlChar *elemName;
6019
1.72M
    const xmlChar *attrName;
6020
1.72M
    xmlEnumerationPtr tree;
6021
6022
1.72M
    if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
6023
1.71M
  int inputid = ctxt->input->id;
6024
6025
1.71M
  SKIP(9);
6026
1.71M
  if (SKIP_BLANKS == 0) {
6027
2.62k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6028
2.62k
                     "Space required after '<!ATTLIST'\n");
6029
2.62k
  }
6030
1.71M
        elemName = xmlParseName(ctxt);
6031
1.71M
  if (elemName == NULL) {
6032
2.98k
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6033
2.98k
         "ATTLIST: no name for Element\n");
6034
2.98k
      return;
6035
2.98k
  }
6036
1.71M
  SKIP_BLANKS;
6037
1.71M
  GROW;
6038
5.37M
  while ((RAW != '>') && (ctxt->instate != XML_PARSER_EOF)) {
6039
3.77M
      int type;
6040
3.77M
      int def;
6041
3.77M
      xmlChar *defaultValue = NULL;
6042
6043
3.77M
      GROW;
6044
3.77M
            tree = NULL;
6045
3.77M
      attrName = xmlParseName(ctxt);
6046
3.77M
      if (attrName == NULL) {
6047
68.2k
    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6048
68.2k
             "ATTLIST: no name for Attribute\n");
6049
68.2k
    break;
6050
68.2k
      }
6051
3.70M
      GROW;
6052
3.70M
      if (SKIP_BLANKS == 0) {
6053
6.74k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6054
6.74k
            "Space required after the attribute name\n");
6055
6.74k
    break;
6056
6.74k
      }
6057
6058
3.70M
      type = xmlParseAttributeType(ctxt, &tree);
6059
3.70M
      if (type <= 0) {
6060
19.0k
          break;
6061
19.0k
      }
6062
6063
3.68M
      GROW;
6064
3.68M
      if (SKIP_BLANKS == 0) {
6065
6.03k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6066
6.03k
             "Space required after the attribute type\n");
6067
6.03k
          if (tree != NULL)
6068
2.73k
        xmlFreeEnumeration(tree);
6069
6.03k
    break;
6070
6.03k
      }
6071
6072
3.67M
      def = xmlParseDefaultDecl(ctxt, &defaultValue);
6073
3.67M
      if (def <= 0) {
6074
0
                if (defaultValue != NULL)
6075
0
        xmlFree(defaultValue);
6076
0
          if (tree != NULL)
6077
0
        xmlFreeEnumeration(tree);
6078
0
          break;
6079
0
      }
6080
3.67M
      if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
6081
115k
          xmlAttrNormalizeSpace(defaultValue, defaultValue);
6082
6083
3.67M
      GROW;
6084
3.67M
            if (RAW != '>') {
6085
3.06M
    if (SKIP_BLANKS == 0) {
6086
20.4k
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6087
20.4k
      "Space required after the attribute default value\n");
6088
20.4k
        if (defaultValue != NULL)
6089
8.63k
      xmlFree(defaultValue);
6090
20.4k
        if (tree != NULL)
6091
1.91k
      xmlFreeEnumeration(tree);
6092
20.4k
        break;
6093
20.4k
    }
6094
3.06M
      }
6095
3.65M
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6096
3.65M
    (ctxt->sax->attributeDecl != NULL))
6097
3.45M
    ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
6098
3.45M
                          type, def, defaultValue, tree);
6099
201k
      else if (tree != NULL)
6100
17.2k
    xmlFreeEnumeration(tree);
6101
6102
3.65M
      if ((ctxt->sax2) && (defaultValue != NULL) &&
6103
3.65M
          (def != XML_ATTRIBUTE_IMPLIED) &&
6104
3.65M
    (def != XML_ATTRIBUTE_REQUIRED)) {
6105
304k
    xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
6106
304k
      }
6107
3.65M
      if (ctxt->sax2) {
6108
2.72M
    xmlAddSpecialAttr(ctxt, elemName, attrName, type);
6109
2.72M
      }
6110
3.65M
      if (defaultValue != NULL)
6111
415k
          xmlFree(defaultValue);
6112
3.65M
      GROW;
6113
3.65M
  }
6114
1.71M
  if (RAW == '>') {
6115
1.59M
      if (inputid != ctxt->input->id) {
6116
233
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6117
233
                               "Attribute list declaration doesn't start and"
6118
233
                               " stop in the same entity\n");
6119
233
      }
6120
1.59M
      NEXT;
6121
1.59M
  }
6122
1.71M
    }
6123
1.72M
}
6124
6125
/**
6126
 * xmlParseElementMixedContentDecl:
6127
 * @ctxt:  an XML parser context
6128
 * @inputchk:  the input used for the current entity, needed for boundary checks
6129
 *
6130
 * DEPRECATED: Internal function, don't use.
6131
 *
6132
 * parse the declaration for a Mixed Element content
6133
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6134
 *
6135
 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
6136
 *                '(' S? '#PCDATA' S? ')'
6137
 *
6138
 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
6139
 *
6140
 * [ VC: No Duplicate Types ]
6141
 * The same name must not appear more than once in a single
6142
 * mixed-content declaration.
6143
 *
6144
 * returns: the list of the xmlElementContentPtr describing the element choices
6145
 */
6146
xmlElementContentPtr
6147
675k
xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6148
675k
    xmlElementContentPtr ret = NULL, cur = NULL, n;
6149
675k
    const xmlChar *elem = NULL;
6150
6151
675k
    GROW;
6152
675k
    if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6153
675k
  SKIP(7);
6154
675k
  SKIP_BLANKS;
6155
675k
  SHRINK;
6156
675k
  if (RAW == ')') {
6157
481k
      if (ctxt->input->id != inputchk) {
6158
0
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6159
0
                               "Element content declaration doesn't start and"
6160
0
                               " stop in the same entity\n");
6161
0
      }
6162
481k
      NEXT;
6163
481k
      ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6164
481k
      if (ret == NULL)
6165
0
          return(NULL);
6166
481k
      if (RAW == '*') {
6167
64
    ret->ocur = XML_ELEMENT_CONTENT_MULT;
6168
64
    NEXT;
6169
64
      }
6170
481k
      return(ret);
6171
481k
  }
6172
193k
  if ((RAW == '(') || (RAW == '|')) {
6173
192k
      ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6174
192k
      if (ret == NULL) return(NULL);
6175
192k
  }
6176
2.22M
  while ((RAW == '|') && (ctxt->instate != XML_PARSER_EOF)) {
6177
2.02M
      NEXT;
6178
2.02M
      if (elem == NULL) {
6179
192k
          ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6180
192k
    if (ret == NULL) {
6181
0
        xmlFreeDocElementContent(ctxt->myDoc, cur);
6182
0
                    return(NULL);
6183
0
                }
6184
192k
    ret->c1 = cur;
6185
192k
    if (cur != NULL)
6186
192k
        cur->parent = ret;
6187
192k
    cur = ret;
6188
1.83M
      } else {
6189
1.83M
          n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6190
1.83M
    if (n == NULL) {
6191
0
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6192
0
                    return(NULL);
6193
0
                }
6194
1.83M
    n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6195
1.83M
    if (n->c1 != NULL)
6196
1.83M
        n->c1->parent = n;
6197
1.83M
          cur->c2 = n;
6198
1.83M
    if (n != NULL)
6199
1.83M
        n->parent = cur;
6200
1.83M
    cur = n;
6201
1.83M
      }
6202
2.02M
      SKIP_BLANKS;
6203
2.02M
      elem = xmlParseName(ctxt);
6204
2.02M
      if (elem == NULL) {
6205
767
    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6206
767
      "xmlParseElementMixedContentDecl : Name expected\n");
6207
767
    xmlFreeDocElementContent(ctxt->myDoc, ret);
6208
767
    return(NULL);
6209
767
      }
6210
2.02M
      SKIP_BLANKS;
6211
2.02M
      GROW;
6212
2.02M
  }
6213
193k
  if ((RAW == ')') && (NXT(1) == '*')) {
6214
189k
      if (elem != NULL) {
6215
189k
    cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
6216
189k
                                   XML_ELEMENT_CONTENT_ELEMENT);
6217
189k
    if (cur->c2 != NULL)
6218
189k
        cur->c2->parent = cur;
6219
189k
            }
6220
189k
            if (ret != NULL)
6221
189k
                ret->ocur = XML_ELEMENT_CONTENT_MULT;
6222
189k
      if (ctxt->input->id != inputchk) {
6223
26
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6224
26
                               "Element content declaration doesn't start and"
6225
26
                               " stop in the same entity\n");
6226
26
      }
6227
189k
      SKIP(2);
6228
189k
  } else {
6229
3.40k
      xmlFreeDocElementContent(ctxt->myDoc, ret);
6230
3.40k
      xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
6231
3.40k
      return(NULL);
6232
3.40k
  }
6233
6234
193k
    } else {
6235
0
  xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
6236
0
    }
6237
189k
    return(ret);
6238
675k
}
6239
6240
/**
6241
 * xmlParseElementChildrenContentDeclPriv:
6242
 * @ctxt:  an XML parser context
6243
 * @inputchk:  the input used for the current entity, needed for boundary checks
6244
 * @depth: the level of recursion
6245
 *
6246
 * parse the declaration for a Mixed Element content
6247
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6248
 *
6249
 *
6250
 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6251
 *
6252
 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6253
 *
6254
 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6255
 *
6256
 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6257
 *
6258
 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6259
 * TODO Parameter-entity replacement text must be properly nested
6260
 *  with parenthesized groups. That is to say, if either of the
6261
 *  opening or closing parentheses in a choice, seq, or Mixed
6262
 *  construct is contained in the replacement text for a parameter
6263
 *  entity, both must be contained in the same replacement text. For
6264
 *  interoperability, if a parameter-entity reference appears in a
6265
 *  choice, seq, or Mixed construct, its replacement text should not
6266
 *  be empty, and neither the first nor last non-blank character of
6267
 *  the replacement text should be a connector (| or ,).
6268
 *
6269
 * Returns the tree of xmlElementContentPtr describing the element
6270
 *          hierarchy.
6271
 */
6272
static xmlElementContentPtr
6273
xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
6274
1.34M
                                       int depth) {
6275
1.34M
    xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
6276
1.34M
    const xmlChar *elem;
6277
1.34M
    xmlChar type = 0;
6278
6279
1.34M
    if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
6280
1.34M
        (depth >  2048)) {
6281
139
        xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED,
6282
139
"xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n",
6283
139
                          depth);
6284
139
  return(NULL);
6285
139
    }
6286
1.34M
    SKIP_BLANKS;
6287
1.34M
    GROW;
6288
1.34M
    if (RAW == '(') {
6289
180k
  int inputid = ctxt->input->id;
6290
6291
        /* Recurse on first child */
6292
180k
  NEXT;
6293
180k
  SKIP_BLANKS;
6294
180k
        cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6295
180k
                                                           depth + 1);
6296
180k
        if (cur == NULL)
6297
120k
            return(NULL);
6298
59.6k
  SKIP_BLANKS;
6299
59.6k
  GROW;
6300
1.16M
    } else {
6301
1.16M
  elem = xmlParseName(ctxt);
6302
1.16M
  if (elem == NULL) {
6303
7.05k
      xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6304
7.05k
      return(NULL);
6305
7.05k
  }
6306
1.16M
        cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6307
1.16M
  if (cur == NULL) {
6308
0
      xmlErrMemory(ctxt, NULL);
6309
0
      return(NULL);
6310
0
  }
6311
1.16M
  GROW;
6312
1.16M
  if (RAW == '?') {
6313
76.1k
      cur->ocur = XML_ELEMENT_CONTENT_OPT;
6314
76.1k
      NEXT;
6315
1.08M
  } else if (RAW == '*') {
6316
82.7k
      cur->ocur = XML_ELEMENT_CONTENT_MULT;
6317
82.7k
      NEXT;
6318
1.00M
  } else if (RAW == '+') {
6319
161k
      cur->ocur = XML_ELEMENT_CONTENT_PLUS;
6320
161k
      NEXT;
6321
839k
  } else {
6322
839k
      cur->ocur = XML_ELEMENT_CONTENT_ONCE;
6323
839k
  }
6324
1.16M
  GROW;
6325
1.16M
    }
6326
1.22M
    SKIP_BLANKS;
6327
1.22M
    SHRINK;
6328
4.91M
    while ((RAW != ')') && (ctxt->instate != XML_PARSER_EOF)) {
6329
        /*
6330
   * Each loop we parse one separator and one element.
6331
   */
6332
3.71M
        if (RAW == ',') {
6333
1.01M
      if (type == 0) type = CUR;
6334
6335
      /*
6336
       * Detect "Name | Name , Name" error
6337
       */
6338
603k
      else if (type != CUR) {
6339
242
    xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6340
242
        "xmlParseElementChildrenContentDecl : '%c' expected\n",
6341
242
                      type);
6342
242
    if ((last != NULL) && (last != ret))
6343
242
        xmlFreeDocElementContent(ctxt->myDoc, last);
6344
242
    if (ret != NULL)
6345
242
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6346
242
    return(NULL);
6347
242
      }
6348
1.01M
      NEXT;
6349
6350
1.01M
      op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
6351
1.01M
      if (op == NULL) {
6352
0
    if ((last != NULL) && (last != ret))
6353
0
        xmlFreeDocElementContent(ctxt->myDoc, last);
6354
0
          xmlFreeDocElementContent(ctxt->myDoc, ret);
6355
0
    return(NULL);
6356
0
      }
6357
1.01M
      if (last == NULL) {
6358
409k
    op->c1 = ret;
6359
409k
    if (ret != NULL)
6360
409k
        ret->parent = op;
6361
409k
    ret = cur = op;
6362
603k
      } else {
6363
603k
          cur->c2 = op;
6364
603k
    if (op != NULL)
6365
603k
        op->parent = cur;
6366
603k
    op->c1 = last;
6367
603k
    if (last != NULL)
6368
603k
        last->parent = op;
6369
603k
    cur =op;
6370
603k
    last = NULL;
6371
603k
      }
6372
2.70M
  } else if (RAW == '|') {
6373
2.68M
      if (type == 0) type = CUR;
6374
6375
      /*
6376
       * Detect "Name , Name | Name" error
6377
       */
6378
2.33M
      else if (type != CUR) {
6379
214
    xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6380
214
        "xmlParseElementChildrenContentDecl : '%c' expected\n",
6381
214
          type);
6382
214
    if ((last != NULL) && (last != ret))
6383
214
        xmlFreeDocElementContent(ctxt->myDoc, last);
6384
214
    if (ret != NULL)
6385
214
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6386
214
    return(NULL);
6387
214
      }
6388
2.68M
      NEXT;
6389
6390
2.68M
      op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6391
2.68M
      if (op == NULL) {
6392
0
    if ((last != NULL) && (last != ret))
6393
0
        xmlFreeDocElementContent(ctxt->myDoc, last);
6394
0
    if (ret != NULL)
6395
0
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6396
0
    return(NULL);
6397
0
      }
6398
2.68M
      if (last == NULL) {
6399
356k
    op->c1 = ret;
6400
356k
    if (ret != NULL)
6401
356k
        ret->parent = op;
6402
356k
    ret = cur = op;
6403
2.33M
      } else {
6404
2.33M
          cur->c2 = op;
6405
2.33M
    if (op != NULL)
6406
2.33M
        op->parent = cur;
6407
2.33M
    op->c1 = last;
6408
2.33M
    if (last != NULL)
6409
2.33M
        last->parent = op;
6410
2.33M
    cur =op;
6411
2.33M
    last = NULL;
6412
2.33M
      }
6413
2.68M
  } else {
6414
15.5k
      xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
6415
15.5k
      if ((last != NULL) && (last != ret))
6416
6.38k
          xmlFreeDocElementContent(ctxt->myDoc, last);
6417
15.5k
      if (ret != NULL)
6418
15.5k
    xmlFreeDocElementContent(ctxt->myDoc, ret);
6419
15.5k
      return(NULL);
6420
15.5k
  }
6421
3.69M
  GROW;
6422
3.69M
  SKIP_BLANKS;
6423
3.69M
  GROW;
6424
3.69M
  if (RAW == '(') {
6425
164k
      int inputid = ctxt->input->id;
6426
      /* Recurse on second child */
6427
164k
      NEXT;
6428
164k
      SKIP_BLANKS;
6429
164k
      last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6430
164k
                                                          depth + 1);
6431
164k
            if (last == NULL) {
6432
2.09k
    if (ret != NULL)
6433
2.09k
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6434
2.09k
    return(NULL);
6435
2.09k
            }
6436
162k
      SKIP_BLANKS;
6437
3.53M
  } else {
6438
3.53M
      elem = xmlParseName(ctxt);
6439
3.53M
      if (elem == NULL) {
6440
1.67k
    xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6441
1.67k
    if (ret != NULL)
6442
1.67k
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6443
1.67k
    return(NULL);
6444
1.67k
      }
6445
3.53M
      last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6446
3.53M
      if (last == NULL) {
6447
0
    if (ret != NULL)
6448
0
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6449
0
    return(NULL);
6450
0
      }
6451
3.53M
      if (RAW == '?') {
6452
418k
    last->ocur = XML_ELEMENT_CONTENT_OPT;
6453
418k
    NEXT;
6454
3.11M
      } else if (RAW == '*') {
6455
216k
    last->ocur = XML_ELEMENT_CONTENT_MULT;
6456
216k
    NEXT;
6457
2.89M
      } else if (RAW == '+') {
6458
40.5k
    last->ocur = XML_ELEMENT_CONTENT_PLUS;
6459
40.5k
    NEXT;
6460
2.85M
      } else {
6461
2.85M
    last->ocur = XML_ELEMENT_CONTENT_ONCE;
6462
2.85M
      }
6463
3.53M
  }
6464
3.69M
  SKIP_BLANKS;
6465
3.69M
  GROW;
6466
3.69M
    }
6467
1.20M
    if ((cur != NULL) && (last != NULL)) {
6468
755k
        cur->c2 = last;
6469
755k
  if (last != NULL)
6470
755k
      last->parent = cur;
6471
755k
    }
6472
1.20M
    if (ctxt->input->id != inputchk) {
6473
301
  xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6474
301
                       "Element content declaration doesn't start and stop in"
6475
301
                       " the same entity\n");
6476
301
    }
6477
1.20M
    NEXT;
6478
1.20M
    if (RAW == '?') {
6479
16.6k
  if (ret != NULL) {
6480
16.6k
      if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6481
16.6k
          (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6482
110
          ret->ocur = XML_ELEMENT_CONTENT_MULT;
6483
16.5k
      else
6484
16.5k
          ret->ocur = XML_ELEMENT_CONTENT_OPT;
6485
16.6k
  }
6486
16.6k
  NEXT;
6487
1.18M
    } else if (RAW == '*') {
6488
400k
  if (ret != NULL) {
6489
400k
      ret->ocur = XML_ELEMENT_CONTENT_MULT;
6490
400k
      cur = ret;
6491
      /*
6492
       * Some normalization:
6493
       * (a | b* | c?)* == (a | b | c)*
6494
       */
6495
2.03M
      while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6496
1.63M
    if ((cur->c1 != NULL) &&
6497
1.63M
              ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6498
1.63M
         (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6499
81.0k
        cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6500
1.63M
    if ((cur->c2 != NULL) &&
6501
1.63M
              ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6502
1.63M
         (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6503
13.2k
        cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6504
1.63M
    cur = cur->c2;
6505
1.63M
      }
6506
400k
  }
6507
400k
  NEXT;
6508
783k
    } else if (RAW == '+') {
6509
129k
  if (ret != NULL) {
6510
129k
      int found = 0;
6511
6512
129k
      if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6513
129k
          (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6514
10
          ret->ocur = XML_ELEMENT_CONTENT_MULT;
6515
129k
      else
6516
129k
          ret->ocur = XML_ELEMENT_CONTENT_PLUS;
6517
      /*
6518
       * Some normalization:
6519
       * (a | b*)+ == (a | b)*
6520
       * (a | b?)+ == (a | b)*
6521
       */
6522
224k
      while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6523
95.8k
    if ((cur->c1 != NULL) &&
6524
95.8k
              ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6525
95.8k
         (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6526
592
        cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6527
592
        found = 1;
6528
592
    }
6529
95.8k
    if ((cur->c2 != NULL) &&
6530
95.8k
              ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6531
95.8k
         (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6532
281
        cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6533
281
        found = 1;
6534
281
    }
6535
95.8k
    cur = cur->c2;
6536
95.8k
      }
6537
129k
      if (found)
6538
721
    ret->ocur = XML_ELEMENT_CONTENT_MULT;
6539
129k
  }
6540
129k
  NEXT;
6541
129k
    }
6542
1.20M
    return(ret);
6543
1.22M
}
6544
6545
/**
6546
 * xmlParseElementChildrenContentDecl:
6547
 * @ctxt:  an XML parser context
6548
 * @inputchk:  the input used for the current entity, needed for boundary checks
6549
 *
6550
 * DEPRECATED: Internal function, don't use.
6551
 *
6552
 * parse the declaration for a Mixed Element content
6553
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6554
 *
6555
 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6556
 *
6557
 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6558
 *
6559
 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6560
 *
6561
 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6562
 *
6563
 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6564
 * TODO Parameter-entity replacement text must be properly nested
6565
 *  with parenthesized groups. That is to say, if either of the
6566
 *  opening or closing parentheses in a choice, seq, or Mixed
6567
 *  construct is contained in the replacement text for a parameter
6568
 *  entity, both must be contained in the same replacement text. For
6569
 *  interoperability, if a parameter-entity reference appears in a
6570
 *  choice, seq, or Mixed construct, its replacement text should not
6571
 *  be empty, and neither the first nor last non-blank character of
6572
 *  the replacement text should be a connector (| or ,).
6573
 *
6574
 * Returns the tree of xmlElementContentPtr describing the element
6575
 *          hierarchy.
6576
 */
6577
xmlElementContentPtr
6578
0
xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6579
    /* stub left for API/ABI compat */
6580
0
    return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6581
0
}
6582
6583
/**
6584
 * xmlParseElementContentDecl:
6585
 * @ctxt:  an XML parser context
6586
 * @name:  the name of the element being defined.
6587
 * @result:  the Element Content pointer will be stored here if any
6588
 *
6589
 * DEPRECATED: Internal function, don't use.
6590
 *
6591
 * parse the declaration for an Element content either Mixed or Children,
6592
 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
6593
 *
6594
 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6595
 *
6596
 * returns: the type of element content XML_ELEMENT_TYPE_xxx
6597
 */
6598
6599
int
6600
xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
6601
1.67M
                           xmlElementContentPtr *result) {
6602
6603
1.67M
    xmlElementContentPtr tree = NULL;
6604
1.67M
    int inputid = ctxt->input->id;
6605
1.67M
    int res;
6606
6607
1.67M
    *result = NULL;
6608
6609
1.67M
    if (RAW != '(') {
6610
0
  xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6611
0
    "xmlParseElementContentDecl : %s '(' expected\n", name);
6612
0
  return(-1);
6613
0
    }
6614
1.67M
    NEXT;
6615
1.67M
    GROW;
6616
1.67M
    if (ctxt->instate == XML_PARSER_EOF)
6617
0
        return(-1);
6618
1.67M
    SKIP_BLANKS;
6619
1.67M
    if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6620
675k
        tree = xmlParseElementMixedContentDecl(ctxt, inputid);
6621
675k
  res = XML_ELEMENT_TYPE_MIXED;
6622
1.00M
    } else {
6623
1.00M
        tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
6624
1.00M
  res = XML_ELEMENT_TYPE_ELEMENT;
6625
1.00M
    }
6626
1.67M
    SKIP_BLANKS;
6627
1.67M
    *result = tree;
6628
1.67M
    return(res);
6629
1.67M
}
6630
6631
/**
6632
 * xmlParseElementDecl:
6633
 * @ctxt:  an XML parser context
6634
 *
6635
 * DEPRECATED: Internal function, don't use.
6636
 *
6637
 * parse an Element declaration.
6638
 *
6639
 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6640
 *
6641
 * [ VC: Unique Element Type Declaration ]
6642
 * No element type may be declared more than once
6643
 *
6644
 * Returns the type of the element, or -1 in case of error
6645
 */
6646
int
6647
2.18M
xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
6648
2.18M
    const xmlChar *name;
6649
2.18M
    int ret = -1;
6650
2.18M
    xmlElementContentPtr content  = NULL;
6651
6652
    /* GROW; done in the caller */
6653
2.18M
    if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
6654
2.17M
  int inputid = ctxt->input->id;
6655
6656
2.17M
  SKIP(9);
6657
2.17M
  if (SKIP_BLANKS == 0) {
6658
2.76k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6659
2.76k
               "Space required after 'ELEMENT'\n");
6660
2.76k
      return(-1);
6661
2.76k
  }
6662
2.17M
        name = xmlParseName(ctxt);
6663
2.17M
  if (name == NULL) {
6664
1.81k
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6665
1.81k
         "xmlParseElementDecl: no name for Element\n");
6666
1.81k
      return(-1);
6667
1.81k
  }
6668
2.17M
  if (SKIP_BLANKS == 0) {
6669
13.9k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6670
13.9k
         "Space required after the element name\n");
6671
13.9k
  }
6672
2.17M
  if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
6673
462k
      SKIP(5);
6674
      /*
6675
       * Element must always be empty.
6676
       */
6677
462k
      ret = XML_ELEMENT_TYPE_EMPTY;
6678
1.71M
  } else if ((RAW == 'A') && (NXT(1) == 'N') &&
6679
1.71M
             (NXT(2) == 'Y')) {
6680
13.0k
      SKIP(3);
6681
      /*
6682
       * Element is a generic container.
6683
       */
6684
13.0k
      ret = XML_ELEMENT_TYPE_ANY;
6685
1.69M
  } else if (RAW == '(') {
6686
1.67M
      ret = xmlParseElementContentDecl(ctxt, name, &content);
6687
1.67M
  } else {
6688
      /*
6689
       * [ WFC: PEs in Internal Subset ] error handling.
6690
       */
6691
20.5k
      if ((RAW == '%') && (ctxt->external == 0) &&
6692
20.5k
          (ctxt->inputNr == 1)) {
6693
420
    xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
6694
420
    "PEReference: forbidden within markup decl in internal subset\n");
6695
20.0k
      } else {
6696
20.0k
    xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6697
20.0k
          "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6698
20.0k
            }
6699
20.5k
      return(-1);
6700
20.5k
  }
6701
6702
2.15M
  SKIP_BLANKS;
6703
6704
2.15M
  if (RAW != '>') {
6705
29.2k
      xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
6706
29.2k
      if (content != NULL) {
6707
3.04k
    xmlFreeDocElementContent(ctxt->myDoc, content);
6708
3.04k
      }
6709
2.12M
  } else {
6710
2.12M
      if (inputid != ctxt->input->id) {
6711
183
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6712
183
                               "Element declaration doesn't start and stop in"
6713
183
                               " the same entity\n");
6714
183
      }
6715
6716
2.12M
      NEXT;
6717
2.12M
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6718
2.12M
    (ctxt->sax->elementDecl != NULL)) {
6719
1.96M
    if (content != NULL)
6720
1.51M
        content->parent = NULL;
6721
1.96M
          ctxt->sax->elementDecl(ctxt->userData, name, ret,
6722
1.96M
                           content);
6723
1.96M
    if ((content != NULL) && (content->parent == NULL)) {
6724
        /*
6725
         * this is a trick: if xmlAddElementDecl is called,
6726
         * instead of copying the full tree it is plugged directly
6727
         * if called from the parser. Avoid duplicating the
6728
         * interfaces or change the API/ABI
6729
         */
6730
102k
        xmlFreeDocElementContent(ctxt->myDoc, content);
6731
102k
    }
6732
1.96M
      } else if (content != NULL) {
6733
136k
    xmlFreeDocElementContent(ctxt->myDoc, content);
6734
136k
      }
6735
2.12M
  }
6736
2.15M
    }
6737
2.16M
    return(ret);
6738
2.18M
}
6739
6740
/**
6741
 * xmlParseConditionalSections
6742
 * @ctxt:  an XML parser context
6743
 *
6744
 * [61] conditionalSect ::= includeSect | ignoreSect
6745
 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6746
 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6747
 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6748
 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6749
 */
6750
6751
static void
6752
16.9k
xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
6753
16.9k
    int *inputIds = NULL;
6754
16.9k
    size_t inputIdsSize = 0;
6755
16.9k
    size_t depth = 0;
6756
6757
92.1k
    while (ctxt->instate != XML_PARSER_EOF) {
6758
91.8k
        if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6759
48.5k
            int id = ctxt->input->id;
6760
6761
48.5k
            SKIP(3);
6762
48.5k
            SKIP_BLANKS;
6763
6764
48.5k
            if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
6765
40.0k
                SKIP(7);
6766
40.0k
                SKIP_BLANKS;
6767
40.0k
                if (RAW != '[') {
6768
345
                    xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6769
345
                    xmlHaltParser(ctxt);
6770
345
                    goto error;
6771
345
                }
6772
39.6k
                if (ctxt->input->id != id) {
6773
13
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6774
13
                                   "All markup of the conditional section is"
6775
13
                                   " not in the same entity\n");
6776
13
                }
6777
39.6k
                NEXT;
6778
6779
39.6k
                if (inputIdsSize <= depth) {
6780
11.9k
                    int *tmp;
6781
6782
11.9k
                    inputIdsSize = (inputIdsSize == 0 ? 4 : inputIdsSize * 2);
6783
11.9k
                    tmp = (int *) xmlRealloc(inputIds,
6784
11.9k
                            inputIdsSize * sizeof(int));
6785
11.9k
                    if (tmp == NULL) {
6786
0
                        xmlErrMemory(ctxt, NULL);
6787
0
                        goto error;
6788
0
                    }
6789
11.9k
                    inputIds = tmp;
6790
11.9k
                }
6791
39.6k
                inputIds[depth] = id;
6792
39.6k
                depth++;
6793
39.6k
            } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
6794
5.67k
                int state;
6795
5.67k
                xmlParserInputState instate;
6796
5.67k
                size_t ignoreDepth = 0;
6797
6798
5.67k
                SKIP(6);
6799
5.67k
                SKIP_BLANKS;
6800
5.67k
                if (RAW != '[') {
6801
194
                    xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6802
194
                    xmlHaltParser(ctxt);
6803
194
                    goto error;
6804
194
                }
6805
5.48k
                if (ctxt->input->id != id) {
6806
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6807
0
                                   "All markup of the conditional section is"
6808
0
                                   " not in the same entity\n");
6809
0
                }
6810
5.48k
                NEXT;
6811
6812
                /*
6813
                 * Parse up to the end of the conditional section but disable
6814
                 * SAX event generating DTD building in the meantime
6815
                 */
6816
5.48k
                state = ctxt->disableSAX;
6817
5.48k
                instate = ctxt->instate;
6818
5.48k
                if (ctxt->recovery == 0) ctxt->disableSAX = 1;
6819
5.48k
                ctxt->instate = XML_PARSER_IGNORE;
6820
6821
5.95M
                while (RAW != 0) {
6822
5.95M
                    if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6823
15.0k
                        SKIP(3);
6824
15.0k
                        ignoreDepth++;
6825
                        /* Check for integer overflow */
6826
15.0k
                        if (ignoreDepth == 0) {
6827
0
                            xmlErrMemory(ctxt, NULL);
6828
0
                            goto error;
6829
0
                        }
6830
5.93M
                    } else if ((RAW == ']') && (NXT(1) == ']') &&
6831
5.93M
                               (NXT(2) == '>')) {
6832
11.3k
                        if (ignoreDepth == 0)
6833
2.48k
                            break;
6834
8.86k
                        SKIP(3);
6835
8.86k
                        ignoreDepth--;
6836
5.92M
                    } else {
6837
5.92M
                        NEXT;
6838
5.92M
                    }
6839
5.95M
                }
6840
6841
5.48k
                ctxt->disableSAX = state;
6842
5.48k
                ctxt->instate = instate;
6843
6844
5.48k
    if (RAW == 0) {
6845
3.00k
        xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
6846
3.00k
                    goto error;
6847
3.00k
    }
6848
2.48k
                if (ctxt->input->id != id) {
6849
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6850
0
                                   "All markup of the conditional section is"
6851
0
                                   " not in the same entity\n");
6852
0
                }
6853
2.48k
                SKIP(3);
6854
2.87k
            } else {
6855
2.87k
                xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
6856
2.87k
                xmlHaltParser(ctxt);
6857
2.87k
                goto error;
6858
2.87k
            }
6859
48.5k
        } else if ((depth > 0) &&
6860
43.2k
                   (RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6861
21.1k
            depth--;
6862
21.1k
            if (ctxt->input->id != inputIds[depth]) {
6863
291
                xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6864
291
                               "All markup of the conditional section is not"
6865
291
                               " in the same entity\n");
6866
291
            }
6867
21.1k
            SKIP(3);
6868
22.1k
        } else {
6869
22.1k
            int id = ctxt->input->id;
6870
22.1k
            unsigned long cons = CUR_CONSUMED;
6871
6872
22.1k
            xmlParseMarkupDecl(ctxt);
6873
6874
22.1k
            if ((id == ctxt->input->id) && (cons == CUR_CONSUMED)) {
6875
3.82k
                xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6876
3.82k
                xmlHaltParser(ctxt);
6877
3.82k
                goto error;
6878
3.82k
            }
6879
22.1k
        }
6880
6881
81.5k
        if (depth == 0)
6882
6.39k
            break;
6883
6884
75.1k
        SKIP_BLANKS;
6885
75.1k
        GROW;
6886
75.1k
    }
6887
6888
16.9k
error:
6889
16.9k
    xmlFree(inputIds);
6890
16.9k
}
6891
6892
/**
6893
 * xmlParseMarkupDecl:
6894
 * @ctxt:  an XML parser context
6895
 *
6896
 * DEPRECATED: Internal function, don't use.
6897
 *
6898
 * parse Markup declarations
6899
 *
6900
 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6901
 *                     NotationDecl | PI | Comment
6902
 *
6903
 * [ VC: Proper Declaration/PE Nesting ]
6904
 * Parameter-entity replacement text must be properly nested with
6905
 * markup declarations. That is to say, if either the first character
6906
 * or the last character of a markup declaration (markupdecl above) is
6907
 * contained in the replacement text for a parameter-entity reference,
6908
 * both must be contained in the same replacement text.
6909
 *
6910
 * [ WFC: PEs in Internal Subset ]
6911
 * In the internal DTD subset, parameter-entity references can occur
6912
 * only where markup declarations can occur, not within markup declarations.
6913
 * (This does not apply to references that occur in external parameter
6914
 * entities or to the external subset.)
6915
 */
6916
void
6917
8.54M
xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
6918
8.54M
    GROW;
6919
8.54M
    if (CUR == '<') {
6920
7.96M
        if (NXT(1) == '!') {
6921
7.91M
      switch (NXT(2)) {
6922
4.52M
          case 'E':
6923
4.52M
        if (NXT(3) == 'L')
6924
2.18M
      xmlParseElementDecl(ctxt);
6925
2.33M
        else if (NXT(3) == 'N')
6926
2.33M
      xmlParseEntityDecl(ctxt);
6927
4.52M
        break;
6928
1.72M
          case 'A':
6929
1.72M
        xmlParseAttributeListDecl(ctxt);
6930
1.72M
        break;
6931
20.0k
          case 'N':
6932
20.0k
        xmlParseNotationDecl(ctxt);
6933
20.0k
        break;
6934
1.63M
          case '-':
6935
1.63M
        xmlParseComment(ctxt);
6936
1.63M
        break;
6937
6.89k
    default:
6938
        /* there is an error but it will be detected later */
6939
6.89k
        break;
6940
7.91M
      }
6941
7.91M
  } else if (NXT(1) == '?') {
6942
34.1k
      xmlParsePI(ctxt);
6943
34.1k
  }
6944
7.96M
    }
6945
6946
    /*
6947
     * detect requirement to exit there and act accordingly
6948
     * and avoid having instate overridden later on
6949
     */
6950
8.54M
    if (ctxt->instate == XML_PARSER_EOF)
6951
24.8k
        return;
6952
6953
8.52M
    ctxt->instate = XML_PARSER_DTD;
6954
8.52M
}
6955
6956
/**
6957
 * xmlParseTextDecl:
6958
 * @ctxt:  an XML parser context
6959
 *
6960
 * DEPRECATED: Internal function, don't use.
6961
 *
6962
 * parse an XML declaration header for external entities
6963
 *
6964
 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
6965
 */
6966
6967
void
6968
57.9k
xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
6969
57.9k
    xmlChar *version;
6970
57.9k
    const xmlChar *encoding;
6971
57.9k
    int oldstate;
6972
6973
    /*
6974
     * We know that '<?xml' is here.
6975
     */
6976
57.9k
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
6977
57.6k
  SKIP(5);
6978
57.6k
    } else {
6979
275
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
6980
275
  return;
6981
275
    }
6982
6983
    /* Avoid expansion of parameter entities when skipping blanks. */
6984
57.6k
    oldstate = ctxt->instate;
6985
57.6k
    ctxt->instate = XML_PARSER_START;
6986
6987
57.6k
    if (SKIP_BLANKS == 0) {
6988
0
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6989
0
           "Space needed after '<?xml'\n");
6990
0
    }
6991
6992
    /*
6993
     * We may have the VersionInfo here.
6994
     */
6995
57.6k
    version = xmlParseVersionInfo(ctxt);
6996
57.6k
    if (version == NULL)
6997
11.5k
  version = xmlCharStrdup(XML_DEFAULT_VERSION);
6998
46.1k
    else {
6999
46.1k
  if (SKIP_BLANKS == 0) {
7000
1.59k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7001
1.59k
               "Space needed here\n");
7002
1.59k
  }
7003
46.1k
    }
7004
57.6k
    ctxt->input->version = version;
7005
7006
    /*
7007
     * We must have the encoding declaration
7008
     */
7009
57.6k
    encoding = xmlParseEncodingDecl(ctxt);
7010
57.6k
    if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7011
  /*
7012
   * The XML REC instructs us to stop parsing right here
7013
   */
7014
237
        ctxt->instate = oldstate;
7015
237
        return;
7016
237
    }
7017
57.4k
    if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
7018
4.42k
  xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
7019
4.42k
           "Missing encoding in text declaration\n");
7020
4.42k
    }
7021
7022
57.4k
    SKIP_BLANKS;
7023
57.4k
    if ((RAW == '?') && (NXT(1) == '>')) {
7024
36.4k
        SKIP(2);
7025
36.4k
    } else if (RAW == '>') {
7026
        /* Deprecated old WD ... */
7027
341
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
7028
341
  NEXT;
7029
20.6k
    } else {
7030
20.6k
        int c;
7031
7032
20.6k
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
7033
2.72M
        while ((c = CUR) != 0) {
7034
2.72M
            NEXT;
7035
2.72M
            if (c == '>')
7036
20.0k
                break;
7037
2.72M
        }
7038
20.6k
    }
7039
7040
57.4k
    ctxt->instate = oldstate;
7041
57.4k
}
7042
7043
/**
7044
 * xmlParseExternalSubset:
7045
 * @ctxt:  an XML parser context
7046
 * @ExternalID: the external identifier
7047
 * @SystemID: the system identifier (or URL)
7048
 *
7049
 * parse Markup declarations from an external subset
7050
 *
7051
 * [30] extSubset ::= textDecl? extSubsetDecl
7052
 *
7053
 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
7054
 */
7055
void
7056
xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
7057
73.6k
                       const xmlChar *SystemID) {
7058
73.6k
    xmlDetectSAX2(ctxt);
7059
73.6k
    GROW;
7060
7061
73.6k
    if ((ctxt->encoding == NULL) &&
7062
73.6k
        (ctxt->input->end - ctxt->input->cur >= 4)) {
7063
73.5k
        xmlChar start[4];
7064
73.5k
  xmlCharEncoding enc;
7065
7066
73.5k
  start[0] = RAW;
7067
73.5k
  start[1] = NXT(1);
7068
73.5k
  start[2] = NXT(2);
7069
73.5k
  start[3] = NXT(3);
7070
73.5k
  enc = xmlDetectCharEncoding(start, 4);
7071
73.5k
  if (enc != XML_CHAR_ENCODING_NONE)
7072
15.2k
      xmlSwitchEncoding(ctxt, enc);
7073
73.5k
    }
7074
7075
73.6k
    if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
7076
14.4k
  xmlParseTextDecl(ctxt);
7077
14.4k
  if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7078
      /*
7079
       * The XML REC instructs us to stop parsing right here
7080
       */
7081
204
      xmlHaltParser(ctxt);
7082
204
      return;
7083
204
  }
7084
14.4k
    }
7085
73.4k
    if (ctxt->myDoc == NULL) {
7086
0
        ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
7087
0
  if (ctxt->myDoc == NULL) {
7088
0
      xmlErrMemory(ctxt, "New Doc failed");
7089
0
      return;
7090
0
  }
7091
0
  ctxt->myDoc->properties = XML_DOC_INTERNAL;
7092
0
    }
7093
73.4k
    if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
7094
0
        xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
7095
7096
73.4k
    ctxt->instate = XML_PARSER_DTD;
7097
73.4k
    ctxt->external = 1;
7098
73.4k
    SKIP_BLANKS;
7099
3.93M
    while (((RAW == '<') && (NXT(1) == '?')) ||
7100
3.93M
           ((RAW == '<') && (NXT(1) == '!')) ||
7101
3.93M
     (RAW == '%')) {
7102
3.87M
  int id = ctxt->input->id;
7103
3.87M
  unsigned long cons = CUR_CONSUMED;
7104
7105
3.87M
  GROW;
7106
3.87M
        if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7107
16.9k
      xmlParseConditionalSections(ctxt);
7108
16.9k
  } else
7109
3.85M
      xmlParseMarkupDecl(ctxt);
7110
3.87M
        SKIP_BLANKS;
7111
7112
3.87M
  if ((id == ctxt->input->id) && (cons == CUR_CONSUMED)) {
7113
9.88k
      xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7114
9.88k
      break;
7115
9.88k
  }
7116
3.87M
    }
7117
7118
73.4k
    if (RAW != 0) {
7119
28.8k
  xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7120
28.8k
    }
7121
7122
73.4k
}
7123
7124
/**
7125
 * xmlParseReference:
7126
 * @ctxt:  an XML parser context
7127
 *
7128
 * DEPRECATED: Internal function, don't use.
7129
 *
7130
 * parse and handle entity references in content, depending on the SAX
7131
 * interface, this may end-up in a call to character() if this is a
7132
 * CharRef, a predefined entity, if there is no reference() callback.
7133
 * or if the parser was asked to switch to that mode.
7134
 *
7135
 * [67] Reference ::= EntityRef | CharRef
7136
 */
7137
void
7138
77.0M
xmlParseReference(xmlParserCtxtPtr ctxt) {
7139
77.0M
    xmlEntityPtr ent;
7140
77.0M
    xmlChar *val;
7141
77.0M
    int was_checked;
7142
77.0M
    xmlNodePtr list = NULL;
7143
77.0M
    xmlParserErrors ret = XML_ERR_OK;
7144
7145
7146
77.0M
    if (RAW != '&')
7147
0
        return;
7148
7149
    /*
7150
     * Simple case of a CharRef
7151
     */
7152
77.0M
    if (NXT(1) == '#') {
7153
30.6M
  int i = 0;
7154
30.6M
  xmlChar out[16];
7155
30.6M
  int hex = NXT(2);
7156
30.6M
  int value = xmlParseCharRef(ctxt);
7157
7158
30.6M
  if (value == 0)
7159
1.78M
      return;
7160
28.8M
  if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
7161
      /*
7162
       * So we are using non-UTF-8 buffers
7163
       * Check that the char fit on 8bits, if not
7164
       * generate a CharRef.
7165
       */
7166
25.5M
      if (value <= 0xFF) {
7167
25.2M
    out[0] = value;
7168
25.2M
    out[1] = 0;
7169
25.2M
    if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7170
25.2M
        (!ctxt->disableSAX))
7171
625k
        ctxt->sax->characters(ctxt->userData, out, 1);
7172
25.2M
      } else {
7173
310k
    if ((hex == 'x') || (hex == 'X'))
7174
7.67k
        snprintf((char *)out, sizeof(out), "#x%X", value);
7175
303k
    else
7176
303k
        snprintf((char *)out, sizeof(out), "#%d", value);
7177
310k
    if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7178
310k
        (!ctxt->disableSAX))
7179
27.1k
        ctxt->sax->reference(ctxt->userData, out);
7180
310k
      }
7181
25.5M
  } else {
7182
      /*
7183
       * Just encode the value in UTF-8
7184
       */
7185
3.33M
      COPY_BUF(0 ,out, i, value);
7186
3.33M
      out[i] = 0;
7187
3.33M
      if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7188
3.33M
    (!ctxt->disableSAX))
7189
624k
    ctxt->sax->characters(ctxt->userData, out, i);
7190
3.33M
  }
7191
28.8M
  return;
7192
30.6M
    }
7193
7194
    /*
7195
     * We are seeing an entity reference
7196
     */
7197
46.3M
    ent = xmlParseEntityRef(ctxt);
7198
46.3M
    if (ent == NULL) return;
7199
15.8M
    if (!ctxt->wellFormed)
7200
9.48M
  return;
7201
6.40M
    was_checked = ent->checked;
7202
7203
    /* special case of predefined entities */
7204
6.40M
    if ((ent->name == NULL) ||
7205
6.40M
        (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
7206
230k
  val = ent->content;
7207
230k
  if (val == NULL) return;
7208
  /*
7209
   * inline the entity.
7210
   */
7211
230k
  if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7212
230k
      (!ctxt->disableSAX))
7213
230k
      ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
7214
230k
  return;
7215
230k
    }
7216
7217
    /*
7218
     * The first reference to the entity trigger a parsing phase
7219
     * where the ent->children is filled with the result from
7220
     * the parsing.
7221
     * Note: external parsed entities will not be loaded, it is not
7222
     * required for a non-validating parser, unless the parsing option
7223
     * of validating, or substituting entities were given. Doing so is
7224
     * far more secure as the parser will only process data coming from
7225
     * the document entity by default.
7226
     */
7227
6.17M
    if (((ent->checked == 0) ||
7228
6.17M
         ((ent->children == NULL) && (ctxt->options & XML_PARSE_NOENT))) &&
7229
6.17M
        ((ent->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY) ||
7230
5.94M
         (ctxt->options & (XML_PARSE_NOENT | XML_PARSE_DTDVALID)))) {
7231
5.92M
  unsigned long oldnbent = ctxt->nbentities, diff;
7232
7233
  /*
7234
   * This is a bit hackish but this seems the best
7235
   * way to make sure both SAX and DOM entity support
7236
   * behaves okay.
7237
   */
7238
5.92M
  void *user_data;
7239
5.92M
  if (ctxt->userData == ctxt)
7240
5.92M
      user_data = NULL;
7241
0
  else
7242
0
      user_data = ctxt->userData;
7243
7244
  /*
7245
   * Check that this entity is well formed
7246
   * 4.3.2: An internal general parsed entity is well-formed
7247
   * if its replacement text matches the production labeled
7248
   * content.
7249
   */
7250
5.92M
  if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7251
428k
      ctxt->depth++;
7252
428k
      ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content,
7253
428k
                                                user_data, &list);
7254
428k
      ctxt->depth--;
7255
7256
5.49M
  } else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7257
5.49M
      ctxt->depth++;
7258
5.49M
      ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax,
7259
5.49M
                                     user_data, ctxt->depth, ent->URI,
7260
5.49M
             ent->ExternalID, &list);
7261
5.49M
      ctxt->depth--;
7262
5.49M
  } else {
7263
0
      ret = XML_ERR_ENTITY_PE_INTERNAL;
7264
0
      xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7265
0
       "invalid entity type found\n", NULL);
7266
0
  }
7267
7268
  /*
7269
   * Store the number of entities needing parsing for this entity
7270
   * content and do checkings
7271
   */
7272
5.92M
        diff = ctxt->nbentities - oldnbent + 1;
7273
5.92M
        if (diff > INT_MAX / 2)
7274
0
            diff = INT_MAX / 2;
7275
5.92M
        ent->checked = diff * 2;
7276
5.92M
  if ((ent->content != NULL) && (xmlStrchr(ent->content, '<')))
7277
92.1k
      ent->checked |= 1;
7278
5.92M
  if (ret == XML_ERR_ENTITY_LOOP) {
7279
835k
      xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7280
835k
            xmlHaltParser(ctxt);
7281
835k
      xmlFreeNodeList(list);
7282
835k
      return;
7283
835k
  }
7284
5.08M
  if (xmlParserEntityCheck(ctxt, 0, ent, 0)) {
7285
1.02k
      xmlFreeNodeList(list);
7286
1.02k
      return;
7287
1.02k
  }
7288
7289
5.08M
  if ((ret == XML_ERR_OK) && (list != NULL)) {
7290
60.6k
      if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
7291
60.6k
       (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
7292
60.6k
    (ent->children == NULL)) {
7293
60.3k
    ent->children = list;
7294
                /*
7295
                 * Prune it directly in the generated document
7296
                 * except for single text nodes.
7297
                 */
7298
60.3k
                if ((ctxt->replaceEntities == 0) ||
7299
60.3k
                    (ctxt->parseMode == XML_PARSE_READER) ||
7300
60.3k
                    ((list->type == XML_TEXT_NODE) &&
7301
57.3k
                     (list->next == NULL))) {
7302
57.3k
                    ent->owner = 1;
7303
136k
                    while (list != NULL) {
7304
79.2k
                        list->parent = (xmlNodePtr) ent;
7305
79.2k
                        if (list->doc != ent->doc)
7306
0
                            xmlSetTreeDoc(list, ent->doc);
7307
79.2k
                        if (list->next == NULL)
7308
57.3k
                            ent->last = list;
7309
79.2k
                        list = list->next;
7310
79.2k
                    }
7311
57.3k
                    list = NULL;
7312
57.3k
                } else {
7313
2.99k
                    ent->owner = 0;
7314
12.5k
                    while (list != NULL) {
7315
9.57k
                        list->parent = (xmlNodePtr) ctxt->node;
7316
9.57k
                        list->doc = ctxt->myDoc;
7317
9.57k
                        if (list->next == NULL)
7318
2.99k
                            ent->last = list;
7319
9.57k
                        list = list->next;
7320
9.57k
                    }
7321
2.99k
                    list = ent->children;
7322
#ifdef LIBXML_LEGACY_ENABLED
7323
                    if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7324
                        xmlAddEntityReference(ent, list, NULL);
7325
#endif /* LIBXML_LEGACY_ENABLED */
7326
2.99k
                }
7327
60.3k
      } else {
7328
284
    xmlFreeNodeList(list);
7329
284
    list = NULL;
7330
284
      }
7331
5.02M
  } else if ((ret != XML_ERR_OK) &&
7332
5.02M
       (ret != XML_WAR_UNDECLARED_ENTITY)) {
7333
4.97M
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7334
4.97M
         "Entity '%s' failed to parse\n", ent->name);
7335
4.97M
            if (ent->content != NULL)
7336
254k
                ent->content[0] = 0;
7337
4.97M
      xmlParserEntityCheck(ctxt, 0, ent, 0);
7338
4.97M
  } else if (list != NULL) {
7339
0
      xmlFreeNodeList(list);
7340
0
      list = NULL;
7341
0
  }
7342
5.08M
  if (ent->checked == 0)
7343
0
      ent->checked = 2;
7344
7345
        /* Prevent entity from being parsed and expanded twice (Bug 760367). */
7346
5.08M
        was_checked = 0;
7347
5.08M
    } else if (ent->checked != 1) {
7348
248k
  ctxt->nbentities += ent->checked / 2;
7349
248k
    }
7350
7351
    /*
7352
     * Now that the entity content has been gathered
7353
     * provide it to the application, this can take different forms based
7354
     * on the parsing modes.
7355
     */
7356
5.33M
    if (ent->children == NULL) {
7357
  /*
7358
   * Probably running in SAX mode and the callbacks don't
7359
   * build the entity content. So unless we already went
7360
   * though parsing for first checking go though the entity
7361
   * content to generate callbacks associated to the entity
7362
   */
7363
5.07M
  if (was_checked != 0) {
7364
44.2k
      void *user_data;
7365
      /*
7366
       * This is a bit hackish but this seems the best
7367
       * way to make sure both SAX and DOM entity support
7368
       * behaves okay.
7369
       */
7370
44.2k
      if (ctxt->userData == ctxt)
7371
44.2k
    user_data = NULL;
7372
0
      else
7373
0
    user_data = ctxt->userData;
7374
7375
44.2k
      if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7376
9.81k
    ctxt->depth++;
7377
9.81k
    ret = xmlParseBalancedChunkMemoryInternal(ctxt,
7378
9.81k
           ent->content, user_data, NULL);
7379
9.81k
    ctxt->depth--;
7380
34.4k
      } else if (ent->etype ==
7381
34.4k
           XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7382
34.4k
    ctxt->depth++;
7383
34.4k
    ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
7384
34.4k
         ctxt->sax, user_data, ctxt->depth,
7385
34.4k
         ent->URI, ent->ExternalID, NULL);
7386
34.4k
    ctxt->depth--;
7387
34.4k
      } else {
7388
0
    ret = XML_ERR_ENTITY_PE_INTERNAL;
7389
0
    xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7390
0
           "invalid entity type found\n", NULL);
7391
0
      }
7392
44.2k
      if (ret == XML_ERR_ENTITY_LOOP) {
7393
164
    xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7394
164
    return;
7395
164
      }
7396
44.2k
  }
7397
5.07M
  if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7398
5.07M
      (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7399
      /*
7400
       * Entity reference callback comes second, it's somewhat
7401
       * superfluous but a compatibility to historical behaviour
7402
       */
7403
83.1k
      ctxt->sax->reference(ctxt->userData, ent->name);
7404
83.1k
  }
7405
5.07M
  return;
7406
5.07M
    }
7407
7408
    /*
7409
     * If we didn't get any children for the entity being built
7410
     */
7411
255k
    if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7412
255k
  (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7413
  /*
7414
   * Create a node.
7415
   */
7416
177k
  ctxt->sax->reference(ctxt->userData, ent->name);
7417
177k
  return;
7418
177k
    }
7419
7420
77.4k
    if ((ctxt->replaceEntities) || (ent->children == NULL))  {
7421
  /*
7422
   * There is a problem on the handling of _private for entities
7423
   * (bug 155816): Should we copy the content of the field from
7424
   * the entity (possibly overwriting some value set by the user
7425
   * when a copy is created), should we leave it alone, or should
7426
   * we try to take care of different situations?  The problem
7427
   * is exacerbated by the usage of this field by the xmlReader.
7428
   * To fix this bug, we look at _private on the created node
7429
   * and, if it's NULL, we copy in whatever was in the entity.
7430
   * If it's not NULL we leave it alone.  This is somewhat of a
7431
   * hack - maybe we should have further tests to determine
7432
   * what to do.
7433
   */
7434
64.5k
  if ((ctxt->node != NULL) && (ent->children != NULL)) {
7435
      /*
7436
       * Seems we are generating the DOM content, do
7437
       * a simple tree copy for all references except the first
7438
       * In the first occurrence list contains the replacement.
7439
       */
7440
64.5k
      if (((list == NULL) && (ent->owner == 0)) ||
7441
64.5k
    (ctxt->parseMode == XML_PARSE_READER)) {
7442
17.6k
    xmlNodePtr nw = NULL, cur, firstChild = NULL;
7443
7444
    /*
7445
     * We are copying here, make sure there is no abuse
7446
     */
7447
17.6k
    ctxt->sizeentcopy += ent->length + 5;
7448
17.6k
    if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
7449
0
        return;
7450
7451
    /*
7452
     * when operating on a reader, the entities definitions
7453
     * are always owning the entities subtree.
7454
    if (ctxt->parseMode == XML_PARSE_READER)
7455
        ent->owner = 1;
7456
     */
7457
7458
17.6k
    cur = ent->children;
7459
22.6k
    while (cur != NULL) {
7460
22.6k
        nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7461
22.6k
        if (nw != NULL) {
7462
22.6k
      if (nw->_private == NULL)
7463
22.6k
          nw->_private = cur->_private;
7464
22.6k
      if (firstChild == NULL){
7465
17.6k
          firstChild = nw;
7466
17.6k
      }
7467
22.6k
      nw = xmlAddChild(ctxt->node, nw);
7468
22.6k
        }
7469
22.6k
        if (cur == ent->last) {
7470
      /*
7471
       * needed to detect some strange empty
7472
       * node cases in the reader tests
7473
       */
7474
17.6k
      if ((ctxt->parseMode == XML_PARSE_READER) &&
7475
17.6k
          (nw != NULL) &&
7476
17.6k
          (nw->type == XML_ELEMENT_NODE) &&
7477
17.6k
          (nw->children == NULL))
7478
785
          nw->extra = 1;
7479
7480
17.6k
      break;
7481
17.6k
        }
7482
5.08k
        cur = cur->next;
7483
5.08k
    }
7484
#ifdef LIBXML_LEGACY_ENABLED
7485
    if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7486
      xmlAddEntityReference(ent, firstChild, nw);
7487
#endif /* LIBXML_LEGACY_ENABLED */
7488
46.8k
      } else if ((list == NULL) || (ctxt->inputNr > 0)) {
7489
46.8k
    xmlNodePtr nw = NULL, cur, next, last,
7490
46.8k
         firstChild = NULL;
7491
7492
    /*
7493
     * We are copying here, make sure there is no abuse
7494
     */
7495
46.8k
    ctxt->sizeentcopy += ent->length + 5;
7496
46.8k
    if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
7497
0
        return;
7498
7499
    /*
7500
     * Copy the entity child list and make it the new
7501
     * entity child list. The goal is to make sure any
7502
     * ID or REF referenced will be the one from the
7503
     * document content and not the entity copy.
7504
     */
7505
46.8k
    cur = ent->children;
7506
46.8k
    ent->children = NULL;
7507
46.8k
    last = ent->last;
7508
46.8k
    ent->last = NULL;
7509
57.0k
    while (cur != NULL) {
7510
57.0k
        next = cur->next;
7511
57.0k
        cur->next = NULL;
7512
57.0k
        cur->parent = NULL;
7513
57.0k
        nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7514
57.0k
        if (nw != NULL) {
7515
57.0k
      if (nw->_private == NULL)
7516
57.0k
          nw->_private = cur->_private;
7517
57.0k
      if (firstChild == NULL){
7518
46.8k
          firstChild = cur;
7519
46.8k
      }
7520
57.0k
      xmlAddChild((xmlNodePtr) ent, nw);
7521
57.0k
      xmlAddChild(ctxt->node, cur);
7522
57.0k
        }
7523
57.0k
        if (cur == last)
7524
46.8k
      break;
7525
10.1k
        cur = next;
7526
10.1k
    }
7527
46.8k
    if (ent->owner == 0)
7528
2.99k
        ent->owner = 1;
7529
#ifdef LIBXML_LEGACY_ENABLED
7530
    if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7531
      xmlAddEntityReference(ent, firstChild, nw);
7532
#endif /* LIBXML_LEGACY_ENABLED */
7533
46.8k
      } else {
7534
0
    const xmlChar *nbktext;
7535
7536
    /*
7537
     * the name change is to avoid coalescing of the
7538
     * node with a possible previous text one which
7539
     * would make ent->children a dangling pointer
7540
     */
7541
0
    nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
7542
0
          -1);
7543
0
    if (ent->children->type == XML_TEXT_NODE)
7544
0
        ent->children->name = nbktext;
7545
0
    if ((ent->last != ent->children) &&
7546
0
        (ent->last->type == XML_TEXT_NODE))
7547
0
        ent->last->name = nbktext;
7548
0
    xmlAddChildList(ctxt->node, ent->children);
7549
0
      }
7550
7551
      /*
7552
       * This is to avoid a nasty side effect, see
7553
       * characters() in SAX.c
7554
       */
7555
64.5k
      ctxt->nodemem = 0;
7556
64.5k
      ctxt->nodelen = 0;
7557
64.5k
      return;
7558
64.5k
  }
7559
64.5k
    }
7560
77.4k
}
7561
7562
/**
7563
 * xmlParseEntityRef:
7564
 * @ctxt:  an XML parser context
7565
 *
7566
 * DEPRECATED: Internal function, don't use.
7567
 *
7568
 * parse ENTITY references declarations
7569
 *
7570
 * [68] EntityRef ::= '&' Name ';'
7571
 *
7572
 * [ WFC: Entity Declared ]
7573
 * In a document without any DTD, a document with only an internal DTD
7574
 * subset which contains no parameter entity references, or a document
7575
 * with "standalone='yes'", the Name given in the entity reference
7576
 * must match that in an entity declaration, except that well-formed
7577
 * documents need not declare any of the following entities: amp, lt,
7578
 * gt, apos, quot.  The declaration of a parameter entity must precede
7579
 * any reference to it.  Similarly, the declaration of a general entity
7580
 * must precede any reference to it which appears in a default value in an
7581
 * attribute-list declaration. Note that if entities are declared in the
7582
 * external subset or in external parameter entities, a non-validating
7583
 * processor is not obligated to read and process their declarations;
7584
 * for such documents, the rule that an entity must be declared is a
7585
 * well-formedness constraint only if standalone='yes'.
7586
 *
7587
 * [ WFC: Parsed Entity ]
7588
 * An entity reference must not contain the name of an unparsed entity
7589
 *
7590
 * Returns the xmlEntityPtr if found, or NULL otherwise.
7591
 */
7592
xmlEntityPtr
7593
55.7M
xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
7594
55.7M
    const xmlChar *name;
7595
55.7M
    xmlEntityPtr ent = NULL;
7596
7597
55.7M
    GROW;
7598
55.7M
    if (ctxt->instate == XML_PARSER_EOF)
7599
0
        return(NULL);
7600
7601
55.7M
    if (RAW != '&')
7602
0
        return(NULL);
7603
55.7M
    NEXT;
7604
55.7M
    name = xmlParseName(ctxt);
7605
55.7M
    if (name == NULL) {
7606
26.3M
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7607
26.3M
           "xmlParseEntityRef: no name\n");
7608
26.3M
        return(NULL);
7609
26.3M
    }
7610
29.3M
    if (RAW != ';') {
7611
1.87M
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7612
1.87M
  return(NULL);
7613
1.87M
    }
7614
27.5M
    NEXT;
7615
7616
    /*
7617
     * Predefined entities override any extra definition
7618
     */
7619
27.5M
    if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7620
16.6M
        ent = xmlGetPredefinedEntity(name);
7621
16.6M
        if (ent != NULL)
7622
5.33M
            return(ent);
7623
16.6M
    }
7624
7625
    /*
7626
     * Increase the number of entity references parsed
7627
     */
7628
22.1M
    ctxt->nbentities++;
7629
7630
    /*
7631
     * Ask first SAX for entity resolution, otherwise try the
7632
     * entities which may have stored in the parser context.
7633
     */
7634
22.1M
    if (ctxt->sax != NULL) {
7635
22.1M
  if (ctxt->sax->getEntity != NULL)
7636
22.1M
      ent = ctxt->sax->getEntity(ctxt->userData, name);
7637
22.1M
  if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7638
22.1M
      (ctxt->options & XML_PARSE_OLDSAX))
7639
7.92k
      ent = xmlGetPredefinedEntity(name);
7640
22.1M
  if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7641
22.1M
      (ctxt->userData==ctxt)) {
7642
29.5k
      ent = xmlSAX2GetEntity(ctxt, name);
7643
29.5k
  }
7644
22.1M
    }
7645
22.1M
    if (ctxt->instate == XML_PARSER_EOF)
7646
0
  return(NULL);
7647
    /*
7648
     * [ WFC: Entity Declared ]
7649
     * In a document without any DTD, a document with only an
7650
     * internal DTD subset which contains no parameter entity
7651
     * references, or a document with "standalone='yes'", the
7652
     * Name given in the entity reference must match that in an
7653
     * entity declaration, except that well-formed documents
7654
     * need not declare any of the following entities: amp, lt,
7655
     * gt, apos, quot.
7656
     * The declaration of a parameter entity must precede any
7657
     * reference to it.
7658
     * Similarly, the declaration of a general entity must
7659
     * precede any reference to it which appears in a default
7660
     * value in an attribute-list declaration. Note that if
7661
     * entities are declared in the external subset or in
7662
     * external parameter entities, a non-validating processor
7663
     * is not obligated to read and process their declarations;
7664
     * for such documents, the rule that an entity must be
7665
     * declared is a well-formedness constraint only if
7666
     * standalone='yes'.
7667
     */
7668
22.1M
    if (ent == NULL) {
7669
9.50M
  if ((ctxt->standalone == 1) ||
7670
9.50M
      ((ctxt->hasExternalSubset == 0) &&
7671
9.49M
       (ctxt->hasPErefs == 0))) {
7672
9.38M
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7673
9.38M
         "Entity '%s' not defined\n", name);
7674
9.38M
  } else {
7675
116k
      xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7676
116k
         "Entity '%s' not defined\n", name);
7677
116k
      if ((ctxt->inSubset == 0) &&
7678
116k
    (ctxt->sax != NULL) &&
7679
116k
    (ctxt->sax->reference != NULL)) {
7680
115k
    ctxt->sax->reference(ctxt->userData, name);
7681
115k
      }
7682
116k
  }
7683
9.50M
  xmlParserEntityCheck(ctxt, 0, ent, 0);
7684
9.50M
  ctxt->valid = 0;
7685
9.50M
    }
7686
7687
    /*
7688
     * [ WFC: Parsed Entity ]
7689
     * An entity reference must not contain the name of an
7690
     * unparsed entity
7691
     */
7692
12.6M
    else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7693
1.49k
  xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7694
1.49k
     "Entity reference to unparsed entity %s\n", name);
7695
1.49k
    }
7696
7697
    /*
7698
     * [ WFC: No External Entity References ]
7699
     * Attribute values cannot contain direct or indirect
7700
     * entity references to external entities.
7701
     */
7702
12.6M
    else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7703
12.6M
       (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7704
28.1k
  xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7705
28.1k
       "Attribute references external entity '%s'\n", name);
7706
28.1k
    }
7707
    /*
7708
     * [ WFC: No < in Attribute Values ]
7709
     * The replacement text of any entity referred to directly or
7710
     * indirectly in an attribute value (other than "&lt;") must
7711
     * not contain a <.
7712
     */
7713
12.6M
    else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7714
12.6M
       (ent != NULL) && 
7715
12.6M
       (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
7716
218k
  if (((ent->checked & 1) || (ent->checked == 0)) &&
7717
218k
       (ent->content != NULL) && (xmlStrchr(ent->content, '<'))) {
7718
6.02k
      xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7719
6.02k
  "'<' in entity '%s' is not allowed in attributes values\n", name);
7720
6.02k
        }
7721
218k
    }
7722
7723
    /*
7724
     * Internal check, no parameter entities here ...
7725
     */
7726
12.4M
    else {
7727
12.4M
  switch (ent->etype) {
7728
0
      case XML_INTERNAL_PARAMETER_ENTITY:
7729
0
      case XML_EXTERNAL_PARAMETER_ENTITY:
7730
0
      xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7731
0
       "Attempt to reference the parameter entity '%s'\n",
7732
0
            name);
7733
0
      break;
7734
12.4M
      default:
7735
12.4M
      break;
7736
12.4M
  }
7737
12.4M
    }
7738
7739
    /*
7740
     * [ WFC: No Recursion ]
7741
     * A parsed entity must not contain a recursive reference
7742
     * to itself, either directly or indirectly.
7743
     * Done somewhere else
7744
     */
7745
22.1M
    return(ent);
7746
22.1M
}
7747
7748
/**
7749
 * xmlParseStringEntityRef:
7750
 * @ctxt:  an XML parser context
7751
 * @str:  a pointer to an index in the string
7752
 *
7753
 * parse ENTITY references declarations, but this version parses it from
7754
 * a string value.
7755
 *
7756
 * [68] EntityRef ::= '&' Name ';'
7757
 *
7758
 * [ WFC: Entity Declared ]
7759
 * In a document without any DTD, a document with only an internal DTD
7760
 * subset which contains no parameter entity references, or a document
7761
 * with "standalone='yes'", the Name given in the entity reference
7762
 * must match that in an entity declaration, except that well-formed
7763
 * documents need not declare any of the following entities: amp, lt,
7764
 * gt, apos, quot.  The declaration of a parameter entity must precede
7765
 * any reference to it.  Similarly, the declaration of a general entity
7766
 * must precede any reference to it which appears in a default value in an
7767
 * attribute-list declaration. Note that if entities are declared in the
7768
 * external subset or in external parameter entities, a non-validating
7769
 * processor is not obligated to read and process their declarations;
7770
 * for such documents, the rule that an entity must be declared is a
7771
 * well-formedness constraint only if standalone='yes'.
7772
 *
7773
 * [ WFC: Parsed Entity ]
7774
 * An entity reference must not contain the name of an unparsed entity
7775
 *
7776
 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7777
 * is updated to the current location in the string.
7778
 */
7779
static xmlEntityPtr
7780
831k
xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7781
831k
    xmlChar *name;
7782
831k
    const xmlChar *ptr;
7783
831k
    xmlChar cur;
7784
831k
    xmlEntityPtr ent = NULL;
7785
7786
831k
    if ((str == NULL) || (*str == NULL))
7787
0
        return(NULL);
7788
831k
    ptr = *str;
7789
831k
    cur = *ptr;
7790
831k
    if (cur != '&')
7791
0
  return(NULL);
7792
7793
831k
    ptr++;
7794
831k
    name = xmlParseStringName(ctxt, &ptr);
7795
831k
    if (name == NULL) {
7796
35.2k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7797
35.2k
           "xmlParseStringEntityRef: no name\n");
7798
35.2k
  *str = ptr;
7799
35.2k
  return(NULL);
7800
35.2k
    }
7801
796k
    if (*ptr != ';') {
7802
9.64k
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7803
9.64k
        xmlFree(name);
7804
9.64k
  *str = ptr;
7805
9.64k
  return(NULL);
7806
9.64k
    }
7807
786k
    ptr++;
7808
7809
7810
    /*
7811
     * Predefined entities override any extra definition
7812
     */
7813
786k
    if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7814
426k
        ent = xmlGetPredefinedEntity(name);
7815
426k
        if (ent != NULL) {
7816
30.4k
            xmlFree(name);
7817
30.4k
            *str = ptr;
7818
30.4k
            return(ent);
7819
30.4k
        }
7820
426k
    }
7821
7822
    /*
7823
     * Increase the number of entity references parsed
7824
     */
7825
756k
    ctxt->nbentities++;
7826
7827
    /*
7828
     * Ask first SAX for entity resolution, otherwise try the
7829
     * entities which may have stored in the parser context.
7830
     */
7831
756k
    if (ctxt->sax != NULL) {
7832
756k
  if (ctxt->sax->getEntity != NULL)
7833
756k
      ent = ctxt->sax->getEntity(ctxt->userData, name);
7834
756k
  if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX))
7835
44.9k
      ent = xmlGetPredefinedEntity(name);
7836
756k
  if ((ent == NULL) && (ctxt->userData==ctxt)) {
7837
83.8k
      ent = xmlSAX2GetEntity(ctxt, name);
7838
83.8k
  }
7839
756k
    }
7840
756k
    if (ctxt->instate == XML_PARSER_EOF) {
7841
0
  xmlFree(name);
7842
0
  return(NULL);
7843
0
    }
7844
7845
    /*
7846
     * [ WFC: Entity Declared ]
7847
     * In a document without any DTD, a document with only an
7848
     * internal DTD subset which contains no parameter entity
7849
     * references, or a document with "standalone='yes'", the
7850
     * Name given in the entity reference must match that in an
7851
     * entity declaration, except that well-formed documents
7852
     * need not declare any of the following entities: amp, lt,
7853
     * gt, apos, quot.
7854
     * The declaration of a parameter entity must precede any
7855
     * reference to it.
7856
     * Similarly, the declaration of a general entity must
7857
     * precede any reference to it which appears in a default
7858
     * value in an attribute-list declaration. Note that if
7859
     * entities are declared in the external subset or in
7860
     * external parameter entities, a non-validating processor
7861
     * is not obligated to read and process their declarations;
7862
     * for such documents, the rule that an entity must be
7863
     * declared is a well-formedness constraint only if
7864
     * standalone='yes'.
7865
     */
7866
756k
    if (ent == NULL) {
7867
83.8k
  if ((ctxt->standalone == 1) ||
7868
83.8k
      ((ctxt->hasExternalSubset == 0) &&
7869
81.7k
       (ctxt->hasPErefs == 0))) {
7870
78.9k
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7871
78.9k
         "Entity '%s' not defined\n", name);
7872
78.9k
  } else {
7873
4.89k
      xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7874
4.89k
        "Entity '%s' not defined\n",
7875
4.89k
        name);
7876
4.89k
  }
7877
83.8k
  xmlParserEntityCheck(ctxt, 0, ent, 0);
7878
  /* TODO ? check regressions ctxt->valid = 0; */
7879
83.8k
    }
7880
7881
    /*
7882
     * [ WFC: Parsed Entity ]
7883
     * An entity reference must not contain the name of an
7884
     * unparsed entity
7885
     */
7886
672k
    else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7887
15.7k
  xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7888
15.7k
     "Entity reference to unparsed entity %s\n", name);
7889
15.7k
    }
7890
7891
    /*
7892
     * [ WFC: No External Entity References ]
7893
     * Attribute values cannot contain direct or indirect
7894
     * entity references to external entities.
7895
     */
7896
656k
    else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7897
656k
       (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7898
1.07k
  xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7899
1.07k
   "Attribute references external entity '%s'\n", name);
7900
1.07k
    }
7901
    /*
7902
     * [ WFC: No < in Attribute Values ]
7903
     * The replacement text of any entity referred to directly or
7904
     * indirectly in an attribute value (other than "&lt;") must
7905
     * not contain a <.
7906
     */
7907
655k
    else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7908
655k
       (ent != NULL) && (ent->content != NULL) &&
7909
655k
       (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
7910
655k
       (xmlStrchr(ent->content, '<'))) {
7911
84.5k
  xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7912
84.5k
     "'<' in entity '%s' is not allowed in attributes values\n",
7913
84.5k
        name);
7914
84.5k
    }
7915
7916
    /*
7917
     * Internal check, no parameter entities here ...
7918
     */
7919
571k
    else {
7920
571k
  switch (ent->etype) {
7921
0
      case XML_INTERNAL_PARAMETER_ENTITY:
7922
0
      case XML_EXTERNAL_PARAMETER_ENTITY:
7923
0
    xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7924
0
       "Attempt to reference the parameter entity '%s'\n",
7925
0
          name);
7926
0
      break;
7927
571k
      default:
7928
571k
      break;
7929
571k
  }
7930
571k
    }
7931
7932
    /*
7933
     * [ WFC: No Recursion ]
7934
     * A parsed entity must not contain a recursive reference
7935
     * to itself, either directly or indirectly.
7936
     * Done somewhere else
7937
     */
7938
7939
756k
    xmlFree(name);
7940
756k
    *str = ptr;
7941
756k
    return(ent);
7942
756k
}
7943
7944
/**
7945
 * xmlParsePEReference:
7946
 * @ctxt:  an XML parser context
7947
 *
7948
 * DEPRECATED: Internal function, don't use.
7949
 *
7950
 * parse PEReference declarations
7951
 * The entity content is handled directly by pushing it's content as
7952
 * a new input stream.
7953
 *
7954
 * [69] PEReference ::= '%' Name ';'
7955
 *
7956
 * [ WFC: No Recursion ]
7957
 * A parsed entity must not contain a recursive
7958
 * reference to itself, either directly or indirectly.
7959
 *
7960
 * [ WFC: Entity Declared ]
7961
 * In a document without any DTD, a document with only an internal DTD
7962
 * subset which contains no parameter entity references, or a document
7963
 * with "standalone='yes'", ...  ... The declaration of a parameter
7964
 * entity must precede any reference to it...
7965
 *
7966
 * [ VC: Entity Declared ]
7967
 * In a document with an external subset or external parameter entities
7968
 * with "standalone='no'", ...  ... The declaration of a parameter entity
7969
 * must precede any reference to it...
7970
 *
7971
 * [ WFC: In DTD ]
7972
 * Parameter-entity references may only appear in the DTD.
7973
 * NOTE: misleading but this is handled.
7974
 */
7975
void
7976
xmlParsePEReference(xmlParserCtxtPtr ctxt)
7977
6.50M
{
7978
6.50M
    const xmlChar *name;
7979
6.50M
    xmlEntityPtr entity = NULL;
7980
6.50M
    xmlParserInputPtr input;
7981
7982
6.50M
    if (RAW != '%')
7983
4.37M
        return;
7984
2.13M
    NEXT;
7985
2.13M
    name = xmlParseName(ctxt);
7986
2.13M
    if (name == NULL) {
7987
285k
  xmlFatalErrMsg(ctxt, XML_ERR_PEREF_NO_NAME, "PEReference: no name\n");
7988
285k
  return;
7989
285k
    }
7990
1.84M
    if (xmlParserDebugEntities)
7991
0
  xmlGenericError(xmlGenericErrorContext,
7992
0
    "PEReference: %s\n", name);
7993
1.84M
    if (RAW != ';') {
7994
52.6k
  xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
7995
52.6k
        return;
7996
52.6k
    }
7997
7998
1.79M
    NEXT;
7999
8000
    /*
8001
     * Increase the number of entity references parsed
8002
     */
8003
1.79M
    ctxt->nbentities++;
8004
8005
    /*
8006
     * Request the entity from SAX
8007
     */
8008
1.79M
    if ((ctxt->sax != NULL) &&
8009
1.79M
  (ctxt->sax->getParameterEntity != NULL))
8010
1.79M
  entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8011
1.79M
    if (ctxt->instate == XML_PARSER_EOF)
8012
0
  return;
8013
1.79M
    if (entity == NULL) {
8014
  /*
8015
   * [ WFC: Entity Declared ]
8016
   * In a document without any DTD, a document with only an
8017
   * internal DTD subset which contains no parameter entity
8018
   * references, or a document with "standalone='yes'", ...
8019
   * ... The declaration of a parameter entity must precede
8020
   * any reference to it...
8021
   */
8022
100k
  if ((ctxt->standalone == 1) ||
8023
100k
      ((ctxt->hasExternalSubset == 0) &&
8024
99.6k
       (ctxt->hasPErefs == 0))) {
8025
2.28k
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8026
2.28k
            "PEReference: %%%s; not found\n",
8027
2.28k
            name);
8028
97.9k
  } else {
8029
      /*
8030
       * [ VC: Entity Declared ]
8031
       * In a document with an external subset or external
8032
       * parameter entities with "standalone='no'", ...
8033
       * ... The declaration of a parameter entity must
8034
       * precede any reference to it...
8035
       */
8036
97.9k
            if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
8037
17.0k
                xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
8038
17.0k
                                 "PEReference: %%%s; not found\n",
8039
17.0k
                                 name, NULL);
8040
17.0k
            } else
8041
80.8k
                xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8042
80.8k
                              "PEReference: %%%s; not found\n",
8043
80.8k
                              name, NULL);
8044
97.9k
            ctxt->valid = 0;
8045
97.9k
  }
8046
100k
  xmlParserEntityCheck(ctxt, 0, NULL, 0);
8047
1.69M
    } else {
8048
  /*
8049
   * Internal checking in case the entity quest barfed
8050
   */
8051
1.69M
  if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8052
1.69M
      (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8053
0
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8054
0
      "Internal: %%%s; is not a parameter entity\n",
8055
0
        name, NULL);
8056
1.69M
  } else {
8057
1.69M
            xmlChar start[4];
8058
1.69M
            xmlCharEncoding enc;
8059
8060
1.69M
      if (xmlParserEntityCheck(ctxt, 0, entity, 0))
8061
75
          return;
8062
8063
1.69M
      if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
8064
1.69M
          ((ctxt->options & XML_PARSE_NOENT) == 0) &&
8065
1.69M
    ((ctxt->options & XML_PARSE_DTDVALID) == 0) &&
8066
1.69M
    ((ctxt->options & XML_PARSE_DTDLOAD) == 0) &&
8067
1.69M
    ((ctxt->options & XML_PARSE_DTDATTR) == 0) &&
8068
1.69M
    (ctxt->replaceEntities == 0) &&
8069
1.69M
    (ctxt->validate == 0))
8070
72
    return;
8071
8072
1.69M
      input = xmlNewEntityInputStream(ctxt, entity);
8073
1.69M
      if (xmlPushInput(ctxt, input) < 0) {
8074
2.74k
                xmlFreeInputStream(input);
8075
2.74k
    return;
8076
2.74k
            }
8077
8078
1.69M
      if (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) {
8079
                /*
8080
                 * Get the 4 first bytes and decode the charset
8081
                 * if enc != XML_CHAR_ENCODING_NONE
8082
                 * plug some encoding conversion routines.
8083
                 * Note that, since we may have some non-UTF8
8084
                 * encoding (like UTF16, bug 135229), the 'length'
8085
                 * is not known, but we can calculate based upon
8086
                 * the amount of data in the buffer.
8087
                 */
8088
83.4k
                GROW
8089
83.4k
                if (ctxt->instate == XML_PARSER_EOF)
8090
0
                    return;
8091
83.4k
                if ((ctxt->input->end - ctxt->input->cur)>=4) {
8092
83.4k
                    start[0] = RAW;
8093
83.4k
                    start[1] = NXT(1);
8094
83.4k
                    start[2] = NXT(2);
8095
83.4k
                    start[3] = NXT(3);
8096
83.4k
                    enc = xmlDetectCharEncoding(start, 4);
8097
83.4k
                    if (enc != XML_CHAR_ENCODING_NONE) {
8098
17.0k
                        xmlSwitchEncoding(ctxt, enc);
8099
17.0k
                    }
8100
83.4k
                }
8101
8102
83.4k
                if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
8103
83.4k
                    (IS_BLANK_CH(NXT(5)))) {
8104
16.9k
                    xmlParseTextDecl(ctxt);
8105
16.9k
                }
8106
83.4k
            }
8107
1.69M
  }
8108
1.69M
    }
8109
1.79M
    ctxt->hasPErefs = 1;
8110
1.79M
}
8111
8112
/**
8113
 * xmlLoadEntityContent:
8114
 * @ctxt:  an XML parser context
8115
 * @entity: an unloaded system entity
8116
 *
8117
 * Load the original content of the given system entity from the
8118
 * ExternalID/SystemID given. This is to be used for Included in Literal
8119
 * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
8120
 *
8121
 * Returns 0 in case of success and -1 in case of failure
8122
 */
8123
static int
8124
16.2k
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
8125
16.2k
    xmlParserInputPtr input;
8126
16.2k
    xmlBufferPtr buf;
8127
16.2k
    int l, c;
8128
16.2k
    int count = 0;
8129
8130
16.2k
    if ((ctxt == NULL) || (entity == NULL) ||
8131
16.2k
        ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
8132
16.2k
   (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
8133
16.2k
  (entity->content != NULL)) {
8134
0
  xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8135
0
              "xmlLoadEntityContent parameter error");
8136
0
        return(-1);
8137
0
    }
8138
8139
16.2k
    if (xmlParserDebugEntities)
8140
0
  xmlGenericError(xmlGenericErrorContext,
8141
0
    "Reading %s entity content input\n", entity->name);
8142
8143
16.2k
    buf = xmlBufferCreate();
8144
16.2k
    if (buf == NULL) {
8145
0
  xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8146
0
              "xmlLoadEntityContent parameter error");
8147
0
        return(-1);
8148
0
    }
8149
16.2k
    xmlBufferSetAllocationScheme(buf, XML_BUFFER_ALLOC_DOUBLEIT);
8150
8151
16.2k
    input = xmlNewEntityInputStream(ctxt, entity);
8152
16.2k
    if (input == NULL) {
8153
484
  xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8154
484
              "xmlLoadEntityContent input error");
8155
484
  xmlBufferFree(buf);
8156
484
        return(-1);
8157
484
    }
8158
8159
    /*
8160
     * Push the entity as the current input, read char by char
8161
     * saving to the buffer until the end of the entity or an error
8162
     */
8163
15.7k
    if (xmlPushInput(ctxt, input) < 0) {
8164
95
        xmlBufferFree(buf);
8165
95
  xmlFreeInputStream(input);
8166
95
  return(-1);
8167
95
    }
8168
8169
15.6k
    GROW;
8170
15.6k
    c = CUR_CHAR(l);
8171
26.5M
    while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
8172
26.5M
           (IS_CHAR(c))) {
8173
26.5M
        xmlBufferAdd(buf, ctxt->input->cur, l);
8174
26.5M
  if (count++ > XML_PARSER_CHUNK_SIZE) {
8175
253k
      count = 0;
8176
253k
      GROW;
8177
253k
            if (ctxt->instate == XML_PARSER_EOF) {
8178
0
                xmlBufferFree(buf);
8179
0
                return(-1);
8180
0
            }
8181
253k
  }
8182
26.5M
  NEXTL(l);
8183
26.5M
  c = CUR_CHAR(l);
8184
26.5M
  if (c == 0) {
8185
14.1k
      count = 0;
8186
14.1k
      GROW;
8187
14.1k
            if (ctxt->instate == XML_PARSER_EOF) {
8188
0
                xmlBufferFree(buf);
8189
0
                return(-1);
8190
0
            }
8191
14.1k
      c = CUR_CHAR(l);
8192
14.1k
  }
8193
26.5M
    }
8194
8195
15.6k
    if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
8196
5.11k
        xmlPopInput(ctxt);
8197
10.5k
    } else if (!IS_CHAR(c)) {
8198
10.5k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
8199
10.5k
                          "xmlLoadEntityContent: invalid char value %d\n",
8200
10.5k
                    c);
8201
10.5k
  xmlBufferFree(buf);
8202
10.5k
  return(-1);
8203
10.5k
    }
8204
5.11k
    entity->content = buf->content;
8205
5.11k
    buf->content = NULL;
8206
5.11k
    xmlBufferFree(buf);
8207
8208
5.11k
    return(0);
8209
15.6k
}
8210
8211
/**
8212
 * xmlParseStringPEReference:
8213
 * @ctxt:  an XML parser context
8214
 * @str:  a pointer to an index in the string
8215
 *
8216
 * parse PEReference declarations
8217
 *
8218
 * [69] PEReference ::= '%' Name ';'
8219
 *
8220
 * [ WFC: No Recursion ]
8221
 * A parsed entity must not contain a recursive
8222
 * reference to itself, either directly or indirectly.
8223
 *
8224
 * [ WFC: Entity Declared ]
8225
 * In a document without any DTD, a document with only an internal DTD
8226
 * subset which contains no parameter entity references, or a document
8227
 * with "standalone='yes'", ...  ... The declaration of a parameter
8228
 * entity must precede any reference to it...
8229
 *
8230
 * [ VC: Entity Declared ]
8231
 * In a document with an external subset or external parameter entities
8232
 * with "standalone='no'", ...  ... The declaration of a parameter entity
8233
 * must precede any reference to it...
8234
 *
8235
 * [ WFC: In DTD ]
8236
 * Parameter-entity references may only appear in the DTD.
8237
 * NOTE: misleading but this is handled.
8238
 *
8239
 * Returns the string of the entity content.
8240
 *         str is updated to the current value of the index
8241
 */
8242
static xmlEntityPtr
8243
3.13M
xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
8244
3.13M
    const xmlChar *ptr;
8245
3.13M
    xmlChar cur;
8246
3.13M
    xmlChar *name;
8247
3.13M
    xmlEntityPtr entity = NULL;
8248
8249
3.13M
    if ((str == NULL) || (*str == NULL)) return(NULL);
8250
3.13M
    ptr = *str;
8251
3.13M
    cur = *ptr;
8252
3.13M
    if (cur != '%')
8253
0
        return(NULL);
8254
3.13M
    ptr++;
8255
3.13M
    name = xmlParseStringName(ctxt, &ptr);
8256
3.13M
    if (name == NULL) {
8257
1.74M
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8258
1.74M
           "xmlParseStringPEReference: no name\n");
8259
1.74M
  *str = ptr;
8260
1.74M
  return(NULL);
8261
1.74M
    }
8262
1.39M
    cur = *ptr;
8263
1.39M
    if (cur != ';') {
8264
91.8k
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
8265
91.8k
  xmlFree(name);
8266
91.8k
  *str = ptr;
8267
91.8k
  return(NULL);
8268
91.8k
    }
8269
1.30M
    ptr++;
8270
8271
    /*
8272
     * Increase the number of entity references parsed
8273
     */
8274
1.30M
    ctxt->nbentities++;
8275
8276
    /*
8277
     * Request the entity from SAX
8278
     */
8279
1.30M
    if ((ctxt->sax != NULL) &&
8280
1.30M
  (ctxt->sax->getParameterEntity != NULL))
8281
1.30M
  entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8282
1.30M
    if (ctxt->instate == XML_PARSER_EOF) {
8283
0
  xmlFree(name);
8284
0
  *str = ptr;
8285
0
  return(NULL);
8286
0
    }
8287
1.30M
    if (entity == NULL) {
8288
  /*
8289
   * [ WFC: Entity Declared ]
8290
   * In a document without any DTD, a document with only an
8291
   * internal DTD subset which contains no parameter entity
8292
   * references, or a document with "standalone='yes'", ...
8293
   * ... The declaration of a parameter entity must precede
8294
   * any reference to it...
8295
   */
8296
88.6k
  if ((ctxt->standalone == 1) ||
8297
88.6k
      ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) {
8298
361
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8299
361
     "PEReference: %%%s; not found\n", name);
8300
88.2k
  } else {
8301
      /*
8302
       * [ VC: Entity Declared ]
8303
       * In a document with an external subset or external
8304
       * parameter entities with "standalone='no'", ...
8305
       * ... The declaration of a parameter entity must
8306
       * precede any reference to it...
8307
       */
8308
88.2k
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8309
88.2k
        "PEReference: %%%s; not found\n",
8310
88.2k
        name, NULL);
8311
88.2k
      ctxt->valid = 0;
8312
88.2k
  }
8313
88.6k
  xmlParserEntityCheck(ctxt, 0, NULL, 0);
8314
1.21M
    } else {
8315
  /*
8316
   * Internal checking in case the entity quest barfed
8317
   */
8318
1.21M
  if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8319
1.21M
      (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8320
0
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8321
0
        "%%%s; is not a parameter entity\n",
8322
0
        name, NULL);
8323
0
  }
8324
1.21M
    }
8325
1.30M
    ctxt->hasPErefs = 1;
8326
1.30M
    xmlFree(name);
8327
1.30M
    *str = ptr;
8328
1.30M
    return(entity);
8329
1.30M
}
8330
8331
/**
8332
 * xmlParseDocTypeDecl:
8333
 * @ctxt:  an XML parser context
8334
 *
8335
 * DEPRECATED: Internal function, don't use.
8336
 *
8337
 * parse a DOCTYPE declaration
8338
 *
8339
 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
8340
 *                      ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8341
 *
8342
 * [ VC: Root Element Type ]
8343
 * The Name in the document type declaration must match the element
8344
 * type of the root element.
8345
 */
8346
8347
void
8348
557k
xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
8349
557k
    const xmlChar *name = NULL;
8350
557k
    xmlChar *ExternalID = NULL;
8351
557k
    xmlChar *URI = NULL;
8352
8353
    /*
8354
     * We know that '<!DOCTYPE' has been detected.
8355
     */
8356
557k
    SKIP(9);
8357
8358
557k
    SKIP_BLANKS;
8359
8360
    /*
8361
     * Parse the DOCTYPE name.
8362
     */
8363
557k
    name = xmlParseName(ctxt);
8364
557k
    if (name == NULL) {
8365
2.68k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8366
2.68k
           "xmlParseDocTypeDecl : no DOCTYPE name !\n");
8367
2.68k
    }
8368
557k
    ctxt->intSubName = name;
8369
8370
557k
    SKIP_BLANKS;
8371
8372
    /*
8373
     * Check for SystemID and ExternalID
8374
     */
8375
557k
    URI = xmlParseExternalID(ctxt, &ExternalID, 1);
8376
8377
557k
    if ((URI != NULL) || (ExternalID != NULL)) {
8378
203k
        ctxt->hasExternalSubset = 1;
8379
203k
    }
8380
557k
    ctxt->extSubURI = URI;
8381
557k
    ctxt->extSubSystem = ExternalID;
8382
8383
557k
    SKIP_BLANKS;
8384
8385
    /*
8386
     * Create and update the internal subset.
8387
     */
8388
557k
    if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
8389
557k
  (!ctxt->disableSAX))
8390
538k
  ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
8391
557k
    if (ctxt->instate == XML_PARSER_EOF)
8392
0
  return;
8393
8394
    /*
8395
     * Is there any internal subset declarations ?
8396
     * they are handled separately in xmlParseInternalSubset()
8397
     */
8398
557k
    if (RAW == '[')
8399
412k
  return;
8400
8401
    /*
8402
     * We should be at the end of the DOCTYPE declaration.
8403
     */
8404
144k
    if (RAW != '>') {
8405
24.1k
  xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8406
24.1k
    }
8407
144k
    NEXT;
8408
144k
}
8409
8410
/**
8411
 * xmlParseInternalSubset:
8412
 * @ctxt:  an XML parser context
8413
 *
8414
 * parse the internal subset declaration
8415
 *
8416
 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8417
 */
8418
8419
static void
8420
359k
xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
8421
    /*
8422
     * Is there any DTD definition ?
8423
     */
8424
359k
    if (RAW == '[') {
8425
359k
        int baseInputNr = ctxt->inputNr;
8426
359k
        ctxt->instate = XML_PARSER_DTD;
8427
359k
        NEXT;
8428
  /*
8429
   * Parse the succession of Markup declarations and
8430
   * PEReferences.
8431
   * Subsequence (markupdecl | PEReference | S)*
8432
   */
8433
4.93M
  while (((RAW != ']') || (ctxt->inputNr > baseInputNr)) &&
8434
4.93M
               (ctxt->instate != XML_PARSER_EOF)) {
8435
4.67M
      int id = ctxt->input->id;
8436
4.67M
      unsigned long cons = CUR_CONSUMED;
8437
8438
4.67M
      SKIP_BLANKS;
8439
4.67M
      xmlParseMarkupDecl(ctxt);
8440
4.67M
      xmlParsePEReference(ctxt);
8441
8442
            /*
8443
             * Conditional sections are allowed from external entities included
8444
             * by PE References in the internal subset.
8445
             */
8446
4.67M
            if ((ctxt->inputNr > 1) && (ctxt->input->filename != NULL) &&
8447
4.67M
                (RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
8448
0
                xmlParseConditionalSections(ctxt);
8449
0
            }
8450
8451
4.67M
      if ((id == ctxt->input->id) && (cons == CUR_CONSUMED)) {
8452
105k
    xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8453
105k
       "xmlParseInternalSubset: error detected in Markup declaration\n");
8454
105k
                if (ctxt->inputNr > baseInputNr)
8455
4.95k
                    xmlPopInput(ctxt);
8456
100k
                else
8457
100k
        break;
8458
105k
      }
8459
4.67M
  }
8460
359k
  if (RAW == ']') {
8461
240k
      NEXT;
8462
240k
      SKIP_BLANKS;
8463
240k
  }
8464
359k
    }
8465
8466
    /*
8467
     * We should be at the end of the DOCTYPE declaration.
8468
     */
8469
359k
    if (RAW != '>') {
8470
118k
  xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8471
118k
  return;
8472
118k
    }
8473
240k
    NEXT;
8474
240k
}
8475
8476
#ifdef LIBXML_SAX1_ENABLED
8477
/**
8478
 * xmlParseAttribute:
8479
 * @ctxt:  an XML parser context
8480
 * @value:  a xmlChar ** used to store the value of the attribute
8481
 *
8482
 * DEPRECATED: Internal function, don't use.
8483
 *
8484
 * parse an attribute
8485
 *
8486
 * [41] Attribute ::= Name Eq AttValue
8487
 *
8488
 * [ WFC: No External Entity References ]
8489
 * Attribute values cannot contain direct or indirect entity references
8490
 * to external entities.
8491
 *
8492
 * [ WFC: No < in Attribute Values ]
8493
 * The replacement text of any entity referred to directly or indirectly in
8494
 * an attribute value (other than "&lt;") must not contain a <.
8495
 *
8496
 * [ VC: Attribute Value Type ]
8497
 * The attribute must have been declared; the value must be of the type
8498
 * declared for it.
8499
 *
8500
 * [25] Eq ::= S? '=' S?
8501
 *
8502
 * With namespace:
8503
 *
8504
 * [NS 11] Attribute ::= QName Eq AttValue
8505
 *
8506
 * Also the case QName == xmlns:??? is handled independently as a namespace
8507
 * definition.
8508
 *
8509
 * Returns the attribute name, and the value in *value.
8510
 */
8511
8512
const xmlChar *
8513
149M
xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
8514
149M
    const xmlChar *name;
8515
149M
    xmlChar *val;
8516
8517
149M
    *value = NULL;
8518
149M
    GROW;
8519
149M
    name = xmlParseName(ctxt);
8520
149M
    if (name == NULL) {
8521
10.2M
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8522
10.2M
                 "error parsing attribute name\n");
8523
10.2M
        return(NULL);
8524
10.2M
    }
8525
8526
    /*
8527
     * read the value
8528
     */
8529
139M
    SKIP_BLANKS;
8530
139M
    if (RAW == '=') {
8531
132M
        NEXT;
8532
132M
  SKIP_BLANKS;
8533
132M
  val = xmlParseAttValue(ctxt);
8534
132M
  ctxt->instate = XML_PARSER_CONTENT;
8535
132M
    } else {
8536
7.37M
  xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8537
7.37M
         "Specification mandates value for attribute %s\n", name);
8538
7.37M
  return(NULL);
8539
7.37M
    }
8540
8541
    /*
8542
     * Check that xml:lang conforms to the specification
8543
     * No more registered as an error, just generate a warning now
8544
     * since this was deprecated in XML second edition
8545
     */
8546
132M
    if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8547
25.3k
  if (!xmlCheckLanguageID(val)) {
8548
10.9k
      xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8549
10.9k
              "Malformed value for xml:lang : %s\n",
8550
10.9k
        val, NULL);
8551
10.9k
  }
8552
25.3k
    }
8553
8554
    /*
8555
     * Check that xml:space conforms to the specification
8556
     */
8557
132M
    if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8558
1.09k
  if (xmlStrEqual(val, BAD_CAST "default"))
8559
0
      *(ctxt->space) = 0;
8560
1.09k
  else if (xmlStrEqual(val, BAD_CAST "preserve"))
8561
639
      *(ctxt->space) = 1;
8562
451
  else {
8563
451
    xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8564
451
"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8565
451
                                 val, NULL);
8566
451
  }
8567
1.09k
    }
8568
8569
132M
    *value = val;
8570
132M
    return(name);
8571
139M
}
8572
8573
/**
8574
 * xmlParseStartTag:
8575
 * @ctxt:  an XML parser context
8576
 *
8577
 * DEPRECATED: Internal function, don't use.
8578
 *
8579
 * parse a start of tag either for rule element or
8580
 * EmptyElement. In both case we don't parse the tag closing chars.
8581
 *
8582
 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8583
 *
8584
 * [ WFC: Unique Att Spec ]
8585
 * No attribute name may appear more than once in the same start-tag or
8586
 * empty-element tag.
8587
 *
8588
 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8589
 *
8590
 * [ WFC: Unique Att Spec ]
8591
 * No attribute name may appear more than once in the same start-tag or
8592
 * empty-element tag.
8593
 *
8594
 * With namespace:
8595
 *
8596
 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8597
 *
8598
 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8599
 *
8600
 * Returns the element name parsed
8601
 */
8602
8603
const xmlChar *
8604
262M
xmlParseStartTag(xmlParserCtxtPtr ctxt) {
8605
262M
    const xmlChar *name;
8606
262M
    const xmlChar *attname;
8607
262M
    xmlChar *attvalue;
8608
262M
    const xmlChar **atts = ctxt->atts;
8609
262M
    int nbatts = 0;
8610
262M
    int maxatts = ctxt->maxatts;
8611
262M
    int i;
8612
8613
262M
    if (RAW != '<') return(NULL);
8614
262M
    NEXT1;
8615
8616
262M
    name = xmlParseName(ctxt);
8617
262M
    if (name == NULL) {
8618
87.3M
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8619
87.3M
       "xmlParseStartTag: invalid element name\n");
8620
87.3M
        return(NULL);
8621
87.3M
    }
8622
8623
    /*
8624
     * Now parse the attributes, it ends up with the ending
8625
     *
8626
     * (S Attribute)* S?
8627
     */
8628
174M
    SKIP_BLANKS;
8629
174M
    GROW;
8630
8631
229M
    while (((RAW != '>') &&
8632
229M
     ((RAW != '/') || (NXT(1) != '>')) &&
8633
229M
     (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
8634
149M
        int id = ctxt->input->id;
8635
149M
  unsigned long cons = CUR_CONSUMED;
8636
8637
149M
  attname = xmlParseAttribute(ctxt, &attvalue);
8638
149M
        if ((attname != NULL) && (attvalue != NULL)) {
8639
      /*
8640
       * [ WFC: Unique Att Spec ]
8641
       * No attribute name may appear more than once in the same
8642
       * start-tag or empty-element tag.
8643
       */
8644
175M
      for (i = 0; i < nbatts;i += 2) {
8645
43.5M
          if (xmlStrEqual(atts[i], attname)) {
8646
49.5k
        xmlErrAttributeDup(ctxt, NULL, attname);
8647
49.5k
        xmlFree(attvalue);
8648
49.5k
        goto failed;
8649
49.5k
    }
8650
43.5M
      }
8651
      /*
8652
       * Add the pair to atts
8653
       */
8654
131M
      if (atts == NULL) {
8655
5.34M
          maxatts = 22; /* allow for 10 attrs by default */
8656
5.34M
          atts = (const xmlChar **)
8657
5.34M
           xmlMalloc(maxatts * sizeof(xmlChar *));
8658
5.34M
    if (atts == NULL) {
8659
0
        xmlErrMemory(ctxt, NULL);
8660
0
        if (attvalue != NULL)
8661
0
      xmlFree(attvalue);
8662
0
        goto failed;
8663
0
    }
8664
5.34M
    ctxt->atts = atts;
8665
5.34M
    ctxt->maxatts = maxatts;
8666
126M
      } else if (nbatts + 4 > maxatts) {
8667
1.74k
          const xmlChar **n;
8668
8669
1.74k
          maxatts *= 2;
8670
1.74k
          n = (const xmlChar **) xmlRealloc((void *) atts,
8671
1.74k
               maxatts * sizeof(const xmlChar *));
8672
1.74k
    if (n == NULL) {
8673
0
        xmlErrMemory(ctxt, NULL);
8674
0
        if (attvalue != NULL)
8675
0
      xmlFree(attvalue);
8676
0
        goto failed;
8677
0
    }
8678
1.74k
    atts = n;
8679
1.74k
    ctxt->atts = atts;
8680
1.74k
    ctxt->maxatts = maxatts;
8681
1.74k
      }
8682
131M
      atts[nbatts++] = attname;
8683
131M
      atts[nbatts++] = attvalue;
8684
131M
      atts[nbatts] = NULL;
8685
131M
      atts[nbatts + 1] = NULL;
8686
131M
  } else {
8687
18.0M
      if (attvalue != NULL)
8688
0
    xmlFree(attvalue);
8689
18.0M
  }
8690
8691
149M
failed:
8692
8693
149M
  GROW
8694
149M
  if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8695
85.5M
      break;
8696
64.3M
  if (SKIP_BLANKS == 0) {
8697
21.0M
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8698
21.0M
         "attributes construct error\n");
8699
21.0M
  }
8700
64.3M
        if ((cons == CUR_CONSUMED) && (id == ctxt->input->id) &&
8701
64.3M
            (attname == NULL) && (attvalue == NULL)) {
8702
10.2M
      xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
8703
10.2M
         "xmlParseStartTag: problem parsing attributes\n");
8704
10.2M
      break;
8705
10.2M
  }
8706
54.1M
  SHRINK;
8707
54.1M
        GROW;
8708
54.1M
    }
8709
8710
    /*
8711
     * SAX: Start of Element !
8712
     */
8713
174M
    if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
8714
174M
  (!ctxt->disableSAX)) {
8715
19.3M
  if (nbatts > 0)
8716
12.5M
      ctxt->sax->startElement(ctxt->userData, name, atts);
8717
6.74M
  else
8718
6.74M
      ctxt->sax->startElement(ctxt->userData, name, NULL);
8719
19.3M
    }
8720
8721
174M
    if (atts != NULL) {
8722
        /* Free only the content strings */
8723
305M
        for (i = 1;i < nbatts;i+=2)
8724
131M
      if (atts[i] != NULL)
8725
131M
         xmlFree((xmlChar *) atts[i]);
8726
173M
    }
8727
174M
    return(name);
8728
174M
}
8729
8730
/**
8731
 * xmlParseEndTag1:
8732
 * @ctxt:  an XML parser context
8733
 * @line:  line of the start tag
8734
 * @nsNr:  number of namespaces on the start tag
8735
 *
8736
 * parse an end of tag
8737
 *
8738
 * [42] ETag ::= '</' Name S? '>'
8739
 *
8740
 * With namespace
8741
 *
8742
 * [NS 9] ETag ::= '</' QName S? '>'
8743
 */
8744
8745
static void
8746
95.9M
xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
8747
95.9M
    const xmlChar *name;
8748
8749
95.9M
    GROW;
8750
95.9M
    if ((RAW != '<') || (NXT(1) != '/')) {
8751
0
  xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
8752
0
           "xmlParseEndTag: '</' not found\n");
8753
0
  return;
8754
0
    }
8755
95.9M
    SKIP(2);
8756
8757
95.9M
    name = xmlParseNameAndCompare(ctxt,ctxt->name);
8758
8759
    /*
8760
     * We should definitely be at the ending "S? '>'" part
8761
     */
8762
95.9M
    GROW;
8763
95.9M
    SKIP_BLANKS;
8764
95.9M
    if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
8765
1.62M
  xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
8766
1.62M
    } else
8767
94.2M
  NEXT1;
8768
8769
    /*
8770
     * [ WFC: Element Type Match ]
8771
     * The Name in an element's end-tag must match the element type in the
8772
     * start-tag.
8773
     *
8774
     */
8775
95.9M
    if (name != (xmlChar*)1) {
8776
7.91M
        if (name == NULL) name = BAD_CAST "unparsable";
8777
7.91M
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
8778
7.91M
         "Opening and ending tag mismatch: %s line %d and %s\n",
8779
7.91M
                    ctxt->name, line, name);
8780
7.91M
    }
8781
8782
    /*
8783
     * SAX: End of Tag
8784
     */
8785
95.9M
    if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8786
95.9M
  (!ctxt->disableSAX))
8787
5.14M
        ctxt->sax->endElement(ctxt->userData, ctxt->name);
8788
8789
95.9M
    namePop(ctxt);
8790
95.9M
    spacePop(ctxt);
8791
95.9M
    return;
8792
95.9M
}
8793
8794
/**
8795
 * xmlParseEndTag:
8796
 * @ctxt:  an XML parser context
8797
 *
8798
 * DEPRECATED: Internal function, don't use.
8799
 *
8800
 * parse an end of tag
8801
 *
8802
 * [42] ETag ::= '</' Name S? '>'
8803
 *
8804
 * With namespace
8805
 *
8806
 * [NS 9] ETag ::= '</' QName S? '>'
8807
 */
8808
8809
void
8810
0
xmlParseEndTag(xmlParserCtxtPtr ctxt) {
8811
0
    xmlParseEndTag1(ctxt, 0);
8812
0
}
8813
#endif /* LIBXML_SAX1_ENABLED */
8814
8815
/************************************************************************
8816
 *                  *
8817
 *          SAX 2 specific operations       *
8818
 *                  *
8819
 ************************************************************************/
8820
8821
/*
8822
 * xmlGetNamespace:
8823
 * @ctxt:  an XML parser context
8824
 * @prefix:  the prefix to lookup
8825
 *
8826
 * Lookup the namespace name for the @prefix (which ca be NULL)
8827
 * The prefix must come from the @ctxt->dict dictionary
8828
 *
8829
 * Returns the namespace name or NULL if not bound
8830
 */
8831
static const xmlChar *
8832
13.4M
xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
8833
13.4M
    int i;
8834
8835
13.4M
    if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
8836
20.8M
    for (i = ctxt->nsNr - 2;i >= 0;i-=2)
8837
10.5M
        if (ctxt->nsTab[i] == prefix) {
8838
2.97M
      if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
8839
9.27k
          return(NULL);
8840
2.96M
      return(ctxt->nsTab[i + 1]);
8841
2.97M
  }
8842
10.3M
    return(NULL);
8843
13.2M
}
8844
8845
/**
8846
 * xmlParseQName:
8847
 * @ctxt:  an XML parser context
8848
 * @prefix:  pointer to store the prefix part
8849
 *
8850
 * parse an XML Namespace QName
8851
 *
8852
 * [6]  QName  ::= (Prefix ':')? LocalPart
8853
 * [7]  Prefix  ::= NCName
8854
 * [8]  LocalPart  ::= NCName
8855
 *
8856
 * Returns the Name parsed or NULL
8857
 */
8858
8859
static const xmlChar *
8860
26.7M
xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8861
26.7M
    const xmlChar *l, *p;
8862
8863
26.7M
    GROW;
8864
8865
26.7M
    l = xmlParseNCName(ctxt);
8866
26.7M
    if (l == NULL) {
8867
2.63M
        if (CUR == ':') {
8868
8.19k
      l = xmlParseName(ctxt);
8869
8.19k
      if (l != NULL) {
8870
8.19k
          xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8871
8.19k
             "Failed to parse QName '%s'\n", l, NULL, NULL);
8872
8.19k
    *prefix = NULL;
8873
8.19k
    return(l);
8874
8.19k
      }
8875
8.19k
  }
8876
2.62M
        return(NULL);
8877
2.63M
    }
8878
24.1M
    if (CUR == ':') {
8879
4.53M
        NEXT;
8880
4.53M
  p = l;
8881
4.53M
  l = xmlParseNCName(ctxt);
8882
4.53M
  if (l == NULL) {
8883
54.5k
      xmlChar *tmp;
8884
8885
54.5k
            if (ctxt->instate == XML_PARSER_EOF)
8886
0
                return(NULL);
8887
54.5k
            xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8888
54.5k
               "Failed to parse QName '%s:'\n", p, NULL, NULL);
8889
54.5k
      l = xmlParseNmtoken(ctxt);
8890
54.5k
      if (l == NULL) {
8891
39.5k
                if (ctxt->instate == XML_PARSER_EOF)
8892
0
                    return(NULL);
8893
39.5k
    tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
8894
39.5k
            } else {
8895
14.9k
    tmp = xmlBuildQName(l, p, NULL, 0);
8896
14.9k
    xmlFree((char *)l);
8897
14.9k
      }
8898
54.5k
      p = xmlDictLookup(ctxt->dict, tmp, -1);
8899
54.5k
      if (tmp != NULL) xmlFree(tmp);
8900
54.5k
      *prefix = NULL;
8901
54.5k
      return(p);
8902
54.5k
  }
8903
4.47M
  if (CUR == ':') {
8904
25.3k
      xmlChar *tmp;
8905
8906
25.3k
            xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8907
25.3k
               "Failed to parse QName '%s:%s:'\n", p, l, NULL);
8908
25.3k
      NEXT;
8909
25.3k
      tmp = (xmlChar *) xmlParseName(ctxt);
8910
25.3k
      if (tmp != NULL) {
8911
17.0k
          tmp = xmlBuildQName(tmp, l, NULL, 0);
8912
17.0k
    l = xmlDictLookup(ctxt->dict, tmp, -1);
8913
17.0k
    if (tmp != NULL) xmlFree(tmp);
8914
17.0k
    *prefix = p;
8915
17.0k
    return(l);
8916
17.0k
      }
8917
8.36k
            if (ctxt->instate == XML_PARSER_EOF)
8918
0
                return(NULL);
8919
8.36k
      tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
8920
8.36k
      l = xmlDictLookup(ctxt->dict, tmp, -1);
8921
8.36k
      if (tmp != NULL) xmlFree(tmp);
8922
8.36k
      *prefix = p;
8923
8.36k
      return(l);
8924
8.36k
  }
8925
4.45M
  *prefix = p;
8926
4.45M
    } else
8927
19.6M
        *prefix = NULL;
8928
24.0M
    return(l);
8929
24.1M
}
8930
8931
/**
8932
 * xmlParseQNameAndCompare:
8933
 * @ctxt:  an XML parser context
8934
 * @name:  the localname
8935
 * @prefix:  the prefix, if any.
8936
 *
8937
 * parse an XML name and compares for match
8938
 * (specialized for endtag parsing)
8939
 *
8940
 * Returns NULL for an illegal name, (xmlChar*) 1 for success
8941
 * and the name for mismatch
8942
 */
8943
8944
static const xmlChar *
8945
xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8946
787k
                        xmlChar const *prefix) {
8947
787k
    const xmlChar *cmp;
8948
787k
    const xmlChar *in;
8949
787k
    const xmlChar *ret;
8950
787k
    const xmlChar *prefix2;
8951
8952
787k
    if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8953
8954
787k
    GROW;
8955
787k
    in = ctxt->input->cur;
8956
8957
787k
    cmp = prefix;
8958
2.69M
    while (*in != 0 && *in == *cmp) {
8959
1.91M
  ++in;
8960
1.91M
  ++cmp;
8961
1.91M
    }
8962
787k
    if ((*cmp == 0) && (*in == ':')) {
8963
717k
        in++;
8964
717k
  cmp = name;
8965
5.66M
  while (*in != 0 && *in == *cmp) {
8966
4.95M
      ++in;
8967
4.95M
      ++cmp;
8968
4.95M
  }
8969
717k
  if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
8970
      /* success */
8971
594k
            ctxt->input->col += in - ctxt->input->cur;
8972
594k
      ctxt->input->cur = in;
8973
594k
      return((const xmlChar*) 1);
8974
594k
  }
8975
717k
    }
8976
    /*
8977
     * all strings coms from the dictionary, equality can be done directly
8978
     */
8979
193k
    ret = xmlParseQName (ctxt, &prefix2);
8980
193k
    if ((ret == name) && (prefix == prefix2))
8981
1.77k
  return((const xmlChar*) 1);
8982
191k
    return ret;
8983
193k
}
8984
8985
/**
8986
 * xmlParseAttValueInternal:
8987
 * @ctxt:  an XML parser context
8988
 * @len:  attribute len result
8989
 * @alloc:  whether the attribute was reallocated as a new string
8990
 * @normalize:  if 1 then further non-CDATA normalization must be done
8991
 *
8992
 * parse a value for an attribute.
8993
 * NOTE: if no normalization is needed, the routine will return pointers
8994
 *       directly from the data buffer.
8995
 *
8996
 * 3.3.3 Attribute-Value Normalization:
8997
 * Before the value of an attribute is passed to the application or
8998
 * checked for validity, the XML processor must normalize it as follows:
8999
 * - a character reference is processed by appending the referenced
9000
 *   character to the attribute value
9001
 * - an entity reference is processed by recursively processing the
9002
 *   replacement text of the entity
9003
 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
9004
 *   appending #x20 to the normalized value, except that only a single
9005
 *   #x20 is appended for a "#xD#xA" sequence that is part of an external
9006
 *   parsed entity or the literal entity value of an internal parsed entity
9007
 * - other characters are processed by appending them to the normalized value
9008
 * If the declared value is not CDATA, then the XML processor must further
9009
 * process the normalized attribute value by discarding any leading and
9010
 * trailing space (#x20) characters, and by replacing sequences of space
9011
 * (#x20) characters by a single space (#x20) character.
9012
 * All attributes for which no declaration has been read should be treated
9013
 * by a non-validating parser as if declared CDATA.
9014
 *
9015
 * Returns the AttValue parsed or NULL. The value has to be freed by the
9016
 *     caller if it was copied, this can be detected by val[*len] == 0.
9017
 */
9018
9019
#define GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end) \
9020
23.9k
    const xmlChar *oldbase = ctxt->input->base;\
9021
23.9k
    GROW;\
9022
23.9k
    if (ctxt->instate == XML_PARSER_EOF)\
9023
23.9k
        return(NULL);\
9024
23.9k
    if (oldbase != ctxt->input->base) {\
9025
0
        ptrdiff_t delta = ctxt->input->base - oldbase;\
9026
0
        start = start + delta;\
9027
0
        in = in + delta;\
9028
0
    }\
9029
23.9k
    end = ctxt->input->end;
9030
9031
static xmlChar *
9032
xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
9033
                         int normalize)
9034
144M
{
9035
144M
    xmlChar limit = 0;
9036
144M
    const xmlChar *in = NULL, *start, *end, *last;
9037
144M
    xmlChar *ret = NULL;
9038
144M
    int line, col;
9039
144M
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
9040
120M
                    XML_MAX_HUGE_LENGTH :
9041
144M
                    XML_MAX_TEXT_LENGTH;
9042
9043
144M
    GROW;
9044
144M
    in = (xmlChar *) CUR_PTR;
9045
144M
    line = ctxt->input->line;
9046
144M
    col = ctxt->input->col;
9047
144M
    if (*in != '"' && *in != '\'') {
9048
449k
        xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
9049
449k
        return (NULL);
9050
449k
    }
9051
143M
    ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
9052
9053
    /*
9054
     * try to handle in this routine the most common case where no
9055
     * allocation of a new string is required and where content is
9056
     * pure ASCII.
9057
     */
9058
143M
    limit = *in++;
9059
143M
    col++;
9060
143M
    end = ctxt->input->end;
9061
143M
    start = in;
9062
143M
    if (in >= end) {
9063
2.95k
        GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9064
2.95k
    }
9065
143M
    if (normalize) {
9066
        /*
9067
   * Skip any leading spaces
9068
   */
9069
848k
  while ((in < end) && (*in != limit) &&
9070
848k
         ((*in == 0x20) || (*in == 0x9) ||
9071
841k
          (*in == 0xA) || (*in == 0xD))) {
9072
243k
      if (*in == 0xA) {
9073
81.7k
          line++; col = 1;
9074
161k
      } else {
9075
161k
          col++;
9076
161k
      }
9077
243k
      in++;
9078
243k
      start = in;
9079
243k
      if (in >= end) {
9080
140
                GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9081
140
                if ((in - start) > maxLength) {
9082
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9083
0
                                   "AttValue length too long\n");
9084
0
                    return(NULL);
9085
0
                }
9086
140
      }
9087
243k
  }
9088
5.07M
  while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9089
5.07M
         (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
9090
4.47M
      col++;
9091
4.47M
      if ((*in++ == 0x20) && (*in == 0x20)) break;
9092
4.46M
      if (in >= end) {
9093
318
                GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9094
318
                if ((in - start) > maxLength) {
9095
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9096
0
                                   "AttValue length too long\n");
9097
0
                    return(NULL);
9098
0
                }
9099
318
      }
9100
4.46M
  }
9101
604k
  last = in;
9102
  /*
9103
   * skip the trailing blanks
9104
   */
9105
617k
  while ((last[-1] == 0x20) && (last > start)) last--;
9106
769k
  while ((in < end) && (*in != limit) &&
9107
769k
         ((*in == 0x20) || (*in == 0x9) ||
9108
247k
          (*in == 0xA) || (*in == 0xD))) {
9109
164k
      if (*in == 0xA) {
9110
55.3k
          line++, col = 1;
9111
109k
      } else {
9112
109k
          col++;
9113
109k
      }
9114
164k
      in++;
9115
164k
      if (in >= end) {
9116
261
    const xmlChar *oldbase = ctxt->input->base;
9117
261
    GROW;
9118
261
                if (ctxt->instate == XML_PARSER_EOF)
9119
0
                    return(NULL);
9120
261
    if (oldbase != ctxt->input->base) {
9121
0
        ptrdiff_t delta = ctxt->input->base - oldbase;
9122
0
        start = start + delta;
9123
0
        in = in + delta;
9124
0
        last = last + delta;
9125
0
    }
9126
261
    end = ctxt->input->end;
9127
261
                if ((in - start) > maxLength) {
9128
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9129
0
                                   "AttValue length too long\n");
9130
0
                    return(NULL);
9131
0
                }
9132
261
      }
9133
164k
  }
9134
604k
        if ((in - start) > maxLength) {
9135
0
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9136
0
                           "AttValue length too long\n");
9137
0
            return(NULL);
9138
0
        }
9139
604k
  if (*in != limit) goto need_complex;
9140
143M
    } else {
9141
1.45G
  while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9142
1.45G
         (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
9143
1.31G
      in++;
9144
1.31G
      col++;
9145
1.31G
      if (in >= end) {
9146
20.5k
                GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9147
20.5k
                if ((in - start) > maxLength) {
9148
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9149
0
                                   "AttValue length too long\n");
9150
0
                    return(NULL);
9151
0
                }
9152
20.5k
      }
9153
1.31G
  }
9154
143M
  last = in;
9155
143M
        if ((in - start) > maxLength) {
9156
0
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9157
0
                           "AttValue length too long\n");
9158
0
            return(NULL);
9159
0
        }
9160
143M
  if (*in != limit) goto need_complex;
9161
143M
    }
9162
134M
    in++;
9163
134M
    col++;
9164
134M
    if (len != NULL) {
9165
10.7M
        if (alloc) *alloc = 0;
9166
10.7M
        *len = last - start;
9167
10.7M
        ret = (xmlChar *) start;
9168
123M
    } else {
9169
123M
        if (alloc) *alloc = 1;
9170
123M
        ret = xmlStrndup(start, last - start);
9171
123M
    }
9172
134M
    CUR_PTR = in;
9173
134M
    ctxt->input->line = line;
9174
134M
    ctxt->input->col = col;
9175
134M
    return ret;
9176
9.33M
need_complex:
9177
9.33M
    if (alloc) *alloc = 1;
9178
9.33M
    return xmlParseAttValueComplex(ctxt, len, normalize);
9179
143M
}
9180
9181
/**
9182
 * xmlParseAttribute2:
9183
 * @ctxt:  an XML parser context
9184
 * @pref:  the element prefix
9185
 * @elem:  the element name
9186
 * @prefix:  a xmlChar ** used to store the value of the attribute prefix
9187
 * @value:  a xmlChar ** used to store the value of the attribute
9188
 * @len:  an int * to save the length of the attribute
9189
 * @alloc:  an int * to indicate if the attribute was allocated
9190
 *
9191
 * parse an attribute in the new SAX2 framework.
9192
 *
9193
 * Returns the attribute name, and the value in *value, .
9194
 */
9195
9196
static const xmlChar *
9197
xmlParseAttribute2(xmlParserCtxtPtr ctxt,
9198
                   const xmlChar * pref, const xmlChar * elem,
9199
                   const xmlChar ** prefix, xmlChar ** value,
9200
                   int *len, int *alloc)
9201
12.0M
{
9202
12.0M
    const xmlChar *name;
9203
12.0M
    xmlChar *val, *internal_val = NULL;
9204
12.0M
    int normalize = 0;
9205
9206
12.0M
    *value = NULL;
9207
12.0M
    GROW;
9208
12.0M
    name = xmlParseQName(ctxt, prefix);
9209
12.0M
    if (name == NULL) {
9210
359k
        xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9211
359k
                       "error parsing attribute name\n");
9212
359k
        return (NULL);
9213
359k
    }
9214
9215
    /*
9216
     * get the type if needed
9217
     */
9218
11.7M
    if (ctxt->attsSpecial != NULL) {
9219
2.36M
        int type;
9220
9221
2.36M
        type = (int) (ptrdiff_t) xmlHashQLookup2(ctxt->attsSpecial,
9222
2.36M
                                                 pref, elem, *prefix, name);
9223
2.36M
        if (type != 0)
9224
618k
            normalize = 1;
9225
2.36M
    }
9226
9227
    /*
9228
     * read the value
9229
     */
9230
11.7M
    SKIP_BLANKS;
9231
11.7M
    if (RAW == '=') {
9232
11.5M
        NEXT;
9233
11.5M
        SKIP_BLANKS;
9234
11.5M
        val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
9235
11.5M
  if (normalize) {
9236
      /*
9237
       * Sometimes a second normalisation pass for spaces is needed
9238
       * but that only happens if charrefs or entities references
9239
       * have been used in the attribute value, i.e. the attribute
9240
       * value have been extracted in an allocated string already.
9241
       */
9242
606k
      if (*alloc) {
9243
83.9k
          const xmlChar *val2;
9244
9245
83.9k
          val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
9246
83.9k
    if ((val2 != NULL) && (val2 != val)) {
9247
11.0k
        xmlFree(val);
9248
11.0k
        val = (xmlChar *) val2;
9249
11.0k
    }
9250
83.9k
      }
9251
606k
  }
9252
11.5M
        ctxt->instate = XML_PARSER_CONTENT;
9253
11.5M
    } else {
9254
202k
        xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
9255
202k
                          "Specification mandates value for attribute %s\n",
9256
202k
                          name);
9257
202k
        return (NULL);
9258
202k
    }
9259
9260
11.5M
    if (*prefix == ctxt->str_xml) {
9261
        /*
9262
         * Check that xml:lang conforms to the specification
9263
         * No more registered as an error, just generate a warning now
9264
         * since this was deprecated in XML second edition
9265
         */
9266
77.9k
        if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
9267
12.9k
            internal_val = xmlStrndup(val, *len);
9268
12.9k
            if (!xmlCheckLanguageID(internal_val)) {
9269
7.79k
                xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
9270
7.79k
                              "Malformed value for xml:lang : %s\n",
9271
7.79k
                              internal_val, NULL);
9272
7.79k
            }
9273
12.9k
        }
9274
9275
        /*
9276
         * Check that xml:space conforms to the specification
9277
         */
9278
77.9k
        if (xmlStrEqual(name, BAD_CAST "space")) {
9279
897
            internal_val = xmlStrndup(val, *len);
9280
897
            if (xmlStrEqual(internal_val, BAD_CAST "default"))
9281
0
                *(ctxt->space) = 0;
9282
897
            else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
9283
306
                *(ctxt->space) = 1;
9284
591
            else {
9285
591
                xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
9286
591
                              "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
9287
591
                              internal_val, NULL);
9288
591
            }
9289
897
        }
9290
77.9k
        if (internal_val) {
9291
13.1k
            xmlFree(internal_val);
9292
13.1k
        }
9293
77.9k
    }
9294
9295
11.5M
    *value = val;
9296
11.5M
    return (name);
9297
11.7M
}
9298
/**
9299
 * xmlParseStartTag2:
9300
 * @ctxt:  an XML parser context
9301
 *
9302
 * parse a start of tag either for rule element or
9303
 * EmptyElement. In both case we don't parse the tag closing chars.
9304
 * This routine is called when running SAX2 parsing
9305
 *
9306
 * [40] STag ::= '<' Name (S Attribute)* S? '>'
9307
 *
9308
 * [ WFC: Unique Att Spec ]
9309
 * No attribute name may appear more than once in the same start-tag or
9310
 * empty-element tag.
9311
 *
9312
 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
9313
 *
9314
 * [ WFC: Unique Att Spec ]
9315
 * No attribute name may appear more than once in the same start-tag or
9316
 * empty-element tag.
9317
 *
9318
 * With namespace:
9319
 *
9320
 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
9321
 *
9322
 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
9323
 *
9324
 * Returns the element name parsed
9325
 */
9326
9327
static const xmlChar *
9328
xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
9329
14.4M
                  const xmlChar **URI, int *tlen) {
9330
14.4M
    const xmlChar *localname;
9331
14.4M
    const xmlChar *prefix;
9332
14.4M
    const xmlChar *attname;
9333
14.4M
    const xmlChar *aprefix;
9334
14.4M
    const xmlChar *nsname;
9335
14.4M
    xmlChar *attvalue;
9336
14.4M
    const xmlChar **atts = ctxt->atts;
9337
14.4M
    int maxatts = ctxt->maxatts;
9338
14.4M
    int nratts, nbatts, nbdef, inputid;
9339
14.4M
    int i, j, nbNs, attval;
9340
14.4M
    unsigned long cur;
9341
14.4M
    int nsNr = ctxt->nsNr;
9342
9343
14.4M
    if (RAW != '<') return(NULL);
9344
14.4M
    NEXT1;
9345
9346
    /*
9347
     * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
9348
     *       point since the attribute values may be stored as pointers to
9349
     *       the buffer and calling SHRINK would destroy them !
9350
     *       The Shrinking is only possible once the full set of attribute
9351
     *       callbacks have been done.
9352
     */
9353
14.4M
    SHRINK;
9354
14.4M
    cur = ctxt->input->cur - ctxt->input->base;
9355
14.4M
    inputid = ctxt->input->id;
9356
14.4M
    nbatts = 0;
9357
14.4M
    nratts = 0;
9358
14.4M
    nbdef = 0;
9359
14.4M
    nbNs = 0;
9360
14.4M
    attval = 0;
9361
    /* Forget any namespaces added during an earlier parse of this element. */
9362
14.4M
    ctxt->nsNr = nsNr;
9363
9364
14.4M
    localname = xmlParseQName(ctxt, &prefix);
9365
14.4M
    if (localname == NULL) {
9366
2.26M
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9367
2.26M
           "StartTag: invalid element name\n");
9368
2.26M
        return(NULL);
9369
2.26M
    }
9370
12.2M
    *tlen = ctxt->input->cur - ctxt->input->base - cur;
9371
9372
    /*
9373
     * Now parse the attributes, it ends up with the ending
9374
     *
9375
     * (S Attribute)* S?
9376
     */
9377
12.2M
    SKIP_BLANKS;
9378
12.2M
    GROW;
9379
9380
16.4M
    while (((RAW != '>') &&
9381
16.4M
     ((RAW != '/') || (NXT(1) != '>')) &&
9382
16.4M
     (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
9383
12.0M
  int id = ctxt->input->id;
9384
12.0M
  unsigned long cons = CUR_CONSUMED;
9385
12.0M
  int len = -1, alloc = 0;
9386
9387
12.0M
  attname = xmlParseAttribute2(ctxt, prefix, localname,
9388
12.0M
                               &aprefix, &attvalue, &len, &alloc);
9389
12.0M
        if ((attname == NULL) || (attvalue == NULL))
9390
594k
            goto next_attr;
9391
11.5M
  if (len < 0) len = xmlStrlen(attvalue);
9392
9393
11.5M
        if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9394
87.2k
            const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9395
87.2k
            xmlURIPtr uri;
9396
9397
87.2k
            if (URL == NULL) {
9398
0
                xmlErrMemory(ctxt, "dictionary allocation failure");
9399
0
                if ((attvalue != NULL) && (alloc != 0))
9400
0
                    xmlFree(attvalue);
9401
0
                localname = NULL;
9402
0
                goto done;
9403
0
            }
9404
87.2k
            if (*URL != 0) {
9405
85.2k
                uri = xmlParseURI((const char *) URL);
9406
85.2k
                if (uri == NULL) {
9407
21.2k
                    xmlNsErr(ctxt, XML_WAR_NS_URI,
9408
21.2k
                             "xmlns: '%s' is not a valid URI\n",
9409
21.2k
                                       URL, NULL, NULL);
9410
64.0k
                } else {
9411
64.0k
                    if (uri->scheme == NULL) {
9412
27.5k
                        xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9413
27.5k
                                  "xmlns: URI %s is not absolute\n",
9414
27.5k
                                  URL, NULL, NULL);
9415
27.5k
                    }
9416
64.0k
                    xmlFreeURI(uri);
9417
64.0k
                }
9418
85.2k
                if (URL == ctxt->str_xml_ns) {
9419
0
                    if (attname != ctxt->str_xml) {
9420
0
                        xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9421
0
                     "xml namespace URI cannot be the default namespace\n",
9422
0
                                 NULL, NULL, NULL);
9423
0
                    }
9424
0
                    goto next_attr;
9425
0
                }
9426
85.2k
                if ((len == 29) &&
9427
85.2k
                    (xmlStrEqual(URL,
9428
726
                             BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9429
0
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9430
0
                         "reuse of the xmlns namespace name is forbidden\n",
9431
0
                             NULL, NULL, NULL);
9432
0
                    goto next_attr;
9433
0
                }
9434
85.2k
            }
9435
            /*
9436
             * check that it's not a defined namespace
9437
             */
9438
103k
            for (j = 1;j <= nbNs;j++)
9439
18.3k
                if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9440
1.90k
                    break;
9441
87.2k
            if (j <= nbNs)
9442
1.90k
                xmlErrAttributeDup(ctxt, NULL, attname);
9443
85.3k
            else
9444
85.3k
                if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
9445
9446
11.4M
        } else if (aprefix == ctxt->str_xmlns) {
9447
458k
            const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9448
458k
            xmlURIPtr uri;
9449
9450
458k
            if (attname == ctxt->str_xml) {
9451
370
                if (URL != ctxt->str_xml_ns) {
9452
370
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9453
370
                             "xml namespace prefix mapped to wrong URI\n",
9454
370
                             NULL, NULL, NULL);
9455
370
                }
9456
                /*
9457
                 * Do not keep a namespace definition node
9458
                 */
9459
370
                goto next_attr;
9460
370
            }
9461
458k
            if (URL == ctxt->str_xml_ns) {
9462
0
                if (attname != ctxt->str_xml) {
9463
0
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9464
0
                             "xml namespace URI mapped to wrong prefix\n",
9465
0
                             NULL, NULL, NULL);
9466
0
                }
9467
0
                goto next_attr;
9468
0
            }
9469
458k
            if (attname == ctxt->str_xmlns) {
9470
537
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9471
537
                         "redefinition of the xmlns prefix is forbidden\n",
9472
537
                         NULL, NULL, NULL);
9473
537
                goto next_attr;
9474
537
            }
9475
457k
            if ((len == 29) &&
9476
457k
                (xmlStrEqual(URL,
9477
2.54k
                             BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9478
0
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9479
0
                         "reuse of the xmlns namespace name is forbidden\n",
9480
0
                         NULL, NULL, NULL);
9481
0
                goto next_attr;
9482
0
            }
9483
457k
            if ((URL == NULL) || (URL[0] == 0)) {
9484
1.61k
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9485
1.61k
                         "xmlns:%s: Empty XML namespace is not allowed\n",
9486
1.61k
                              attname, NULL, NULL);
9487
1.61k
                goto next_attr;
9488
455k
            } else {
9489
455k
                uri = xmlParseURI((const char *) URL);
9490
455k
                if (uri == NULL) {
9491
50.7k
                    xmlNsErr(ctxt, XML_WAR_NS_URI,
9492
50.7k
                         "xmlns:%s: '%s' is not a valid URI\n",
9493
50.7k
                                       attname, URL, NULL);
9494
405k
                } else {
9495
405k
                    if ((ctxt->pedantic) && (uri->scheme == NULL)) {
9496
6.63k
                        xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9497
6.63k
                                  "xmlns:%s: URI %s is not absolute\n",
9498
6.63k
                                  attname, URL, NULL);
9499
6.63k
                    }
9500
405k
                    xmlFreeURI(uri);
9501
405k
                }
9502
455k
            }
9503
9504
            /*
9505
             * check that it's not a defined namespace
9506
             */
9507
515k
            for (j = 1;j <= nbNs;j++)
9508
62.8k
                if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9509
2.88k
                    break;
9510
455k
            if (j <= nbNs)
9511
2.88k
                xmlErrAttributeDup(ctxt, aprefix, attname);
9512
453k
            else
9513
453k
                if (nsPush(ctxt, attname, URL) > 0) nbNs++;
9514
9515
10.9M
        } else {
9516
            /*
9517
             * Add the pair to atts
9518
             */
9519
10.9M
            if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9520
355k
                if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9521
0
                    goto next_attr;
9522
0
                }
9523
355k
                maxatts = ctxt->maxatts;
9524
355k
                atts = ctxt->atts;
9525
355k
            }
9526
10.9M
            ctxt->attallocs[nratts++] = alloc;
9527
10.9M
            atts[nbatts++] = attname;
9528
10.9M
            atts[nbatts++] = aprefix;
9529
            /*
9530
             * The namespace URI field is used temporarily to point at the
9531
             * base of the current input buffer for non-alloced attributes.
9532
             * When the input buffer is reallocated, all the pointers become
9533
             * invalid, but they can be reconstructed later.
9534
             */
9535
10.9M
            if (alloc)
9536
692k
                atts[nbatts++] = NULL;
9537
10.2M
            else
9538
10.2M
                atts[nbatts++] = ctxt->input->base;
9539
10.9M
            atts[nbatts++] = attvalue;
9540
10.9M
            attvalue += len;
9541
10.9M
            atts[nbatts++] = attvalue;
9542
            /*
9543
             * tag if some deallocation is needed
9544
             */
9545
10.9M
            if (alloc != 0) attval = 1;
9546
10.9M
            attvalue = NULL; /* moved into atts */
9547
10.9M
        }
9548
9549
12.0M
next_attr:
9550
12.0M
        if ((attvalue != NULL) && (alloc != 0)) {
9551
89.4k
            xmlFree(attvalue);
9552
89.4k
            attvalue = NULL;
9553
89.4k
        }
9554
9555
12.0M
  GROW
9556
12.0M
        if (ctxt->instate == XML_PARSER_EOF)
9557
0
            break;
9558
12.0M
  if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9559
6.88M
      break;
9560
5.21M
  if (SKIP_BLANKS == 0) {
9561
970k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9562
970k
         "attributes construct error\n");
9563
970k
      break;
9564
970k
  }
9565
4.24M
        if ((cons == CUR_CONSUMED) && (id == ctxt->input->id) &&
9566
4.24M
            (attname == NULL) && (attvalue == NULL)) {
9567
0
      xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9568
0
           "xmlParseStartTag: problem parsing attributes\n");
9569
0
      break;
9570
0
  }
9571
4.24M
        GROW;
9572
4.24M
    }
9573
9574
12.2M
    if (ctxt->input->id != inputid) {
9575
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9576
0
                    "Unexpected change of input\n");
9577
0
        localname = NULL;
9578
0
        goto done;
9579
0
    }
9580
9581
    /* Reconstruct attribute value pointers. */
9582
23.1M
    for (i = 0, j = 0; j < nratts; i += 5, j++) {
9583
10.9M
        if (atts[i+2] != NULL) {
9584
            /*
9585
             * Arithmetic on dangling pointers is technically undefined
9586
             * behavior, but well...
9587
             */
9588
10.2M
            ptrdiff_t offset = ctxt->input->base - atts[i+2];
9589
10.2M
            atts[i+2]  = NULL;    /* Reset repurposed namespace URI */
9590
10.2M
            atts[i+3] += offset;  /* value */
9591
10.2M
            atts[i+4] += offset;  /* valuend */
9592
10.2M
        }
9593
10.9M
    }
9594
9595
    /*
9596
     * The attributes defaulting
9597
     */
9598
12.2M
    if (ctxt->attsDefault != NULL) {
9599
2.03M
        xmlDefAttrsPtr defaults;
9600
9601
2.03M
  defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9602
2.03M
  if (defaults != NULL) {
9603
320k
      for (i = 0;i < defaults->nbAttrs;i++) {
9604
217k
          attname = defaults->values[5 * i];
9605
217k
    aprefix = defaults->values[5 * i + 1];
9606
9607
                /*
9608
     * special work for namespaces defaulted defs
9609
     */
9610
217k
    if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9611
        /*
9612
         * check that it's not a defined namespace
9613
         */
9614
3.32k
        for (j = 1;j <= nbNs;j++)
9615
2.34k
            if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9616
1.28k
          break;
9617
2.27k
              if (j <= nbNs) continue;
9618
9619
985
        nsname = xmlGetNamespace(ctxt, NULL);
9620
985
        if (nsname != defaults->values[5 * i + 2]) {
9621
868
      if (nsPush(ctxt, NULL,
9622
868
                 defaults->values[5 * i + 2]) > 0)
9623
868
          nbNs++;
9624
868
        }
9625
214k
    } else if (aprefix == ctxt->str_xmlns) {
9626
        /*
9627
         * check that it's not a defined namespace
9628
         */
9629
21.1k
        for (j = 1;j <= nbNs;j++)
9630
17.1k
            if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9631
16.1k
          break;
9632
20.1k
              if (j <= nbNs) continue;
9633
9634
4.03k
        nsname = xmlGetNamespace(ctxt, attname);
9635
4.03k
        if (nsname != defaults->values[2]) {
9636
3.40k
      if (nsPush(ctxt, attname,
9637
3.40k
                 defaults->values[5 * i + 2]) > 0)
9638
3.27k
          nbNs++;
9639
3.40k
        }
9640
194k
    } else {
9641
        /*
9642
         * check that it's not a defined attribute
9643
         */
9644
533k
        for (j = 0;j < nbatts;j+=5) {
9645
342k
      if ((attname == atts[j]) && (aprefix == atts[j+1]))
9646
3.18k
          break;
9647
342k
        }
9648
194k
        if (j < nbatts) continue;
9649
9650
191k
        if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9651
5.03k
      if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9652
0
                            localname = NULL;
9653
0
                            goto done;
9654
0
      }
9655
5.03k
      maxatts = ctxt->maxatts;
9656
5.03k
      atts = ctxt->atts;
9657
5.03k
        }
9658
191k
        atts[nbatts++] = attname;
9659
191k
        atts[nbatts++] = aprefix;
9660
191k
        if (aprefix == NULL)
9661
147k
      atts[nbatts++] = NULL;
9662
44.2k
        else
9663
44.2k
            atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
9664
191k
        atts[nbatts++] = defaults->values[5 * i + 2];
9665
191k
        atts[nbatts++] = defaults->values[5 * i + 3];
9666
191k
        if ((ctxt->standalone == 1) &&
9667
191k
            (defaults->values[5 * i + 4] != NULL)) {
9668
207
      xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
9669
207
    "standalone: attribute %s on %s defaulted from external subset\n",
9670
207
                                   attname, localname);
9671
207
        }
9672
191k
        nbdef++;
9673
191k
    }
9674
217k
      }
9675
103k
  }
9676
2.03M
    }
9677
9678
    /*
9679
     * The attributes checkings
9680
     */
9681
23.3M
    for (i = 0; i < nbatts;i += 5) {
9682
        /*
9683
  * The default namespace does not apply to attribute names.
9684
  */
9685
11.1M
  if (atts[i + 1] != NULL) {
9686
1.19M
      nsname = xmlGetNamespace(ctxt, atts[i + 1]);
9687
1.19M
      if (nsname == NULL) {
9688
238k
    xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9689
238k
        "Namespace prefix %s for %s on %s is not defined\n",
9690
238k
        atts[i + 1], atts[i], localname);
9691
238k
      }
9692
1.19M
      atts[i + 2] = nsname;
9693
1.19M
  } else
9694
9.95M
      nsname = NULL;
9695
  /*
9696
   * [ WFC: Unique Att Spec ]
9697
   * No attribute name may appear more than once in the same
9698
   * start-tag or empty-element tag.
9699
   * As extended by the Namespace in XML REC.
9700
   */
9701
15.6M
        for (j = 0; j < i;j += 5) {
9702
4.55M
      if (atts[i] == atts[j]) {
9703
30.9k
          if (atts[i+1] == atts[j+1]) {
9704
9.37k
        xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
9705
9.37k
        break;
9706
9.37k
    }
9707
21.6k
    if ((nsname != NULL) && (atts[j + 2] == nsname)) {
9708
178
        xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9709
178
           "Namespaced Attribute %s in '%s' redefined\n",
9710
178
           atts[i], nsname, NULL);
9711
178
        break;
9712
178
    }
9713
21.6k
      }
9714
4.55M
  }
9715
11.1M
    }
9716
9717
12.2M
    nsname = xmlGetNamespace(ctxt, prefix);
9718
12.2M
    if ((prefix != NULL) && (nsname == NULL)) {
9719
900k
  xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9720
900k
           "Namespace prefix %s on %s is not defined\n",
9721
900k
     prefix, localname, NULL);
9722
900k
    }
9723
12.2M
    *pref = prefix;
9724
12.2M
    *URI = nsname;
9725
9726
    /*
9727
     * SAX: Start of Element !
9728
     */
9729
12.2M
    if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9730
12.2M
  (!ctxt->disableSAX)) {
9731
10.7M
  if (nbNs > 0)
9732
400k
      ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9733
400k
        nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
9734
400k
        nbatts / 5, nbdef, atts);
9735
10.3M
  else
9736
10.3M
      ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9737
10.3M
                    nsname, 0, NULL, nbatts / 5, nbdef, atts);
9738
10.7M
    }
9739
9740
12.2M
done:
9741
    /*
9742
     * Free up attribute allocated strings if needed
9743
     */
9744
12.2M
    if (attval != 0) {
9745
1.41M
  for (i = 3,j = 0; j < nratts;i += 5,j++)
9746
773k
      if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9747
692k
          xmlFree((xmlChar *) atts[i]);
9748
645k
    }
9749
9750
12.2M
    return(localname);
9751
12.2M
}
9752
9753
/**
9754
 * xmlParseEndTag2:
9755
 * @ctxt:  an XML parser context
9756
 * @line:  line of the start tag
9757
 * @nsNr:  number of namespaces on the start tag
9758
 *
9759
 * parse an end of tag
9760
 *
9761
 * [42] ETag ::= '</' Name S? '>'
9762
 *
9763
 * With namespace
9764
 *
9765
 * [NS 9] ETag ::= '</' QName S? '>'
9766
 */
9767
9768
static void
9769
4.23M
xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlStartTag *tag) {
9770
4.23M
    const xmlChar *name;
9771
9772
4.23M
    GROW;
9773
4.23M
    if ((RAW != '<') || (NXT(1) != '/')) {
9774
0
  xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
9775
0
  return;
9776
0
    }
9777
4.23M
    SKIP(2);
9778
9779
4.23M
    if (tag->prefix == NULL)
9780
3.44M
        name = xmlParseNameAndCompare(ctxt, ctxt->name);
9781
787k
    else
9782
787k
        name = xmlParseQNameAndCompare(ctxt, ctxt->name, tag->prefix);
9783
9784
    /*
9785
     * We should definitely be at the ending "S? '>'" part
9786
     */
9787
4.23M
    GROW;
9788
4.23M
    if (ctxt->instate == XML_PARSER_EOF)
9789
0
        return;
9790
4.23M
    SKIP_BLANKS;
9791
4.23M
    if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
9792
117k
  xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
9793
117k
    } else
9794
4.11M
  NEXT1;
9795
9796
    /*
9797
     * [ WFC: Element Type Match ]
9798
     * The Name in an element's end-tag must match the element type in the
9799
     * start-tag.
9800
     *
9801
     */
9802
4.23M
    if (name != (xmlChar*)1) {
9803
401k
        if (name == NULL) name = BAD_CAST "unparsable";
9804
401k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
9805
401k
         "Opening and ending tag mismatch: %s line %d and %s\n",
9806
401k
                    ctxt->name, tag->line, name);
9807
401k
    }
9808
9809
    /*
9810
     * SAX: End of Tag
9811
     */
9812
4.23M
    if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9813
4.23M
  (!ctxt->disableSAX))
9814
3.36M
  ctxt->sax->endElementNs(ctxt->userData, ctxt->name, tag->prefix,
9815
3.36M
                                tag->URI);
9816
9817
4.23M
    spacePop(ctxt);
9818
4.23M
    if (tag->nsNr != 0)
9819
92.8k
  nsPop(ctxt, tag->nsNr);
9820
4.23M
}
9821
9822
/**
9823
 * xmlParseCDSect:
9824
 * @ctxt:  an XML parser context
9825
 *
9826
 * DEPRECATED: Internal function, don't use.
9827
 *
9828
 * Parse escaped pure raw content.
9829
 *
9830
 * [18] CDSect ::= CDStart CData CDEnd
9831
 *
9832
 * [19] CDStart ::= '<![CDATA['
9833
 *
9834
 * [20] Data ::= (Char* - (Char* ']]>' Char*))
9835
 *
9836
 * [21] CDEnd ::= ']]>'
9837
 */
9838
void
9839
1.67M
xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9840
1.67M
    xmlChar *buf = NULL;
9841
1.67M
    int len = 0;
9842
1.67M
    int size = XML_PARSER_BUFFER_SIZE;
9843
1.67M
    int r, rl;
9844
1.67M
    int s, sl;
9845
1.67M
    int cur, l;
9846
1.67M
    int count = 0;
9847
1.67M
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
9848
1.48M
                    XML_MAX_HUGE_LENGTH :
9849
1.67M
                    XML_MAX_TEXT_LENGTH;
9850
9851
    /* Check 2.6.0 was NXT(0) not RAW */
9852
1.67M
    if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9853
1.67M
  SKIP(9);
9854
1.67M
    } else
9855
0
        return;
9856
9857
1.67M
    ctxt->instate = XML_PARSER_CDATA_SECTION;
9858
1.67M
    r = CUR_CHAR(rl);
9859
1.67M
    if (!IS_CHAR(r)) {
9860
60.8k
  xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9861
60.8k
  ctxt->instate = XML_PARSER_CONTENT;
9862
60.8k
        return;
9863
60.8k
    }
9864
1.61M
    NEXTL(rl);
9865
1.61M
    s = CUR_CHAR(sl);
9866
1.61M
    if (!IS_CHAR(s)) {
9867
46.9k
  xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9868
46.9k
  ctxt->instate = XML_PARSER_CONTENT;
9869
46.9k
        return;
9870
46.9k
    }
9871
1.57M
    NEXTL(sl);
9872
1.57M
    cur = CUR_CHAR(l);
9873
1.57M
    buf = (xmlChar *) xmlMallocAtomic(size);
9874
1.57M
    if (buf == NULL) {
9875
0
  xmlErrMemory(ctxt, NULL);
9876
0
  return;
9877
0
    }
9878
687M
    while (IS_CHAR(cur) &&
9879
687M
           ((r != ']') || (s != ']') || (cur != '>'))) {
9880
685M
  if (len + 5 >= size) {
9881
1.72M
      xmlChar *tmp;
9882
9883
1.72M
      tmp = (xmlChar *) xmlRealloc(buf, size * 2);
9884
1.72M
      if (tmp == NULL) {
9885
0
          xmlFree(buf);
9886
0
    xmlErrMemory(ctxt, NULL);
9887
0
    return;
9888
0
      }
9889
1.72M
      buf = tmp;
9890
1.72M
      size *= 2;
9891
1.72M
  }
9892
685M
  COPY_BUF(rl,buf,len,r);
9893
685M
  r = s;
9894
685M
  rl = sl;
9895
685M
  s = cur;
9896
685M
  sl = l;
9897
685M
  count++;
9898
685M
  if (count > 50) {
9899
13.0M
      SHRINK;
9900
13.0M
      GROW;
9901
13.0M
            if (ctxt->instate == XML_PARSER_EOF) {
9902
0
    xmlFree(buf);
9903
0
    return;
9904
0
            }
9905
13.0M
      count = 0;
9906
13.0M
  }
9907
685M
  NEXTL(l);
9908
685M
  cur = CUR_CHAR(l);
9909
685M
        if (len > maxLength) {
9910
0
            xmlFatalErrMsg(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9911
0
                           "CData section too big found\n");
9912
0
            xmlFree(buf);
9913
0
            return;
9914
0
        }
9915
685M
    }
9916
1.57M
    buf[len] = 0;
9917
1.57M
    ctxt->instate = XML_PARSER_CONTENT;
9918
1.57M
    if (cur != '>') {
9919
517k
  xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9920
517k
                       "CData section not finished\n%.50s\n", buf);
9921
517k
  xmlFree(buf);
9922
517k
        return;
9923
517k
    }
9924
1.05M
    NEXTL(l);
9925
9926
    /*
9927
     * OK the buffer is to be consumed as cdata.
9928
     */
9929
1.05M
    if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9930
328k
  if (ctxt->sax->cdataBlock != NULL)
9931
66.1k
      ctxt->sax->cdataBlock(ctxt->userData, buf, len);
9932
262k
  else if (ctxt->sax->characters != NULL)
9933
262k
      ctxt->sax->characters(ctxt->userData, buf, len);
9934
328k
    }
9935
1.05M
    xmlFree(buf);
9936
1.05M
}
9937
9938
/**
9939
 * xmlParseContentInternal:
9940
 * @ctxt:  an XML parser context
9941
 *
9942
 * Parse a content sequence. Stops at EOF or '</'. Leaves checking of
9943
 * unexpected EOF to the caller.
9944
 */
9945
9946
static void
9947
6.19M
xmlParseContentInternal(xmlParserCtxtPtr ctxt) {
9948
6.19M
    int nameNr = ctxt->nameNr;
9949
9950
6.19M
    GROW;
9951
994M
    while ((RAW != 0) &&
9952
994M
     (ctxt->instate != XML_PARSER_EOF)) {
9953
988M
        int id = ctxt->input->id;
9954
988M
  unsigned long cons = CUR_CONSUMED;
9955
988M
  const xmlChar *cur = ctxt->input->cur;
9956
9957
  /*
9958
   * First case : a Processing Instruction.
9959
   */
9960
988M
  if ((*cur == '<') && (cur[1] == '?')) {
9961
2.52M
      xmlParsePI(ctxt);
9962
2.52M
  }
9963
9964
  /*
9965
   * Second case : a CDSection
9966
   */
9967
  /* 2.6.0 test was *cur not RAW */
9968
986M
  else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9969
1.67M
      xmlParseCDSect(ctxt);
9970
1.67M
  }
9971
9972
  /*
9973
   * Third case :  a comment
9974
   */
9975
984M
  else if ((*cur == '<') && (NXT(1) == '!') &&
9976
984M
     (NXT(2) == '-') && (NXT(3) == '-')) {
9977
30.1M
      xmlParseComment(ctxt);
9978
30.1M
      ctxt->instate = XML_PARSER_CONTENT;
9979
30.1M
  }
9980
9981
  /*
9982
   * Fourth case :  a sub-element.
9983
   */
9984
954M
  else if (*cur == '<') {
9985
365M
            if (NXT(1) == '/') {
9986
97.6M
                if (ctxt->nameNr <= nameNr)
9987
713k
                    break;
9988
96.8M
          xmlParseElementEnd(ctxt);
9989
267M
            } else {
9990
267M
          xmlParseElementStart(ctxt);
9991
267M
            }
9992
365M
  }
9993
9994
  /*
9995
   * Fifth case : a reference. If if has not been resolved,
9996
   *    parsing returns it's Name, create the node
9997
   */
9998
9999
589M
  else if (*cur == '&') {
10000
68.6M
      xmlParseReference(ctxt);
10001
68.6M
  }
10002
10003
  /*
10004
   * Last case, text. Note that References are handled directly.
10005
   */
10006
520M
  else {
10007
520M
      xmlParseCharData(ctxt, 0);
10008
520M
  }
10009
10010
988M
  GROW;
10011
988M
  SHRINK;
10012
10013
988M
  if ((cons == CUR_CONSUMED) && (id == ctxt->input->id)) {
10014
86.6k
      xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
10015
86.6k
                  "detected an error in element content\n");
10016
86.6k
      xmlHaltParser(ctxt);
10017
86.6k
            break;
10018
86.6k
  }
10019
988M
    }
10020
6.19M
}
10021
10022
/**
10023
 * xmlParseContent:
10024
 * @ctxt:  an XML parser context
10025
 *
10026
 * Parse a content sequence. Stops at EOF or '</'.
10027
 *
10028
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10029
 */
10030
10031
void
10032
5.89M
xmlParseContent(xmlParserCtxtPtr ctxt) {
10033
5.89M
    int nameNr = ctxt->nameNr;
10034
10035
5.89M
    xmlParseContentInternal(ctxt);
10036
10037
5.89M
    if ((ctxt->instate != XML_PARSER_EOF) && (ctxt->nameNr > nameNr)) {
10038
3.00M
        const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
10039
3.00M
        int line = ctxt->pushTab[ctxt->nameNr - 1].line;
10040
3.00M
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
10041
3.00M
                "Premature end of data in tag %s line %d\n",
10042
3.00M
    name, line, NULL);
10043
3.00M
    }
10044
5.89M
}
10045
10046
/**
10047
 * xmlParseElement:
10048
 * @ctxt:  an XML parser context
10049
 *
10050
 * DEPRECATED: Internal function, don't use.
10051
 *
10052
 * parse an XML element
10053
 *
10054
 * [39] element ::= EmptyElemTag | STag content ETag
10055
 *
10056
 * [ WFC: Element Type Match ]
10057
 * The Name in an element's end-tag must match the element type in the
10058
 * start-tag.
10059
 *
10060
 */
10061
10062
void
10063
404k
xmlParseElement(xmlParserCtxtPtr ctxt) {
10064
404k
    if (xmlParseElementStart(ctxt) != 0)
10065
98.5k
        return;
10066
10067
305k
    xmlParseContentInternal(ctxt);
10068
305k
    if (ctxt->instate == XML_PARSER_EOF)
10069
2.92k
  return;
10070
10071
302k
    if (CUR == 0) {
10072
184k
        const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
10073
184k
        int line = ctxt->pushTab[ctxt->nameNr - 1].line;
10074
184k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
10075
184k
                "Premature end of data in tag %s line %d\n",
10076
184k
    name, line, NULL);
10077
184k
        return;
10078
184k
    }
10079
10080
117k
    xmlParseElementEnd(ctxt);
10081
117k
}
10082
10083
/**
10084
 * xmlParseElementStart:
10085
 * @ctxt:  an XML parser context
10086
 *
10087
 * Parse the start of an XML element. Returns -1 in case of error, 0 if an
10088
 * opening tag was parsed, 1 if an empty element was parsed.
10089
 */
10090
static int
10091
268M
xmlParseElementStart(xmlParserCtxtPtr ctxt) {
10092
268M
    const xmlChar *name;
10093
268M
    const xmlChar *prefix = NULL;
10094
268M
    const xmlChar *URI = NULL;
10095
268M
    xmlParserNodeInfo node_info;
10096
268M
    int line, tlen = 0;
10097
268M
    xmlNodePtr ret;
10098
268M
    int nsNr = ctxt->nsNr;
10099
10100
268M
    if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) &&
10101
268M
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
10102
47
  xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
10103
47
     "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
10104
47
        xmlParserMaxDepth);
10105
47
  xmlHaltParser(ctxt);
10106
47
  return(-1);
10107
47
    }
10108
10109
    /* Capture start position */
10110
268M
    if (ctxt->record_info) {
10111
0
        node_info.begin_pos = ctxt->input->consumed +
10112
0
                          (CUR_PTR - ctxt->input->base);
10113
0
  node_info.begin_line = ctxt->input->line;
10114
0
    }
10115
10116
268M
    if (ctxt->spaceNr == 0)
10117
0
  spacePush(ctxt, -1);
10118
268M
    else if (*ctxt->space == -2)
10119
155M
  spacePush(ctxt, -1);
10120
112M
    else
10121
112M
  spacePush(ctxt, *ctxt->space);
10122
10123
268M
    line = ctxt->input->line;
10124
268M
#ifdef LIBXML_SAX1_ENABLED
10125
268M
    if (ctxt->sax2)
10126
10.0M
#endif /* LIBXML_SAX1_ENABLED */
10127
10.0M
        name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
10128
258M
#ifdef LIBXML_SAX1_ENABLED
10129
258M
    else
10130
258M
  name = xmlParseStartTag(ctxt);
10131
268M
#endif /* LIBXML_SAX1_ENABLED */
10132
268M
    if (ctxt->instate == XML_PARSER_EOF)
10133
9.04k
  return(-1);
10134
268M
    if (name == NULL) {
10135
89.5M
  spacePop(ctxt);
10136
89.5M
        return(-1);
10137
89.5M
    }
10138
178M
    nameNsPush(ctxt, name, prefix, URI, line, ctxt->nsNr - nsNr);
10139
178M
    ret = ctxt->node;
10140
10141
178M
#ifdef LIBXML_VALID_ENABLED
10142
    /*
10143
     * [ VC: Root Element Type ]
10144
     * The Name in the document type declaration must match the element
10145
     * type of the root element.
10146
     */
10147
178M
    if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
10148
178M
        ctxt->node && (ctxt->node == ctxt->myDoc->children))
10149
0
        ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
10150
178M
#endif /* LIBXML_VALID_ENABLED */
10151
10152
    /*
10153
     * Check for an Empty Element.
10154
     */
10155
178M
    if ((RAW == '/') && (NXT(1) == '>')) {
10156
55.6M
        SKIP(2);
10157
55.6M
  if (ctxt->sax2) {
10158
3.17M
      if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
10159
3.17M
    (!ctxt->disableSAX))
10160
2.72M
    ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
10161
3.17M
#ifdef LIBXML_SAX1_ENABLED
10162
52.4M
  } else {
10163
52.4M
      if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
10164
52.4M
    (!ctxt->disableSAX))
10165
7.40M
    ctxt->sax->endElement(ctxt->userData, name);
10166
52.4M
#endif /* LIBXML_SAX1_ENABLED */
10167
52.4M
  }
10168
55.6M
  namePop(ctxt);
10169
55.6M
  spacePop(ctxt);
10170
55.6M
  if (nsNr != ctxt->nsNr)
10171
61.8k
      nsPop(ctxt, ctxt->nsNr - nsNr);
10172
55.6M
  if ( ret != NULL && ctxt->record_info ) {
10173
0
     node_info.end_pos = ctxt->input->consumed +
10174
0
            (CUR_PTR - ctxt->input->base);
10175
0
     node_info.end_line = ctxt->input->line;
10176
0
     node_info.node = ret;
10177
0
     xmlParserAddNodeInfo(ctxt, &node_info);
10178
0
  }
10179
55.6M
  return(1);
10180
55.6M
    }
10181
123M
    if (RAW == '>') {
10182
110M
        NEXT1;
10183
110M
    } else {
10184
12.0M
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
10185
12.0M
         "Couldn't find end of Start Tag %s line %d\n",
10186
12.0M
                    name, line, NULL);
10187
10188
  /*
10189
   * end of parsing of this node.
10190
   */
10191
12.0M
  nodePop(ctxt);
10192
12.0M
  namePop(ctxt);
10193
12.0M
  spacePop(ctxt);
10194
12.0M
  if (nsNr != ctxt->nsNr)
10195
71.4k
      nsPop(ctxt, ctxt->nsNr - nsNr);
10196
10197
  /*
10198
   * Capture end position and add node
10199
   */
10200
12.0M
  if ( ret != NULL && ctxt->record_info ) {
10201
0
     node_info.end_pos = ctxt->input->consumed +
10202
0
            (CUR_PTR - ctxt->input->base);
10203
0
     node_info.end_line = ctxt->input->line;
10204
0
     node_info.node = ret;
10205
0
     xmlParserAddNodeInfo(ctxt, &node_info);
10206
0
  }
10207
12.0M
  return(-1);
10208
12.0M
    }
10209
10210
110M
    return(0);
10211
123M
}
10212
10213
/**
10214
 * xmlParseElementEnd:
10215
 * @ctxt:  an XML parser context
10216
 *
10217
 * Parse the end of an XML element.
10218
 */
10219
static void
10220
97.0M
xmlParseElementEnd(xmlParserCtxtPtr ctxt) {
10221
97.0M
    xmlParserNodeInfo node_info;
10222
97.0M
    xmlNodePtr ret = ctxt->node;
10223
10224
97.0M
    if (ctxt->nameNr <= 0)
10225
0
        return;
10226
10227
    /*
10228
     * parse the end of tag: '</' should be here.
10229
     */
10230
97.0M
    if (ctxt->sax2) {
10231
2.71M
  xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
10232
2.71M
  namePop(ctxt);
10233
2.71M
    }
10234
94.2M
#ifdef LIBXML_SAX1_ENABLED
10235
94.2M
    else
10236
94.2M
  xmlParseEndTag1(ctxt, 0);
10237
97.0M
#endif /* LIBXML_SAX1_ENABLED */
10238
10239
    /*
10240
     * Capture end position and add node
10241
     */
10242
97.0M
    if ( ret != NULL && ctxt->record_info ) {
10243
0
       node_info.end_pos = ctxt->input->consumed +
10244
0
                          (CUR_PTR - ctxt->input->base);
10245
0
       node_info.end_line = ctxt->input->line;
10246
0
       node_info.node = ret;
10247
0
       xmlParserAddNodeInfo(ctxt, &node_info);
10248
0
    }
10249
97.0M
}
10250
10251
/**
10252
 * xmlParseVersionNum:
10253
 * @ctxt:  an XML parser context
10254
 *
10255
 * DEPRECATED: Internal function, don't use.
10256
 *
10257
 * parse the XML version value.
10258
 *
10259
 * [26] VersionNum ::= '1.' [0-9]+
10260
 *
10261
 * In practice allow [0-9].[0-9]+ at that level
10262
 *
10263
 * Returns the string giving the XML version number, or NULL
10264
 */
10265
xmlChar *
10266
438k
xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
10267
438k
    xmlChar *buf = NULL;
10268
438k
    int len = 0;
10269
438k
    int size = 10;
10270
438k
    xmlChar cur;
10271
10272
438k
    buf = (xmlChar *) xmlMallocAtomic(size);
10273
438k
    if (buf == NULL) {
10274
0
  xmlErrMemory(ctxt, NULL);
10275
0
  return(NULL);
10276
0
    }
10277
438k
    cur = CUR;
10278
438k
    if (!((cur >= '0') && (cur <= '9'))) {
10279
6.01k
  xmlFree(buf);
10280
6.01k
  return(NULL);
10281
6.01k
    }
10282
432k
    buf[len++] = cur;
10283
432k
    NEXT;
10284
432k
    cur=CUR;
10285
432k
    if (cur != '.') {
10286
7.93k
  xmlFree(buf);
10287
7.93k
  return(NULL);
10288
7.93k
    }
10289
425k
    buf[len++] = cur;
10290
425k
    NEXT;
10291
425k
    cur=CUR;
10292
1.60M
    while ((cur >= '0') && (cur <= '9')) {
10293
1.17M
  if (len + 1 >= size) {
10294
1.93k
      xmlChar *tmp;
10295
10296
1.93k
      size *= 2;
10297
1.93k
      tmp = (xmlChar *) xmlRealloc(buf, size);
10298
1.93k
      if (tmp == NULL) {
10299
0
          xmlFree(buf);
10300
0
    xmlErrMemory(ctxt, NULL);
10301
0
    return(NULL);
10302
0
      }
10303
1.93k
      buf = tmp;
10304
1.93k
  }
10305
1.17M
  buf[len++] = cur;
10306
1.17M
  NEXT;
10307
1.17M
  cur=CUR;
10308
1.17M
    }
10309
425k
    buf[len] = 0;
10310
425k
    return(buf);
10311
425k
}
10312
10313
/**
10314
 * xmlParseVersionInfo:
10315
 * @ctxt:  an XML parser context
10316
 *
10317
 * DEPRECATED: Internal function, don't use.
10318
 *
10319
 * parse the XML version.
10320
 *
10321
 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
10322
 *
10323
 * [25] Eq ::= S? '=' S?
10324
 *
10325
 * Returns the version string, e.g. "1.0"
10326
 */
10327
10328
xmlChar *
10329
524k
xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
10330
524k
    xmlChar *version = NULL;
10331
10332
524k
    if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
10333
453k
  SKIP(7);
10334
453k
  SKIP_BLANKS;
10335
453k
  if (RAW != '=') {
10336
8.32k
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10337
8.32k
      return(NULL);
10338
8.32k
        }
10339
444k
  NEXT;
10340
444k
  SKIP_BLANKS;
10341
444k
  if (RAW == '"') {
10342
381k
      NEXT;
10343
381k
      version = xmlParseVersionNum(ctxt);
10344
381k
      if (RAW != '"') {
10345
21.4k
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10346
21.4k
      } else
10347
359k
          NEXT;
10348
381k
  } else if (RAW == '\''){
10349
57.6k
      NEXT;
10350
57.6k
      version = xmlParseVersionNum(ctxt);
10351
57.6k
      if (RAW != '\'') {
10352
2.77k
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10353
2.77k
      } else
10354
54.8k
          NEXT;
10355
57.6k
  } else {
10356
5.71k
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10357
5.71k
  }
10358
444k
    }
10359
516k
    return(version);
10360
524k
}
10361
10362
/**
10363
 * xmlParseEncName:
10364
 * @ctxt:  an XML parser context
10365
 *
10366
 * DEPRECATED: Internal function, don't use.
10367
 *
10368
 * parse the XML encoding name
10369
 *
10370
 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
10371
 *
10372
 * Returns the encoding name value or NULL
10373
 */
10374
xmlChar *
10375
229k
xmlParseEncName(xmlParserCtxtPtr ctxt) {
10376
229k
    xmlChar *buf = NULL;
10377
229k
    int len = 0;
10378
229k
    int size = 10;
10379
229k
    xmlChar cur;
10380
10381
229k
    cur = CUR;
10382
229k
    if (((cur >= 'a') && (cur <= 'z')) ||
10383
229k
        ((cur >= 'A') && (cur <= 'Z'))) {
10384
228k
  buf = (xmlChar *) xmlMallocAtomic(size);
10385
228k
  if (buf == NULL) {
10386
0
      xmlErrMemory(ctxt, NULL);
10387
0
      return(NULL);
10388
0
  }
10389
10390
228k
  buf[len++] = cur;
10391
228k
  NEXT;
10392
228k
  cur = CUR;
10393
3.39M
  while (((cur >= 'a') && (cur <= 'z')) ||
10394
3.39M
         ((cur >= 'A') && (cur <= 'Z')) ||
10395
3.39M
         ((cur >= '0') && (cur <= '9')) ||
10396
3.39M
         (cur == '.') || (cur == '_') ||
10397
3.39M
         (cur == '-')) {
10398
3.16M
      if (len + 1 >= size) {
10399
81.6k
          xmlChar *tmp;
10400
10401
81.6k
    size *= 2;
10402
81.6k
    tmp = (xmlChar *) xmlRealloc(buf, size);
10403
81.6k
    if (tmp == NULL) {
10404
0
        xmlErrMemory(ctxt, NULL);
10405
0
        xmlFree(buf);
10406
0
        return(NULL);
10407
0
    }
10408
81.6k
    buf = tmp;
10409
81.6k
      }
10410
3.16M
      buf[len++] = cur;
10411
3.16M
      NEXT;
10412
3.16M
      cur = CUR;
10413
3.16M
      if (cur == 0) {
10414
1.06k
          SHRINK;
10415
1.06k
    GROW;
10416
1.06k
    cur = CUR;
10417
1.06k
      }
10418
3.16M
        }
10419
228k
  buf[len] = 0;
10420
228k
    } else {
10421
1.37k
  xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
10422
1.37k
    }
10423
229k
    return(buf);
10424
229k
}
10425
10426
/**
10427
 * xmlParseEncodingDecl:
10428
 * @ctxt:  an XML parser context
10429
 *
10430
 * DEPRECATED: Internal function, don't use.
10431
 *
10432
 * parse the XML encoding declaration
10433
 *
10434
 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' |  "'" EncName "'")
10435
 *
10436
 * this setups the conversion filters.
10437
 *
10438
 * Returns the encoding value or NULL
10439
 */
10440
10441
const xmlChar *
10442
406k
xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
10443
406k
    xmlChar *encoding = NULL;
10444
10445
406k
    SKIP_BLANKS;
10446
406k
    if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
10447
237k
  SKIP(8);
10448
237k
  SKIP_BLANKS;
10449
237k
  if (RAW != '=') {
10450
2.37k
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10451
2.37k
      return(NULL);
10452
2.37k
        }
10453
235k
  NEXT;
10454
235k
  SKIP_BLANKS;
10455
235k
  if (RAW == '"') {
10456
189k
      NEXT;
10457
189k
      encoding = xmlParseEncName(ctxt);
10458
189k
      if (RAW != '"') {
10459
8.43k
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10460
8.43k
    xmlFree((xmlChar *) encoding);
10461
8.43k
    return(NULL);
10462
8.43k
      } else
10463
181k
          NEXT;
10464
189k
  } else if (RAW == '\''){
10465
39.8k
      NEXT;
10466
39.8k
      encoding = xmlParseEncName(ctxt);
10467
39.8k
      if (RAW != '\'') {
10468
1.74k
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10469
1.74k
    xmlFree((xmlChar *) encoding);
10470
1.74k
    return(NULL);
10471
1.74k
      } else
10472
38.0k
          NEXT;
10473
39.8k
  } else {
10474
5.35k
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10475
5.35k
  }
10476
10477
        /*
10478
         * Non standard parsing, allowing the user to ignore encoding
10479
         */
10480
224k
        if (ctxt->options & XML_PARSE_IGNORE_ENC) {
10481
37.6k
      xmlFree((xmlChar *) encoding);
10482
37.6k
            return(NULL);
10483
37.6k
  }
10484
10485
  /*
10486
   * UTF-16 encoding switch has already taken place at this stage,
10487
   * more over the little-endian/big-endian selection is already done
10488
   */
10489
187k
        if ((encoding != NULL) &&
10490
187k
      ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
10491
185k
       (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
10492
      /*
10493
       * If no encoding was passed to the parser, that we are
10494
       * using UTF-16 and no decoder is present i.e. the
10495
       * document is apparently UTF-8 compatible, then raise an
10496
       * encoding mismatch fatal error
10497
       */
10498
3.35k
      if ((ctxt->encoding == NULL) &&
10499
3.35k
          (ctxt->input->buf != NULL) &&
10500
3.35k
          (ctxt->input->buf->encoder == NULL)) {
10501
3.35k
    xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING,
10502
3.35k
      "Document labelled UTF-16 but has UTF-8 content\n");
10503
3.35k
      }
10504
3.35k
      if (ctxt->encoding != NULL)
10505
0
    xmlFree((xmlChar *) ctxt->encoding);
10506
3.35k
      ctxt->encoding = encoding;
10507
3.35k
  }
10508
  /*
10509
   * UTF-8 encoding is handled natively
10510
   */
10511
183k
        else if ((encoding != NULL) &&
10512
183k
      ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
10513
182k
       (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
10514
104k
      if (ctxt->encoding != NULL)
10515
10
    xmlFree((xmlChar *) ctxt->encoding);
10516
104k
      ctxt->encoding = encoding;
10517
104k
  }
10518
79.3k
  else if (encoding != NULL) {
10519
77.6k
      xmlCharEncodingHandlerPtr handler;
10520
10521
77.6k
      if (ctxt->input->encoding != NULL)
10522
0
    xmlFree((xmlChar *) ctxt->input->encoding);
10523
77.6k
      ctxt->input->encoding = encoding;
10524
10525
77.6k
            handler = xmlFindCharEncodingHandler((const char *) encoding);
10526
77.6k
      if (handler != NULL) {
10527
75.7k
    if (xmlSwitchToEncoding(ctxt, handler) < 0) {
10528
        /* failed to convert */
10529
162
        ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
10530
162
        return(NULL);
10531
162
    }
10532
75.7k
      } else {
10533
1.92k
    xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
10534
1.92k
      "Unsupported encoding %s\n", encoding);
10535
1.92k
    return(NULL);
10536
1.92k
      }
10537
77.6k
  }
10538
187k
    }
10539
354k
    return(encoding);
10540
406k
}
10541
10542
/**
10543
 * xmlParseSDDecl:
10544
 * @ctxt:  an XML parser context
10545
 *
10546
 * DEPRECATED: Internal function, don't use.
10547
 *
10548
 * parse the XML standalone declaration
10549
 *
10550
 * [32] SDDecl ::= S 'standalone' Eq
10551
 *                 (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
10552
 *
10553
 * [ VC: Standalone Document Declaration ]
10554
 * TODO The standalone document declaration must have the value "no"
10555
 * if any external markup declarations contain declarations of:
10556
 *  - attributes with default values, if elements to which these
10557
 *    attributes apply appear in the document without specifications
10558
 *    of values for these attributes, or
10559
 *  - entities (other than amp, lt, gt, apos, quot), if references
10560
 *    to those entities appear in the document, or
10561
 *  - attributes with values subject to normalization, where the
10562
 *    attribute appears in the document with a value which will change
10563
 *    as a result of normalization, or
10564
 *  - element types with element content, if white space occurs directly
10565
 *    within any instance of those types.
10566
 *
10567
 * Returns:
10568
 *   1 if standalone="yes"
10569
 *   0 if standalone="no"
10570
 *  -2 if standalone attribute is missing or invalid
10571
 *    (A standalone value of -2 means that the XML declaration was found,
10572
 *     but no value was specified for the standalone attribute).
10573
 */
10574
10575
int
10576
304k
xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
10577
304k
    int standalone = -2;
10578
10579
304k
    SKIP_BLANKS;
10580
304k
    if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
10581
43.5k
  SKIP(10);
10582
43.5k
        SKIP_BLANKS;
10583
43.5k
  if (RAW != '=') {
10584
392
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10585
392
      return(standalone);
10586
392
        }
10587
43.1k
  NEXT;
10588
43.1k
  SKIP_BLANKS;
10589
43.1k
        if (RAW == '\''){
10590
25.1k
      NEXT;
10591
25.1k
      if ((RAW == 'n') && (NXT(1) == 'o')) {
10592
19.2k
          standalone = 0;
10593
19.2k
                SKIP(2);
10594
19.2k
      } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10595
5.82k
                 (NXT(2) == 's')) {
10596
5.20k
          standalone = 1;
10597
5.20k
    SKIP(3);
10598
5.20k
            } else {
10599
619
    xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10600
619
      }
10601
25.1k
      if (RAW != '\'') {
10602
934
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10603
934
      } else
10604
24.1k
          NEXT;
10605
25.1k
  } else if (RAW == '"'){
10606
17.7k
      NEXT;
10607
17.7k
      if ((RAW == 'n') && (NXT(1) == 'o')) {
10608
8.37k
          standalone = 0;
10609
8.37k
    SKIP(2);
10610
9.38k
      } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10611
9.38k
                 (NXT(2) == 's')) {
10612
8.62k
          standalone = 1;
10613
8.62k
                SKIP(3);
10614
8.62k
            } else {
10615
763
    xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10616
763
      }
10617
17.7k
      if (RAW != '"') {
10618
1.06k
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10619
1.06k
      } else
10620
16.6k
          NEXT;
10621
17.7k
  } else {
10622
298
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10623
298
        }
10624
43.1k
    }
10625
304k
    return(standalone);
10626
304k
}
10627
10628
/**
10629
 * xmlParseXMLDecl:
10630
 * @ctxt:  an XML parser context
10631
 *
10632
 * DEPRECATED: Internal function, don't use.
10633
 *
10634
 * parse an XML declaration header
10635
 *
10636
 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10637
 */
10638
10639
void
10640
466k
xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
10641
466k
    xmlChar *version;
10642
10643
    /*
10644
     * This value for standalone indicates that the document has an
10645
     * XML declaration but it does not have a standalone attribute.
10646
     * It will be overwritten later if a standalone attribute is found.
10647
     */
10648
466k
    ctxt->input->standalone = -2;
10649
10650
    /*
10651
     * We know that '<?xml' is here.
10652
     */
10653
466k
    SKIP(5);
10654
10655
466k
    if (!IS_BLANK_CH(RAW)) {
10656
0
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10657
0
                 "Blank needed after '<?xml'\n");
10658
0
    }
10659
466k
    SKIP_BLANKS;
10660
10661
    /*
10662
     * We must have the VersionInfo here.
10663
     */
10664
466k
    version = xmlParseVersionInfo(ctxt);
10665
466k
    if (version == NULL) {
10666
87.9k
  xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
10667
378k
    } else {
10668
378k
  if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10669
      /*
10670
       * Changed here for XML-1.0 5th edition
10671
       */
10672
8.13k
      if (ctxt->options & XML_PARSE_OLD10) {
10673
2.33k
    xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10674
2.33k
                "Unsupported version '%s'\n",
10675
2.33k
                version);
10676
5.80k
      } else {
10677
5.80k
          if ((version[0] == '1') && ((version[1] == '.'))) {
10678
5.14k
        xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10679
5.14k
                      "Unsupported version '%s'\n",
10680
5.14k
          version, NULL);
10681
5.14k
    } else {
10682
656
        xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10683
656
              "Unsupported version '%s'\n",
10684
656
              version);
10685
656
    }
10686
5.80k
      }
10687
8.13k
  }
10688
378k
  if (ctxt->version != NULL)
10689
0
      xmlFree((void *) ctxt->version);
10690
378k
  ctxt->version = version;
10691
378k
    }
10692
10693
    /*
10694
     * We may have the encoding declaration
10695
     */
10696
466k
    if (!IS_BLANK_CH(RAW)) {
10697
217k
        if ((RAW == '?') && (NXT(1) == '>')) {
10698
117k
      SKIP(2);
10699
117k
      return;
10700
117k
  }
10701
99.6k
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10702
99.6k
    }
10703
349k
    xmlParseEncodingDecl(ctxt);
10704
349k
    if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10705
349k
         (ctxt->instate == XML_PARSER_EOF)) {
10706
  /*
10707
   * The XML REC instructs us to stop parsing right here
10708
   */
10709
1.84k
        return;
10710
1.84k
    }
10711
10712
    /*
10713
     * We may have the standalone status.
10714
     */
10715
347k
    if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
10716
46.2k
        if ((RAW == '?') && (NXT(1) == '>')) {
10717
42.5k
      SKIP(2);
10718
42.5k
      return;
10719
42.5k
  }
10720
3.73k
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10721
3.73k
    }
10722
10723
    /*
10724
     * We can grow the input buffer freely at that point
10725
     */
10726
304k
    GROW;
10727
10728
304k
    SKIP_BLANKS;
10729
304k
    ctxt->input->standalone = xmlParseSDDecl(ctxt);
10730
10731
304k
    SKIP_BLANKS;
10732
304k
    if ((RAW == '?') && (NXT(1) == '>')) {
10733
156k
        SKIP(2);
10734
156k
    } else if (RAW == '>') {
10735
        /* Deprecated old WD ... */
10736
1.04k
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10737
1.04k
  NEXT;
10738
147k
    } else {
10739
147k
        int c;
10740
10741
147k
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10742
6.29M
        while ((c = CUR) != 0) {
10743
6.28M
            NEXT;
10744
6.28M
            if (c == '>')
10745
132k
                break;
10746
6.28M
        }
10747
147k
    }
10748
304k
}
10749
10750
/**
10751
 * xmlParseMisc:
10752
 * @ctxt:  an XML parser context
10753
 *
10754
 * DEPRECATED: Internal function, don't use.
10755
 *
10756
 * parse an XML Misc* optional field.
10757
 *
10758
 * [27] Misc ::= Comment | PI |  S
10759
 */
10760
10761
void
10762
1.12M
xmlParseMisc(xmlParserCtxtPtr ctxt) {
10763
1.22M
    while (ctxt->instate != XML_PARSER_EOF) {
10764
1.22M
        SKIP_BLANKS;
10765
1.22M
        GROW;
10766
1.22M
        if ((RAW == '<') && (NXT(1) == '?')) {
10767
57.8k
      xmlParsePI(ctxt);
10768
1.16M
        } else if (CMP4(CUR_PTR, '<', '!', '-', '-')) {
10769
39.3k
      xmlParseComment(ctxt);
10770
1.12M
        } else {
10771
1.12M
            break;
10772
1.12M
        }
10773
1.22M
    }
10774
1.12M
}
10775
10776
/**
10777
 * xmlParseDocument:
10778
 * @ctxt:  an XML parser context
10779
 *
10780
 * parse an XML document (and build a tree if using the standard SAX
10781
 * interface).
10782
 *
10783
 * [1] document ::= prolog element Misc*
10784
 *
10785
 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
10786
 *
10787
 * Returns 0, -1 in case of error. the parser context is augmented
10788
 *                as a result of the parsing.
10789
 */
10790
10791
int
10792
526k
xmlParseDocument(xmlParserCtxtPtr ctxt) {
10793
526k
    xmlChar start[4];
10794
526k
    xmlCharEncoding enc;
10795
10796
526k
    xmlInitParser();
10797
10798
526k
    if ((ctxt == NULL) || (ctxt->input == NULL))
10799
0
        return(-1);
10800
10801
526k
    GROW;
10802
10803
    /*
10804
     * SAX: detecting the level.
10805
     */
10806
526k
    xmlDetectSAX2(ctxt);
10807
10808
    /*
10809
     * SAX: beginning of the document processing.
10810
     */
10811
526k
    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10812
526k
        ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10813
526k
    if (ctxt->instate == XML_PARSER_EOF)
10814
0
  return(-1);
10815
10816
526k
    if ((ctxt->encoding == NULL) &&
10817
526k
        ((ctxt->input->end - ctxt->input->cur) >= 4)) {
10818
  /*
10819
   * Get the 4 first bytes and decode the charset
10820
   * if enc != XML_CHAR_ENCODING_NONE
10821
   * plug some encoding conversion routines.
10822
   */
10823
520k
  start[0] = RAW;
10824
520k
  start[1] = NXT(1);
10825
520k
  start[2] = NXT(2);
10826
520k
  start[3] = NXT(3);
10827
520k
  enc = xmlDetectCharEncoding(&start[0], 4);
10828
520k
  if (enc != XML_CHAR_ENCODING_NONE) {
10829
177k
      xmlSwitchEncoding(ctxt, enc);
10830
177k
  }
10831
520k
    }
10832
10833
10834
526k
    if (CUR == 0) {
10835
3.49k
  xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10836
3.49k
  return(-1);
10837
3.49k
    }
10838
10839
    /*
10840
     * Check for the XMLDecl in the Prolog.
10841
     * do not GROW here to avoid the detected encoder to decode more
10842
     * than just the first line, unless the amount of data is really
10843
     * too small to hold "<?xml version="1.0" encoding="foo"
10844
     */
10845
522k
    if ((ctxt->input->end - ctxt->input->cur) < 35) {
10846
22.5k
       GROW;
10847
22.5k
    }
10848
522k
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10849
10850
  /*
10851
   * Note that we will switch encoding on the fly.
10852
   */
10853
160k
  xmlParseXMLDecl(ctxt);
10854
160k
  if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10855
160k
      (ctxt->instate == XML_PARSER_EOF)) {
10856
      /*
10857
       * The XML REC instructs us to stop parsing right here
10858
       */
10859
690
      return(-1);
10860
690
  }
10861
159k
  ctxt->standalone = ctxt->input->standalone;
10862
159k
  SKIP_BLANKS;
10863
362k
    } else {
10864
362k
  ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10865
362k
    }
10866
522k
    if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10867
496k
        ctxt->sax->startDocument(ctxt->userData);
10868
522k
    if (ctxt->instate == XML_PARSER_EOF)
10869
0
  return(-1);
10870
522k
    if ((ctxt->myDoc != NULL) && (ctxt->input != NULL) &&
10871
522k
        (ctxt->input->buf != NULL) && (ctxt->input->buf->compressed >= 0)) {
10872
0
  ctxt->myDoc->compression = ctxt->input->buf->compressed;
10873
0
    }
10874
10875
    /*
10876
     * The Misc part of the Prolog
10877
     */
10878
522k
    xmlParseMisc(ctxt);
10879
10880
    /*
10881
     * Then possibly doc type declaration(s) and more Misc
10882
     * (doctypedecl Misc*)?
10883
     */
10884
522k
    GROW;
10885
522k
    if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
10886
10887
217k
  ctxt->inSubset = 1;
10888
217k
  xmlParseDocTypeDecl(ctxt);
10889
217k
  if (RAW == '[') {
10890
165k
      ctxt->instate = XML_PARSER_DTD;
10891
165k
      xmlParseInternalSubset(ctxt);
10892
165k
      if (ctxt->instate == XML_PARSER_EOF)
10893
11.8k
    return(-1);
10894
165k
  }
10895
10896
  /*
10897
   * Create and update the external subset.
10898
   */
10899
205k
  ctxt->inSubset = 2;
10900
205k
  if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10901
205k
      (!ctxt->disableSAX))
10902
161k
      ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10903
161k
                                ctxt->extSubSystem, ctxt->extSubURI);
10904
205k
  if (ctxt->instate == XML_PARSER_EOF)
10905
4.48k
      return(-1);
10906
201k
  ctxt->inSubset = 0;
10907
10908
201k
        xmlCleanSpecialAttr(ctxt);
10909
10910
201k
  ctxt->instate = XML_PARSER_PROLOG;
10911
201k
  xmlParseMisc(ctxt);
10912
201k
    }
10913
10914
    /*
10915
     * Time to start parsing the tree itself
10916
     */
10917
505k
    GROW;
10918
505k
    if (RAW != '<') {
10919
101k
  xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10920
101k
           "Start tag expected, '<' not found\n");
10921
404k
    } else {
10922
404k
  ctxt->instate = XML_PARSER_CONTENT;
10923
404k
  xmlParseElement(ctxt);
10924
404k
  ctxt->instate = XML_PARSER_EPILOG;
10925
10926
10927
  /*
10928
   * The Misc part at the end
10929
   */
10930
404k
  xmlParseMisc(ctxt);
10931
10932
404k
  if (RAW != 0) {
10933
126k
      xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
10934
126k
  }
10935
404k
  ctxt->instate = XML_PARSER_EOF;
10936
404k
    }
10937
10938
    /*
10939
     * SAX: end of the document processing.
10940
     */
10941
505k
    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10942
505k
        ctxt->sax->endDocument(ctxt->userData);
10943
10944
    /*
10945
     * Remove locally kept entity definitions if the tree was not built
10946
     */
10947
505k
    if ((ctxt->myDoc != NULL) &&
10948
505k
  (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
10949
2.10k
  xmlFreeDoc(ctxt->myDoc);
10950
2.10k
  ctxt->myDoc = NULL;
10951
2.10k
    }
10952
10953
505k
    if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) {
10954
35.7k
        ctxt->myDoc->properties |= XML_DOC_WELLFORMED;
10955
35.7k
  if (ctxt->valid)
10956
20.5k
      ctxt->myDoc->properties |= XML_DOC_DTDVALID;
10957
35.7k
  if (ctxt->nsWellFormed)
10958
34.0k
      ctxt->myDoc->properties |= XML_DOC_NSVALID;
10959
35.7k
  if (ctxt->options & XML_PARSE_OLD10)
10960
10.9k
      ctxt->myDoc->properties |= XML_DOC_OLD10;
10961
35.7k
    }
10962
505k
    if (! ctxt->wellFormed) {
10963
470k
  ctxt->valid = 0;
10964
470k
  return(-1);
10965
470k
    }
10966
35.7k
    return(0);
10967
505k
}
10968
10969
/**
10970
 * xmlParseExtParsedEnt:
10971
 * @ctxt:  an XML parser context
10972
 *
10973
 * parse a general parsed entity
10974
 * An external general parsed entity is well-formed if it matches the
10975
 * production labeled extParsedEnt.
10976
 *
10977
 * [78] extParsedEnt ::= TextDecl? content
10978
 *
10979
 * Returns 0, -1 in case of error. the parser context is augmented
10980
 *                as a result of the parsing.
10981
 */
10982
10983
int
10984
0
xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
10985
0
    xmlChar start[4];
10986
0
    xmlCharEncoding enc;
10987
10988
0
    if ((ctxt == NULL) || (ctxt->input == NULL))
10989
0
        return(-1);
10990
10991
0
    xmlDetectSAX2(ctxt);
10992
10993
0
    GROW;
10994
10995
    /*
10996
     * SAX: beginning of the document processing.
10997
     */
10998
0
    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10999
0
        ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
11000
11001
    /*
11002
     * Get the 4 first bytes and decode the charset
11003
     * if enc != XML_CHAR_ENCODING_NONE
11004
     * plug some encoding conversion routines.
11005
     */
11006
0
    if ((ctxt->input->end - ctxt->input->cur) >= 4) {
11007
0
  start[0] = RAW;
11008
0
  start[1] = NXT(1);
11009
0
  start[2] = NXT(2);
11010
0
  start[3] = NXT(3);
11011
0
  enc = xmlDetectCharEncoding(start, 4);
11012
0
  if (enc != XML_CHAR_ENCODING_NONE) {
11013
0
      xmlSwitchEncoding(ctxt, enc);
11014
0
  }
11015
0
    }
11016
11017
11018
0
    if (CUR == 0) {
11019
0
  xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11020
0
    }
11021
11022
    /*
11023
     * Check for the XMLDecl in the Prolog.
11024
     */
11025
0
    GROW;
11026
0
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
11027
11028
  /*
11029
   * Note that we will switch encoding on the fly.
11030
   */
11031
0
  xmlParseXMLDecl(ctxt);
11032
0
  if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
11033
      /*
11034
       * The XML REC instructs us to stop parsing right here
11035
       */
11036
0
      return(-1);
11037
0
  }
11038
0
  SKIP_BLANKS;
11039
0
    } else {
11040
0
  ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11041
0
    }
11042
0
    if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
11043
0
        ctxt->sax->startDocument(ctxt->userData);
11044
0
    if (ctxt->instate == XML_PARSER_EOF)
11045
0
  return(-1);
11046
11047
    /*
11048
     * Doing validity checking on chunk doesn't make sense
11049
     */
11050
0
    ctxt->instate = XML_PARSER_CONTENT;
11051
0
    ctxt->validate = 0;
11052
0
    ctxt->loadsubset = 0;
11053
0
    ctxt->depth = 0;
11054
11055
0
    xmlParseContent(ctxt);
11056
0
    if (ctxt->instate == XML_PARSER_EOF)
11057
0
  return(-1);
11058
11059
0
    if ((RAW == '<') && (NXT(1) == '/')) {
11060
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11061
0
    } else if (RAW != 0) {
11062
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
11063
0
    }
11064
11065
    /*
11066
     * SAX: end of the document processing.
11067
     */
11068
0
    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11069
0
        ctxt->sax->endDocument(ctxt->userData);
11070
11071
0
    if (! ctxt->wellFormed) return(-1);
11072
0
    return(0);
11073
0
}
11074
11075
#ifdef LIBXML_PUSH_ENABLED
11076
/************************************************************************
11077
 *                  *
11078
 *    Progressive parsing interfaces        *
11079
 *                  *
11080
 ************************************************************************/
11081
11082
/**
11083
 * xmlParseLookupSequence:
11084
 * @ctxt:  an XML parser context
11085
 * @first:  the first char to lookup
11086
 * @next:  the next char to lookup or zero
11087
 * @third:  the next char to lookup or zero
11088
 *
11089
 * Try to find if a sequence (first, next, third) or  just (first next) or
11090
 * (first) is available in the input stream.
11091
 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
11092
 * to avoid rescanning sequences of bytes, it DOES change the state of the
11093
 * parser, do not use liberally.
11094
 *
11095
 * Returns the index to the current parsing point if the full sequence
11096
 *      is available, -1 otherwise.
11097
 */
11098
static int
11099
xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
11100
12.2M
                       xmlChar next, xmlChar third) {
11101
12.2M
    int base, len;
11102
12.2M
    xmlParserInputPtr in;
11103
12.2M
    const xmlChar *buf;
11104
11105
12.2M
    in = ctxt->input;
11106
12.2M
    if (in == NULL) return(-1);
11107
12.2M
    base = in->cur - in->base;
11108
12.2M
    if (base < 0) return(-1);
11109
12.2M
    if (ctxt->checkIndex > base)
11110
1.59M
        base = ctxt->checkIndex;
11111
12.2M
    if (in->buf == NULL) {
11112
0
  buf = in->base;
11113
0
  len = in->length;
11114
12.2M
    } else {
11115
12.2M
  buf = xmlBufContent(in->buf->buffer);
11116
12.2M
  len = xmlBufUse(in->buf->buffer);
11117
12.2M
    }
11118
    /* take into account the sequence length */
11119
12.2M
    if (third) len -= 2;
11120
10.3M
    else if (next) len --;
11121
237G
    for (;base < len;base++) {
11122
237G
        if (buf[base] == first) {
11123
15.3M
      if (third != 0) {
11124
5.51M
    if ((buf[base + 1] != next) ||
11125
5.51M
        (buf[base + 2] != third)) continue;
11126
9.86M
      } else if (next != 0) {
11127
1.25M
    if (buf[base + 1] != next) continue;
11128
1.25M
      }
11129
9.76M
      ctxt->checkIndex = 0;
11130
#ifdef DEBUG_PUSH
11131
      if (next == 0)
11132
    xmlGenericError(xmlGenericErrorContext,
11133
      "PP: lookup '%c' found at %d\n",
11134
      first, base);
11135
      else if (third == 0)
11136
    xmlGenericError(xmlGenericErrorContext,
11137
      "PP: lookup '%c%c' found at %d\n",
11138
      first, next, base);
11139
      else
11140
    xmlGenericError(xmlGenericErrorContext,
11141
      "PP: lookup '%c%c%c' found at %d\n",
11142
      first, next, third, base);
11143
#endif
11144
9.76M
      return(base - (in->cur - in->base));
11145
15.3M
  }
11146
237G
    }
11147
2.51M
    ctxt->checkIndex = base;
11148
#ifdef DEBUG_PUSH
11149
    if (next == 0)
11150
  xmlGenericError(xmlGenericErrorContext,
11151
    "PP: lookup '%c' failed\n", first);
11152
    else if (third == 0)
11153
  xmlGenericError(xmlGenericErrorContext,
11154
    "PP: lookup '%c%c' failed\n", first, next);
11155
    else
11156
  xmlGenericError(xmlGenericErrorContext,
11157
    "PP: lookup '%c%c%c' failed\n", first, next, third);
11158
#endif
11159
2.51M
    return(-1);
11160
12.2M
}
11161
11162
/**
11163
 * xmlParseGetLasts:
11164
 * @ctxt:  an XML parser context
11165
 * @lastlt:  pointer to store the last '<' from the input
11166
 * @lastgt:  pointer to store the last '>' from the input
11167
 *
11168
 * Lookup the last < and > in the current chunk
11169
 */
11170
static void
11171
xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
11172
11.5M
                 const xmlChar **lastgt) {
11173
11.5M
    const xmlChar *tmp;
11174
11175
11.5M
    if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
11176
0
  xmlGenericError(xmlGenericErrorContext,
11177
0
        "Internal error: xmlParseGetLasts\n");
11178
0
  return;
11179
0
    }
11180
11.5M
    if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
11181
5.31M
        tmp = ctxt->input->end;
11182
5.31M
  tmp--;
11183
1.52G
  while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
11184
5.31M
  if (tmp < ctxt->input->base) {
11185
126k
      *lastlt = NULL;
11186
126k
      *lastgt = NULL;
11187
5.18M
  } else {
11188
5.18M
      *lastlt = tmp;
11189
5.18M
      tmp++;
11190
264M
      while ((tmp < ctxt->input->end) && (*tmp != '>')) {
11191
259M
          if (*tmp == '\'') {
11192
879k
        tmp++;
11193
75.2M
        while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
11194
879k
        if (tmp < ctxt->input->end) tmp++;
11195
258M
    } else if (*tmp == '"') {
11196
3.32M
        tmp++;
11197
165M
        while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
11198
3.32M
        if (tmp < ctxt->input->end) tmp++;
11199
3.32M
    } else
11200
255M
        tmp++;
11201
259M
      }
11202
5.18M
      if (tmp < ctxt->input->end)
11203
2.28M
          *lastgt = tmp;
11204
2.90M
      else {
11205
2.90M
          tmp = *lastlt;
11206
2.90M
    tmp--;
11207
75.9M
    while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
11208
2.90M
    if (tmp >= ctxt->input->base)
11209
2.85M
        *lastgt = tmp;
11210
53.7k
    else
11211
53.7k
        *lastgt = NULL;
11212
2.90M
      }
11213
5.18M
  }
11214
6.24M
    } else {
11215
6.24M
        *lastlt = NULL;
11216
6.24M
  *lastgt = NULL;
11217
6.24M
    }
11218
11.5M
}
11219
/**
11220
 * xmlCheckCdataPush:
11221
 * @cur: pointer to the block of characters
11222
 * @len: length of the block in bytes
11223
 * @complete: 1 if complete CDATA block is passed in, 0 if partial block
11224
 *
11225
 * Check that the block of characters is okay as SCdata content [20]
11226
 *
11227
 * Returns the number of bytes to pass if okay, a negative index where an
11228
 *         UTF-8 error occurred otherwise
11229
 */
11230
static int
11231
1.26M
xmlCheckCdataPush(const xmlChar *utf, int len, int complete) {
11232
1.26M
    int ix;
11233
1.26M
    unsigned char c;
11234
1.26M
    int codepoint;
11235
11236
1.26M
    if ((utf == NULL) || (len <= 0))
11237
1.13k
        return(0);
11238
11239
16.1M
    for (ix = 0; ix < len;) {      /* string is 0-terminated */
11240
16.0M
        c = utf[ix];
11241
16.0M
        if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
11242
10.8M
      if (c >= 0x20)
11243
10.1M
    ix++;
11244
679k
      else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
11245
341k
          ix++;
11246
338k
      else
11247
338k
          return(-ix);
11248
10.8M
  } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
11249
2.25M
      if (ix + 2 > len) return(complete ? -ix : ix);
11250
2.24M
      if ((utf[ix+1] & 0xc0 ) != 0x80)
11251
283k
          return(-ix);
11252
1.95M
      codepoint = (utf[ix] & 0x1f) << 6;
11253
1.95M
      codepoint |= utf[ix+1] & 0x3f;
11254
1.95M
      if (!xmlIsCharQ(codepoint))
11255
70.0k
          return(-ix);
11256
1.88M
      ix += 2;
11257
2.95M
  } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
11258
1.20M
      if (ix + 3 > len) return(complete ? -ix : ix);
11259
1.19M
      if (((utf[ix+1] & 0xc0) != 0x80) ||
11260
1.19M
          ((utf[ix+2] & 0xc0) != 0x80))
11261
216k
        return(-ix);
11262
981k
      codepoint = (utf[ix] & 0xf) << 12;
11263
981k
      codepoint |= (utf[ix+1] & 0x3f) << 6;
11264
981k
      codepoint |= utf[ix+2] & 0x3f;
11265
981k
      if (!xmlIsCharQ(codepoint))
11266
2.82k
          return(-ix);
11267
978k
      ix += 3;
11268
1.75M
  } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
11269
1.55M
      if (ix + 4 > len) return(complete ? -ix : ix);
11270
1.54M
      if (((utf[ix+1] & 0xc0) != 0x80) ||
11271
1.54M
          ((utf[ix+2] & 0xc0) != 0x80) ||
11272
1.54M
    ((utf[ix+3] & 0xc0) != 0x80))
11273
58.1k
        return(-ix);
11274
1.48M
      codepoint = (utf[ix] & 0x7) << 18;
11275
1.48M
      codepoint |= (utf[ix+1] & 0x3f) << 12;
11276
1.48M
      codepoint |= (utf[ix+2] & 0x3f) << 6;
11277
1.48M
      codepoint |= utf[ix+3] & 0x3f;
11278
1.48M
      if (!xmlIsCharQ(codepoint))
11279
12.2k
          return(-ix);
11280
1.47M
      ix += 4;
11281
1.47M
  } else       /* unknown encoding */
11282
202k
      return(-ix);
11283
16.0M
      }
11284
59.4k
      return(ix);
11285
1.26M
}
11286
11287
/**
11288
 * xmlParseTryOrFinish:
11289
 * @ctxt:  an XML parser context
11290
 * @terminate:  last chunk indicator
11291
 *
11292
 * Try to progress on parsing
11293
 *
11294
 * Returns zero if no parsing was possible
11295
 */
11296
static int
11297
10.9M
xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
11298
10.9M
    int ret = 0;
11299
10.9M
    int avail, tlen;
11300
10.9M
    xmlChar cur, next;
11301
10.9M
    const xmlChar *lastlt, *lastgt;
11302
11303
10.9M
    if (ctxt->input == NULL)
11304
0
        return(0);
11305
11306
#ifdef DEBUG_PUSH
11307
    switch (ctxt->instate) {
11308
  case XML_PARSER_EOF:
11309
      xmlGenericError(xmlGenericErrorContext,
11310
        "PP: try EOF\n"); break;
11311
  case XML_PARSER_START:
11312
      xmlGenericError(xmlGenericErrorContext,
11313
        "PP: try START\n"); break;
11314
  case XML_PARSER_MISC:
11315
      xmlGenericError(xmlGenericErrorContext,
11316
        "PP: try MISC\n");break;
11317
  case XML_PARSER_COMMENT:
11318
      xmlGenericError(xmlGenericErrorContext,
11319
        "PP: try COMMENT\n");break;
11320
  case XML_PARSER_PROLOG:
11321
      xmlGenericError(xmlGenericErrorContext,
11322
        "PP: try PROLOG\n");break;
11323
  case XML_PARSER_START_TAG:
11324
      xmlGenericError(xmlGenericErrorContext,
11325
        "PP: try START_TAG\n");break;
11326
  case XML_PARSER_CONTENT:
11327
      xmlGenericError(xmlGenericErrorContext,
11328
        "PP: try CONTENT\n");break;
11329
  case XML_PARSER_CDATA_SECTION:
11330
      xmlGenericError(xmlGenericErrorContext,
11331
        "PP: try CDATA_SECTION\n");break;
11332
  case XML_PARSER_END_TAG:
11333
      xmlGenericError(xmlGenericErrorContext,
11334
        "PP: try END_TAG\n");break;
11335
  case XML_PARSER_ENTITY_DECL:
11336
      xmlGenericError(xmlGenericErrorContext,
11337
        "PP: try ENTITY_DECL\n");break;
11338
  case XML_PARSER_ENTITY_VALUE:
11339
      xmlGenericError(xmlGenericErrorContext,
11340
        "PP: try ENTITY_VALUE\n");break;
11341
  case XML_PARSER_ATTRIBUTE_VALUE:
11342
      xmlGenericError(xmlGenericErrorContext,
11343
        "PP: try ATTRIBUTE_VALUE\n");break;
11344
  case XML_PARSER_DTD:
11345
      xmlGenericError(xmlGenericErrorContext,
11346
        "PP: try DTD\n");break;
11347
  case XML_PARSER_EPILOG:
11348
      xmlGenericError(xmlGenericErrorContext,
11349
        "PP: try EPILOG\n");break;
11350
  case XML_PARSER_PI:
11351
      xmlGenericError(xmlGenericErrorContext,
11352
        "PP: try PI\n");break;
11353
        case XML_PARSER_IGNORE:
11354
            xmlGenericError(xmlGenericErrorContext,
11355
        "PP: try IGNORE\n");break;
11356
    }
11357
#endif
11358
11359
10.9M
    if ((ctxt->input != NULL) &&
11360
10.9M
        (ctxt->input->cur - ctxt->input->base > 4096)) {
11361
66.6k
  xmlSHRINK(ctxt);
11362
66.6k
  ctxt->checkIndex = 0;
11363
66.6k
    }
11364
10.9M
    xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11365
11366
57.5M
    while (ctxt->instate != XML_PARSER_EOF) {
11367
57.3M
  if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
11368
257k
      return(0);
11369
11370
57.1M
  if (ctxt->input == NULL) break;
11371
57.1M
  if (ctxt->input->buf == NULL)
11372
0
      avail = ctxt->input->length -
11373
0
              (ctxt->input->cur - ctxt->input->base);
11374
57.1M
  else {
11375
      /*
11376
       * If we are operating on converted input, try to flush
11377
       * remaining chars to avoid them stalling in the non-converted
11378
       * buffer. But do not do this in document start where
11379
       * encoding="..." may not have been read and we work on a
11380
       * guessed encoding.
11381
       */
11382
57.1M
      if ((ctxt->instate != XML_PARSER_START) &&
11383
57.1M
          (ctxt->input->buf->raw != NULL) &&
11384
57.1M
    (xmlBufIsEmpty(ctxt->input->buf->raw) == 0)) {
11385
202k
                size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
11386
202k
                                                 ctxt->input);
11387
202k
    size_t current = ctxt->input->cur - ctxt->input->base;
11388
11389
202k
    xmlParserInputBufferPush(ctxt->input->buf, 0, "");
11390
202k
                xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
11391
202k
                                      base, current);
11392
202k
      }
11393
57.1M
      avail = xmlBufUse(ctxt->input->buf->buffer) -
11394
57.1M
        (ctxt->input->cur - ctxt->input->base);
11395
57.1M
  }
11396
57.1M
        if (avail < 1)
11397
326k
      goto done;
11398
56.8M
        switch (ctxt->instate) {
11399
0
            case XML_PARSER_EOF:
11400
          /*
11401
     * Document parsing is done !
11402
     */
11403
0
          goto done;
11404
2.15M
            case XML_PARSER_START:
11405
2.15M
    if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
11406
459k
        xmlChar start[4];
11407
459k
        xmlCharEncoding enc;
11408
11409
        /*
11410
         * Very first chars read from the document flow.
11411
         */
11412
459k
        if (avail < 4)
11413
28.9k
      goto done;
11414
11415
        /*
11416
         * Get the 4 first bytes and decode the charset
11417
         * if enc != XML_CHAR_ENCODING_NONE
11418
         * plug some encoding conversion routines,
11419
         * else xmlSwitchEncoding will set to (default)
11420
         * UTF8.
11421
         */
11422
430k
        start[0] = RAW;
11423
430k
        start[1] = NXT(1);
11424
430k
        start[2] = NXT(2);
11425
430k
        start[3] = NXT(3);
11426
430k
        enc = xmlDetectCharEncoding(start, 4);
11427
430k
        xmlSwitchEncoding(ctxt, enc);
11428
430k
        break;
11429
459k
    }
11430
11431
1.69M
    if (avail < 2)
11432
434
        goto done;
11433
1.69M
    cur = ctxt->input->cur[0];
11434
1.69M
    next = ctxt->input->cur[1];
11435
1.69M
    if (cur == 0) {
11436
4.71k
        if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11437
4.71k
      ctxt->sax->setDocumentLocator(ctxt->userData,
11438
4.71k
                  &xmlDefaultSAXLocator);
11439
4.71k
        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11440
4.71k
        xmlHaltParser(ctxt);
11441
#ifdef DEBUG_PUSH
11442
        xmlGenericError(xmlGenericErrorContext,
11443
          "PP: entering EOF\n");
11444
#endif
11445
4.71k
        if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11446
4.71k
      ctxt->sax->endDocument(ctxt->userData);
11447
4.71k
        goto done;
11448
4.71k
    }
11449
1.68M
          if ((cur == '<') && (next == '?')) {
11450
        /* PI or XML decl */
11451
1.19M
        if (avail < 5) return(ret);
11452
1.19M
        if ((!terminate) &&
11453
1.19M
            (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11454
838k
      return(ret);
11455
352k
        if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11456
352k
      ctxt->sax->setDocumentLocator(ctxt->userData,
11457
352k
                  &xmlDefaultSAXLocator);
11458
352k
        if ((ctxt->input->cur[2] == 'x') &&
11459
352k
      (ctxt->input->cur[3] == 'm') &&
11460
352k
      (ctxt->input->cur[4] == 'l') &&
11461
352k
      (IS_BLANK_CH(ctxt->input->cur[5]))) {
11462
306k
      ret += 5;
11463
#ifdef DEBUG_PUSH
11464
      xmlGenericError(xmlGenericErrorContext,
11465
        "PP: Parsing XML Decl\n");
11466
#endif
11467
306k
      xmlParseXMLDecl(ctxt);
11468
306k
      if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
11469
          /*
11470
           * The XML REC instructs us to stop parsing right
11471
           * here
11472
           */
11473
1.15k
          xmlHaltParser(ctxt);
11474
1.15k
          return(0);
11475
1.15k
      }
11476
305k
      ctxt->standalone = ctxt->input->standalone;
11477
305k
      if ((ctxt->encoding == NULL) &&
11478
305k
          (ctxt->input->encoding != NULL))
11479
44.5k
          ctxt->encoding = xmlStrdup(ctxt->input->encoding);
11480
305k
      if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11481
305k
          (!ctxt->disableSAX))
11482
259k
          ctxt->sax->startDocument(ctxt->userData);
11483
305k
      ctxt->instate = XML_PARSER_MISC;
11484
#ifdef DEBUG_PUSH
11485
      xmlGenericError(xmlGenericErrorContext,
11486
        "PP: entering MISC\n");
11487
#endif
11488
305k
        } else {
11489
45.7k
      ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11490
45.7k
      if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11491
45.7k
          (!ctxt->disableSAX))
11492
45.7k
          ctxt->sax->startDocument(ctxt->userData);
11493
45.7k
      ctxt->instate = XML_PARSER_MISC;
11494
#ifdef DEBUG_PUSH
11495
      xmlGenericError(xmlGenericErrorContext,
11496
        "PP: entering MISC\n");
11497
#endif
11498
45.7k
        }
11499
496k
    } else {
11500
496k
        if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11501
496k
      ctxt->sax->setDocumentLocator(ctxt->userData,
11502
496k
                  &xmlDefaultSAXLocator);
11503
496k
        ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11504
496k
        if (ctxt->version == NULL) {
11505
0
            xmlErrMemory(ctxt, NULL);
11506
0
      break;
11507
0
        }
11508
496k
        if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11509
496k
            (!ctxt->disableSAX))
11510
496k
      ctxt->sax->startDocument(ctxt->userData);
11511
496k
        ctxt->instate = XML_PARSER_MISC;
11512
#ifdef DEBUG_PUSH
11513
        xmlGenericError(xmlGenericErrorContext,
11514
          "PP: entering MISC\n");
11515
#endif
11516
496k
    }
11517
847k
    break;
11518
9.67M
            case XML_PARSER_START_TAG: {
11519
9.67M
          const xmlChar *name;
11520
9.67M
    const xmlChar *prefix = NULL;
11521
9.67M
    const xmlChar *URI = NULL;
11522
9.67M
                int line = ctxt->input->line;
11523
9.67M
    int nsNr = ctxt->nsNr;
11524
11525
9.67M
    if ((avail < 2) && (ctxt->inputNr == 1))
11526
0
        goto done;
11527
9.67M
    cur = ctxt->input->cur[0];
11528
9.67M
          if (cur != '<') {
11529
47.3k
        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11530
47.3k
        xmlHaltParser(ctxt);
11531
47.3k
        if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11532
47.3k
      ctxt->sax->endDocument(ctxt->userData);
11533
47.3k
        goto done;
11534
47.3k
    }
11535
9.62M
    if (!terminate) {
11536
9.21M
        if (ctxt->progressive) {
11537
            /* > can be found unescaped in attribute values */
11538
9.21M
            if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
11539
1.09M
          goto done;
11540
9.21M
        } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11541
0
      goto done;
11542
0
        }
11543
9.21M
    }
11544
8.53M
    if (ctxt->spaceNr == 0)
11545
90.3k
        spacePush(ctxt, -1);
11546
8.44M
    else if (*ctxt->space == -2)
11547
1.00M
        spacePush(ctxt, -1);
11548
7.44M
    else
11549
7.44M
        spacePush(ctxt, *ctxt->space);
11550
8.53M
#ifdef LIBXML_SAX1_ENABLED
11551
8.53M
    if (ctxt->sax2)
11552
4.44M
#endif /* LIBXML_SAX1_ENABLED */
11553
4.44M
        name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
11554
4.09M
#ifdef LIBXML_SAX1_ENABLED
11555
4.09M
    else
11556
4.09M
        name = xmlParseStartTag(ctxt);
11557
8.53M
#endif /* LIBXML_SAX1_ENABLED */
11558
8.53M
    if (ctxt->instate == XML_PARSER_EOF)
11559
30
        goto done;
11560
8.53M
    if (name == NULL) {
11561
65.7k
        spacePop(ctxt);
11562
65.7k
        xmlHaltParser(ctxt);
11563
65.7k
        if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11564
65.7k
      ctxt->sax->endDocument(ctxt->userData);
11565
65.7k
        goto done;
11566
65.7k
    }
11567
8.46M
#ifdef LIBXML_VALID_ENABLED
11568
    /*
11569
     * [ VC: Root Element Type ]
11570
     * The Name in the document type declaration must match
11571
     * the element type of the root element.
11572
     */
11573
8.46M
    if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
11574
8.46M
        ctxt->node && (ctxt->node == ctxt->myDoc->children))
11575
0
        ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
11576
8.46M
#endif /* LIBXML_VALID_ENABLED */
11577
11578
    /*
11579
     * Check for an Empty Element.
11580
     */
11581
8.46M
    if ((RAW == '/') && (NXT(1) == '>')) {
11582
3.38M
        SKIP(2);
11583
11584
3.38M
        if (ctxt->sax2) {
11585
1.95M
      if ((ctxt->sax != NULL) &&
11586
1.95M
          (ctxt->sax->endElementNs != NULL) &&
11587
1.95M
          (!ctxt->disableSAX))
11588
1.95M
          ctxt->sax->endElementNs(ctxt->userData, name,
11589
1.95M
                                  prefix, URI);
11590
1.95M
      if (ctxt->nsNr - nsNr > 0)
11591
16.5k
          nsPop(ctxt, ctxt->nsNr - nsNr);
11592
1.95M
#ifdef LIBXML_SAX1_ENABLED
11593
1.95M
        } else {
11594
1.42M
      if ((ctxt->sax != NULL) &&
11595
1.42M
          (ctxt->sax->endElement != NULL) &&
11596
1.42M
          (!ctxt->disableSAX))
11597
1.42M
          ctxt->sax->endElement(ctxt->userData, name);
11598
1.42M
#endif /* LIBXML_SAX1_ENABLED */
11599
1.42M
        }
11600
3.38M
        if (ctxt->instate == XML_PARSER_EOF)
11601
0
      goto done;
11602
3.38M
        spacePop(ctxt);
11603
3.38M
        if (ctxt->nameNr == 0) {
11604
12.6k
      ctxt->instate = XML_PARSER_EPILOG;
11605
3.37M
        } else {
11606
3.37M
      ctxt->instate = XML_PARSER_CONTENT;
11607
3.37M
        }
11608
3.38M
                    ctxt->progressive = 1;
11609
3.38M
        break;
11610
3.38M
    }
11611
5.08M
    if (RAW == '>') {
11612
4.27M
        NEXT;
11613
4.27M
    } else {
11614
807k
        xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
11615
807k
           "Couldn't find end of Start Tag %s\n",
11616
807k
           name);
11617
807k
        nodePop(ctxt);
11618
807k
        spacePop(ctxt);
11619
807k
    }
11620
5.08M
                nameNsPush(ctxt, name, prefix, URI, line, ctxt->nsNr - nsNr);
11621
11622
5.08M
    ctxt->instate = XML_PARSER_CONTENT;
11623
5.08M
                ctxt->progressive = 1;
11624
5.08M
                break;
11625
8.46M
      }
11626
34.2M
            case XML_PARSER_CONTENT: {
11627
34.2M
    int id;
11628
34.2M
    unsigned long cons;
11629
34.2M
    if ((avail < 2) && (ctxt->inputNr == 1))
11630
150k
        goto done;
11631
34.0M
    cur = ctxt->input->cur[0];
11632
34.0M
    next = ctxt->input->cur[1];
11633
11634
34.0M
    id = ctxt->input->id;
11635
34.0M
          cons = CUR_CONSUMED;
11636
34.0M
    if ((cur == '<') && (next == '/')) {
11637
3.14M
        ctxt->instate = XML_PARSER_END_TAG;
11638
3.14M
        break;
11639
30.9M
          } else if ((cur == '<') && (next == '?')) {
11640
158k
        if ((!terminate) &&
11641
158k
            (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11642
91.8k
                        ctxt->progressive = XML_PARSER_PI;
11643
91.8k
      goto done;
11644
91.8k
                    }
11645
66.9k
        xmlParsePI(ctxt);
11646
66.9k
        ctxt->instate = XML_PARSER_CONTENT;
11647
66.9k
                    ctxt->progressive = 1;
11648
30.7M
    } else if ((cur == '<') && (next != '!')) {
11649
7.97M
        ctxt->instate = XML_PARSER_START_TAG;
11650
7.97M
        break;
11651
22.8M
    } else if ((cur == '<') && (next == '!') &&
11652
22.8M
               (ctxt->input->cur[2] == '-') &&
11653
22.8M
         (ctxt->input->cur[3] == '-')) {
11654
449k
        int term;
11655
11656
449k
              if (avail < 4)
11657
0
            goto done;
11658
449k
        ctxt->input->cur += 4;
11659
449k
        term = xmlParseLookupSequence(ctxt, '-', '-', '>');
11660
449k
        ctxt->input->cur -= 4;
11661
449k
        if ((!terminate) && (term < 0)) {
11662
226k
                        ctxt->progressive = XML_PARSER_COMMENT;
11663
226k
      goto done;
11664
226k
                    }
11665
223k
        xmlParseComment(ctxt);
11666
223k
        ctxt->instate = XML_PARSER_CONTENT;
11667
223k
                    ctxt->progressive = 1;
11668
22.3M
    } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
11669
22.3M
        (ctxt->input->cur[2] == '[') &&
11670
22.3M
        (ctxt->input->cur[3] == 'C') &&
11671
22.3M
        (ctxt->input->cur[4] == 'D') &&
11672
22.3M
        (ctxt->input->cur[5] == 'A') &&
11673
22.3M
        (ctxt->input->cur[6] == 'T') &&
11674
22.3M
        (ctxt->input->cur[7] == 'A') &&
11675
22.3M
        (ctxt->input->cur[8] == '[')) {
11676
54.2k
        SKIP(9);
11677
54.2k
        ctxt->instate = XML_PARSER_CDATA_SECTION;
11678
54.2k
        break;
11679
22.3M
    } else if ((cur == '<') && (next == '!') &&
11680
22.3M
               (avail < 9)) {
11681
23.7k
        goto done;
11682
22.2M
    } else if (cur == '&') {
11683
8.65M
        if ((!terminate) &&
11684
8.65M
            (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
11685
276k
      goto done;
11686
8.37M
        xmlParseReference(ctxt);
11687
13.6M
    } else {
11688
        /* TODO Avoid the extra copy, handle directly !!! */
11689
        /*
11690
         * Goal of the following test is:
11691
         *  - minimize calls to the SAX 'character' callback
11692
         *    when they are mergeable
11693
         *  - handle an problem for isBlank when we only parse
11694
         *    a sequence of blank chars and the next one is
11695
         *    not available to check against '<' presence.
11696
         *  - tries to homogenize the differences in SAX
11697
         *    callbacks between the push and pull versions
11698
         *    of the parser.
11699
         */
11700
13.6M
        if ((ctxt->inputNr == 1) &&
11701
13.6M
            (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
11702
10.5M
      if (!terminate) {
11703
10.4M
          if (ctxt->progressive) {
11704
10.4M
        if ((lastlt == NULL) ||
11705
10.4M
            (ctxt->input->cur > lastlt))
11706
1.06M
            goto done;
11707
10.4M
          } else if (xmlParseLookupSequence(ctxt,
11708
0
                                            '<', 0, 0) < 0) {
11709
0
        goto done;
11710
0
          }
11711
10.4M
      }
11712
10.5M
                    }
11713
12.5M
        ctxt->checkIndex = 0;
11714
12.5M
        xmlParseCharData(ctxt, 0);
11715
12.5M
    }
11716
21.2M
    if ((cons == CUR_CONSUMED) && (id == ctxt->input->id)) {
11717
176k
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
11718
176k
                    "detected an error in element content\n");
11719
176k
        xmlHaltParser(ctxt);
11720
176k
        break;
11721
176k
    }
11722
21.0M
    break;
11723
21.2M
      }
11724
21.0M
            case XML_PARSER_END_TAG:
11725
3.26M
    if (avail < 2)
11726
0
        goto done;
11727
3.26M
    if (!terminate) {
11728
3.18M
        if (ctxt->progressive) {
11729
            /* > can be found unescaped in attribute values */
11730
3.18M
            if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
11731
123k
          goto done;
11732
3.18M
        } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11733
0
      goto done;
11734
0
        }
11735
3.18M
    }
11736
3.14M
    if (ctxt->sax2) {
11737
1.51M
              xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
11738
1.51M
        nameNsPop(ctxt);
11739
1.51M
    }
11740
1.62M
#ifdef LIBXML_SAX1_ENABLED
11741
1.62M
      else
11742
1.62M
        xmlParseEndTag1(ctxt, 0);
11743
3.14M
#endif /* LIBXML_SAX1_ENABLED */
11744
3.14M
    if (ctxt->instate == XML_PARSER_EOF) {
11745
        /* Nothing */
11746
3.14M
    } else if (ctxt->nameNr == 0) {
11747
81.8k
        ctxt->instate = XML_PARSER_EPILOG;
11748
3.06M
    } else {
11749
3.06M
        ctxt->instate = XML_PARSER_CONTENT;
11750
3.06M
    }
11751
3.14M
    break;
11752
1.31M
            case XML_PARSER_CDATA_SECTION: {
11753
          /*
11754
     * The Push mode need to have the SAX callback for
11755
     * cdataBlock merge back contiguous callbacks.
11756
     */
11757
1.31M
    int base;
11758
11759
1.31M
    base = xmlParseLookupSequence(ctxt, ']', ']', '>');
11760
1.31M
    if (base < 0) {
11761
895k
        if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
11762
849k
            int tmp;
11763
11764
849k
      tmp = xmlCheckCdataPush(ctxt->input->cur,
11765
849k
                              XML_PARSER_BIG_BUFFER_SIZE, 0);
11766
849k
      if (tmp < 0) {
11767
7.18k
          tmp = -tmp;
11768
7.18k
          ctxt->input->cur += tmp;
11769
7.18k
          goto encoding_error;
11770
7.18k
      }
11771
842k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
11772
842k
          if (ctxt->sax->cdataBlock != NULL)
11773
305k
        ctxt->sax->cdataBlock(ctxt->userData,
11774
305k
                              ctxt->input->cur, tmp);
11775
537k
          else if (ctxt->sax->characters != NULL)
11776
537k
        ctxt->sax->characters(ctxt->userData,
11777
537k
                              ctxt->input->cur, tmp);
11778
842k
      }
11779
842k
      if (ctxt->instate == XML_PARSER_EOF)
11780
0
          goto done;
11781
842k
      SKIPL(tmp);
11782
842k
      ctxt->checkIndex = 0;
11783
842k
        }
11784
888k
        goto done;
11785
895k
    } else {
11786
417k
        int tmp;
11787
11788
417k
        tmp = xmlCheckCdataPush(ctxt->input->cur, base, 1);
11789
417k
        if ((tmp < 0) || (tmp != base)) {
11790
384k
      tmp = -tmp;
11791
384k
      ctxt->input->cur += tmp;
11792
384k
      goto encoding_error;
11793
384k
        }
11794
32.5k
        if ((ctxt->sax != NULL) && (base == 0) &&
11795
32.5k
            (ctxt->sax->cdataBlock != NULL) &&
11796
32.5k
            (!ctxt->disableSAX)) {
11797
      /*
11798
       * Special case to provide identical behaviour
11799
       * between pull and push parsers on enpty CDATA
11800
       * sections
11801
       */
11802
636
       if ((ctxt->input->cur - ctxt->input->base >= 9) &&
11803
636
           (!strncmp((const char *)&ctxt->input->cur[-9],
11804
636
                     "<![CDATA[", 9)))
11805
636
           ctxt->sax->cdataBlock(ctxt->userData,
11806
636
                                 BAD_CAST "", 0);
11807
31.9k
        } else if ((ctxt->sax != NULL) && (base > 0) &&
11808
31.9k
      (!ctxt->disableSAX)) {
11809
31.4k
      if (ctxt->sax->cdataBlock != NULL)
11810
24.4k
          ctxt->sax->cdataBlock(ctxt->userData,
11811
24.4k
              ctxt->input->cur, base);
11812
6.96k
      else if (ctxt->sax->characters != NULL)
11813
6.96k
          ctxt->sax->characters(ctxt->userData,
11814
6.96k
              ctxt->input->cur, base);
11815
31.4k
        }
11816
32.5k
        if (ctxt->instate == XML_PARSER_EOF)
11817
0
      goto done;
11818
32.5k
        SKIPL(base + 3);
11819
32.5k
        ctxt->checkIndex = 0;
11820
32.5k
        ctxt->instate = XML_PARSER_CONTENT;
11821
#ifdef DEBUG_PUSH
11822
        xmlGenericError(xmlGenericErrorContext,
11823
          "PP: entering CONTENT\n");
11824
#endif
11825
32.5k
    }
11826
32.5k
    break;
11827
1.31M
      }
11828
973k
            case XML_PARSER_MISC:
11829
973k
    SKIP_BLANKS;
11830
973k
    if (ctxt->input->buf == NULL)
11831
0
        avail = ctxt->input->length -
11832
0
                (ctxt->input->cur - ctxt->input->base);
11833
973k
    else
11834
973k
        avail = xmlBufUse(ctxt->input->buf->buffer) -
11835
973k
                (ctxt->input->cur - ctxt->input->base);
11836
973k
    if (avail < 2)
11837
10.3k
        goto done;
11838
963k
    cur = ctxt->input->cur[0];
11839
963k
    next = ctxt->input->cur[1];
11840
963k
          if ((cur == '<') && (next == '?')) {
11841
65.0k
        if ((!terminate) &&
11842
65.0k
            (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11843
10.2k
                        ctxt->progressive = XML_PARSER_PI;
11844
10.2k
      goto done;
11845
10.2k
                    }
11846
#ifdef DEBUG_PUSH
11847
        xmlGenericError(xmlGenericErrorContext,
11848
          "PP: Parsing PI\n");
11849
#endif
11850
54.7k
        xmlParsePI(ctxt);
11851
54.7k
        if (ctxt->instate == XML_PARSER_EOF)
11852
0
      goto done;
11853
54.7k
        ctxt->instate = XML_PARSER_MISC;
11854
54.7k
                    ctxt->progressive = 1;
11855
54.7k
        ctxt->checkIndex = 0;
11856
898k
    } else if ((cur == '<') && (next == '!') &&
11857
898k
        (ctxt->input->cur[2] == '-') &&
11858
898k
        (ctxt->input->cur[3] == '-')) {
11859
65.4k
        if ((!terminate) &&
11860
65.4k
            (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11861
42.5k
                        ctxt->progressive = XML_PARSER_COMMENT;
11862
42.5k
      goto done;
11863
42.5k
                    }
11864
#ifdef DEBUG_PUSH
11865
        xmlGenericError(xmlGenericErrorContext,
11866
          "PP: Parsing Comment\n");
11867
#endif
11868
22.9k
        xmlParseComment(ctxt);
11869
22.9k
        if (ctxt->instate == XML_PARSER_EOF)
11870
0
      goto done;
11871
22.9k
        ctxt->instate = XML_PARSER_MISC;
11872
22.9k
                    ctxt->progressive = 1;
11873
22.9k
        ctxt->checkIndex = 0;
11874
832k
    } else if ((cur == '<') && (next == '!') &&
11875
832k
        (ctxt->input->cur[2] == 'D') &&
11876
832k
        (ctxt->input->cur[3] == 'O') &&
11877
832k
        (ctxt->input->cur[4] == 'C') &&
11878
832k
        (ctxt->input->cur[5] == 'T') &&
11879
832k
        (ctxt->input->cur[6] == 'Y') &&
11880
832k
        (ctxt->input->cur[7] == 'P') &&
11881
832k
        (ctxt->input->cur[8] == 'E')) {
11882
387k
        if ((!terminate) &&
11883
387k
            (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) {
11884
47.7k
                        ctxt->progressive = XML_PARSER_DTD;
11885
47.7k
      goto done;
11886
47.7k
                    }
11887
#ifdef DEBUG_PUSH
11888
        xmlGenericError(xmlGenericErrorContext,
11889
          "PP: Parsing internal subset\n");
11890
#endif
11891
340k
        ctxt->inSubset = 1;
11892
340k
                    ctxt->progressive = 0;
11893
340k
        ctxt->checkIndex = 0;
11894
340k
        xmlParseDocTypeDecl(ctxt);
11895
340k
        if (ctxt->instate == XML_PARSER_EOF)
11896
0
      goto done;
11897
340k
        if (RAW == '[') {
11898
252k
      ctxt->instate = XML_PARSER_DTD;
11899
#ifdef DEBUG_PUSH
11900
      xmlGenericError(xmlGenericErrorContext,
11901
        "PP: entering DTD\n");
11902
#endif
11903
252k
        } else {
11904
      /*
11905
       * Create and update the external subset.
11906
       */
11907
87.9k
      ctxt->inSubset = 2;
11908
87.9k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11909
87.9k
          (ctxt->sax->externalSubset != NULL))
11910
80.1k
          ctxt->sax->externalSubset(ctxt->userData,
11911
80.1k
            ctxt->intSubName, ctxt->extSubSystem,
11912
80.1k
            ctxt->extSubURI);
11913
87.9k
      ctxt->inSubset = 0;
11914
87.9k
      xmlCleanSpecialAttr(ctxt);
11915
87.9k
      ctxt->instate = XML_PARSER_PROLOG;
11916
#ifdef DEBUG_PUSH
11917
      xmlGenericError(xmlGenericErrorContext,
11918
        "PP: entering PROLOG\n");
11919
#endif
11920
87.9k
        }
11921
445k
    } else if ((cur == '<') && (next == '!') &&
11922
445k
               (avail < 9)) {
11923
7.98k
        goto done;
11924
437k
    } else {
11925
437k
        ctxt->instate = XML_PARSER_START_TAG;
11926
437k
        ctxt->progressive = XML_PARSER_START_TAG;
11927
437k
        xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11928
#ifdef DEBUG_PUSH
11929
        xmlGenericError(xmlGenericErrorContext,
11930
          "PP: entering START_TAG\n");
11931
#endif
11932
437k
    }
11933
854k
    break;
11934
854k
            case XML_PARSER_PROLOG:
11935
311k
    SKIP_BLANKS;
11936
311k
    if (ctxt->input->buf == NULL)
11937
0
        avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11938
311k
    else
11939
311k
        avail = xmlBufUse(ctxt->input->buf->buffer) -
11940
311k
                            (ctxt->input->cur - ctxt->input->base);
11941
311k
    if (avail < 2)
11942
10.2k
        goto done;
11943
301k
    cur = ctxt->input->cur[0];
11944
301k
    next = ctxt->input->cur[1];
11945
301k
          if ((cur == '<') && (next == '?')) {
11946
44.9k
        if ((!terminate) &&
11947
44.9k
            (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11948
16.0k
                        ctxt->progressive = XML_PARSER_PI;
11949
16.0k
      goto done;
11950
16.0k
                    }
11951
#ifdef DEBUG_PUSH
11952
        xmlGenericError(xmlGenericErrorContext,
11953
          "PP: Parsing PI\n");
11954
#endif
11955
28.9k
        xmlParsePI(ctxt);
11956
28.9k
        if (ctxt->instate == XML_PARSER_EOF)
11957
0
      goto done;
11958
28.9k
        ctxt->instate = XML_PARSER_PROLOG;
11959
28.9k
                    ctxt->progressive = 1;
11960
256k
    } else if ((cur == '<') && (next == '!') &&
11961
256k
        (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11962
56.8k
        if ((!terminate) &&
11963
56.8k
            (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11964
20.9k
                        ctxt->progressive = XML_PARSER_COMMENT;
11965
20.9k
      goto done;
11966
20.9k
                    }
11967
#ifdef DEBUG_PUSH
11968
        xmlGenericError(xmlGenericErrorContext,
11969
          "PP: Parsing Comment\n");
11970
#endif
11971
35.9k
        xmlParseComment(ctxt);
11972
35.9k
        if (ctxt->instate == XML_PARSER_EOF)
11973
0
      goto done;
11974
35.9k
        ctxt->instate = XML_PARSER_PROLOG;
11975
35.9k
                    ctxt->progressive = 1;
11976
199k
    } else if ((cur == '<') && (next == '!') &&
11977
199k
               (avail < 4)) {
11978
422
        goto done;
11979
198k
    } else {
11980
198k
        ctxt->instate = XML_PARSER_START_TAG;
11981
198k
        if (ctxt->progressive == 0)
11982
164k
      ctxt->progressive = XML_PARSER_START_TAG;
11983
198k
        xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11984
#ifdef DEBUG_PUSH
11985
        xmlGenericError(xmlGenericErrorContext,
11986
          "PP: entering START_TAG\n");
11987
#endif
11988
198k
    }
11989
263k
    break;
11990
263k
            case XML_PARSER_EPILOG:
11991
133k
    SKIP_BLANKS;
11992
133k
    if (ctxt->input->buf == NULL)
11993
0
        avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11994
133k
    else
11995
133k
        avail = xmlBufUse(ctxt->input->buf->buffer) -
11996
133k
                            (ctxt->input->cur - ctxt->input->base);
11997
133k
    if (avail < 2)
11998
73.9k
        goto done;
11999
59.7k
    cur = ctxt->input->cur[0];
12000
59.7k
    next = ctxt->input->cur[1];
12001
59.7k
          if ((cur == '<') && (next == '?')) {
12002
12.2k
        if ((!terminate) &&
12003
12.2k
            (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
12004
9.78k
                        ctxt->progressive = XML_PARSER_PI;
12005
9.78k
      goto done;
12006
9.78k
                    }
12007
#ifdef DEBUG_PUSH
12008
        xmlGenericError(xmlGenericErrorContext,
12009
          "PP: Parsing PI\n");
12010
#endif
12011
2.47k
        xmlParsePI(ctxt);
12012
2.47k
        if (ctxt->instate == XML_PARSER_EOF)
12013
0
      goto done;
12014
2.47k
        ctxt->instate = XML_PARSER_EPILOG;
12015
2.47k
                    ctxt->progressive = 1;
12016
47.4k
    } else if ((cur == '<') && (next == '!') &&
12017
47.4k
        (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
12018
31.4k
        if ((!terminate) &&
12019
31.4k
            (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
12020
30.0k
                        ctxt->progressive = XML_PARSER_COMMENT;
12021
30.0k
      goto done;
12022
30.0k
                    }
12023
#ifdef DEBUG_PUSH
12024
        xmlGenericError(xmlGenericErrorContext,
12025
          "PP: Parsing Comment\n");
12026
#endif
12027
1.38k
        xmlParseComment(ctxt);
12028
1.38k
        if (ctxt->instate == XML_PARSER_EOF)
12029
0
      goto done;
12030
1.38k
        ctxt->instate = XML_PARSER_EPILOG;
12031
1.38k
                    ctxt->progressive = 1;
12032
16.0k
    } else if ((cur == '<') && (next == '!') &&
12033
16.0k
               (avail < 4)) {
12034
984
        goto done;
12035
15.0k
    } else {
12036
15.0k
        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
12037
15.0k
        xmlHaltParser(ctxt);
12038
#ifdef DEBUG_PUSH
12039
        xmlGenericError(xmlGenericErrorContext,
12040
          "PP: entering EOF\n");
12041
#endif
12042
15.0k
        if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
12043
15.0k
      ctxt->sax->endDocument(ctxt->userData);
12044
15.0k
        goto done;
12045
15.0k
    }
12046
3.86k
    break;
12047
4.72M
            case XML_PARSER_DTD: {
12048
          /*
12049
     * Sorry but progressive parsing of the internal subset
12050
     * is not expected to be supported. We first check that
12051
     * the full content of the internal subset is available and
12052
     * the parsing is launched only at that point.
12053
     * Internal subset ends up with "']' S? '>'" in an unescaped
12054
     * section and not in a ']]>' sequence which are conditional
12055
     * sections (whoever argued to keep that crap in XML deserve
12056
     * a place in hell !).
12057
     */
12058
4.72M
    int base, i;
12059
4.72M
    xmlChar *buf;
12060
4.72M
          xmlChar quote = 0;
12061
4.72M
                size_t use;
12062
12063
4.72M
    base = ctxt->input->cur - ctxt->input->base;
12064
4.72M
    if (base < 0) return(0);
12065
4.72M
    if (ctxt->checkIndex > base)
12066
2.10M
        base = ctxt->checkIndex;
12067
4.72M
    buf = xmlBufContent(ctxt->input->buf->buffer);
12068
4.72M
                use = xmlBufUse(ctxt->input->buf->buffer);
12069
313G
    for (;(unsigned int) base < use; base++) {
12070
313G
        if (quote != 0) {
12071
200G
            if (buf[base] == quote)
12072
13.2G
          quote = 0;
12073
200G
      continue;
12074
200G
        }
12075
113G
        if ((quote == 0) && (buf[base] == '<')) {
12076
3.34G
            int found  = 0;
12077
      /* special handling of comments */
12078
3.34G
            if (((unsigned int) base + 4 < use) &&
12079
3.34G
          (buf[base + 1] == '!') &&
12080
3.34G
          (buf[base + 2] == '-') &&
12081
3.34G
          (buf[base + 3] == '-')) {
12082
5.46G
          for (;(unsigned int) base + 3 < use; base++) {
12083
5.46G
        if ((buf[base] == '-') &&
12084
5.46G
            (buf[base + 1] == '-') &&
12085
5.46G
            (buf[base + 2] == '>')) {
12086
5.70M
            found = 1;
12087
5.70M
            base += 2;
12088
5.70M
            break;
12089
5.70M
        }
12090
5.46G
                }
12091
5.87M
          if (!found) {
12092
#if 0
12093
              fprintf(stderr, "unfinished comment\n");
12094
#endif
12095
174k
              break; /* for */
12096
174k
                }
12097
5.70M
                continue;
12098
5.87M
      }
12099
3.34G
        }
12100
113G
        if (buf[base] == '"') {
12101
13.1G
            quote = '"';
12102
13.1G
      continue;
12103
13.1G
        }
12104
100G
        if (buf[base] == '\'') {
12105
58.9M
            quote = '\'';
12106
58.9M
      continue;
12107
58.9M
        }
12108
99.9G
        if (buf[base] == ']') {
12109
#if 0
12110
            fprintf(stderr, "%c%c%c%c: ", buf[base],
12111
              buf[base + 1], buf[base + 2], buf[base + 3]);
12112
#endif
12113
4.73M
            if ((unsigned int) base +1 >= use)
12114
1.28k
          break;
12115
4.73M
      if (buf[base + 1] == ']') {
12116
          /* conditional crap, skip both ']' ! */
12117
2.47M
          base++;
12118
2.47M
          continue;
12119
2.47M
      }
12120
3.87M
            for (i = 1; (unsigned int) base + i < use; i++) {
12121
3.87M
          if (buf[base + i] == '>') {
12122
#if 0
12123
              fprintf(stderr, "found\n");
12124
#endif
12125
193k
              goto found_end_int_subset;
12126
193k
          }
12127
3.67M
          if (!IS_BLANK_CH(buf[base + i])) {
12128
#if 0
12129
              fprintf(stderr, "not found\n");
12130
#endif
12131
2.06M
              goto not_end_of_int_subset;
12132
2.06M
          }
12133
3.67M
      }
12134
#if 0
12135
      fprintf(stderr, "end of stream\n");
12136
#endif
12137
475
            break;
12138
12139
2.26M
        }
12140
99.9G
not_end_of_int_subset:
12141
99.9G
                    continue; /* for */
12142
99.9G
    }
12143
    /*
12144
     * We didn't find the end of the internal subset
12145
     */
12146
4.52M
                if (quote == 0)
12147
2.13M
                    ctxt->checkIndex = base;
12148
2.39M
                else
12149
2.39M
                    ctxt->checkIndex = 0;
12150
#ifdef DEBUG_PUSH
12151
    if (next == 0)
12152
        xmlGenericError(xmlGenericErrorContext,
12153
          "PP: lookup of int subset end filed\n");
12154
#endif
12155
4.52M
          goto done;
12156
12157
193k
found_end_int_subset:
12158
193k
                ctxt->checkIndex = 0;
12159
193k
    xmlParseInternalSubset(ctxt);
12160
193k
    if (ctxt->instate == XML_PARSER_EOF)
12161
6.19k
        goto done;
12162
187k
    ctxt->inSubset = 2;
12163
187k
    if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
12164
187k
        (ctxt->sax->externalSubset != NULL))
12165
154k
        ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
12166
154k
          ctxt->extSubSystem, ctxt->extSubURI);
12167
187k
    ctxt->inSubset = 0;
12168
187k
    xmlCleanSpecialAttr(ctxt);
12169
187k
    if (ctxt->instate == XML_PARSER_EOF)
12170
4.12k
        goto done;
12171
183k
    ctxt->instate = XML_PARSER_PROLOG;
12172
183k
    ctxt->checkIndex = 0;
12173
#ifdef DEBUG_PUSH
12174
    xmlGenericError(xmlGenericErrorContext,
12175
      "PP: entering PROLOG\n");
12176
#endif
12177
183k
                break;
12178
187k
      }
12179
0
            case XML_PARSER_COMMENT:
12180
0
    xmlGenericError(xmlGenericErrorContext,
12181
0
      "PP: internal error, state == COMMENT\n");
12182
0
    ctxt->instate = XML_PARSER_CONTENT;
12183
#ifdef DEBUG_PUSH
12184
    xmlGenericError(xmlGenericErrorContext,
12185
      "PP: entering CONTENT\n");
12186
#endif
12187
0
    break;
12188
0
            case XML_PARSER_IGNORE:
12189
0
    xmlGenericError(xmlGenericErrorContext,
12190
0
      "PP: internal error, state == IGNORE");
12191
0
          ctxt->instate = XML_PARSER_DTD;
12192
#ifdef DEBUG_PUSH
12193
    xmlGenericError(xmlGenericErrorContext,
12194
      "PP: entering DTD\n");
12195
#endif
12196
0
          break;
12197
0
            case XML_PARSER_PI:
12198
0
    xmlGenericError(xmlGenericErrorContext,
12199
0
      "PP: internal error, state == PI\n");
12200
0
    ctxt->instate = XML_PARSER_CONTENT;
12201
#ifdef DEBUG_PUSH
12202
    xmlGenericError(xmlGenericErrorContext,
12203
      "PP: entering CONTENT\n");
12204
#endif
12205
0
    break;
12206
0
            case XML_PARSER_ENTITY_DECL:
12207
0
    xmlGenericError(xmlGenericErrorContext,
12208
0
      "PP: internal error, state == ENTITY_DECL\n");
12209
0
    ctxt->instate = XML_PARSER_DTD;
12210
#ifdef DEBUG_PUSH
12211
    xmlGenericError(xmlGenericErrorContext,
12212
      "PP: entering DTD\n");
12213
#endif
12214
0
    break;
12215
0
            case XML_PARSER_ENTITY_VALUE:
12216
0
    xmlGenericError(xmlGenericErrorContext,
12217
0
      "PP: internal error, state == ENTITY_VALUE\n");
12218
0
    ctxt->instate = XML_PARSER_CONTENT;
12219
#ifdef DEBUG_PUSH
12220
    xmlGenericError(xmlGenericErrorContext,
12221
      "PP: entering DTD\n");
12222
#endif
12223
0
    break;
12224
0
            case XML_PARSER_ATTRIBUTE_VALUE:
12225
0
    xmlGenericError(xmlGenericErrorContext,
12226
0
      "PP: internal error, state == ATTRIBUTE_VALUE\n");
12227
0
    ctxt->instate = XML_PARSER_START_TAG;
12228
#ifdef DEBUG_PUSH
12229
    xmlGenericError(xmlGenericErrorContext,
12230
      "PP: entering START_TAG\n");
12231
#endif
12232
0
    break;
12233
0
            case XML_PARSER_SYSTEM_LITERAL:
12234
0
    xmlGenericError(xmlGenericErrorContext,
12235
0
      "PP: internal error, state == SYSTEM_LITERAL\n");
12236
0
    ctxt->instate = XML_PARSER_START_TAG;
12237
#ifdef DEBUG_PUSH
12238
    xmlGenericError(xmlGenericErrorContext,
12239
      "PP: entering START_TAG\n");
12240
#endif
12241
0
    break;
12242
0
            case XML_PARSER_PUBLIC_LITERAL:
12243
0
    xmlGenericError(xmlGenericErrorContext,
12244
0
      "PP: internal error, state == PUBLIC_LITERAL\n");
12245
0
    ctxt->instate = XML_PARSER_START_TAG;
12246
#ifdef DEBUG_PUSH
12247
    xmlGenericError(xmlGenericErrorContext,
12248
      "PP: entering START_TAG\n");
12249
#endif
12250
0
    break;
12251
56.8M
  }
12252
56.8M
    }
12253
9.43M
done:
12254
#ifdef DEBUG_PUSH
12255
    xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
12256
#endif
12257
9.43M
    return(ret);
12258
391k
encoding_error:
12259
391k
    {
12260
391k
        char buffer[150];
12261
12262
391k
  snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
12263
391k
      ctxt->input->cur[0], ctxt->input->cur[1],
12264
391k
      ctxt->input->cur[2], ctxt->input->cur[3]);
12265
391k
  __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
12266
391k
         "Input is not proper UTF-8, indicate encoding !\n%s",
12267
391k
         BAD_CAST buffer, NULL);
12268
391k
    }
12269
391k
    return(0);
12270
10.9M
}
12271
12272
/**
12273
 * xmlParseCheckTransition:
12274
 * @ctxt:  an XML parser context
12275
 * @chunk:  a char array
12276
 * @size:  the size in byte of the chunk
12277
 *
12278
 * Check depending on the current parser state if the chunk given must be
12279
 * processed immediately or one need more data to advance on parsing.
12280
 *
12281
 * Returns -1 in case of error, 0 if the push is not needed and 1 if needed
12282
 */
12283
static int
12284
12.2M
xmlParseCheckTransition(xmlParserCtxtPtr ctxt, const char *chunk, int size) {
12285
12.2M
    if ((ctxt == NULL) || (chunk == NULL) || (size < 0))
12286
0
        return(-1);
12287
12.2M
    if (ctxt->instate == XML_PARSER_START_TAG) {
12288
1.78M
        if (memchr(chunk, '>', size) != NULL)
12289
1.02M
            return(1);
12290
763k
        return(0);
12291
1.78M
    }
12292
10.4M
    if (ctxt->progressive == XML_PARSER_COMMENT) {
12293
417k
        if (memchr(chunk, '>', size) != NULL)
12294
309k
            return(1);
12295
108k
        return(0);
12296
417k
    }
12297
10.0M
    if (ctxt->instate == XML_PARSER_CDATA_SECTION) {
12298
1.65M
        if (memchr(chunk, '>', size) != NULL)
12299
1.24M
            return(1);
12300
400k
        return(0);
12301
1.65M
    }
12302
8.42M
    if (ctxt->progressive == XML_PARSER_PI) {
12303
151k
        if (memchr(chunk, '>', size) != NULL)
12304
121k
            return(1);
12305
29.7k
        return(0);
12306
151k
    }
12307
8.27M
    if (ctxt->instate == XML_PARSER_END_TAG) {
12308
131k
        if (memchr(chunk, '>', size) != NULL)
12309
111k
            return(1);
12310
20.0k
        return(0);
12311
131k
    }
12312
8.14M
    if ((ctxt->progressive == XML_PARSER_DTD) ||
12313
8.14M
        (ctxt->instate == XML_PARSER_DTD)) {
12314
5.38M
        if (memchr(chunk, '>', size) != NULL)
12315
4.44M
            return(1);
12316
945k
        return(0);
12317
5.38M
    }
12318
2.75M
    return(1);
12319
8.14M
}
12320
12321
/**
12322
 * xmlParseChunk:
12323
 * @ctxt:  an XML parser context
12324
 * @chunk:  an char array
12325
 * @size:  the size in byte of the chunk
12326
 * @terminate:  last chunk indicator
12327
 *
12328
 * Parse a Chunk of memory
12329
 *
12330
 * Returns zero if no error, the xmlParserErrors otherwise.
12331
 */
12332
int
12333
xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
12334
23.8M
              int terminate) {
12335
23.8M
    int end_in_lf = 0;
12336
23.8M
    int remain = 0;
12337
23.8M
    size_t old_avail = 0;
12338
23.8M
    size_t avail = 0;
12339
12340
23.8M
    if (ctxt == NULL)
12341
0
        return(XML_ERR_INTERNAL_ERROR);
12342
23.8M
    if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12343
10.7M
        return(ctxt->errNo);
12344
13.1M
    if (ctxt->instate == XML_PARSER_EOF)
12345
1.83k
        return(-1);
12346
13.0M
    if (ctxt->instate == XML_PARSER_START)
12347
1.63M
        xmlDetectSAX2(ctxt);
12348
13.0M
    if ((size > 0) && (chunk != NULL) && (!terminate) &&
12349
13.0M
        (chunk[size - 1] == '\r')) {
12350
186k
  end_in_lf = 1;
12351
186k
  size--;
12352
186k
    }
12353
12354
13.1M
xmldecl_done:
12355
12356
13.1M
    if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
12357
13.1M
        (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF))  {
12358
12.8M
  size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12359
12.8M
  size_t cur = ctxt->input->cur - ctxt->input->base;
12360
12.8M
  int res;
12361
12362
12.8M
        old_avail = xmlBufUse(ctxt->input->buf->buffer);
12363
        /*
12364
         * Specific handling if we autodetected an encoding, we should not
12365
         * push more than the first line ... which depend on the encoding
12366
         * And only push the rest once the final encoding was detected
12367
         */
12368
12.8M
        if ((ctxt->instate == XML_PARSER_START) && (ctxt->input != NULL) &&
12369
12.8M
            (ctxt->input->buf != NULL) && (ctxt->input->buf->encoder != NULL)) {
12370
155k
            unsigned int len = 45;
12371
12372
155k
            if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12373
155k
                               BAD_CAST "UTF-16")) ||
12374
155k
                (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12375
36.6k
                               BAD_CAST "UTF16")))
12376
118k
                len = 90;
12377
36.6k
            else if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12378
36.6k
                                    BAD_CAST "UCS-4")) ||
12379
36.6k
                     (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12380
36.1k
                                    BAD_CAST "UCS4")))
12381
550
                len = 180;
12382
12383
155k
            if (ctxt->input->buf->rawconsumed < len)
12384
11.8k
                len -= ctxt->input->buf->rawconsumed;
12385
12386
            /*
12387
             * Change size for reading the initial declaration only
12388
             * if size is greater than len. Otherwise, memmove in xmlBufferAdd
12389
             * will blindly copy extra bytes from memory.
12390
             */
12391
155k
            if ((unsigned int) size > len) {
12392
103k
                remain = size - len;
12393
103k
                size = len;
12394
103k
            } else {
12395
51.7k
                remain = 0;
12396
51.7k
            }
12397
155k
        }
12398
12.8M
  res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12399
12.8M
        xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
12400
12.8M
  if (res < 0) {
12401
4.20k
      ctxt->errNo = XML_PARSER_EOF;
12402
4.20k
      xmlHaltParser(ctxt);
12403
4.20k
      return (XML_PARSER_EOF);
12404
4.20k
  }
12405
#ifdef DEBUG_PUSH
12406
  xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12407
#endif
12408
12409
12.8M
    } else if (ctxt->instate != XML_PARSER_EOF) {
12410
353k
  if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
12411
353k
      xmlParserInputBufferPtr in = ctxt->input->buf;
12412
353k
      if ((in->encoder != NULL) && (in->buffer != NULL) &&
12413
353k
        (in->raw != NULL)) {
12414
28.2k
    int nbchars;
12415
28.2k
    size_t base = xmlBufGetInputBase(in->buffer, ctxt->input);
12416
28.2k
    size_t current = ctxt->input->cur - ctxt->input->base;
12417
12418
28.2k
    nbchars = xmlCharEncInput(in, terminate);
12419
28.2k
    xmlBufSetInputBaseCur(in->buffer, ctxt->input, base, current);
12420
28.2k
    if (nbchars < 0) {
12421
        /* TODO 2.6.0 */
12422
4.52k
        xmlGenericError(xmlGenericErrorContext,
12423
4.52k
            "xmlParseChunk: encoder error\n");
12424
4.52k
                    xmlHaltParser(ctxt);
12425
4.52k
        return(XML_ERR_INVALID_ENCODING);
12426
4.52k
    }
12427
28.2k
      }
12428
353k
  }
12429
353k
    }
12430
13.1M
    if (remain != 0) {
12431
102k
        xmlParseTryOrFinish(ctxt, 0);
12432
13.0M
    } else {
12433
13.0M
        if ((ctxt->input != NULL) && (ctxt->input->buf != NULL))
12434
13.0M
            avail = xmlBufUse(ctxt->input->buf->buffer);
12435
        /*
12436
         * Depending on the current state it may not be such
12437
         * a good idea to try parsing if there is nothing in the chunk
12438
         * which would be worth doing a parser state transition and we
12439
         * need to wait for more data
12440
         */
12441
13.0M
        if ((terminate) || (avail > XML_MAX_TEXT_LENGTH) ||
12442
13.0M
            (old_avail == 0) || (avail == 0) ||
12443
13.0M
            (xmlParseCheckTransition(ctxt,
12444
12.2M
                       (const char *)&ctxt->input->base[old_avail],
12445
12.2M
                                     avail - old_avail)))
12446
10.8M
            xmlParseTryOrFinish(ctxt, terminate);
12447
13.0M
    }
12448
13.1M
    if (ctxt->instate == XML_PARSER_EOF)
12449
325k
        return(ctxt->errNo);
12450
12451
12.8M
    if ((ctxt->input != NULL) &&
12452
12.8M
         (((ctxt->input->end - ctxt->input->cur) > XML_MAX_LOOKUP_LIMIT) ||
12453
12.8M
         ((ctxt->input->cur - ctxt->input->base) > XML_MAX_LOOKUP_LIMIT)) &&
12454
12.8M
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
12455
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
12456
0
        xmlHaltParser(ctxt);
12457
0
    }
12458
12.8M
    if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12459
265k
        return(ctxt->errNo);
12460
12461
12.5M
    if (remain != 0) {
12462
100k
        chunk += size;
12463
100k
        size = remain;
12464
100k
        remain = 0;
12465
100k
        goto xmldecl_done;
12466
100k
    }
12467
12.4M
    if ((end_in_lf == 1) && (ctxt->input != NULL) &&
12468
12.4M
        (ctxt->input->buf != NULL)) {
12469
184k
  size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
12470
184k
           ctxt->input);
12471
184k
  size_t current = ctxt->input->cur - ctxt->input->base;
12472
12473
184k
  xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
12474
12475
184k
  xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
12476
184k
            base, current);
12477
184k
    }
12478
12.4M
    if (terminate) {
12479
  /*
12480
   * Check for termination
12481
   */
12482
182k
  int cur_avail = 0;
12483
12484
182k
  if (ctxt->input != NULL) {
12485
182k
      if (ctxt->input->buf == NULL)
12486
0
    cur_avail = ctxt->input->length -
12487
0
          (ctxt->input->cur - ctxt->input->base);
12488
182k
      else
12489
182k
    cur_avail = xmlBufUse(ctxt->input->buf->buffer) -
12490
182k
                    (ctxt->input->cur - ctxt->input->base);
12491
182k
  }
12492
12493
182k
  if ((ctxt->instate != XML_PARSER_EOF) &&
12494
182k
      (ctxt->instate != XML_PARSER_EPILOG)) {
12495
118k
      xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
12496
118k
  }
12497
182k
  if ((ctxt->instate == XML_PARSER_EPILOG) && (cur_avail > 0)) {
12498
880
      xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
12499
880
  }
12500
182k
  if (ctxt->instate != XML_PARSER_EOF) {
12501
182k
      if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
12502
182k
    ctxt->sax->endDocument(ctxt->userData);
12503
182k
  }
12504
182k
  ctxt->instate = XML_PARSER_EOF;
12505
182k
    }
12506
12.4M
    if (ctxt->wellFormed == 0)
12507
4.75M
  return((xmlParserErrors) ctxt->errNo);
12508
7.74M
    else
12509
7.74M
        return(0);
12510
12.4M
}
12511
12512
/************************************************************************
12513
 *                  *
12514
 *    I/O front end functions to the parser     *
12515
 *                  *
12516
 ************************************************************************/
12517
12518
/**
12519
 * xmlCreatePushParserCtxt:
12520
 * @sax:  a SAX handler
12521
 * @user_data:  The user data returned on SAX callbacks
12522
 * @chunk:  a pointer to an array of chars
12523
 * @size:  number of chars in the array
12524
 * @filename:  an optional file name or URI
12525
 *
12526
 * Create a parser context for using the XML parser in push mode.
12527
 * If @buffer and @size are non-NULL, the data is used to detect
12528
 * the encoding.  The remaining characters will be parsed so they
12529
 * don't need to be fed in again through xmlParseChunk.
12530
 * To allow content encoding detection, @size should be >= 4
12531
 * The value of @filename is used for fetching external entities
12532
 * and error/warning reports.
12533
 *
12534
 * Returns the new parser context or NULL
12535
 */
12536
12537
xmlParserCtxtPtr
12538
xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12539
874k
                        const char *chunk, int size, const char *filename) {
12540
874k
    xmlParserCtxtPtr ctxt;
12541
874k
    xmlParserInputPtr inputStream;
12542
874k
    xmlParserInputBufferPtr buf;
12543
874k
    xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
12544
12545
    /*
12546
     * plug some encoding conversion routines
12547
     */
12548
874k
    if ((chunk != NULL) && (size >= 4))
12549
430k
  enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
12550
12551
874k
    buf = xmlAllocParserInputBuffer(enc);
12552
874k
    if (buf == NULL) return(NULL);
12553
12554
874k
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
12555
874k
    if (ctxt == NULL) {
12556
0
        xmlErrMemory(NULL, "creating parser: out of memory\n");
12557
0
  xmlFreeParserInputBuffer(buf);
12558
0
  return(NULL);
12559
0
    }
12560
874k
    ctxt->dictNames = 1;
12561
874k
    if (filename == NULL) {
12562
437k
  ctxt->directory = NULL;
12563
437k
    } else {
12564
437k
        ctxt->directory = xmlParserGetDirectory(filename);
12565
437k
    }
12566
12567
874k
    inputStream = xmlNewInputStream(ctxt);
12568
874k
    if (inputStream == NULL) {
12569
0
  xmlFreeParserCtxt(ctxt);
12570
0
  xmlFreeParserInputBuffer(buf);
12571
0
  return(NULL);
12572
0
    }
12573
12574
874k
    if (filename == NULL)
12575
437k
  inputStream->filename = NULL;
12576
437k
    else {
12577
437k
  inputStream->filename = (char *)
12578
437k
      xmlCanonicPath((const xmlChar *) filename);
12579
437k
  if (inputStream->filename == NULL) {
12580
0
      xmlFreeParserCtxt(ctxt);
12581
0
      xmlFreeParserInputBuffer(buf);
12582
0
      return(NULL);
12583
0
  }
12584
437k
    }
12585
874k
    inputStream->buf = buf;
12586
874k
    xmlBufResetInput(inputStream->buf->buffer, inputStream);
12587
874k
    inputPush(ctxt, inputStream);
12588
12589
    /*
12590
     * If the caller didn't provide an initial 'chunk' for determining
12591
     * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
12592
     * that it can be automatically determined later
12593
     */
12594
874k
    if ((size == 0) || (chunk == NULL)) {
12595
444k
  ctxt->charset = XML_CHAR_ENCODING_NONE;
12596
444k
    } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
12597
430k
  size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12598
430k
  size_t cur = ctxt->input->cur - ctxt->input->base;
12599
12600
430k
  xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12601
12602
430k
        xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
12603
#ifdef DEBUG_PUSH
12604
  xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12605
#endif
12606
430k
    }
12607
12608
874k
    if (enc != XML_CHAR_ENCODING_NONE) {
12609
171k
        xmlSwitchEncoding(ctxt, enc);
12610
171k
    }
12611
12612
874k
    return(ctxt);
12613
874k
}
12614
#endif /* LIBXML_PUSH_ENABLED */
12615
12616
/**
12617
 * xmlHaltParser:
12618
 * @ctxt:  an XML parser context
12619
 *
12620
 * Blocks further parser processing don't override error
12621
 * for internal use
12622
 */
12623
static void
12624
1.71M
xmlHaltParser(xmlParserCtxtPtr ctxt) {
12625
1.71M
    if (ctxt == NULL)
12626
0
        return;
12627
1.71M
    ctxt->instate = XML_PARSER_EOF;
12628
1.71M
    ctxt->disableSAX = 1;
12629
1.71M
    while (ctxt->inputNr > 1)
12630
3.39k
        xmlFreeInputStream(inputPop(ctxt));
12631
1.71M
    if (ctxt->input != NULL) {
12632
        /*
12633
   * in case there was a specific allocation deallocate before
12634
   * overriding base
12635
   */
12636
1.71M
        if (ctxt->input->free != NULL) {
12637
0
      ctxt->input->free((xmlChar *) ctxt->input->base);
12638
0
      ctxt->input->free = NULL;
12639
0
  }
12640
1.71M
        if (ctxt->input->buf != NULL) {
12641
1.55M
            xmlFreeParserInputBuffer(ctxt->input->buf);
12642
1.55M
            ctxt->input->buf = NULL;
12643
1.55M
        }
12644
1.71M
  ctxt->input->cur = BAD_CAST"";
12645
1.71M
        ctxt->input->length = 0;
12646
1.71M
  ctxt->input->base = ctxt->input->cur;
12647
1.71M
        ctxt->input->end = ctxt->input->cur;
12648
1.71M
    }
12649
1.71M
}
12650
12651
/**
12652
 * xmlStopParser:
12653
 * @ctxt:  an XML parser context
12654
 *
12655
 * Blocks further parser processing
12656
 */
12657
void
12658
437k
xmlStopParser(xmlParserCtxtPtr ctxt) {
12659
437k
    if (ctxt == NULL)
12660
0
        return;
12661
437k
    xmlHaltParser(ctxt);
12662
437k
    ctxt->errNo = XML_ERR_USER_STOP;
12663
437k
}
12664
12665
/**
12666
 * xmlCreateIOParserCtxt:
12667
 * @sax:  a SAX handler
12668
 * @user_data:  The user data returned on SAX callbacks
12669
 * @ioread:  an I/O read function
12670
 * @ioclose:  an I/O close function
12671
 * @ioctx:  an I/O handler
12672
 * @enc:  the charset encoding if known
12673
 *
12674
 * Create a parser context for using the XML parser with an existing
12675
 * I/O stream
12676
 *
12677
 * Returns the new parser context or NULL
12678
 */
12679
xmlParserCtxtPtr
12680
xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12681
  xmlInputReadCallback   ioread, xmlInputCloseCallback  ioclose,
12682
0
  void *ioctx, xmlCharEncoding enc) {
12683
0
    xmlParserCtxtPtr ctxt;
12684
0
    xmlParserInputPtr inputStream;
12685
0
    xmlParserInputBufferPtr buf;
12686
12687
0
    if (ioread == NULL) return(NULL);
12688
12689
0
    buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
12690
0
    if (buf == NULL) {
12691
0
        if (ioclose != NULL)
12692
0
            ioclose(ioctx);
12693
0
        return (NULL);
12694
0
    }
12695
12696
0
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
12697
0
    if (ctxt == NULL) {
12698
0
  xmlFreeParserInputBuffer(buf);
12699
0
  return(NULL);
12700
0
    }
12701
12702
0
    inputStream = xmlNewIOInputStream(ctxt, buf, enc);
12703
0
    if (inputStream == NULL) {
12704
0
  xmlFreeParserCtxt(ctxt);
12705
0
  return(NULL);
12706
0
    }
12707
0
    inputPush(ctxt, inputStream);
12708
12709
0
    return(ctxt);
12710
0
}
12711
12712
#ifdef LIBXML_VALID_ENABLED
12713
/************************************************************************
12714
 *                  *
12715
 *    Front ends when parsing a DTD       *
12716
 *                  *
12717
 ************************************************************************/
12718
12719
/**
12720
 * xmlIOParseDTD:
12721
 * @sax:  the SAX handler block or NULL
12722
 * @input:  an Input Buffer
12723
 * @enc:  the charset encoding if known
12724
 *
12725
 * Load and parse a DTD
12726
 *
12727
 * Returns the resulting xmlDtdPtr or NULL in case of error.
12728
 * @input will be freed by the function in any case.
12729
 */
12730
12731
xmlDtdPtr
12732
xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
12733
0
        xmlCharEncoding enc) {
12734
0
    xmlDtdPtr ret = NULL;
12735
0
    xmlParserCtxtPtr ctxt;
12736
0
    xmlParserInputPtr pinput = NULL;
12737
0
    xmlChar start[4];
12738
12739
0
    if (input == NULL)
12740
0
  return(NULL);
12741
12742
0
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
12743
0
    if (ctxt == NULL) {
12744
0
        xmlFreeParserInputBuffer(input);
12745
0
  return(NULL);
12746
0
    }
12747
12748
    /* We are loading a DTD */
12749
0
    ctxt->options |= XML_PARSE_DTDLOAD;
12750
12751
0
    xmlDetectSAX2(ctxt);
12752
12753
    /*
12754
     * generate a parser input from the I/O handler
12755
     */
12756
12757
0
    pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12758
0
    if (pinput == NULL) {
12759
0
        xmlFreeParserInputBuffer(input);
12760
0
  xmlFreeParserCtxt(ctxt);
12761
0
  return(NULL);
12762
0
    }
12763
12764
    /*
12765
     * plug some encoding conversion routines here.
12766
     */
12767
0
    if (xmlPushInput(ctxt, pinput) < 0) {
12768
0
  xmlFreeParserCtxt(ctxt);
12769
0
  return(NULL);
12770
0
    }
12771
0
    if (enc != XML_CHAR_ENCODING_NONE) {
12772
0
        xmlSwitchEncoding(ctxt, enc);
12773
0
    }
12774
12775
0
    pinput->filename = NULL;
12776
0
    pinput->line = 1;
12777
0
    pinput->col = 1;
12778
0
    pinput->base = ctxt->input->cur;
12779
0
    pinput->cur = ctxt->input->cur;
12780
0
    pinput->free = NULL;
12781
12782
    /*
12783
     * let's parse that entity knowing it's an external subset.
12784
     */
12785
0
    ctxt->inSubset = 2;
12786
0
    ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12787
0
    if (ctxt->myDoc == NULL) {
12788
0
  xmlErrMemory(ctxt, "New Doc failed");
12789
0
  return(NULL);
12790
0
    }
12791
0
    ctxt->myDoc->properties = XML_DOC_INTERNAL;
12792
0
    ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12793
0
                                 BAD_CAST "none", BAD_CAST "none");
12794
12795
0
    if ((enc == XML_CHAR_ENCODING_NONE) &&
12796
0
        ((ctxt->input->end - ctxt->input->cur) >= 4)) {
12797
  /*
12798
   * Get the 4 first bytes and decode the charset
12799
   * if enc != XML_CHAR_ENCODING_NONE
12800
   * plug some encoding conversion routines.
12801
   */
12802
0
  start[0] = RAW;
12803
0
  start[1] = NXT(1);
12804
0
  start[2] = NXT(2);
12805
0
  start[3] = NXT(3);
12806
0
  enc = xmlDetectCharEncoding(start, 4);
12807
0
  if (enc != XML_CHAR_ENCODING_NONE) {
12808
0
      xmlSwitchEncoding(ctxt, enc);
12809
0
  }
12810
0
    }
12811
12812
0
    xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
12813
12814
0
    if (ctxt->myDoc != NULL) {
12815
0
  if (ctxt->wellFormed) {
12816
0
      ret = ctxt->myDoc->extSubset;
12817
0
      ctxt->myDoc->extSubset = NULL;
12818
0
      if (ret != NULL) {
12819
0
    xmlNodePtr tmp;
12820
12821
0
    ret->doc = NULL;
12822
0
    tmp = ret->children;
12823
0
    while (tmp != NULL) {
12824
0
        tmp->doc = NULL;
12825
0
        tmp = tmp->next;
12826
0
    }
12827
0
      }
12828
0
  } else {
12829
0
      ret = NULL;
12830
0
  }
12831
0
        xmlFreeDoc(ctxt->myDoc);
12832
0
        ctxt->myDoc = NULL;
12833
0
    }
12834
0
    xmlFreeParserCtxt(ctxt);
12835
12836
0
    return(ret);
12837
0
}
12838
12839
/**
12840
 * xmlSAXParseDTD:
12841
 * @sax:  the SAX handler block
12842
 * @ExternalID:  a NAME* containing the External ID of the DTD
12843
 * @SystemID:  a NAME* containing the URL to the DTD
12844
 *
12845
 * DEPRECATED: Don't use.
12846
 *
12847
 * Load and parse an external subset.
12848
 *
12849
 * Returns the resulting xmlDtdPtr or NULL in case of error.
12850
 */
12851
12852
xmlDtdPtr
12853
xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
12854
0
                          const xmlChar *SystemID) {
12855
0
    xmlDtdPtr ret = NULL;
12856
0
    xmlParserCtxtPtr ctxt;
12857
0
    xmlParserInputPtr input = NULL;
12858
0
    xmlCharEncoding enc;
12859
0
    xmlChar* systemIdCanonic;
12860
12861
0
    if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
12862
12863
0
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
12864
0
    if (ctxt == NULL) {
12865
0
  return(NULL);
12866
0
    }
12867
12868
    /* We are loading a DTD */
12869
0
    ctxt->options |= XML_PARSE_DTDLOAD;
12870
12871
    /*
12872
     * Canonicalise the system ID
12873
     */
12874
0
    systemIdCanonic = xmlCanonicPath(SystemID);
12875
0
    if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
12876
0
  xmlFreeParserCtxt(ctxt);
12877
0
  return(NULL);
12878
0
    }
12879
12880
    /*
12881
     * Ask the Entity resolver to load the damn thing
12882
     */
12883
12884
0
    if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
12885
0
  input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
12886
0
                                   systemIdCanonic);
12887
0
    if (input == NULL) {
12888
0
  xmlFreeParserCtxt(ctxt);
12889
0
  if (systemIdCanonic != NULL)
12890
0
      xmlFree(systemIdCanonic);
12891
0
  return(NULL);
12892
0
    }
12893
12894
    /*
12895
     * plug some encoding conversion routines here.
12896
     */
12897
0
    if (xmlPushInput(ctxt, input) < 0) {
12898
0
  xmlFreeParserCtxt(ctxt);
12899
0
  if (systemIdCanonic != NULL)
12900
0
      xmlFree(systemIdCanonic);
12901
0
  return(NULL);
12902
0
    }
12903
0
    if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12904
0
  enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
12905
0
  xmlSwitchEncoding(ctxt, enc);
12906
0
    }
12907
12908
0
    if (input->filename == NULL)
12909
0
  input->filename = (char *) systemIdCanonic;
12910
0
    else
12911
0
  xmlFree(systemIdCanonic);
12912
0
    input->line = 1;
12913
0
    input->col = 1;
12914
0
    input->base = ctxt->input->cur;
12915
0
    input->cur = ctxt->input->cur;
12916
0
    input->free = NULL;
12917
12918
    /*
12919
     * let's parse that entity knowing it's an external subset.
12920
     */
12921
0
    ctxt->inSubset = 2;
12922
0
    ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12923
0
    if (ctxt->myDoc == NULL) {
12924
0
  xmlErrMemory(ctxt, "New Doc failed");
12925
0
  xmlFreeParserCtxt(ctxt);
12926
0
  return(NULL);
12927
0
    }
12928
0
    ctxt->myDoc->properties = XML_DOC_INTERNAL;
12929
0
    ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12930
0
                                 ExternalID, SystemID);
12931
0
    xmlParseExternalSubset(ctxt, ExternalID, SystemID);
12932
12933
0
    if (ctxt->myDoc != NULL) {
12934
0
  if (ctxt->wellFormed) {
12935
0
      ret = ctxt->myDoc->extSubset;
12936
0
      ctxt->myDoc->extSubset = NULL;
12937
0
      if (ret != NULL) {
12938
0
    xmlNodePtr tmp;
12939
12940
0
    ret->doc = NULL;
12941
0
    tmp = ret->children;
12942
0
    while (tmp != NULL) {
12943
0
        tmp->doc = NULL;
12944
0
        tmp = tmp->next;
12945
0
    }
12946
0
      }
12947
0
  } else {
12948
0
      ret = NULL;
12949
0
  }
12950
0
        xmlFreeDoc(ctxt->myDoc);
12951
0
        ctxt->myDoc = NULL;
12952
0
    }
12953
0
    xmlFreeParserCtxt(ctxt);
12954
12955
0
    return(ret);
12956
0
}
12957
12958
12959
/**
12960
 * xmlParseDTD:
12961
 * @ExternalID:  a NAME* containing the External ID of the DTD
12962
 * @SystemID:  a NAME* containing the URL to the DTD
12963
 *
12964
 * Load and parse an external subset.
12965
 *
12966
 * Returns the resulting xmlDtdPtr or NULL in case of error.
12967
 */
12968
12969
xmlDtdPtr
12970
0
xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
12971
0
    return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
12972
0
}
12973
#endif /* LIBXML_VALID_ENABLED */
12974
12975
/************************************************************************
12976
 *                  *
12977
 *    Front ends when parsing an Entity     *
12978
 *                  *
12979
 ************************************************************************/
12980
12981
/**
12982
 * xmlParseCtxtExternalEntity:
12983
 * @ctx:  the existing parsing context
12984
 * @URL:  the URL for the entity to load
12985
 * @ID:  the System ID for the entity to load
12986
 * @lst:  the return value for the set of parsed nodes
12987
 *
12988
 * Parse an external general entity within an existing parsing context
12989
 * An external general parsed entity is well-formed if it matches the
12990
 * production labeled extParsedEnt.
12991
 *
12992
 * [78] extParsedEnt ::= TextDecl? content
12993
 *
12994
 * Returns 0 if the entity is well formed, -1 in case of args problem and
12995
 *    the parser error code otherwise
12996
 */
12997
12998
int
12999
xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
13000
0
                 const xmlChar *ID, xmlNodePtr *lst) {
13001
0
    void *userData;
13002
13003
0
    if (ctx == NULL) return(-1);
13004
    /*
13005
     * If the user provided their own SAX callbacks, then reuse the
13006
     * userData callback field, otherwise the expected setup in a
13007
     * DOM builder is to have userData == ctxt
13008
     */
13009
0
    if (ctx->userData == ctx)
13010
0
        userData = NULL;
13011
0
    else
13012
0
        userData = ctx->userData;
13013
0
    return xmlParseExternalEntityPrivate(ctx->myDoc, ctx, ctx->sax,
13014
0
                                         userData, ctx->depth + 1,
13015
0
                                         URL, ID, lst);
13016
0
}
13017
13018
/**
13019
 * xmlParseExternalEntityPrivate:
13020
 * @doc:  the document the chunk pertains to
13021
 * @oldctxt:  the previous parser context if available
13022
 * @sax:  the SAX handler block (possibly NULL)
13023
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
13024
 * @depth:  Used for loop detection, use 0
13025
 * @URL:  the URL for the entity to load
13026
 * @ID:  the System ID for the entity to load
13027
 * @list:  the return value for the set of parsed nodes
13028
 *
13029
 * Private version of xmlParseExternalEntity()
13030
 *
13031
 * Returns 0 if the entity is well formed, -1 in case of args problem and
13032
 *    the parser error code otherwise
13033
 */
13034
13035
static xmlParserErrors
13036
xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
13037
                xmlSAXHandlerPtr sax,
13038
          void *user_data, int depth, const xmlChar *URL,
13039
5.52M
          const xmlChar *ID, xmlNodePtr *list) {
13040
5.52M
    xmlParserCtxtPtr ctxt;
13041
5.52M
    xmlDocPtr newDoc;
13042
5.52M
    xmlNodePtr newRoot;
13043
5.52M
    xmlParserErrors ret = XML_ERR_OK;
13044
5.52M
    xmlChar start[4];
13045
5.52M
    xmlCharEncoding enc;
13046
13047
5.52M
    if (((depth > 40) &&
13048
5.52M
  ((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13049
5.52M
  (depth > 1024)) {
13050
4.92k
  return(XML_ERR_ENTITY_LOOP);
13051
4.92k
    }
13052
13053
5.52M
    if (list != NULL)
13054
5.48M
        *list = NULL;
13055
5.52M
    if ((URL == NULL) && (ID == NULL))
13056
556
  return(XML_ERR_INTERNAL_ERROR);
13057
5.52M
    if (doc == NULL)
13058
0
  return(XML_ERR_INTERNAL_ERROR);
13059
13060
5.52M
    ctxt = xmlCreateEntityParserCtxtInternal(sax, user_data, URL, ID, NULL,
13061
5.52M
                                             oldctxt);
13062
5.52M
    if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
13063
5.46M
    xmlDetectSAX2(ctxt);
13064
13065
5.46M
    newDoc = xmlNewDoc(BAD_CAST "1.0");
13066
5.46M
    if (newDoc == NULL) {
13067
0
  xmlFreeParserCtxt(ctxt);
13068
0
  return(XML_ERR_INTERNAL_ERROR);
13069
0
    }
13070
5.46M
    newDoc->properties = XML_DOC_INTERNAL;
13071
5.46M
    if (doc) {
13072
5.46M
        newDoc->intSubset = doc->intSubset;
13073
5.46M
        newDoc->extSubset = doc->extSubset;
13074
5.46M
        if (doc->dict) {
13075
3.09M
            newDoc->dict = doc->dict;
13076
3.09M
            xmlDictReference(newDoc->dict);
13077
3.09M
        }
13078
5.46M
        if (doc->URL != NULL) {
13079
3.04M
            newDoc->URL = xmlStrdup(doc->URL);
13080
3.04M
        }
13081
5.46M
    }
13082
5.46M
    newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13083
5.46M
    if (newRoot == NULL) {
13084
0
  if (sax != NULL)
13085
0
  xmlFreeParserCtxt(ctxt);
13086
0
  newDoc->intSubset = NULL;
13087
0
  newDoc->extSubset = NULL;
13088
0
        xmlFreeDoc(newDoc);
13089
0
  return(XML_ERR_INTERNAL_ERROR);
13090
0
    }
13091
5.46M
    xmlAddChild((xmlNodePtr) newDoc, newRoot);
13092
5.46M
    nodePush(ctxt, newDoc->children);
13093
5.46M
    if (doc == NULL) {
13094
0
        ctxt->myDoc = newDoc;
13095
5.46M
    } else {
13096
5.46M
        ctxt->myDoc = doc;
13097
5.46M
        newRoot->doc = doc;
13098
5.46M
    }
13099
13100
    /*
13101
     * Get the 4 first bytes and decode the charset
13102
     * if enc != XML_CHAR_ENCODING_NONE
13103
     * plug some encoding conversion routines.
13104
     */
13105
5.46M
    GROW;
13106
5.46M
    if ((ctxt->input->end - ctxt->input->cur) >= 4) {
13107
5.46M
  start[0] = RAW;
13108
5.46M
  start[1] = NXT(1);
13109
5.46M
  start[2] = NXT(2);
13110
5.46M
  start[3] = NXT(3);
13111
5.46M
  enc = xmlDetectCharEncoding(start, 4);
13112
5.46M
  if (enc != XML_CHAR_ENCODING_NONE) {
13113
28.6k
      xmlSwitchEncoding(ctxt, enc);
13114
28.6k
  }
13115
5.46M
    }
13116
13117
    /*
13118
     * Parse a possible text declaration first
13119
     */
13120
5.46M
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
13121
26.5k
  xmlParseTextDecl(ctxt);
13122
        /*
13123
         * An XML-1.0 document can't reference an entity not XML-1.0
13124
         */
13125
26.5k
        if ((xmlStrEqual(oldctxt->version, BAD_CAST "1.0")) &&
13126
26.5k
            (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
13127
202
            xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
13128
202
                           "Version mismatch between document and entity\n");
13129
202
        }
13130
26.5k
    }
13131
13132
5.46M
    ctxt->instate = XML_PARSER_CONTENT;
13133
5.46M
    ctxt->depth = depth;
13134
5.46M
    if (oldctxt != NULL) {
13135
5.46M
  ctxt->_private = oldctxt->_private;
13136
5.46M
  ctxt->loadsubset = oldctxt->loadsubset;
13137
5.46M
  ctxt->validate = oldctxt->validate;
13138
5.46M
  ctxt->valid = oldctxt->valid;
13139
5.46M
  ctxt->replaceEntities = oldctxt->replaceEntities;
13140
5.46M
        if (oldctxt->validate) {
13141
5.08M
            ctxt->vctxt.error = oldctxt->vctxt.error;
13142
5.08M
            ctxt->vctxt.warning = oldctxt->vctxt.warning;
13143
5.08M
            ctxt->vctxt.userData = oldctxt->vctxt.userData;
13144
5.08M
        }
13145
5.46M
  ctxt->external = oldctxt->external;
13146
5.46M
        if (ctxt->dict) xmlDictFree(ctxt->dict);
13147
5.46M
        ctxt->dict = oldctxt->dict;
13148
5.46M
        ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13149
5.46M
        ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13150
5.46M
        ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13151
5.46M
        ctxt->dictNames = oldctxt->dictNames;
13152
5.46M
        ctxt->attsDefault = oldctxt->attsDefault;
13153
5.46M
        ctxt->attsSpecial = oldctxt->attsSpecial;
13154
5.46M
        ctxt->linenumbers = oldctxt->linenumbers;
13155
5.46M
  ctxt->record_info = oldctxt->record_info;
13156
5.46M
  ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
13157
5.46M
  ctxt->node_seq.length = oldctxt->node_seq.length;
13158
5.46M
  ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
13159
5.46M
    } else {
13160
  /*
13161
   * Doing validity checking on chunk without context
13162
   * doesn't make sense
13163
   */
13164
0
  ctxt->_private = NULL;
13165
0
  ctxt->validate = 0;
13166
0
  ctxt->external = 2;
13167
0
  ctxt->loadsubset = 0;
13168
0
    }
13169
13170
5.46M
    xmlParseContent(ctxt);
13171
13172
5.46M
    if ((RAW == '<') && (NXT(1) == '/')) {
13173
587k
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13174
4.87M
    } else if (RAW != 0) {
13175
8.26k
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13176
8.26k
    }
13177
5.46M
    if (ctxt->node != newDoc->children) {
13178
3.86M
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13179
3.86M
    }
13180
13181
5.46M
    if (!ctxt->wellFormed) {
13182
5.43M
        if (ctxt->errNo == 0)
13183
0
      ret = XML_ERR_INTERNAL_ERROR;
13184
5.43M
  else
13185
5.43M
      ret = (xmlParserErrors)ctxt->errNo;
13186
5.43M
    } else {
13187
24.4k
  if (list != NULL) {
13188
7.94k
      xmlNodePtr cur;
13189
13190
      /*
13191
       * Return the newly created nodeset after unlinking it from
13192
       * they pseudo parent.
13193
       */
13194
7.94k
      cur = newDoc->children->children;
13195
7.94k
      *list = cur;
13196
13.6k
      while (cur != NULL) {
13197
5.71k
    cur->parent = NULL;
13198
5.71k
    cur = cur->next;
13199
5.71k
      }
13200
7.94k
            newDoc->children->children = NULL;
13201
7.94k
  }
13202
24.4k
  ret = XML_ERR_OK;
13203
24.4k
    }
13204
13205
    /*
13206
     * Record in the parent context the number of entities replacement
13207
     * done when parsing that reference.
13208
     */
13209
5.46M
    if (oldctxt != NULL)
13210
5.45M
        oldctxt->nbentities += ctxt->nbentities;
13211
13212
    /*
13213
     * Also record the size of the entity parsed
13214
     */
13215
5.46M
    if (ctxt->input != NULL && oldctxt != NULL) {
13216
5.45M
  oldctxt->sizeentities += ctxt->input->consumed;
13217
5.45M
  oldctxt->sizeentities += (ctxt->input->cur - ctxt->input->base);
13218
5.45M
    }
13219
    /*
13220
     * And record the last error if any
13221
     */
13222
5.46M
    if ((oldctxt != NULL) && (ctxt->lastError.code != XML_ERR_OK))
13223
5.43M
        xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13224
13225
5.46M
    if (oldctxt != NULL) {
13226
5.45M
        ctxt->dict = NULL;
13227
5.45M
        ctxt->attsDefault = NULL;
13228
5.45M
        ctxt->attsSpecial = NULL;
13229
5.45M
        oldctxt->validate = ctxt->validate;
13230
5.45M
        oldctxt->valid = ctxt->valid;
13231
5.45M
        oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
13232
5.45M
        oldctxt->node_seq.length = ctxt->node_seq.length;
13233
5.45M
        oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
13234
5.45M
    }
13235
5.46M
    ctxt->node_seq.maximum = 0;
13236
5.46M
    ctxt->node_seq.length = 0;
13237
5.46M
    ctxt->node_seq.buffer = NULL;
13238
5.46M
    xmlFreeParserCtxt(ctxt);
13239
5.46M
    newDoc->intSubset = NULL;
13240
5.46M
    newDoc->extSubset = NULL;
13241
5.46M
    xmlFreeDoc(newDoc);
13242
13243
5.46M
    return(ret);
13244
5.46M
}
13245
13246
#ifdef LIBXML_SAX1_ENABLED
13247
/**
13248
 * xmlParseExternalEntity:
13249
 * @doc:  the document the chunk pertains to
13250
 * @sax:  the SAX handler block (possibly NULL)
13251
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
13252
 * @depth:  Used for loop detection, use 0
13253
 * @URL:  the URL for the entity to load
13254
 * @ID:  the System ID for the entity to load
13255
 * @lst:  the return value for the set of parsed nodes
13256
 *
13257
 * Parse an external general entity
13258
 * An external general parsed entity is well-formed if it matches the
13259
 * production labeled extParsedEnt.
13260
 *
13261
 * [78] extParsedEnt ::= TextDecl? content
13262
 *
13263
 * Returns 0 if the entity is well formed, -1 in case of args problem and
13264
 *    the parser error code otherwise
13265
 */
13266
13267
int
13268
xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
13269
0
    int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
13270
0
    return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
13271
0
                           ID, lst));
13272
0
}
13273
13274
/**
13275
 * xmlParseBalancedChunkMemory:
13276
 * @doc:  the document the chunk pertains to (must not be NULL)
13277
 * @sax:  the SAX handler block (possibly NULL)
13278
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
13279
 * @depth:  Used for loop detection, use 0
13280
 * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
13281
 * @lst:  the return value for the set of parsed nodes
13282
 *
13283
 * Parse a well-balanced chunk of an XML document
13284
 * called by the parser
13285
 * The allowed sequence for the Well Balanced Chunk is the one defined by
13286
 * the content production in the XML grammar:
13287
 *
13288
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13289
 *
13290
 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13291
 *    the parser error code otherwise
13292
 */
13293
13294
int
13295
xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13296
0
     void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
13297
0
    return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
13298
0
                                                depth, string, lst, 0 );
13299
0
}
13300
#endif /* LIBXML_SAX1_ENABLED */
13301
13302
/**
13303
 * xmlParseBalancedChunkMemoryInternal:
13304
 * @oldctxt:  the existing parsing context
13305
 * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
13306
 * @user_data:  the user data field for the parser context
13307
 * @lst:  the return value for the set of parsed nodes
13308
 *
13309
 *
13310
 * Parse a well-balanced chunk of an XML document
13311
 * called by the parser
13312
 * The allowed sequence for the Well Balanced Chunk is the one defined by
13313
 * the content production in the XML grammar:
13314
 *
13315
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13316
 *
13317
 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13318
 * error code otherwise
13319
 *
13320
 * In case recover is set to 1, the nodelist will not be empty even if
13321
 * the parsed chunk is not well balanced.
13322
 */
13323
static xmlParserErrors
13324
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
13325
438k
  const xmlChar *string, void *user_data, xmlNodePtr *lst) {
13326
438k
    xmlParserCtxtPtr ctxt;
13327
438k
    xmlDocPtr newDoc = NULL;
13328
438k
    xmlNodePtr newRoot;
13329
438k
    xmlSAXHandlerPtr oldsax = NULL;
13330
438k
    xmlNodePtr content = NULL;
13331
438k
    xmlNodePtr last = NULL;
13332
438k
    int size;
13333
438k
    xmlParserErrors ret = XML_ERR_OK;
13334
438k
#ifdef SAX2
13335
438k
    int i;
13336
438k
#endif
13337
13338
438k
    if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13339
438k
        (oldctxt->depth >  1024)) {
13340
1.13k
  return(XML_ERR_ENTITY_LOOP);
13341
1.13k
    }
13342
13343
13344
437k
    if (lst != NULL)
13345
427k
        *lst = NULL;
13346
437k
    if (string == NULL)
13347
114
        return(XML_ERR_INTERNAL_ERROR);
13348
13349
437k
    size = xmlStrlen(string);
13350
13351
437k
    ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13352
437k
    if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
13353
426k
    if (user_data != NULL)
13354
0
  ctxt->userData = user_data;
13355
426k
    else
13356
426k
  ctxt->userData = ctxt;
13357
426k
    if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
13358
426k
    ctxt->dict = oldctxt->dict;
13359
426k
    ctxt->input_id = oldctxt->input_id + 1;
13360
426k
    ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13361
426k
    ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13362
426k
    ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13363
13364
426k
#ifdef SAX2
13365
    /* propagate namespaces down the entity */
13366
3.19M
    for (i = 0;i < oldctxt->nsNr;i += 2) {
13367
2.76M
        nsPush(ctxt, oldctxt->nsTab[i], oldctxt->nsTab[i+1]);
13368
2.76M
    }
13369
426k
#endif
13370
13371
426k
    oldsax = ctxt->sax;
13372
426k
    ctxt->sax = oldctxt->sax;
13373
426k
    xmlDetectSAX2(ctxt);
13374
426k
    ctxt->replaceEntities = oldctxt->replaceEntities;
13375
426k
    ctxt->options = oldctxt->options;
13376
13377
426k
    ctxt->_private = oldctxt->_private;
13378
426k
    if (oldctxt->myDoc == NULL) {
13379
0
  newDoc = xmlNewDoc(BAD_CAST "1.0");
13380
0
  if (newDoc == NULL) {
13381
0
      ctxt->sax = oldsax;
13382
0
      ctxt->dict = NULL;
13383
0
      xmlFreeParserCtxt(ctxt);
13384
0
      return(XML_ERR_INTERNAL_ERROR);
13385
0
  }
13386
0
  newDoc->properties = XML_DOC_INTERNAL;
13387
0
  newDoc->dict = ctxt->dict;
13388
0
  xmlDictReference(newDoc->dict);
13389
0
  ctxt->myDoc = newDoc;
13390
426k
    } else {
13391
426k
  ctxt->myDoc = oldctxt->myDoc;
13392
426k
        content = ctxt->myDoc->children;
13393
426k
  last = ctxt->myDoc->last;
13394
426k
    }
13395
426k
    newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
13396
426k
    if (newRoot == NULL) {
13397
0
  ctxt->sax = oldsax;
13398
0
  ctxt->dict = NULL;
13399
0
  xmlFreeParserCtxt(ctxt);
13400
0
  if (newDoc != NULL) {
13401
0
      xmlFreeDoc(newDoc);
13402
0
  }
13403
0
  return(XML_ERR_INTERNAL_ERROR);
13404
0
    }
13405
426k
    ctxt->myDoc->children = NULL;
13406
426k
    ctxt->myDoc->last = NULL;
13407
426k
    xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
13408
426k
    nodePush(ctxt, ctxt->myDoc->children);
13409
426k
    ctxt->instate = XML_PARSER_CONTENT;
13410
426k
    ctxt->depth = oldctxt->depth + 1;
13411
13412
426k
    ctxt->validate = 0;
13413
426k
    ctxt->loadsubset = oldctxt->loadsubset;
13414
426k
    if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
13415
  /*
13416
   * ID/IDREF registration will be done in xmlValidateElement below
13417
   */
13418
112k
  ctxt->loadsubset |= XML_SKIP_IDS;
13419
112k
    }
13420
426k
    ctxt->dictNames = oldctxt->dictNames;
13421
426k
    ctxt->attsDefault = oldctxt->attsDefault;
13422
426k
    ctxt->attsSpecial = oldctxt->attsSpecial;
13423
13424
426k
    xmlParseContent(ctxt);
13425
426k
    if ((RAW == '<') && (NXT(1) == '/')) {
13426
8.11k
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13427
418k
    } else if (RAW != 0) {
13428
781
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13429
781
    }
13430
426k
    if (ctxt->node != ctxt->myDoc->children) {
13431
165k
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13432
165k
    }
13433
13434
426k
    if (!ctxt->wellFormed) {
13435
368k
        if (ctxt->errNo == 0)
13436
0
      ret = XML_ERR_INTERNAL_ERROR;
13437
368k
  else
13438
368k
      ret = (xmlParserErrors)ctxt->errNo;
13439
368k
    } else {
13440
58.4k
      ret = XML_ERR_OK;
13441
58.4k
    }
13442
13443
426k
    if ((lst != NULL) && (ret == XML_ERR_OK)) {
13444
58.4k
  xmlNodePtr cur;
13445
13446
  /*
13447
   * Return the newly created nodeset after unlinking it from
13448
   * they pseudo parent.
13449
   */
13450
58.4k
  cur = ctxt->myDoc->children->children;
13451
58.4k
  *lst = cur;
13452
141k
  while (cur != NULL) {
13453
83.3k
#ifdef LIBXML_VALID_ENABLED
13454
83.3k
      if ((oldctxt->validate) && (oldctxt->wellFormed) &&
13455
83.3k
    (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
13456
83.3k
    (cur->type == XML_ELEMENT_NODE)) {
13457
11.0k
    oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
13458
11.0k
      oldctxt->myDoc, cur);
13459
11.0k
      }
13460
83.3k
#endif /* LIBXML_VALID_ENABLED */
13461
83.3k
      cur->parent = NULL;
13462
83.3k
      cur = cur->next;
13463
83.3k
  }
13464
58.4k
  ctxt->myDoc->children->children = NULL;
13465
58.4k
    }
13466
426k
    if (ctxt->myDoc != NULL) {
13467
426k
  xmlFreeNode(ctxt->myDoc->children);
13468
426k
        ctxt->myDoc->children = content;
13469
426k
        ctxt->myDoc->last = last;
13470
426k
    }
13471
13472
    /*
13473
     * Record in the parent context the number of entities replacement
13474
     * done when parsing that reference.
13475
     */
13476
426k
    if (oldctxt != NULL)
13477
426k
        oldctxt->nbentities += ctxt->nbentities;
13478
13479
    /*
13480
     * Also record the last error if any
13481
     */
13482
426k
    if (ctxt->lastError.code != XML_ERR_OK)
13483
370k
        xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13484
13485
426k
    ctxt->sax = oldsax;
13486
426k
    ctxt->dict = NULL;
13487
426k
    ctxt->attsDefault = NULL;
13488
426k
    ctxt->attsSpecial = NULL;
13489
426k
    xmlFreeParserCtxt(ctxt);
13490
426k
    if (newDoc != NULL) {
13491
0
  xmlFreeDoc(newDoc);
13492
0
    }
13493
13494
426k
    return(ret);
13495
426k
}
13496
13497
/**
13498
 * xmlParseInNodeContext:
13499
 * @node:  the context node
13500
 * @data:  the input string
13501
 * @datalen:  the input string length in bytes
13502
 * @options:  a combination of xmlParserOption
13503
 * @lst:  the return value for the set of parsed nodes
13504
 *
13505
 * Parse a well-balanced chunk of an XML document
13506
 * within the context (DTD, namespaces, etc ...) of the given node.
13507
 *
13508
 * The allowed sequence for the data is a Well Balanced Chunk defined by
13509
 * the content production in the XML grammar:
13510
 *
13511
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13512
 *
13513
 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13514
 * error code otherwise
13515
 */
13516
xmlParserErrors
13517
xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
13518
0
                      int options, xmlNodePtr *lst) {
13519
0
#ifdef SAX2
13520
0
    xmlParserCtxtPtr ctxt;
13521
0
    xmlDocPtr doc = NULL;
13522
0
    xmlNodePtr fake, cur;
13523
0
    int nsnr = 0;
13524
13525
0
    xmlParserErrors ret = XML_ERR_OK;
13526
13527
    /*
13528
     * check all input parameters, grab the document
13529
     */
13530
0
    if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
13531
0
        return(XML_ERR_INTERNAL_ERROR);
13532
0
    switch (node->type) {
13533
0
        case XML_ELEMENT_NODE:
13534
0
        case XML_ATTRIBUTE_NODE:
13535
0
        case XML_TEXT_NODE:
13536
0
        case XML_CDATA_SECTION_NODE:
13537
0
        case XML_ENTITY_REF_NODE:
13538
0
        case XML_PI_NODE:
13539
0
        case XML_COMMENT_NODE:
13540
0
        case XML_DOCUMENT_NODE:
13541
0
        case XML_HTML_DOCUMENT_NODE:
13542
0
      break;
13543
0
  default:
13544
0
      return(XML_ERR_INTERNAL_ERROR);
13545
13546
0
    }
13547
0
    while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
13548
0
           (node->type != XML_DOCUMENT_NODE) &&
13549
0
     (node->type != XML_HTML_DOCUMENT_NODE))
13550
0
  node = node->parent;
13551
0
    if (node == NULL)
13552
0
  return(XML_ERR_INTERNAL_ERROR);
13553
0
    if (node->type == XML_ELEMENT_NODE)
13554
0
  doc = node->doc;
13555
0
    else
13556
0
        doc = (xmlDocPtr) node;
13557
0
    if (doc == NULL)
13558
0
  return(XML_ERR_INTERNAL_ERROR);
13559
13560
    /*
13561
     * allocate a context and set-up everything not related to the
13562
     * node position in the tree
13563
     */
13564
0
    if (doc->type == XML_DOCUMENT_NODE)
13565
0
  ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
13566
0
#ifdef LIBXML_HTML_ENABLED
13567
0
    else if (doc->type == XML_HTML_DOCUMENT_NODE) {
13568
0
  ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
13569
        /*
13570
         * When parsing in context, it makes no sense to add implied
13571
         * elements like html/body/etc...
13572
         */
13573
0
        options |= HTML_PARSE_NOIMPLIED;
13574
0
    }
13575
0
#endif
13576
0
    else
13577
0
        return(XML_ERR_INTERNAL_ERROR);
13578
13579
0
    if (ctxt == NULL)
13580
0
        return(XML_ERR_NO_MEMORY);
13581
13582
    /*
13583
     * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
13584
     * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
13585
     * we must wait until the last moment to free the original one.
13586
     */
13587
0
    if (doc->dict != NULL) {
13588
0
        if (ctxt->dict != NULL)
13589
0
      xmlDictFree(ctxt->dict);
13590
0
  ctxt->dict = doc->dict;
13591
0
    } else
13592
0
        options |= XML_PARSE_NODICT;
13593
13594
0
    if (doc->encoding != NULL) {
13595
0
        xmlCharEncodingHandlerPtr hdlr;
13596
13597
0
        if (ctxt->encoding != NULL)
13598
0
      xmlFree((xmlChar *) ctxt->encoding);
13599
0
        ctxt->encoding = xmlStrdup((const xmlChar *) doc->encoding);
13600
13601
0
        hdlr = xmlFindCharEncodingHandler((const char *) doc->encoding);
13602
0
        if (hdlr != NULL) {
13603
0
            xmlSwitchToEncoding(ctxt, hdlr);
13604
0
  } else {
13605
0
            return(XML_ERR_UNSUPPORTED_ENCODING);
13606
0
        }
13607
0
    }
13608
13609
0
    xmlCtxtUseOptionsInternal(ctxt, options, NULL);
13610
0
    xmlDetectSAX2(ctxt);
13611
0
    ctxt->myDoc = doc;
13612
    /* parsing in context, i.e. as within existing content */
13613
0
    ctxt->input_id = 2;
13614
0
    ctxt->instate = XML_PARSER_CONTENT;
13615
13616
0
    fake = xmlNewDocComment(node->doc, NULL);
13617
0
    if (fake == NULL) {
13618
0
        xmlFreeParserCtxt(ctxt);
13619
0
  return(XML_ERR_NO_MEMORY);
13620
0
    }
13621
0
    xmlAddChild(node, fake);
13622
13623
0
    if (node->type == XML_ELEMENT_NODE) {
13624
0
  nodePush(ctxt, node);
13625
  /*
13626
   * initialize the SAX2 namespaces stack
13627
   */
13628
0
  cur = node;
13629
0
  while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
13630
0
      xmlNsPtr ns = cur->nsDef;
13631
0
      const xmlChar *iprefix, *ihref;
13632
13633
0
      while (ns != NULL) {
13634
0
    if (ctxt->dict) {
13635
0
        iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
13636
0
        ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
13637
0
    } else {
13638
0
        iprefix = ns->prefix;
13639
0
        ihref = ns->href;
13640
0
    }
13641
13642
0
          if (xmlGetNamespace(ctxt, iprefix) == NULL) {
13643
0
        nsPush(ctxt, iprefix, ihref);
13644
0
        nsnr++;
13645
0
    }
13646
0
    ns = ns->next;
13647
0
      }
13648
0
      cur = cur->parent;
13649
0
  }
13650
0
    }
13651
13652
0
    if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
13653
  /*
13654
   * ID/IDREF registration will be done in xmlValidateElement below
13655
   */
13656
0
  ctxt->loadsubset |= XML_SKIP_IDS;
13657
0
    }
13658
13659
0
#ifdef LIBXML_HTML_ENABLED
13660
0
    if (doc->type == XML_HTML_DOCUMENT_NODE)
13661
0
        __htmlParseContent(ctxt);
13662
0
    else
13663
0
#endif
13664
0
  xmlParseContent(ctxt);
13665
13666
0
    nsPop(ctxt, nsnr);
13667
0
    if ((RAW == '<') && (NXT(1) == '/')) {
13668
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13669
0
    } else if (RAW != 0) {
13670
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13671
0
    }
13672
0
    if ((ctxt->node != NULL) && (ctxt->node != node)) {
13673
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13674
0
  ctxt->wellFormed = 0;
13675
0
    }
13676
13677
0
    if (!ctxt->wellFormed) {
13678
0
        if (ctxt->errNo == 0)
13679
0
      ret = XML_ERR_INTERNAL_ERROR;
13680
0
  else
13681
0
      ret = (xmlParserErrors)ctxt->errNo;
13682
0
    } else {
13683
0
        ret = XML_ERR_OK;
13684
0
    }
13685
13686
    /*
13687
     * Return the newly created nodeset after unlinking it from
13688
     * the pseudo sibling.
13689
     */
13690
13691
0
    cur = fake->next;
13692
0
    fake->next = NULL;
13693
0
    node->last = fake;
13694
13695
0
    if (cur != NULL) {
13696
0
  cur->prev = NULL;
13697
0
    }
13698
13699
0
    *lst = cur;
13700
13701
0
    while (cur != NULL) {
13702
0
  cur->parent = NULL;
13703
0
  cur = cur->next;
13704
0
    }
13705
13706
0
    xmlUnlinkNode(fake);
13707
0
    xmlFreeNode(fake);
13708
13709
13710
0
    if (ret != XML_ERR_OK) {
13711
0
        xmlFreeNodeList(*lst);
13712
0
  *lst = NULL;
13713
0
    }
13714
13715
0
    if (doc->dict != NULL)
13716
0
        ctxt->dict = NULL;
13717
0
    xmlFreeParserCtxt(ctxt);
13718
13719
0
    return(ret);
13720
#else /* !SAX2 */
13721
    return(XML_ERR_INTERNAL_ERROR);
13722
#endif
13723
0
}
13724
13725
#ifdef LIBXML_SAX1_ENABLED
13726
/**
13727
 * xmlParseBalancedChunkMemoryRecover:
13728
 * @doc:  the document the chunk pertains to (must not be NULL)
13729
 * @sax:  the SAX handler block (possibly NULL)
13730
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
13731
 * @depth:  Used for loop detection, use 0
13732
 * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
13733
 * @lst:  the return value for the set of parsed nodes
13734
 * @recover: return nodes even if the data is broken (use 0)
13735
 *
13736
 *
13737
 * Parse a well-balanced chunk of an XML document
13738
 * called by the parser
13739
 * The allowed sequence for the Well Balanced Chunk is the one defined by
13740
 * the content production in the XML grammar:
13741
 *
13742
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13743
 *
13744
 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13745
 *    the parser error code otherwise
13746
 *
13747
 * In case recover is set to 1, the nodelist will not be empty even if
13748
 * the parsed chunk is not well balanced, assuming the parsing succeeded to
13749
 * some extent.
13750
 */
13751
int
13752
xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13753
     void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
13754
0
     int recover) {
13755
0
    xmlParserCtxtPtr ctxt;
13756
0
    xmlDocPtr newDoc;
13757
0
    xmlSAXHandlerPtr oldsax = NULL;
13758
0
    xmlNodePtr content, newRoot;
13759
0
    int size;
13760
0
    int ret = 0;
13761
13762
0
    if (depth > 40) {
13763
0
  return(XML_ERR_ENTITY_LOOP);
13764
0
    }
13765
13766
13767
0
    if (lst != NULL)
13768
0
        *lst = NULL;
13769
0
    if (string == NULL)
13770
0
        return(-1);
13771
13772
0
    size = xmlStrlen(string);
13773
13774
0
    ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13775
0
    if (ctxt == NULL) return(-1);
13776
0
    ctxt->userData = ctxt;
13777
0
    if (sax != NULL) {
13778
0
  oldsax = ctxt->sax;
13779
0
        ctxt->sax = sax;
13780
0
  if (user_data != NULL)
13781
0
      ctxt->userData = user_data;
13782
0
    }
13783
0
    newDoc = xmlNewDoc(BAD_CAST "1.0");
13784
0
    if (newDoc == NULL) {
13785
0
  xmlFreeParserCtxt(ctxt);
13786
0
  return(-1);
13787
0
    }
13788
0
    newDoc->properties = XML_DOC_INTERNAL;
13789
0
    if ((doc != NULL) && (doc->dict != NULL)) {
13790
0
        xmlDictFree(ctxt->dict);
13791
0
  ctxt->dict = doc->dict;
13792
0
  xmlDictReference(ctxt->dict);
13793
0
  ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13794
0
  ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13795
0
  ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13796
0
  ctxt->dictNames = 1;
13797
0
    } else {
13798
0
  xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL);
13799
0
    }
13800
    /* doc == NULL is only supported for historic reasons */
13801
0
    if (doc != NULL) {
13802
0
  newDoc->intSubset = doc->intSubset;
13803
0
  newDoc->extSubset = doc->extSubset;
13804
0
    }
13805
0
    newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13806
0
    if (newRoot == NULL) {
13807
0
  if (sax != NULL)
13808
0
      ctxt->sax = oldsax;
13809
0
  xmlFreeParserCtxt(ctxt);
13810
0
  newDoc->intSubset = NULL;
13811
0
  newDoc->extSubset = NULL;
13812
0
        xmlFreeDoc(newDoc);
13813
0
  return(-1);
13814
0
    }
13815
0
    xmlAddChild((xmlNodePtr) newDoc, newRoot);
13816
0
    nodePush(ctxt, newRoot);
13817
    /* doc == NULL is only supported for historic reasons */
13818
0
    if (doc == NULL) {
13819
0
  ctxt->myDoc = newDoc;
13820
0
    } else {
13821
0
  ctxt->myDoc = newDoc;
13822
0
  newDoc->children->doc = doc;
13823
  /* Ensure that doc has XML spec namespace */
13824
0
  xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
13825
0
  newDoc->oldNs = doc->oldNs;
13826
0
    }
13827
0
    ctxt->instate = XML_PARSER_CONTENT;
13828
0
    ctxt->input_id = 2;
13829
0
    ctxt->depth = depth;
13830
13831
    /*
13832
     * Doing validity checking on chunk doesn't make sense
13833
     */
13834
0
    ctxt->validate = 0;
13835
0
    ctxt->loadsubset = 0;
13836
0
    xmlDetectSAX2(ctxt);
13837
13838
0
    if ( doc != NULL ){
13839
0
        content = doc->children;
13840
0
        doc->children = NULL;
13841
0
        xmlParseContent(ctxt);
13842
0
        doc->children = content;
13843
0
    }
13844
0
    else {
13845
0
        xmlParseContent(ctxt);
13846
0
    }
13847
0
    if ((RAW == '<') && (NXT(1) == '/')) {
13848
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13849
0
    } else if (RAW != 0) {
13850
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13851
0
    }
13852
0
    if (ctxt->node != newDoc->children) {
13853
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13854
0
    }
13855
13856
0
    if (!ctxt->wellFormed) {
13857
0
        if (ctxt->errNo == 0)
13858
0
      ret = 1;
13859
0
  else
13860
0
      ret = ctxt->errNo;
13861
0
    } else {
13862
0
      ret = 0;
13863
0
    }
13864
13865
0
    if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
13866
0
  xmlNodePtr cur;
13867
13868
  /*
13869
   * Return the newly created nodeset after unlinking it from
13870
   * they pseudo parent.
13871
   */
13872
0
  cur = newDoc->children->children;
13873
0
  *lst = cur;
13874
0
  while (cur != NULL) {
13875
0
      xmlSetTreeDoc(cur, doc);
13876
0
      cur->parent = NULL;
13877
0
      cur = cur->next;
13878
0
  }
13879
0
  newDoc->children->children = NULL;
13880
0
    }
13881
13882
0
    if (sax != NULL)
13883
0
  ctxt->sax = oldsax;
13884
0
    xmlFreeParserCtxt(ctxt);
13885
0
    newDoc->intSubset = NULL;
13886
0
    newDoc->extSubset = NULL;
13887
    /* This leaks the namespace list if doc == NULL */
13888
0
    newDoc->oldNs = NULL;
13889
0
    xmlFreeDoc(newDoc);
13890
13891
0
    return(ret);
13892
0
}
13893
13894
/**
13895
 * xmlSAXParseEntity:
13896
 * @sax:  the SAX handler block
13897
 * @filename:  the filename
13898
 *
13899
 * DEPRECATED: Don't use.
13900
 *
13901
 * parse an XML external entity out of context and build a tree.
13902
 * It use the given SAX function block to handle the parsing callback.
13903
 * If sax is NULL, fallback to the default DOM tree building routines.
13904
 *
13905
 * [78] extParsedEnt ::= TextDecl? content
13906
 *
13907
 * This correspond to a "Well Balanced" chunk
13908
 *
13909
 * Returns the resulting document tree
13910
 */
13911
13912
xmlDocPtr
13913
0
xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
13914
0
    xmlDocPtr ret;
13915
0
    xmlParserCtxtPtr ctxt;
13916
13917
0
    ctxt = xmlCreateFileParserCtxt(filename);
13918
0
    if (ctxt == NULL) {
13919
0
  return(NULL);
13920
0
    }
13921
0
    if (sax != NULL) {
13922
0
  if (ctxt->sax != NULL)
13923
0
      xmlFree(ctxt->sax);
13924
0
        ctxt->sax = sax;
13925
0
        ctxt->userData = NULL;
13926
0
    }
13927
13928
0
    xmlParseExtParsedEnt(ctxt);
13929
13930
0
    if (ctxt->wellFormed)
13931
0
  ret = ctxt->myDoc;
13932
0
    else {
13933
0
        ret = NULL;
13934
0
        xmlFreeDoc(ctxt->myDoc);
13935
0
        ctxt->myDoc = NULL;
13936
0
    }
13937
0
    if (sax != NULL)
13938
0
        ctxt->sax = NULL;
13939
0
    xmlFreeParserCtxt(ctxt);
13940
13941
0
    return(ret);
13942
0
}
13943
13944
/**
13945
 * xmlParseEntity:
13946
 * @filename:  the filename
13947
 *
13948
 * parse an XML external entity out of context and build a tree.
13949
 *
13950
 * [78] extParsedEnt ::= TextDecl? content
13951
 *
13952
 * This correspond to a "Well Balanced" chunk
13953
 *
13954
 * Returns the resulting document tree
13955
 */
13956
13957
xmlDocPtr
13958
0
xmlParseEntity(const char *filename) {
13959
0
    return(xmlSAXParseEntity(NULL, filename));
13960
0
}
13961
#endif /* LIBXML_SAX1_ENABLED */
13962
13963
/**
13964
 * xmlCreateEntityParserCtxtInternal:
13965
 * @URL:  the entity URL
13966
 * @ID:  the entity PUBLIC ID
13967
 * @base:  a possible base for the target URI
13968
 * @pctx:  parser context used to set options on new context
13969
 *
13970
 * Create a parser context for an external entity
13971
 * Automatic support for ZLIB/Compress compressed document is provided
13972
 * by default if found at compile-time.
13973
 *
13974
 * Returns the new parser context or NULL
13975
 */
13976
static xmlParserCtxtPtr
13977
xmlCreateEntityParserCtxtInternal(xmlSAXHandlerPtr sax, void *userData,
13978
        const xmlChar *URL, const xmlChar *ID, const xmlChar *base,
13979
5.52M
        xmlParserCtxtPtr pctx) {
13980
5.52M
    xmlParserCtxtPtr ctxt;
13981
5.52M
    xmlParserInputPtr inputStream;
13982
5.52M
    char *directory = NULL;
13983
5.52M
    xmlChar *uri;
13984
13985
5.52M
    ctxt = xmlNewSAXParserCtxt(sax, userData);
13986
5.52M
    if (ctxt == NULL) {
13987
0
  return(NULL);
13988
0
    }
13989
13990
5.52M
    if (pctx != NULL) {
13991
5.52M
        ctxt->options = pctx->options;
13992
5.52M
        ctxt->_private = pctx->_private;
13993
  /*
13994
   * this is a subparser of pctx, so the input_id should be
13995
   * incremented to distinguish from main entity
13996
   */
13997
5.52M
  ctxt->input_id = pctx->input_id + 1;
13998
5.52M
    }
13999
14000
    /* Don't read from stdin. */
14001
5.52M
    if (xmlStrcmp(URL, BAD_CAST "-") == 0)
14002
0
        URL = BAD_CAST "./-";
14003
14004
5.52M
    uri = xmlBuildURI(URL, base);
14005
14006
5.52M
    if (uri == NULL) {
14007
28.2k
  inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
14008
28.2k
  if (inputStream == NULL) {
14009
27.9k
      xmlFreeParserCtxt(ctxt);
14010
27.9k
      return(NULL);
14011
27.9k
  }
14012
14013
331
  inputPush(ctxt, inputStream);
14014
14015
331
  if ((ctxt->directory == NULL) && (directory == NULL))
14016
331
      directory = xmlParserGetDirectory((char *)URL);
14017
331
  if ((ctxt->directory == NULL) && (directory != NULL))
14018
331
      ctxt->directory = directory;
14019
5.49M
    } else {
14020
5.49M
  inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
14021
5.49M
  if (inputStream == NULL) {
14022
31.5k
      xmlFree(uri);
14023
31.5k
      xmlFreeParserCtxt(ctxt);
14024
31.5k
      return(NULL);
14025
31.5k
  }
14026
14027
5.46M
  inputPush(ctxt, inputStream);
14028
14029
5.46M
  if ((ctxt->directory == NULL) && (directory == NULL))
14030
5.46M
      directory = xmlParserGetDirectory((char *)uri);
14031
5.46M
  if ((ctxt->directory == NULL) && (directory != NULL))
14032
5.46M
      ctxt->directory = directory;
14033
5.46M
  xmlFree(uri);
14034
5.46M
    }
14035
5.46M
    return(ctxt);
14036
5.52M
}
14037
14038
/**
14039
 * xmlCreateEntityParserCtxt:
14040
 * @URL:  the entity URL
14041
 * @ID:  the entity PUBLIC ID
14042
 * @base:  a possible base for the target URI
14043
 *
14044
 * Create a parser context for an external entity
14045
 * Automatic support for ZLIB/Compress compressed document is provided
14046
 * by default if found at compile-time.
14047
 *
14048
 * Returns the new parser context or NULL
14049
 */
14050
xmlParserCtxtPtr
14051
xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
14052
0
                    const xmlChar *base) {
14053
0
    return xmlCreateEntityParserCtxtInternal(NULL, NULL, URL, ID, base, NULL);
14054
14055
0
}
14056
14057
/************************************************************************
14058
 *                  *
14059
 *    Front ends when parsing from a file     *
14060
 *                  *
14061
 ************************************************************************/
14062
14063
/**
14064
 * xmlCreateURLParserCtxt:
14065
 * @filename:  the filename or URL
14066
 * @options:  a combination of xmlParserOption
14067
 *
14068
 * Create a parser context for a file or URL content.
14069
 * Automatic support for ZLIB/Compress compressed document is provided
14070
 * by default if found at compile-time and for file accesses
14071
 *
14072
 * Returns the new parser context or NULL
14073
 */
14074
xmlParserCtxtPtr
14075
xmlCreateURLParserCtxt(const char *filename, int options)
14076
0
{
14077
0
    xmlParserCtxtPtr ctxt;
14078
0
    xmlParserInputPtr inputStream;
14079
0
    char *directory = NULL;
14080
14081
0
    ctxt = xmlNewParserCtxt();
14082
0
    if (ctxt == NULL) {
14083
0
  xmlErrMemory(NULL, "cannot allocate parser context");
14084
0
  return(NULL);
14085
0
    }
14086
14087
0
    if (options)
14088
0
  xmlCtxtUseOptionsInternal(ctxt, options, NULL);
14089
0
    ctxt->linenumbers = 1;
14090
14091
0
    inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
14092
0
    if (inputStream == NULL) {
14093
0
  xmlFreeParserCtxt(ctxt);
14094
0
  return(NULL);
14095
0
    }
14096
14097
0
    inputPush(ctxt, inputStream);
14098
0
    if ((ctxt->directory == NULL) && (directory == NULL))
14099
0
        directory = xmlParserGetDirectory(filename);
14100
0
    if ((ctxt->directory == NULL) && (directory != NULL))
14101
0
        ctxt->directory = directory;
14102
14103
0
    return(ctxt);
14104
0
}
14105
14106
/**
14107
 * xmlCreateFileParserCtxt:
14108
 * @filename:  the filename
14109
 *
14110
 * Create a parser context for a file content.
14111
 * Automatic support for ZLIB/Compress compressed document is provided
14112
 * by default if found at compile-time.
14113
 *
14114
 * Returns the new parser context or NULL
14115
 */
14116
xmlParserCtxtPtr
14117
xmlCreateFileParserCtxt(const char *filename)
14118
0
{
14119
0
    return(xmlCreateURLParserCtxt(filename, 0));
14120
0
}
14121
14122
#ifdef LIBXML_SAX1_ENABLED
14123
/**
14124
 * xmlSAXParseFileWithData:
14125
 * @sax:  the SAX handler block
14126
 * @filename:  the filename
14127
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
14128
 *             documents
14129
 * @data:  the userdata
14130
 *
14131
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
14132
 *
14133
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14134
 * compressed document is provided by default if found at compile-time.
14135
 * It use the given SAX function block to handle the parsing callback.
14136
 * If sax is NULL, fallback to the default DOM tree building routines.
14137
 *
14138
 * User data (void *) is stored within the parser context in the
14139
 * context's _private member, so it is available nearly everywhere in libxml
14140
 *
14141
 * Returns the resulting document tree
14142
 */
14143
14144
xmlDocPtr
14145
xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
14146
0
                        int recovery, void *data) {
14147
0
    xmlDocPtr ret;
14148
0
    xmlParserCtxtPtr ctxt;
14149
14150
0
    xmlInitParser();
14151
14152
0
    ctxt = xmlCreateFileParserCtxt(filename);
14153
0
    if (ctxt == NULL) {
14154
0
  return(NULL);
14155
0
    }
14156
0
    if (sax != NULL) {
14157
0
  if (ctxt->sax != NULL)
14158
0
      xmlFree(ctxt->sax);
14159
0
        ctxt->sax = sax;
14160
0
    }
14161
0
    xmlDetectSAX2(ctxt);
14162
0
    if (data!=NULL) {
14163
0
  ctxt->_private = data;
14164
0
    }
14165
14166
0
    if (ctxt->directory == NULL)
14167
0
        ctxt->directory = xmlParserGetDirectory(filename);
14168
14169
0
    ctxt->recovery = recovery;
14170
14171
0
    xmlParseDocument(ctxt);
14172
14173
0
    if ((ctxt->wellFormed) || recovery) {
14174
0
        ret = ctxt->myDoc;
14175
0
  if ((ret != NULL) && (ctxt->input->buf != NULL)) {
14176
0
      if (ctxt->input->buf->compressed > 0)
14177
0
    ret->compression = 9;
14178
0
      else
14179
0
    ret->compression = ctxt->input->buf->compressed;
14180
0
  }
14181
0
    }
14182
0
    else {
14183
0
       ret = NULL;
14184
0
       xmlFreeDoc(ctxt->myDoc);
14185
0
       ctxt->myDoc = NULL;
14186
0
    }
14187
0
    if (sax != NULL)
14188
0
        ctxt->sax = NULL;
14189
0
    xmlFreeParserCtxt(ctxt);
14190
14191
0
    return(ret);
14192
0
}
14193
14194
/**
14195
 * xmlSAXParseFile:
14196
 * @sax:  the SAX handler block
14197
 * @filename:  the filename
14198
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
14199
 *             documents
14200
 *
14201
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
14202
 *
14203
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14204
 * compressed document is provided by default if found at compile-time.
14205
 * It use the given SAX function block to handle the parsing callback.
14206
 * If sax is NULL, fallback to the default DOM tree building routines.
14207
 *
14208
 * Returns the resulting document tree
14209
 */
14210
14211
xmlDocPtr
14212
xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
14213
0
                          int recovery) {
14214
0
    return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
14215
0
}
14216
14217
/**
14218
 * xmlRecoverDoc:
14219
 * @cur:  a pointer to an array of xmlChar
14220
 *
14221
 * DEPRECATED: Use xmlReadDoc with XML_PARSE_RECOVER.
14222
 *
14223
 * parse an XML in-memory document and build a tree.
14224
 * In the case the document is not Well Formed, a attempt to build a
14225
 * tree is tried anyway
14226
 *
14227
 * Returns the resulting document tree or NULL in case of failure
14228
 */
14229
14230
xmlDocPtr
14231
0
xmlRecoverDoc(const xmlChar *cur) {
14232
0
    return(xmlSAXParseDoc(NULL, cur, 1));
14233
0
}
14234
14235
/**
14236
 * xmlParseFile:
14237
 * @filename:  the filename
14238
 *
14239
 * DEPRECATED: Use xmlReadFile.
14240
 *
14241
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14242
 * compressed document is provided by default if found at compile-time.
14243
 *
14244
 * Returns the resulting document tree if the file was wellformed,
14245
 * NULL otherwise.
14246
 */
14247
14248
xmlDocPtr
14249
0
xmlParseFile(const char *filename) {
14250
0
    return(xmlSAXParseFile(NULL, filename, 0));
14251
0
}
14252
14253
/**
14254
 * xmlRecoverFile:
14255
 * @filename:  the filename
14256
 *
14257
 * DEPRECATED: Use xmlReadFile with XML_PARSE_RECOVER.
14258
 *
14259
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14260
 * compressed document is provided by default if found at compile-time.
14261
 * In the case the document is not Well Formed, it attempts to build
14262
 * a tree anyway
14263
 *
14264
 * Returns the resulting document tree or NULL in case of failure
14265
 */
14266
14267
xmlDocPtr
14268
0
xmlRecoverFile(const char *filename) {
14269
0
    return(xmlSAXParseFile(NULL, filename, 1));
14270
0
}
14271
14272
14273
/**
14274
 * xmlSetupParserForBuffer:
14275
 * @ctxt:  an XML parser context
14276
 * @buffer:  a xmlChar * buffer
14277
 * @filename:  a file name
14278
 *
14279
 * DEPRECATED: Don't use.
14280
 *
14281
 * Setup the parser context to parse a new buffer; Clears any prior
14282
 * contents from the parser context. The buffer parameter must not be
14283
 * NULL, but the filename parameter can be
14284
 */
14285
void
14286
xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
14287
                             const char* filename)
14288
0
{
14289
0
    xmlParserInputPtr input;
14290
14291
0
    if ((ctxt == NULL) || (buffer == NULL))
14292
0
        return;
14293
14294
0
    input = xmlNewInputStream(ctxt);
14295
0
    if (input == NULL) {
14296
0
        xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
14297
0
        xmlClearParserCtxt(ctxt);
14298
0
        return;
14299
0
    }
14300
14301
0
    xmlClearParserCtxt(ctxt);
14302
0
    if (filename != NULL)
14303
0
        input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
14304
0
    input->base = buffer;
14305
0
    input->cur = buffer;
14306
0
    input->end = &buffer[xmlStrlen(buffer)];
14307
0
    inputPush(ctxt, input);
14308
0
}
14309
14310
/**
14311
 * xmlSAXUserParseFile:
14312
 * @sax:  a SAX handler
14313
 * @user_data:  The user data returned on SAX callbacks
14314
 * @filename:  a file name
14315
 *
14316
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
14317
 *
14318
 * parse an XML file and call the given SAX handler routines.
14319
 * Automatic support for ZLIB/Compress compressed document is provided
14320
 *
14321
 * Returns 0 in case of success or a error number otherwise
14322
 */
14323
int
14324
xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
14325
0
                    const char *filename) {
14326
0
    int ret = 0;
14327
0
    xmlParserCtxtPtr ctxt;
14328
14329
0
    ctxt = xmlCreateFileParserCtxt(filename);
14330
0
    if (ctxt == NULL) return -1;
14331
0
    if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14332
0
  xmlFree(ctxt->sax);
14333
0
    ctxt->sax = sax;
14334
0
    xmlDetectSAX2(ctxt);
14335
14336
0
    if (user_data != NULL)
14337
0
  ctxt->userData = user_data;
14338
14339
0
    xmlParseDocument(ctxt);
14340
14341
0
    if (ctxt->wellFormed)
14342
0
  ret = 0;
14343
0
    else {
14344
0
        if (ctxt->errNo != 0)
14345
0
      ret = ctxt->errNo;
14346
0
  else
14347
0
      ret = -1;
14348
0
    }
14349
0
    if (sax != NULL)
14350
0
  ctxt->sax = NULL;
14351
0
    if (ctxt->myDoc != NULL) {
14352
0
        xmlFreeDoc(ctxt->myDoc);
14353
0
  ctxt->myDoc = NULL;
14354
0
    }
14355
0
    xmlFreeParserCtxt(ctxt);
14356
14357
0
    return ret;
14358
0
}
14359
#endif /* LIBXML_SAX1_ENABLED */
14360
14361
/************************************************************************
14362
 *                  *
14363
 *    Front ends when parsing from memory     *
14364
 *                  *
14365
 ************************************************************************/
14366
14367
/**
14368
 * xmlCreateMemoryParserCtxt:
14369
 * @buffer:  a pointer to a char array
14370
 * @size:  the size of the array
14371
 *
14372
 * Create a parser context for an XML in-memory document.
14373
 *
14374
 * Returns the new parser context or NULL
14375
 */
14376
xmlParserCtxtPtr
14377
880k
xmlCreateMemoryParserCtxt(const char *buffer, int size) {
14378
880k
    xmlParserCtxtPtr ctxt;
14379
880k
    xmlParserInputPtr input;
14380
880k
    xmlParserInputBufferPtr buf;
14381
14382
880k
    if (buffer == NULL)
14383
0
  return(NULL);
14384
880k
    if (size <= 0)
14385
11.5k
  return(NULL);
14386
14387
868k
    ctxt = xmlNewParserCtxt();
14388
868k
    if (ctxt == NULL)
14389
0
  return(NULL);
14390
14391
    /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
14392
868k
    buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
14393
868k
    if (buf == NULL) {
14394
0
  xmlFreeParserCtxt(ctxt);
14395
0
  return(NULL);
14396
0
    }
14397
14398
868k
    input = xmlNewInputStream(ctxt);
14399
868k
    if (input == NULL) {
14400
0
  xmlFreeParserInputBuffer(buf);
14401
0
  xmlFreeParserCtxt(ctxt);
14402
0
  return(NULL);
14403
0
    }
14404
14405
868k
    input->filename = NULL;
14406
868k
    input->buf = buf;
14407
868k
    xmlBufResetInput(input->buf->buffer, input);
14408
14409
868k
    inputPush(ctxt, input);
14410
868k
    return(ctxt);
14411
868k
}
14412
14413
#ifdef LIBXML_SAX1_ENABLED
14414
/**
14415
 * xmlSAXParseMemoryWithData:
14416
 * @sax:  the SAX handler block
14417
 * @buffer:  an pointer to a char array
14418
 * @size:  the size of the array
14419
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
14420
 *             documents
14421
 * @data:  the userdata
14422
 *
14423
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
14424
 *
14425
 * parse an XML in-memory block and use the given SAX function block
14426
 * to handle the parsing callback. If sax is NULL, fallback to the default
14427
 * DOM tree building routines.
14428
 *
14429
 * User data (void *) is stored within the parser context in the
14430
 * context's _private member, so it is available nearly everywhere in libxml
14431
 *
14432
 * Returns the resulting document tree
14433
 */
14434
14435
xmlDocPtr
14436
xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
14437
0
            int size, int recovery, void *data) {
14438
0
    xmlDocPtr ret;
14439
0
    xmlParserCtxtPtr ctxt;
14440
14441
0
    xmlInitParser();
14442
14443
0
    ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14444
0
    if (ctxt == NULL) return(NULL);
14445
0
    if (sax != NULL) {
14446
0
  if (ctxt->sax != NULL)
14447
0
      xmlFree(ctxt->sax);
14448
0
        ctxt->sax = sax;
14449
0
    }
14450
0
    xmlDetectSAX2(ctxt);
14451
0
    if (data!=NULL) {
14452
0
  ctxt->_private=data;
14453
0
    }
14454
14455
0
    ctxt->recovery = recovery;
14456
14457
0
    xmlParseDocument(ctxt);
14458
14459
0
    if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14460
0
    else {
14461
0
       ret = NULL;
14462
0
       xmlFreeDoc(ctxt->myDoc);
14463
0
       ctxt->myDoc = NULL;
14464
0
    }
14465
0
    if (sax != NULL)
14466
0
  ctxt->sax = NULL;
14467
0
    xmlFreeParserCtxt(ctxt);
14468
14469
0
    return(ret);
14470
0
}
14471
14472
/**
14473
 * xmlSAXParseMemory:
14474
 * @sax:  the SAX handler block
14475
 * @buffer:  an pointer to a char array
14476
 * @size:  the size of the array
14477
 * @recovery:  work in recovery mode, i.e. tries to read not Well Formed
14478
 *             documents
14479
 *
14480
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
14481
 *
14482
 * parse an XML in-memory block and use the given SAX function block
14483
 * to handle the parsing callback. If sax is NULL, fallback to the default
14484
 * DOM tree building routines.
14485
 *
14486
 * Returns the resulting document tree
14487
 */
14488
xmlDocPtr
14489
xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
14490
0
            int size, int recovery) {
14491
0
    return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
14492
0
}
14493
14494
/**
14495
 * xmlParseMemory:
14496
 * @buffer:  an pointer to a char array
14497
 * @size:  the size of the array
14498
 *
14499
 * DEPRECATED: Use xmlReadMemory.
14500
 *
14501
 * parse an XML in-memory block and build a tree.
14502
 *
14503
 * Returns the resulting document tree
14504
 */
14505
14506
0
xmlDocPtr xmlParseMemory(const char *buffer, int size) {
14507
0
   return(xmlSAXParseMemory(NULL, buffer, size, 0));
14508
0
}
14509
14510
/**
14511
 * xmlRecoverMemory:
14512
 * @buffer:  an pointer to a char array
14513
 * @size:  the size of the array
14514
 *
14515
 * DEPRECATED: Use xmlReadMemory with XML_PARSE_RECOVER.
14516
 *
14517
 * parse an XML in-memory block and build a tree.
14518
 * In the case the document is not Well Formed, an attempt to
14519
 * build a tree is tried anyway
14520
 *
14521
 * Returns the resulting document tree or NULL in case of error
14522
 */
14523
14524
0
xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
14525
0
   return(xmlSAXParseMemory(NULL, buffer, size, 1));
14526
0
}
14527
14528
/**
14529
 * xmlSAXUserParseMemory:
14530
 * @sax:  a SAX handler
14531
 * @user_data:  The user data returned on SAX callbacks
14532
 * @buffer:  an in-memory XML document input
14533
 * @size:  the length of the XML document in bytes
14534
 *
14535
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
14536
 *
14537
 * parse an XML in-memory buffer and call the given SAX handler routines.
14538
 *
14539
 * Returns 0 in case of success or a error number otherwise
14540
 */
14541
int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
14542
0
        const char *buffer, int size) {
14543
0
    int ret = 0;
14544
0
    xmlParserCtxtPtr ctxt;
14545
14546
0
    xmlInitParser();
14547
14548
0
    ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14549
0
    if (ctxt == NULL) return -1;
14550
0
    if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14551
0
        xmlFree(ctxt->sax);
14552
0
    ctxt->sax = sax;
14553
0
    xmlDetectSAX2(ctxt);
14554
14555
0
    if (user_data != NULL)
14556
0
  ctxt->userData = user_data;
14557
14558
0
    xmlParseDocument(ctxt);
14559
14560
0
    if (ctxt->wellFormed)
14561
0
  ret = 0;
14562
0
    else {
14563
0
        if (ctxt->errNo != 0)
14564
0
      ret = ctxt->errNo;
14565
0
  else
14566
0
      ret = -1;
14567
0
    }
14568
0
    if (sax != NULL)
14569
0
        ctxt->sax = NULL;
14570
0
    if (ctxt->myDoc != NULL) {
14571
0
        xmlFreeDoc(ctxt->myDoc);
14572
0
  ctxt->myDoc = NULL;
14573
0
    }
14574
0
    xmlFreeParserCtxt(ctxt);
14575
14576
0
    return ret;
14577
0
}
14578
#endif /* LIBXML_SAX1_ENABLED */
14579
14580
/**
14581
 * xmlCreateDocParserCtxt:
14582
 * @cur:  a pointer to an array of xmlChar
14583
 *
14584
 * Creates a parser context for an XML in-memory document.
14585
 *
14586
 * Returns the new parser context or NULL
14587
 */
14588
xmlParserCtxtPtr
14589
0
xmlCreateDocParserCtxt(const xmlChar *cur) {
14590
0
    int len;
14591
14592
0
    if (cur == NULL)
14593
0
  return(NULL);
14594
0
    len = xmlStrlen(cur);
14595
0
    return(xmlCreateMemoryParserCtxt((const char *)cur, len));
14596
0
}
14597
14598
#ifdef LIBXML_SAX1_ENABLED
14599
/**
14600
 * xmlSAXParseDoc:
14601
 * @sax:  the SAX handler block
14602
 * @cur:  a pointer to an array of xmlChar
14603
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
14604
 *             documents
14605
 *
14606
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadDoc.
14607
 *
14608
 * parse an XML in-memory document and build a tree.
14609
 * It use the given SAX function block to handle the parsing callback.
14610
 * If sax is NULL, fallback to the default DOM tree building routines.
14611
 *
14612
 * Returns the resulting document tree
14613
 */
14614
14615
xmlDocPtr
14616
0
xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
14617
0
    xmlDocPtr ret;
14618
0
    xmlParserCtxtPtr ctxt;
14619
0
    xmlSAXHandlerPtr oldsax = NULL;
14620
14621
0
    if (cur == NULL) return(NULL);
14622
14623
14624
0
    ctxt = xmlCreateDocParserCtxt(cur);
14625
0
    if (ctxt == NULL) return(NULL);
14626
0
    if (sax != NULL) {
14627
0
        oldsax = ctxt->sax;
14628
0
        ctxt->sax = sax;
14629
0
        ctxt->userData = NULL;
14630
0
    }
14631
0
    xmlDetectSAX2(ctxt);
14632
14633
0
    xmlParseDocument(ctxt);
14634
0
    if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14635
0
    else {
14636
0
       ret = NULL;
14637
0
       xmlFreeDoc(ctxt->myDoc);
14638
0
       ctxt->myDoc = NULL;
14639
0
    }
14640
0
    if (sax != NULL)
14641
0
  ctxt->sax = oldsax;
14642
0
    xmlFreeParserCtxt(ctxt);
14643
14644
0
    return(ret);
14645
0
}
14646
14647
/**
14648
 * xmlParseDoc:
14649
 * @cur:  a pointer to an array of xmlChar
14650
 *
14651
 * DEPRECATED: Use xmlReadDoc.
14652
 *
14653
 * parse an XML in-memory document and build a tree.
14654
 *
14655
 * Returns the resulting document tree
14656
 */
14657
14658
xmlDocPtr
14659
0
xmlParseDoc(const xmlChar *cur) {
14660
0
    return(xmlSAXParseDoc(NULL, cur, 0));
14661
0
}
14662
#endif /* LIBXML_SAX1_ENABLED */
14663
14664
#ifdef LIBXML_LEGACY_ENABLED
14665
/************************************************************************
14666
 *                  *
14667
 *  Specific function to keep track of entities references    *
14668
 *  and used by the XSLT debugger         *
14669
 *                  *
14670
 ************************************************************************/
14671
14672
static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
14673
14674
/**
14675
 * xmlAddEntityReference:
14676
 * @ent : A valid entity
14677
 * @firstNode : A valid first node for children of entity
14678
 * @lastNode : A valid last node of children entity
14679
 *
14680
 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
14681
 */
14682
static void
14683
xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
14684
                      xmlNodePtr lastNode)
14685
{
14686
    if (xmlEntityRefFunc != NULL) {
14687
        (*xmlEntityRefFunc) (ent, firstNode, lastNode);
14688
    }
14689
}
14690
14691
14692
/**
14693
 * xmlSetEntityReferenceFunc:
14694
 * @func: A valid function
14695
 *
14696
 * Set the function to call call back when a xml reference has been made
14697
 */
14698
void
14699
xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
14700
{
14701
    xmlEntityRefFunc = func;
14702
}
14703
#endif /* LIBXML_LEGACY_ENABLED */
14704
14705
/************************************************************************
14706
 *                  *
14707
 *        Miscellaneous       *
14708
 *                  *
14709
 ************************************************************************/
14710
14711
static int xmlParserInitialized = 0;
14712
14713
/**
14714
 * xmlInitParser:
14715
 *
14716
 * Initialization function for the XML parser.
14717
 * This is not reentrant. Call once before processing in case of
14718
 * use in multithreaded programs.
14719
 */
14720
14721
void
14722
9.30M
xmlInitParser(void) {
14723
9.30M
    if (xmlParserInitialized != 0)
14724
9.29M
  return;
14725
14726
#if defined(_WIN32) && (!defined(LIBXML_STATIC) || defined(LIBXML_STATIC_FOR_DLL))
14727
    if (xmlFree == free)
14728
        atexit(xmlCleanupParser);
14729
#endif
14730
14731
9.59k
#ifdef LIBXML_THREAD_ENABLED
14732
9.59k
    __xmlGlobalInitMutexLock();
14733
9.59k
    if (xmlParserInitialized == 0) {
14734
9.59k
#endif
14735
9.59k
  xmlInitThreads();
14736
9.59k
  xmlInitGlobals();
14737
9.59k
  xmlInitMemory();
14738
9.59k
        xmlInitializeDict();
14739
9.59k
  xmlInitCharEncodingHandlers();
14740
9.59k
  xmlDefaultSAXHandlerInit();
14741
9.59k
  xmlRegisterDefaultInputCallbacks();
14742
9.59k
#ifdef LIBXML_OUTPUT_ENABLED
14743
9.59k
  xmlRegisterDefaultOutputCallbacks();
14744
9.59k
#endif /* LIBXML_OUTPUT_ENABLED */
14745
9.59k
#ifdef LIBXML_HTML_ENABLED
14746
9.59k
  htmlInitAutoClose();
14747
9.59k
  htmlDefaultSAXHandlerInit();
14748
9.59k
#endif
14749
9.59k
#if defined(LIBXML_XPATH_ENABLED) || defined(LIBXML_SCHEMAS_ENABLED)
14750
9.59k
  xmlXPathInit();
14751
9.59k
#endif
14752
9.59k
  xmlParserInitialized = 1;
14753
9.59k
#ifdef LIBXML_THREAD_ENABLED
14754
9.59k
    }
14755
9.59k
    __xmlGlobalInitMutexUnlock();
14756
9.59k
#endif
14757
9.59k
}
14758
14759
/**
14760
 * xmlCleanupParser:
14761
 *
14762
 * This function name is somewhat misleading. It does not clean up
14763
 * parser state, it cleans up memory allocated by the library itself.
14764
 * It is a cleanup function for the XML library. It tries to reclaim all
14765
 * related global memory allocated for the library processing.
14766
 * It doesn't deallocate any document related memory. One should
14767
 * call xmlCleanupParser() only when the process has finished using
14768
 * the library and all XML/HTML documents built with it.
14769
 * See also xmlInitParser() which has the opposite function of preparing
14770
 * the library for operations.
14771
 *
14772
 * WARNING: if your application is multithreaded or has plugin support
14773
 *          calling this may crash the application if another thread or
14774
 *          a plugin is still using libxml2. It's sometimes very hard to
14775
 *          guess if libxml2 is in use in the application, some libraries
14776
 *          or plugins may use it without notice. In case of doubt abstain
14777
 *          from calling this function or do it just before calling exit()
14778
 *          to avoid leak reports from valgrind !
14779
 */
14780
14781
void
14782
0
xmlCleanupParser(void) {
14783
0
    if (!xmlParserInitialized)
14784
0
  return;
14785
14786
0
    xmlCleanupCharEncodingHandlers();
14787
0
#ifdef LIBXML_CATALOG_ENABLED
14788
0
    xmlCatalogCleanup();
14789
0
#endif
14790
0
    xmlDictCleanup();
14791
0
    xmlCleanupInputCallbacks();
14792
0
#ifdef LIBXML_OUTPUT_ENABLED
14793
0
    xmlCleanupOutputCallbacks();
14794
0
#endif
14795
0
#ifdef LIBXML_SCHEMAS_ENABLED
14796
0
    xmlSchemaCleanupTypes();
14797
0
    xmlRelaxNGCleanupTypes();
14798
0
#endif
14799
0
    xmlCleanupGlobals();
14800
0
    xmlCleanupThreads(); /* must be last if called not from the main thread */
14801
0
    xmlCleanupMemory();
14802
0
    xmlParserInitialized = 0;
14803
0
}
14804
14805
#if defined(HAVE_ATTRIBUTE_DESTRUCTOR) && !defined(LIBXML_STATIC) && \
    !defined(_WIN32)
/*
 * Destructor hook that runs the library cleanup, but only when the
 * default free() is installed as xmlFree.
 */
static void
ATTRIBUTE_DESTRUCTOR
xmlDestructor(void) {
    /*
     * Calling custom deallocation functions in a destructor can cause
     * problems, for example with Nokogiri.
     */
    if (xmlFree != free)
        return;

    xmlCleanupParser();
}
#endif
14818
14819
/************************************************************************
14820
 *                  *
14821
 *  New set (2.6.0) of simpler and more flexible APIs   *
14822
 *                  *
14823
 ************************************************************************/
14824
14825
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A NULL @str is ignored, and a NULL "dict" means the
 * string is always freed.
 *
 * The expansion is wrapped in do { } while (0) so that it behaves as a
 * single statement: it can be used as the body of an unbraced if/else,
 * and the caller supplies the terminating semicolon.
 */
#define DICT_FREE(str)						\
    do {							\
	if ((str) && ((!dict) ||				\
	    (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))	\
	    xmlFree((char *)(str));				\
    } while (0)
14836
14837
/**
14838
 * xmlCtxtReset:
14839
 * @ctxt: an XML parser context
14840
 *
14841
 * Reset a parser context
14842
 */
14843
void
14844
xmlCtxtReset(xmlParserCtxtPtr ctxt)
14845
0
{
14846
0
    xmlParserInputPtr input;
14847
0
    xmlDictPtr dict;
14848
14849
0
    if (ctxt == NULL)
14850
0
        return;
14851
14852
0
    dict = ctxt->dict;
14853
14854
0
    while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
14855
0
        xmlFreeInputStream(input);
14856
0
    }
14857
0
    ctxt->inputNr = 0;
14858
0
    ctxt->input = NULL;
14859
14860
0
    ctxt->spaceNr = 0;
14861
0
    if (ctxt->spaceTab != NULL) {
14862
0
  ctxt->spaceTab[0] = -1;
14863
0
  ctxt->space = &ctxt->spaceTab[0];
14864
0
    } else {
14865
0
        ctxt->space = NULL;
14866
0
    }
14867
14868
14869
0
    ctxt->nodeNr = 0;
14870
0
    ctxt->node = NULL;
14871
14872
0
    ctxt->nameNr = 0;
14873
0
    ctxt->name = NULL;
14874
14875
0
    ctxt->nsNr = 0;
14876
14877
0
    DICT_FREE(ctxt->version);
14878
0
    ctxt->version = NULL;
14879
0
    DICT_FREE(ctxt->encoding);
14880
0
    ctxt->encoding = NULL;
14881
0
    DICT_FREE(ctxt->directory);
14882
0
    ctxt->directory = NULL;
14883
0
    DICT_FREE(ctxt->extSubURI);
14884
0
    ctxt->extSubURI = NULL;
14885
0
    DICT_FREE(ctxt->extSubSystem);
14886
0
    ctxt->extSubSystem = NULL;
14887
0
    if (ctxt->myDoc != NULL)
14888
0
        xmlFreeDoc(ctxt->myDoc);
14889
0
    ctxt->myDoc = NULL;
14890
14891
0
    ctxt->standalone = -1;
14892
0
    ctxt->hasExternalSubset = 0;
14893
0
    ctxt->hasPErefs = 0;
14894
0
    ctxt->html = 0;
14895
0
    ctxt->external = 0;
14896
0
    ctxt->instate = XML_PARSER_START;
14897
0
    ctxt->token = 0;
14898
14899
0
    ctxt->wellFormed = 1;
14900
0
    ctxt->nsWellFormed = 1;
14901
0
    ctxt->disableSAX = 0;
14902
0
    ctxt->valid = 1;
14903
#if 0
14904
    ctxt->vctxt.userData = ctxt;
14905
    ctxt->vctxt.error = xmlParserValidityError;
14906
    ctxt->vctxt.warning = xmlParserValidityWarning;
14907
#endif
14908
0
    ctxt->record_info = 0;
14909
0
    ctxt->checkIndex = 0;
14910
0
    ctxt->inSubset = 0;
14911
0
    ctxt->errNo = XML_ERR_OK;
14912
0
    ctxt->depth = 0;
14913
0
    ctxt->charset = XML_CHAR_ENCODING_UTF8;
14914
0
    ctxt->catalogs = NULL;
14915
0
    ctxt->nbentities = 0;
14916
0
    ctxt->sizeentities = 0;
14917
0
    ctxt->sizeentcopy = 0;
14918
0
    xmlInitNodeInfoSeq(&ctxt->node_seq);
14919
14920
0
    if (ctxt->attsDefault != NULL) {
14921
0
        xmlHashFree(ctxt->attsDefault, xmlHashDefaultDeallocator);
14922
0
        ctxt->attsDefault = NULL;
14923
0
    }
14924
0
    if (ctxt->attsSpecial != NULL) {
14925
0
        xmlHashFree(ctxt->attsSpecial, NULL);
14926
0
        ctxt->attsSpecial = NULL;
14927
0
    }
14928
14929
0
#ifdef LIBXML_CATALOG_ENABLED
14930
0
    if (ctxt->catalogs != NULL)
14931
0
  xmlCatalogFreeLocal(ctxt->catalogs);
14932
0
#endif
14933
0
    if (ctxt->lastError.code != XML_ERR_OK)
14934
0
        xmlResetError(&ctxt->lastError);
14935
0
}
14936
14937
/**
14938
 * xmlCtxtResetPush:
14939
 * @ctxt: an XML parser context
14940
 * @chunk:  a pointer to an array of chars
14941
 * @size:  number of chars in the array
14942
 * @filename:  an optional file name or URI
14943
 * @encoding:  the document encoding, or NULL
14944
 *
14945
 * Reset a push parser context
14946
 *
14947
 * Returns 0 in case of success and 1 in case of error
14948
 */
14949
int
14950
xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
14951
                 int size, const char *filename, const char *encoding)
14952
0
{
14953
0
    xmlParserInputPtr inputStream;
14954
0
    xmlParserInputBufferPtr buf;
14955
0
    xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
14956
14957
0
    if (ctxt == NULL)
14958
0
        return(1);
14959
14960
0
    if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
14961
0
        enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
14962
14963
0
    buf = xmlAllocParserInputBuffer(enc);
14964
0
    if (buf == NULL)
14965
0
        return(1);
14966
14967
0
    if (ctxt == NULL) {
14968
0
        xmlFreeParserInputBuffer(buf);
14969
0
        return(1);
14970
0
    }
14971
14972
0
    xmlCtxtReset(ctxt);
14973
14974
0
    if (filename == NULL) {
14975
0
        ctxt->directory = NULL;
14976
0
    } else {
14977
0
        ctxt->directory = xmlParserGetDirectory(filename);
14978
0
    }
14979
14980
0
    inputStream = xmlNewInputStream(ctxt);
14981
0
    if (inputStream == NULL) {
14982
0
        xmlFreeParserInputBuffer(buf);
14983
0
        return(1);
14984
0
    }
14985
14986
0
    if (filename == NULL)
14987
0
        inputStream->filename = NULL;
14988
0
    else
14989
0
        inputStream->filename = (char *)
14990
0
            xmlCanonicPath((const xmlChar *) filename);
14991
0
    inputStream->buf = buf;
14992
0
    xmlBufResetInput(buf->buffer, inputStream);
14993
14994
0
    inputPush(ctxt, inputStream);
14995
14996
0
    if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
14997
0
        (ctxt->input->buf != NULL)) {
14998
0
  size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
14999
0
        size_t cur = ctxt->input->cur - ctxt->input->base;
15000
15001
0
        xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
15002
15003
0
        xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
15004
#ifdef DEBUG_PUSH
15005
        xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
15006
#endif
15007
0
    }
15008
15009
0
    if (encoding != NULL) {
15010
0
        xmlCharEncodingHandlerPtr hdlr;
15011
15012
0
        if (ctxt->encoding != NULL)
15013
0
      xmlFree((xmlChar *) ctxt->encoding);
15014
0
        ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
15015
15016
0
        hdlr = xmlFindCharEncodingHandler(encoding);
15017
0
        if (hdlr != NULL) {
15018
0
            xmlSwitchToEncoding(ctxt, hdlr);
15019
0
  } else {
15020
0
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
15021
0
            "Unsupported encoding %s\n", BAD_CAST encoding);
15022
0
        }
15023
0
    } else if (enc != XML_CHAR_ENCODING_NONE) {
15024
0
        xmlSwitchEncoding(ctxt, enc);
15025
0
    }
15026
15027
0
    return(0);
15028
0
}
15029
15030
15031
/**
15032
 * xmlCtxtUseOptionsInternal:
15033
 * @ctxt: an XML parser context
15034
 * @options:  a combination of xmlParserOption
15035
 * @encoding:  the user provided encoding to use
15036
 *
15037
 * Applies the options to the parser context
15038
 *
15039
 * Returns 0 in case of success, the set of unknown or unimplemented options
15040
 *         in case of error.
15041
 */
15042
static int
15043
xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding)
15044
1.60M
{
15045
1.60M
    if (ctxt == NULL)
15046
0
        return(-1);
15047
1.60M
    if (encoding != NULL) {
15048
0
        if (ctxt->encoding != NULL)
15049
0
      xmlFree((xmlChar *) ctxt->encoding);
15050
0
        ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
15051
0
    }
15052
1.60M
    if (options & XML_PARSE_RECOVER) {
15053
928k
        ctxt->recovery = 1;
15054
928k
        options -= XML_PARSE_RECOVER;
15055
928k
  ctxt->options |= XML_PARSE_RECOVER;
15056
928k
    } else
15057
677k
        ctxt->recovery = 0;
15058
1.60M
    if (options & XML_PARSE_DTDLOAD) {
15059
1.39M
        ctxt->loadsubset = XML_DETECT_IDS;
15060
1.39M
        options -= XML_PARSE_DTDLOAD;
15061
1.39M
  ctxt->options |= XML_PARSE_DTDLOAD;
15062
1.39M
    } else
15063
209k
        ctxt->loadsubset = 0;
15064
1.60M
    if (options & XML_PARSE_DTDATTR) {
15065
433k
        ctxt->loadsubset |= XML_COMPLETE_ATTRS;
15066
433k
        options -= XML_PARSE_DTDATTR;
15067
433k
  ctxt->options |= XML_PARSE_DTDATTR;
15068
433k
    }
15069
1.60M
    if (options & XML_PARSE_NOENT) {
15070
1.01M
        ctxt->replaceEntities = 1;
15071
        /* ctxt->loadsubset |= XML_DETECT_IDS; */
15072
1.01M
        options -= XML_PARSE_NOENT;
15073
1.01M
  ctxt->options |= XML_PARSE_NOENT;
15074
1.01M
    } else
15075
586k
        ctxt->replaceEntities = 0;
15076
1.60M
    if (options & XML_PARSE_PEDANTIC) {
15077
373k
        ctxt->pedantic = 1;
15078
373k
        options -= XML_PARSE_PEDANTIC;
15079
373k
  ctxt->options |= XML_PARSE_PEDANTIC;
15080
373k
    } else
15081
1.23M
        ctxt->pedantic = 0;
15082
1.60M
    if (options & XML_PARSE_NOBLANKS) {
15083
689k
        ctxt->keepBlanks = 0;
15084
689k
        ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
15085
689k
        options -= XML_PARSE_NOBLANKS;
15086
689k
  ctxt->options |= XML_PARSE_NOBLANKS;
15087
689k
    } else
15088
916k
        ctxt->keepBlanks = 1;
15089
1.60M
    if (options & XML_PARSE_DTDVALID) {
15090
572k
        ctxt->validate = 1;
15091
572k
        if (options & XML_PARSE_NOWARNING)
15092
441k
            ctxt->vctxt.warning = NULL;
15093
572k
        if (options & XML_PARSE_NOERROR)
15094
425k
            ctxt->vctxt.error = NULL;
15095
572k
        options -= XML_PARSE_DTDVALID;
15096
572k
  ctxt->options |= XML_PARSE_DTDVALID;
15097
572k
    } else
15098
1.03M
        ctxt->validate = 0;
15099
1.60M
    if (options & XML_PARSE_NOWARNING) {
15100
571k
        ctxt->sax->warning = NULL;
15101
571k
        options -= XML_PARSE_NOWARNING;
15102
571k
    }
15103
1.60M
    if (options & XML_PARSE_NOERROR) {
15104
581k
        ctxt->sax->error = NULL;
15105
581k
        ctxt->sax->fatalError = NULL;
15106
581k
        options -= XML_PARSE_NOERROR;
15107
581k
    }
15108
1.60M
#ifdef LIBXML_SAX1_ENABLED
15109
1.60M
    if (options & XML_PARSE_SAX1) {
15110
671k
        ctxt->sax->startElement = xmlSAX2StartElement;
15111
671k
        ctxt->sax->endElement = xmlSAX2EndElement;
15112
671k
        ctxt->sax->startElementNs = NULL;
15113
671k
        ctxt->sax->endElementNs = NULL;
15114
671k
        ctxt->sax->initialized = 1;
15115
671k
        options -= XML_PARSE_SAX1;
15116
671k
  ctxt->options |= XML_PARSE_SAX1;
15117
671k
    }
15118
1.60M
#endif /* LIBXML_SAX1_ENABLED */
15119
1.60M
    if (options & XML_PARSE_NODICT) {
15120
737k
        ctxt->dictNames = 0;
15121
737k
        options -= XML_PARSE_NODICT;
15122
737k
  ctxt->options |= XML_PARSE_NODICT;
15123
867k
    } else {
15124
867k
        ctxt->dictNames = 1;
15125
867k
    }
15126
1.60M
    if (options & XML_PARSE_NOCDATA) {
15127
767k
        ctxt->sax->cdataBlock = NULL;
15128
767k
        options -= XML_PARSE_NOCDATA;
15129
767k
  ctxt->options |= XML_PARSE_NOCDATA;
15130
767k
    }
15131
1.60M
    if (options & XML_PARSE_NSCLEAN) {
15132
813k
  ctxt->options |= XML_PARSE_NSCLEAN;
15133
813k
        options -= XML_PARSE_NSCLEAN;
15134
813k
    }
15135
1.60M
    if (options & XML_PARSE_NONET) {
15136
679k
  ctxt->options |= XML_PARSE_NONET;
15137
679k
        options -= XML_PARSE_NONET;
15138
679k
    }
15139
1.60M
    if (options & XML_PARSE_COMPACT) {
15140
854k
  ctxt->options |= XML_PARSE_COMPACT;
15141
854k
        options -= XML_PARSE_COMPACT;
15142
854k
    }
15143
1.60M
    if (options & XML_PARSE_OLD10) {
15144
570k
  ctxt->options |= XML_PARSE_OLD10;
15145
570k
        options -= XML_PARSE_OLD10;
15146
570k
    }
15147
1.60M
    if (options & XML_PARSE_NOBASEFIX) {
15148
566k
  ctxt->options |= XML_PARSE_NOBASEFIX;
15149
566k
        options -= XML_PARSE_NOBASEFIX;
15150
566k
    }
15151
1.60M
    if (options & XML_PARSE_HUGE) {
15152
549k
  ctxt->options |= XML_PARSE_HUGE;
15153
549k
        options -= XML_PARSE_HUGE;
15154
549k
        if (ctxt->dict != NULL)
15155
549k
            xmlDictSetLimit(ctxt->dict, 0);
15156
549k
    }
15157
1.60M
    if (options & XML_PARSE_OLDSAX) {
15158
495k
  ctxt->options |= XML_PARSE_OLDSAX;
15159
495k
        options -= XML_PARSE_OLDSAX;
15160
495k
    }
15161
1.60M
    if (options & XML_PARSE_IGNORE_ENC) {
15162
634k
  ctxt->options |= XML_PARSE_IGNORE_ENC;
15163
634k
        options -= XML_PARSE_IGNORE_ENC;
15164
634k
    }
15165
1.60M
    if (options & XML_PARSE_BIG_LINES) {
15166
603k
  ctxt->options |= XML_PARSE_BIG_LINES;
15167
603k
        options -= XML_PARSE_BIG_LINES;
15168
603k
    }
15169
1.60M
    ctxt->linenumbers = 1;
15170
1.60M
    return (options);
15171
1.60M
}
15172
15173
/**
15174
 * xmlCtxtUseOptions:
15175
 * @ctxt: an XML parser context
15176
 * @options:  a combination of xmlParserOption
15177
 *
15178
 * Applies the options to the parser context
15179
 *
15180
 * Returns 0 in case of success, the set of unknown or unimplemented options
15181
 *         in case of error.
15182
 */
15183
int
15184
xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
15185
1.16M
{
15186
1.16M
   return(xmlCtxtUseOptionsInternal(ctxt, options, NULL));
15187
1.16M
}
15188
15189
/**
15190
 * xmlDoRead:
15191
 * @ctxt:  an XML parser context
15192
 * @URL:  the base URL to use for the document
15193
 * @encoding:  the document encoding, or NULL
15194
 * @options:  a combination of xmlParserOption
15195
 * @reuse:  keep the context for reuse
15196
 *
15197
 * Common front-end for the xmlRead functions
15198
 *
15199
 * Returns the resulting document tree or NULL
15200
 */
15201
static xmlDocPtr
15202
xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
15203
          int options, int reuse)
15204
441k
{
15205
441k
    xmlDocPtr ret;
15206
15207
441k
    xmlCtxtUseOptionsInternal(ctxt, options, encoding);
15208
441k
    if (encoding != NULL) {
15209
0
        xmlCharEncodingHandlerPtr hdlr;
15210
15211
0
  hdlr = xmlFindCharEncodingHandler(encoding);
15212
0
  if (hdlr != NULL)
15213
0
      xmlSwitchToEncoding(ctxt, hdlr);
15214
0
    }
15215
441k
    if ((URL != NULL) && (ctxt->input != NULL) &&
15216
441k
        (ctxt->input->filename == NULL))
15217
441k
        ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
15218
441k
    xmlParseDocument(ctxt);
15219
441k
    if ((ctxt->wellFormed) || ctxt->recovery)
15220
243k
        ret = ctxt->myDoc;
15221
198k
    else {
15222
198k
        ret = NULL;
15223
198k
  if (ctxt->myDoc != NULL) {
15224
170k
      xmlFreeDoc(ctxt->myDoc);
15225
170k
  }
15226
198k
    }
15227
441k
    ctxt->myDoc = NULL;
15228
441k
    if (!reuse) {
15229
441k
  xmlFreeParserCtxt(ctxt);
15230
441k
    }
15231
15232
441k
    return (ret);
15233
441k
}
15234
15235
/**
15236
 * xmlReadDoc:
15237
 * @cur:  a pointer to a zero terminated string
15238
 * @URL:  the base URL to use for the document
15239
 * @encoding:  the document encoding, or NULL
15240
 * @options:  a combination of xmlParserOption
15241
 *
15242
 * parse an XML in-memory document and build a tree.
15243
 *
15244
 * Returns the resulting document tree
15245
 */
15246
xmlDocPtr
15247
xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
15248
0
{
15249
0
    xmlParserCtxtPtr ctxt;
15250
15251
0
    if (cur == NULL)
15252
0
        return (NULL);
15253
0
    xmlInitParser();
15254
15255
0
    ctxt = xmlCreateDocParserCtxt(cur);
15256
0
    if (ctxt == NULL)
15257
0
        return (NULL);
15258
0
    return (xmlDoRead(ctxt, URL, encoding, options, 0));
15259
0
}
15260
15261
/**
15262
 * xmlReadFile:
15263
 * @filename:  a file or URL
15264
 * @encoding:  the document encoding, or NULL
15265
 * @options:  a combination of xmlParserOption
15266
 *
15267
 * parse an XML file from the filesystem or the network.
15268
 *
15269
 * Returns the resulting document tree
15270
 */
15271
xmlDocPtr
15272
xmlReadFile(const char *filename, const char *encoding, int options)
15273
0
{
15274
0
    xmlParserCtxtPtr ctxt;
15275
15276
0
    xmlInitParser();
15277
0
    ctxt = xmlCreateURLParserCtxt(filename, options);
15278
0
    if (ctxt == NULL)
15279
0
        return (NULL);
15280
0
    return (xmlDoRead(ctxt, NULL, encoding, options, 0));
15281
0
}
15282
15283
/**
15284
 * xmlReadMemory:
15285
 * @buffer:  a pointer to a char array
15286
 * @size:  the size of the array
15287
 * @URL:  the base URL to use for the document
15288
 * @encoding:  the document encoding, or NULL
15289
 * @options:  a combination of xmlParserOption
15290
 *
15291
 * parse an XML in-memory document and build a tree.
15292
 *
15293
 * Returns the resulting document tree
15294
 */
15295
xmlDocPtr
15296
xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
15297
443k
{
15298
443k
    xmlParserCtxtPtr ctxt;
15299
15300
443k
    xmlInitParser();
15301
443k
    ctxt = xmlCreateMemoryParserCtxt(buffer, size);
15302
443k
    if (ctxt == NULL)
15303
1.03k
        return (NULL);
15304
441k
    return (xmlDoRead(ctxt, URL, encoding, options, 0));
15305
443k
}
15306
15307
/**
15308
 * xmlReadFd:
15309
 * @fd:  an open file descriptor
15310
 * @URL:  the base URL to use for the document
15311
 * @encoding:  the document encoding, or NULL
15312
 * @options:  a combination of xmlParserOption
15313
 *
15314
 * parse an XML from a file descriptor and build a tree.
15315
 * NOTE that the file descriptor will not be closed when the
15316
 *      reader is closed or reset.
15317
 *
15318
 * Returns the resulting document tree
15319
 */
15320
xmlDocPtr
15321
xmlReadFd(int fd, const char *URL, const char *encoding, int options)
15322
0
{
15323
0
    xmlParserCtxtPtr ctxt;
15324
0
    xmlParserInputBufferPtr input;
15325
0
    xmlParserInputPtr stream;
15326
15327
0
    if (fd < 0)
15328
0
        return (NULL);
15329
0
    xmlInitParser();
15330
15331
0
    input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15332
0
    if (input == NULL)
15333
0
        return (NULL);
15334
0
    input->closecallback = NULL;
15335
0
    ctxt = xmlNewParserCtxt();
15336
0
    if (ctxt == NULL) {
15337
0
        xmlFreeParserInputBuffer(input);
15338
0
        return (NULL);
15339
0
    }
15340
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15341
0
    if (stream == NULL) {
15342
0
        xmlFreeParserInputBuffer(input);
15343
0
  xmlFreeParserCtxt(ctxt);
15344
0
        return (NULL);
15345
0
    }
15346
0
    inputPush(ctxt, stream);
15347
0
    return (xmlDoRead(ctxt, URL, encoding, options, 0));
15348
0
}
15349
15350
/**
15351
 * xmlReadIO:
15352
 * @ioread:  an I/O read function
15353
 * @ioclose:  an I/O close function
15354
 * @ioctx:  an I/O handler
15355
 * @URL:  the base URL to use for the document
15356
 * @encoding:  the document encoding, or NULL
15357
 * @options:  a combination of xmlParserOption
15358
 *
15359
 * parse an XML document from I/O functions and source and build a tree.
15360
 *
15361
 * Returns the resulting document tree
15362
 */
15363
xmlDocPtr
15364
xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
15365
          void *ioctx, const char *URL, const char *encoding, int options)
15366
0
{
15367
0
    xmlParserCtxtPtr ctxt;
15368
0
    xmlParserInputBufferPtr input;
15369
0
    xmlParserInputPtr stream;
15370
15371
0
    if (ioread == NULL)
15372
0
        return (NULL);
15373
0
    xmlInitParser();
15374
15375
0
    input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15376
0
                                         XML_CHAR_ENCODING_NONE);
15377
0
    if (input == NULL) {
15378
0
        if (ioclose != NULL)
15379
0
            ioclose(ioctx);
15380
0
        return (NULL);
15381
0
    }
15382
0
    ctxt = xmlNewParserCtxt();
15383
0
    if (ctxt == NULL) {
15384
0
        xmlFreeParserInputBuffer(input);
15385
0
        return (NULL);
15386
0
    }
15387
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15388
0
    if (stream == NULL) {
15389
0
        xmlFreeParserInputBuffer(input);
15390
0
  xmlFreeParserCtxt(ctxt);
15391
0
        return (NULL);
15392
0
    }
15393
0
    inputPush(ctxt, stream);
15394
0
    return (xmlDoRead(ctxt, URL, encoding, options, 0));
15395
0
}
15396
15397
/**
15398
 * xmlCtxtReadDoc:
15399
 * @ctxt:  an XML parser context
15400
 * @cur:  a pointer to a zero terminated string
15401
 * @URL:  the base URL to use for the document
15402
 * @encoding:  the document encoding, or NULL
15403
 * @options:  a combination of xmlParserOption
15404
 *
15405
 * parse an XML in-memory document and build a tree.
15406
 * This reuses the existing @ctxt parser context
15407
 *
15408
 * Returns the resulting document tree
15409
 */
15410
xmlDocPtr
15411
xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
15412
               const char *URL, const char *encoding, int options)
15413
0
{
15414
0
    if (cur == NULL)
15415
0
        return (NULL);
15416
0
    return (xmlCtxtReadMemory(ctxt, (const char *) cur, xmlStrlen(cur), URL,
15417
0
                              encoding, options));
15418
0
}
15419
15420
/**
15421
 * xmlCtxtReadFile:
15422
 * @ctxt:  an XML parser context
15423
 * @filename:  a file or URL
15424
 * @encoding:  the document encoding, or NULL
15425
 * @options:  a combination of xmlParserOption
15426
 *
15427
 * parse an XML file from the filesystem or the network.
15428
 * This reuses the existing @ctxt parser context
15429
 *
15430
 * Returns the resulting document tree
15431
 */
15432
xmlDocPtr
15433
xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
15434
                const char *encoding, int options)
15435
0
{
15436
0
    xmlParserInputPtr stream;
15437
15438
0
    if (filename == NULL)
15439
0
        return (NULL);
15440
0
    if (ctxt == NULL)
15441
0
        return (NULL);
15442
0
    xmlInitParser();
15443
15444
0
    xmlCtxtReset(ctxt);
15445
15446
0
    stream = xmlLoadExternalEntity(filename, NULL, ctxt);
15447
0
    if (stream == NULL) {
15448
0
        return (NULL);
15449
0
    }
15450
0
    inputPush(ctxt, stream);
15451
0
    return (xmlDoRead(ctxt, NULL, encoding, options, 1));
15452
0
}
15453
15454
/**
15455
 * xmlCtxtReadMemory:
15456
 * @ctxt:  an XML parser context
15457
 * @buffer:  a pointer to a char array
15458
 * @size:  the size of the array
15459
 * @URL:  the base URL to use for the document
15460
 * @encoding:  the document encoding, or NULL
15461
 * @options:  a combination of xmlParserOption
15462
 *
15463
 * parse an XML in-memory document and build a tree.
15464
 * This reuses the existing @ctxt parser context
15465
 *
15466
 * Returns the resulting document tree
15467
 */
15468
xmlDocPtr
15469
xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
15470
                  const char *URL, const char *encoding, int options)
15471
0
{
15472
0
    xmlParserInputBufferPtr input;
15473
0
    xmlParserInputPtr stream;
15474
15475
0
    if (ctxt == NULL)
15476
0
        return (NULL);
15477
0
    if (buffer == NULL)
15478
0
        return (NULL);
15479
0
    xmlInitParser();
15480
15481
0
    xmlCtxtReset(ctxt);
15482
15483
0
    input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
15484
0
    if (input == NULL) {
15485
0
  return(NULL);
15486
0
    }
15487
15488
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15489
0
    if (stream == NULL) {
15490
0
  xmlFreeParserInputBuffer(input);
15491
0
  return(NULL);
15492
0
    }
15493
15494
0
    inputPush(ctxt, stream);
15495
0
    return (xmlDoRead(ctxt, URL, encoding, options, 1));
15496
0
}
15497
15498
/**
15499
 * xmlCtxtReadFd:
15500
 * @ctxt:  an XML parser context
15501
 * @fd:  an open file descriptor
15502
 * @URL:  the base URL to use for the document
15503
 * @encoding:  the document encoding, or NULL
15504
 * @options:  a combination of xmlParserOption
15505
 *
15506
 * parse an XML from a file descriptor and build a tree.
15507
 * This reuses the existing @ctxt parser context
15508
 * NOTE that the file descriptor will not be closed when the
15509
 *      reader is closed or reset.
15510
 *
15511
 * Returns the resulting document tree
15512
 */
15513
xmlDocPtr
15514
xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
15515
              const char *URL, const char *encoding, int options)
15516
0
{
15517
0
    xmlParserInputBufferPtr input;
15518
0
    xmlParserInputPtr stream;
15519
15520
0
    if (fd < 0)
15521
0
        return (NULL);
15522
0
    if (ctxt == NULL)
15523
0
        return (NULL);
15524
0
    xmlInitParser();
15525
15526
0
    xmlCtxtReset(ctxt);
15527
15528
15529
0
    input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15530
0
    if (input == NULL)
15531
0
        return (NULL);
15532
0
    input->closecallback = NULL;
15533
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15534
0
    if (stream == NULL) {
15535
0
        xmlFreeParserInputBuffer(input);
15536
0
        return (NULL);
15537
0
    }
15538
0
    inputPush(ctxt, stream);
15539
0
    return (xmlDoRead(ctxt, URL, encoding, options, 1));
15540
0
}
15541
15542
/**
15543
 * xmlCtxtReadIO:
15544
 * @ctxt:  an XML parser context
15545
 * @ioread:  an I/O read function
15546
 * @ioclose:  an I/O close function
15547
 * @ioctx:  an I/O handler
15548
 * @URL:  the base URL to use for the document
15549
 * @encoding:  the document encoding, or NULL
15550
 * @options:  a combination of xmlParserOption
15551
 *
15552
 * parse an XML document from I/O functions and source and build a tree.
15553
 * This reuses the existing @ctxt parser context
15554
 *
15555
 * Returns the resulting document tree
15556
 */
15557
xmlDocPtr
15558
xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
15559
              xmlInputCloseCallback ioclose, void *ioctx,
15560
        const char *URL,
15561
              const char *encoding, int options)
15562
0
{
15563
0
    xmlParserInputBufferPtr input;
15564
0
    xmlParserInputPtr stream;
15565
15566
0
    if (ioread == NULL)
15567
0
        return (NULL);
15568
0
    if (ctxt == NULL)
15569
0
        return (NULL);
15570
0
    xmlInitParser();
15571
15572
0
    xmlCtxtReset(ctxt);
15573
15574
0
    input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15575
0
                                         XML_CHAR_ENCODING_NONE);
15576
0
    if (input == NULL) {
15577
0
        if (ioclose != NULL)
15578
0
            ioclose(ioctx);
15579
0
        return (NULL);
15580
0
    }
15581
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15582
0
    if (stream == NULL) {
15583
0
        xmlFreeParserInputBuffer(input);
15584
0
        return (NULL);
15585
0
    }
15586
0
    inputPush(ctxt, stream);
15587
0
    return (xmlDoRead(ctxt, URL, encoding, options, 1));
15588
0
}
15589