Coverage Report

Created: 2024-01-18 09:16

/src/libxml2/parser.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3
 *            implemented on top of the SAX interfaces
4
 *
5
 * References:
6
 *   The XML specification:
7
 *     http://www.w3.org/TR/REC-xml
8
 *   Original 1.0 version:
9
 *     http://www.w3.org/TR/1998/REC-xml-19980210
10
 *   XML second edition working draft
11
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
12
 *
13
 * Okay this is a big file, the parser core is around 7000 lines, then it
14
 * is followed by the progressive parser top routines, then the various
15
 * high level APIs to call the parser and a few miscellaneous functions.
16
 * A number of helper functions and deprecated ones have been moved to
17
 * parserInternals.c to reduce this file size.
18
 * As much as possible the functions are associated with their relative
19
 * production in the XML specification. A few productions defining the
20
 * different ranges of characters are actually implemented either in
21
 * parserInternals.h or parserInternals.c
22
 * The DOM tree build is realized from the default SAX callbacks in
23
 * the module SAX.c.
24
 * The routines doing the validation checks are in valid.c and called either
25
 * from the SAX callbacks or as standalone functions using a preparsed
26
 * document.
27
 *
28
 * See Copyright for the status of this software.
29
 *
30
 * daniel@veillard.com
31
 */
32
33
/* To avoid EBCDIC trouble when parsing on zOS */
34
#if defined(__MVS__)
35
#pragma convert("ISO8859-1")
36
#endif
37
38
#define IN_LIBXML
39
#include "libxml.h"
40
41
#if defined(_WIN32)
42
#define XML_DIR_SEP '\\'
43
#else
44
#define XML_DIR_SEP '/'
45
#endif
46
47
#include <stdlib.h>
48
#include <limits.h>
49
#include <string.h>
50
#include <stdarg.h>
51
#include <stddef.h>
52
#include <ctype.h>
53
#include <stdlib.h>
54
#include <libxml/xmlmemory.h>
55
#include <libxml/threads.h>
56
#include <libxml/globals.h>
57
#include <libxml/tree.h>
58
#include <libxml/parser.h>
59
#include <libxml/parserInternals.h>
60
#include <libxml/HTMLparser.h>
61
#include <libxml/valid.h>
62
#include <libxml/entities.h>
63
#include <libxml/xmlerror.h>
64
#include <libxml/encoding.h>
65
#include <libxml/xmlIO.h>
66
#include <libxml/uri.h>
67
#ifdef LIBXML_CATALOG_ENABLED
68
#include <libxml/catalog.h>
69
#endif
70
#ifdef LIBXML_SCHEMAS_ENABLED
71
#include <libxml/xmlschemastypes.h>
72
#include <libxml/relaxng.h>
73
#endif
74
#if defined(LIBXML_XPATH_ENABLED) || defined(LIBXML_SCHEMAS_ENABLED)
75
#include <libxml/xpath.h>
76
#endif
77
78
#include "private/buf.h"
79
#include "private/enc.h"
80
#include "private/error.h"
81
#include "private/html.h"
82
#include "private/io.h"
83
#include "private/parser.h"
84
#include "private/threads.h"
85
86
/*
 * Per-start-tag record holding two strings (prefix, URI) and two integers
 * (line, nsNr).
 * NOTE(review): presumably the namespace prefix/URI of the element, the
 * source line of its start tag and a namespace count; the code that fills
 * and reads this struct is outside this excerpt -- confirm before relying
 * on these meanings.
 */
struct _xmlStartTag {
87
    const xmlChar *prefix;
88
    const xmlChar *URI;
89
    int line;
90
    int nsNr;
91
};
92
93
/*
 * Forward declarations of file-local (static) routines so they can be
 * called before their definitions. xmlFatalErr is needed this early
 * because xmlParserEntityCheck calls it before it is defined.
 */
static void
94
xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info);
95
96
static xmlParserCtxtPtr
97
xmlCreateEntityParserCtxtInternal(xmlSAXHandlerPtr sax, void *userData,
98
        const xmlChar *URL, const xmlChar *ID, const xmlChar *base,
99
        xmlParserCtxtPtr pctx);
100
101
static void xmlHaltParser(xmlParserCtxtPtr ctxt);
102
103
static int
104
xmlParseElementStart(xmlParserCtxtPtr ctxt);
105
106
static void
107
xmlParseElementEnd(xmlParserCtxtPtr ctxt);
108
109
/************************************************************************
110
 *                  *
111
 *  Arbitrary limits set in the parser. See XML_PARSE_HUGE    *
112
 *                  *
113
 ************************************************************************/
114
115
19.8M
/* NOTE(review): not referenced in this excerpt; presumably an upper bound
 * on lengths when XML_PARSE_HUGE is set -- confirm against its users. */
#define XML_MAX_HUGE_LENGTH 1000000000
116
117
6.14k
/* Entity sizes below XML_PARSER_BIG_ENTITY are accepted by
 * xmlParserEntityCheck without any further ratio check. */
#define XML_PARSER_BIG_ENTITY 1000
118
/* NOTE(review): XML_PARSER_LOT_ENTITY is not referenced in this excerpt. */
#define XML_PARSER_LOT_ENTITY 5000
119
120
/*
121
 * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity
122
 *    replacement over the size in bytes of the input indicates that you have
123
 *    an exponential behaviour. A value of 10 corresponds to at least 3 entity
124
 *    replacements per byte of input.
125
 */
126
1.18M
#define XML_PARSER_NON_LINEAR 10
127
128
/*
129
 * xmlParserEntityCheck
130
 *
131
 * Function to check non-linear entity expansion behaviour
132
 * This is here to detect and stop exponential linear entity expansion
133
 * This is not a limitation of the parser but a safety
134
 * boundary feature. It can be disabled with the XML_PARSE_HUGE
135
 * parser option.
136
 */
137
static int
138
xmlParserEntityCheck(xmlParserCtxtPtr ctxt, size_t size,
139
                     xmlEntityPtr ent, size_t replacement)
140
14.0M
{
141
14.0M
    size_t consumed = 0;
142
14.0M
    int i;
143
144
14.0M
    if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE))
145
12.4M
        return (0);
146
1.55M
    if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
147
5.22k
        return (1);
148
149
    /*
150
     * This may look absurd but is needed to detect
151
     * entities problems
152
     */
153
1.54M
    if ((ent != NULL) && (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
154
1.54M
  (ent->content != NULL) && (ent->checked == 0) &&
155
1.54M
  (ctxt->errNo != XML_ERR_ENTITY_LOOP)) {
156
40.0k
  unsigned long oldnbent = ctxt->nbentities, diff;
157
40.0k
  xmlChar *rep;
158
159
40.0k
  ent->checked = 1;
160
161
40.0k
        ++ctxt->depth;
162
40.0k
  rep = xmlStringDecodeEntities(ctxt, ent->content,
163
40.0k
          XML_SUBSTITUTE_REF, 0, 0, 0);
164
40.0k
        --ctxt->depth;
165
40.0k
  if ((rep == NULL) || (ctxt->errNo == XML_ERR_ENTITY_LOOP)) {
166
1.44k
      ent->content[0] = 0;
167
1.44k
  }
168
169
40.0k
        diff = ctxt->nbentities - oldnbent + 1;
170
40.0k
        if (diff > INT_MAX / 2)
171
0
            diff = INT_MAX / 2;
172
40.0k
  ent->checked = diff * 2;
173
40.0k
  if (rep != NULL) {
174
38.9k
      if (xmlStrchr(rep, '<'))
175
4.59k
    ent->checked |= 1;
176
38.9k
      xmlFree(rep);
177
38.9k
      rep = NULL;
178
38.9k
  }
179
40.0k
    }
180
181
    /*
182
     * Prevent entity exponential check, not just replacement while
183
     * parsing the DTD
184
     * The check is potentially costly so do that only once in a thousand
185
     */
186
1.54M
    if ((ctxt->instate == XML_PARSER_DTD) && (ctxt->nbentities > 10000) &&
187
1.54M
        (ctxt->nbentities % 1024 == 0)) {
188
0
  for (i = 0;i < ctxt->inputNr;i++) {
189
0
      consumed += ctxt->inputTab[i]->consumed +
190
0
                 (ctxt->inputTab[i]->cur - ctxt->inputTab[i]->base);
191
0
  }
192
0
  if (ctxt->nbentities > consumed * XML_PARSER_NON_LINEAR) {
193
0
      xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
194
0
      ctxt->instate = XML_PARSER_EOF;
195
0
      return (1);
196
0
  }
197
0
  consumed = 0;
198
0
    }
199
200
201
202
1.54M
    if (replacement != 0) {
203
18.6k
  if (replacement < XML_MAX_TEXT_LENGTH)
204
18.6k
      return(0);
205
206
        /*
207
   * If the volume of entity copy reaches 10 times the
208
   * amount of parsed data and over the large text threshold
209
   * then that's very likely to be an abuse.
210
   */
211
0
        if (ctxt->input != NULL) {
212
0
      consumed = ctxt->input->consumed +
213
0
                 (ctxt->input->cur - ctxt->input->base);
214
0
  }
215
0
        consumed += ctxt->sizeentities;
216
217
0
        if (replacement < XML_PARSER_NON_LINEAR * consumed)
218
0
      return(0);
219
1.52M
    } else if (size != 0) {
220
        /*
221
         * Do the check based on the replacement size of the entity
222
         */
223
6.14k
        if (size < XML_PARSER_BIG_ENTITY)
224
4.02k
      return(0);
225
226
        /*
227
         * A limit on the amount of text data reasonably used
228
         */
229
2.12k
        if (ctxt->input != NULL) {
230
2.12k
            consumed = ctxt->input->consumed +
231
2.12k
                (ctxt->input->cur - ctxt->input->base);
232
2.12k
        }
233
2.12k
        consumed += ctxt->sizeentities;
234
235
2.12k
        if ((size < XML_PARSER_NON_LINEAR * consumed) &&
236
2.12k
      (ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed))
237
1.73k
            return (0);
238
1.52M
    } else if (ent != NULL) {
239
        /*
240
         * use the number of parsed entities in the replacement
241
         */
242
1.18M
        size = ent->checked / 2;
243
244
        /*
245
         * The amount of data parsed counting entities size only once
246
         */
247
1.18M
        if (ctxt->input != NULL) {
248
1.18M
            consumed = ctxt->input->consumed +
249
1.18M
                (ctxt->input->cur - ctxt->input->base);
250
1.18M
        }
251
1.18M
        consumed += ctxt->sizeentities;
252
253
        /*
254
         * Check the density of entities for the amount of data
255
   * knowing an entity reference will take at least 3 bytes
256
         */
257
1.18M
        if (size * 3 < consumed * XML_PARSER_NON_LINEAR)
258
1.18M
            return (0);
259
1.18M
    } else {
260
        /*
261
         * strange we got no data for checking
262
         */
263
338k
  if (((ctxt->lastError.code != XML_ERR_UNDECLARED_ENTITY) &&
264
338k
       (ctxt->lastError.code != XML_WAR_UNDECLARED_ENTITY)) ||
265
338k
      (ctxt->nbentities <= 10000))
266
338k
      return (0);
267
338k
    }
268
533
    xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
269
533
    return (1);
270
1.54M
}
271
272
/**
273
 * xmlParserMaxDepth:
274
 *
275
 * Arbitrary depth limit for the XML documents that we allow to
276
 * process; the default value is 256. This is not a limitation of the
277
 * parser but a safety boundary feature. It can be disabled with the
278
 * XML_PARSE_HUGE parser option.
279
 */
280
unsigned int xmlParserMaxDepth = 256;
281
282
283
284
/* NOTE(review): none of the macros below is referenced in this excerpt;
 * their users are further down the file. */
#define SAX2 1
285
200M
#define XML_PARSER_BIG_BUFFER_SIZE 300
286
711M
#define XML_PARSER_BUFFER_SIZE 100
287
458k
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
288
289
/**
290
 * XML_PARSER_CHUNK_SIZE
291
 *
292
 * When calling GROW, this is the minimal amount of data
293
 * the parser expects to have received. It is not a hard
294
 * limit but an optimization when reading strings like Names.
295
 * It is not strictly needed as long as the input's available characters
296
 * are followed by 0, which should be provided by the I/O level.
297
 */
298
90.1M
#define XML_PARSER_CHUNK_SIZE 100
299
300
/*
301
 * List of XML prefixed PI allowed by W3C specs.
302
 * The array is terminated by a NULL entry.
303
 */
304
static const char* const xmlW3CPIs[] = {
305
    "xml-stylesheet",
306
    "xml-model",
307
    NULL
308
};
309
310
311
/*
 * Forward declarations of file-local (static) routines; their definitions
 * are outside this excerpt. xmlAddEntityReference is only declared when
 * LIBXML_LEGACY_ENABLED is defined.
 */
/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
312
static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
313
                                              const xmlChar **str);
314
315
static xmlParserErrors
316
xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
317
                xmlSAXHandlerPtr sax,
318
          void *user_data, int depth, const xmlChar *URL,
319
          const xmlChar *ID, xmlNodePtr *list);
320
321
static int
322
xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
323
                          const char *encoding);
324
#ifdef LIBXML_LEGACY_ENABLED
325
static void
326
xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
327
                      xmlNodePtr lastNode);
328
#endif /* LIBXML_LEGACY_ENABLED */
329
330
static xmlParserErrors
331
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
332
          const xmlChar *string, void *user_data, xmlNodePtr *lst);
333
334
static int
335
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
336
337
/************************************************************************
338
 *                  *
339
 *    Some factorized error routines        *
340
 *                  *
341
 ************************************************************************/
342
343
/**
344
 * xmlErrAttributeDup:
345
 * @ctxt:  an XML parser context
346
 * @prefix:  the attribute prefix
347
 * @localname:  the attribute localname
348
 *
349
 * Handle a redefinition of attribute error
350
 */
351
static void
352
xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
353
                   const xmlChar * localname)
354
28.3k
{
355
28.3k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
356
28.3k
        (ctxt->instate == XML_PARSER_EOF))
357
0
  return;
358
28.3k
    if (ctxt != NULL)
359
28.3k
  ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
360
361
28.3k
    if (prefix == NULL)
362
22.5k
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
363
22.5k
                        XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
364
22.5k
                        (const char *) localname, NULL, NULL, 0, 0,
365
22.5k
                        "Attribute %s redefined\n", localname);
366
5.85k
    else
367
5.85k
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
368
5.85k
                        XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
369
5.85k
                        (const char *) prefix, (const char *) localname,
370
5.85k
                        NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
371
5.85k
                        localname);
372
28.3k
    if (ctxt != NULL) {
373
28.3k
  ctxt->wellFormed = 0;
374
28.3k
  if (ctxt->recovery == 0)
375
19.8k
      ctxt->disableSAX = 1;
376
28.3k
    }
377
28.3k
}
378
379
/**
380
 * xmlFatalErr:
381
 * @ctxt:  an XML parser context
382
 * @error:  the error number
383
 * @extra:  extra information string
384
 *
385
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
386
 */
387
static void
388
xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
389
7.32M
{
390
7.32M
    const char *errmsg;
391
392
7.32M
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
393
7.32M
        (ctxt->instate == XML_PARSER_EOF))
394
451k
  return;
395
6.87M
    switch (error) {
396
49.2k
        case XML_ERR_INVALID_HEX_CHARREF:
397
49.2k
            errmsg = "CharRef: invalid hexadecimal value";
398
49.2k
            break;
399
185k
        case XML_ERR_INVALID_DEC_CHARREF:
400
185k
            errmsg = "CharRef: invalid decimal value";
401
185k
            break;
402
0
        case XML_ERR_INVALID_CHARREF:
403
0
            errmsg = "CharRef: invalid value";
404
0
            break;
405
234k
        case XML_ERR_INTERNAL_ERROR:
406
234k
            errmsg = "internal error";
407
234k
            break;
408
0
        case XML_ERR_PEREF_AT_EOF:
409
0
            errmsg = "PEReference at end of document";
410
0
            break;
411
0
        case XML_ERR_PEREF_IN_PROLOG:
412
0
            errmsg = "PEReference in prolog";
413
0
            break;
414
0
        case XML_ERR_PEREF_IN_EPILOG:
415
0
            errmsg = "PEReference in epilog";
416
0
            break;
417
0
        case XML_ERR_PEREF_NO_NAME:
418
0
            errmsg = "PEReference: no name";
419
0
            break;
420
154k
        case XML_ERR_PEREF_SEMICOL_MISSING:
421
154k
            errmsg = "PEReference: expecting ';'";
422
154k
            break;
423
526k
        case XML_ERR_ENTITY_LOOP:
424
526k
            errmsg = "Detected an entity reference loop";
425
526k
            break;
426
0
        case XML_ERR_ENTITY_NOT_STARTED:
427
0
            errmsg = "EntityValue: \" or ' expected";
428
0
            break;
429
542
        case XML_ERR_ENTITY_PE_INTERNAL:
430
542
            errmsg = "PEReferences forbidden in internal subset";
431
542
            break;
432
1.72k
        case XML_ERR_ENTITY_NOT_FINISHED:
433
1.72k
            errmsg = "EntityValue: \" or ' expected";
434
1.72k
            break;
435
194k
        case XML_ERR_ATTRIBUTE_NOT_STARTED:
436
194k
            errmsg = "AttValue: \" or ' expected";
437
194k
            break;
438
376k
        case XML_ERR_LT_IN_ATTRIBUTE:
439
376k
            errmsg = "Unescaped '<' not allowed in attributes values";
440
376k
            break;
441
7.72k
        case XML_ERR_LITERAL_NOT_STARTED:
442
7.72k
            errmsg = "SystemLiteral \" or ' expected";
443
7.72k
            break;
444
6.45k
        case XML_ERR_LITERAL_NOT_FINISHED:
445
6.45k
            errmsg = "Unfinished System or Public ID \" or ' expected";
446
6.45k
            break;
447
146k
        case XML_ERR_MISPLACED_CDATA_END:
448
146k
            errmsg = "Sequence ']]>' not allowed in content";
449
146k
            break;
450
6.86k
        case XML_ERR_URI_REQUIRED:
451
6.86k
            errmsg = "SYSTEM or PUBLIC, the URI is missing";
452
6.86k
            break;
453
861
        case XML_ERR_PUBID_REQUIRED:
454
861
            errmsg = "PUBLIC, the Public Identifier is missing";
455
861
            break;
456
142k
        case XML_ERR_HYPHEN_IN_COMMENT:
457
142k
            errmsg = "Comment must not contain '--' (double-hyphen)";
458
142k
            break;
459
54.8k
        case XML_ERR_PI_NOT_STARTED:
460
54.8k
            errmsg = "xmlParsePI : no target name";
461
54.8k
            break;
462
1.76k
        case XML_ERR_RESERVED_XML_NAME:
463
1.76k
            errmsg = "Invalid PI name";
464
1.76k
            break;
465
2.29k
        case XML_ERR_NOTATION_NOT_STARTED:
466
2.29k
            errmsg = "NOTATION: Name expected here";
467
2.29k
            break;
468
5.90k
        case XML_ERR_NOTATION_NOT_FINISHED:
469
5.90k
            errmsg = "'>' required to close NOTATION declaration";
470
5.90k
            break;
471
69.7k
        case XML_ERR_VALUE_REQUIRED:
472
69.7k
            errmsg = "Entity value required";
473
69.7k
            break;
474
1.07k
        case XML_ERR_URI_FRAGMENT:
475
1.07k
            errmsg = "Fragment not allowed";
476
1.07k
            break;
477
34.9k
        case XML_ERR_ATTLIST_NOT_STARTED:
478
34.9k
            errmsg = "'(' required to start ATTLIST enumeration";
479
34.9k
            break;
480
3.07k
        case XML_ERR_NMTOKEN_REQUIRED:
481
3.07k
            errmsg = "NmToken expected in ATTLIST enumeration";
482
3.07k
            break;
483
74.0k
        case XML_ERR_ATTLIST_NOT_FINISHED:
484
74.0k
            errmsg = "')' required to finish ATTLIST enumeration";
485
74.0k
            break;
486
2.77k
        case XML_ERR_MIXED_NOT_STARTED:
487
2.77k
            errmsg = "MixedContentDecl : '|' or ')*' expected";
488
2.77k
            break;
489
0
        case XML_ERR_PCDATA_REQUIRED:
490
0
            errmsg = "MixedContentDecl : '#PCDATA' expected";
491
0
            break;
492
18.3k
        case XML_ERR_ELEMCONTENT_NOT_STARTED:
493
18.3k
            errmsg = "ContentDecl : Name or '(' expected";
494
18.3k
            break;
495
79.8k
        case XML_ERR_ELEMCONTENT_NOT_FINISHED:
496
79.8k
            errmsg = "ContentDecl : ',' '|' or ')' expected";
497
79.8k
            break;
498
0
        case XML_ERR_PEREF_IN_INT_SUBSET:
499
0
            errmsg =
500
0
                "PEReference: forbidden within markup decl in internal subset";
501
0
            break;
502
1.10M
        case XML_ERR_GT_REQUIRED:
503
1.10M
            errmsg = "expected '>'";
504
1.10M
            break;
505
111
        case XML_ERR_CONDSEC_INVALID:
506
111
            errmsg = "XML conditional section '[' expected";
507
111
            break;
508
9.13k
        case XML_ERR_EXT_SUBSET_NOT_FINISHED:
509
9.13k
            errmsg = "Content error in the external subset";
510
9.13k
            break;
511
741
        case XML_ERR_CONDSEC_INVALID_KEYWORD:
512
741
            errmsg =
513
741
                "conditional section INCLUDE or IGNORE keyword expected";
514
741
            break;
515
1.29k
        case XML_ERR_CONDSEC_NOT_FINISHED:
516
1.29k
            errmsg = "XML conditional section not closed";
517
1.29k
            break;
518
180
        case XML_ERR_XMLDECL_NOT_STARTED:
519
180
            errmsg = "Text declaration '<?xml' required";
520
180
            break;
521
197k
        case XML_ERR_XMLDECL_NOT_FINISHED:
522
197k
            errmsg = "parsing XML declaration: '?>' expected";
523
197k
            break;
524
0
        case XML_ERR_EXT_ENTITY_STANDALONE:
525
0
            errmsg = "external parsed entities cannot be standalone";
526
0
            break;
527
1.18M
        case XML_ERR_ENTITYREF_SEMICOL_MISSING:
528
1.18M
            errmsg = "EntityRef: expecting ';'";
529
1.18M
            break;
530
50.6k
        case XML_ERR_DOCTYPE_NOT_FINISHED:
531
50.6k
            errmsg = "DOCTYPE improperly terminated";
532
50.6k
            break;
533
0
        case XML_ERR_LTSLASH_REQUIRED:
534
0
            errmsg = "EndTag: '</' not found";
535
0
            break;
536
6.74k
        case XML_ERR_EQUAL_REQUIRED:
537
6.74k
            errmsg = "expected '='";
538
6.74k
            break;
539
27.0k
        case XML_ERR_STRING_NOT_CLOSED:
540
27.0k
            errmsg = "String not closed expecting \" or '";
541
27.0k
            break;
542
12.7k
        case XML_ERR_STRING_NOT_STARTED:
543
12.7k
            errmsg = "String not started expecting ' or \"";
544
12.7k
            break;
545
2.45k
        case XML_ERR_ENCODING_NAME:
546
2.45k
            errmsg = "Invalid XML encoding name";
547
2.45k
            break;
548
1.45k
        case XML_ERR_STANDALONE_VALUE:
549
1.45k
            errmsg = "standalone accepts only 'yes' or 'no'";
550
1.45k
            break;
551
22.5k
        case XML_ERR_DOCUMENT_EMPTY:
552
22.5k
            errmsg = "Document is empty";
553
22.5k
            break;
554
94.0k
        case XML_ERR_DOCUMENT_END:
555
94.0k
            errmsg = "Extra content at the end of the document";
556
94.0k
            break;
557
1.71M
        case XML_ERR_NOT_WELL_BALANCED:
558
1.71M
            errmsg = "chunk is not well balanced";
559
1.71M
            break;
560
0
        case XML_ERR_EXTRA_CONTENT:
561
0
            errmsg = "extra content at the end of well balanced chunk";
562
0
            break;
563
25.9k
        case XML_ERR_VERSION_MISSING:
564
25.9k
            errmsg = "Malformed declaration expecting version";
565
25.9k
            break;
566
3
        case XML_ERR_NAME_TOO_LONG:
567
3
            errmsg = "Name too long";
568
3
            break;
569
#if 0
570
        case:
571
            errmsg = "";
572
            break;
573
#endif
574
28.9k
        default:
575
28.9k
            errmsg = "Unregistered error message";
576
6.87M
    }
577
6.87M
    if (ctxt != NULL)
578
6.87M
  ctxt->errNo = error;
579
6.87M
    if (info == NULL) {
580
6.63M
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
581
6.63M
                        XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s\n",
582
6.63M
                        errmsg);
583
6.63M
    } else {
584
234k
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
585
234k
                        XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s: %s\n",
586
234k
                        errmsg, info);
587
234k
    }
588
6.87M
    if (ctxt != NULL) {
589
6.87M
  ctxt->wellFormed = 0;
590
6.87M
  if (ctxt->recovery == 0)
591
5.69M
      ctxt->disableSAX = 1;
592
6.87M
    }
593
6.87M
}
594
595
/**
596
 * xmlFatalErrMsg:
597
 * @ctxt:  an XML parser context
598
 * @error:  the error number
599
 * @msg:  the error message
600
 *
601
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
602
 */
603
static void LIBXML_ATTR_FORMAT(3,0)
604
xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
605
               const char *msg)
606
14.7M
{
607
14.7M
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
608
14.7M
        (ctxt->instate == XML_PARSER_EOF))
609
0
  return;
610
14.7M
    if (ctxt != NULL)
611
14.7M
  ctxt->errNo = error;
612
14.7M
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
613
14.7M
                    XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
614
14.7M
    if (ctxt != NULL) {
615
14.7M
  ctxt->wellFormed = 0;
616
14.7M
  if (ctxt->recovery == 0)
617
13.0M
      ctxt->disableSAX = 1;
618
14.7M
    }
619
14.7M
}
620
621
/**
622
 * xmlWarningMsg:
623
 * @ctxt:  an XML parser context
624
 * @error:  the error number
625
 * @msg:  the error message
626
 * @str1:  extra data
627
 * @str2:  extra data
628
 *
629
 * Handle a warning.
630
 */
631
static void LIBXML_ATTR_FORMAT(3,0)
632
xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
633
              const char *msg, const xmlChar *str1, const xmlChar *str2)
634
1.33M
{
635
1.33M
    xmlStructuredErrorFunc schannel = NULL;
636
637
1.33M
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
638
1.33M
        (ctxt->instate == XML_PARSER_EOF))
639
0
  return;
640
1.33M
    if ((ctxt != NULL) && (ctxt->sax != NULL) &&
641
1.33M
        (ctxt->sax->initialized == XML_SAX2_MAGIC))
642
751k
        schannel = ctxt->sax->serror;
643
1.33M
    if (ctxt != NULL) {
644
1.33M
        __xmlRaiseError(schannel,
645
1.33M
                    (ctxt->sax) ? ctxt->sax->warning : NULL,
646
1.33M
                    ctxt->userData,
647
1.33M
                    ctxt, NULL, XML_FROM_PARSER, error,
648
1.33M
                    XML_ERR_WARNING, NULL, 0,
649
1.33M
        (const char *) str1, (const char *) str2, NULL, 0, 0,
650
1.33M
        msg, (const char *) str1, (const char *) str2);
651
1.33M
    } else {
652
0
        __xmlRaiseError(schannel, NULL, NULL,
653
0
                    ctxt, NULL, XML_FROM_PARSER, error,
654
0
                    XML_ERR_WARNING, NULL, 0,
655
0
        (const char *) str1, (const char *) str2, NULL, 0, 0,
656
0
        msg, (const char *) str1, (const char *) str2);
657
0
    }
658
1.33M
}
659
660
/**
661
 * xmlValidityError:
662
 * @ctxt:  an XML parser context
663
 * @error:  the error number
664
 * @msg:  the error message
665
 * @str1:  extra data
666
 *
667
 * Handle a validity error.
668
 */
669
static void LIBXML_ATTR_FORMAT(3,0)
670
xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
671
              const char *msg, const xmlChar *str1, const xmlChar *str2)
672
21.6k
{
673
21.6k
    xmlStructuredErrorFunc schannel = NULL;
674
675
21.6k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
676
21.6k
        (ctxt->instate == XML_PARSER_EOF))
677
0
  return;
678
21.6k
    if (ctxt != NULL) {
679
21.6k
  ctxt->errNo = error;
680
21.6k
  if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
681
10.7k
      schannel = ctxt->sax->serror;
682
21.6k
    }
683
21.6k
    if (ctxt != NULL) {
684
21.6k
        __xmlRaiseError(schannel,
685
21.6k
                    ctxt->vctxt.error, ctxt->vctxt.userData,
686
21.6k
                    ctxt, NULL, XML_FROM_DTD, error,
687
21.6k
                    XML_ERR_ERROR, NULL, 0, (const char *) str1,
688
21.6k
        (const char *) str2, NULL, 0, 0,
689
21.6k
        msg, (const char *) str1, (const char *) str2);
690
21.6k
  ctxt->valid = 0;
691
21.6k
    } else {
692
0
        __xmlRaiseError(schannel, NULL, NULL,
693
0
                    ctxt, NULL, XML_FROM_DTD, error,
694
0
                    XML_ERR_ERROR, NULL, 0, (const char *) str1,
695
0
        (const char *) str2, NULL, 0, 0,
696
0
        msg, (const char *) str1, (const char *) str2);
697
0
    }
698
21.6k
}
699
700
/**
701
 * xmlFatalErrMsgInt:
702
 * @ctxt:  an XML parser context
703
 * @error:  the error number
704
 * @msg:  the error message
705
 * @val:  an integer value
706
 *
707
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
708
 */
709
static void LIBXML_ATTR_FORMAT(3,0)
710
xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
711
                  const char *msg, int val)
712
4.28M
{
713
4.28M
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
714
4.28M
        (ctxt->instate == XML_PARSER_EOF))
715
0
  return;
716
4.28M
    if (ctxt != NULL)
717
4.28M
  ctxt->errNo = error;
718
4.28M
    __xmlRaiseError(NULL, NULL, NULL,
719
4.28M
                    ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
720
4.28M
                    NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
721
4.28M
    if (ctxt != NULL) {
722
4.28M
  ctxt->wellFormed = 0;
723
4.28M
  if (ctxt->recovery == 0)
724
3.77M
      ctxt->disableSAX = 1;
725
4.28M
    }
726
4.28M
}
727
728
/**
729
 * xmlFatalErrMsgStrIntStr:
730
 * @ctxt:  an XML parser context
731
 * @error:  the error number
732
 * @msg:  the error message
733
 * @str1:  an string info
734
 * @val:  an integer value
735
 * @str2:  an string info
736
 *
737
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
738
 */
739
static void LIBXML_ATTR_FORMAT(3,0)
740
xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
741
                  const char *msg, const xmlChar *str1, int val,
742
      const xmlChar *str2)
743
5.55M
{
744
5.55M
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
745
5.55M
        (ctxt->instate == XML_PARSER_EOF))
746
0
  return;
747
5.55M
    if (ctxt != NULL)
748
5.55M
  ctxt->errNo = error;
749
5.55M
    __xmlRaiseError(NULL, NULL, NULL,
750
5.55M
                    ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
751
5.55M
                    NULL, 0, (const char *) str1, (const char *) str2,
752
5.55M
        NULL, val, 0, msg, str1, val, str2);
753
5.55M
    if (ctxt != NULL) {
754
5.55M
  ctxt->wellFormed = 0;
755
5.55M
  if (ctxt->recovery == 0)
756
5.25M
      ctxt->disableSAX = 1;
757
5.55M
    }
758
5.55M
}
759
760
/**
761
 * xmlFatalErrMsgStr:
762
 * @ctxt:  an XML parser context
763
 * @error:  the error number
764
 * @msg:  the error message
765
 * @val:  a string value
766
 *
767
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
768
 */
769
static void LIBXML_ATTR_FORMAT(3,0)
770
xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
771
                  const char *msg, const xmlChar * val)
772
5.33M
{
773
5.33M
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
774
5.33M
        (ctxt->instate == XML_PARSER_EOF))
775
0
  return;
776
5.33M
    if (ctxt != NULL)
777
5.33M
  ctxt->errNo = error;
778
5.33M
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
779
5.33M
                    XML_FROM_PARSER, error, XML_ERR_FATAL,
780
5.33M
                    NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
781
5.33M
                    val);
782
5.33M
    if (ctxt != NULL) {
783
5.33M
  ctxt->wellFormed = 0;
784
5.33M
  if (ctxt->recovery == 0)
785
4.70M
      ctxt->disableSAX = 1;
786
5.33M
    }
787
5.33M
}
788
789
/**
790
 * xmlErrMsgStr:
791
 * @ctxt:  an XML parser context
792
 * @error:  the error number
793
 * @msg:  the error message
794
 * @val:  a string value
795
 *
796
 * Handle a non fatal parser error
797
 */
798
static void LIBXML_ATTR_FORMAT(3,0)
799
xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
800
                  const char *msg, const xmlChar * val)
801
55.6k
{
802
55.6k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
803
55.6k
        (ctxt->instate == XML_PARSER_EOF))
804
0
  return;
805
55.6k
    if (ctxt != NULL)
806
55.6k
  ctxt->errNo = error;
807
55.6k
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
808
55.6k
                    XML_FROM_PARSER, error, XML_ERR_ERROR,
809
55.6k
                    NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
810
55.6k
                    val);
811
55.6k
}
812
813
/**
814
 * xmlNsErr:
815
 * @ctxt:  an XML parser context
816
 * @error:  the error number
817
 * @msg:  the message
818
 * @info1:  extra information string
819
 * @info2:  extra information string
820
 *
821
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
822
 */
823
static void LIBXML_ATTR_FORMAT(3,0)
824
xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
825
         const char *msg,
826
         const xmlChar * info1, const xmlChar * info2,
827
         const xmlChar * info3)
828
492k
{
829
492k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
830
492k
        (ctxt->instate == XML_PARSER_EOF))
831
0
  return;
832
492k
    if (ctxt != NULL)
833
492k
  ctxt->errNo = error;
834
492k
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
835
492k
                    XML_ERR_ERROR, NULL, 0, (const char *) info1,
836
492k
                    (const char *) info2, (const char *) info3, 0, 0, msg,
837
492k
                    info1, info2, info3);
838
492k
    if (ctxt != NULL)
839
492k
  ctxt->nsWellFormed = 0;
840
492k
}
841
842
/**
843
 * xmlNsWarn
844
 * @ctxt:  an XML parser context
845
 * @error:  the error number
846
 * @msg:  the message
847
 * @info1:  extra information string
848
 * @info2:  extra information string
849
 *
850
 * Handle a namespace warning error
851
 */
852
static void LIBXML_ATTR_FORMAT(3,0)
853
xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
854
         const char *msg,
855
         const xmlChar * info1, const xmlChar * info2,
856
         const xmlChar * info3)
857
6.37k
{
858
6.37k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
859
6.37k
        (ctxt->instate == XML_PARSER_EOF))
860
0
  return;
861
6.37k
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
862
6.37k
                    XML_ERR_WARNING, NULL, 0, (const char *) info1,
863
6.37k
                    (const char *) info2, (const char *) info3, 0, 0, msg,
864
6.37k
                    info1, info2, info3);
865
6.37k
}
866
867
/************************************************************************
868
 *                  *
869
 *    Library wide options          *
870
 *                  *
871
 ************************************************************************/
872
873
/**
874
  * xmlHasFeature:
875
  * @feature: the feature to be examined
876
  *
877
  * Examines if the library has been compiled with a given feature.
878
  *
879
  * Returns a non-zero value if the feature exist, otherwise zero.
880
  * Returns zero (0) if the feature does not exist or an unknown
881
  * unknown feature is requested, non-zero otherwise.
882
  */
883
int
884
xmlHasFeature(xmlFeature feature)
885
0
{
886
0
    switch (feature) {
887
0
  case XML_WITH_THREAD:
888
0
#ifdef LIBXML_THREAD_ENABLED
889
0
      return(1);
890
#else
891
      return(0);
892
#endif
893
0
        case XML_WITH_TREE:
894
0
#ifdef LIBXML_TREE_ENABLED
895
0
            return(1);
896
#else
897
            return(0);
898
#endif
899
0
        case XML_WITH_OUTPUT:
900
0
#ifdef LIBXML_OUTPUT_ENABLED
901
0
            return(1);
902
#else
903
            return(0);
904
#endif
905
0
        case XML_WITH_PUSH:
906
0
#ifdef LIBXML_PUSH_ENABLED
907
0
            return(1);
908
#else
909
            return(0);
910
#endif
911
0
        case XML_WITH_READER:
912
0
#ifdef LIBXML_READER_ENABLED
913
0
            return(1);
914
#else
915
            return(0);
916
#endif
917
0
        case XML_WITH_PATTERN:
918
0
#ifdef LIBXML_PATTERN_ENABLED
919
0
            return(1);
920
#else
921
            return(0);
922
#endif
923
0
        case XML_WITH_WRITER:
924
0
#ifdef LIBXML_WRITER_ENABLED
925
0
            return(1);
926
#else
927
            return(0);
928
#endif
929
0
        case XML_WITH_SAX1:
930
0
#ifdef LIBXML_SAX1_ENABLED
931
0
            return(1);
932
#else
933
            return(0);
934
#endif
935
0
        case XML_WITH_FTP:
936
#ifdef LIBXML_FTP_ENABLED
937
            return(1);
938
#else
939
0
            return(0);
940
0
#endif
941
0
        case XML_WITH_HTTP:
942
#ifdef LIBXML_HTTP_ENABLED
943
            return(1);
944
#else
945
0
            return(0);
946
0
#endif
947
0
        case XML_WITH_VALID:
948
0
#ifdef LIBXML_VALID_ENABLED
949
0
            return(1);
950
#else
951
            return(0);
952
#endif
953
0
        case XML_WITH_HTML:
954
0
#ifdef LIBXML_HTML_ENABLED
955
0
            return(1);
956
#else
957
            return(0);
958
#endif
959
0
        case XML_WITH_LEGACY:
960
#ifdef LIBXML_LEGACY_ENABLED
961
            return(1);
962
#else
963
0
            return(0);
964
0
#endif
965
0
        case XML_WITH_C14N:
966
0
#ifdef LIBXML_C14N_ENABLED
967
0
            return(1);
968
#else
969
            return(0);
970
#endif
971
0
        case XML_WITH_CATALOG:
972
0
#ifdef LIBXML_CATALOG_ENABLED
973
0
            return(1);
974
#else
975
            return(0);
976
#endif
977
0
        case XML_WITH_XPATH:
978
0
#ifdef LIBXML_XPATH_ENABLED
979
0
            return(1);
980
#else
981
            return(0);
982
#endif
983
0
        case XML_WITH_XPTR:
984
0
#ifdef LIBXML_XPTR_ENABLED
985
0
            return(1);
986
#else
987
            return(0);
988
#endif
989
0
        case XML_WITH_XINCLUDE:
990
0
#ifdef LIBXML_XINCLUDE_ENABLED
991
0
            return(1);
992
#else
993
            return(0);
994
#endif
995
0
        case XML_WITH_ICONV:
996
0
#ifdef LIBXML_ICONV_ENABLED
997
0
            return(1);
998
#else
999
            return(0);
1000
#endif
1001
0
        case XML_WITH_ISO8859X:
1002
0
#ifdef LIBXML_ISO8859X_ENABLED
1003
0
            return(1);
1004
#else
1005
            return(0);
1006
#endif
1007
0
        case XML_WITH_UNICODE:
1008
0
#ifdef LIBXML_UNICODE_ENABLED
1009
0
            return(1);
1010
#else
1011
            return(0);
1012
#endif
1013
0
        case XML_WITH_REGEXP:
1014
0
#ifdef LIBXML_REGEXP_ENABLED
1015
0
            return(1);
1016
#else
1017
            return(0);
1018
#endif
1019
0
        case XML_WITH_AUTOMATA:
1020
0
#ifdef LIBXML_AUTOMATA_ENABLED
1021
0
            return(1);
1022
#else
1023
            return(0);
1024
#endif
1025
0
        case XML_WITH_EXPR:
1026
#ifdef LIBXML_EXPR_ENABLED
1027
            return(1);
1028
#else
1029
0
            return(0);
1030
0
#endif
1031
0
        case XML_WITH_SCHEMAS:
1032
0
#ifdef LIBXML_SCHEMAS_ENABLED
1033
0
            return(1);
1034
#else
1035
            return(0);
1036
#endif
1037
0
        case XML_WITH_SCHEMATRON:
1038
0
#ifdef LIBXML_SCHEMATRON_ENABLED
1039
0
            return(1);
1040
#else
1041
            return(0);
1042
#endif
1043
0
        case XML_WITH_MODULES:
1044
0
#ifdef LIBXML_MODULES_ENABLED
1045
0
            return(1);
1046
#else
1047
            return(0);
1048
#endif
1049
0
        case XML_WITH_DEBUG:
1050
#ifdef LIBXML_DEBUG_ENABLED
1051
            return(1);
1052
#else
1053
0
            return(0);
1054
0
#endif
1055
0
        case XML_WITH_DEBUG_MEM:
1056
#ifdef DEBUG_MEMORY_LOCATION
1057
            return(1);
1058
#else
1059
0
            return(0);
1060
0
#endif
1061
0
        case XML_WITH_DEBUG_RUN:
1062
0
            return(0);
1063
0
        case XML_WITH_ZLIB:
1064
0
#ifdef LIBXML_ZLIB_ENABLED
1065
0
            return(1);
1066
#else
1067
            return(0);
1068
#endif
1069
0
        case XML_WITH_LZMA:
1070
0
#ifdef LIBXML_LZMA_ENABLED
1071
0
            return(1);
1072
#else
1073
            return(0);
1074
#endif
1075
0
        case XML_WITH_ICU:
1076
#ifdef LIBXML_ICU_ENABLED
1077
            return(1);
1078
#else
1079
0
            return(0);
1080
0
#endif
1081
0
        default:
1082
0
      break;
1083
0
     }
1084
0
     return(0);
1085
0
}
1086
1087
/************************************************************************
1088
 *                  *
1089
 *    SAX2 defaulted attributes handling      *
1090
 *                  *
1091
 ************************************************************************/
1092
1093
/**
1094
 * xmlDetectSAX2:
1095
 * @ctxt:  an XML parser context
1096
 *
1097
 * Do the SAX2 detection and specific initialization
1098
 */
1099
static void
1100
3.36M
xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
1101
3.36M
    xmlSAXHandlerPtr sax;
1102
1103
    /* Avoid unused variable warning if features are disabled. */
1104
3.36M
    (void) sax;
1105
1106
3.36M
    if (ctxt == NULL) return;
1107
3.36M
    sax = ctxt->sax;
1108
3.36M
#ifdef LIBXML_SAX1_ENABLED
1109
3.36M
    if ((sax) &&  (sax->initialized == XML_SAX2_MAGIC) &&
1110
3.36M
        ((sax->startElementNs != NULL) ||
1111
700k
         (sax->endElementNs != NULL) ||
1112
700k
         ((sax->startElement == NULL) && (sax->endElement == NULL))))
1113
700k
        ctxt->sax2 = 1;
1114
#else
1115
    ctxt->sax2 = 1;
1116
#endif /* LIBXML_SAX1_ENABLED */
1117
1118
3.36M
    ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
1119
3.36M
    ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
1120
3.36M
    ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
1121
3.36M
    if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
1122
3.36M
    (ctxt->str_xml_ns == NULL)) {
1123
0
        xmlErrMemory(ctxt, NULL);
1124
0
    }
1125
3.36M
}
1126
1127
typedef struct _xmlDefAttrs xmlDefAttrs;
1128
typedef xmlDefAttrs *xmlDefAttrsPtr;
1129
struct _xmlDefAttrs {
1130
    int nbAttrs;  /* number of defaulted attributes on that element */
1131
    int maxAttrs;       /* the size of the array */
1132
#if __STDC_VERSION__ >= 199901L
1133
    /* Using a C99 flexible array member avoids UBSan errors. */
1134
    const xmlChar *values[]; /* array of localname/prefix/values/external */
1135
#else
1136
    const xmlChar *values[5];
1137
#endif
1138
};
1139
1140
/**
1141
 * xmlAttrNormalizeSpace:
1142
 * @src: the source string
1143
 * @dst: the target string
1144
 *
1145
 * Normalize the space in non CDATA attribute values:
1146
 * If the attribute type is not CDATA, then the XML processor MUST further
1147
 * process the normalized attribute value by discarding any leading and
1148
 * trailing space (#x20) characters, and by replacing sequences of space
1149
 * (#x20) characters by a single space (#x20) character.
1150
 * Note that the size of dst need to be at least src, and if one doesn't need
1151
 * to preserve dst (and it doesn't come from a dictionary or read-only) then
1152
 * passing src as dst is just fine.
1153
 *
1154
 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1155
 *         is needed.
1156
 */
1157
static xmlChar *
1158
xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
1159
115k
{
1160
115k
    if ((src == NULL) || (dst == NULL))
1161
0
        return(NULL);
1162
1163
141k
    while (*src == 0x20) src++;
1164
1.22M
    while (*src != 0) {
1165
1.10M
  if (*src == 0x20) {
1166
255k
      while (*src == 0x20) src++;
1167
76.8k
      if (*src != 0)
1168
67.4k
    *dst++ = 0x20;
1169
1.03M
  } else {
1170
1.03M
      *dst++ = *src++;
1171
1.03M
  }
1172
1.10M
    }
1173
115k
    *dst = 0;
1174
115k
    if (dst == src)
1175
102k
       return(NULL);
1176
12.9k
    return(dst);
1177
115k
}
1178
1179
/**
1180
 * xmlAttrNormalizeSpace2:
1181
 * @src: the source string
1182
 *
1183
 * Normalize the space in non CDATA attribute values, a slightly more complex
1184
 * front end to avoid allocation problems when running on attribute values
1185
 * coming from the input.
1186
 *
1187
 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1188
 *         is needed.
1189
 */
1190
static const xmlChar *
1191
xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
1192
27.1k
{
1193
27.1k
    int i;
1194
27.1k
    int remove_head = 0;
1195
27.1k
    int need_realloc = 0;
1196
27.1k
    const xmlChar *cur;
1197
1198
27.1k
    if ((ctxt == NULL) || (src == NULL) || (len == NULL))
1199
0
        return(NULL);
1200
27.1k
    i = *len;
1201
27.1k
    if (i <= 0)
1202
1.25k
        return(NULL);
1203
1204
25.8k
    cur = src;
1205
35.9k
    while (*cur == 0x20) {
1206
10.0k
        cur++;
1207
10.0k
  remove_head++;
1208
10.0k
    }
1209
618k
    while (*cur != 0) {
1210
596k
  if (*cur == 0x20) {
1211
39.3k
      cur++;
1212
39.3k
      if ((*cur == 0x20) || (*cur == 0)) {
1213
3.43k
          need_realloc = 1;
1214
3.43k
    break;
1215
3.43k
      }
1216
39.3k
  } else
1217
556k
      cur++;
1218
596k
    }
1219
25.8k
    if (need_realloc) {
1220
3.43k
        xmlChar *ret;
1221
1222
3.43k
  ret = xmlStrndup(src + remove_head, i - remove_head + 1);
1223
3.43k
  if (ret == NULL) {
1224
0
      xmlErrMemory(ctxt, NULL);
1225
0
      return(NULL);
1226
0
  }
1227
3.43k
  xmlAttrNormalizeSpace(ret, ret);
1228
3.43k
  *len = strlen((const char *)ret);
1229
3.43k
        return(ret);
1230
22.4k
    } else if (remove_head) {
1231
491
        *len -= remove_head;
1232
491
        memmove(src, src + remove_head, 1 + *len);
1233
491
  return(src);
1234
491
    }
1235
21.9k
    return(NULL);
1236
25.8k
}
1237
1238
/**
1239
 * xmlAddDefAttrs:
1240
 * @ctxt:  an XML parser context
1241
 * @fullname:  the element fullname
1242
 * @fullattr:  the attribute fullname
1243
 * @value:  the attribute value
1244
 *
1245
 * Add a defaulted attribute for an element
1246
 */
1247
static void
1248
xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1249
               const xmlChar *fullname,
1250
               const xmlChar *fullattr,
1251
619k
               const xmlChar *value) {
1252
619k
    xmlDefAttrsPtr defaults;
1253
619k
    int len;
1254
619k
    const xmlChar *name;
1255
619k
    const xmlChar *prefix;
1256
1257
    /*
1258
     * Allows to detect attribute redefinitions
1259
     */
1260
619k
    if (ctxt->attsSpecial != NULL) {
1261
604k
        if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1262
524k
      return;
1263
604k
    }
1264
1265
94.9k
    if (ctxt->attsDefault == NULL) {
1266
21.9k
        ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
1267
21.9k
  if (ctxt->attsDefault == NULL)
1268
0
      goto mem_error;
1269
21.9k
    }
1270
1271
    /*
1272
     * split the element name into prefix:localname , the string found
1273
     * are within the DTD and then not associated to namespace names.
1274
     */
1275
94.9k
    name = xmlSplitQName3(fullname, &len);
1276
94.9k
    if (name == NULL) {
1277
89.2k
        name = xmlDictLookup(ctxt->dict, fullname, -1);
1278
89.2k
  prefix = NULL;
1279
89.2k
    } else {
1280
5.68k
        name = xmlDictLookup(ctxt->dict, name, -1);
1281
5.68k
  prefix = xmlDictLookup(ctxt->dict, fullname, len);
1282
5.68k
    }
1283
1284
    /*
1285
     * make sure there is some storage
1286
     */
1287
94.9k
    defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1288
94.9k
    if (defaults == NULL) {
1289
56.7k
        defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
1290
56.7k
                     (4 * 5) * sizeof(const xmlChar *));
1291
56.7k
  if (defaults == NULL)
1292
0
      goto mem_error;
1293
56.7k
  defaults->nbAttrs = 0;
1294
56.7k
  defaults->maxAttrs = 4;
1295
56.7k
  if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1296
56.7k
                          defaults, NULL) < 0) {
1297
0
      xmlFree(defaults);
1298
0
      goto mem_error;
1299
0
  }
1300
56.7k
    } else if (defaults->nbAttrs >= defaults->maxAttrs) {
1301
2.53k
        xmlDefAttrsPtr temp;
1302
1303
2.53k
        temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
1304
2.53k
           (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
1305
2.53k
  if (temp == NULL)
1306
0
      goto mem_error;
1307
2.53k
  defaults = temp;
1308
2.53k
  defaults->maxAttrs *= 2;
1309
2.53k
  if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1310
2.53k
                          defaults, NULL) < 0) {
1311
0
      xmlFree(defaults);
1312
0
      goto mem_error;
1313
0
  }
1314
2.53k
    }
1315
1316
    /*
1317
     * Split the element name into prefix:localname , the string found
1318
     * are within the DTD and hen not associated to namespace names.
1319
     */
1320
94.9k
    name = xmlSplitQName3(fullattr, &len);
1321
94.9k
    if (name == NULL) {
1322
85.8k
        name = xmlDictLookup(ctxt->dict, fullattr, -1);
1323
85.8k
  prefix = NULL;
1324
85.8k
    } else {
1325
9.08k
        name = xmlDictLookup(ctxt->dict, name, -1);
1326
9.08k
  prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1327
9.08k
    }
1328
1329
94.9k
    defaults->values[5 * defaults->nbAttrs] = name;
1330
94.9k
    defaults->values[5 * defaults->nbAttrs + 1] = prefix;
1331
    /* intern the string and precompute the end */
1332
94.9k
    len = xmlStrlen(value);
1333
94.9k
    value = xmlDictLookup(ctxt->dict, value, len);
1334
94.9k
    defaults->values[5 * defaults->nbAttrs + 2] = value;
1335
94.9k
    defaults->values[5 * defaults->nbAttrs + 3] = value + len;
1336
94.9k
    if (ctxt->external)
1337
2.85k
        defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
1338
92.1k
    else
1339
92.1k
        defaults->values[5 * defaults->nbAttrs + 4] = NULL;
1340
94.9k
    defaults->nbAttrs++;
1341
1342
94.9k
    return;
1343
1344
0
mem_error:
1345
0
    xmlErrMemory(ctxt, NULL);
1346
0
    return;
1347
94.9k
}
1348
1349
/**
1350
 * xmlAddSpecialAttr:
1351
 * @ctxt:  an XML parser context
1352
 * @fullname:  the element fullname
1353
 * @fullattr:  the attribute fullname
1354
 * @type:  the attribute type
1355
 *
1356
 * Register this attribute type
1357
 */
1358
static void
1359
xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1360
      const xmlChar *fullname,
1361
      const xmlChar *fullattr,
1362
      int type)
1363
3.68M
{
1364
3.68M
    if (ctxt->attsSpecial == NULL) {
1365
43.8k
        ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
1366
43.8k
  if (ctxt->attsSpecial == NULL)
1367
0
      goto mem_error;
1368
43.8k
    }
1369
1370
3.68M
    if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1371
2.36M
        return;
1372
1373
1.31M
    xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1374
1.31M
                     (void *) (ptrdiff_t) type);
1375
1.31M
    return;
1376
1377
0
mem_error:
1378
0
    xmlErrMemory(ctxt, NULL);
1379
0
    return;
1380
3.68M
}
1381
1382
/**
1383
 * xmlCleanSpecialAttrCallback:
1384
 *
1385
 * Removes CDATA attributes from the special attribute table
1386
 */
1387
static void
1388
xmlCleanSpecialAttrCallback(void *payload, void *data,
1389
                            const xmlChar *fullname, const xmlChar *fullattr,
1390
1.31M
                            const xmlChar *unused ATTRIBUTE_UNUSED) {
1391
1.31M
    xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1392
1393
1.31M
    if (((ptrdiff_t) payload) == XML_ATTRIBUTE_CDATA) {
1394
608k
        xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1395
608k
    }
1396
1.31M
}
1397
1398
/**
1399
 * xmlCleanSpecialAttr:
1400
 * @ctxt:  an XML parser context
1401
 *
1402
 * Trim the list of attributes defined to remove all those of type
1403
 * CDATA as they are not special. This call should be done when finishing
1404
 * to parse the DTD and before starting to parse the document root.
1405
 */
1406
static void
1407
xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1408
242k
{
1409
242k
    if (ctxt->attsSpecial == NULL)
1410
198k
        return;
1411
1412
43.4k
    xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1413
1414
43.4k
    if (xmlHashSize(ctxt->attsSpecial) == 0) {
1415
9.65k
        xmlHashFree(ctxt->attsSpecial, NULL);
1416
9.65k
        ctxt->attsSpecial = NULL;
1417
9.65k
    }
1418
43.4k
    return;
1419
242k
}
1420
1421
/**
1422
 * xmlCheckLanguageID:
1423
 * @lang:  pointer to the string value
1424
 *
1425
 * Checks that the value conforms to the LanguageID production:
1426
 *
1427
 * NOTE: this is somewhat deprecated, those productions were removed from
1428
 *       the XML Second edition.
1429
 *
1430
 * [33] LanguageID ::= Langcode ('-' Subcode)*
1431
 * [34] Langcode ::= ISO639Code |  IanaCode |  UserCode
1432
 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1433
 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1434
 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1435
 * [38] Subcode ::= ([a-z] | [A-Z])+
1436
 *
1437
 * The current REC reference the successors of RFC 1766, currently 5646
1438
 *
1439
 * http://www.rfc-editor.org/rfc/rfc5646.txt
1440
 * langtag       = language
1441
 *                 ["-" script]
1442
 *                 ["-" region]
1443
 *                 *("-" variant)
1444
 *                 *("-" extension)
1445
 *                 ["-" privateuse]
1446
 * language      = 2*3ALPHA            ; shortest ISO 639 code
1447
 *                 ["-" extlang]       ; sometimes followed by
1448
 *                                     ; extended language subtags
1449
 *               / 4ALPHA              ; or reserved for future use
1450
 *               / 5*8ALPHA            ; or registered language subtag
1451
 *
1452
 * extlang       = 3ALPHA              ; selected ISO 639 codes
1453
 *                 *2("-" 3ALPHA)      ; permanently reserved
1454
 *
1455
 * script        = 4ALPHA              ; ISO 15924 code
1456
 *
1457
 * region        = 2ALPHA              ; ISO 3166-1 code
1458
 *               / 3DIGIT              ; UN M.49 code
1459
 *
1460
 * variant       = 5*8alphanum         ; registered variants
1461
 *               / (DIGIT 3alphanum)
1462
 *
1463
 * extension     = singleton 1*("-" (2*8alphanum))
1464
 *
1465
 *                                     ; Single alphanumerics
1466
 *                                     ; "x" reserved for private use
1467
 * singleton     = DIGIT               ; 0 - 9
1468
 *               / %x41-57             ; A - W
1469
 *               / %x59-5A             ; Y - Z
1470
 *               / %x61-77             ; a - w
1471
 *               / %x79-7A             ; y - z
1472
 *
1473
 * it sounds right to still allow Irregular i-xxx IANA and user codes too
1474
 * The parser below doesn't try to cope with extension or privateuse
1475
 * that could be added but that's not interoperable anyway
1476
 *
1477
 * Returns 1 if correct 0 otherwise
1478
 **/
1479
int
1480
xmlCheckLanguageID(const xmlChar * lang)
1481
16.5k
{
1482
16.5k
    const xmlChar *cur = lang, *nxt;
1483
1484
16.5k
    if (cur == NULL)
1485
683
        return (0);
1486
15.8k
    if (((cur[0] == 'i') && (cur[1] == '-')) ||
1487
15.8k
        ((cur[0] == 'I') && (cur[1] == '-')) ||
1488
15.8k
        ((cur[0] == 'x') && (cur[1] == '-')) ||
1489
15.8k
        ((cur[0] == 'X') && (cur[1] == '-'))) {
1490
        /*
1491
         * Still allow IANA code and user code which were coming
1492
         * from the previous version of the XML-1.0 specification
1493
         * it's deprecated but we should not fail
1494
         */
1495
1.10k
        cur += 2;
1496
8.51k
        while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1497
8.51k
               ((cur[0] >= 'a') && (cur[0] <= 'z')))
1498
7.41k
            cur++;
1499
1.10k
        return(cur[0] == 0);
1500
1.10k
    }
1501
14.7k
    nxt = cur;
1502
61.6k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1503
61.6k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1504
46.9k
           nxt++;
1505
14.7k
    if (nxt - cur >= 4) {
1506
        /*
1507
         * Reserved
1508
         */
1509
1.54k
        if ((nxt - cur > 8) || (nxt[0] != 0))
1510
1.33k
            return(0);
1511
214
        return(1);
1512
1.54k
    }
1513
13.1k
    if (nxt - cur < 2)
1514
1.16k
        return(0);
1515
    /* we got an ISO 639 code */
1516
12.0k
    if (nxt[0] == 0)
1517
4.14k
        return(1);
1518
7.88k
    if (nxt[0] != '-')
1519
623
        return(0);
1520
1521
7.26k
    nxt++;
1522
7.26k
    cur = nxt;
1523
    /* now we can have extlang or script or region or variant */
1524
7.26k
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1525
765
        goto region_m49;
1526
1527
33.7k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1528
33.7k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1529
27.2k
           nxt++;
1530
6.49k
    if (nxt - cur == 4)
1531
1.50k
        goto script;
1532
4.99k
    if (nxt - cur == 2)
1533
1.05k
        goto region;
1534
3.93k
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1535
787
        goto variant;
1536
3.14k
    if (nxt - cur != 3)
1537
801
        return(0);
1538
    /* we parsed an extlang */
1539
2.34k
    if (nxt[0] == 0)
1540
243
        return(1);
1541
2.10k
    if (nxt[0] != '-')
1542
361
        return(0);
1543
1544
1.74k
    nxt++;
1545
1.74k
    cur = nxt;
1546
    /* now we can have script or region or variant */
1547
1.74k
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1548
202
        goto region_m49;
1549
1550
14.1k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1551
14.1k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1552
12.6k
           nxt++;
1553
1.54k
    if (nxt - cur == 2)
1554
576
        goto region;
1555
965
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1556
284
        goto variant;
1557
681
    if (nxt - cur != 4)
1558
405
        return(0);
1559
    /* we parsed a script */
1560
1.78k
script:
1561
1.78k
    if (nxt[0] == 0)
1562
222
        return(1);
1563
1.55k
    if (nxt[0] != '-')
1564
425
        return(0);
1565
1566
1.13k
    nxt++;
1567
1.13k
    cur = nxt;
1568
    /* now we can have region or variant */
1569
1.13k
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1570
218
        goto region_m49;
1571
1572
9.43k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1573
9.43k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1574
8.52k
           nxt++;
1575
1576
916
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1577
274
        goto variant;
1578
642
    if (nxt - cur != 2)
1579
382
        return(0);
1580
    /* we parsed a region */
1581
2.41k
region:
1582
2.41k
    if (nxt[0] == 0)
1583
325
        return(1);
1584
2.08k
    if (nxt[0] != '-')
1585
927
        return(0);
1586
1587
1.16k
    nxt++;
1588
1.16k
    cur = nxt;
1589
    /* now we can just have a variant */
1590
11.7k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1591
11.7k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1592
10.5k
           nxt++;
1593
1594
1.16k
    if ((nxt - cur < 5) || (nxt - cur > 8))
1595
758
        return(0);
1596
1597
    /* we parsed a variant */
1598
1.74k
variant:
1599
1.74k
    if (nxt[0] == 0)
1600
329
        return(1);
1601
1.42k
    if (nxt[0] != '-')
1602
1.01k
        return(0);
1603
    /* extensions and private use subtags not checked */
1604
409
    return (1);
1605
1606
1.18k
region_m49:
1607
1.18k
    if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
1608
1.18k
        ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
1609
523
        nxt += 3;
1610
523
        goto region;
1611
523
    }
1612
662
    return(0);
1613
1.18k
}
1614
1615
/************************************************************************
1616
 *                  *
1617
 *    Parser stacks related functions and macros    *
1618
 *                  *
1619
 ************************************************************************/
1620
1621
static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1622
                                            const xmlChar ** str);
1623
1624
#ifdef SAX2
1625
/**
1626
 * nsPush:
1627
 * @ctxt:  an XML parser context
1628
 * @prefix:  the namespace prefix or NULL
1629
 * @URL:  the namespace name
1630
 *
1631
 * Pushes a new parser namespace on top of the ns stack
1632
 *
1633
 * Returns -1 in case of error, -2 if the namespace should be discarded
1634
 *     and the index in the stack otherwise.
1635
 */
1636
static int
1637
nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1638
1.38M
{
1639
1.38M
    if (ctxt->options & XML_PARSE_NSCLEAN) {
1640
117k
        int i;
1641
156k
  for (i = ctxt->nsNr - 2;i >= 0;i -= 2) {
1642
54.7k
      if (ctxt->nsTab[i] == prefix) {
1643
    /* in scope */
1644
15.8k
          if (ctxt->nsTab[i + 1] == URL)
1645
9.50k
        return(-2);
1646
    /* out of scope keep it */
1647
6.32k
    break;
1648
15.8k
      }
1649
54.7k
  }
1650
117k
    }
1651
1.37M
    if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1652
143k
  ctxt->nsMax = 10;
1653
143k
  ctxt->nsNr = 0;
1654
143k
  ctxt->nsTab = (const xmlChar **)
1655
143k
                xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1656
143k
  if (ctxt->nsTab == NULL) {
1657
0
      xmlErrMemory(ctxt, NULL);
1658
0
      ctxt->nsMax = 0;
1659
0
            return (-1);
1660
0
  }
1661
1.23M
    } else if (ctxt->nsNr >= ctxt->nsMax) {
1662
28.7k
        const xmlChar ** tmp;
1663
28.7k
        ctxt->nsMax *= 2;
1664
28.7k
        tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1665
28.7k
            ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1666
28.7k
        if (tmp == NULL) {
1667
0
            xmlErrMemory(ctxt, NULL);
1668
0
      ctxt->nsMax /= 2;
1669
0
            return (-1);
1670
0
        }
1671
28.7k
  ctxt->nsTab = tmp;
1672
28.7k
    }
1673
1.37M
    ctxt->nsTab[ctxt->nsNr++] = prefix;
1674
1.37M
    ctxt->nsTab[ctxt->nsNr++] = URL;
1675
1.37M
    return (ctxt->nsNr);
1676
1.37M
}
1677
/**
1678
 * nsPop:
1679
 * @ctxt: an XML parser context
1680
 * @nr:  the number to pop
1681
 *
1682
 * Pops the top @nr parser prefix/namespace from the ns stack
1683
 *
1684
 * Returns the number of namespaces removed
1685
 */
1686
static int
1687
nsPop(xmlParserCtxtPtr ctxt, int nr)
1688
73.9k
{
1689
73.9k
    int i;
1690
1691
73.9k
    if (ctxt->nsTab == NULL) return(0);
1692
73.9k
    if (ctxt->nsNr < nr) {
1693
0
        xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1694
0
        nr = ctxt->nsNr;
1695
0
    }
1696
73.9k
    if (ctxt->nsNr <= 0)
1697
0
        return (0);
1698
1699
237k
    for (i = 0;i < nr;i++) {
1700
163k
         ctxt->nsNr--;
1701
163k
   ctxt->nsTab[ctxt->nsNr] = NULL;
1702
163k
    }
1703
73.9k
    return(nr);
1704
73.9k
}
1705
#endif
1706
1707
static int
1708
219k
xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1709
219k
    const xmlChar **atts;
1710
219k
    int *attallocs;
1711
219k
    int maxatts;
1712
1713
219k
    if (ctxt->atts == NULL) {
1714
219k
  maxatts = 55; /* allow for 10 attrs by default */
1715
219k
  atts = (const xmlChar **)
1716
219k
         xmlMalloc(maxatts * sizeof(xmlChar *));
1717
219k
  if (atts == NULL) goto mem_error;
1718
219k
  ctxt->atts = atts;
1719
219k
  attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1720
219k
  if (attallocs == NULL) goto mem_error;
1721
219k
  ctxt->attallocs = attallocs;
1722
219k
  ctxt->maxatts = maxatts;
1723
219k
    } else if (nr + 5 > ctxt->maxatts) {
1724
311
  maxatts = (nr + 5) * 2;
1725
311
  atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1726
311
             maxatts * sizeof(const xmlChar *));
1727
311
  if (atts == NULL) goto mem_error;
1728
311
  ctxt->atts = atts;
1729
311
  attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1730
311
                               (maxatts / 5) * sizeof(int));
1731
311
  if (attallocs == NULL) goto mem_error;
1732
311
  ctxt->attallocs = attallocs;
1733
311
  ctxt->maxatts = maxatts;
1734
311
    }
1735
219k
    return(ctxt->maxatts);
1736
0
mem_error:
1737
0
    xmlErrMemory(ctxt, NULL);
1738
0
    return(-1);
1739
219k
}
1740
1741
/**
1742
 * inputPush:
1743
 * @ctxt:  an XML parser context
1744
 * @value:  the parser input
1745
 *
1746
 * Pushes a new parser input on top of the input stack
1747
 *
1748
 * Returns -1 in case of error, the index in the stack otherwise
1749
 */
1750
int
1751
inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1752
6.93M
{
1753
6.93M
    if ((ctxt == NULL) || (value == NULL))
1754
0
        return(-1);
1755
6.93M
    if (ctxt->inputNr >= ctxt->inputMax) {
1756
17.1k
        ctxt->inputMax *= 2;
1757
17.1k
        ctxt->inputTab =
1758
17.1k
            (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1759
17.1k
                                             ctxt->inputMax *
1760
17.1k
                                             sizeof(ctxt->inputTab[0]));
1761
17.1k
        if (ctxt->inputTab == NULL) {
1762
0
            xmlErrMemory(ctxt, NULL);
1763
0
      ctxt->inputMax /= 2;
1764
0
            return (-1);
1765
0
        }
1766
17.1k
    }
1767
6.93M
    ctxt->inputTab[ctxt->inputNr] = value;
1768
6.93M
    ctxt->input = value;
1769
6.93M
    return (ctxt->inputNr++);
1770
6.93M
}
1771
/**
1772
 * inputPop:
1773
 * @ctxt: an XML parser context
1774
 *
1775
 * Pops the top parser input from the input stack
1776
 *
1777
 * Returns the input just removed
1778
 */
1779
xmlParserInputPtr
1780
inputPop(xmlParserCtxtPtr ctxt)
1781
13.6M
{
1782
13.6M
    xmlParserInputPtr ret;
1783
1784
13.6M
    if (ctxt == NULL)
1785
0
        return(NULL);
1786
13.6M
    if (ctxt->inputNr <= 0)
1787
6.77M
        return (NULL);
1788
6.90M
    ctxt->inputNr--;
1789
6.90M
    if (ctxt->inputNr > 0)
1790
3.67M
        ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1791
3.23M
    else
1792
3.23M
        ctxt->input = NULL;
1793
6.90M
    ret = ctxt->inputTab[ctxt->inputNr];
1794
6.90M
    ctxt->inputTab[ctxt->inputNr] = NULL;
1795
6.90M
    return (ret);
1796
13.6M
}
1797
/**
1798
 * nodePush:
1799
 * @ctxt:  an XML parser context
1800
 * @value:  the element node
1801
 *
1802
 * Pushes a new element node on top of the node stack
1803
 *
1804
 * Returns -1 in case of error, the index in the stack otherwise
1805
 */
1806
int
1807
nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1808
11.1M
{
1809
11.1M
    if (ctxt == NULL) return(0);
1810
11.1M
    if (ctxt->nodeNr >= ctxt->nodeMax) {
1811
32.7k
        xmlNodePtr *tmp;
1812
1813
32.7k
  tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1814
32.7k
                                      ctxt->nodeMax * 2 *
1815
32.7k
                                      sizeof(ctxt->nodeTab[0]));
1816
32.7k
        if (tmp == NULL) {
1817
0
            xmlErrMemory(ctxt, NULL);
1818
0
            return (-1);
1819
0
        }
1820
32.7k
        ctxt->nodeTab = tmp;
1821
32.7k
  ctxt->nodeMax *= 2;
1822
32.7k
    }
1823
11.1M
    if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1824
11.1M
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
1825
0
  xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
1826
0
     "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
1827
0
        xmlParserMaxDepth);
1828
0
  xmlHaltParser(ctxt);
1829
0
  return(-1);
1830
0
    }
1831
11.1M
    ctxt->nodeTab[ctxt->nodeNr] = value;
1832
11.1M
    ctxt->node = value;
1833
11.1M
    return (ctxt->nodeNr++);
1834
11.1M
}
1835
1836
/**
1837
 * nodePop:
1838
 * @ctxt: an XML parser context
1839
 *
1840
 * Pops the top element node from the node stack
1841
 *
1842
 * Returns the node just removed
1843
 */
1844
xmlNodePtr
1845
nodePop(xmlParserCtxtPtr ctxt)
1846
6.81M
{
1847
6.81M
    xmlNodePtr ret;
1848
1849
6.81M
    if (ctxt == NULL) return(NULL);
1850
6.81M
    if (ctxt->nodeNr <= 0)
1851
563k
        return (NULL);
1852
6.24M
    ctxt->nodeNr--;
1853
6.24M
    if (ctxt->nodeNr > 0)
1854
5.39M
        ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1855
849k
    else
1856
849k
        ctxt->node = NULL;
1857
6.24M
    ret = ctxt->nodeTab[ctxt->nodeNr];
1858
6.24M
    ctxt->nodeTab[ctxt->nodeNr] = NULL;
1859
6.24M
    return (ret);
1860
6.81M
}
1861
1862
/**
1863
 * nameNsPush:
1864
 * @ctxt:  an XML parser context
1865
 * @value:  the element name
1866
 * @prefix:  the element prefix
1867
 * @URI:  the element namespace name
1868
 * @line:  the current line number for error messages
1869
 * @nsNr:  the number of namespaces pushed on the namespace table
1870
 *
1871
 * Pushes a new element name/prefix/URL on top of the name stack
1872
 *
1873
 * Returns -1 in case of error, the index in the stack otherwise
1874
 */
1875
static int
1876
nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1877
           const xmlChar *prefix, const xmlChar *URI, int line, int nsNr)
1878
17.7M
{
1879
17.7M
    xmlStartTag *tag;
1880
1881
17.7M
    if (ctxt->nameNr >= ctxt->nameMax) {
1882
50.2k
        const xmlChar * *tmp;
1883
50.2k
        xmlStartTag *tmp2;
1884
50.2k
        ctxt->nameMax *= 2;
1885
50.2k
        tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1886
50.2k
                                    ctxt->nameMax *
1887
50.2k
                                    sizeof(ctxt->nameTab[0]));
1888
50.2k
        if (tmp == NULL) {
1889
0
      ctxt->nameMax /= 2;
1890
0
      goto mem_error;
1891
0
        }
1892
50.2k
  ctxt->nameTab = tmp;
1893
50.2k
        tmp2 = (xmlStartTag *) xmlRealloc((void * *)ctxt->pushTab,
1894
50.2k
                                    ctxt->nameMax *
1895
50.2k
                                    sizeof(ctxt->pushTab[0]));
1896
50.2k
        if (tmp2 == NULL) {
1897
0
      ctxt->nameMax /= 2;
1898
0
      goto mem_error;
1899
0
        }
1900
50.2k
  ctxt->pushTab = tmp2;
1901
17.6M
    } else if (ctxt->pushTab == NULL) {
1902
2.94M
        ctxt->pushTab = (xmlStartTag *) xmlMalloc(ctxt->nameMax *
1903
2.94M
                                            sizeof(ctxt->pushTab[0]));
1904
2.94M
        if (ctxt->pushTab == NULL)
1905
0
            goto mem_error;
1906
2.94M
    }
1907
17.7M
    ctxt->nameTab[ctxt->nameNr] = value;
1908
17.7M
    ctxt->name = value;
1909
17.7M
    tag = &ctxt->pushTab[ctxt->nameNr];
1910
17.7M
    tag->prefix = prefix;
1911
17.7M
    tag->URI = URI;
1912
17.7M
    tag->line = line;
1913
17.7M
    tag->nsNr = nsNr;
1914
17.7M
    return (ctxt->nameNr++);
1915
0
mem_error:
1916
0
    xmlErrMemory(ctxt, NULL);
1917
0
    return (-1);
1918
17.7M
}
1919
#ifdef LIBXML_PUSH_ENABLED
1920
/**
1921
 * nameNsPop:
1922
 * @ctxt: an XML parser context
1923
 *
1924
 * Pops the top element/prefix/URI name from the name stack
1925
 *
1926
 * Returns the name just removed
1927
 */
1928
static const xmlChar *
1929
nameNsPop(xmlParserCtxtPtr ctxt)
1930
338k
{
1931
338k
    const xmlChar *ret;
1932
1933
338k
    if (ctxt->nameNr <= 0)
1934
0
        return (NULL);
1935
338k
    ctxt->nameNr--;
1936
338k
    if (ctxt->nameNr > 0)
1937
310k
        ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1938
28.7k
    else
1939
28.7k
        ctxt->name = NULL;
1940
338k
    ret = ctxt->nameTab[ctxt->nameNr];
1941
338k
    ctxt->nameTab[ctxt->nameNr] = NULL;
1942
338k
    return (ret);
1943
338k
}
1944
#endif /* LIBXML_PUSH_ENABLED */
1945
1946
/**
1947
 * namePush:
1948
 * @ctxt:  an XML parser context
1949
 * @value:  the element name
1950
 *
1951
 * Pushes a new element name on top of the name stack
1952
 *
1953
 * Returns -1 in case of error, the index in the stack otherwise
1954
 */
1955
int
1956
namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
1957
0
{
1958
0
    if (ctxt == NULL) return (-1);
1959
1960
0
    if (ctxt->nameNr >= ctxt->nameMax) {
1961
0
        const xmlChar * *tmp;
1962
0
        tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1963
0
                                    ctxt->nameMax * 2 *
1964
0
                                    sizeof(ctxt->nameTab[0]));
1965
0
        if (tmp == NULL) {
1966
0
      goto mem_error;
1967
0
        }
1968
0
  ctxt->nameTab = tmp;
1969
0
        ctxt->nameMax *= 2;
1970
0
    }
1971
0
    ctxt->nameTab[ctxt->nameNr] = value;
1972
0
    ctxt->name = value;
1973
0
    return (ctxt->nameNr++);
1974
0
mem_error:
1975
0
    xmlErrMemory(ctxt, NULL);
1976
0
    return (-1);
1977
0
}
1978
/**
1979
 * namePop:
1980
 * @ctxt: an XML parser context
1981
 *
1982
 * Pops the top element name from the name stack
1983
 *
1984
 * Returns the name just removed
1985
 */
1986
const xmlChar *
1987
namePop(xmlParserCtxtPtr ctxt)
1988
13.1M
{
1989
13.1M
    const xmlChar *ret;
1990
1991
13.1M
    if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1992
0
        return (NULL);
1993
13.1M
    ctxt->nameNr--;
1994
13.1M
    if (ctxt->nameNr > 0)
1995
7.90M
        ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1996
5.23M
    else
1997
5.23M
        ctxt->name = NULL;
1998
13.1M
    ret = ctxt->nameTab[ctxt->nameNr];
1999
13.1M
    ctxt->nameTab[ctxt->nameNr] = NULL;
2000
13.1M
    return (ret);
2001
13.1M
}
2002
2003
23.0M
/*
 * spacePush:
 * @ctxt:  an XML parser context
 * @val:  the value to push
 *
 * Pushes @val on top of the ctxt->spaceTab stack and points ctxt->space at
 * the new entry. The stack array is doubled when it is full.
 *
 * Returns -1 in case of allocation failure, the index in the stack
 *         otherwise
 */
static int spacePush(xmlParserCtxtPtr ctxt, int val) {
    if (ctxt->spaceNr >= ctxt->spaceMax) {
        int *grown;

        /* Capacity is doubled first and rolled back if realloc fails. */
        ctxt->spaceMax *= 2;
        grown = (int *) xmlRealloc(ctxt->spaceTab,
                                   ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
        if (grown == NULL) {
            xmlErrMemory(ctxt, NULL);
            ctxt->spaceMax /=2;
            return(-1);
        }
        ctxt->spaceTab = grown;
    }

    ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
    *ctxt->space = val;
    return(ctxt->spaceNr++);
}
2021
2022
18.9M
/*
 * spacePop:
 * @ctxt:  an XML parser context
 *
 * Removes the top entry of the ctxt->spaceTab stack. ctxt->space is moved
 * to the new top entry (or to slot 0 when the stack becomes empty) and the
 * vacated slot is overwritten with -1.
 *
 * Returns the value just removed, 0 if the stack is empty
 */
static int spacePop(xmlParserCtxtPtr ctxt) {
    int popped;
    int top;

    if (ctxt->spaceNr <= 0)
        return(0);

    top = --ctxt->spaceNr;
    ctxt->space = &ctxt->spaceTab[(top > 0) ? (top - 1) : 0];

    popped = ctxt->spaceTab[top];
    ctxt->spaceTab[top] = -1;
    return(popped);
}
2034
2035
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported.
 *
 * Dirty macros, i.e. one often needs to make assumptions about the context
 * to use them. All of them expect a variable named `ctxt` in scope.
 *
 *   CUR_PTR returns the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8-bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     same as CUR but in the input buffer, bypasses any token
 *           extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Like CUR, it should be used only
 *           to compare against ASCII based substrings.
 *   SKIP(n) Skips n xmlChars, and must also be used only to skip ASCII
 *           defined strings without newlines within the parser.
 *   NEXT1   Skips 1 xmlChar, and must also be used only to skip 1
 *           non-newline ASCII defined char within the parser.
 * Clean macros, not dependent on an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skips to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pops up unfinished entities on the fly.
 *   NEXTL(l) Skips the current unicode character of l xmlChars long.
 *   CUR_CHAR(l) returns the current unicode character (int), sets l
 *           to the number of xmlChars used for the encoding [0-5].
 *   CUR_SCHAR  same but operates on a string instead of the context
 *   COPY_BUF  copies the current unicode char to the target buffer,
 *            incrementing the index
 *   GROW, SHRINK  handling of input buffers
 */

#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) (ctxt->input->cur[(val)])
#define CUR_PTR (ctxt->input->cur)
#define BASE_PTR (ctxt->input->base)

/*
 * CMPn(s, c1, ..., cn): true when the first n bytes at s equal c1..cn.
 * Every parameter is parenthesized so that expression arguments (pointer
 * arithmetic, conditionals, ...) are cast and compared as a whole.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((unsigned char *) (s))[ 0 ] == (c1) && \
    ((unsigned char *) (s))[ 1 ] == (c2) && \
    ((unsigned char *) (s))[ 2 ] == (c3) && \
    ((unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((unsigned char *) (s))[ 9 ] == (c10) )

/* Advance cur and col by val, then request more input if a 0 byte is hit. */
#define SKIP(val) do {							\
    ctxt->input->cur += (val),ctxt->input->col+=(val);			\
    if (*ctxt->input->cur == 0)						\
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);			\
  } while (0)

/* Like SKIP but walks byte by byte so that line/col track newlines. */
#define SKIPL(val) do {							\
    int skipl;								\
    for(skipl=0; skipl<(val); skipl++) {				\
	if (*(ctxt->input->cur) == '\n') {				\
	ctxt->input->line++; ctxt->input->col = 1;			\
	} else ctxt->input->col++;					\
	ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == 0)						\
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);			\
  } while (0)
2111
2112
159M
#define SHRINK if ((ctxt->progressive == 0) &&       \
2113
159M
       (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
2114
159M
       (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
2115
159M
  xmlSHRINK (ctxt);
2116
2117
1.55M
static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
2118
1.55M
    xmlParserInputShrink(ctxt->input);
2119
1.55M
    if (*ctxt->input->cur == 0)
2120
127k
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2121
1.55M
}
2122
2123
819M
#define GROW if ((ctxt->progressive == 0) &&       \
2124
819M
     (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
2125
819M
  xmlGROW (ctxt);
2126
2127
137M
static void xmlGROW (xmlParserCtxtPtr ctxt) {
2128
137M
    ptrdiff_t curEnd = ctxt->input->end - ctxt->input->cur;
2129
137M
    ptrdiff_t curBase = ctxt->input->cur - ctxt->input->base;
2130
2131
137M
    if (((curEnd > XML_MAX_LOOKUP_LIMIT) ||
2132
137M
         (curBase > XML_MAX_LOOKUP_LIMIT)) &&
2133
137M
         ((ctxt->input->buf) &&
2134
0
          (ctxt->input->buf->readcallback != xmlInputReadCallbackNop)) &&
2135
137M
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
2136
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
2137
0
        xmlHaltParser(ctxt);
2138
0
  return;
2139
0
    }
2140
137M
    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2141
137M
    if ((ctxt->input->cur > ctxt->input->end) ||
2142
137M
        (ctxt->input->cur < ctxt->input->base)) {
2143
0
        xmlHaltParser(ctxt);
2144
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "cur index out of bound");
2145
0
  return;
2146
0
    }
2147
137M
    if ((ctxt->input->cur != NULL) && (*ctxt->input->cur == 0))
2148
4.21M
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2149
137M
}
2150
2151
141M
/* Shorthands operating on the `ctxt` variable expected to be in scope. */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

#define NEXT xmlNextChar(ctxt)

/* Skip exactly one non-newline byte: bump col and cur, grow on a 0 byte. */
#define NEXT1 {								\
	ctxt->input->col++;						\
	ctxt->input->cur++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }

/* Skip the current character of l bytes, keeping line/col up to date. */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += (l);						\
  } while (0)

#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * Append character v to buffer b at index i: a plain byte store when the
 * character is 1 byte long, xmlCopyCharMultiByte() otherwise.
 * NOTE: expands to a bare if/else, so it must be used as a full statement.
 */
#define COPY_BUF(l,b,i,v)						\
    if ((l) == 1) (b)[(i)++] = (v);					\
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v))

/* Bytes consumed so far: input->consumed plus the offset of cur in base. */
#define CUR_CONSUMED \
    (ctxt->input->consumed + (ctxt->input->cur - ctxt->input->base))
2178
2179
/**
2180
 * xmlSkipBlankChars:
2181
 * @ctxt:  the XML parser context
2182
 *
2183
 * skip all blanks character found at that point in the input streams.
2184
 * It pops up finished entities in the process if allowable at that point.
2185
 *
2186
 * Returns the number of space chars skipped
2187
 */
2188
2189
int
2190
141M
xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
2191
141M
    int res = 0;
2192
2193
    /*
2194
     * It's Okay to use CUR/NEXT here since all the blanks are on
2195
     * the ASCII range.
2196
     */
2197
141M
    if (((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) ||
2198
141M
        (ctxt->instate == XML_PARSER_START)) {
2199
54.3M
  const xmlChar *cur;
2200
  /*
2201
   * if we are in the document content, go really fast
2202
   */
2203
54.3M
  cur = ctxt->input->cur;
2204
54.3M
  while (IS_BLANK_CH(*cur)) {
2205
26.4M
      if (*cur == '\n') {
2206
1.31M
    ctxt->input->line++; ctxt->input->col = 1;
2207
25.0M
      } else {
2208
25.0M
    ctxt->input->col++;
2209
25.0M
      }
2210
26.4M
      cur++;
2211
26.4M
      if (res < INT_MAX)
2212
26.4M
    res++;
2213
26.4M
      if (*cur == 0) {
2214
129k
    ctxt->input->cur = cur;
2215
129k
    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2216
129k
    cur = ctxt->input->cur;
2217
129k
      }
2218
26.4M
  }
2219
54.3M
  ctxt->input->cur = cur;
2220
86.8M
    } else {
2221
86.8M
        int expandPE = ((ctxt->external != 0) || (ctxt->inputNr != 1));
2222
2223
265M
  while (1) {
2224
265M
            if (IS_BLANK_CH(CUR)) { /* CHECKED tstblanks.xml */
2225
172M
    NEXT;
2226
172M
      } else if (CUR == '%') {
2227
                /*
2228
                 * Need to handle support of entities branching here
2229
                 */
2230
8.88M
          if ((expandPE == 0) || (IS_BLANK_CH(NXT(1))) || (NXT(1) == 0))
2231
4.86M
                    break;
2232
4.02M
          xmlParsePEReference(ctxt);
2233
84.2M
            } else if (CUR == 0) {
2234
2.30M
                if (ctxt->inputNr <= 1)
2235
25.6k
                    break;
2236
2.28M
                xmlPopInput(ctxt);
2237
81.9M
            } else {
2238
81.9M
                break;
2239
81.9M
            }
2240
2241
            /*
2242
             * Also increase the counter when entering or exiting a PERef.
2243
             * The spec says: "When a parameter-entity reference is recognized
2244
             * in the DTD and included, its replacement text MUST be enlarged
2245
             * by the attachment of one leading and one following space (#x20)
2246
             * character."
2247
             */
2248
178M
      if (res < INT_MAX)
2249
178M
    res++;
2250
178M
        }
2251
86.8M
    }
2252
141M
    return(res);
2253
141M
}
2254
2255
/************************************************************************
2256
 *                  *
2257
 *    Commodity functions to handle entities      *
2258
 *                  *
2259
 ************************************************************************/
2260
2261
/**
2262
 * xmlPopInput:
2263
 * @ctxt:  an XML parser context
2264
 *
2265
 * xmlPopInput: the current input pointed by ctxt->input came to an end
2266
 *          pop it and return the next char.
2267
 *
2268
 * Returns the current xmlChar in the parser context
2269
 */
2270
xmlChar
2271
2.39M
xmlPopInput(xmlParserCtxtPtr ctxt) {
2272
2.39M
    if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
2273
2.39M
    if (xmlParserDebugEntities)
2274
0
  xmlGenericError(xmlGenericErrorContext,
2275
0
    "Popping input %d\n", ctxt->inputNr);
2276
2.39M
    if ((ctxt->inputNr > 1) && (ctxt->inSubset == 0) &&
2277
2.39M
        (ctxt->instate != XML_PARSER_EOF))
2278
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2279
0
                    "Unfinished entity outside the DTD");
2280
2.39M
    xmlFreeInputStream(inputPop(ctxt));
2281
2.39M
    if (*ctxt->input->cur == 0)
2282
755
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2283
2.39M
    return(CUR);
2284
2.39M
}
2285
2286
/**
2287
 * xmlPushInput:
2288
 * @ctxt:  an XML parser context
2289
 * @input:  an XML parser input fragment (entity, XML fragment ...).
2290
 *
2291
 * xmlPushInput: switch to a new input stream which is stacked on top
2292
 *               of the previous one(s).
2293
 * Returns -1 in case of error or the index in the input stack
2294
 */
2295
int
2296
3.71M
xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
2297
3.71M
    int ret;
2298
3.71M
    if (input == NULL) return(-1);
2299
2300
3.69M
    if (xmlParserDebugEntities) {
2301
0
  if ((ctxt->input != NULL) && (ctxt->input->filename))
2302
0
      xmlGenericError(xmlGenericErrorContext,
2303
0
        "%s(%d): ", ctxt->input->filename,
2304
0
        ctxt->input->line);
2305
0
  xmlGenericError(xmlGenericErrorContext,
2306
0
    "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
2307
0
    }
2308
3.69M
    if (((ctxt->inputNr > 40) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2309
3.69M
        (ctxt->inputNr > 1024)) {
2310
5.19k
        xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2311
1.27M
        while (ctxt->inputNr > 1)
2312
1.27M
            xmlFreeInputStream(inputPop(ctxt));
2313
5.19k
  return(-1);
2314
5.19k
    }
2315
3.69M
    ret = inputPush(ctxt, input);
2316
3.69M
    if (ctxt->instate == XML_PARSER_EOF)
2317
0
        return(-1);
2318
3.69M
    GROW;
2319
3.69M
    return(ret);
2320
3.69M
}
2321
2322
/**
2323
 * xmlParseCharRef:
2324
 * @ctxt:  an XML parser context
2325
 *
2326
 * DEPRECATED: Internal function, don't use.
2327
 *
2328
 * parse Reference declarations
2329
 *
2330
 * [66] CharRef ::= '&#' [0-9]+ ';' |
2331
 *                  '&#x' [0-9a-fA-F]+ ';'
2332
 *
2333
 * [ WFC: Legal Character ]
2334
 * Characters referred to using character references must match the
2335
 * production for Char.
2336
 *
2337
 * Returns the value parsed (as an int), 0 in case of error
2338
 */
2339
int
2340
1.62M
xmlParseCharRef(xmlParserCtxtPtr ctxt) {
2341
1.62M
    int val = 0;
2342
1.62M
    int count = 0;
2343
2344
    /*
2345
     * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2346
     */
2347
1.62M
    if ((RAW == '&') && (NXT(1) == '#') &&
2348
1.62M
        (NXT(2) == 'x')) {
2349
407k
  SKIP(3);
2350
407k
  GROW;
2351
1.29M
  while (RAW != ';') { /* loop blocked by count */
2352
939k
      if (count++ > 20) {
2353
29.7k
    count = 0;
2354
29.7k
    GROW;
2355
29.7k
                if (ctxt->instate == XML_PARSER_EOF)
2356
0
                    return(0);
2357
29.7k
      }
2358
939k
      if ((RAW >= '0') && (RAW <= '9'))
2359
616k
          val = val * 16 + (CUR - '0');
2360
322k
      else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2361
216k
          val = val * 16 + (CUR - 'a') + 10;
2362
106k
      else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2363
59.2k
          val = val * 16 + (CUR - 'A') + 10;
2364
47.0k
      else {
2365
47.0k
    xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2366
47.0k
    val = 0;
2367
47.0k
    break;
2368
47.0k
      }
2369
892k
      if (val > 0x110000)
2370
352k
          val = 0x110000;
2371
2372
892k
      NEXT;
2373
892k
      count++;
2374
892k
  }
2375
407k
  if (RAW == ';') {
2376
      /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2377
360k
      ctxt->input->col++;
2378
360k
      ctxt->input->cur++;
2379
360k
  }
2380
1.21M
    } else if  ((RAW == '&') && (NXT(1) == '#')) {
2381
1.21M
  SKIP(2);
2382
1.21M
  GROW;
2383
4.55M
  while (RAW != ';') { /* loop blocked by count */
2384
3.52M
      if (count++ > 20) {
2385
32.6k
    count = 0;
2386
32.6k
    GROW;
2387
32.6k
                if (ctxt->instate == XML_PARSER_EOF)
2388
0
                    return(0);
2389
32.6k
      }
2390
3.52M
      if ((RAW >= '0') && (RAW <= '9'))
2391
3.34M
          val = val * 10 + (CUR - '0');
2392
184k
      else {
2393
184k
    xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2394
184k
    val = 0;
2395
184k
    break;
2396
184k
      }
2397
3.34M
      if (val > 0x110000)
2398
347k
          val = 0x110000;
2399
2400
3.34M
      NEXT;
2401
3.34M
      count++;
2402
3.34M
  }
2403
1.21M
  if (RAW == ';') {
2404
      /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2405
1.03M
      ctxt->input->col++;
2406
1.03M
      ctxt->input->cur++;
2407
1.03M
  }
2408
1.21M
    } else {
2409
0
        xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2410
0
    }
2411
2412
    /*
2413
     * [ WFC: Legal Character ]
2414
     * Characters referred to using character references must match the
2415
     * production for Char.
2416
     */
2417
1.62M
    if (val >= 0x110000) {
2418
7.33k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2419
7.33k
                "xmlParseCharRef: character reference out of bounds\n",
2420
7.33k
          val);
2421
1.61M
    } else if (IS_CHAR(val)) {
2422
1.36M
        return(val);
2423
1.36M
    } else {
2424
249k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2425
249k
                          "xmlParseCharRef: invalid xmlChar value %d\n",
2426
249k
                    val);
2427
249k
    }
2428
256k
    return(0);
2429
1.62M
}
2430
2431
/**
2432
 * xmlParseStringCharRef:
2433
 * @ctxt:  an XML parser context
2434
 * @str:  a pointer to an index in the string
2435
 *
2436
 * parse Reference declarations, variant parsing from a string rather
2437
 * than an an input flow.
2438
 *
2439
 * [66] CharRef ::= '&#' [0-9]+ ';' |
2440
 *                  '&#x' [0-9a-fA-F]+ ';'
2441
 *
2442
 * [ WFC: Legal Character ]
2443
 * Characters referred to using character references must match the
2444
 * production for Char.
2445
 *
2446
 * Returns the value parsed (as an int), 0 in case of error, str will be
2447
 *         updated to the current value of the index
2448
 */
2449
static int
2450
672k
xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2451
672k
    const xmlChar *ptr;
2452
672k
    xmlChar cur;
2453
672k
    int val = 0;
2454
2455
672k
    if ((str == NULL) || (*str == NULL)) return(0);
2456
672k
    ptr = *str;
2457
672k
    cur = *ptr;
2458
672k
    if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2459
288k
  ptr += 3;
2460
288k
  cur = *ptr;
2461
650k
  while (cur != ';') { /* Non input consuming loop */
2462
364k
      if ((cur >= '0') && (cur <= '9'))
2463
110k
          val = val * 16 + (cur - '0');
2464
254k
      else if ((cur >= 'a') && (cur <= 'f'))
2465
171k
          val = val * 16 + (cur - 'a') + 10;
2466
83.1k
      else if ((cur >= 'A') && (cur <= 'F'))
2467
80.9k
          val = val * 16 + (cur - 'A') + 10;
2468
2.23k
      else {
2469
2.23k
    xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2470
2.23k
    val = 0;
2471
2.23k
    break;
2472
2.23k
      }
2473
362k
      if (val > 0x110000)
2474
25.3k
          val = 0x110000;
2475
2476
362k
      ptr++;
2477
362k
      cur = *ptr;
2478
362k
  }
2479
288k
  if (cur == ';')
2480
285k
      ptr++;
2481
384k
    } else if  ((cur == '&') && (ptr[1] == '#')){
2482
384k
  ptr += 2;
2483
384k
  cur = *ptr;
2484
1.28M
  while (cur != ';') { /* Non input consuming loops */
2485
905k
      if ((cur >= '0') && (cur <= '9'))
2486
903k
          val = val * 10 + (cur - '0');
2487
1.50k
      else {
2488
1.50k
    xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2489
1.50k
    val = 0;
2490
1.50k
    break;
2491
1.50k
      }
2492
903k
      if (val > 0x110000)
2493
27.6k
          val = 0x110000;
2494
2495
903k
      ptr++;
2496
903k
      cur = *ptr;
2497
903k
  }
2498
384k
  if (cur == ';')
2499
382k
      ptr++;
2500
384k
    } else {
2501
0
  xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2502
0
  return(0);
2503
0
    }
2504
672k
    *str = ptr;
2505
2506
    /*
2507
     * [ WFC: Legal Character ]
2508
     * Characters referred to using character references must match the
2509
     * production for Char.
2510
     */
2511
672k
    if (val >= 0x110000) {
2512
431
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2513
431
                "xmlParseStringCharRef: character reference out of bounds\n",
2514
431
                val);
2515
672k
    } else if (IS_CHAR(val)) {
2516
667k
        return(val);
2517
667k
    } else {
2518
4.74k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2519
4.74k
        "xmlParseStringCharRef: invalid xmlChar value %d\n",
2520
4.74k
        val);
2521
4.74k
    }
2522
5.17k
    return(0);
2523
672k
}
2524
2525
/**
2526
 * xmlParserHandlePEReference:
2527
 * @ctxt:  the parser context
2528
 *
2529
 * [69] PEReference ::= '%' Name ';'
2530
 *
2531
 * [ WFC: No Recursion ]
2532
 * A parsed entity must not contain a recursive
2533
 * reference to itself, either directly or indirectly.
2534
 *
2535
 * [ WFC: Entity Declared ]
2536
 * In a document without any DTD, a document with only an internal DTD
2537
 * subset which contains no parameter entity references, or a document
2538
 * with "standalone='yes'", ...  ... The declaration of a parameter
2539
 * entity must precede any reference to it...
2540
 *
2541
 * [ VC: Entity Declared ]
2542
 * In a document with an external subset or external parameter entities
2543
 * with "standalone='no'", ...  ... The declaration of a parameter entity
2544
 * must precede any reference to it...
2545
 *
2546
 * [ WFC: In DTD ]
2547
 * Parameter-entity references may only appear in the DTD.
2548
 * NOTE: misleading but this is handled.
2549
 *
2550
 * A PEReference may have been detected in the current input stream
2551
 * the handling is done accordingly to
2552
 *      http://www.w3.org/TR/REC-xml#entproc
2553
 * i.e.
2554
 *   - Included in literal in entity values
2555
 *   - Included as Parameter Entity reference within DTDs
2556
 */
2557
void
2558
0
xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
2559
0
    switch(ctxt->instate) {
2560
0
  case XML_PARSER_CDATA_SECTION:
2561
0
      return;
2562
0
        case XML_PARSER_COMMENT:
2563
0
      return;
2564
0
  case XML_PARSER_START_TAG:
2565
0
      return;
2566
0
  case XML_PARSER_END_TAG:
2567
0
      return;
2568
0
        case XML_PARSER_EOF:
2569
0
      xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
2570
0
      return;
2571
0
        case XML_PARSER_PROLOG:
2572
0
  case XML_PARSER_START:
2573
0
  case XML_PARSER_MISC:
2574
0
      xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
2575
0
      return;
2576
0
  case XML_PARSER_ENTITY_DECL:
2577
0
        case XML_PARSER_CONTENT:
2578
0
        case XML_PARSER_ATTRIBUTE_VALUE:
2579
0
        case XML_PARSER_PI:
2580
0
  case XML_PARSER_SYSTEM_LITERAL:
2581
0
  case XML_PARSER_PUBLIC_LITERAL:
2582
      /* we just ignore it there */
2583
0
      return;
2584
0
        case XML_PARSER_EPILOG:
2585
0
      xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
2586
0
      return;
2587
0
  case XML_PARSER_ENTITY_VALUE:
2588
      /*
2589
       * NOTE: in the case of entity values, we don't do the
2590
       *       substitution here since we need the literal
2591
       *       entity value to be able to save the internal
2592
       *       subset of the document.
2593
       *       This will be handled by xmlStringDecodeEntities
2594
       */
2595
0
      return;
2596
0
        case XML_PARSER_DTD:
2597
      /*
2598
       * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2599
       * In the internal DTD subset, parameter-entity references
2600
       * can occur only where markup declarations can occur, not
2601
       * within markup declarations.
2602
       * In that case this is handled in xmlParseMarkupDecl
2603
       */
2604
0
      if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2605
0
    return;
2606
0
      if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
2607
0
    return;
2608
0
            break;
2609
0
        case XML_PARSER_IGNORE:
2610
0
            return;
2611
0
    }
2612
2613
0
    xmlParsePEReference(ctxt);
2614
0
}
2615
2616
/*
 * Macro used to grow the current buffer to twice its size plus n bytes.
 * buffer##_size is expected to be a size_t
 * mem_error: is expected to handle memory allocation failures; it is also
 *            the target when the new size computation wraps around.
 * On failure the buffer and buffer##_size are left unchanged.
 * The n argument is parenthesized so that expression arguments are added
 * as a whole.
 */
#define growBuffer(buffer, n) {						\
    xmlChar *tmp;							\
    size_t new_size = buffer##_size * 2 + (n);                          \
    if (new_size < buffer##_size) goto mem_error;                       \
    tmp = (xmlChar *) xmlRealloc(buffer, new_size);                     \
    if (tmp == NULL) goto mem_error;					\
    buffer = tmp;							\
    buffer##_size = new_size;                                           \
}
2630
2631
/**
2632
 * xmlStringLenDecodeEntities:
2633
 * @ctxt:  the parser context
2634
 * @str:  the input string
2635
 * @len: the string length
2636
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2637
 * @end:  an end marker xmlChar, 0 if none
2638
 * @end2:  an end marker xmlChar, 0 if none
2639
 * @end3:  an end marker xmlChar, 0 if none
2640
 *
2641
 * Takes a entity string content and process to do the adequate substitutions.
2642
 *
2643
 * [67] Reference ::= EntityRef | CharRef
2644
 *
2645
 * [69] PEReference ::= '%' Name ';'
2646
 *
2647
 * Returns A newly allocated string with the substitution done. The caller
2648
 *      must deallocate it !
2649
 */
2650
xmlChar *
2651
xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2652
9.57M
          int what, xmlChar end, xmlChar  end2, xmlChar end3) {
2653
9.57M
    xmlChar *buffer = NULL;
2654
9.57M
    size_t buffer_size = 0;
2655
9.57M
    size_t nbchars = 0;
2656
2657
9.57M
    xmlChar *current = NULL;
2658
9.57M
    xmlChar *rep = NULL;
2659
9.57M
    const xmlChar *last;
2660
9.57M
    xmlEntityPtr ent;
2661
9.57M
    int c,l;
2662
2663
9.57M
    if ((ctxt == NULL) || (str == NULL) || (len < 0))
2664
0
  return(NULL);
2665
9.57M
    last = str + len;
2666
2667
9.57M
    if (((ctxt->depth > 40) &&
2668
9.57M
         ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2669
9.57M
  (ctxt->depth > 1024)) {
2670
6.84k
  xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2671
6.84k
  return(NULL);
2672
6.84k
    }
2673
2674
    /*
2675
     * allocate a translation buffer.
2676
     */
2677
9.57M
    buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
2678
9.57M
    buffer = (xmlChar *) xmlMallocAtomic(buffer_size);
2679
9.57M
    if (buffer == NULL) goto mem_error;
2680
2681
    /*
2682
     * OK loop until we reach one of the ending char or a size limit.
2683
     * we are operating on already parsed values.
2684
     */
2685
9.57M
    if (str < last)
2686
9.56M
  c = CUR_SCHAR(str, l);
2687
8.48k
    else
2688
8.48k
        c = 0;
2689
683M
    while ((c != 0) && (c != end) && /* non input consuming loop */
2690
683M
           (c != end2) && (c != end3) &&
2691
683M
           (ctxt->instate != XML_PARSER_EOF)) {
2692
2693
674M
  if (c == 0) break;
2694
674M
        if ((c == '&') && (str[1] == '#')) {
2695
672k
      int val = xmlParseStringCharRef(ctxt, &str);
2696
672k
      if (val == 0)
2697
5.17k
                goto int_error;
2698
667k
      COPY_BUF(0,buffer,nbchars,val);
2699
667k
      if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2700
44.5k
          growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2701
44.5k
      }
2702
674M
  } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2703
1.12M
      if (xmlParserDebugEntities)
2704
0
    xmlGenericError(xmlGenericErrorContext,
2705
0
      "String decoding Entity Reference: %.30s\n",
2706
0
      str);
2707
1.12M
      ent = xmlParseStringEntityRef(ctxt, &str);
2708
1.12M
      xmlParserEntityCheck(ctxt, 0, ent, 0);
2709
1.12M
      if (ent != NULL)
2710
959k
          ctxt->nbentities += ent->checked / 2;
2711
1.12M
      if ((ent != NULL) &&
2712
1.12M
    (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2713
297k
    if (ent->content != NULL) {
2714
297k
        COPY_BUF(0,buffer,nbchars,ent->content[0]);
2715
297k
        if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2716
10.9k
      growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2717
10.9k
        }
2718
297k
    } else {
2719
0
        xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2720
0
          "predefined entity has no content\n");
2721
0
                    goto int_error;
2722
0
    }
2723
829k
      } else if ((ent != NULL) && (ent->content != NULL)) {
2724
650k
    ctxt->depth++;
2725
650k
    rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2726
650k
                            0, 0, 0);
2727
650k
    ctxt->depth--;
2728
650k
    if (rep == NULL) {
2729
464k
                    ent->content[0] = 0;
2730
464k
                    goto int_error;
2731
464k
                }
2732
2733
185k
                current = rep;
2734
5.27M
                while (*current != 0) { /* non input consuming loop */
2735
5.08M
                    buffer[nbchars++] = *current++;
2736
5.08M
                    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2737
13.3k
                        if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
2738
7
                            goto int_error;
2739
39.9k
                        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2740
39.9k
                    }
2741
5.08M
                }
2742
185k
                xmlFree(rep);
2743
185k
                rep = NULL;
2744
185k
      } else if (ent != NULL) {
2745
11.5k
    int i = xmlStrlen(ent->name);
2746
11.5k
    const xmlChar *cur = ent->name;
2747
2748
11.5k
    buffer[nbchars++] = '&';
2749
11.5k
    if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) {
2750
0
        growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE);
2751
0
    }
2752
37.2k
    for (;i > 0;i--)
2753
25.7k
        buffer[nbchars++] = *cur++;
2754
11.5k
    buffer[nbchars++] = ';';
2755
11.5k
      }
2756
673M
  } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2757
2.55M
      if (xmlParserDebugEntities)
2758
0
    xmlGenericError(xmlGenericErrorContext,
2759
0
      "String decoding PE Reference: %.30s\n", str);
2760
2.55M
      ent = xmlParseStringPEReference(ctxt, &str);
2761
2.55M
      xmlParserEntityCheck(ctxt, 0, ent, 0);
2762
2.55M
      if (ent != NULL)
2763
810k
          ctxt->nbentities += ent->checked / 2;
2764
2.55M
      if (ent != NULL) {
2765
810k
                if (ent->content == NULL) {
2766
        /*
2767
         * Note: external parsed entities will not be loaded,
2768
         * it is not required for a non-validating parser to
2769
         * complete external PEReferences coming from the
2770
         * internal subset
2771
         */
2772
17.3k
        if (((ctxt->options & XML_PARSE_NOENT) != 0) ||
2773
17.3k
      ((ctxt->options & XML_PARSE_DTDVALID) != 0) ||
2774
17.3k
      (ctxt->validate != 0)) {
2775
13.3k
      xmlLoadEntityContent(ctxt, ent);
2776
13.3k
        } else {
2777
4.03k
      xmlWarningMsg(ctxt, XML_ERR_ENTITY_PROCESSING,
2778
4.03k
      "not validating will not read content for PE entity %s\n",
2779
4.03k
                          ent->name, NULL);
2780
4.03k
        }
2781
17.3k
    }
2782
810k
    ctxt->depth++;
2783
810k
    rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2784
810k
                            0, 0, 0);
2785
810k
    ctxt->depth--;
2786
810k
    if (rep == NULL) {
2787
450k
                    if (ent->content != NULL)
2788
435k
                        ent->content[0] = 0;
2789
450k
                    goto int_error;
2790
450k
                }
2791
360k
                current = rep;
2792
19.3M
                while (*current != 0) { /* non input consuming loop */
2793
18.9M
                    buffer[nbchars++] = *current++;
2794
18.9M
                    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2795
5.52k
                        if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
2796
395
                            goto int_error;
2797
15.3k
                        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2798
15.3k
                    }
2799
18.9M
                }
2800
359k
                xmlFree(rep);
2801
359k
                rep = NULL;
2802
359k
      }
2803
670M
  } else {
2804
670M
      COPY_BUF(l,buffer,nbchars,c);
2805
670M
      str += l;
2806
670M
      if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2807
1.24M
          growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2808
1.24M
      }
2809
670M
  }
2810
674M
  if (str < last)
2811
665M
      c = CUR_SCHAR(str, l);
2812
8.64M
  else
2813
8.64M
      c = 0;
2814
674M
    }
2815
8.65M
    buffer[nbchars] = 0;
2816
8.65M
    return(buffer);
2817
2818
0
mem_error:
2819
0
    xmlErrMemory(ctxt, NULL);
2820
920k
int_error:
2821
920k
    if (rep != NULL)
2822
402
        xmlFree(rep);
2823
920k
    if (buffer != NULL)
2824
920k
        xmlFree(buffer);
2825
920k
    return(NULL);
2826
0
}
2827
2828
/**
2829
 * xmlStringDecodeEntities:
2830
 * @ctxt:  the parser context
2831
 * @str:  the input string
2832
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2833
 * @end:  an end marker xmlChar, 0 if none
2834
 * @end2:  an end marker xmlChar, 0 if none
2835
 * @end3:  an end marker xmlChar, 0 if none
2836
 *
2837
 * Takes a entity string content and process to do the adequate substitutions.
2838
 *
2839
 * [67] Reference ::= EntityRef | CharRef
2840
 *
2841
 * [69] PEReference ::= '%' Name ';'
2842
 *
2843
 * Returns A newly allocated string with the substitution done. The caller
2844
 *      must deallocate it !
2845
 */
2846
xmlChar *
2847
xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2848
9.55M
            xmlChar end, xmlChar  end2, xmlChar end3) {
2849
9.55M
    if ((ctxt == NULL) || (str == NULL)) return(NULL);
2850
9.53M
    return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2851
9.53M
           end, end2, end3));
2852
9.55M
}
2853
2854
/************************************************************************
2855
 *                  *
2856
 *    Commodity functions, cleanup needed ?     *
2857
 *                  *
2858
 ************************************************************************/
2859
2860
/**
2861
 * areBlanks:
2862
 * @ctxt:  an XML parser context
2863
 * @str:  a xmlChar *
2864
 * @len:  the size of @str
2865
 * @blank_chars: we know the chars are blanks
2866
 *
2867
 * Is this a sequence of blank chars that one can ignore ?
2868
 *
2869
 * Returns 1 if ignorable 0 otherwise.
2870
 */
2871
2872
static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2873
13.2M
                     int blank_chars) {
2874
13.2M
    int i, ret;
2875
13.2M
    xmlNodePtr lastChild;
2876
2877
    /*
2878
     * Don't spend time trying to differentiate them, the same callback is
2879
     * used !
2880
     */
2881
13.2M
    if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
2882
191k
  return(0);
2883
2884
    /*
2885
     * Check for xml:space value.
2886
     */
2887
13.0M
    if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2888
13.0M
        (*(ctxt->space) == -2))
2889
7.77M
  return(0);
2890
2891
    /*
2892
     * Check that the string is made of blanks
2893
     */
2894
5.27M
    if (blank_chars == 0) {
2895
7.21M
  for (i = 0;i < len;i++)
2896
6.52M
      if (!(IS_BLANK_CH(str[i]))) return(0);
2897
1.91M
    }
2898
2899
    /*
2900
     * Look if the element is mixed content in the DTD if available
2901
     */
2902
4.04M
    if (ctxt->node == NULL) return(0);
2903
3.80M
    if (ctxt->myDoc != NULL) {
2904
3.80M
  ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2905
3.80M
        if (ret == 0) return(1);
2906
3.72M
        if (ret == 1) return(0);
2907
3.72M
    }
2908
2909
    /*
2910
     * Otherwise, heuristic :-\
2911
     */
2912
3.72M
    if ((RAW != '<') && (RAW != 0xD)) return(0);
2913
3.12M
    if ((ctxt->node->children == NULL) &&
2914
3.12M
  (RAW == '<') && (NXT(1) == '/')) return(0);
2915
2916
3.11M
    lastChild = xmlGetLastChild(ctxt->node);
2917
3.11M
    if (lastChild == NULL) {
2918
1.52M
        if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2919
1.52M
            (ctxt->node->content != NULL)) return(0);
2920
1.59M
    } else if (xmlNodeIsText(lastChild))
2921
922k
        return(0);
2922
674k
    else if ((ctxt->node->children != NULL) &&
2923
674k
             (xmlNodeIsText(ctxt->node->children)))
2924
28.7k
        return(0);
2925
2.16M
    return(1);
2926
3.11M
}
2927
2928
/************************************************************************
2929
 *                  *
2930
 *    Extra stuff for namespace support     *
2931
 *  Relates to http://www.w3.org/TR/WD-xml-names      *
2932
 *                  *
2933
 ************************************************************************/
2934
2935
/**
2936
 * xmlSplitQName:
2937
 * @ctxt:  an XML parser context
2938
 * @name:  an XML parser context
2939
 * @prefix:  a xmlChar **
2940
 *
2941
 * parse an UTF8 encoded XML qualified name string
2942
 *
2943
 * [NS 5] QName ::= (Prefix ':')? LocalPart
2944
 *
2945
 * [NS 6] Prefix ::= NCName
2946
 *
2947
 * [NS 7] LocalPart ::= NCName
2948
 *
2949
 * Returns the local part, and prefix is updated
2950
 *   to get the Prefix if any.
2951
 */
2952
2953
xmlChar *
2954
14.0M
xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2955
14.0M
    xmlChar buf[XML_MAX_NAMELEN + 5];
2956
14.0M
    xmlChar *buffer = NULL;
2957
14.0M
    int len = 0;
2958
14.0M
    int max = XML_MAX_NAMELEN;
2959
14.0M
    xmlChar *ret = NULL;
2960
14.0M
    const xmlChar *cur = name;
2961
14.0M
    int c;
2962
2963
14.0M
    if (prefix == NULL) return(NULL);
2964
14.0M
    *prefix = NULL;
2965
2966
14.0M
    if (cur == NULL) return(NULL);
2967
2968
#ifndef XML_XML_NAMESPACE
2969
    /* xml: prefix is not really a namespace */
2970
    if ((cur[0] == 'x') && (cur[1] == 'm') &&
2971
        (cur[2] == 'l') && (cur[3] == ':'))
2972
  return(xmlStrdup(name));
2973
#endif
2974
2975
    /* nasty but well=formed */
2976
14.0M
    if (cur[0] == ':')
2977
38.0k
  return(xmlStrdup(name));
2978
2979
14.0M
    c = *cur++;
2980
94.4M
    while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2981
80.4M
  buf[len++] = c;
2982
80.4M
  c = *cur++;
2983
80.4M
    }
2984
14.0M
    if (len >= max) {
2985
  /*
2986
   * Okay someone managed to make a huge name, so he's ready to pay
2987
   * for the processing speed.
2988
   */
2989
211k
  max = len * 2;
2990
2991
211k
  buffer = (xmlChar *) xmlMallocAtomic(max);
2992
211k
  if (buffer == NULL) {
2993
0
      xmlErrMemory(ctxt, NULL);
2994
0
      return(NULL);
2995
0
  }
2996
211k
  memcpy(buffer, buf, len);
2997
86.7M
  while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2998
86.5M
      if (len + 10 > max) {
2999
332k
          xmlChar *tmp;
3000
3001
332k
    max *= 2;
3002
332k
    tmp = (xmlChar *) xmlRealloc(buffer, max);
3003
332k
    if (tmp == NULL) {
3004
0
        xmlFree(buffer);
3005
0
        xmlErrMemory(ctxt, NULL);
3006
0
        return(NULL);
3007
0
    }
3008
332k
    buffer = tmp;
3009
332k
      }
3010
86.5M
      buffer[len++] = c;
3011
86.5M
      c = *cur++;
3012
86.5M
  }
3013
211k
  buffer[len] = 0;
3014
211k
    }
3015
3016
14.0M
    if ((c == ':') && (*cur == 0)) {
3017
50.9k
        if (buffer != NULL)
3018
153
      xmlFree(buffer);
3019
50.9k
  *prefix = NULL;
3020
50.9k
  return(xmlStrdup(name));
3021
50.9k
    }
3022
3023
13.9M
    if (buffer == NULL)
3024
13.7M
  ret = xmlStrndup(buf, len);
3025
211k
    else {
3026
211k
  ret = buffer;
3027
211k
  buffer = NULL;
3028
211k
  max = XML_MAX_NAMELEN;
3029
211k
    }
3030
3031
3032
13.9M
    if (c == ':') {
3033
6.26M
  c = *cur;
3034
6.26M
        *prefix = ret;
3035
6.26M
  if (c == 0) {
3036
0
      return(xmlStrndup(BAD_CAST "", 0));
3037
0
  }
3038
6.26M
  len = 0;
3039
3040
  /*
3041
   * Check that the first character is proper to start
3042
   * a new name
3043
   */
3044
6.26M
  if (!(((c >= 0x61) && (c <= 0x7A)) ||
3045
6.26M
        ((c >= 0x41) && (c <= 0x5A)) ||
3046
6.26M
        (c == '_') || (c == ':'))) {
3047
8.33k
      int l;
3048
8.33k
      int first = CUR_SCHAR(cur, l);
3049
3050
8.33k
      if (!IS_LETTER(first) && (first != '_')) {
3051
5.28k
    xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
3052
5.28k
          "Name %s is not XML Namespace compliant\n",
3053
5.28k
          name);
3054
5.28k
      }
3055
8.33k
  }
3056
6.26M
  cur++;
3057
3058
28.8M
  while ((c != 0) && (len < max)) { /* tested bigname2.xml */
3059
22.5M
      buf[len++] = c;
3060
22.5M
      c = *cur++;
3061
22.5M
  }
3062
6.26M
  if (len >= max) {
3063
      /*
3064
       * Okay someone managed to make a huge name, so he's ready to pay
3065
       * for the processing speed.
3066
       */
3067
46.7k
      max = len * 2;
3068
3069
46.7k
      buffer = (xmlChar *) xmlMallocAtomic(max);
3070
46.7k
      if (buffer == NULL) {
3071
0
          xmlErrMemory(ctxt, NULL);
3072
0
    return(NULL);
3073
0
      }
3074
46.7k
      memcpy(buffer, buf, len);
3075
5.30M
      while (c != 0) { /* tested bigname2.xml */
3076
5.25M
    if (len + 10 > max) {
3077
2.35k
        xmlChar *tmp;
3078
3079
2.35k
        max *= 2;
3080
2.35k
        tmp = (xmlChar *) xmlRealloc(buffer, max);
3081
2.35k
        if (tmp == NULL) {
3082
0
      xmlErrMemory(ctxt, NULL);
3083
0
      xmlFree(buffer);
3084
0
      return(NULL);
3085
0
        }
3086
2.35k
        buffer = tmp;
3087
2.35k
    }
3088
5.25M
    buffer[len++] = c;
3089
5.25M
    c = *cur++;
3090
5.25M
      }
3091
46.7k
      buffer[len] = 0;
3092
46.7k
  }
3093
3094
6.26M
  if (buffer == NULL)
3095
6.21M
      ret = xmlStrndup(buf, len);
3096
46.7k
  else {
3097
46.7k
      ret = buffer;
3098
46.7k
  }
3099
6.26M
    }
3100
3101
13.9M
    return(ret);
3102
13.9M
}
3103
3104
/************************************************************************
3105
 *                  *
3106
 *      The parser itself       *
3107
 *  Relates to http://www.w3.org/TR/REC-xml       *
3108
 *                  *
3109
 ************************************************************************/
3110
3111
/************************************************************************
3112
 *                  *
3113
 *  Routines to parse Name, NCName and NmToken      *
3114
 *                  *
3115
 ************************************************************************/
3116
#ifdef DEBUG
3117
static unsigned long nbParseName = 0;
3118
static unsigned long nbParseNmToken = 0;
3119
static unsigned long nbParseNCName = 0;
3120
static unsigned long nbParseNCNameComplex = 0;
3121
static unsigned long nbParseNameComplex = 0;
3122
static unsigned long nbParseStringName = 0;
3123
#endif
3124
3125
/*
3126
 * The two following functions are related to the change of accepted
3127
 * characters for Name and NmToken in the Revision 5 of XML-1.0
3128
 * They correspond to the modified production [4] and the new production [4a]
3129
 * changes in that revision. Also note that the macros used for the
3130
 * productions Letter, Digit, CombiningChar and Extender are not needed
3131
 * anymore.
3132
 * We still keep compatibility with the pre-revision-5 parsing semantics if the
3133
 * new XML_PARSE_OLD10 option is given to the parser.
3134
 */
3135
static int
3136
4.83M
xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
3137
4.83M
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3138
        /*
3139
   * Use the new checks of production [4] [4a] amd [5] of the
3140
   * Update 5 of XML-1.0
3141
   */
3142
2.72M
  if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3143
2.72M
      (((c >= 'a') && (c <= 'z')) ||
3144
2.51M
       ((c >= 'A') && (c <= 'Z')) ||
3145
2.51M
       (c == '_') || (c == ':') ||
3146
2.51M
       ((c >= 0xC0) && (c <= 0xD6)) ||
3147
2.51M
       ((c >= 0xD8) && (c <= 0xF6)) ||
3148
2.51M
       ((c >= 0xF8) && (c <= 0x2FF)) ||
3149
2.51M
       ((c >= 0x370) && (c <= 0x37D)) ||
3150
2.51M
       ((c >= 0x37F) && (c <= 0x1FFF)) ||
3151
2.51M
       ((c >= 0x200C) && (c <= 0x200D)) ||
3152
2.51M
       ((c >= 0x2070) && (c <= 0x218F)) ||
3153
2.51M
       ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3154
2.51M
       ((c >= 0x3001) && (c <= 0xD7FF)) ||
3155
2.51M
       ((c >= 0xF900) && (c <= 0xFDCF)) ||
3156
2.51M
       ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3157
2.51M
       ((c >= 0x10000) && (c <= 0xEFFFF))))
3158
2.21M
      return(1);
3159
2.72M
    } else {
3160
2.10M
        if (IS_LETTER(c) || (c == '_') || (c == ':'))
3161
1.77M
      return(1);
3162
2.10M
    }
3163
841k
    return(0);
3164
4.83M
}
3165
3166
static int
3167
60.2M
xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
3168
60.2M
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3169
        /*
3170
   * Use the new checks of production [4] [4a] amd [5] of the
3171
   * Update 5 of XML-1.0
3172
   */
3173
32.9M
  if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3174
32.9M
      (((c >= 'a') && (c <= 'z')) ||
3175
32.5M
       ((c >= 'A') && (c <= 'Z')) ||
3176
32.5M
       ((c >= '0') && (c <= '9')) || /* !start */
3177
32.5M
       (c == '_') || (c == ':') ||
3178
32.5M
       (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3179
32.5M
       ((c >= 0xC0) && (c <= 0xD6)) ||
3180
32.5M
       ((c >= 0xD8) && (c <= 0xF6)) ||
3181
32.5M
       ((c >= 0xF8) && (c <= 0x2FF)) ||
3182
32.5M
       ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3183
32.5M
       ((c >= 0x370) && (c <= 0x37D)) ||
3184
32.5M
       ((c >= 0x37F) && (c <= 0x1FFF)) ||
3185
32.5M
       ((c >= 0x200C) && (c <= 0x200D)) ||
3186
32.5M
       ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3187
32.5M
       ((c >= 0x2070) && (c <= 0x218F)) ||
3188
32.5M
       ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3189
32.5M
       ((c >= 0x3001) && (c <= 0xD7FF)) ||
3190
32.5M
       ((c >= 0xF900) && (c <= 0xFDCF)) ||
3191
32.5M
       ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3192
32.5M
       ((c >= 0x10000) && (c <= 0xEFFFF))))
3193
29.6M
       return(1);
3194
32.9M
    } else {
3195
27.3M
        if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3196
27.3M
            (c == '.') || (c == '-') ||
3197
27.3M
      (c == '_') || (c == ':') ||
3198
27.3M
      (IS_COMBINING(c)) ||
3199
27.3M
      (IS_EXTENDER(c)))
3200
24.5M
      return(1);
3201
27.3M
    }
3202
6.02M
    return(0);
3203
60.2M
}
3204
3205
static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
3206
                                          int *len, int *alloc, int normalize);
3207
3208
static const xmlChar *
3209
10.5M
xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3210
10.5M
    int len = 0, l;
3211
10.5M
    int c;
3212
10.5M
    int count = 0;
3213
10.5M
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3214
9.84M
                    XML_MAX_TEXT_LENGTH :
3215
10.5M
                    XML_MAX_NAME_LENGTH;
3216
3217
#ifdef DEBUG
3218
    nbParseNameComplex++;
3219
#endif
3220
3221
    /*
3222
     * Handler for more complex cases
3223
     */
3224
10.5M
    GROW;
3225
10.5M
    if (ctxt->instate == XML_PARSER_EOF)
3226
0
        return(NULL);
3227
10.5M
    c = CUR_CHAR(l);
3228
10.5M
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3229
        /*
3230
   * Use the new checks of production [4] [4a] amd [5] of the
3231
   * Update 5 of XML-1.0
3232
   */
3233
6.54M
  if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3234
6.54M
      (!(((c >= 'a') && (c <= 'z')) ||
3235
6.30M
         ((c >= 'A') && (c <= 'Z')) ||
3236
6.30M
         (c == '_') || (c == ':') ||
3237
6.30M
         ((c >= 0xC0) && (c <= 0xD6)) ||
3238
6.30M
         ((c >= 0xD8) && (c <= 0xF6)) ||
3239
6.30M
         ((c >= 0xF8) && (c <= 0x2FF)) ||
3240
6.30M
         ((c >= 0x370) && (c <= 0x37D)) ||
3241
6.30M
         ((c >= 0x37F) && (c <= 0x1FFF)) ||
3242
6.30M
         ((c >= 0x200C) && (c <= 0x200D)) ||
3243
6.30M
         ((c >= 0x2070) && (c <= 0x218F)) ||
3244
6.30M
         ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3245
6.30M
         ((c >= 0x3001) && (c <= 0xD7FF)) ||
3246
6.30M
         ((c >= 0xF900) && (c <= 0xFDCF)) ||
3247
6.30M
         ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3248
6.30M
         ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3249
4.51M
      return(NULL);
3250
4.51M
  }
3251
2.02M
  len += l;
3252
2.02M
  NEXTL(l);
3253
2.02M
  c = CUR_CHAR(l);
3254
38.3M
  while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3255
38.3M
         (((c >= 'a') && (c <= 'z')) ||
3256
37.1M
          ((c >= 'A') && (c <= 'Z')) ||
3257
37.1M
          ((c >= '0') && (c <= '9')) || /* !start */
3258
37.1M
          (c == '_') || (c == ':') ||
3259
37.1M
          (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3260
37.1M
          ((c >= 0xC0) && (c <= 0xD6)) ||
3261
37.1M
          ((c >= 0xD8) && (c <= 0xF6)) ||
3262
37.1M
          ((c >= 0xF8) && (c <= 0x2FF)) ||
3263
37.1M
          ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3264
37.1M
          ((c >= 0x370) && (c <= 0x37D)) ||
3265
37.1M
          ((c >= 0x37F) && (c <= 0x1FFF)) ||
3266
37.1M
          ((c >= 0x200C) && (c <= 0x200D)) ||
3267
37.1M
          ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3268
37.1M
          ((c >= 0x2070) && (c <= 0x218F)) ||
3269
37.1M
          ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3270
37.1M
          ((c >= 0x3001) && (c <= 0xD7FF)) ||
3271
37.1M
          ((c >= 0xF900) && (c <= 0xFDCF)) ||
3272
37.1M
          ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3273
37.1M
          ((c >= 0x10000) && (c <= 0xEFFFF))
3274
37.1M
    )) {
3275
36.3M
      if (count++ > XML_PARSER_CHUNK_SIZE) {
3276
223k
    count = 0;
3277
223k
    GROW;
3278
223k
                if (ctxt->instate == XML_PARSER_EOF)
3279
0
                    return(NULL);
3280
223k
      }
3281
36.3M
            if (len <= INT_MAX - l)
3282
36.3M
          len += l;
3283
36.3M
      NEXTL(l);
3284
36.3M
      c = CUR_CHAR(l);
3285
36.3M
  }
3286
4.00M
    } else {
3287
4.00M
  if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3288
4.00M
      (!IS_LETTER(c) && (c != '_') &&
3289
3.82M
       (c != ':'))) {
3290
2.96M
      return(NULL);
3291
2.96M
  }
3292
1.04M
  len += l;
3293
1.04M
  NEXTL(l);
3294
1.04M
  c = CUR_CHAR(l);
3295
3296
13.8M
  while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3297
13.8M
         ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3298
13.3M
    (c == '.') || (c == '-') ||
3299
13.3M
    (c == '_') || (c == ':') ||
3300
13.3M
    (IS_COMBINING(c)) ||
3301
13.3M
    (IS_EXTENDER(c)))) {
3302
12.8M
      if (count++ > XML_PARSER_CHUNK_SIZE) {
3303
52.3k
    count = 0;
3304
52.3k
    GROW;
3305
52.3k
                if (ctxt->instate == XML_PARSER_EOF)
3306
0
                    return(NULL);
3307
52.3k
      }
3308
12.8M
            if (len <= INT_MAX - l)
3309
12.8M
          len += l;
3310
12.8M
      NEXTL(l);
3311
12.8M
      c = CUR_CHAR(l);
3312
12.8M
  }
3313
1.04M
    }
3314
3.06M
    if (len > maxLength) {
3315
3
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3316
3
        return(NULL);
3317
3
    }
3318
3.06M
    if (ctxt->input->cur - ctxt->input->base < len) {
3319
        /*
3320
         * There were a couple of bugs where PERefs lead to to a change
3321
         * of the buffer. Check the buffer size to avoid passing an invalid
3322
         * pointer to xmlDictLookup.
3323
         */
3324
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
3325
0
                    "unexpected change of input buffer");
3326
0
        return (NULL);
3327
0
    }
3328
3.06M
    if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3329
29.6k
        return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
3330
3.03M
    return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3331
3.06M
}
3332
3333
/**
3334
 * xmlParseName:
3335
 * @ctxt:  an XML parser context
3336
 *
3337
 * DEPRECATED: Internal function, don't use.
3338
 *
3339
 * parse an XML name.
3340
 *
3341
 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3342
 *                  CombiningChar | Extender
3343
 *
3344
 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3345
 *
3346
 * [6] Names ::= Name (#x20 Name)*
3347
 *
3348
 * Returns the Name parsed or NULL
3349
 */
3350
3351
const xmlChar *
3352
70.0M
xmlParseName(xmlParserCtxtPtr ctxt) {
3353
70.0M
    const xmlChar *in;
3354
70.0M
    const xmlChar *ret;
3355
70.0M
    size_t count = 0;
3356
70.0M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3357
59.6M
                       XML_MAX_TEXT_LENGTH :
3358
70.0M
                       XML_MAX_NAME_LENGTH;
3359
3360
70.0M
    GROW;
3361
3362
#ifdef DEBUG
3363
    nbParseName++;
3364
#endif
3365
3366
    /*
3367
     * Accelerator for simple ASCII names
3368
     */
3369
70.0M
    in = ctxt->input->cur;
3370
70.0M
    if (((*in >= 0x61) && (*in <= 0x7A)) ||
3371
70.0M
  ((*in >= 0x41) && (*in <= 0x5A)) ||
3372
70.0M
  (*in == '_') || (*in == ':')) {
3373
62.2M
  in++;
3374
532M
  while (((*in >= 0x61) && (*in <= 0x7A)) ||
3375
532M
         ((*in >= 0x41) && (*in <= 0x5A)) ||
3376
532M
         ((*in >= 0x30) && (*in <= 0x39)) ||
3377
532M
         (*in == '_') || (*in == '-') ||
3378
532M
         (*in == ':') || (*in == '.'))
3379
470M
      in++;
3380
62.2M
  if ((*in > 0) && (*in < 0x80)) {
3381
59.4M
      count = in - ctxt->input->cur;
3382
59.4M
            if (count > maxLength) {
3383
0
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3384
0
                return(NULL);
3385
0
            }
3386
59.4M
      ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3387
59.4M
      ctxt->input->cur = in;
3388
59.4M
      ctxt->input->col += count;
3389
59.4M
      if (ret == NULL)
3390
0
          xmlErrMemory(ctxt, NULL);
3391
59.4M
      return(ret);
3392
59.4M
  }
3393
62.2M
    }
3394
    /* accelerator for special cases */
3395
10.5M
    return(xmlParseNameComplex(ctxt));
3396
70.0M
}
3397
3398
static const xmlChar *
3399
484k
xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3400
484k
    int len = 0, l;
3401
484k
    int c;
3402
484k
    int count = 0;
3403
484k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3404
196k
                    XML_MAX_TEXT_LENGTH :
3405
484k
                    XML_MAX_NAME_LENGTH;
3406
484k
    size_t startPosition = 0;
3407
3408
#ifdef DEBUG
3409
    nbParseNCNameComplex++;
3410
#endif
3411
3412
    /*
3413
     * Handler for more complex cases
3414
     */
3415
484k
    GROW;
3416
484k
    startPosition = CUR_PTR - BASE_PTR;
3417
484k
    c = CUR_CHAR(l);
3418
484k
    if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3419
484k
  (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3420
363k
  return(NULL);
3421
363k
    }
3422
3423
3.75M
    while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3424
3.75M
     (xmlIsNameChar(ctxt, c) && (c != ':'))) {
3425
3.63M
  if (count++ > XML_PARSER_CHUNK_SIZE) {
3426
28.3k
      count = 0;
3427
28.3k
      GROW;
3428
28.3k
            if (ctxt->instate == XML_PARSER_EOF)
3429
0
                return(NULL);
3430
28.3k
  }
3431
3.63M
        if (len <= INT_MAX - l)
3432
3.63M
      len += l;
3433
3.63M
  NEXTL(l);
3434
3.63M
  c = CUR_CHAR(l);
3435
3.63M
  if (c == 0) {
3436
14.6k
      count = 0;
3437
      /*
3438
       * when shrinking to extend the buffer we really need to preserve
3439
       * the part of the name we already parsed. Hence rolling back
3440
       * by current length.
3441
       */
3442
14.6k
      ctxt->input->cur -= l;
3443
14.6k
      GROW;
3444
14.6k
            if (ctxt->instate == XML_PARSER_EOF)
3445
0
                return(NULL);
3446
14.6k
      ctxt->input->cur += l;
3447
14.6k
      c = CUR_CHAR(l);
3448
14.6k
  }
3449
3.63M
    }
3450
121k
    if (len > maxLength) {
3451
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3452
0
        return(NULL);
3453
0
    }
3454
121k
    return(xmlDictLookup(ctxt->dict, (BASE_PTR + startPosition), len));
3455
121k
}
3456
3457
/**
3458
 * xmlParseNCName:
3459
 * @ctxt:  an XML parser context
3460
 * @len:  length of the string parsed
3461
 *
3462
 * parse an XML name.
3463
 *
3464
 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3465
 *                      CombiningChar | Extender
3466
 *
3467
 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3468
 *
3469
 * Returns the Name parsed or NULL
3470
 */
3471
3472
static const xmlChar *
3473
5.37M
xmlParseNCName(xmlParserCtxtPtr ctxt) {
3474
5.37M
    const xmlChar *in, *e;
3475
5.37M
    const xmlChar *ret;
3476
5.37M
    size_t count = 0;
3477
5.37M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3478
2.23M
                       XML_MAX_TEXT_LENGTH :
3479
5.37M
                       XML_MAX_NAME_LENGTH;
3480
3481
#ifdef DEBUG
3482
    nbParseNCName++;
3483
#endif
3484
3485
    /*
3486
     * Accelerator for simple ASCII names
3487
     */
3488
5.37M
    in = ctxt->input->cur;
3489
5.37M
    e = ctxt->input->end;
3490
5.37M
    if ((((*in >= 0x61) && (*in <= 0x7A)) ||
3491
5.37M
   ((*in >= 0x41) && (*in <= 0x5A)) ||
3492
5.37M
   (*in == '_')) && (in < e)) {
3493
4.98M
  in++;
3494
41.9M
  while ((((*in >= 0x61) && (*in <= 0x7A)) ||
3495
41.9M
          ((*in >= 0x41) && (*in <= 0x5A)) ||
3496
41.9M
          ((*in >= 0x30) && (*in <= 0x39)) ||
3497
41.9M
          (*in == '_') || (*in == '-') ||
3498
41.9M
          (*in == '.')) && (in < e))
3499
37.0M
      in++;
3500
4.98M
  if (in >= e)
3501
2.82k
      goto complex;
3502
4.98M
  if ((*in > 0) && (*in < 0x80)) {
3503
4.89M
      count = in - ctxt->input->cur;
3504
4.89M
            if (count > maxLength) {
3505
0
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3506
0
                return(NULL);
3507
0
            }
3508
4.89M
      ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3509
4.89M
      ctxt->input->cur = in;
3510
4.89M
      ctxt->input->col += count;
3511
4.89M
      if (ret == NULL) {
3512
0
          xmlErrMemory(ctxt, NULL);
3513
0
      }
3514
4.89M
      return(ret);
3515
4.89M
  }
3516
4.98M
    }
3517
484k
complex:
3518
484k
    return(xmlParseNCNameComplex(ctxt));
3519
5.37M
}
3520
3521
/**
3522
 * xmlParseNameAndCompare:
3523
 * @ctxt:  an XML parser context
3524
 *
3525
 * parse an XML name and compares for match
3526
 * (specialized for endtag parsing)
3527
 *
3528
 * Returns NULL for an illegal name, (xmlChar*) 1 for success
3529
 * and the name for mismatch
3530
 */
3531
3532
static const xmlChar *
3533
5.72M
xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
3534
5.72M
    register const xmlChar *cmp = other;
3535
5.72M
    register const xmlChar *in;
3536
5.72M
    const xmlChar *ret;
3537
3538
5.72M
    GROW;
3539
5.72M
    if (ctxt->instate == XML_PARSER_EOF)
3540
0
        return(NULL);
3541
3542
5.72M
    in = ctxt->input->cur;
3543
33.1M
    while (*in != 0 && *in == *cmp) {
3544
27.4M
  ++in;
3545
27.4M
  ++cmp;
3546
27.4M
    }
3547
5.72M
    if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
3548
  /* success */
3549
3.84M
  ctxt->input->col += in - ctxt->input->cur;
3550
3.84M
  ctxt->input->cur = in;
3551
3.84M
  return (const xmlChar*) 1;
3552
3.84M
    }
3553
    /* failure (or end of input buffer), check with full function */
3554
1.88M
    ret = xmlParseName (ctxt);
3555
    /* strings coming from the dictionary direct compare possible */
3556
1.88M
    if (ret == other) {
3557
15.3k
  return (const xmlChar*) 1;
3558
15.3k
    }
3559
1.86M
    return ret;
3560
1.88M
}
3561
3562
/**
3563
 * xmlParseStringName:
3564
 * @ctxt:  an XML parser context
3565
 * @str:  a pointer to the string pointer (IN/OUT)
3566
 *
3567
 * parse an XML name.
3568
 *
3569
 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3570
 *                  CombiningChar | Extender
3571
 *
3572
 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3573
 *
3574
 * [6] Names ::= Name (#x20 Name)*
3575
 *
3576
 * Returns the Name parsed or NULL. The @str pointer
3577
 * is updated to the current location in the string.
3578
 */
3579
3580
static xmlChar *
3581
4.37M
xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3582
4.37M
    xmlChar buf[XML_MAX_NAMELEN + 5];
3583
4.37M
    const xmlChar *cur = *str;
3584
4.37M
    int len = 0, l;
3585
4.37M
    int c;
3586
4.37M
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3587
3.95M
                    XML_MAX_TEXT_LENGTH :
3588
4.37M
                    XML_MAX_NAME_LENGTH;
3589
3590
#ifdef DEBUG
3591
    nbParseStringName++;
3592
#endif
3593
3594
4.37M
    c = CUR_SCHAR(cur, l);
3595
4.37M
    if (!xmlIsNameStartChar(ctxt, c)) {
3596
505k
  return(NULL);
3597
505k
    }
3598
3599
3.86M
    COPY_BUF(l,buf,len,c);
3600
3.86M
    cur += l;
3601
3.86M
    c = CUR_SCHAR(cur, l);
3602
37.4M
    while (xmlIsNameChar(ctxt, c)) {
3603
33.6M
  COPY_BUF(l,buf,len,c);
3604
33.6M
  cur += l;
3605
33.6M
  c = CUR_SCHAR(cur, l);
3606
33.6M
  if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3607
      /*
3608
       * Okay someone managed to make a huge name, so he's ready to pay
3609
       * for the processing speed.
3610
       */
3611
22.0k
      xmlChar *buffer;
3612
22.0k
      int max = len * 2;
3613
3614
22.0k
      buffer = (xmlChar *) xmlMallocAtomic(max);
3615
22.0k
      if (buffer == NULL) {
3616
0
          xmlErrMemory(ctxt, NULL);
3617
0
    return(NULL);
3618
0
      }
3619
22.0k
      memcpy(buffer, buf, len);
3620
3.00M
      while (xmlIsNameChar(ctxt, c)) {
3621
2.97M
    if (len + 10 > max) {
3622
7.35k
        xmlChar *tmp;
3623
3624
7.35k
        max *= 2;
3625
7.35k
        tmp = (xmlChar *) xmlRealloc(buffer, max);
3626
7.35k
        if (tmp == NULL) {
3627
0
      xmlErrMemory(ctxt, NULL);
3628
0
      xmlFree(buffer);
3629
0
      return(NULL);
3630
0
        }
3631
7.35k
        buffer = tmp;
3632
7.35k
    }
3633
2.97M
    COPY_BUF(l,buffer,len,c);
3634
2.97M
    cur += l;
3635
2.97M
    c = CUR_SCHAR(cur, l);
3636
2.97M
                if (len > maxLength) {
3637
0
                    xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3638
0
                    xmlFree(buffer);
3639
0
                    return(NULL);
3640
0
                }
3641
2.97M
      }
3642
22.0k
      buffer[len] = 0;
3643
22.0k
      *str = cur;
3644
22.0k
      return(buffer);
3645
22.0k
  }
3646
33.6M
    }
3647
3.84M
    if (len > maxLength) {
3648
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3649
0
        return(NULL);
3650
0
    }
3651
3.84M
    *str = cur;
3652
3.84M
    return(xmlStrndup(buf, len));
3653
3.84M
}
3654
3655
/**
3656
 * xmlParseNmtoken:
3657
 * @ctxt:  an XML parser context
3658
 *
3659
 * DEPRECATED: Internal function, don't use.
3660
 *
3661
 * parse an XML Nmtoken.
3662
 *
3663
 * [7] Nmtoken ::= (NameChar)+
3664
 *
3665
 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
3666
 *
3667
 * Returns the Nmtoken parsed or NULL
3668
 */
3669
3670
xmlChar *
3671
2.09M
xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3672
2.09M
    xmlChar buf[XML_MAX_NAMELEN + 5];
3673
2.09M
    int len = 0, l;
3674
2.09M
    int c;
3675
2.09M
    int count = 0;
3676
2.09M
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3677
1.44M
                    XML_MAX_TEXT_LENGTH :
3678
2.09M
                    XML_MAX_NAME_LENGTH;
3679
3680
#ifdef DEBUG
3681
    nbParseNmToken++;
3682
#endif
3683
3684
2.09M
    GROW;
3685
2.09M
    if (ctxt->instate == XML_PARSER_EOF)
3686
0
        return(NULL);
3687
2.09M
    c = CUR_CHAR(l);
3688
3689
13.8M
    while (xmlIsNameChar(ctxt, c)) {
3690
11.7M
  if (count++ > XML_PARSER_CHUNK_SIZE) {
3691
0
      count = 0;
3692
0
      GROW;
3693
0
  }
3694
11.7M
  COPY_BUF(l,buf,len,c);
3695
11.7M
  NEXTL(l);
3696
11.7M
  c = CUR_CHAR(l);
3697
11.7M
  if (c == 0) {
3698
790
      count = 0;
3699
790
      GROW;
3700
790
      if (ctxt->instate == XML_PARSER_EOF)
3701
0
    return(NULL);
3702
790
            c = CUR_CHAR(l);
3703
790
  }
3704
11.7M
  if (len >= XML_MAX_NAMELEN) {
3705
      /*
3706
       * Okay someone managed to make a huge token, so he's ready to pay
3707
       * for the processing speed.
3708
       */
3709
1.55k
      xmlChar *buffer;
3710
1.55k
      int max = len * 2;
3711
3712
1.55k
      buffer = (xmlChar *) xmlMallocAtomic(max);
3713
1.55k
      if (buffer == NULL) {
3714
0
          xmlErrMemory(ctxt, NULL);
3715
0
    return(NULL);
3716
0
      }
3717
1.55k
      memcpy(buffer, buf, len);
3718
2.19M
      while (xmlIsNameChar(ctxt, c)) {
3719
2.19M
    if (count++ > XML_PARSER_CHUNK_SIZE) {
3720
22.3k
        count = 0;
3721
22.3k
        GROW;
3722
22.3k
                    if (ctxt->instate == XML_PARSER_EOF) {
3723
0
                        xmlFree(buffer);
3724
0
                        return(NULL);
3725
0
                    }
3726
22.3k
    }
3727
2.19M
    if (len + 10 > max) {
3728
3.02k
        xmlChar *tmp;
3729
3730
3.02k
        max *= 2;
3731
3.02k
        tmp = (xmlChar *) xmlRealloc(buffer, max);
3732
3.02k
        if (tmp == NULL) {
3733
0
      xmlErrMemory(ctxt, NULL);
3734
0
      xmlFree(buffer);
3735
0
      return(NULL);
3736
0
        }
3737
3.02k
        buffer = tmp;
3738
3.02k
    }
3739
2.19M
    COPY_BUF(l,buffer,len,c);
3740
2.19M
    NEXTL(l);
3741
2.19M
    c = CUR_CHAR(l);
3742
2.19M
                if (len > maxLength) {
3743
0
                    xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3744
0
                    xmlFree(buffer);
3745
0
                    return(NULL);
3746
0
                }
3747
2.19M
      }
3748
1.55k
      buffer[len] = 0;
3749
1.55k
      return(buffer);
3750
1.55k
  }
3751
11.7M
    }
3752
2.09M
    if (len == 0)
3753
12.2k
        return(NULL);
3754
2.08M
    if (len > maxLength) {
3755
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3756
0
        return(NULL);
3757
0
    }
3758
2.08M
    return(xmlStrndup(buf, len));
3759
2.08M
}
3760
3761
/**
3762
 * xmlParseEntityValue:
3763
 * @ctxt:  an XML parser context
3764
 * @orig:  if non-NULL store a copy of the original entity value
3765
 *
3766
 * DEPRECATED: Internal function, don't use.
3767
 *
3768
 * parse a value for ENTITY declarations
3769
 *
3770
 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3771
 *                 "'" ([^%&'] | PEReference | Reference)* "'"
3772
 *
3773
 * Returns the EntityValue parsed with reference substituted or NULL
3774
 */
3775
3776
xmlChar *
3777
4.61M
xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3778
4.61M
    xmlChar *buf = NULL;
3779
4.61M
    int len = 0;
3780
4.61M
    int size = XML_PARSER_BUFFER_SIZE;
3781
4.61M
    int c, l;
3782
4.61M
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3783
3.89M
                    XML_MAX_HUGE_LENGTH :
3784
4.61M
                    XML_MAX_TEXT_LENGTH;
3785
4.61M
    xmlChar stop;
3786
4.61M
    xmlChar *ret = NULL;
3787
4.61M
    const xmlChar *cur = NULL;
3788
4.61M
    xmlParserInputPtr input;
3789
3790
4.61M
    if (RAW == '"') stop = '"';
3791
2.86M
    else if (RAW == '\'') stop = '\'';
3792
0
    else {
3793
0
  xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
3794
0
  return(NULL);
3795
0
    }
3796
4.61M
    buf = (xmlChar *) xmlMallocAtomic(size);
3797
4.61M
    if (buf == NULL) {
3798
0
  xmlErrMemory(ctxt, NULL);
3799
0
  return(NULL);
3800
0
    }
3801
3802
    /*
3803
     * The content of the entity definition is copied in a buffer.
3804
     */
3805
3806
4.61M
    ctxt->instate = XML_PARSER_ENTITY_VALUE;
3807
4.61M
    input = ctxt->input;
3808
4.61M
    GROW;
3809
4.61M
    if (ctxt->instate == XML_PARSER_EOF)
3810
0
        goto error;
3811
4.61M
    NEXT;
3812
4.61M
    c = CUR_CHAR(l);
3813
    /*
3814
     * NOTE: 4.4.5 Included in Literal
3815
     * When a parameter entity reference appears in a literal entity
3816
     * value, ... a single or double quote character in the replacement
3817
     * text is always treated as a normal data character and will not
3818
     * terminate the literal.
3819
     * In practice it means we stop the loop only when back at parsing
3820
     * the initial entity and the quote is found
3821
     */
3822
343M
    while (((IS_CHAR(c)) && ((c != stop) || /* checked */
3823
343M
      (ctxt->input != input))) && (ctxt->instate != XML_PARSER_EOF)) {
3824
339M
  if (len + 5 >= size) {
3825
1.50M
      xmlChar *tmp;
3826
3827
1.50M
      size *= 2;
3828
1.50M
      tmp = (xmlChar *) xmlRealloc(buf, size);
3829
1.50M
      if (tmp == NULL) {
3830
0
    xmlErrMemory(ctxt, NULL);
3831
0
                goto error;
3832
0
      }
3833
1.50M
      buf = tmp;
3834
1.50M
  }
3835
339M
  COPY_BUF(l,buf,len,c);
3836
339M
  NEXTL(l);
3837
3838
339M
  GROW;
3839
339M
  c = CUR_CHAR(l);
3840
339M
  if (c == 0) {
3841
1.03k
      GROW;
3842
1.03k
      c = CUR_CHAR(l);
3843
1.03k
  }
3844
3845
339M
        if (len > maxLength) {
3846
0
            xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
3847
0
                           "entity value too long\n");
3848
0
            goto error;
3849
0
        }
3850
339M
    }
3851
4.61M
    buf[len] = 0;
3852
4.61M
    if (ctxt->instate == XML_PARSER_EOF)
3853
0
        goto error;
3854
4.61M
    if (c != stop) {
3855
1.72k
        xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
3856
1.72k
        goto error;
3857
1.72k
    }
3858
4.61M
    NEXT;
3859
3860
    /*
3861
     * Raise problem w.r.t. '&' and '%' being used in non-entities
3862
     * reference constructs. Note Charref will be handled in
3863
     * xmlStringDecodeEntities()
3864
     */
3865
4.61M
    cur = buf;
3866
317M
    while (*cur != 0) { /* non input consuming */
3867
312M
  if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3868
694k
      xmlChar *name;
3869
694k
      xmlChar tmp = *cur;
3870
694k
            int nameOk = 0;
3871
3872
694k
      cur++;
3873
694k
      name = xmlParseStringName(ctxt, &cur);
3874
694k
            if (name != NULL) {
3875
582k
                nameOk = 1;
3876
582k
                xmlFree(name);
3877
582k
            }
3878
694k
            if ((nameOk == 0) || (*cur != ';')) {
3879
228k
    xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
3880
228k
      "EntityValue: '%c' forbidden except for entities references\n",
3881
228k
                            tmp);
3882
228k
                goto error;
3883
228k
      }
3884
466k
      if ((tmp == '%') && (ctxt->inSubset == 1) &&
3885
466k
    (ctxt->inputNr == 1)) {
3886
542
    xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
3887
542
                goto error;
3888
542
      }
3889
466k
      if (*cur == 0)
3890
0
          break;
3891
466k
  }
3892
312M
  cur++;
3893
312M
    }
3894
3895
    /*
3896
     * Then PEReference entities are substituted.
3897
     *
3898
     * NOTE: 4.4.7 Bypassed
3899
     * When a general entity reference appears in the EntityValue in
3900
     * an entity declaration, it is bypassed and left as is.
3901
     * so XML_SUBSTITUTE_REF is not set here.
3902
     */
3903
4.38M
    ++ctxt->depth;
3904
4.38M
    ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3905
4.38M
                                  0, 0, 0);
3906
4.38M
    --ctxt->depth;
3907
4.38M
    if (orig != NULL) {
3908
4.38M
        *orig = buf;
3909
4.38M
        buf = NULL;
3910
4.38M
    }
3911
3912
4.61M
error:
3913
4.61M
    if (buf != NULL)
3914
230k
        xmlFree(buf);
3915
4.61M
    return(ret);
3916
4.38M
}
3917
3918
/**
3919
 * xmlParseAttValueComplex:
3920
 * @ctxt:  an XML parser context
3921
 * @len:   the resulting attribute len
3922
 * @normalize:  whether to apply the inner normalization
3923
 *
3924
 * parse a value for an attribute, this is the fallback function
3925
 * of xmlParseAttValue() when the attribute parsing requires handling
3926
 * of non-ASCII characters, or normalization compaction.
3927
 *
3928
 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3929
 */
3930
static xmlChar *
3931
3.19M
xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
3932
3.19M
    xmlChar limit = 0;
3933
3.19M
    xmlChar *buf = NULL;
3934
3.19M
    xmlChar *rep = NULL;
3935
3.19M
    size_t len = 0;
3936
3.19M
    size_t buf_size = 0;
3937
3.19M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3938
2.86M
                       XML_MAX_HUGE_LENGTH :
3939
3.19M
                       XML_MAX_TEXT_LENGTH;
3940
3.19M
    int c, l, in_space = 0;
3941
3.19M
    xmlChar *current = NULL;
3942
3.19M
    xmlEntityPtr ent;
3943
3944
3.19M
    if (NXT(0) == '"') {
3945
931k
  ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3946
931k
  limit = '"';
3947
931k
        NEXT;
3948
2.26M
    } else if (NXT(0) == '\'') {
3949
2.26M
  limit = '\'';
3950
2.26M
  ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3951
2.26M
        NEXT;
3952
2.26M
    } else {
3953
0
  xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
3954
0
  return(NULL);
3955
0
    }
3956
3957
    /*
3958
     * allocate a translation buffer.
3959
     */
3960
3.19M
    buf_size = XML_PARSER_BUFFER_SIZE;
3961
3.19M
    buf = (xmlChar *) xmlMallocAtomic(buf_size);
3962
3.19M
    if (buf == NULL) goto mem_error;
3963
3964
    /*
3965
     * OK loop until we reach one of the ending char or a size limit.
3966
     */
3967
3.19M
    c = CUR_CHAR(l);
3968
88.9M
    while (((NXT(0) != limit) && /* checked */
3969
88.9M
            (IS_CHAR(c)) && (c != '<')) &&
3970
88.9M
            (ctxt->instate != XML_PARSER_EOF)) {
3971
85.7M
  if (c == '&') {
3972
3.01M
      in_space = 0;
3973
3.01M
      if (NXT(1) == '#') {
3974
656k
    int val = xmlParseCharRef(ctxt);
3975
3976
656k
    if (val == '&') {
3977
74.6k
        if (ctxt->replaceEntities) {
3978
2.09k
      if (len + 10 > buf_size) {
3979
238
          growBuffer(buf, 10);
3980
238
      }
3981
2.09k
      buf[len++] = '&';
3982
72.5k
        } else {
3983
      /*
3984
       * The reparsing will be done in xmlStringGetNodeList()
3985
       * called by the attribute() function in SAX.c
3986
       */
3987
72.5k
      if (len + 10 > buf_size) {
3988
262
          growBuffer(buf, 10);
3989
262
      }
3990
72.5k
      buf[len++] = '&';
3991
72.5k
      buf[len++] = '#';
3992
72.5k
      buf[len++] = '3';
3993
72.5k
      buf[len++] = '8';
3994
72.5k
      buf[len++] = ';';
3995
72.5k
        }
3996
581k
    } else if (val != 0) {
3997
523k
        if (len + 10 > buf_size) {
3998
2.57k
      growBuffer(buf, 10);
3999
2.57k
        }
4000
523k
        len += xmlCopyChar(0, &buf[len], val);
4001
523k
    }
4002
2.36M
      } else {
4003
2.36M
    ent = xmlParseEntityRef(ctxt);
4004
2.36M
    ctxt->nbentities++;
4005
2.36M
    if (ent != NULL)
4006
1.97M
        ctxt->nbentities += ent->owner;
4007
2.36M
    if ((ent != NULL) &&
4008
2.36M
        (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
4009
1.86M
        if (len + 10 > buf_size) {
4010
220
      growBuffer(buf, 10);
4011
220
        }
4012
1.86M
        if ((ctxt->replaceEntities == 0) &&
4013
1.86M
            (ent->content[0] == '&')) {
4014
1.24M
      buf[len++] = '&';
4015
1.24M
      buf[len++] = '#';
4016
1.24M
      buf[len++] = '3';
4017
1.24M
      buf[len++] = '8';
4018
1.24M
      buf[len++] = ';';
4019
1.24M
        } else {
4020
624k
      buf[len++] = ent->content[0];
4021
624k
        }
4022
1.86M
    } else if ((ent != NULL) &&
4023
494k
               (ctxt->replaceEntities != 0)) {
4024
37.8k
        if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
4025
37.8k
      ++ctxt->depth;
4026
37.8k
      rep = xmlStringDecodeEntities(ctxt, ent->content,
4027
37.8k
                  XML_SUBSTITUTE_REF,
4028
37.8k
                  0, 0, 0);
4029
37.8k
      --ctxt->depth;
4030
37.8k
      if (rep != NULL) {
4031
34.7k
          current = rep;
4032
1.42M
          while (*current != 0) { /* non input consuming */
4033
1.39M
                                if ((*current == 0xD) || (*current == 0xA) ||
4034
1.39M
                                    (*current == 0x9)) {
4035
42.0k
                                    buf[len++] = 0x20;
4036
42.0k
                                    current++;
4037
42.0k
                                } else
4038
1.35M
                                    buf[len++] = *current++;
4039
1.39M
        if (len + 10 > buf_size) {
4040
4.60k
            growBuffer(buf, 10);
4041
4.60k
        }
4042
1.39M
          }
4043
34.7k
          xmlFree(rep);
4044
34.7k
          rep = NULL;
4045
34.7k
      }
4046
37.8k
        } else {
4047
0
      if (len + 10 > buf_size) {
4048
0
          growBuffer(buf, 10);
4049
0
      }
4050
0
      if (ent->content != NULL)
4051
0
          buf[len++] = ent->content[0];
4052
0
        }
4053
456k
    } else if (ent != NULL) {
4054
68.3k
        int i = xmlStrlen(ent->name);
4055
68.3k
        const xmlChar *cur = ent->name;
4056
4057
        /*
4058
         * This may look absurd but is needed to detect
4059
         * entities problems
4060
         */
4061
68.3k
        if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
4062
68.3k
      (ent->content != NULL) && (ent->checked == 0)) {
4063
12.0k
      unsigned long oldnbent = ctxt->nbentities, diff;
4064
4065
12.0k
      ++ctxt->depth;
4066
12.0k
      rep = xmlStringDecodeEntities(ctxt, ent->content,
4067
12.0k
              XML_SUBSTITUTE_REF, 0, 0, 0);
4068
12.0k
      --ctxt->depth;
4069
4070
12.0k
                        diff = ctxt->nbentities - oldnbent + 1;
4071
12.0k
                        if (diff > INT_MAX / 2)
4072
0
                            diff = INT_MAX / 2;
4073
12.0k
                        ent->checked = diff * 2;
4074
12.0k
      if (rep != NULL) {
4075
11.7k
          if (xmlStrchr(rep, '<'))
4076
719
              ent->checked |= 1;
4077
11.7k
          xmlFree(rep);
4078
11.7k
          rep = NULL;
4079
11.7k
      } else {
4080
289
                            ent->content[0] = 0;
4081
289
                        }
4082
12.0k
        }
4083
4084
        /*
4085
         * Just output the reference
4086
         */
4087
68.3k
        buf[len++] = '&';
4088
68.5k
        while (len + i + 10 > buf_size) {
4089
358
      growBuffer(buf, i + 10);
4090
358
        }
4091
232k
        for (;i > 0;i--)
4092
164k
      buf[len++] = *cur++;
4093
68.3k
        buf[len++] = ';';
4094
68.3k
    }
4095
2.36M
      }
4096
82.7M
  } else {
4097
82.7M
      if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
4098
17.0M
          if ((len != 0) || (!normalize)) {
4099
16.9M
        if ((!normalize) || (!in_space)) {
4100
16.6M
      COPY_BUF(l,buf,len,0x20);
4101
16.6M
      while (len + 10 > buf_size) {
4102
87.1k
          growBuffer(buf, 10);
4103
87.1k
      }
4104
16.6M
        }
4105
16.9M
        in_space = 1;
4106
16.9M
    }
4107
65.6M
      } else {
4108
65.6M
          in_space = 0;
4109
65.6M
    COPY_BUF(l,buf,len,c);
4110
65.6M
    if (len + 10 > buf_size) {
4111
468k
        growBuffer(buf, 10);
4112
468k
    }
4113
65.6M
      }
4114
82.7M
      NEXTL(l);
4115
82.7M
  }
4116
85.7M
  GROW;
4117
85.7M
  c = CUR_CHAR(l);
4118
85.7M
        if (len > maxLength) {
4119
0
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4120
0
                           "AttValue length too long\n");
4121
0
            goto mem_error;
4122
0
        }
4123
85.7M
    }
4124
3.19M
    if (ctxt->instate == XML_PARSER_EOF)
4125
0
        goto error;
4126
4127
3.19M
    if ((in_space) && (normalize)) {
4128
26.2k
        while ((len > 0) && (buf[len - 1] == 0x20)) len--;
4129
12.3k
    }
4130
3.19M
    buf[len] = 0;
4131
3.19M
    if (RAW == '<') {
4132
376k
  xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
4133
2.81M
    } else if (RAW != limit) {
4134
260k
  if ((c != 0) && (!IS_CHAR(c))) {
4135
163k
      xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
4136
163k
         "invalid character in attribute value\n");
4137
163k
  } else {
4138
96.9k
      xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4139
96.9k
         "AttValue: ' expected\n");
4140
96.9k
        }
4141
260k
    } else
4142
2.55M
  NEXT;
4143
4144
3.19M
    if (attlen != NULL) *attlen = len;
4145
3.19M
    return(buf);
4146
4147
0
mem_error:
4148
0
    xmlErrMemory(ctxt, NULL);
4149
0
error:
4150
0
    if (buf != NULL)
4151
0
        xmlFree(buf);
4152
0
    if (rep != NULL)
4153
0
        xmlFree(rep);
4154
0
    return(NULL);
4155
0
}
4156
4157
/**
4158
 * xmlParseAttValue:
4159
 * @ctxt:  an XML parser context
4160
 *
4161
 * DEPRECATED: Internal function, don't use.
4162
 *
4163
 * parse a value for an attribute
4164
 * Note: the parser won't do substitution of entities here, this
4165
 * will be handled later in xmlStringGetNodeList
4166
 *
4167
 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
4168
 *                   "'" ([^<&'] | Reference)* "'"
4169
 *
4170
 * 3.3.3 Attribute-Value Normalization:
4171
 * Before the value of an attribute is passed to the application or
4172
 * checked for validity, the XML processor must normalize it as follows:
4173
 * - a character reference is processed by appending the referenced
4174
 *   character to the attribute value
4175
 * - an entity reference is processed by recursively processing the
4176
 *   replacement text of the entity
4177
 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4178
 *   appending #x20 to the normalized value, except that only a single
4179
 *   #x20 is appended for a "#xD#xA" sequence that is part of an external
4180
 *   parsed entity or the literal entity value of an internal parsed entity
4181
 * - other characters are processed by appending them to the normalized value
4182
 * If the declared value is not CDATA, then the XML processor must further
4183
 * process the normalized attribute value by discarding any leading and
4184
 * trailing space (#x20) characters, and by replacing sequences of space
4185
 * (#x20) characters by a single space (#x20) character.
4186
 * All attributes for which no declaration has been read should be treated
4187
 * by a non-validating parser as if declared CDATA.
4188
 *
4189
 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
4190
 */
4191
4192
4193
xmlChar *
4194
9.61M
xmlParseAttValue(xmlParserCtxtPtr ctxt) {
4195
9.61M
    if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
4196
9.61M
    return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
4197
9.61M
}
4198
4199
/**
4200
 * xmlParseSystemLiteral:
4201
 * @ctxt:  an XML parser context
4202
 *
4203
 * DEPRECATED: Internal function, don't use.
4204
 *
4205
 * parse an XML Literal
4206
 *
4207
 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4208
 *
4209
 * Returns the SystemLiteral parsed or NULL
4210
 */
4211
4212
xmlChar *
4213
454k
xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
4214
454k
    xmlChar *buf = NULL;
4215
454k
    int len = 0;
4216
454k
    int size = XML_PARSER_BUFFER_SIZE;
4217
454k
    int cur, l;
4218
454k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4219
297k
                    XML_MAX_TEXT_LENGTH :
4220
454k
                    XML_MAX_NAME_LENGTH;
4221
454k
    xmlChar stop;
4222
454k
    int state = ctxt->instate;
4223
454k
    int count = 0;
4224
4225
454k
    SHRINK;
4226
454k
    if (RAW == '"') {
4227
432k
        NEXT;
4228
432k
  stop = '"';
4229
432k
    } else if (RAW == '\'') {
4230
14.9k
        NEXT;
4231
14.9k
  stop = '\'';
4232
14.9k
    } else {
4233
6.86k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4234
6.86k
  return(NULL);
4235
6.86k
    }
4236
4237
447k
    buf = (xmlChar *) xmlMallocAtomic(size);
4238
447k
    if (buf == NULL) {
4239
0
        xmlErrMemory(ctxt, NULL);
4240
0
  return(NULL);
4241
0
    }
4242
447k
    ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
4243
447k
    cur = CUR_CHAR(l);
4244
11.1M
    while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
4245
10.7M
  if (len + 5 >= size) {
4246
7.00k
      xmlChar *tmp;
4247
4248
7.00k
      size *= 2;
4249
7.00k
      tmp = (xmlChar *) xmlRealloc(buf, size);
4250
7.00k
      if (tmp == NULL) {
4251
0
          xmlFree(buf);
4252
0
    xmlErrMemory(ctxt, NULL);
4253
0
    ctxt->instate = (xmlParserInputState) state;
4254
0
    return(NULL);
4255
0
      }
4256
7.00k
      buf = tmp;
4257
7.00k
  }
4258
10.7M
  count++;
4259
10.7M
  if (count > 50) {
4260
57.0k
      SHRINK;
4261
57.0k
      GROW;
4262
57.0k
      count = 0;
4263
57.0k
            if (ctxt->instate == XML_PARSER_EOF) {
4264
0
          xmlFree(buf);
4265
0
    return(NULL);
4266
0
            }
4267
57.0k
  }
4268
10.7M
  COPY_BUF(l,buf,len,cur);
4269
10.7M
  NEXTL(l);
4270
10.7M
  cur = CUR_CHAR(l);
4271
10.7M
  if (cur == 0) {
4272
2.12k
      GROW;
4273
2.12k
      SHRINK;
4274
2.12k
      cur = CUR_CHAR(l);
4275
2.12k
  }
4276
10.7M
        if (len > maxLength) {
4277
0
            xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
4278
0
            xmlFree(buf);
4279
0
            ctxt->instate = (xmlParserInputState) state;
4280
0
            return(NULL);
4281
0
        }
4282
10.7M
    }
4283
447k
    buf[len] = 0;
4284
447k
    ctxt->instate = (xmlParserInputState) state;
4285
447k
    if (!IS_CHAR(cur)) {
4286
3.20k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4287
444k
    } else {
4288
444k
  NEXT;
4289
444k
    }
4290
447k
    return(buf);
4291
447k
}
4292
4293
/**
4294
 * xmlParsePubidLiteral:
4295
 * @ctxt:  an XML parser context
4296
 *
4297
 * DEPRECATED: Internal function, don't use.
4298
 *
4299
 * parse an XML public literal
4300
 *
4301
 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4302
 *
4303
 * Returns the PubidLiteral parsed or NULL.
4304
 */
4305
4306
xmlChar *
4307
43.9k
xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
4308
43.9k
    xmlChar *buf = NULL;
4309
43.9k
    int len = 0;
4310
43.9k
    int size = XML_PARSER_BUFFER_SIZE;
4311
43.9k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4312
16.7k
                    XML_MAX_TEXT_LENGTH :
4313
43.9k
                    XML_MAX_NAME_LENGTH;
4314
43.9k
    xmlChar cur;
4315
43.9k
    xmlChar stop;
4316
43.9k
    int count = 0;
4317
43.9k
    xmlParserInputState oldstate = ctxt->instate;
4318
4319
43.9k
    SHRINK;
4320
43.9k
    if (RAW == '"') {
4321
36.6k
        NEXT;
4322
36.6k
  stop = '"';
4323
36.6k
    } else if (RAW == '\'') {
4324
6.42k
        NEXT;
4325
6.42k
  stop = '\'';
4326
6.42k
    } else {
4327
861
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4328
861
  return(NULL);
4329
861
    }
4330
43.1k
    buf = (xmlChar *) xmlMallocAtomic(size);
4331
43.1k
    if (buf == NULL) {
4332
0
  xmlErrMemory(ctxt, NULL);
4333
0
  return(NULL);
4334
0
    }
4335
43.1k
    ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
4336
43.1k
    cur = CUR;
4337
2.49M
    while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
4338
2.44M
  if (len + 1 >= size) {
4339
4.39k
      xmlChar *tmp;
4340
4341
4.39k
      size *= 2;
4342
4.39k
      tmp = (xmlChar *) xmlRealloc(buf, size);
4343
4.39k
      if (tmp == NULL) {
4344
0
    xmlErrMemory(ctxt, NULL);
4345
0
    xmlFree(buf);
4346
0
    return(NULL);
4347
0
      }
4348
4.39k
      buf = tmp;
4349
4.39k
  }
4350
2.44M
  buf[len++] = cur;
4351
2.44M
  count++;
4352
2.44M
  if (count > 50) {
4353
29.5k
      SHRINK;
4354
29.5k
      GROW;
4355
29.5k
      count = 0;
4356
29.5k
            if (ctxt->instate == XML_PARSER_EOF) {
4357
0
    xmlFree(buf);
4358
0
    return(NULL);
4359
0
            }
4360
29.5k
  }
4361
2.44M
  NEXT;
4362
2.44M
  cur = CUR;
4363
2.44M
  if (cur == 0) {
4364
1.09k
      GROW;
4365
1.09k
      SHRINK;
4366
1.09k
      cur = CUR;
4367
1.09k
  }
4368
2.44M
        if (len > maxLength) {
4369
0
            xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
4370
0
            xmlFree(buf);
4371
0
            return(NULL);
4372
0
        }
4373
2.44M
    }
4374
43.1k
    buf[len] = 0;
4375
43.1k
    if (cur != stop) {
4376
3.25k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4377
39.8k
    } else {
4378
39.8k
  NEXT;
4379
39.8k
    }
4380
43.1k
    ctxt->instate = oldstate;
4381
43.1k
    return(buf);
4382
43.1k
}
4383
4384
/*
 * Forward declaration of a file-local function defined further down.
 * NOTE(review): presumably needed because xmlParseCharData, which
 * follows, calls it before its definition -- confirm against the rest
 * of the file.
 */
static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
4385
4386
/*
 * Lookup table used for the test in the inner loop of the char data
 * testing. Entry i holds i itself for the bytes that pass the test and
 * 0x00 for every other byte. Rows are 16 entries wide, the row comment
 * gives the index of the first entry.
 *
 * Zeroed entries below 0x80: all control characters except 0x09 (so
 * CR and LF are zero too), '&' (0x26), '<' (0x3C) and ']' (0x5D).
 * Entries 0x80-0xFF (non-ascii) are not listed: the remainder of a
 * partially initialized array is zero.
 */
static const unsigned char test_char_data[256] = {
    /* 0x00 */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x10 */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x20, '&' zeroed */
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27,
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    /* 0x30, '<' zeroed */
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F,
    /* 0x40 */
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    /* 0x50, ']' zeroed */
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F,
    /* 0x60 */
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    /* 0x70 */
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F
};
4423
4424
/**
4425
 * xmlParseCharData:
4426
 * @ctxt:  an XML parser context
4427
 * @cdata:  int indicating whether we are within a CDATA section
4428
 *
4429
 * DEPRECATED: Internal function, don't use.
4430
 *
4431
 * parse a CharData section.
4432
 * if we are within a CDATA section ']]>' marks an end of section.
4433
 *
4434
 * The right angle bracket (>) may be represented using the string "&gt;",
4435
 * and must, for compatibility, be escaped using "&gt;" or a character
4436
 * reference when it appears in the string "]]>" in content, when that
4437
 * string is not marking the end of a CDATA section.
4438
 *
4439
 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4440
 */
4441
4442
void
4443
36.7M
xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
4444
36.7M
    const xmlChar *in;
4445
36.7M
    int nbchar = 0;
4446
36.7M
    int line = ctxt->input->line;
4447
36.7M
    int col = ctxt->input->col;
4448
36.7M
    int ccol;
4449
4450
36.7M
    SHRINK;
4451
36.7M
    GROW;
4452
    /*
4453
     * Accelerated common case where input don't need to be
4454
     * modified before passing it to the handler.
4455
     */
4456
36.7M
    if (!cdata) {
4457
36.7M
  in = ctxt->input->cur;
4458
46.3M
  do {
4459
59.1M
get_more_space:
4460
138M
      while (*in == 0x20) { in++; ctxt->input->col++; }
4461
59.1M
      if (*in == 0xA) {
4462
13.0M
    do {
4463
13.0M
        ctxt->input->line++; ctxt->input->col = 1;
4464
13.0M
        in++;
4465
13.0M
    } while (*in == 0xA);
4466
12.7M
    goto get_more_space;
4467
12.7M
      }
4468
46.3M
      if (*in == '<') {
4469
14.6M
    nbchar = in - ctxt->input->cur;
4470
14.6M
    if (nbchar > 0) {
4471
14.6M
        const xmlChar *tmp = ctxt->input->cur;
4472
14.6M
        ctxt->input->cur = in;
4473
4474
14.6M
        if ((ctxt->sax != NULL) &&
4475
14.6M
            (ctxt->sax->ignorableWhitespace !=
4476
14.6M
             ctxt->sax->characters)) {
4477
7.20M
      if (areBlanks(ctxt, tmp, nbchar, 1)) {
4478
2.18M
          if (ctxt->sax->ignorableWhitespace != NULL)
4479
2.18M
        ctxt->sax->ignorableWhitespace(ctxt->userData,
4480
2.18M
                   tmp, nbchar);
4481
5.01M
      } else {
4482
5.01M
          if (ctxt->sax->characters != NULL)
4483
5.01M
        ctxt->sax->characters(ctxt->userData,
4484
5.01M
                  tmp, nbchar);
4485
5.01M
          if (*ctxt->space == -1)
4486
1.16M
              *ctxt->space = -2;
4487
5.01M
      }
4488
7.44M
        } else if ((ctxt->sax != NULL) &&
4489
7.44M
                   (ctxt->sax->characters != NULL)) {
4490
7.44M
      ctxt->sax->characters(ctxt->userData,
4491
7.44M
                tmp, nbchar);
4492
7.44M
        }
4493
14.6M
    }
4494
14.6M
    return;
4495
14.6M
      }
4496
4497
38.4M
get_more:
4498
38.4M
            ccol = ctxt->input->col;
4499
500M
      while (test_char_data[*in]) {
4500
462M
    in++;
4501
462M
    ccol++;
4502
462M
      }
4503
38.4M
      ctxt->input->col = ccol;
4504
38.4M
      if (*in == 0xA) {
4505
6.57M
    do {
4506
6.57M
        ctxt->input->line++; ctxt->input->col = 1;
4507
6.57M
        in++;
4508
6.57M
    } while (*in == 0xA);
4509
5.87M
    goto get_more;
4510
5.87M
      }
4511
32.5M
      if (*in == ']') {
4512
994k
    if ((in[1] == ']') && (in[2] == '>')) {
4513
95.5k
        xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4514
95.5k
        ctxt->input->cur = in + 1;
4515
95.5k
        return;
4516
95.5k
    }
4517
898k
    in++;
4518
898k
    ctxt->input->col++;
4519
898k
    goto get_more;
4520
994k
      }
4521
31.5M
      nbchar = in - ctxt->input->cur;
4522
31.5M
      if (nbchar > 0) {
4523
26.1M
    if ((ctxt->sax != NULL) &&
4524
26.1M
        (ctxt->sax->ignorableWhitespace !=
4525
26.1M
         ctxt->sax->characters) &&
4526
26.1M
        (IS_BLANK_CH(*ctxt->input->cur))) {
4527
5.61M
        const xmlChar *tmp = ctxt->input->cur;
4528
5.61M
        ctxt->input->cur = in;
4529
4530
5.61M
        if (areBlanks(ctxt, tmp, nbchar, 0)) {
4531
45.6k
            if (ctxt->sax->ignorableWhitespace != NULL)
4532
45.6k
          ctxt->sax->ignorableWhitespace(ctxt->userData,
4533
45.6k
                 tmp, nbchar);
4534
5.56M
        } else {
4535
5.56M
            if (ctxt->sax->characters != NULL)
4536
5.56M
          ctxt->sax->characters(ctxt->userData,
4537
5.56M
              tmp, nbchar);
4538
5.56M
      if (*ctxt->space == -1)
4539
1.75M
          *ctxt->space = -2;
4540
5.56M
        }
4541
5.61M
                    line = ctxt->input->line;
4542
5.61M
                    col = ctxt->input->col;
4543
20.5M
    } else if (ctxt->sax != NULL) {
4544
20.5M
        if (ctxt->sax->characters != NULL)
4545
20.5M
      ctxt->sax->characters(ctxt->userData,
4546
20.5M
                ctxt->input->cur, nbchar);
4547
20.5M
                    line = ctxt->input->line;
4548
20.5M
                    col = ctxt->input->col;
4549
20.5M
    }
4550
                /* something really bad happened in the SAX callback */
4551
26.1M
                if (ctxt->instate != XML_PARSER_CONTENT)
4552
0
                    return;
4553
26.1M
      }
4554
31.5M
      ctxt->input->cur = in;
4555
31.5M
      if (*in == 0xD) {
4556
10.0M
    in++;
4557
10.0M
    if (*in == 0xA) {
4558
9.74M
        ctxt->input->cur = in;
4559
9.74M
        in++;
4560
9.74M
        ctxt->input->line++; ctxt->input->col = 1;
4561
9.74M
        continue; /* while */
4562
9.74M
    }
4563
292k
    in--;
4564
292k
      }
4565
21.8M
      if (*in == '<') {
4566
9.13M
    return;
4567
9.13M
      }
4568
12.7M
      if (*in == '&') {
4569
5.02M
    return;
4570
5.02M
      }
4571
7.68M
      SHRINK;
4572
7.68M
      GROW;
4573
7.68M
            if (ctxt->instate == XML_PARSER_EOF)
4574
0
    return;
4575
7.68M
      in = ctxt->input->cur;
4576
17.4M
  } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09) || (*in == 0x0a));
4577
7.84M
  nbchar = 0;
4578
7.84M
    }
4579
7.84M
    ctxt->input->line = line;
4580
7.84M
    ctxt->input->col = col;
4581
7.84M
    xmlParseCharDataComplex(ctxt, cdata);
4582
7.84M
}
4583
4584
/**
4585
 * xmlParseCharDataComplex:
4586
 * @ctxt:  an XML parser context
4587
 * @cdata:  int indicating whether we are within a CDATA section
4588
 *
4589
 * parse a CharData section.this is the fallback function
4590
 * of xmlParseCharData() when the parsing requires handling
4591
 * of non-ASCII characters.
4592
 */
4593
static void
4594
7.84M
xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
4595
7.84M
    xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4596
7.84M
    int nbchar = 0;
4597
7.84M
    int cur, l;
4598
7.84M
    int count = 0;
4599
4600
7.84M
    SHRINK;
4601
7.84M
    GROW;
4602
7.84M
    cur = CUR_CHAR(l);
4603
196M
    while ((cur != '<') && /* checked */
4604
196M
           (cur != '&') &&
4605
196M
     (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
4606
188M
  if ((cur == ']') && (NXT(1) == ']') &&
4607
188M
      (NXT(2) == '>')) {
4608
50.5k
      if (cdata) break;
4609
50.5k
      else {
4610
50.5k
    xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4611
50.5k
      }
4612
50.5k
  }
4613
188M
  COPY_BUF(l,buf,nbchar,cur);
4614
  /* move current position before possible calling of ctxt->sax->characters */
4615
188M
  NEXTL(l);
4616
188M
  cur = CUR_CHAR(l);
4617
188M
  if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
4618
207k
      buf[nbchar] = 0;
4619
4620
      /*
4621
       * OK the segment is to be consumed as chars.
4622
       */
4623
207k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4624
38.3k
    if (areBlanks(ctxt, buf, nbchar, 0)) {
4625
271
        if (ctxt->sax->ignorableWhitespace != NULL)
4626
271
      ctxt->sax->ignorableWhitespace(ctxt->userData,
4627
271
                                     buf, nbchar);
4628
38.0k
    } else {
4629
38.0k
        if (ctxt->sax->characters != NULL)
4630
38.0k
      ctxt->sax->characters(ctxt->userData, buf, nbchar);
4631
38.0k
        if ((ctxt->sax->characters !=
4632
38.0k
             ctxt->sax->ignorableWhitespace) &&
4633
38.0k
      (*ctxt->space == -1))
4634
1.70k
      *ctxt->space = -2;
4635
38.0k
    }
4636
38.3k
      }
4637
207k
      nbchar = 0;
4638
            /* something really bad happened in the SAX callback */
4639
207k
            if (ctxt->instate != XML_PARSER_CONTENT)
4640
0
                return;
4641
207k
  }
4642
188M
  count++;
4643
188M
  if (count > 50) {
4644
2.47M
      SHRINK;
4645
2.47M
      GROW;
4646
2.47M
      count = 0;
4647
2.47M
            if (ctxt->instate == XML_PARSER_EOF)
4648
0
    return;
4649
2.47M
  }
4650
188M
    }
4651
7.84M
    if (nbchar != 0) {
4652
4.43M
        buf[nbchar] = 0;
4653
  /*
4654
   * OK the segment is to be consumed as chars.
4655
   */
4656
4.43M
  if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4657
387k
      if (areBlanks(ctxt, buf, nbchar, 0)) {
4658
6.14k
    if (ctxt->sax->ignorableWhitespace != NULL)
4659
6.14k
        ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4660
381k
      } else {
4661
381k
    if (ctxt->sax->characters != NULL)
4662
381k
        ctxt->sax->characters(ctxt->userData, buf, nbchar);
4663
381k
    if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4664
381k
        (*ctxt->space == -1))
4665
107k
        *ctxt->space = -2;
4666
381k
      }
4667
387k
  }
4668
4.43M
    }
4669
7.84M
    if ((cur != 0) && (!IS_CHAR(cur))) {
4670
  /* Generate the error and skip the offending character */
4671
3.54M
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4672
3.54M
                          "PCDATA invalid Char value %d\n",
4673
3.54M
                    cur);
4674
3.54M
  NEXTL(l);
4675
3.54M
    }
4676
7.84M
}
4677
4678
/**
4679
 * xmlParseExternalID:
4680
 * @ctxt:  an XML parser context
4681
 * @publicID:  a xmlChar** receiving PubidLiteral
4682
 * @strict: indicate whether we should restrict parsing to only
4683
 *          production [75], see NOTE below
4684
 *
4685
 * DEPRECATED: Internal function, don't use.
4686
 *
4687
 * Parse an External ID or a Public ID
4688
 *
4689
 * NOTE: Productions [75] and [83] interact badly since [75] can generate
4690
 *       'PUBLIC' S PubidLiteral S SystemLiteral
4691
 *
4692
 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4693
 *                   | 'PUBLIC' S PubidLiteral S SystemLiteral
4694
 *
4695
 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4696
 *
4697
 * Returns the function returns SystemLiteral and in the second
4698
 *                case publicID receives PubidLiteral, is strict is off
4699
 *                it is possible to return NULL and have publicID set.
4700
 */
4701
4702
xmlChar *
4703
680k
xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4704
680k
    xmlChar *URI = NULL;
4705
4706
680k
    SHRINK;
4707
4708
680k
    *publicID = NULL;
4709
680k
    if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
4710
413k
        SKIP(6);
4711
413k
  if (SKIP_BLANKS == 0) {
4712
535
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4713
535
                     "Space required after 'SYSTEM'\n");
4714
535
  }
4715
413k
  URI = xmlParseSystemLiteral(ctxt);
4716
413k
  if (URI == NULL) {
4717
1.52k
      xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4718
1.52k
        }
4719
413k
    } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
4720
43.9k
        SKIP(6);
4721
43.9k
  if (SKIP_BLANKS == 0) {
4722
561
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4723
561
        "Space required after 'PUBLIC'\n");
4724
561
  }
4725
43.9k
  *publicID = xmlParsePubidLiteral(ctxt);
4726
43.9k
  if (*publicID == NULL) {
4727
861
      xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
4728
861
  }
4729
43.9k
  if (strict) {
4730
      /*
4731
       * We don't handle [83] so "S SystemLiteral" is required.
4732
       */
4733
40.1k
      if (SKIP_BLANKS == 0) {
4734
4.80k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4735
4.80k
      "Space required after the Public Identifier\n");
4736
4.80k
      }
4737
40.1k
  } else {
4738
      /*
4739
       * We handle [83] so we return immediately, if
4740
       * "S SystemLiteral" is not detected. We skip blanks if no
4741
             * system literal was found, but this is harmless since we must
4742
             * be at the end of a NotationDecl.
4743
       */
4744
3.82k
      if (SKIP_BLANKS == 0) return(NULL);
4745
939
      if ((CUR != '\'') && (CUR != '"')) return(NULL);
4746
939
  }
4747
40.5k
  URI = xmlParseSystemLiteral(ctxt);
4748
40.5k
  if (URI == NULL) {
4749
5.33k
      xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4750
5.33k
        }
4751
40.5k
    }
4752
676k
    return(URI);
4753
680k
}
4754
4755
/**
4756
 * xmlParseCommentComplex:
4757
 * @ctxt:  an XML parser context
4758
 * @buf:  the already parsed part of the buffer
4759
 * @len:  number of bytes in the buffer
4760
 * @size:  allocated size of the buffer
4761
 *
4762
 * Skip an XML (SGML) comment <!-- .... -->
4763
 *  The spec says that "For compatibility, the string "--" (double-hyphen)
4764
 *  must not occur within comments. "
4765
 * This is the slow routine in case the accelerator for ascii didn't work
4766
 *
4767
 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4768
 */
4769
static void
4770
xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf,
4771
874k
                       size_t len, size_t size) {
4772
874k
    int q, ql;
4773
874k
    int r, rl;
4774
874k
    int cur, l;
4775
874k
    size_t count = 0;
4776
874k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4777
765k
                       XML_MAX_HUGE_LENGTH :
4778
874k
                       XML_MAX_TEXT_LENGTH;
4779
874k
    int inputid;
4780
4781
874k
    inputid = ctxt->input->id;
4782
4783
874k
    if (buf == NULL) {
4784
38.3k
        len = 0;
4785
38.3k
  size = XML_PARSER_BUFFER_SIZE;
4786
38.3k
  buf = (xmlChar *) xmlMallocAtomic(size);
4787
38.3k
  if (buf == NULL) {
4788
0
      xmlErrMemory(ctxt, NULL);
4789
0
      return;
4790
0
  }
4791
38.3k
    }
4792
874k
    GROW; /* Assure there's enough input data */
4793
874k
    q = CUR_CHAR(ql);
4794
874k
    if (q == 0)
4795
60.8k
        goto not_terminated;
4796
813k
    if (!IS_CHAR(q)) {
4797
97.0k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4798
97.0k
                          "xmlParseComment: invalid xmlChar value %d\n",
4799
97.0k
                    q);
4800
97.0k
  xmlFree (buf);
4801
97.0k
  return;
4802
97.0k
    }
4803
716k
    NEXTL(ql);
4804
716k
    r = CUR_CHAR(rl);
4805
716k
    if (r == 0)
4806
17.1k
        goto not_terminated;
4807
698k
    if (!IS_CHAR(r)) {
4808
17.6k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4809
17.6k
                          "xmlParseComment: invalid xmlChar value %d\n",
4810
17.6k
                    q);
4811
17.6k
  xmlFree (buf);
4812
17.6k
  return;
4813
17.6k
    }
4814
681k
    NEXTL(rl);
4815
681k
    cur = CUR_CHAR(l);
4816
681k
    if (cur == 0)
4817
10.3k
        goto not_terminated;
4818
146M
    while (IS_CHAR(cur) && /* checked */
4819
146M
           ((cur != '>') ||
4820
146M
      (r != '-') || (q != '-'))) {
4821
145M
  if ((r == '-') && (q == '-')) {
4822
142k
      xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
4823
142k
  }
4824
145M
  if (len + 5 >= size) {
4825
668k
      xmlChar *new_buf;
4826
668k
            size_t new_size;
4827
4828
668k
      new_size = size * 2;
4829
668k
      new_buf = (xmlChar *) xmlRealloc(buf, new_size);
4830
668k
      if (new_buf == NULL) {
4831
0
    xmlFree (buf);
4832
0
    xmlErrMemory(ctxt, NULL);
4833
0
    return;
4834
0
      }
4835
668k
      buf = new_buf;
4836
668k
            size = new_size;
4837
668k
  }
4838
145M
  COPY_BUF(ql,buf,len,q);
4839
145M
  q = r;
4840
145M
  ql = rl;
4841
145M
  r = cur;
4842
145M
  rl = l;
4843
4844
145M
  count++;
4845
145M
  if (count > 50) {
4846
2.55M
      SHRINK;
4847
2.55M
      GROW;
4848
2.55M
      count = 0;
4849
2.55M
            if (ctxt->instate == XML_PARSER_EOF) {
4850
0
    xmlFree(buf);
4851
0
    return;
4852
0
            }
4853
2.55M
  }
4854
145M
  NEXTL(l);
4855
145M
  cur = CUR_CHAR(l);
4856
145M
  if (cur == 0) {
4857
118k
      SHRINK;
4858
118k
      GROW;
4859
118k
      cur = CUR_CHAR(l);
4860
118k
  }
4861
4862
145M
        if (len > maxLength) {
4863
0
            xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4864
0
                         "Comment too big found", NULL);
4865
0
            xmlFree (buf);
4866
0
            return;
4867
0
        }
4868
145M
    }
4869
670k
    buf[len] = 0;
4870
670k
    if (cur == 0) {
4871
118k
  xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4872
118k
                       "Comment not terminated \n<!--%.50s\n", buf);
4873
551k
    } else if (!IS_CHAR(cur)) {
4874
123k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4875
123k
                          "xmlParseComment: invalid xmlChar value %d\n",
4876
123k
                    cur);
4877
428k
    } else {
4878
428k
  if (inputid != ctxt->input->id) {
4879
0
      xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4880
0
               "Comment doesn't start and stop in the same"
4881
0
                           " entity\n");
4882
0
  }
4883
428k
        NEXT;
4884
428k
  if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4885
428k
      (!ctxt->disableSAX))
4886
211k
      ctxt->sax->comment(ctxt->userData, buf);
4887
428k
    }
4888
670k
    xmlFree(buf);
4889
670k
    return;
4890
88.4k
not_terminated:
4891
88.4k
    xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4892
88.4k
       "Comment not terminated\n", NULL);
4893
88.4k
    xmlFree(buf);
4894
88.4k
    return;
4895
670k
}
4896
4897
/**
4898
 * xmlParseComment:
4899
 * @ctxt:  an XML parser context
4900
 *
4901
 * DEPRECATED: Internal function, don't use.
4902
 *
4903
 * Skip an XML (SGML) comment <!-- .... -->
4904
 *  The spec says that "For compatibility, the string "--" (double-hyphen)
4905
 *  must not occur within comments. "
4906
 *
4907
 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4908
 */
4909
void
4910
2.47M
xmlParseComment(xmlParserCtxtPtr ctxt) {
4911
2.47M
    xmlChar *buf = NULL;
4912
2.47M
    size_t size = XML_PARSER_BUFFER_SIZE;
4913
2.47M
    size_t len = 0;
4914
2.47M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4915
2.03M
                       XML_MAX_HUGE_LENGTH :
4916
2.47M
                       XML_MAX_TEXT_LENGTH;
4917
2.47M
    xmlParserInputState state;
4918
2.47M
    const xmlChar *in;
4919
2.47M
    size_t nbchar = 0;
4920
2.47M
    int ccol;
4921
2.47M
    int inputid;
4922
4923
    /*
4924
     * Check that there is a comment right here.
4925
     */
4926
2.47M
    if ((RAW != '<') || (NXT(1) != '!') ||
4927
2.47M
        (NXT(2) != '-') || (NXT(3) != '-')) return;
4928
2.47M
    state = ctxt->instate;
4929
2.47M
    ctxt->instate = XML_PARSER_COMMENT;
4930
2.47M
    inputid = ctxt->input->id;
4931
2.47M
    SKIP(4);
4932
2.47M
    SHRINK;
4933
2.47M
    GROW;
4934
4935
    /*
4936
     * Accelerated common case where input don't need to be
4937
     * modified before passing it to the handler.
4938
     */
4939
2.47M
    in = ctxt->input->cur;
4940
2.47M
    do {
4941
2.47M
  if (*in == 0xA) {
4942
62.3k
      do {
4943
62.3k
    ctxt->input->line++; ctxt->input->col = 1;
4944
62.3k
    in++;
4945
62.3k
      } while (*in == 0xA);
4946
52.2k
  }
4947
19.8M
get_more:
4948
19.8M
        ccol = ctxt->input->col;
4949
576M
  while (((*in > '-') && (*in <= 0x7F)) ||
4950
576M
         ((*in >= 0x20) && (*in < '-')) ||
4951
576M
         (*in == 0x09)) {
4952
556M
        in++;
4953
556M
        ccol++;
4954
556M
  }
4955
19.8M
  ctxt->input->col = ccol;
4956
19.8M
  if (*in == 0xA) {
4957
915k
      do {
4958
915k
    ctxt->input->line++; ctxt->input->col = 1;
4959
915k
    in++;
4960
915k
      } while (*in == 0xA);
4961
711k
      goto get_more;
4962
711k
  }
4963
19.0M
  nbchar = in - ctxt->input->cur;
4964
  /*
4965
   * save current set of data
4966
   */
4967
19.0M
  if (nbchar > 0) {
4968
18.1M
      if ((ctxt->sax != NULL) &&
4969
18.1M
    (ctxt->sax->comment != NULL)) {
4970
18.1M
    if (buf == NULL) {
4971
2.41M
        if ((*in == '-') && (in[1] == '-'))
4972
566k
            size = nbchar + 1;
4973
1.84M
        else
4974
1.84M
            size = XML_PARSER_BUFFER_SIZE + nbchar;
4975
2.41M
        buf = (xmlChar *) xmlMallocAtomic(size);
4976
2.41M
        if (buf == NULL) {
4977
0
            xmlErrMemory(ctxt, NULL);
4978
0
      ctxt->instate = state;
4979
0
      return;
4980
0
        }
4981
2.41M
        len = 0;
4982
15.7M
    } else if (len + nbchar + 1 >= size) {
4983
2.12M
        xmlChar *new_buf;
4984
2.12M
        size  += len + nbchar + XML_PARSER_BUFFER_SIZE;
4985
2.12M
        new_buf = (xmlChar *) xmlRealloc(buf, size);
4986
2.12M
        if (new_buf == NULL) {
4987
0
            xmlFree (buf);
4988
0
      xmlErrMemory(ctxt, NULL);
4989
0
      ctxt->instate = state;
4990
0
      return;
4991
0
        }
4992
2.12M
        buf = new_buf;
4993
2.12M
    }
4994
18.1M
    memcpy(&buf[len], ctxt->input->cur, nbchar);
4995
18.1M
    len += nbchar;
4996
18.1M
    buf[len] = 0;
4997
18.1M
      }
4998
18.1M
  }
4999
19.0M
        if (len > maxLength) {
5000
0
            xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
5001
0
                         "Comment too big found", NULL);
5002
0
            xmlFree (buf);
5003
0
            return;
5004
0
        }
5005
19.0M
  ctxt->input->cur = in;
5006
19.0M
  if (*in == 0xA) {
5007
0
      in++;
5008
0
      ctxt->input->line++; ctxt->input->col = 1;
5009
0
  }
5010
19.0M
  if (*in == 0xD) {
5011
14.2M
      in++;
5012
14.2M
      if (*in == 0xA) {
5013
14.1M
    ctxt->input->cur = in;
5014
14.1M
    in++;
5015
14.1M
    ctxt->input->line++; ctxt->input->col = 1;
5016
14.1M
    goto get_more;
5017
14.1M
      }
5018
129k
      in--;
5019
129k
  }
5020
4.97M
  SHRINK;
5021
4.97M
  GROW;
5022
4.97M
        if (ctxt->instate == XML_PARSER_EOF) {
5023
0
            xmlFree(buf);
5024
0
            return;
5025
0
        }
5026
4.97M
  in = ctxt->input->cur;
5027
4.97M
  if (*in == '-') {
5028
4.10M
      if (in[1] == '-') {
5029
1.72M
          if (in[2] == '>') {
5030
1.60M
        if (ctxt->input->id != inputid) {
5031
0
      xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5032
0
                     "comment doesn't start and stop in the"
5033
0
                                       " same entity\n");
5034
0
        }
5035
1.60M
        SKIP(3);
5036
1.60M
        if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
5037
1.60M
            (!ctxt->disableSAX)) {
5038
735k
      if (buf != NULL)
5039
731k
          ctxt->sax->comment(ctxt->userData, buf);
5040
4.22k
      else
5041
4.22k
          ctxt->sax->comment(ctxt->userData, BAD_CAST "");
5042
735k
        }
5043
1.60M
        if (buf != NULL)
5044
1.58M
            xmlFree(buf);
5045
1.60M
        if (ctxt->instate != XML_PARSER_EOF)
5046
1.60M
      ctxt->instate = state;
5047
1.60M
        return;
5048
1.60M
    }
5049
121k
    if (buf != NULL) {
5050
114k
        xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5051
114k
                          "Double hyphen within comment: "
5052
114k
                                      "<!--%.50s\n",
5053
114k
              buf);
5054
114k
    } else
5055
6.99k
        xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5056
6.99k
                          "Double hyphen within comment\n", NULL);
5057
121k
                if (ctxt->instate == XML_PARSER_EOF) {
5058
0
                    xmlFree(buf);
5059
0
                    return;
5060
0
                }
5061
121k
    in++;
5062
121k
    ctxt->input->col++;
5063
121k
      }
5064
2.50M
      in++;
5065
2.50M
      ctxt->input->col++;
5066
2.50M
      goto get_more;
5067
4.10M
  }
5068
4.97M
    } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09) || (*in == 0x0a));
5069
874k
    xmlParseCommentComplex(ctxt, buf, len, size);
5070
874k
    ctxt->instate = state;
5071
874k
    return;
5072
2.47M
}
5073
5074
5075
/**
5076
 * xmlParsePITarget:
5077
 * @ctxt:  an XML parser context
5078
 *
5079
 * DEPRECATED: Internal function, don't use.
5080
 *
5081
 * parse the name of a PI
5082
 *
5083
 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
5084
 *
5085
 * Returns the PITarget name or NULL
5086
 */
5087
5088
const xmlChar *
5089
841k
xmlParsePITarget(xmlParserCtxtPtr ctxt) {
5090
841k
    const xmlChar *name;
5091
5092
841k
    name = xmlParseName(ctxt);
5093
841k
    if ((name != NULL) &&
5094
841k
        ((name[0] == 'x') || (name[0] == 'X')) &&
5095
841k
        ((name[1] == 'm') || (name[1] == 'M')) &&
5096
841k
        ((name[2] == 'l') || (name[2] == 'L'))) {
5097
86.0k
  int i;
5098
86.0k
  if ((name[0] == 'x') && (name[1] == 'm') &&
5099
86.0k
      (name[2] == 'l') && (name[3] == 0)) {
5100
69.7k
      xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5101
69.7k
     "XML declaration allowed only at the start of the document\n");
5102
69.7k
      return(name);
5103
69.7k
  } else if (name[3] == 0) {
5104
1.76k
      xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
5105
1.76k
      return(name);
5106
1.76k
  }
5107
43.6k
  for (i = 0;;i++) {
5108
43.6k
      if (xmlW3CPIs[i] == NULL) break;
5109
29.0k
      if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
5110
0
          return(name);
5111
29.0k
  }
5112
14.5k
  xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5113
14.5k
          "xmlParsePITarget: invalid name prefix 'xml'\n",
5114
14.5k
          NULL, NULL);
5115
14.5k
    }
5116
770k
    if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
5117
25.8k
  xmlNsErr(ctxt, XML_NS_ERR_COLON,
5118
25.8k
     "colons are forbidden from PI names '%s'\n", name, NULL, NULL);
5119
25.8k
    }
5120
770k
    return(name);
5121
841k
}
5122
5123
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * Parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype information.
 * This will add the given catalog to the parsing context in order
 * to be used if there is a resolution need further down in the document.
 *
 * The value must read: catalog, '=', a quoted URL, each optionally
 * surrounded by blanks, and nothing else. A missing '=' makes the
 * function return silently; any other syntax error raises a
 * XML_WAR_CATALOG_PI warning.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    xmlChar *URL = NULL;
    const xmlChar *p = catalog;
    const xmlChar *start;
    xmlChar quote;

    /* the pseudo-attribute name */
    while (IS_BLANK_CH(*p))
        p++;
    if (xmlStrncmp(p, BAD_CAST"catalog", 7) != 0)
        goto syntax_error;
    p += 7;

    /* the equal sign; its absence is not reported */
    while (IS_BLANK_CH(*p))
        p++;
    if (*p != '=')
        return;
    p++;

    /* the opening quote, either single or double */
    while (IS_BLANK_CH(*p))
        p++;
    quote = *p;
    if ((quote != '\'') && (quote != '"'))
        goto syntax_error;
    p++;

    /* the URL itself, up to the matching quote */
    start = p;
    while ((*p != 0) && (*p != quote))
        p++;
    if (*p == 0)
        goto syntax_error;
    URL = xmlStrndup(start, p - start);
    p++;

    /* only blanks may follow */
    while (IS_BLANK_CH(*p))
        p++;
    if (*p != 0)
        goto syntax_error;

    if (URL != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
        xmlFree(URL);
    }
    return;

syntax_error:
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
                  "Catalog PI syntax error: %s\n",
                  catalog, NULL);
    if (URL != NULL)
        xmlFree(URL);
}
#endif
5184
5185
/**
5186
 * xmlParsePI:
5187
 * @ctxt:  an XML parser context
5188
 *
5189
 * DEPRECATED: Internal function, don't use.
5190
 *
5191
 * parse an XML Processing Instruction.
5192
 *
5193
 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
5194
 *
5195
 * The processing is transferred to SAX once parsed.
5196
 */
5197
5198
void
5199
841k
xmlParsePI(xmlParserCtxtPtr ctxt) {
5200
841k
    xmlChar *buf = NULL;
5201
841k
    size_t len = 0;
5202
841k
    size_t size = XML_PARSER_BUFFER_SIZE;
5203
841k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
5204
755k
                       XML_MAX_HUGE_LENGTH :
5205
841k
                       XML_MAX_TEXT_LENGTH;
5206
841k
    int cur, l;
5207
841k
    const xmlChar *target;
5208
841k
    xmlParserInputState state;
5209
841k
    int count = 0;
5210
5211
841k
    if ((RAW == '<') && (NXT(1) == '?')) {
5212
841k
  int inputid = ctxt->input->id;
5213
841k
  state = ctxt->instate;
5214
841k
        ctxt->instate = XML_PARSER_PI;
5215
  /*
5216
   * this is a Processing Instruction.
5217
   */
5218
841k
  SKIP(2);
5219
841k
  SHRINK;
5220
5221
  /*
5222
   * Parse the target name and check for special support like
5223
   * namespace.
5224
   */
5225
841k
        target = xmlParsePITarget(ctxt);
5226
841k
  if (target != NULL) {
5227
786k
      if ((RAW == '?') && (NXT(1) == '>')) {
5228
199k
    if (inputid != ctxt->input->id) {
5229
0
        xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5230
0
                             "PI declaration doesn't start and stop in"
5231
0
                                   " the same entity\n");
5232
0
    }
5233
199k
    SKIP(2);
5234
5235
    /*
5236
     * SAX: PI detected.
5237
     */
5238
199k
    if ((ctxt->sax) && (!ctxt->disableSAX) &&
5239
199k
        (ctxt->sax->processingInstruction != NULL))
5240
50.5k
        ctxt->sax->processingInstruction(ctxt->userData,
5241
50.5k
                                         target, NULL);
5242
199k
    if (ctxt->instate != XML_PARSER_EOF)
5243
199k
        ctxt->instate = state;
5244
199k
    return;
5245
199k
      }
5246
587k
      buf = (xmlChar *) xmlMallocAtomic(size);
5247
587k
      if (buf == NULL) {
5248
0
    xmlErrMemory(ctxt, NULL);
5249
0
    ctxt->instate = state;
5250
0
    return;
5251
0
      }
5252
587k
      if (SKIP_BLANKS == 0) {
5253
269k
    xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
5254
269k
        "ParsePI: PI %s space expected\n", target);
5255
269k
      }
5256
587k
      cur = CUR_CHAR(l);
5257
98.8M
      while (IS_CHAR(cur) && /* checked */
5258
98.8M
       ((cur != '?') || (NXT(1) != '>'))) {
5259
98.2M
    if (len + 5 >= size) {
5260
367k
        xmlChar *tmp;
5261
367k
                    size_t new_size = size * 2;
5262
367k
        tmp = (xmlChar *) xmlRealloc(buf, new_size);
5263
367k
        if (tmp == NULL) {
5264
0
      xmlErrMemory(ctxt, NULL);
5265
0
      xmlFree(buf);
5266
0
      ctxt->instate = state;
5267
0
      return;
5268
0
        }
5269
367k
        buf = tmp;
5270
367k
                    size = new_size;
5271
367k
    }
5272
98.2M
    count++;
5273
98.2M
    if (count > 50) {
5274
1.71M
        SHRINK;
5275
1.71M
        GROW;
5276
1.71M
                    if (ctxt->instate == XML_PARSER_EOF) {
5277
0
                        xmlFree(buf);
5278
0
                        return;
5279
0
                    }
5280
1.71M
        count = 0;
5281
1.71M
    }
5282
98.2M
    COPY_BUF(l,buf,len,cur);
5283
98.2M
    NEXTL(l);
5284
98.2M
    cur = CUR_CHAR(l);
5285
98.2M
    if (cur == 0) {
5286
133k
        SHRINK;
5287
133k
        GROW;
5288
133k
        cur = CUR_CHAR(l);
5289
133k
    }
5290
98.2M
                if (len > maxLength) {
5291
0
                    xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5292
0
                                      "PI %s too big found", target);
5293
0
                    xmlFree(buf);
5294
0
                    ctxt->instate = state;
5295
0
                    return;
5296
0
                }
5297
98.2M
      }
5298
587k
      buf[len] = 0;
5299
587k
      if (cur != '?') {
5300
224k
    xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5301
224k
          "ParsePI: PI %s never end ...\n", target);
5302
362k
      } else {
5303
362k
    if (inputid != ctxt->input->id) {
5304
63.9k
        xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5305
63.9k
                             "PI declaration doesn't start and stop in"
5306
63.9k
                                   " the same entity\n");
5307
63.9k
    }
5308
362k
    SKIP(2);
5309
5310
362k
#ifdef LIBXML_CATALOG_ENABLED
5311
362k
    if (((state == XML_PARSER_MISC) ||
5312
362k
               (state == XML_PARSER_START)) &&
5313
362k
        (xmlStrEqual(target, XML_CATALOG_PI))) {
5314
0
        xmlCatalogAllow allow = xmlCatalogGetDefaults();
5315
0
        if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5316
0
      (allow == XML_CATA_ALLOW_ALL))
5317
0
      xmlParseCatalogPI(ctxt, buf);
5318
0
    }
5319
362k
#endif
5320
5321
5322
    /*
5323
     * SAX: PI detected.
5324
     */
5325
362k
    if ((ctxt->sax) && (!ctxt->disableSAX) &&
5326
362k
        (ctxt->sax->processingInstruction != NULL))
5327
130k
        ctxt->sax->processingInstruction(ctxt->userData,
5328
130k
                                         target, buf);
5329
362k
      }
5330
587k
      xmlFree(buf);
5331
587k
  } else {
5332
54.8k
      xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
5333
54.8k
  }
5334
642k
  if (ctxt->instate != XML_PARSER_EOF)
5335
642k
      ctxt->instate = state;
5336
642k
    }
5337
841k
}
5338
5339
/**
5340
 * xmlParseNotationDecl:
5341
 * @ctxt:  an XML parser context
5342
 *
5343
 * DEPRECATED: Internal function, don't use.
5344
 *
5345
 * parse a notation declaration
5346
 *
5347
 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID |  PublicID) S? '>'
5348
 *
5349
 * Hence there is actually 3 choices:
5350
 *     'PUBLIC' S PubidLiteral
5351
 *     'PUBLIC' S PubidLiteral S SystemLiteral
5352
 * and 'SYSTEM' S SystemLiteral
5353
 *
5354
 * See the NOTE on xmlParseExternalID().
5355
 */
5356
5357
void
5358
23.0k
xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
5359
23.0k
    const xmlChar *name;
5360
23.0k
    xmlChar *Pubid;
5361
23.0k
    xmlChar *Systemid;
5362
5363
23.0k
    if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5364
19.1k
  int inputid = ctxt->input->id;
5365
19.1k
  SHRINK;
5366
19.1k
  SKIP(10);
5367
19.1k
  if (SKIP_BLANKS == 0) {
5368
529
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5369
529
         "Space required after '<!NOTATION'\n");
5370
529
      return;
5371
529
  }
5372
5373
18.5k
        name = xmlParseName(ctxt);
5374
18.5k
  if (name == NULL) {
5375
1.95k
      xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5376
1.95k
      return;
5377
1.95k
  }
5378
16.6k
  if (xmlStrchr(name, ':') != NULL) {
5379
1.06k
      xmlNsErr(ctxt, XML_NS_ERR_COLON,
5380
1.06k
         "colons are forbidden from notation names '%s'\n",
5381
1.06k
         name, NULL, NULL);
5382
1.06k
  }
5383
16.6k
  if (SKIP_BLANKS == 0) {
5384
685
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5385
685
         "Space required after the NOTATION name'\n");
5386
685
      return;
5387
685
  }
5388
5389
  /*
5390
   * Parse the IDs.
5391
   */
5392
15.9k
  Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
5393
15.9k
  SKIP_BLANKS;
5394
5395
15.9k
  if (RAW == '>') {
5396
10.7k
      if (inputid != ctxt->input->id) {
5397
45
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5398
45
                         "Notation declaration doesn't start and stop"
5399
45
                               " in the same entity\n");
5400
45
      }
5401
10.7k
      NEXT;
5402
10.7k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5403
10.7k
    (ctxt->sax->notationDecl != NULL))
5404
8.95k
    ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5405
10.7k
  } else {
5406
5.22k
      xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5407
5.22k
  }
5408
15.9k
  if (Systemid != NULL) xmlFree(Systemid);
5409
15.9k
  if (Pubid != NULL) xmlFree(Pubid);
5410
15.9k
    }
5411
23.0k
}
5412
5413
/**
5414
 * xmlParseEntityDecl:
5415
 * @ctxt:  an XML parser context
5416
 *
5417
 * DEPRECATED: Internal function, don't use.
5418
 *
5419
 * parse <!ENTITY declarations
5420
 *
5421
 * [70] EntityDecl ::= GEDecl | PEDecl
5422
 *
5423
 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5424
 *
5425
 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5426
 *
5427
 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5428
 *
5429
 * [74] PEDef ::= EntityValue | ExternalID
5430
 *
5431
 * [76] NDataDecl ::= S 'NDATA' S Name
5432
 *
5433
 * [ VC: Notation Declared ]
5434
 * The Name must match the declared name of a notation.
5435
 */
5436
5437
void
5438
5.02M
xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
5439
5.02M
    const xmlChar *name = NULL;
5440
5.02M
    xmlChar *value = NULL;
5441
5.02M
    xmlChar *URI = NULL, *literal = NULL;
5442
5.02M
    const xmlChar *ndata = NULL;
5443
5.02M
    int isParameter = 0;
5444
5.02M
    xmlChar *orig = NULL;
5445
5446
    /* GROW; done in the caller */
5447
5.02M
    if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
5448
5.01M
  int inputid = ctxt->input->id;
5449
5.01M
  SHRINK;
5450
5.01M
  SKIP(8);
5451
5.01M
  if (SKIP_BLANKS == 0) {
5452
2.96k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5453
2.96k
         "Space required after '<!ENTITY'\n");
5454
2.96k
  }
5455
5456
5.01M
  if (RAW == '%') {
5457
4.72M
      NEXT;
5458
4.72M
      if (SKIP_BLANKS == 0) {
5459
866
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5460
866
             "Space required after '%%'\n");
5461
866
      }
5462
4.72M
      isParameter = 1;
5463
4.72M
  }
5464
5465
5.01M
        name = xmlParseName(ctxt);
5466
5.01M
  if (name == NULL) {
5467
6.33k
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5468
6.33k
                     "xmlParseEntityDecl: no name\n");
5469
6.33k
            return;
5470
6.33k
  }
5471
5.01M
  if (xmlStrchr(name, ':') != NULL) {
5472
18.8k
      xmlNsErr(ctxt, XML_NS_ERR_COLON,
5473
18.8k
         "colons are forbidden from entities names '%s'\n",
5474
18.8k
         name, NULL, NULL);
5475
18.8k
  }
5476
5.01M
  if (SKIP_BLANKS == 0) {
5477
83.7k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5478
83.7k
         "Space required after the entity name\n");
5479
83.7k
  }
5480
5481
5.01M
  ctxt->instate = XML_PARSER_ENTITY_DECL;
5482
  /*
5483
   * handle the various case of definitions...
5484
   */
5485
5.01M
  if (isParameter) {
5486
4.72M
      if ((RAW == '"') || (RAW == '\'')) {
5487
4.38M
          value = xmlParseEntityValue(ctxt, &orig);
5488
4.38M
    if (value) {
5489
4.15M
        if ((ctxt->sax != NULL) &&
5490
4.15M
      (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5491
2.36M
      ctxt->sax->entityDecl(ctxt->userData, name,
5492
2.36M
                        XML_INTERNAL_PARAMETER_ENTITY,
5493
2.36M
            NULL, NULL, value);
5494
4.15M
    }
5495
4.38M
      } else {
5496
342k
          URI = xmlParseExternalID(ctxt, &literal, 1);
5497
342k
    if ((URI == NULL) && (literal == NULL)) {
5498
65.4k
        xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5499
65.4k
    }
5500
342k
    if (URI) {
5501
277k
        xmlURIPtr uri;
5502
5503
277k
        uri = xmlParseURI((const char *) URI);
5504
277k
        if (uri == NULL) {
5505
11.0k
            xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5506
11.0k
             "Invalid URI: %s\n", URI);
5507
      /*
5508
       * This really ought to be a well formedness error
5509
       * but the XML Core WG decided otherwise c.f. issue
5510
       * E26 of the XML erratas.
5511
       */
5512
266k
        } else {
5513
266k
      if (uri->fragment != NULL) {
5514
          /*
5515
           * Okay this is foolish to block those but not
5516
           * invalid URIs.
5517
           */
5518
159
          xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5519
265k
      } else {
5520
265k
          if ((ctxt->sax != NULL) &&
5521
265k
        (!ctxt->disableSAX) &&
5522
265k
        (ctxt->sax->entityDecl != NULL))
5523
149k
        ctxt->sax->entityDecl(ctxt->userData, name,
5524
149k
              XML_EXTERNAL_PARAMETER_ENTITY,
5525
149k
              literal, URI, NULL);
5526
265k
      }
5527
266k
      xmlFreeURI(uri);
5528
266k
        }
5529
277k
    }
5530
342k
      }
5531
4.72M
  } else {
5532
285k
      if ((RAW == '"') || (RAW == '\'')) {
5533
226k
          value = xmlParseEntityValue(ctxt, &orig);
5534
226k
    if ((ctxt->sax != NULL) &&
5535
226k
        (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5536
187k
        ctxt->sax->entityDecl(ctxt->userData, name,
5537
187k
        XML_INTERNAL_GENERAL_ENTITY,
5538
187k
        NULL, NULL, value);
5539
    /*
5540
     * For expat compatibility in SAX mode.
5541
     */
5542
226k
    if ((ctxt->myDoc == NULL) ||
5543
226k
        (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5544
9.57k
        if (ctxt->myDoc == NULL) {
5545
1.23k
      ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5546
1.23k
      if (ctxt->myDoc == NULL) {
5547
0
          xmlErrMemory(ctxt, "New Doc failed");
5548
0
          return;
5549
0
      }
5550
1.23k
      ctxt->myDoc->properties = XML_DOC_INTERNAL;
5551
1.23k
        }
5552
9.57k
        if (ctxt->myDoc->intSubset == NULL)
5553
1.23k
      ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5554
1.23k
              BAD_CAST "fake", NULL, NULL);
5555
5556
9.57k
        xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5557
9.57k
                    NULL, NULL, value);
5558
9.57k
    }
5559
226k
      } else {
5560
58.2k
          URI = xmlParseExternalID(ctxt, &literal, 1);
5561
58.2k
    if ((URI == NULL) && (literal == NULL)) {
5562
4.33k
        xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5563
4.33k
    }
5564
58.2k
    if (URI) {
5565
52.1k
        xmlURIPtr uri;
5566
5567
52.1k
        uri = xmlParseURI((const char *)URI);
5568
52.1k
        if (uri == NULL) {
5569
3.09k
            xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5570
3.09k
             "Invalid URI: %s\n", URI);
5571
      /*
5572
       * This really ought to be a well formedness error
5573
       * but the XML Core WG decided otherwise c.f. issue
5574
       * E26 of the XML erratas.
5575
       */
5576
49.0k
        } else {
5577
49.0k
      if (uri->fragment != NULL) {
5578
          /*
5579
           * Okay this is foolish to block those but not
5580
           * invalid URIs.
5581
           */
5582
919
          xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5583
919
      }
5584
49.0k
      xmlFreeURI(uri);
5585
49.0k
        }
5586
52.1k
    }
5587
58.2k
    if ((RAW != '>') && (SKIP_BLANKS == 0)) {
5588
4.11k
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5589
4.11k
           "Space required before 'NDATA'\n");
5590
4.11k
    }
5591
58.2k
    if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
5592
8.67k
        SKIP(5);
5593
8.67k
        if (SKIP_BLANKS == 0) {
5594
805
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5595
805
               "Space required after 'NDATA'\n");
5596
805
        }
5597
8.67k
        ndata = xmlParseName(ctxt);
5598
8.67k
        if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5599
8.67k
            (ctxt->sax->unparsedEntityDecl != NULL))
5600
7.64k
      ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5601
7.64k
            literal, URI, ndata);
5602
49.5k
    } else {
5603
49.5k
        if ((ctxt->sax != NULL) &&
5604
49.5k
            (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5605
32.4k
      ctxt->sax->entityDecl(ctxt->userData, name,
5606
32.4k
            XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5607
32.4k
            literal, URI, NULL);
5608
        /*
5609
         * For expat compatibility in SAX mode.
5610
         * assuming the entity replacement was asked for
5611
         */
5612
49.5k
        if ((ctxt->replaceEntities != 0) &&
5613
49.5k
      ((ctxt->myDoc == NULL) ||
5614
25.9k
      (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5615
928
      if (ctxt->myDoc == NULL) {
5616
291
          ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5617
291
          if (ctxt->myDoc == NULL) {
5618
0
              xmlErrMemory(ctxt, "New Doc failed");
5619
0
        return;
5620
0
          }
5621
291
          ctxt->myDoc->properties = XML_DOC_INTERNAL;
5622
291
      }
5623
5624
928
      if (ctxt->myDoc->intSubset == NULL)
5625
291
          ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5626
291
            BAD_CAST "fake", NULL, NULL);
5627
928
      xmlSAX2EntityDecl(ctxt, name,
5628
928
                  XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5629
928
                  literal, URI, NULL);
5630
928
        }
5631
49.5k
    }
5632
58.2k
      }
5633
285k
  }
5634
5.01M
  if (ctxt->instate == XML_PARSER_EOF)
5635
0
      goto done;
5636
5.01M
  SKIP_BLANKS;
5637
5.01M
  if (RAW != '>') {
5638
10.1k
      xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
5639
10.1k
              "xmlParseEntityDecl: entity %s not terminated\n", name);
5640
10.1k
      xmlHaltParser(ctxt);
5641
5.00M
  } else {
5642
5.00M
      if (inputid != ctxt->input->id) {
5643
97
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5644
97
                         "Entity declaration doesn't start and stop in"
5645
97
                               " the same entity\n");
5646
97
      }
5647
5.00M
      NEXT;
5648
5.00M
  }
5649
5.01M
  if (orig != NULL) {
5650
      /*
5651
       * Ugly mechanism to save the raw entity value.
5652
       */
5653
4.38M
      xmlEntityPtr cur = NULL;
5654
5655
4.38M
      if (isParameter) {
5656
4.16M
          if ((ctxt->sax != NULL) &&
5657
4.16M
        (ctxt->sax->getParameterEntity != NULL))
5658
4.16M
        cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5659
4.16M
      } else {
5660
215k
          if ((ctxt->sax != NULL) &&
5661
215k
        (ctxt->sax->getEntity != NULL))
5662
215k
        cur = ctxt->sax->getEntity(ctxt->userData, name);
5663
215k
    if ((cur == NULL) && (ctxt->userData==ctxt)) {
5664
24.8k
        cur = xmlSAX2GetEntity(ctxt, name);
5665
24.8k
    }
5666
215k
      }
5667
4.38M
            if ((cur != NULL) && (cur->orig == NULL)) {
5668
312k
    cur->orig = orig;
5669
312k
                orig = NULL;
5670
312k
      }
5671
4.38M
  }
5672
5673
5.01M
done:
5674
5.01M
  if (value != NULL) xmlFree(value);
5675
5.01M
  if (URI != NULL) xmlFree(URI);
5676
5.01M
  if (literal != NULL) xmlFree(literal);
5677
5.01M
        if (orig != NULL) xmlFree(orig);
5678
5.01M
    }
5679
5.02M
}
5680
5681
/**
5682
 * xmlParseDefaultDecl:
5683
 * @ctxt:  an XML parser context
5684
 * @value:  Receive a possible fixed default value for the attribute
5685
 *
5686
 * DEPRECATED: Internal function, don't use.
5687
 *
5688
 * Parse an attribute default declaration
5689
 *
5690
 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5691
 *
5692
 * [ VC: Required Attribute ]
5693
 * if the default declaration is the keyword #REQUIRED, then the
5694
 * attribute must be specified for all elements of the type in the
5695
 * attribute-list declaration.
5696
 *
5697
 * [ VC: Attribute Default Legal ]
5698
 * The declared default value must meet the lexical constraints of
5699
 * the declared attribute type c.f. xmlValidateAttributeDecl()
5700
 *
5701
 * [ VC: Fixed Attribute Default ]
5702
 * if an attribute has a default value declared with the #FIXED
5703
 * keyword, instances of that attribute must match the default value.
5704
 *
5705
 * [ WFC: No < in Attribute Values ]
5706
 * handled in xmlParseAttValue()
5707
 *
5708
 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5709
 *          or XML_ATTRIBUTE_FIXED.
5710
 */
5711
5712
int
5713
7.38M
xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5714
7.38M
    int val;
5715
7.38M
    xmlChar *ret;
5716
5717
7.38M
    *value = NULL;
5718
7.38M
    if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
5719
403k
  SKIP(9);
5720
403k
  return(XML_ATTRIBUTE_REQUIRED);
5721
403k
    }
5722
6.98M
    if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
5723
5.58M
  SKIP(8);
5724
5.58M
  return(XML_ATTRIBUTE_IMPLIED);
5725
5.58M
    }
5726
1.39M
    val = XML_ATTRIBUTE_NONE;
5727
1.39M
    if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
5728
912k
  SKIP(6);
5729
912k
  val = XML_ATTRIBUTE_FIXED;
5730
912k
  if (SKIP_BLANKS == 0) {
5731
450
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5732
450
         "Space required after '#FIXED'\n");
5733
450
  }
5734
912k
    }
5735
1.39M
    ret = xmlParseAttValue(ctxt);
5736
1.39M
    ctxt->instate = XML_PARSER_DTD;
5737
1.39M
    if (ret == NULL) {
5738
79.4k
  xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
5739
79.4k
           "Attribute default value declaration error\n");
5740
79.4k
    } else
5741
1.31M
        *value = ret;
5742
1.39M
    return(val);
5743
6.98M
}
5744
5745
/**
5746
 * xmlParseNotationType:
5747
 * @ctxt:  an XML parser context
5748
 *
5749
 * DEPRECATED: Internal function, don't use.
5750
 *
5751
 * parse an Notation attribute type.
5752
 *
5753
 * Note: the leading 'NOTATION' S part has already being parsed...
5754
 *
5755
 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5756
 *
5757
 * [ VC: Notation Attributes ]
5758
 * Values of this type must match one of the notation names included
5759
 * in the declaration; all notation names in the declaration must be declared.
5760
 *
5761
 * Returns: the notation attribute tree built while parsing
5762
 */
5763
5764
xmlEnumerationPtr
5765
3.59k
xmlParseNotationType(xmlParserCtxtPtr ctxt) {
5766
3.59k
    const xmlChar *name;
5767
3.59k
    xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5768
5769
3.59k
    if (RAW != '(') {
5770
340
  xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5771
340
  return(NULL);
5772
340
    }
5773
3.25k
    SHRINK;
5774
9.00k
    do {
5775
9.00k
        NEXT;
5776
9.00k
  SKIP_BLANKS;
5777
9.00k
        name = xmlParseName(ctxt);
5778
9.00k
  if (name == NULL) {
5779
381
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5780
381
         "Name expected in NOTATION declaration\n");
5781
381
            xmlFreeEnumeration(ret);
5782
381
      return(NULL);
5783
381
  }
5784
8.62k
  tmp = ret;
5785
31.6k
  while (tmp != NULL) {
5786
24.3k
      if (xmlStrEqual(name, tmp->name)) {
5787
1.24k
    xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5788
1.24k
    "standalone: attribute notation value token %s duplicated\n",
5789
1.24k
         name, NULL);
5790
1.24k
    if (!xmlDictOwns(ctxt->dict, name))
5791
0
        xmlFree((xmlChar *) name);
5792
1.24k
    break;
5793
1.24k
      }
5794
23.0k
      tmp = tmp->next;
5795
23.0k
  }
5796
8.62k
  if (tmp == NULL) {
5797
7.38k
      cur = xmlCreateEnumeration(name);
5798
7.38k
      if (cur == NULL) {
5799
0
                xmlFreeEnumeration(ret);
5800
0
                return(NULL);
5801
0
            }
5802
7.38k
      if (last == NULL) ret = last = cur;
5803
4.37k
      else {
5804
4.37k
    last->next = cur;
5805
4.37k
    last = cur;
5806
4.37k
      }
5807
7.38k
  }
5808
8.62k
  SKIP_BLANKS;
5809
8.62k
    } while (RAW == '|');
5810
2.87k
    if (RAW != ')') {
5811
676
  xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5812
676
        xmlFreeEnumeration(ret);
5813
676
  return(NULL);
5814
676
    }
5815
2.19k
    NEXT;
5816
2.19k
    return(ret);
5817
2.87k
}
5818
5819
/**
5820
 * xmlParseEnumerationType:
5821
 * @ctxt:  an XML parser context
5822
 *
5823
 * DEPRECATED: Internal function, don't use.
5824
 *
5825
 * parse an Enumeration attribute type.
5826
 *
5827
 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5828
 *
5829
 * [ VC: Enumeration ]
5830
 * Values of this type must match one of the Nmtoken tokens in
5831
 * the declaration
5832
 *
5833
 * Returns: the enumeration attribute tree built while parsing
5834
 */
5835
5836
xmlEnumerationPtr
5837
534k
xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
5838
534k
    xmlChar *name;
5839
534k
    xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5840
5841
534k
    if (RAW != '(') {
5842
34.9k
  xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
5843
34.9k
  return(NULL);
5844
34.9k
    }
5845
499k
    SHRINK;
5846
2.08M
    do {
5847
2.08M
        NEXT;
5848
2.08M
  SKIP_BLANKS;
5849
2.08M
        name = xmlParseNmtoken(ctxt);
5850
2.08M
  if (name == NULL) {
5851
3.07k
      xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
5852
3.07k
      return(ret);
5853
3.07k
  }
5854
2.07M
  tmp = ret;
5855
6.61M
  while (tmp != NULL) {
5856
4.54M
      if (xmlStrEqual(name, tmp->name)) {
5857
971
    xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5858
971
    "standalone: attribute enumeration value token %s duplicated\n",
5859
971
         name, NULL);
5860
971
    if (!xmlDictOwns(ctxt->dict, name))
5861
971
        xmlFree(name);
5862
971
    break;
5863
971
      }
5864
4.53M
      tmp = tmp->next;
5865
4.53M
  }
5866
2.07M
  if (tmp == NULL) {
5867
2.07M
      cur = xmlCreateEnumeration(name);
5868
2.07M
      if (!xmlDictOwns(ctxt->dict, name))
5869
2.07M
    xmlFree(name);
5870
2.07M
      if (cur == NULL) {
5871
0
                xmlFreeEnumeration(ret);
5872
0
                return(NULL);
5873
0
            }
5874
2.07M
      if (last == NULL) ret = last = cur;
5875
1.57M
      else {
5876
1.57M
    last->next = cur;
5877
1.57M
    last = cur;
5878
1.57M
      }
5879
2.07M
  }
5880
2.07M
  SKIP_BLANKS;
5881
2.07M
    } while (RAW == '|');
5882
496k
    if (RAW != ')') {
5883
74.0k
  xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
5884
74.0k
  return(ret);
5885
74.0k
    }
5886
422k
    NEXT;
5887
422k
    return(ret);
5888
496k
}
5889
5890
/**
5891
 * xmlParseEnumeratedType:
5892
 * @ctxt:  an XML parser context
5893
 * @tree:  the enumeration tree built while parsing
5894
 *
5895
 * DEPRECATED: Internal function, don't use.
5896
 *
5897
 * parse an Enumerated attribute type.
5898
 *
5899
 * [57] EnumeratedType ::= NotationType | Enumeration
5900
 *
5901
 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5902
 *
5903
 *
5904
 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5905
 */
5906
5907
int
5908
538k
xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5909
538k
    if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5910
3.93k
  SKIP(8);
5911
3.93k
  if (SKIP_BLANKS == 0) {
5912
342
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5913
342
         "Space required after 'NOTATION'\n");
5914
342
      return(0);
5915
342
  }
5916
3.59k
  *tree = xmlParseNotationType(ctxt);
5917
3.59k
  if (*tree == NULL) return(0);
5918
2.19k
  return(XML_ATTRIBUTE_NOTATION);
5919
3.59k
    }
5920
534k
    *tree = xmlParseEnumerationType(ctxt);
5921
534k
    if (*tree == NULL) return(0);
5922
496k
    return(XML_ATTRIBUTE_ENUMERATION);
5923
534k
}
5924
5925
/**
5926
 * xmlParseAttributeType:
5927
 * @ctxt:  an XML parser context
5928
 * @tree:  the enumeration tree built while parsing
5929
 *
5930
 * DEPRECATED: Internal function, don't use.
5931
 *
5932
 * parse the Attribute list def for an element
5933
 *
5934
 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5935
 *
5936
 * [55] StringType ::= 'CDATA'
5937
 *
5938
 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5939
 *                        'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5940
 *
5941
 * Validity constraints for attribute values syntax are checked in
5942
 * xmlValidateAttributeValue()
5943
 *
5944
 * [ VC: ID ]
5945
 * Values of type ID must match the Name production. A name must not
5946
 * appear more than once in an XML document as a value of this type;
5947
 * i.e., ID values must uniquely identify the elements which bear them.
5948
 *
5949
 * [ VC: One ID per Element Type ]
5950
 * No element type may have more than one ID attribute specified.
5951
 *
5952
 * [ VC: ID Attribute Default ]
5953
 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
5954
 *
5955
 * [ VC: IDREF ]
5956
 * Values of type IDREF must match the Name production, and values
5957
 * of type IDREFS must match Names; each IDREF Name must match the value
5958
 * of an ID attribute on some element in the XML document; i.e. IDREF
5959
 * values must match the value of some ID attribute.
5960
 *
5961
 * [ VC: Entity Name ]
5962
 * Values of type ENTITY must match the Name production, values
5963
 * of type ENTITIES must match Names; each Entity Name must match the
5964
 * name of an unparsed entity declared in the DTD.
5965
 *
5966
 * [ VC: Name Token ]
5967
 * Values of type NMTOKEN must match the Nmtoken production; values
5968
 * of type NMTOKENS must match Nmtokens.
5969
 *
5970
 * Returns the attribute type
5971
 */
5972
int
5973
7.50M
xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5974
7.50M
    SHRINK;
5975
7.50M
    if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
5976
3.80M
  SKIP(5);
5977
3.80M
  return(XML_ATTRIBUTE_CDATA);
5978
3.80M
     } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
5979
10.6k
  SKIP(6);
5980
10.6k
  return(XML_ATTRIBUTE_IDREFS);
5981
3.69M
     } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
5982
21.8k
  SKIP(5);
5983
21.8k
  return(XML_ATTRIBUTE_IDREF);
5984
3.66M
     } else if ((RAW == 'I') && (NXT(1) == 'D')) {
5985
2.01M
        SKIP(2);
5986
2.01M
  return(XML_ATTRIBUTE_ID);
5987
2.01M
     } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
5988
11.2k
  SKIP(6);
5989
11.2k
  return(XML_ATTRIBUTE_ENTITY);
5990
1.64M
     } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
5991
984
  SKIP(8);
5992
984
  return(XML_ATTRIBUTE_ENTITIES);
5993
1.63M
     } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
5994
373k
  SKIP(8);
5995
373k
  return(XML_ATTRIBUTE_NMTOKENS);
5996
1.26M
     } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
5997
727k
  SKIP(7);
5998
727k
  return(XML_ATTRIBUTE_NMTOKEN);
5999
727k
     }
6000
538k
     return(xmlParseEnumeratedType(ctxt, tree));
6001
7.50M
}
6002
6003
/**
6004
 * xmlParseAttributeListDecl:
6005
 * @ctxt:  an XML parser context
6006
 *
6007
 * DEPRECATED: Internal function, don't use.
6008
 *
6009
 * : parse the Attribute list def for an element
6010
 *
6011
 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
6012
 *
6013
 * [53] AttDef ::= S Name S AttType S DefaultDecl
6014
 *
6015
 */
6016
void
6017
2.46M
xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
6018
2.46M
    const xmlChar *elemName;
6019
2.46M
    const xmlChar *attrName;
6020
2.46M
    xmlEnumerationPtr tree;
6021
6022
2.46M
    if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
6023
2.46M
  int inputid = ctxt->input->id;
6024
6025
2.46M
  SKIP(9);
6026
2.46M
  if (SKIP_BLANKS == 0) {
6027
47.9k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6028
47.9k
                     "Space required after '<!ATTLIST'\n");
6029
47.9k
  }
6030
2.46M
        elemName = xmlParseName(ctxt);
6031
2.46M
  if (elemName == NULL) {
6032
4.11k
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6033
4.11k
         "ATTLIST: no name for Element\n");
6034
4.11k
      return;
6035
4.11k
  }
6036
2.45M
  SKIP_BLANKS;
6037
2.45M
  GROW;
6038
9.75M
  while ((RAW != '>') && (ctxt->instate != XML_PARSER_EOF)) {
6039
7.63M
      int type;
6040
7.63M
      int def;
6041
7.63M
      xmlChar *defaultValue = NULL;
6042
6043
7.63M
      GROW;
6044
7.63M
            tree = NULL;
6045
7.63M
      attrName = xmlParseName(ctxt);
6046
7.63M
      if (attrName == NULL) {
6047
96.4k
    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6048
96.4k
             "ATTLIST: no name for Attribute\n");
6049
96.4k
    break;
6050
96.4k
      }
6051
7.53M
      GROW;
6052
7.53M
      if (SKIP_BLANKS == 0) {
6053
30.9k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6054
30.9k
            "Space required after the attribute name\n");
6055
30.9k
    break;
6056
30.9k
      }
6057
6058
7.50M
      type = xmlParseAttributeType(ctxt, &tree);
6059
7.50M
      if (type <= 0) {
6060
39.5k
          break;
6061
39.5k
      }
6062
6063
7.46M
      GROW;
6064
7.46M
      if (SKIP_BLANKS == 0) {
6065
76.6k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6066
76.6k
             "Space required after the attribute type\n");
6067
76.6k
          if (tree != NULL)
6068
74.4k
        xmlFreeEnumeration(tree);
6069
76.6k
    break;
6070
76.6k
      }
6071
6072
7.38M
      def = xmlParseDefaultDecl(ctxt, &defaultValue);
6073
7.38M
      if (def <= 0) {
6074
0
                if (defaultValue != NULL)
6075
0
        xmlFree(defaultValue);
6076
0
          if (tree != NULL)
6077
0
        xmlFreeEnumeration(tree);
6078
0
          break;
6079
0
      }
6080
7.38M
      if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
6081
112k
          xmlAttrNormalizeSpace(defaultValue, defaultValue);
6082
6083
7.38M
      GROW;
6084
7.38M
            if (RAW != '>') {
6085
7.20M
    if (SKIP_BLANKS == 0) {
6086
95.1k
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6087
95.1k
      "Space required after the attribute default value\n");
6088
95.1k
        if (defaultValue != NULL)
6089
15.1k
      xmlFree(defaultValue);
6090
95.1k
        if (tree != NULL)
6091
4.46k
      xmlFreeEnumeration(tree);
6092
95.1k
        break;
6093
95.1k
    }
6094
7.20M
      }
6095
7.29M
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6096
7.29M
    (ctxt->sax->attributeDecl != NULL))
6097
3.48M
    ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
6098
3.48M
                          type, def, defaultValue, tree);
6099
3.81M
      else if (tree != NULL)
6100
145k
    xmlFreeEnumeration(tree);
6101
6102
7.29M
      if ((ctxt->sax2) && (defaultValue != NULL) &&
6103
7.29M
          (def != XML_ATTRIBUTE_IMPLIED) &&
6104
7.29M
    (def != XML_ATTRIBUTE_REQUIRED)) {
6105
619k
    xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
6106
619k
      }
6107
7.29M
      if (ctxt->sax2) {
6108
3.68M
    xmlAddSpecialAttr(ctxt, elemName, attrName, type);
6109
3.68M
      }
6110
7.29M
      if (defaultValue != NULL)
6111
1.30M
          xmlFree(defaultValue);
6112
7.29M
      GROW;
6113
7.29M
  }
6114
2.45M
  if (RAW == '>') {
6115
2.13M
      if (inputid != ctxt->input->id) {
6116
295
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6117
295
                               "Attribute list declaration doesn't start and"
6118
295
                               " stop in the same entity\n");
6119
295
      }
6120
2.13M
      NEXT;
6121
2.13M
  }
6122
2.45M
    }
6123
2.46M
}
6124
6125
/**
6126
 * xmlParseElementMixedContentDecl:
6127
 * @ctxt:  an XML parser context
6128
 * @inputchk:  the input used for the current entity, needed for boundary checks
6129
 *
6130
 * DEPRECATED: Internal function, don't use.
6131
 *
6132
 * parse the declaration for a Mixed Element content
6133
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6134
 *
6135
 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
6136
 *                '(' S? '#PCDATA' S? ')'
6137
 *
6138
 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
6139
 *
6140
 * [ VC: No Duplicate Types ]
6141
 * The same name must not appear more than once in a single
6142
 * mixed-content declaration.
6143
 *
6144
 * returns: the list of the xmlElementContentPtr describing the element choices
6145
 */
6146
xmlElementContentPtr
6147
501k
xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6148
501k
    xmlElementContentPtr ret = NULL, cur = NULL, n;
6149
501k
    const xmlChar *elem = NULL;
6150
6151
501k
    GROW;
6152
501k
    if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6153
501k
  SKIP(7);
6154
501k
  SKIP_BLANKS;
6155
501k
  SHRINK;
6156
501k
  if (RAW == ')') {
6157
351k
      if (ctxt->input->id != inputchk) {
6158
71
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6159
71
                               "Element content declaration doesn't start and"
6160
71
                               " stop in the same entity\n");
6161
71
      }
6162
351k
      NEXT;
6163
351k
      ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6164
351k
      if (ret == NULL)
6165
0
          return(NULL);
6166
351k
      if (RAW == '*') {
6167
211
    ret->ocur = XML_ELEMENT_CONTENT_MULT;
6168
211
    NEXT;
6169
211
      }
6170
351k
      return(ret);
6171
351k
  }
6172
149k
  if ((RAW == '(') || (RAW == '|')) {
6173
148k
      ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6174
148k
      if (ret == NULL) return(NULL);
6175
148k
  }
6176
1.41M
  while ((RAW == '|') && (ctxt->instate != XML_PARSER_EOF)) {
6177
1.27M
      NEXT;
6178
1.27M
      if (elem == NULL) {
6179
148k
          ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6180
148k
    if (ret == NULL) {
6181
0
        xmlFreeDocElementContent(ctxt->myDoc, cur);
6182
0
                    return(NULL);
6183
0
                }
6184
148k
    ret->c1 = cur;
6185
148k
    if (cur != NULL)
6186
148k
        cur->parent = ret;
6187
148k
    cur = ret;
6188
1.12M
      } else {
6189
1.12M
          n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6190
1.12M
    if (n == NULL) {
6191
0
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6192
0
                    return(NULL);
6193
0
                }
6194
1.12M
    n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6195
1.12M
    if (n->c1 != NULL)
6196
1.12M
        n->c1->parent = n;
6197
1.12M
          cur->c2 = n;
6198
1.12M
    if (n != NULL)
6199
1.12M
        n->parent = cur;
6200
1.12M
    cur = n;
6201
1.12M
      }
6202
1.27M
      SKIP_BLANKS;
6203
1.27M
      elem = xmlParseName(ctxt);
6204
1.27M
      if (elem == NULL) {
6205
8.32k
    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6206
8.32k
      "xmlParseElementMixedContentDecl : Name expected\n");
6207
8.32k
    xmlFreeDocElementContent(ctxt->myDoc, ret);
6208
8.32k
    return(NULL);
6209
8.32k
      }
6210
1.26M
      SKIP_BLANKS;
6211
1.26M
      GROW;
6212
1.26M
  }
6213
141k
  if ((RAW == ')') && (NXT(1) == '*')) {
6214
138k
      if (elem != NULL) {
6215
138k
    cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
6216
138k
                                   XML_ELEMENT_CONTENT_ELEMENT);
6217
138k
    if (cur->c2 != NULL)
6218
138k
        cur->c2->parent = cur;
6219
138k
            }
6220
138k
            if (ret != NULL)
6221
138k
                ret->ocur = XML_ELEMENT_CONTENT_MULT;
6222
138k
      if (ctxt->input->id != inputchk) {
6223
12
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6224
12
                               "Element content declaration doesn't start and"
6225
12
                               " stop in the same entity\n");
6226
12
      }
6227
138k
      SKIP(2);
6228
138k
  } else {
6229
2.77k
      xmlFreeDocElementContent(ctxt->myDoc, ret);
6230
2.77k
      xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
6231
2.77k
      return(NULL);
6232
2.77k
  }
6233
6234
141k
    } else {
6235
0
  xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
6236
0
    }
6237
138k
    return(ret);
6238
501k
}
6239
6240
/**
6241
 * xmlParseElementChildrenContentDeclPriv:
6242
 * @ctxt:  an XML parser context
6243
 * @inputchk:  the input used for the current entity, needed for boundary checks
6244
 * @depth: the level of recursion
6245
 *
6246
 * parse the declaration for a Mixed Element content
6247
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6248
 *
6249
 *
6250
 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6251
 *
6252
 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6253
 *
6254
 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6255
 *
6256
 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6257
 *
6258
 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6259
 * TODO Parameter-entity replacement text must be properly nested
6260
 *  with parenthesized groups. That is to say, if either of the
6261
 *  opening or closing parentheses in a choice, seq, or Mixed
6262
 *  construct is contained in the replacement text for a parameter
6263
 *  entity, both must be contained in the same replacement text. For
6264
 *  interoperability, if a parameter-entity reference appears in a
6265
 *  choice, seq, or Mixed construct, its replacement text should not
6266
 *  be empty, and neither the first nor last non-blank character of
6267
 *  the replacement text should be a connector (| or ,).
6268
 *
6269
 * Returns the tree of xmlElementContentPtr describing the element
6270
 *          hierarchy.
6271
 */
6272
static xmlElementContentPtr
6273
xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
6274
1.16M
                                       int depth) {
6275
1.16M
    xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
6276
1.16M
    const xmlChar *elem;
6277
1.16M
    xmlChar type = 0;
6278
6279
1.16M
    if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
6280
1.16M
        (depth >  2048)) {
6281
0
        xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED,
6282
0
"xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n",
6283
0
                          depth);
6284
0
  return(NULL);
6285
0
    }
6286
1.16M
    SKIP_BLANKS;
6287
1.16M
    GROW;
6288
1.16M
    if (RAW == '(') {
6289
24.4k
  int inputid = ctxt->input->id;
6290
6291
        /* Recurse on first child */
6292
24.4k
  NEXT;
6293
24.4k
  SKIP_BLANKS;
6294
24.4k
        cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6295
24.4k
                                                           depth + 1);
6296
24.4k
        if (cur == NULL)
6297
3.45k
            return(NULL);
6298
20.9k
  SKIP_BLANKS;
6299
20.9k
  GROW;
6300
1.14M
    } else {
6301
1.14M
  elem = xmlParseName(ctxt);
6302
1.14M
  if (elem == NULL) {
6303
4.49k
      xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6304
4.49k
      return(NULL);
6305
4.49k
  }
6306
1.13M
        cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6307
1.13M
  if (cur == NULL) {
6308
0
      xmlErrMemory(ctxt, NULL);
6309
0
      return(NULL);
6310
0
  }
6311
1.13M
  GROW;
6312
1.13M
  if (RAW == '?') {
6313
269k
      cur->ocur = XML_ELEMENT_CONTENT_OPT;
6314
269k
      NEXT;
6315
868k
  } else if (RAW == '*') {
6316
220k
      cur->ocur = XML_ELEMENT_CONTENT_MULT;
6317
220k
      NEXT;
6318
647k
  } else if (RAW == '+') {
6319
20.7k
      cur->ocur = XML_ELEMENT_CONTENT_PLUS;
6320
20.7k
      NEXT;
6321
626k
  } else {
6322
626k
      cur->ocur = XML_ELEMENT_CONTENT_ONCE;
6323
626k
  }
6324
1.13M
  GROW;
6325
1.13M
    }
6326
1.15M
    SKIP_BLANKS;
6327
1.15M
    SHRINK;
6328
4.14M
    while ((RAW != ')') && (ctxt->instate != XML_PARSER_EOF)) {
6329
        /*
6330
   * Each loop we parse one separator and one element.
6331
   */
6332
3.07M
        if (RAW == ',') {
6333
2.00M
      if (type == 0) type = CUR;
6334
6335
      /*
6336
       * Detect "Name | Name , Name" error
6337
       */
6338
1.19M
      else if (type != CUR) {
6339
367
    xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6340
367
        "xmlParseElementChildrenContentDecl : '%c' expected\n",
6341
367
                      type);
6342
367
    if ((last != NULL) && (last != ret))
6343
367
        xmlFreeDocElementContent(ctxt->myDoc, last);
6344
367
    if (ret != NULL)
6345
367
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6346
367
    return(NULL);
6347
367
      }
6348
2.00M
      NEXT;
6349
6350
2.00M
      op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
6351
2.00M
      if (op == NULL) {
6352
0
    if ((last != NULL) && (last != ret))
6353
0
        xmlFreeDocElementContent(ctxt->myDoc, last);
6354
0
          xmlFreeDocElementContent(ctxt->myDoc, ret);
6355
0
    return(NULL);
6356
0
      }
6357
2.00M
      if (last == NULL) {
6358
811k
    op->c1 = ret;
6359
811k
    if (ret != NULL)
6360
811k
        ret->parent = op;
6361
811k
    ret = cur = op;
6362
1.19M
      } else {
6363
1.19M
          cur->c2 = op;
6364
1.19M
    if (op != NULL)
6365
1.19M
        op->parent = cur;
6366
1.19M
    op->c1 = last;
6367
1.19M
    if (last != NULL)
6368
1.19M
        last->parent = op;
6369
1.19M
    cur =op;
6370
1.19M
    last = NULL;
6371
1.19M
      }
6372
2.00M
  } else if (RAW == '|') {
6373
988k
      if (type == 0) type = CUR;
6374
6375
      /*
6376
       * Detect "Name , Name | Name" error
6377
       */
6378
869k
      else if (type != CUR) {
6379
363
    xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6380
363
        "xmlParseElementChildrenContentDecl : '%c' expected\n",
6381
363
          type);
6382
363
    if ((last != NULL) && (last != ret))
6383
363
        xmlFreeDocElementContent(ctxt->myDoc, last);
6384
363
    if (ret != NULL)
6385
363
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6386
363
    return(NULL);
6387
363
      }
6388
987k
      NEXT;
6389
6390
987k
      op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6391
987k
      if (op == NULL) {
6392
0
    if ((last != NULL) && (last != ret))
6393
0
        xmlFreeDocElementContent(ctxt->myDoc, last);
6394
0
    if (ret != NULL)
6395
0
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6396
0
    return(NULL);
6397
0
      }
6398
987k
      if (last == NULL) {
6399
118k
    op->c1 = ret;
6400
118k
    if (ret != NULL)
6401
118k
        ret->parent = op;
6402
118k
    ret = cur = op;
6403
869k
      } else {
6404
869k
          cur->c2 = op;
6405
869k
    if (op != NULL)
6406
869k
        op->parent = cur;
6407
869k
    op->c1 = last;
6408
869k
    if (last != NULL)
6409
869k
        last->parent = op;
6410
869k
    cur =op;
6411
869k
    last = NULL;
6412
869k
      }
6413
987k
  } else {
6414
79.8k
      xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
6415
79.8k
      if ((last != NULL) && (last != ret))
6416
56.7k
          xmlFreeDocElementContent(ctxt->myDoc, last);
6417
79.8k
      if (ret != NULL)
6418
79.8k
    xmlFreeDocElementContent(ctxt->myDoc, ret);
6419
79.8k
      return(NULL);
6420
79.8k
  }
6421
2.99M
  GROW;
6422
2.99M
  SKIP_BLANKS;
6423
2.99M
  GROW;
6424
2.99M
  if (RAW == '(') {
6425
36.5k
      int inputid = ctxt->input->id;
6426
      /* Recurse on second child */
6427
36.5k
      NEXT;
6428
36.5k
      SKIP_BLANKS;
6429
36.5k
      last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6430
36.5k
                                                          depth + 1);
6431
36.5k
            if (last == NULL) {
6432
1.66k
    if (ret != NULL)
6433
1.66k
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6434
1.66k
    return(NULL);
6435
1.66k
            }
6436
34.9k
      SKIP_BLANKS;
6437
2.96M
  } else {
6438
2.96M
      elem = xmlParseName(ctxt);
6439
2.96M
      if (elem == NULL) {
6440
13.8k
    xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6441
13.8k
    if (ret != NULL)
6442
13.8k
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6443
13.8k
    return(NULL);
6444
13.8k
      }
6445
2.94M
      last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6446
2.94M
      if (last == NULL) {
6447
0
    if (ret != NULL)
6448
0
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6449
0
    return(NULL);
6450
0
      }
6451
2.94M
      if (RAW == '?') {
6452
762k
    last->ocur = XML_ELEMENT_CONTENT_OPT;
6453
762k
    NEXT;
6454
2.18M
      } else if (RAW == '*') {
6455
760k
    last->ocur = XML_ELEMENT_CONTENT_MULT;
6456
760k
    NEXT;
6457
1.42M
      } else if (RAW == '+') {
6458
21.4k
    last->ocur = XML_ELEMENT_CONTENT_PLUS;
6459
21.4k
    NEXT;
6460
1.40M
      } else {
6461
1.40M
    last->ocur = XML_ELEMENT_CONTENT_ONCE;
6462
1.40M
      }
6463
2.94M
  }
6464
2.98M
  SKIP_BLANKS;
6465
2.98M
  GROW;
6466
2.98M
    }
6467
1.06M
    if ((cur != NULL) && (last != NULL)) {
6468
856k
        cur->c2 = last;
6469
856k
  if (last != NULL)
6470
856k
      last->parent = cur;
6471
856k
    }
6472
1.06M
    if (ctxt->input->id != inputchk) {
6473
194
  xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6474
194
                       "Element content declaration doesn't start and stop in"
6475
194
                       " the same entity\n");
6476
194
    }
6477
1.06M
    NEXT;
6478
1.06M
    if (RAW == '?') {
6479
17.3k
  if (ret != NULL) {
6480
17.3k
      if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6481
17.3k
          (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6482
17
          ret->ocur = XML_ELEMENT_CONTENT_MULT;
6483
17.2k
      else
6484
17.2k
          ret->ocur = XML_ELEMENT_CONTENT_OPT;
6485
17.3k
  }
6486
17.3k
  NEXT;
6487
1.04M
    } else if (RAW == '*') {
6488
155k
  if (ret != NULL) {
6489
155k
      ret->ocur = XML_ELEMENT_CONTENT_MULT;
6490
155k
      cur = ret;
6491
      /*
6492
       * Some normalization:
6493
       * (a | b* | c?)* == (a | b | c)*
6494
       */
6495
924k
      while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6496
769k
    if ((cur->c1 != NULL) &&
6497
769k
              ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6498
769k
         (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6499
18.0k
        cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6500
769k
    if ((cur->c2 != NULL) &&
6501
769k
              ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6502
769k
         (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6503
2.77k
        cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6504
769k
    cur = cur->c2;
6505
769k
      }
6506
155k
  }
6507
155k
  NEXT;
6508
889k
    } else if (RAW == '+') {
6509
31.1k
  if (ret != NULL) {
6510
31.1k
      int found = 0;
6511
6512
31.1k
      if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6513
31.1k
          (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6514
0
          ret->ocur = XML_ELEMENT_CONTENT_MULT;
6515
31.1k
      else
6516
31.1k
          ret->ocur = XML_ELEMENT_CONTENT_PLUS;
6517
      /*
6518
       * Some normalization:
6519
       * (a | b*)+ == (a | b)*
6520
       * (a | b?)+ == (a | b)*
6521
       */
6522
44.5k
      while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6523
13.4k
    if ((cur->c1 != NULL) &&
6524
13.4k
              ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6525
13.4k
         (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6526
447
        cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6527
447
        found = 1;
6528
447
    }
6529
13.4k
    if ((cur->c2 != NULL) &&
6530
13.4k
              ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6531
13.4k
         (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6532
320
        cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6533
320
        found = 1;
6534
320
    }
6535
13.4k
    cur = cur->c2;
6536
13.4k
      }
6537
31.1k
      if (found)
6538
473
    ret->ocur = XML_ELEMENT_CONTENT_MULT;
6539
31.1k
  }
6540
31.1k
  NEXT;
6541
31.1k
    }
6542
1.06M
    return(ret);
6543
1.15M
}
6544
6545
/**
6546
 * xmlParseElementChildrenContentDecl:
6547
 * @ctxt:  an XML parser context
6548
 * @inputchk:  the input used for the current entity, needed for boundary checks
6549
 *
6550
 * DEPRECATED: Internal function, don't use.
6551
 *
6552
 * parse the declaration for a Mixed Element content
6553
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6554
 *
6555
 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6556
 *
6557
 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6558
 *
6559
 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6560
 *
6561
 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6562
 *
6563
 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6564
 * TODO Parameter-entity replacement text must be properly nested
6565
 *  with parenthesized groups. That is to say, if either of the
6566
 *  opening or closing parentheses in a choice, seq, or Mixed
6567
 *  construct is contained in the replacement text for a parameter
6568
 *  entity, both must be contained in the same replacement text. For
6569
 *  interoperability, if a parameter-entity reference appears in a
6570
 *  choice, seq, or Mixed construct, its replacement text should not
6571
 *  be empty, and neither the first nor last non-blank character of
6572
 *  the replacement text should be a connector (| or ,).
6573
 *
6574
 * Returns the tree of xmlElementContentPtr describing the element
6575
 *          hierarchy.
6576
 */
6577
xmlElementContentPtr
6578
0
xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6579
    /* stub left for API/ABI compat */
6580
0
    return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6581
0
}
6582
6583
/**
6584
 * xmlParseElementContentDecl:
6585
 * @ctxt:  an XML parser context
6586
 * @name:  the name of the element being defined.
6587
 * @result:  the Element Content pointer will be stored here if any
6588
 *
6589
 * DEPRECATED: Internal function, don't use.
6590
 *
6591
 * parse the declaration for an Element content either Mixed or Children,
6592
 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
6593
 *
6594
 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6595
 *
6596
 * returns: the type of element content XML_ELEMENT_TYPE_xxx
6597
 */
6598
6599
int
6600
xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
6601
1.60M
                           xmlElementContentPtr *result) {
6602
6603
1.60M
    xmlElementContentPtr tree = NULL;
6604
1.60M
    int inputid = ctxt->input->id;
6605
1.60M
    int res;
6606
6607
1.60M
    *result = NULL;
6608
6609
1.60M
    if (RAW != '(') {
6610
0
  xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6611
0
    "xmlParseElementContentDecl : %s '(' expected\n", name);
6612
0
  return(-1);
6613
0
    }
6614
1.60M
    NEXT;
6615
1.60M
    GROW;
6616
1.60M
    if (ctxt->instate == XML_PARSER_EOF)
6617
0
        return(-1);
6618
1.60M
    SKIP_BLANKS;
6619
1.60M
    if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6620
501k
        tree = xmlParseElementMixedContentDecl(ctxt, inputid);
6621
501k
  res = XML_ELEMENT_TYPE_MIXED;
6622
1.10M
    } else {
6623
1.10M
        tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
6624
1.10M
  res = XML_ELEMENT_TYPE_ELEMENT;
6625
1.10M
    }
6626
1.60M
    SKIP_BLANKS;
6627
1.60M
    *result = tree;
6628
1.60M
    return(res);
6629
1.60M
}
6630
6631
/**
6632
 * xmlParseElementDecl:
6633
 * @ctxt:  an XML parser context
6634
 *
6635
 * DEPRECATED: Internal function, don't use.
6636
 *
6637
 * parse an Element declaration.
6638
 *
6639
 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6640
 *
6641
 * [ VC: Unique Element Type Declaration ]
6642
 * No element type may be declared more than once
6643
 *
6644
 * Returns the type of the element, or -1 in case of error
6645
 */
6646
int
6647
2.64M
xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
6648
2.64M
    const xmlChar *name;
6649
2.64M
    int ret = -1;
6650
2.64M
    xmlElementContentPtr content  = NULL;
6651
6652
    /* GROW; done in the caller */
6653
2.64M
    if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
6654
2.64M
  int inputid = ctxt->input->id;
6655
6656
2.64M
  SKIP(9);
6657
2.64M
  if (SKIP_BLANKS == 0) {
6658
1.64k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6659
1.64k
               "Space required after 'ELEMENT'\n");
6660
1.64k
      return(-1);
6661
1.64k
  }
6662
2.64M
        name = xmlParseName(ctxt);
6663
2.64M
  if (name == NULL) {
6664
5.66k
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6665
5.66k
         "xmlParseElementDecl: no name for Element\n");
6666
5.66k
      return(-1);
6667
5.66k
  }
6668
2.63M
  if (SKIP_BLANKS == 0) {
6669
3.72k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6670
3.72k
         "Space required after the element name\n");
6671
3.72k
  }
6672
2.63M
  if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
6673
1.00M
      SKIP(5);
6674
      /*
6675
       * Element must always be empty.
6676
       */
6677
1.00M
      ret = XML_ELEMENT_TYPE_EMPTY;
6678
1.63M
  } else if ((RAW == 'A') && (NXT(1) == 'N') &&
6679
1.63M
             (NXT(2) == 'Y')) {
6680
7.23k
      SKIP(3);
6681
      /*
6682
       * Element is a generic container.
6683
       */
6684
7.23k
      ret = XML_ELEMENT_TYPE_ANY;
6685
1.62M
  } else if (RAW == '(') {
6686
1.60M
      ret = xmlParseElementContentDecl(ctxt, name, &content);
6687
1.60M
  } else {
6688
      /*
6689
       * [ WFC: PEs in Internal Subset ] error handling.
6690
       */
6691
21.9k
      if ((RAW == '%') && (ctxt->external == 0) &&
6692
21.9k
          (ctxt->inputNr == 1)) {
6693
257
    xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
6694
257
    "PEReference: forbidden within markup decl in internal subset\n");
6695
21.7k
      } else {
6696
21.7k
    xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6697
21.7k
          "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6698
21.7k
            }
6699
21.9k
      return(-1);
6700
21.9k
  }
6701
6702
2.61M
  SKIP_BLANKS;
6703
6704
2.61M
  if (RAW != '>') {
6705
117k
      xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
6706
117k
      if (content != NULL) {
6707
17.4k
    xmlFreeDocElementContent(ctxt->myDoc, content);
6708
17.4k
      }
6709
2.49M
  } else {
6710
2.49M
      if (inputid != ctxt->input->id) {
6711
165
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6712
165
                               "Element declaration doesn't start and stop in"
6713
165
                               " the same entity\n");
6714
165
      }
6715
6716
2.49M
      NEXT;
6717
2.49M
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6718
2.49M
    (ctxt->sax->elementDecl != NULL)) {
6719
1.20M
    if (content != NULL)
6720
831k
        content->parent = NULL;
6721
1.20M
          ctxt->sax->elementDecl(ctxt->userData, name, ret,
6722
1.20M
                           content);
6723
1.20M
    if ((content != NULL) && (content->parent == NULL)) {
6724
        /*
6725
         * this is a trick: if xmlAddElementDecl is called,
6726
         * instead of copying the full tree it is plugged directly
6727
         * if called from the parser. Avoid duplicating the
6728
         * interfaces or change the API/ABI
6729
         */
6730
347k
        xmlFreeDocElementContent(ctxt->myDoc, content);
6731
347k
    }
6732
1.28M
      } else if (content != NULL) {
6733
647k
    xmlFreeDocElementContent(ctxt->myDoc, content);
6734
647k
      }
6735
2.49M
  }
6736
2.61M
    }
6737
2.62M
    return(ret);
6738
2.64M
}
6739
6740
/**
6741
 * xmlParseConditionalSections
6742
 * @ctxt:  an XML parser context
6743
 *
6744
 * [61] conditionalSect ::= includeSect | ignoreSect
6745
 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6746
 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6747
 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6748
 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6749
 */
6750
6751
static void
6752
6.41k
xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
6753
6.41k
    int *inputIds = NULL;
6754
6.41k
    size_t inputIdsSize = 0;
6755
6.41k
    size_t depth = 0;
6756
6757
41.5k
    while (ctxt->instate != XML_PARSER_EOF) {
6758
41.4k
        if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6759
19.8k
            int id = ctxt->input->id;
6760
6761
19.8k
            SKIP(3);
6762
19.8k
            SKIP_BLANKS;
6763
6764
19.8k
            if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
6765
15.9k
                SKIP(7);
6766
15.9k
                SKIP_BLANKS;
6767
15.9k
                if (RAW != '[') {
6768
51
                    xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6769
51
                    xmlHaltParser(ctxt);
6770
51
                    goto error;
6771
51
                }
6772
15.9k
                if (ctxt->input->id != id) {
6773
48
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6774
48
                                   "All markup of the conditional section is"
6775
48
                                   " not in the same entity\n");
6776
48
                }
6777
15.9k
                NEXT;
6778
6779
15.9k
                if (inputIdsSize <= depth) {
6780
4.45k
                    int *tmp;
6781
6782
4.45k
                    inputIdsSize = (inputIdsSize == 0 ? 4 : inputIdsSize * 2);
6783
4.45k
                    tmp = (int *) xmlRealloc(inputIds,
6784
4.45k
                            inputIdsSize * sizeof(int));
6785
4.45k
                    if (tmp == NULL) {
6786
0
                        xmlErrMemory(ctxt, NULL);
6787
0
                        goto error;
6788
0
                    }
6789
4.45k
                    inputIds = tmp;
6790
4.45k
                }
6791
15.9k
                inputIds[depth] = id;
6792
15.9k
                depth++;
6793
15.9k
            } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
6794
3.14k
                int state;
6795
3.14k
                xmlParserInputState instate;
6796
3.14k
                size_t ignoreDepth = 0;
6797
6798
3.14k
                SKIP(6);
6799
3.14k
                SKIP_BLANKS;
6800
3.14k
                if (RAW != '[') {
6801
60
                    xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6802
60
                    xmlHaltParser(ctxt);
6803
60
                    goto error;
6804
60
                }
6805
3.08k
                if (ctxt->input->id != id) {
6806
18
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6807
18
                                   "All markup of the conditional section is"
6808
18
                                   " not in the same entity\n");
6809
18
                }
6810
3.08k
                NEXT;
6811
6812
                /*
6813
                 * Parse up to the end of the conditional section but disable
6814
                 * SAX event generating DTD building in the meantime
6815
                 */
6816
3.08k
                state = ctxt->disableSAX;
6817
3.08k
                instate = ctxt->instate;
6818
3.08k
                if (ctxt->recovery == 0) ctxt->disableSAX = 1;
6819
3.08k
                ctxt->instate = XML_PARSER_IGNORE;
6820
6821
1.86M
                while (RAW != 0) {
6822
1.86M
                    if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6823
7.78k
                        SKIP(3);
6824
7.78k
                        ignoreDepth++;
6825
                        /* Check for integer overflow */
6826
7.78k
                        if (ignoreDepth == 0) {
6827
0
                            xmlErrMemory(ctxt, NULL);
6828
0
                            goto error;
6829
0
                        }
6830
1.85M
                    } else if ((RAW == ']') && (NXT(1) == ']') &&
6831
1.85M
                               (NXT(2) == '>')) {
6832
6.95k
                        if (ignoreDepth == 0)
6833
1.79k
                            break;
6834
5.16k
                        SKIP(3);
6835
5.16k
                        ignoreDepth--;
6836
1.85M
                    } else {
6837
1.85M
                        NEXT;
6838
1.85M
                    }
6839
1.86M
                }
6840
6841
3.08k
                ctxt->disableSAX = state;
6842
3.08k
                ctxt->instate = instate;
6843
6844
3.08k
    if (RAW == 0) {
6845
1.29k
        xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
6846
1.29k
                    goto error;
6847
1.29k
    }
6848
1.79k
                if (ctxt->input->id != id) {
6849
6
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6850
6
                                   "All markup of the conditional section is"
6851
6
                                   " not in the same entity\n");
6852
6
                }
6853
1.79k
                SKIP(3);
6854
1.79k
            } else {
6855
741
                xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
6856
741
                xmlHaltParser(ctxt);
6857
741
                goto error;
6858
741
            }
6859
21.6k
        } else if ((depth > 0) &&
6860
21.6k
                   (RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6861
8.98k
            depth--;
6862
8.98k
            if (ctxt->input->id != inputIds[depth]) {
6863
295
                xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6864
295
                               "All markup of the conditional section is not"
6865
295
                               " in the same entity\n");
6866
295
            }
6867
8.98k
            SKIP(3);
6868
12.6k
        } else {
6869
12.6k
            int id = ctxt->input->id;
6870
12.6k
            unsigned long cons = CUR_CONSUMED;
6871
6872
12.6k
            xmlParseMarkupDecl(ctxt);
6873
6874
12.6k
            if ((id == ctxt->input->id) && (cons == CUR_CONSUMED)) {
6875
1.21k
                xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6876
1.21k
                xmlHaltParser(ctxt);
6877
1.21k
                goto error;
6878
1.21k
            }
6879
12.6k
        }
6880
6881
38.0k
        if (depth == 0)
6882
2.97k
            break;
6883
6884
35.1k
        SKIP_BLANKS;
6885
35.1k
        GROW;
6886
35.1k
    }
6887
6888
6.41k
error:
6889
6.41k
    xmlFree(inputIds);
6890
6.41k
}
6891
6892
/**
6893
 * xmlParseMarkupDecl:
6894
 * @ctxt:  an XML parser context
6895
 *
6896
 * DEPRECATED: Internal function, don't use.
6897
 *
6898
 * parse Markup declarations
6899
 *
6900
 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6901
 *                     NotationDecl | PI | Comment
6902
 *
6903
 * [ VC: Proper Declaration/PE Nesting ]
6904
 * Parameter-entity replacement text must be properly nested with
6905
 * markup declarations. That is to say, if either the first character
6906
 * or the last character of a markup declaration (markupdecl above) is
6907
 * contained in the replacement text for a parameter-entity reference,
6908
 * both must be contained in the same replacement text.
6909
 *
6910
 * [ WFC: PEs in Internal Subset ]
6911
 * In the internal DTD subset, parameter-entity references can occur
6912
 * only where markup declarations can occur, not within markup declarations.
6913
 * (This does not apply to references that occur in external parameter
6914
 * entities or to the external subset.)
6915
 */
6916
void
6917
11.8M
xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
6918
11.8M
    GROW;
6919
11.8M
    if (CUR == '<') {
6920
11.4M
        if (NXT(1) == '!') {
6921
11.2M
      switch (NXT(2)) {
6922
7.67M
          case 'E':
6923
7.67M
        if (NXT(3) == 'L')
6924
2.64M
      xmlParseElementDecl(ctxt);
6925
5.02M
        else if (NXT(3) == 'N')
6926
5.02M
      xmlParseEntityDecl(ctxt);
6927
7.67M
        break;
6928
2.46M
          case 'A':
6929
2.46M
        xmlParseAttributeListDecl(ctxt);
6930
2.46M
        break;
6931
23.0k
          case 'N':
6932
23.0k
        xmlParseNotationDecl(ctxt);
6933
23.0k
        break;
6934
1.07M
          case '-':
6935
1.07M
        xmlParseComment(ctxt);
6936
1.07M
        break;
6937
21.9k
    default:
6938
        /* there is an error but it will be detected later */
6939
21.9k
        break;
6940
11.2M
      }
6941
11.2M
  } else if (NXT(1) == '?') {
6942
185k
      xmlParsePI(ctxt);
6943
185k
  }
6944
11.4M
    }
6945
6946
    /*
6947
     * detect requirement to exit there and act accordingly
6948
     * and avoid having instate overridden later on
6949
     */
6950
11.8M
    if (ctxt->instate == XML_PARSER_EOF)
6951
10.1k
        return;
6952
6953
11.8M
    ctxt->instate = XML_PARSER_DTD;
6954
11.8M
}
6955
6956
/**
6957
 * xmlParseTextDecl:
6958
 * @ctxt:  an XML parser context
6959
 *
6960
 * DEPRECATED: Internal function, don't use.
6961
 *
6962
 * parse an XML declaration header for external entities
6963
 *
6964
 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
6965
 */
6966
6967
void
6968
178k
xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
6969
178k
    xmlChar *version;
6970
178k
    const xmlChar *encoding;
6971
178k
    int oldstate;
6972
6973
    /*
6974
     * We know that '<?xml' is here.
6975
     */
6976
178k
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
6977
178k
  SKIP(5);
6978
178k
    } else {
6979
180
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
6980
180
  return;
6981
180
    }
6982
6983
    /* Avoid expansion of parameter entities when skipping blanks. */
6984
178k
    oldstate = ctxt->instate;
6985
178k
    ctxt->instate = XML_PARSER_START;
6986
6987
178k
    if (SKIP_BLANKS == 0) {
6988
0
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6989
0
           "Space needed after '<?xml'\n");
6990
0
    }
6991
6992
    /*
6993
     * We may have the VersionInfo here.
6994
     */
6995
178k
    version = xmlParseVersionInfo(ctxt);
6996
178k
    if (version == NULL)
6997
78.9k
  version = xmlCharStrdup(XML_DEFAULT_VERSION);
6998
99.3k
    else {
6999
99.3k
  if (SKIP_BLANKS == 0) {
7000
14.6k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7001
14.6k
               "Space needed here\n");
7002
14.6k
  }
7003
99.3k
    }
7004
178k
    ctxt->input->version = version;
7005
7006
    /*
7007
     * We must have the encoding declaration
7008
     */
7009
178k
    encoding = xmlParseEncodingDecl(ctxt);
7010
178k
    if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7011
  /*
7012
   * The XML REC instructs us to stop parsing right here
7013
   */
7014
413
        ctxt->instate = oldstate;
7015
413
        return;
7016
413
    }
7017
177k
    if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
7018
3.84k
  xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
7019
3.84k
           "Missing encoding in text declaration\n");
7020
3.84k
    }
7021
7022
177k
    SKIP_BLANKS;
7023
177k
    if ((RAW == '?') && (NXT(1) == '>')) {
7024
33.3k
        SKIP(2);
7025
144k
    } else if (RAW == '>') {
7026
        /* Deprecated old WD ... */
7027
603
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
7028
603
  NEXT;
7029
144k
    } else {
7030
144k
        int c;
7031
7032
144k
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
7033
13.9M
        while ((c = CUR) != 0) {
7034
13.9M
            NEXT;
7035
13.9M
            if (c == '>')
7036
140k
                break;
7037
13.9M
        }
7038
144k
    }
7039
7040
177k
    ctxt->instate = oldstate;
7041
177k
}
7042
7043
/**
7044
 * xmlParseExternalSubset:
7045
 * @ctxt:  an XML parser context
7046
 * @ExternalID: the external identifier
7047
 * @SystemID: the system identifier (or URL)
7048
 *
7049
 * parse Markup declarations from an external subset
7050
 *
7051
 * [30] extSubset ::= textDecl? extSubsetDecl
7052
 *
7053
 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
7054
 */
7055
void
7056
xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
7057
22.9k
                       const xmlChar *SystemID) {
7058
22.9k
    xmlDetectSAX2(ctxt);
7059
22.9k
    GROW;
7060
7061
22.9k
    if ((ctxt->encoding == NULL) &&
7062
22.9k
        (ctxt->input->end - ctxt->input->cur >= 4)) {
7063
22.9k
        xmlChar start[4];
7064
22.9k
  xmlCharEncoding enc;
7065
7066
22.9k
  start[0] = RAW;
7067
22.9k
  start[1] = NXT(1);
7068
22.9k
  start[2] = NXT(2);
7069
22.9k
  start[3] = NXT(3);
7070
22.9k
  enc = xmlDetectCharEncoding(start, 4);
7071
22.9k
  if (enc != XML_CHAR_ENCODING_NONE)
7072
5.32k
      xmlSwitchEncoding(ctxt, enc);
7073
22.9k
    }
7074
7075
22.9k
    if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
7076
5.11k
  xmlParseTextDecl(ctxt);
7077
5.11k
  if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7078
      /*
7079
       * The XML REC instructs us to stop parsing right here
7080
       */
7081
84
      xmlHaltParser(ctxt);
7082
84
      return;
7083
84
  }
7084
5.11k
    }
7085
22.9k
    if (ctxt->myDoc == NULL) {
7086
0
        ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
7087
0
  if (ctxt->myDoc == NULL) {
7088
0
      xmlErrMemory(ctxt, "New Doc failed");
7089
0
      return;
7090
0
  }
7091
0
  ctxt->myDoc->properties = XML_DOC_INTERNAL;
7092
0
    }
7093
22.9k
    if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
7094
0
        xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
7095
7096
22.9k
    ctxt->instate = XML_PARSER_DTD;
7097
22.9k
    ctxt->external = 1;
7098
22.9k
    SKIP_BLANKS;
7099
303k
    while (((RAW == '<') && (NXT(1) == '?')) ||
7100
303k
           ((RAW == '<') && (NXT(1) == '!')) ||
7101
303k
     (RAW == '%')) {
7102
283k
  int id = ctxt->input->id;
7103
283k
  unsigned long cons = CUR_CONSUMED;
7104
7105
283k
  GROW;
7106
283k
        if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7107
6.41k
      xmlParseConditionalSections(ctxt);
7108
6.41k
  } else
7109
276k
      xmlParseMarkupDecl(ctxt);
7110
283k
        SKIP_BLANKS;
7111
7112
283k
  if ((id == ctxt->input->id) && (cons == CUR_CONSUMED)) {
7113
2.26k
      xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7114
2.26k
      break;
7115
2.26k
  }
7116
283k
    }
7117
7118
22.9k
    if (RAW != 0) {
7119
6.68k
  xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7120
6.68k
    }
7121
7122
22.9k
}
7123
7124
/**
7125
 * xmlParseReference:
7126
 * @ctxt:  an XML parser context
7127
 *
7128
 * DEPRECATED: Internal function, don't use.
7129
 *
7130
 * parse and handle entity references in content, depending on the SAX
7131
 * interface, this may end-up in a call to character() if this is a
7132
 * CharRef, a predefined entity, if there is no reference() callback.
7133
 * or if the parser was asked to switch to that mode.
7134
 *
7135
 * [67] Reference ::= EntityRef | CharRef
7136
 */
7137
void
7138
7.05M
xmlParseReference(xmlParserCtxtPtr ctxt) {
7139
7.05M
    xmlEntityPtr ent;
7140
7.05M
    xmlChar *val;
7141
7.05M
    int was_checked;
7142
7.05M
    xmlNodePtr list = NULL;
7143
7.05M
    xmlParserErrors ret = XML_ERR_OK;
7144
7145
7146
7.05M
    if (RAW != '&')
7147
0
        return;
7148
7149
    /*
7150
     * Simple case of a CharRef
7151
     */
7152
7.05M
    if (NXT(1) == '#') {
7153
964k
  int i = 0;
7154
964k
  xmlChar out[16];
7155
964k
  int hex = NXT(2);
7156
964k
  int value = xmlParseCharRef(ctxt);
7157
7158
964k
  if (value == 0)
7159
198k
      return;
7160
766k
  if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
7161
      /*
7162
       * So we are using non-UTF-8 buffers
7163
       * Check that the char fit on 8bits, if not
7164
       * generate a CharRef.
7165
       */
7166
574k
      if (value <= 0xFF) {
7167
519k
    out[0] = value;
7168
519k
    out[1] = 0;
7169
519k
    if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7170
519k
        (!ctxt->disableSAX))
7171
53.4k
        ctxt->sax->characters(ctxt->userData, out, 1);
7172
519k
      } else {
7173
54.9k
    if ((hex == 'x') || (hex == 'X'))
7174
1.01k
        snprintf((char *)out, sizeof(out), "#x%X", value);
7175
53.9k
    else
7176
53.9k
        snprintf((char *)out, sizeof(out), "#%d", value);
7177
54.9k
    if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7178
54.9k
        (!ctxt->disableSAX))
7179
3.09k
        ctxt->sax->reference(ctxt->userData, out);
7180
54.9k
      }
7181
574k
  } else {
7182
      /*
7183
       * Just encode the value in UTF-8
7184
       */
7185
192k
      COPY_BUF(0 ,out, i, value);
7186
192k
      out[i] = 0;
7187
192k
      if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7188
192k
    (!ctxt->disableSAX))
7189
48.2k
    ctxt->sax->characters(ctxt->userData, out, i);
7190
192k
  }
7191
766k
  return;
7192
964k
    }
7193
7194
    /*
7195
     * We are seeing an entity reference
7196
     */
7197
6.08M
    ent = xmlParseEntityRef(ctxt);
7198
6.08M
    if (ent == NULL) return;
7199
4.16M
    if (!ctxt->wellFormed)
7200
1.33M
  return;
7201
2.82M
    was_checked = ent->checked;
7202
7203
    /* special case of predefined entities */
7204
2.82M
    if ((ent->name == NULL) ||
7205
2.82M
        (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
7206
18.6k
  val = ent->content;
7207
18.6k
  if (val == NULL) return;
7208
  /*
7209
   * inline the entity.
7210
   */
7211
18.6k
  if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7212
18.6k
      (!ctxt->disableSAX))
7213
18.6k
      ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
7214
18.6k
  return;
7215
18.6k
    }
7216
7217
    /*
7218
     * The first reference to the entity trigger a parsing phase
7219
     * where the ent->children is filled with the result from
7220
     * the parsing.
7221
     * Note: external parsed entities will not be loaded, it is not
7222
     * required for a non-validating parser, unless the parsing option
7223
     * of validating, or substituting entities were given. Doing so is
7224
     * far more secure as the parser will only process data coming from
7225
     * the document entity by default.
7226
     */
7227
2.80M
    if (((ent->checked == 0) ||
7228
2.80M
         ((ent->children == NULL) && (ctxt->options & XML_PARSE_NOENT))) &&
7229
2.80M
        ((ent->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY) ||
7230
2.73M
         (ctxt->options & (XML_PARSE_NOENT | XML_PARSE_DTDVALID)))) {
7231
2.73M
  unsigned long oldnbent = ctxt->nbentities, diff;
7232
7233
  /*
7234
   * This is a bit hackish but this seems the best
7235
   * way to make sure both SAX and DOM entity support
7236
   * behaves okay.
7237
   */
7238
2.73M
  void *user_data;
7239
2.73M
  if (ctxt->userData == ctxt)
7240
2.73M
      user_data = NULL;
7241
0
  else
7242
0
      user_data = ctxt->userData;
7243
7244
  /*
7245
   * Check that this entity is well formed
7246
   * 4.3.2: An internal general parsed entity is well-formed
7247
   * if its replacement text matches the production labeled
7248
   * content.
7249
   */
7250
2.73M
  if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7251
253k
      ctxt->depth++;
7252
253k
      ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content,
7253
253k
                                                user_data, &list);
7254
253k
      ctxt->depth--;
7255
7256
2.47M
  } else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7257
2.47M
      ctxt->depth++;
7258
2.47M
      ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax,
7259
2.47M
                                     user_data, ctxt->depth, ent->URI,
7260
2.47M
             ent->ExternalID, &list);
7261
2.47M
      ctxt->depth--;
7262
2.47M
  } else {
7263
0
      ret = XML_ERR_ENTITY_PE_INTERNAL;
7264
0
      xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7265
0
       "invalid entity type found\n", NULL);
7266
0
  }
7267
7268
  /*
7269
   * Store the number of entities needing parsing for this entity
7270
   * content and do checkings
7271
   */
7272
2.73M
        diff = ctxt->nbentities - oldnbent + 1;
7273
2.73M
        if (diff > INT_MAX / 2)
7274
0
            diff = INT_MAX / 2;
7275
2.73M
        ent->checked = diff * 2;
7276
2.73M
  if ((ent->content != NULL) && (xmlStrchr(ent->content, '<')))
7277
106k
      ent->checked |= 1;
7278
2.73M
  if (ret == XML_ERR_ENTITY_LOOP) {
7279
513k
      xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7280
513k
            xmlHaltParser(ctxt);
7281
513k
      xmlFreeNodeList(list);
7282
513k
      return;
7283
513k
  }
7284
2.21M
  if (xmlParserEntityCheck(ctxt, 0, ent, 0)) {
7285
126
      xmlFreeNodeList(list);
7286
126
      return;
7287
126
  }
7288
7289
2.21M
  if ((ret == XML_ERR_OK) && (list != NULL)) {
7290
18.8k
      if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
7291
18.8k
       (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
7292
18.8k
    (ent->children == NULL)) {
7293
18.5k
    ent->children = list;
7294
                /*
7295
                 * Prune it directly in the generated document
7296
                 * except for single text nodes.
7297
                 */
7298
18.5k
                if ((ctxt->replaceEntities == 0) ||
7299
18.5k
                    (ctxt->parseMode == XML_PARSE_READER) ||
7300
18.5k
                    ((list->type == XML_TEXT_NODE) &&
7301
15.4k
                     (list->next == NULL))) {
7302
15.4k
                    ent->owner = 1;
7303
51.3k
                    while (list != NULL) {
7304
35.9k
                        list->parent = (xmlNodePtr) ent;
7305
35.9k
                        if (list->doc != ent->doc)
7306
0
                            xmlSetTreeDoc(list, ent->doc);
7307
35.9k
                        if (list->next == NULL)
7308
15.4k
                            ent->last = list;
7309
35.9k
                        list = list->next;
7310
35.9k
                    }
7311
15.4k
                    list = NULL;
7312
15.4k
                } else {
7313
3.09k
                    ent->owner = 0;
7314
17.9k
                    while (list != NULL) {
7315
14.8k
                        list->parent = (xmlNodePtr) ctxt->node;
7316
14.8k
                        list->doc = ctxt->myDoc;
7317
14.8k
                        if (list->next == NULL)
7318
3.09k
                            ent->last = list;
7319
14.8k
                        list = list->next;
7320
14.8k
                    }
7321
3.09k
                    list = ent->children;
7322
#ifdef LIBXML_LEGACY_ENABLED
7323
                    if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7324
                        xmlAddEntityReference(ent, list, NULL);
7325
#endif /* LIBXML_LEGACY_ENABLED */
7326
3.09k
                }
7327
18.5k
      } else {
7328
365
    xmlFreeNodeList(list);
7329
365
    list = NULL;
7330
365
      }
7331
2.19M
  } else if ((ret != XML_ERR_OK) &&
7332
2.19M
       (ret != XML_WAR_UNDECLARED_ENTITY)) {
7333
2.16M
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7334
2.16M
         "Entity '%s' failed to parse\n", ent->name);
7335
2.16M
            if (ent->content != NULL)
7336
63.6k
                ent->content[0] = 0;
7337
2.16M
      xmlParserEntityCheck(ctxt, 0, ent, 0);
7338
2.16M
  } else if (list != NULL) {
7339
0
      xmlFreeNodeList(list);
7340
0
      list = NULL;
7341
0
  }
7342
2.21M
  if (ent->checked == 0)
7343
0
      ent->checked = 2;
7344
7345
        /* Prevent entity from being parsed and expanded twice (Bug 760367). */
7346
2.21M
        was_checked = 0;
7347
2.21M
    } else if (ent->checked != 1) {
7348
79.0k
  ctxt->nbentities += ent->checked / 2;
7349
79.0k
    }
7350
7351
    /*
7352
     * Now that the entity content has been gathered
7353
     * provide it to the application, this can take different forms based
7354
     * on the parsing modes.
7355
     */
7356
2.29M
    if (ent->children == NULL) {
7357
  /*
7358
   * Probably running in SAX mode and the callbacks don't
7359
   * build the entity content. So unless we already went
7360
   * though parsing for first checking go though the entity
7361
   * content to generate callbacks associated to the entity
7362
   */
7363
2.21M
  if (was_checked != 0) {
7364
15.6k
      void *user_data;
7365
      /*
7366
       * This is a bit hackish but this seems the best
7367
       * way to make sure both SAX and DOM entity support
7368
       * behaves okay.
7369
       */
7370
15.6k
      if (ctxt->userData == ctxt)
7371
15.6k
    user_data = NULL;
7372
0
      else
7373
0
    user_data = ctxt->userData;
7374
7375
15.6k
      if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7376
737
    ctxt->depth++;
7377
737
    ret = xmlParseBalancedChunkMemoryInternal(ctxt,
7378
737
           ent->content, user_data, NULL);
7379
737
    ctxt->depth--;
7380
14.9k
      } else if (ent->etype ==
7381
14.9k
           XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7382
14.9k
    ctxt->depth++;
7383
14.9k
    ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
7384
14.9k
         ctxt->sax, user_data, ctxt->depth,
7385
14.9k
         ent->URI, ent->ExternalID, NULL);
7386
14.9k
    ctxt->depth--;
7387
14.9k
      } else {
7388
0
    ret = XML_ERR_ENTITY_PE_INTERNAL;
7389
0
    xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7390
0
           "invalid entity type found\n", NULL);
7391
0
      }
7392
15.6k
      if (ret == XML_ERR_ENTITY_LOOP) {
7393
3
    xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7394
3
    return;
7395
3
      }
7396
15.6k
  }
7397
2.21M
  if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7398
2.21M
      (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7399
      /*
7400
       * Entity reference callback comes second, it's somewhat
7401
       * superfluous but a compatibility to historical behaviour
7402
       */
7403
25.9k
      ctxt->sax->reference(ctxt->userData, ent->name);
7404
25.9k
  }
7405
2.21M
  return;
7406
2.21M
    }
7407
7408
    /*
7409
     * If we didn't get any children for the entity being built
7410
     */
7411
77.9k
    if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7412
77.9k
  (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7413
  /*
7414
   * Create a node.
7415
   */
7416
39.7k
  ctxt->sax->reference(ctxt->userData, ent->name);
7417
39.7k
  return;
7418
39.7k
    }
7419
7420
38.2k
    if ((ctxt->replaceEntities) || (ent->children == NULL))  {
7421
  /*
7422
   * There is a problem on the handling of _private for entities
7423
   * (bug 155816): Should we copy the content of the field from
7424
   * the entity (possibly overwriting some value set by the user
7425
   * when a copy is created), should we leave it alone, or should
7426
   * we try to take care of different situations?  The problem
7427
   * is exacerbated by the usage of this field by the xmlReader.
7428
   * To fix this bug, we look at _private on the created node
7429
   * and, if it's NULL, we copy in whatever was in the entity.
7430
   * If it's not NULL we leave it alone.  This is somewhat of a
7431
   * hack - maybe we should have further tests to determine
7432
   * what to do.
7433
   */
7434
38.2k
  if ((ctxt->node != NULL) && (ent->children != NULL)) {
7435
      /*
7436
       * Seems we are generating the DOM content, do
7437
       * a simple tree copy for all references except the first
7438
       * In the first occurrence list contains the replacement.
7439
       */
7440
38.2k
      if (((list == NULL) && (ent->owner == 0)) ||
7441
38.2k
    (ctxt->parseMode == XML_PARSE_READER)) {
7442
6.79k
    xmlNodePtr nw = NULL, cur, firstChild = NULL;
7443
7444
    /*
7445
     * We are copying here, make sure there is no abuse
7446
     */
7447
6.79k
    ctxt->sizeentcopy += ent->length + 5;
7448
6.79k
    if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
7449
0
        return;
7450
7451
    /*
7452
     * when operating on a reader, the entities definitions
7453
     * are always owning the entities subtree.
7454
    if (ctxt->parseMode == XML_PARSE_READER)
7455
        ent->owner = 1;
7456
     */
7457
7458
6.79k
    cur = ent->children;
7459
14.3k
    while (cur != NULL) {
7460
14.3k
        nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7461
14.3k
        if (nw != NULL) {
7462
14.3k
      if (nw->_private == NULL)
7463
14.3k
          nw->_private = cur->_private;
7464
14.3k
      if (firstChild == NULL){
7465
6.79k
          firstChild = nw;
7466
6.79k
      }
7467
14.3k
      nw = xmlAddChild(ctxt->node, nw);
7468
14.3k
        }
7469
14.3k
        if (cur == ent->last) {
7470
      /*
7471
       * needed to detect some strange empty
7472
       * node cases in the reader tests
7473
       */
7474
6.79k
      if ((ctxt->parseMode == XML_PARSE_READER) &&
7475
6.79k
          (nw != NULL) &&
7476
6.79k
          (nw->type == XML_ELEMENT_NODE) &&
7477
6.79k
          (nw->children == NULL))
7478
964
          nw->extra = 1;
7479
7480
6.79k
      break;
7481
6.79k
        }
7482
7.60k
        cur = cur->next;
7483
7.60k
    }
7484
#ifdef LIBXML_LEGACY_ENABLED
7485
    if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7486
      xmlAddEntityReference(ent, firstChild, nw);
7487
#endif /* LIBXML_LEGACY_ENABLED */
7488
31.4k
      } else if ((list == NULL) || (ctxt->inputNr > 0)) {
7489
31.4k
    xmlNodePtr nw = NULL, cur, next, last,
7490
31.4k
         firstChild = NULL;
7491
7492
    /*
7493
     * We are copying here, make sure there is no abuse
7494
     */
7495
31.4k
    ctxt->sizeentcopy += ent->length + 5;
7496
31.4k
    if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
7497
0
        return;
7498
7499
    /*
7500
     * Copy the entity child list and make it the new
7501
     * entity child list. The goal is to make sure any
7502
     * ID or REF referenced will be the one from the
7503
     * document content and not the entity copy.
7504
     */
7505
31.4k
    cur = ent->children;
7506
31.4k
    ent->children = NULL;
7507
31.4k
    last = ent->last;
7508
31.4k
    ent->last = NULL;
7509
54.9k
    while (cur != NULL) {
7510
54.9k
        next = cur->next;
7511
54.9k
        cur->next = NULL;
7512
54.9k
        cur->parent = NULL;
7513
54.9k
        nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7514
54.9k
        if (nw != NULL) {
7515
54.9k
      if (nw->_private == NULL)
7516
54.9k
          nw->_private = cur->_private;
7517
54.9k
      if (firstChild == NULL){
7518
31.4k
          firstChild = cur;
7519
31.4k
      }
7520
54.9k
      xmlAddChild((xmlNodePtr) ent, nw);
7521
54.9k
      xmlAddChild(ctxt->node, cur);
7522
54.9k
        }
7523
54.9k
        if (cur == last)
7524
31.4k
      break;
7525
23.4k
        cur = next;
7526
23.4k
    }
7527
31.4k
    if (ent->owner == 0)
7528
3.09k
        ent->owner = 1;
7529
#ifdef LIBXML_LEGACY_ENABLED
7530
    if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7531
      xmlAddEntityReference(ent, firstChild, nw);
7532
#endif /* LIBXML_LEGACY_ENABLED */
7533
31.4k
      } else {
7534
0
    const xmlChar *nbktext;
7535
7536
    /*
7537
     * the name change is to avoid coalescing of the
7538
     * node with a possible previous text one which
7539
     * would make ent->children a dangling pointer
7540
     */
7541
0
    nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
7542
0
          -1);
7543
0
    if (ent->children->type == XML_TEXT_NODE)
7544
0
        ent->children->name = nbktext;
7545
0
    if ((ent->last != ent->children) &&
7546
0
        (ent->last->type == XML_TEXT_NODE))
7547
0
        ent->last->name = nbktext;
7548
0
    xmlAddChildList(ctxt->node, ent->children);
7549
0
      }
7550
7551
      /*
7552
       * This is to avoid a nasty side effect, see
7553
       * characters() in SAX.c
7554
       */
7555
38.2k
      ctxt->nodemem = 0;
7556
38.2k
      ctxt->nodelen = 0;
7557
38.2k
      return;
7558
38.2k
  }
7559
38.2k
    }
7560
38.2k
}
7561
7562
/**
7563
 * xmlParseEntityRef:
7564
 * @ctxt:  an XML parser context
7565
 *
7566
 * DEPRECATED: Internal function, don't use.
7567
 *
7568
 * parse ENTITY references declarations
7569
 *
7570
 * [68] EntityRef ::= '&' Name ';'
7571
 *
7572
 * [ WFC: Entity Declared ]
7573
 * In a document without any DTD, a document with only an internal DTD
7574
 * subset which contains no parameter entity references, or a document
7575
 * with "standalone='yes'", the Name given in the entity reference
7576
 * must match that in an entity declaration, except that well-formed
7577
 * documents need not declare any of the following entities: amp, lt,
7578
 * gt, apos, quot.  The declaration of a parameter entity must precede
7579
 * any reference to it.  Similarly, the declaration of a general entity
7580
 * must precede any reference to it which appears in a default value in an
7581
 * attribute-list declaration. Note that if entities are declared in the
7582
 * external subset or in external parameter entities, a non-validating
7583
 * processor is not obligated to read and process their declarations;
7584
 * for such documents, the rule that an entity must be declared is a
7585
 * well-formedness constraint only if standalone='yes'.
7586
 *
7587
 * [ WFC: Parsed Entity ]
7588
 * An entity reference must not contain the name of an unparsed entity
7589
 *
7590
 * Returns the xmlEntityPtr if found, or NULL otherwise.
7591
 */
7592
xmlEntityPtr
7593
8.44M
xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
7594
8.44M
    const xmlChar *name;
7595
8.44M
    xmlEntityPtr ent = NULL;
7596
7597
8.44M
    GROW;
7598
8.44M
    if (ctxt->instate == XML_PARSER_EOF)
7599
0
        return(NULL);
7600
7601
8.44M
    if (RAW != '&')
7602
0
        return(NULL);
7603
8.44M
    NEXT;
7604
8.44M
    name = xmlParseName(ctxt);
7605
8.44M
    if (name == NULL) {
7606
651k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7607
651k
           "xmlParseEntityRef: no name\n");
7608
651k
        return(NULL);
7609
651k
    }
7610
7.79M
    if (RAW != ';') {
7611
851k
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7612
851k
  return(NULL);
7613
851k
    }
7614
6.94M
    NEXT;
7615
7616
    /*
7617
     * Predefined entities override any extra definition
7618
     */
7619
6.94M
    if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7620
3.96M
        ent = xmlGetPredefinedEntity(name);
7621
3.96M
        if (ent != NULL)
7622
1.06M
            return(ent);
7623
3.96M
    }
7624
7625
    /*
7626
     * Increase the number of entity references parsed
7627
     */
7628
5.87M
    ctxt->nbentities++;
7629
7630
    /*
7631
     * Ask first SAX for entity resolution, otherwise try the
7632
     * entities which may have stored in the parser context.
7633
     */
7634
5.87M
    if (ctxt->sax != NULL) {
7635
5.87M
  if (ctxt->sax->getEntity != NULL)
7636
5.87M
      ent = ctxt->sax->getEntity(ctxt->userData, name);
7637
5.87M
  if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7638
5.87M
      (ctxt->options & XML_PARSE_OLDSAX))
7639
6.05k
      ent = xmlGetPredefinedEntity(name);
7640
5.87M
  if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7641
5.87M
      (ctxt->userData==ctxt)) {
7642
14.4k
      ent = xmlSAX2GetEntity(ctxt, name);
7643
14.4k
  }
7644
5.87M
    }
7645
5.87M
    if (ctxt->instate == XML_PARSER_EOF)
7646
0
  return(NULL);
7647
    /*
7648
     * [ WFC: Entity Declared ]
7649
     * In a document without any DTD, a document with only an
7650
     * internal DTD subset which contains no parameter entity
7651
     * references, or a document with "standalone='yes'", the
7652
     * Name given in the entity reference must match that in an
7653
     * entity declaration, except that well-formed documents
7654
     * need not declare any of the following entities: amp, lt,
7655
     * gt, apos, quot.
7656
     * The declaration of a parameter entity must precede any
7657
     * reference to it.
7658
     * Similarly, the declaration of a general entity must
7659
     * precede any reference to it which appears in a default
7660
     * value in an attribute-list declaration. Note that if
7661
     * entities are declared in the external subset or in
7662
     * external parameter entities, a non-validating processor
7663
     * is not obligated to read and process their declarations;
7664
     * for such documents, the rule that an entity must be
7665
     * declared is a well-formedness constraint only if
7666
     * standalone='yes'.
7667
     */
7668
5.87M
    if (ent == NULL) {
7669
807k
  if ((ctxt->standalone == 1) ||
7670
807k
      ((ctxt->hasExternalSubset == 0) &&
7671
805k
       (ctxt->hasPErefs == 0))) {
7672
773k
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7673
773k
         "Entity '%s' not defined\n", name);
7674
773k
  } else {
7675
34.6k
      xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7676
34.6k
         "Entity '%s' not defined\n", name);
7677
34.6k
      if ((ctxt->inSubset == 0) &&
7678
34.6k
    (ctxt->sax != NULL) &&
7679
34.6k
    (ctxt->sax->reference != NULL)) {
7680
33.7k
    ctxt->sax->reference(ctxt->userData, name);
7681
33.7k
      }
7682
34.6k
  }
7683
807k
  xmlParserEntityCheck(ctxt, 0, ent, 0);
7684
807k
  ctxt->valid = 0;
7685
807k
    }
7686
7687
    /*
7688
     * [ WFC: Parsed Entity ]
7689
     * An entity reference must not contain the name of an
7690
     * unparsed entity
7691
     */
7692
5.06M
    else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7693
1.64k
  xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7694
1.64k
     "Entity reference to unparsed entity %s\n", name);
7695
1.64k
    }
7696
7697
    /*
7698
     * [ WFC: No External Entity References ]
7699
     * Attribute values cannot contain direct or indirect
7700
     * entity references to external entities.
7701
     */
7702
5.06M
    else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7703
5.06M
       (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7704
6.48k
  xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7705
6.48k
       "Attribute references external entity '%s'\n", name);
7706
6.48k
    }
7707
    /*
7708
     * [ WFC: No < in Attribute Values ]
7709
     * The replacement text of any entity referred to directly or
7710
     * indirectly in an attribute value (other than "&lt;") must
7711
     * not contain a <.
7712
     */
7713
5.06M
    else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7714
5.06M
       (ent != NULL) && 
7715
5.06M
       (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
7716
99.0k
  if (((ent->checked & 1) || (ent->checked == 0)) &&
7717
99.0k
       (ent->content != NULL) && (xmlStrchr(ent->content, '<'))) {
7718
3.89k
      xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7719
3.89k
  "'<' in entity '%s' is not allowed in attributes values\n", name);
7720
3.89k
        }
7721
99.0k
    }
7722
7723
    /*
7724
     * Internal check, no parameter entities here ...
7725
     */
7726
4.96M
    else {
7727
4.96M
  switch (ent->etype) {
7728
0
      case XML_INTERNAL_PARAMETER_ENTITY:
7729
0
      case XML_EXTERNAL_PARAMETER_ENTITY:
7730
0
      xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7731
0
       "Attempt to reference the parameter entity '%s'\n",
7732
0
            name);
7733
0
      break;
7734
4.96M
      default:
7735
4.96M
      break;
7736
4.96M
  }
7737
4.96M
    }
7738
7739
    /*
7740
     * [ WFC: No Recursion ]
7741
     * A parsed entity must not contain a recursive reference
7742
     * to itself, either directly or indirectly.
7743
     * Done somewhere else
7744
     */
7745
5.87M
    return(ent);
7746
5.87M
}
7747
7748
/**
7749
 * xmlParseStringEntityRef:
7750
 * @ctxt:  an XML parser context
7751
 * @str:  a pointer to an index in the string
7752
 *
7753
 * parse ENTITY references declarations, but this version parses it from
7754
 * a string value.
7755
 *
7756
 * [68] EntityRef ::= '&' Name ';'
7757
 *
7758
 * [ WFC: Entity Declared ]
7759
 * In a document without any DTD, a document with only an internal DTD
7760
 * subset which contains no parameter entity references, or a document
7761
 * with "standalone='yes'", the Name given in the entity reference
7762
 * must match that in an entity declaration, except that well-formed
7763
 * documents need not declare any of the following entities: amp, lt,
7764
 * gt, apos, quot.  The declaration of a parameter entity must precede
7765
 * any reference to it.  Similarly, the declaration of a general entity
7766
 * must precede any reference to it which appears in a default value in an
7767
 * attribute-list declaration. Note that if entities are declared in the
7768
 * external subset or in external parameter entities, a non-validating
7769
 * processor is not obligated to read and process their declarations;
7770
 * for such documents, the rule that an entity must be declared is a
7771
 * well-formedness constraint only if standalone='yes'.
7772
 *
7773
 * [ WFC: Parsed Entity ]
7774
 * An entity reference must not contain the name of an unparsed entity
7775
 *
7776
 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7777
 * is updated to the current location in the string.
7778
 */
7779
static xmlEntityPtr
7780
1.12M
xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7781
1.12M
    xmlChar *name;
7782
1.12M
    const xmlChar *ptr;
7783
1.12M
    xmlChar cur;
7784
1.12M
    xmlEntityPtr ent = NULL;
7785
7786
1.12M
    if ((str == NULL) || (*str == NULL))
7787
0
        return(NULL);
7788
1.12M
    ptr = *str;
7789
1.12M
    cur = *ptr;
7790
1.12M
    if (cur != '&')
7791
0
  return(NULL);
7792
7793
1.12M
    ptr++;
7794
1.12M
    name = xmlParseStringName(ctxt, &ptr);
7795
1.12M
    if (name == NULL) {
7796
14.9k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7797
14.9k
           "xmlParseStringEntityRef: no name\n");
7798
14.9k
  *str = ptr;
7799
14.9k
  return(NULL);
7800
14.9k
    }
7801
1.11M
    if (*ptr != ';') {
7802
12.6k
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7803
12.6k
        xmlFree(name);
7804
12.6k
  *str = ptr;
7805
12.6k
  return(NULL);
7806
12.6k
    }
7807
1.09M
    ptr++;
7808
7809
7810
    /*
7811
     * Predefined entities override any extra definition
7812
     */
7813
1.09M
    if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7814
583k
        ent = xmlGetPredefinedEntity(name);
7815
583k
        if (ent != NULL) {
7816
167k
            xmlFree(name);
7817
167k
            *str = ptr;
7818
167k
            return(ent);
7819
167k
        }
7820
583k
    }
7821
7822
    /*
7823
     * Increase the number of entity references parsed
7824
     */
7825
931k
    ctxt->nbentities++;
7826
7827
    /*
7828
     * Ask first SAX for entity resolution, otherwise try the
7829
     * entities which may have stored in the parser context.
7830
     */
7831
931k
    if (ctxt->sax != NULL) {
7832
931k
  if (ctxt->sax->getEntity != NULL)
7833
931k
      ent = ctxt->sax->getEntity(ctxt->userData, name);
7834
931k
  if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX))
7835
67.8k
      ent = xmlGetPredefinedEntity(name);
7836
931k
  if ((ent == NULL) && (ctxt->userData==ctxt)) {
7837
139k
      ent = xmlSAX2GetEntity(ctxt, name);
7838
139k
  }
7839
931k
    }
7840
931k
    if (ctxt->instate == XML_PARSER_EOF) {
7841
0
  xmlFree(name);
7842
0
  return(NULL);
7843
0
    }
7844
7845
    /*
7846
     * [ WFC: Entity Declared ]
7847
     * In a document without any DTD, a document with only an
7848
     * internal DTD subset which contains no parameter entity
7849
     * references, or a document with "standalone='yes'", the
7850
     * Name given in the entity reference must match that in an
7851
     * entity declaration, except that well-formed documents
7852
     * need not declare any of the following entities: amp, lt,
7853
     * gt, apos, quot.
7854
     * The declaration of a parameter entity must precede any
7855
     * reference to it.
7856
     * Similarly, the declaration of a general entity must
7857
     * precede any reference to it which appears in a default
7858
     * value in an attribute-list declaration. Note that if
7859
     * entities are declared in the external subset or in
7860
     * external parameter entities, a non-validating processor
7861
     * is not obligated to read and process their declarations;
7862
     * for such documents, the rule that an entity must be
7863
     * declared is a well-formedness constraint only if
7864
     * standalone='yes'.
7865
     */
7866
931k
    if (ent == NULL) {
7867
139k
  if ((ctxt->standalone == 1) ||
7868
139k
      ((ctxt->hasExternalSubset == 0) &&
7869
138k
       (ctxt->hasPErefs == 0))) {
7870
132k
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7871
132k
         "Entity '%s' not defined\n", name);
7872
132k
  } else {
7873
6.80k
      xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7874
6.80k
        "Entity '%s' not defined\n",
7875
6.80k
        name);
7876
6.80k
  }
7877
139k
  xmlParserEntityCheck(ctxt, 0, ent, 0);
7878
  /* TODO ? check regressions ctxt->valid = 0; */
7879
139k
    }
7880
7881
    /*
7882
     * [ WFC: Parsed Entity ]
7883
     * An entity reference must not contain the name of an
7884
     * unparsed entity
7885
     */
7886
791k
    else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7887
25.1k
  xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7888
25.1k
     "Entity reference to unparsed entity %s\n", name);
7889
25.1k
    }
7890
7891
    /*
7892
     * [ WFC: No External Entity References ]
7893
     * Attribute values cannot contain direct or indirect
7894
     * entity references to external entities.
7895
     */
7896
766k
    else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7897
766k
       (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7898
8.72k
  xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7899
8.72k
   "Attribute references external entity '%s'\n", name);
7900
8.72k
    }
7901
    /*
7902
     * [ WFC: No < in Attribute Values ]
7903
     * The replacement text of any entity referred to directly or
7904
     * indirectly in an attribute value (other than "&lt;") must
7905
     * not contain a <.
7906
     */
7907
758k
    else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7908
758k
       (ent != NULL) && (ent->content != NULL) &&
7909
758k
       (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
7910
758k
       (xmlStrchr(ent->content, '<'))) {
7911
125k
  xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7912
125k
     "'<' in entity '%s' is not allowed in attributes values\n",
7913
125k
        name);
7914
125k
    }
7915
7916
    /*
7917
     * Internal check, no parameter entities here ...
7918
     */
7919
632k
    else {
7920
632k
  switch (ent->etype) {
7921
0
      case XML_INTERNAL_PARAMETER_ENTITY:
7922
0
      case XML_EXTERNAL_PARAMETER_ENTITY:
7923
0
    xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7924
0
       "Attempt to reference the parameter entity '%s'\n",
7925
0
          name);
7926
0
      break;
7927
632k
      default:
7928
632k
      break;
7929
632k
  }
7930
632k
    }
7931
7932
    /*
7933
     * [ WFC: No Recursion ]
7934
     * A parsed entity must not contain a recursive reference
7935
     * to itself, either directly or indirectly.
7936
     * Done somewhere else
7937
     */
7938
7939
931k
    xmlFree(name);
7940
931k
    *str = ptr;
7941
931k
    return(ent);
7942
931k
}
7943
7944
/**
7945
 * xmlParsePEReference:
7946
 * @ctxt:  an XML parser context
7947
 *
7948
 * DEPRECATED: Internal function, don't use.
7949
 *
7950
 * parse PEReference declarations
7951
 * The entity content is handled directly by pushing it's content as
7952
 * a new input stream.
7953
 *
7954
 * [69] PEReference ::= '%' Name ';'
7955
 *
7956
 * [ WFC: No Recursion ]
7957
 * A parsed entity must not contain a recursive
7958
 * reference to itself, either directly or indirectly.
7959
 *
7960
 * [ WFC: Entity Declared ]
7961
 * In a document without any DTD, a document with only an internal DTD
7962
 * subset which contains no parameter entity references, or a document
7963
 * with "standalone='yes'", ...  ... The declaration of a parameter
7964
 * entity must precede any reference to it...
7965
 *
7966
 * [ VC: Entity Declared ]
7967
 * In a document with an external subset or external parameter entities
7968
 * with "standalone='no'", ...  ... The declaration of a parameter entity
7969
 * must precede any reference to it...
7970
 *
7971
 * [ WFC: In DTD ]
7972
 * Parameter-entity references may only appear in the DTD.
7973
 * NOTE: misleading but this is handled.
7974
 */
7975
void
7976
xmlParsePEReference(xmlParserCtxtPtr ctxt)
7977
15.6M
{
7978
15.6M
    const xmlChar *name;
7979
15.6M
    xmlEntityPtr entity = NULL;
7980
15.6M
    xmlParserInputPtr input;
7981
7982
15.6M
    if (RAW != '%')
7983
11.4M
        return;
7984
4.15M
    NEXT;
7985
4.15M
    name = xmlParseName(ctxt);
7986
4.15M
    if (name == NULL) {
7987
44.1k
  xmlFatalErrMsg(ctxt, XML_ERR_PEREF_NO_NAME, "PEReference: no name\n");
7988
44.1k
  return;
7989
44.1k
    }
7990
4.10M
    if (xmlParserDebugEntities)
7991
0
  xmlGenericError(xmlGenericErrorContext,
7992
0
    "PEReference: %s\n", name);
7993
4.10M
    if (RAW != ';') {
7994
154k
  xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
7995
154k
        return;
7996
154k
    }
7997
7998
3.95M
    NEXT;
7999
8000
    /*
8001
     * Increase the number of entity references parsed
8002
     */
8003
3.95M
    ctxt->nbentities++;
8004
8005
    /*
8006
     * Request the entity from SAX
8007
     */
8008
3.95M
    if ((ctxt->sax != NULL) &&
8009
3.95M
  (ctxt->sax->getParameterEntity != NULL))
8010
3.95M
  entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8011
3.95M
    if (ctxt->instate == XML_PARSER_EOF)
8012
0
  return;
8013
3.95M
    if (entity == NULL) {
8014
  /*
8015
   * [ WFC: Entity Declared ]
8016
   * In a document without any DTD, a document with only an
8017
   * internal DTD subset which contains no parameter entity
8018
   * references, or a document with "standalone='yes'", ...
8019
   * ... The declaration of a parameter entity must precede
8020
   * any reference to it...
8021
   */
8022
275k
  if ((ctxt->standalone == 1) ||
8023
275k
      ((ctxt->hasExternalSubset == 0) &&
8024
275k
       (ctxt->hasPErefs == 0))) {
8025
1.49k
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8026
1.49k
            "PEReference: %%%s; not found\n",
8027
1.49k
            name);
8028
274k
  } else {
8029
      /*
8030
       * [ VC: Entity Declared ]
8031
       * In a document with an external subset or external
8032
       * parameter entities with "standalone='no'", ...
8033
       * ... The declaration of a parameter entity must
8034
       * precede any reference to it...
8035
       */
8036
274k
            if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
8037
19.4k
                xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
8038
19.4k
                                 "PEReference: %%%s; not found\n",
8039
19.4k
                                 name, NULL);
8040
19.4k
            } else
8041
254k
                xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8042
254k
                              "PEReference: %%%s; not found\n",
8043
254k
                              name, NULL);
8044
274k
            ctxt->valid = 0;
8045
274k
  }
8046
275k
  xmlParserEntityCheck(ctxt, 0, NULL, 0);
8047
3.67M
    } else {
8048
  /*
8049
   * Internal checking in case the entity quest barfed
8050
   */
8051
3.67M
  if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8052
3.67M
      (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8053
0
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8054
0
      "Internal: %%%s; is not a parameter entity\n",
8055
0
        name, NULL);
8056
3.67M
  } else {
8057
3.67M
            xmlChar start[4];
8058
3.67M
            xmlCharEncoding enc;
8059
8060
3.67M
      if (xmlParserEntityCheck(ctxt, 0, entity, 0))
8061
1.93k
          return;
8062
8063
3.67M
      if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
8064
3.67M
          ((ctxt->options & XML_PARSE_NOENT) == 0) &&
8065
3.67M
    ((ctxt->options & XML_PARSE_DTDVALID) == 0) &&
8066
3.67M
    ((ctxt->options & XML_PARSE_DTDLOAD) == 0) &&
8067
3.67M
    ((ctxt->options & XML_PARSE_DTDATTR) == 0) &&
8068
3.67M
    (ctxt->replaceEntities == 0) &&
8069
3.67M
    (ctxt->validate == 0))
8070
1.10k
    return;
8071
8072
3.67M
      input = xmlNewEntityInputStream(ctxt, entity);
8073
3.67M
      if (xmlPushInput(ctxt, input) < 0) {
8074
18.6k
                xmlFreeInputStream(input);
8075
18.6k
    return;
8076
18.6k
            }
8077
8078
3.65M
      if (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) {
8079
                /*
8080
                 * Get the 4 first bytes and decode the charset
8081
                 * if enc != XML_CHAR_ENCODING_NONE
8082
                 * plug some encoding conversion routines.
8083
                 * Note that, since we may have some non-UTF8
8084
                 * encoding (like UTF16, bug 135229), the 'length'
8085
                 * is not known, but we can calculate based upon
8086
                 * the amount of data in the buffer.
8087
                 */
8088
1.32M
                GROW
8089
1.32M
                if (ctxt->instate == XML_PARSER_EOF)
8090
0
                    return;
8091
1.32M
                if ((ctxt->input->end - ctxt->input->cur)>=4) {
8092
1.31M
                    start[0] = RAW;
8093
1.31M
                    start[1] = NXT(1);
8094
1.31M
                    start[2] = NXT(2);
8095
1.31M
                    start[3] = NXT(3);
8096
1.31M
                    enc = xmlDetectCharEncoding(start, 4);
8097
1.31M
                    if (enc != XML_CHAR_ENCODING_NONE) {
8098
148k
                        xmlSwitchEncoding(ctxt, enc);
8099
148k
                    }
8100
1.31M
                }
8101
8102
1.32M
                if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
8103
1.32M
                    (IS_BLANK_CH(NXT(5)))) {
8104
144k
                    xmlParseTextDecl(ctxt);
8105
144k
                }
8106
1.32M
            }
8107
3.65M
  }
8108
3.67M
    }
8109
3.93M
    ctxt->hasPErefs = 1;
8110
3.93M
}
8111
8112
/**
8113
 * xmlLoadEntityContent:
8114
 * @ctxt:  an XML parser context
8115
 * @entity: an unloaded system entity
8116
 *
8117
 * Load the original content of the given system entity from the
8118
 * ExternalID/SystemID given. This is to be used for Included in Literal
8119
 * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
8120
 *
8121
 * Returns 0 in case of success and -1 in case of failure
8122
 */
8123
static int
8124
13.3k
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
8125
13.3k
    xmlParserInputPtr input;
8126
13.3k
    xmlBufferPtr buf;
8127
13.3k
    int l, c;
8128
13.3k
    int count = 0;
8129
8130
13.3k
    if ((ctxt == NULL) || (entity == NULL) ||
8131
13.3k
        ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
8132
13.3k
   (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
8133
13.3k
  (entity->content != NULL)) {
8134
0
  xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8135
0
              "xmlLoadEntityContent parameter error");
8136
0
        return(-1);
8137
0
    }
8138
8139
13.3k
    if (xmlParserDebugEntities)
8140
0
  xmlGenericError(xmlGenericErrorContext,
8141
0
    "Reading %s entity content input\n", entity->name);
8142
8143
13.3k
    buf = xmlBufferCreate();
8144
13.3k
    if (buf == NULL) {
8145
0
  xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8146
0
              "xmlLoadEntityContent parameter error");
8147
0
        return(-1);
8148
0
    }
8149
13.3k
    xmlBufferSetAllocationScheme(buf, XML_BUFFER_ALLOC_DOUBLEIT);
8150
8151
13.3k
    input = xmlNewEntityInputStream(ctxt, entity);
8152
13.3k
    if (input == NULL) {
8153
207
  xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8154
207
              "xmlLoadEntityContent input error");
8155
207
  xmlBufferFree(buf);
8156
207
        return(-1);
8157
207
    }
8158
8159
    /*
8160
     * Push the entity as the current input, read char by char
8161
     * saving to the buffer until the end of the entity or an error
8162
     */
8163
13.1k
    if (xmlPushInput(ctxt, input) < 0) {
8164
18
        xmlBufferFree(buf);
8165
18
  xmlFreeInputStream(input);
8166
18
  return(-1);
8167
18
    }
8168
8169
13.1k
    GROW;
8170
13.1k
    c = CUR_CHAR(l);
8171
23.4M
    while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
8172
23.4M
           (IS_CHAR(c))) {
8173
23.4M
        xmlBufferAdd(buf, ctxt->input->cur, l);
8174
23.4M
  if (count++ > XML_PARSER_CHUNK_SIZE) {
8175
227k
      count = 0;
8176
227k
      GROW;
8177
227k
            if (ctxt->instate == XML_PARSER_EOF) {
8178
0
                xmlBufferFree(buf);
8179
0
                return(-1);
8180
0
            }
8181
227k
  }
8182
23.4M
  NEXTL(l);
8183
23.4M
  c = CUR_CHAR(l);
8184
23.4M
  if (c == 0) {
8185
12.8k
      count = 0;
8186
12.8k
      GROW;
8187
12.8k
            if (ctxt->instate == XML_PARSER_EOF) {
8188
0
                xmlBufferFree(buf);
8189
0
                return(-1);
8190
0
            }
8191
12.8k
      c = CUR_CHAR(l);
8192
12.8k
  }
8193
23.4M
    }
8194
8195
13.1k
    if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
8196
2.60k
        xmlPopInput(ctxt);
8197
10.5k
    } else if (!IS_CHAR(c)) {
8198
10.5k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
8199
10.5k
                          "xmlLoadEntityContent: invalid char value %d\n",
8200
10.5k
                    c);
8201
10.5k
  xmlBufferFree(buf);
8202
10.5k
  return(-1);
8203
10.5k
    }
8204
2.60k
    entity->content = buf->content;
8205
2.60k
    buf->content = NULL;
8206
2.60k
    xmlBufferFree(buf);
8207
8208
2.60k
    return(0);
8209
13.1k
}
8210
8211
/**
8212
 * xmlParseStringPEReference:
8213
 * @ctxt:  an XML parser context
8214
 * @str:  a pointer to an index in the string
8215
 *
8216
 * parse PEReference declarations
8217
 *
8218
 * [69] PEReference ::= '%' Name ';'
8219
 *
8220
 * [ WFC: No Recursion ]
8221
 * A parsed entity must not contain a recursive
8222
 * reference to itself, either directly or indirectly.
8223
 *
8224
 * [ WFC: Entity Declared ]
8225
 * In a document without any DTD, a document with only an internal DTD
8226
 * subset which contains no parameter entity references, or a document
8227
 * with "standalone='yes'", ...  ... The declaration of a parameter
8228
 * entity must precede any reference to it...
8229
 *
8230
 * [ VC: Entity Declared ]
8231
 * In a document with an external subset or external parameter entities
8232
 * with "standalone='no'", ...  ... The declaration of a parameter entity
8233
 * must precede any reference to it...
8234
 *
8235
 * [ WFC: In DTD ]
8236
 * Parameter-entity references may only appear in the DTD.
8237
 * NOTE: misleading but this is handled.
8238
 *
8239
 * Returns the string of the entity content.
8240
 *         str is updated to the current value of the index
8241
 */
8242
static xmlEntityPtr
8243
2.55M
xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
8244
2.55M
    const xmlChar *ptr;
8245
2.55M
    xmlChar cur;
8246
2.55M
    xmlChar *name;
8247
2.55M
    xmlEntityPtr entity = NULL;
8248
8249
2.55M
    if ((str == NULL) || (*str == NULL)) return(NULL);
8250
2.55M
    ptr = *str;
8251
2.55M
    cur = *ptr;
8252
2.55M
    if (cur != '%')
8253
0
        return(NULL);
8254
2.55M
    ptr++;
8255
2.55M
    name = xmlParseStringName(ctxt, &ptr);
8256
2.55M
    if (name == NULL) {
8257
378k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8258
378k
           "xmlParseStringPEReference: no name\n");
8259
378k
  *str = ptr;
8260
378k
  return(NULL);
8261
378k
    }
8262
2.17M
    cur = *ptr;
8263
2.17M
    if (cur != ';') {
8264
325k
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
8265
325k
  xmlFree(name);
8266
325k
  *str = ptr;
8267
325k
  return(NULL);
8268
325k
    }
8269
1.84M
    ptr++;
8270
8271
    /*
8272
     * Increase the number of entity references parsed
8273
     */
8274
1.84M
    ctxt->nbentities++;
8275
8276
    /*
8277
     * Request the entity from SAX
8278
     */
8279
1.84M
    if ((ctxt->sax != NULL) &&
8280
1.84M
  (ctxt->sax->getParameterEntity != NULL))
8281
1.84M
  entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8282
1.84M
    if (ctxt->instate == XML_PARSER_EOF) {
8283
0
  xmlFree(name);
8284
0
  *str = ptr;
8285
0
  return(NULL);
8286
0
    }
8287
1.84M
    if (entity == NULL) {
8288
  /*
8289
   * [ WFC: Entity Declared ]
8290
   * In a document without any DTD, a document with only an
8291
   * internal DTD subset which contains no parameter entity
8292
   * references, or a document with "standalone='yes'", ...
8293
   * ... The declaration of a parameter entity must precede
8294
   * any reference to it...
8295
   */
8296
1.03M
  if ((ctxt->standalone == 1) ||
8297
1.03M
      ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) {
8298
24
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8299
24
     "PEReference: %%%s; not found\n", name);
8300
1.03M
  } else {
8301
      /*
8302
       * [ VC: Entity Declared ]
8303
       * In a document with an external subset or external
8304
       * parameter entities with "standalone='no'", ...
8305
       * ... The declaration of a parameter entity must
8306
       * precede any reference to it...
8307
       */
8308
1.03M
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8309
1.03M
        "PEReference: %%%s; not found\n",
8310
1.03M
        name, NULL);
8311
1.03M
      ctxt->valid = 0;
8312
1.03M
  }
8313
1.03M
  xmlParserEntityCheck(ctxt, 0, NULL, 0);
8314
1.03M
    } else {
8315
  /*
8316
   * Internal checking in case the entity quest barfed
8317
   */
8318
810k
  if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8319
810k
      (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8320
0
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8321
0
        "%%%s; is not a parameter entity\n",
8322
0
        name, NULL);
8323
0
  }
8324
810k
    }
8325
1.84M
    ctxt->hasPErefs = 1;
8326
1.84M
    xmlFree(name);
8327
1.84M
    *str = ptr;
8328
1.84M
    return(entity);
8329
1.84M
}
8330
8331
/**
8332
 * xmlParseDocTypeDecl:
8333
 * @ctxt:  an XML parser context
8334
 *
8335
 * DEPRECATED: Internal function, don't use.
8336
 *
8337
 * parse a DOCTYPE declaration
8338
 *
8339
 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
8340
 *                      ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8341
 *
8342
 * [ VC: Root Element Type ]
8343
 * The Name in the document type declaration must match the element
8344
 * type of the root element.
8345
 */
8346
8347
void
8348
263k
xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
8349
263k
    const xmlChar *name = NULL;
8350
263k
    xmlChar *ExternalID = NULL;
8351
263k
    xmlChar *URI = NULL;
8352
8353
    /*
8354
     * We know that '<!DOCTYPE' has been detected.
8355
     */
8356
263k
    SKIP(9);
8357
8358
263k
    SKIP_BLANKS;
8359
8360
    /*
8361
     * Parse the DOCTYPE name.
8362
     */
8363
263k
    name = xmlParseName(ctxt);
8364
263k
    if (name == NULL) {
8365
1.08k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8366
1.08k
           "xmlParseDocTypeDecl : no DOCTYPE name !\n");
8367
1.08k
    }
8368
263k
    ctxt->intSubName = name;
8369
8370
263k
    SKIP_BLANKS;
8371
8372
    /*
8373
     * Check for SystemID and ExternalID
8374
     */
8375
263k
    URI = xmlParseExternalID(ctxt, &ExternalID, 1);
8376
8377
263k
    if ((URI != NULL) || (ExternalID != NULL)) {
8378
112k
        ctxt->hasExternalSubset = 1;
8379
112k
    }
8380
263k
    ctxt->extSubURI = URI;
8381
263k
    ctxt->extSubSystem = ExternalID;
8382
8383
263k
    SKIP_BLANKS;
8384
8385
    /*
8386
     * Create and update the internal subset.
8387
     */
8388
263k
    if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
8389
263k
  (!ctxt->disableSAX))
8390
255k
  ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
8391
263k
    if (ctxt->instate == XML_PARSER_EOF)
8392
0
  return;
8393
8394
    /*
8395
     * Is there any internal subset declarations ?
8396
     * they are handled separately in xmlParseInternalSubset()
8397
     */
8398
263k
    if (RAW == '[')
8399
206k
  return;
8400
8401
    /*
8402
     * We should be at the end of the DOCTYPE declaration.
8403
     */
8404
56.6k
    if (RAW != '>') {
8405
9.63k
  xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8406
9.63k
    }
8407
56.6k
    NEXT;
8408
56.6k
}
8409
8410
/**
8411
 * xmlParseInternalSubset:
8412
 * @ctxt:  an XML parser context
8413
 *
8414
 * parse the internal subset declaration
8415
 *
8416
 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8417
 */
8418
8419
static void
8420
196k
xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
8421
    /*
8422
     * Is there any DTD definition ?
8423
     */
8424
196k
    if (RAW == '[') {
8425
196k
        int baseInputNr = ctxt->inputNr;
8426
196k
        ctxt->instate = XML_PARSER_DTD;
8427
196k
        NEXT;
8428
  /*
8429
   * Parse the succession of Markup declarations and
8430
   * PEReferences.
8431
   * Subsequence (markupdecl | PEReference | S)*
8432
   */
8433
11.7M
  while (((RAW != ']') || (ctxt->inputNr > baseInputNr)) &&
8434
11.7M
               (ctxt->instate != XML_PARSER_EOF)) {
8435
11.5M
      int id = ctxt->input->id;
8436
11.5M
      unsigned long cons = CUR_CONSUMED;
8437
8438
11.5M
      SKIP_BLANKS;
8439
11.5M
      xmlParseMarkupDecl(ctxt);
8440
11.5M
      xmlParsePEReference(ctxt);
8441
8442
            /*
8443
             * Conditional sections are allowed from external entities included
8444
             * by PE References in the internal subset.
8445
             */
8446
11.5M
            if ((ctxt->inputNr > 1) && (ctxt->input->filename != NULL) &&
8447
11.5M
                (RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
8448
0
                xmlParseConditionalSections(ctxt);
8449
0
            }
8450
8451
11.5M
      if ((id == ctxt->input->id) && (cons == CUR_CONSUMED)) {
8452
155k
    xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8453
155k
       "xmlParseInternalSubset: error detected in Markup declaration\n");
8454
155k
                if (ctxt->inputNr > baseInputNr)
8455
115k
                    xmlPopInput(ctxt);
8456
39.6k
                else
8457
39.6k
        break;
8458
155k
      }
8459
11.5M
  }
8460
196k
  if (RAW == ']') {
8461
148k
      NEXT;
8462
148k
      SKIP_BLANKS;
8463
148k
  }
8464
196k
    }
8465
8466
    /*
8467
     * We should be at the end of the DOCTYPE declaration.
8468
     */
8469
196k
    if (RAW != '>') {
8470
49.6k
  xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8471
49.6k
  return;
8472
49.6k
    }
8473
146k
    NEXT;
8474
146k
}
8475
8476
#ifdef LIBXML_SAX1_ENABLED
8477
/**
8478
 * xmlParseAttribute:
8479
 * @ctxt:  an XML parser context
8480
 * @value:  a xmlChar ** used to store the value of the attribute
8481
 *
8482
 * DEPRECATED: Internal function, don't use.
8483
 *
8484
 * parse an attribute
8485
 *
8486
 * [41] Attribute ::= Name Eq AttValue
8487
 *
8488
 * [ WFC: No External Entity References ]
8489
 * Attribute values cannot contain direct or indirect entity references
8490
 * to external entities.
8491
 *
8492
 * [ WFC: No < in Attribute Values ]
8493
 * The replacement text of any entity referred to directly or indirectly in
8494
 * an attribute value (other than "&lt;") must not contain a <.
8495
 *
8496
 * [ VC: Attribute Value Type ]
8497
 * The attribute must have been declared; the value must be of the type
8498
 * declared for it.
8499
 *
8500
 * [25] Eq ::= S? '=' S?
8501
 *
8502
 * With namespace:
8503
 *
8504
 * [NS 11] Attribute ::= QName Eq AttValue
8505
 *
8506
 * Also the case QName == xmlns:??? is handled independently as a namespace
8507
 * definition.
8508
 *
8509
 * Returns the attribute name, and the value in *value.
8510
 */
8511
8512
const xmlChar *
8513
11.0M
xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
8514
11.0M
    const xmlChar *name;
8515
11.0M
    xmlChar *val;
8516
8517
11.0M
    *value = NULL;
8518
11.0M
    GROW;
8519
11.0M
    name = xmlParseName(ctxt);
8520
11.0M
    if (name == NULL) {
8521
1.92M
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8522
1.92M
                 "error parsing attribute name\n");
8523
1.92M
        return(NULL);
8524
1.92M
    }
8525
8526
    /*
8527
     * read the value
8528
     */
8529
9.12M
    SKIP_BLANKS;
8530
9.12M
    if (RAW == '=') {
8531
8.21M
        NEXT;
8532
8.21M
  SKIP_BLANKS;
8533
8.21M
  val = xmlParseAttValue(ctxt);
8534
8.21M
  ctxt->instate = XML_PARSER_CONTENT;
8535
8.21M
    } else {
8536
910k
  xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8537
910k
         "Specification mandates value for attribute %s\n", name);
8538
910k
  return(NULL);
8539
910k
    }
8540
8541
    /*
8542
     * Check that xml:lang conforms to the specification
8543
     * No more registered as an error, just generate a warning now
8544
     * since this was deprecated in XML second edition
8545
     */
8546
8.21M
    if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8547
7.41k
  if (!xmlCheckLanguageID(val)) {
8548
4.23k
      xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8549
4.23k
              "Malformed value for xml:lang : %s\n",
8550
4.23k
        val, NULL);
8551
4.23k
  }
8552
7.41k
    }
8553
8554
    /*
8555
     * Check that xml:space conforms to the specification
8556
     */
8557
8.21M
    if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8558
17.3k
  if (xmlStrEqual(val, BAD_CAST "default"))
8559
0
      *(ctxt->space) = 0;
8560
17.3k
  else if (xmlStrEqual(val, BAD_CAST "preserve"))
8561
10.1k
      *(ctxt->space) = 1;
8562
7.18k
  else {
8563
7.18k
    xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8564
7.18k
"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8565
7.18k
                                 val, NULL);
8566
7.18k
  }
8567
17.3k
    }
8568
8569
8.21M
    *value = val;
8570
8.21M
    return(name);
8571
9.12M
}
8572
8573
/**
8574
 * xmlParseStartTag:
8575
 * @ctxt:  an XML parser context
8576
 *
8577
 * DEPRECATED: Internal function, don't use.
8578
 *
8579
 * parse a start of tag either for rule element or
8580
 * EmptyElement. In both case we don't parse the tag closing chars.
8581
 *
8582
 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8583
 *
8584
 * [ WFC: Unique Att Spec ]
8585
 * No attribute name may appear more than once in the same start-tag or
8586
 * empty-element tag.
8587
 *
8588
 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8589
 *
8590
 * [ WFC: Unique Att Spec ]
8591
 * No attribute name may appear more than once in the same start-tag or
8592
 * empty-element tag.
8593
 *
8594
 * With namespace:
8595
 *
8596
 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8597
 *
8598
 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8599
 *
8600
 * Returns the element name parsed
8601
 */
8602
8603
const xmlChar *
8604
20.2M
xmlParseStartTag(xmlParserCtxtPtr ctxt) {
8605
20.2M
    const xmlChar *name;
8606
20.2M
    const xmlChar *attname;
8607
20.2M
    xmlChar *attvalue;
8608
20.2M
    const xmlChar **atts = ctxt->atts;
8609
20.2M
    int nbatts = 0;
8610
20.2M
    int maxatts = ctxt->maxatts;
8611
20.2M
    int i;
8612
8613
20.2M
    if (RAW != '<') return(NULL);
8614
20.2M
    NEXT1;
8615
8616
20.2M
    name = xmlParseName(ctxt);
8617
20.2M
    if (name == NULL) {
8618
4.53M
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8619
4.53M
       "xmlParseStartTag: invalid element name\n");
8620
4.53M
        return(NULL);
8621
4.53M
    }
8622
8623
    /*
8624
     * Now parse the attributes, it ends up with the ending
8625
     *
8626
     * (S Attribute)* S?
8627
     */
8628
15.6M
    SKIP_BLANKS;
8629
15.6M
    GROW;
8630
8631
18.2M
    while (((RAW != '>') &&
8632
18.2M
     ((RAW != '/') || (NXT(1) != '>')) &&
8633
18.2M
     (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
8634
11.0M
        int id = ctxt->input->id;
8635
11.0M
  unsigned long cons = CUR_CONSUMED;
8636
8637
11.0M
  attname = xmlParseAttribute(ctxt, &attvalue);
8638
11.0M
        if ((attname != NULL) && (attvalue != NULL)) {
8639
      /*
8640
       * [ WFC: Unique Att Spec ]
8641
       * No attribute name may appear more than once in the same
8642
       * start-tag or empty-element tag.
8643
       */
8644
8.91M
      for (i = 0; i < nbatts;i += 2) {
8645
823k
          if (xmlStrEqual(atts[i], attname)) {
8646
16.5k
        xmlErrAttributeDup(ctxt, NULL, attname);
8647
16.5k
        xmlFree(attvalue);
8648
16.5k
        goto failed;
8649
16.5k
    }
8650
823k
      }
8651
      /*
8652
       * Add the pair to atts
8653
       */
8654
8.09M
      if (atts == NULL) {
8655
2.28M
          maxatts = 22; /* allow for 10 attrs by default */
8656
2.28M
          atts = (const xmlChar **)
8657
2.28M
           xmlMalloc(maxatts * sizeof(xmlChar *));
8658
2.28M
    if (atts == NULL) {
8659
0
        xmlErrMemory(ctxt, NULL);
8660
0
        if (attvalue != NULL)
8661
0
      xmlFree(attvalue);
8662
0
        goto failed;
8663
0
    }
8664
2.28M
    ctxt->atts = atts;
8665
2.28M
    ctxt->maxatts = maxatts;
8666
5.80M
      } else if (nbatts + 4 > maxatts) {
8667
97
          const xmlChar **n;
8668
8669
97
          maxatts *= 2;
8670
97
          n = (const xmlChar **) xmlRealloc((void *) atts,
8671
97
               maxatts * sizeof(const xmlChar *));
8672
97
    if (n == NULL) {
8673
0
        xmlErrMemory(ctxt, NULL);
8674
0
        if (attvalue != NULL)
8675
0
      xmlFree(attvalue);
8676
0
        goto failed;
8677
0
    }
8678
97
    atts = n;
8679
97
    ctxt->atts = atts;
8680
97
    ctxt->maxatts = maxatts;
8681
97
      }
8682
8.09M
      atts[nbatts++] = attname;
8683
8.09M
      atts[nbatts++] = attvalue;
8684
8.09M
      atts[nbatts] = NULL;
8685
8.09M
      atts[nbatts + 1] = NULL;
8686
8.09M
  } else {
8687
2.93M
      if (attvalue != NULL)
8688
0
    xmlFree(attvalue);
8689
2.93M
  }
8690
8691
11.0M
failed:
8692
8693
11.0M
  GROW
8694
11.0M
  if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8695
6.58M
      break;
8696
4.46M
  if (SKIP_BLANKS == 0) {
8697
3.63M
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8698
3.63M
         "attributes construct error\n");
8699
3.63M
  }
8700
4.46M
        if ((cons == CUR_CONSUMED) && (id == ctxt->input->id) &&
8701
4.46M
            (attname == NULL) && (attvalue == NULL)) {
8702
1.92M
      xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
8703
1.92M
         "xmlParseStartTag: problem parsing attributes\n");
8704
1.92M
      break;
8705
1.92M
  }
8706
2.53M
  SHRINK;
8707
2.53M
        GROW;
8708
2.53M
    }
8709
8710
    /*
8711
     * SAX: Start of Element !
8712
     */
8713
15.6M
    if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
8714
15.6M
  (!ctxt->disableSAX)) {
8715
6.24M
  if (nbatts > 0)
8716
3.89M
      ctxt->sax->startElement(ctxt->userData, name, atts);
8717
2.35M
  else
8718
2.35M
      ctxt->sax->startElement(ctxt->userData, name, NULL);
8719
6.24M
    }
8720
8721
15.6M
    if (atts != NULL) {
8722
        /* Free only the content strings */
8723
22.9M
        for (i = 1;i < nbatts;i+=2)
8724
8.09M
      if (atts[i] != NULL)
8725
8.09M
         xmlFree((xmlChar *) atts[i]);
8726
14.8M
    }
8727
15.6M
    return(name);
8728
15.6M
}
8729
8730
/**
8731
 * xmlParseEndTag1:
8732
 * @ctxt:  an XML parser context
8733
 * @line:  line of the start tag
8734
 * @nsNr:  number of namespaces on the start tag
8735
 *
8736
 * parse an end of tag
8737
 *
8738
 * [42] ETag ::= '</' Name S? '>'
8739
 *
8740
 * With namespace
8741
 *
8742
 * [NS 9] ETag ::= '</' QName S? '>'
8743
 */
8744
8745
static void
8746
5.10M
xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
8747
5.10M
    const xmlChar *name;
8748
8749
5.10M
    GROW;
8750
5.10M
    if ((RAW != '<') || (NXT(1) != '/')) {
8751
0
  xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
8752
0
           "xmlParseEndTag: '</' not found\n");
8753
0
  return;
8754
0
    }
8755
5.10M
    SKIP(2);
8756
8757
5.10M
    name = xmlParseNameAndCompare(ctxt,ctxt->name);
8758
8759
    /*
8760
     * We should definitely be at the ending "S? '>'" part
8761
     */
8762
5.10M
    GROW;
8763
5.10M
    SKIP_BLANKS;
8764
5.10M
    if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
8765
925k
  xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
8766
925k
    } else
8767
4.17M
  NEXT1;
8768
8769
    /*
8770
     * [ WFC: Element Type Match ]
8771
     * The Name in an element's end-tag must match the element type in the
8772
     * start-tag.
8773
     *
8774
     */
8775
5.10M
    if (name != (xmlChar*)1) {
8776
1.78M
        if (name == NULL) name = BAD_CAST "unparsable";
8777
1.78M
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
8778
1.78M
         "Opening and ending tag mismatch: %s line %d and %s\n",
8779
1.78M
                    ctxt->name, line, name);
8780
1.78M
    }
8781
8782
    /*
8783
     * SAX: End of Tag
8784
     */
8785
5.10M
    if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8786
5.10M
  (!ctxt->disableSAX))
8787
841k
        ctxt->sax->endElement(ctxt->userData, ctxt->name);
8788
8789
5.10M
    namePop(ctxt);
8790
5.10M
    spacePop(ctxt);
8791
5.10M
    return;
8792
5.10M
}
8793
8794
/**
8795
 * xmlParseEndTag:
8796
 * @ctxt:  an XML parser context
8797
 *
8798
 * DEPRECATED: Internal function, don't use.
8799
 *
8800
 * parse an end of tag
8801
 *
8802
 * [42] ETag ::= '</' Name S? '>'
8803
 *
8804
 * With namespace
8805
 *
8806
 * [NS 9] ETag ::= '</' QName S? '>'
8807
 */
8808
8809
void
8810
0
xmlParseEndTag(xmlParserCtxtPtr ctxt) {
8811
0
    xmlParseEndTag1(ctxt, 0);
8812
0
}
8813
#endif /* LIBXML_SAX1_ENABLED */
8814
8815
/************************************************************************
8816
 *                  *
8817
 *          SAX 2 specific operations       *
8818
 *                  *
8819
 ************************************************************************/
8820
8821
/*
8822
 * xmlGetNamespace:
8823
 * @ctxt:  an XML parser context
8824
 * @prefix:  the prefix to lookup
8825
 *
8826
 * Lookup the namespace name for the @prefix (which can be NULL)
8827
 * The prefix must come from the @ctxt->dict dictionary
8828
 *
8829
 * Returns the namespace name or NULL if not bound
8830
 */
8831
static const xmlChar *
8832
2.82M
xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
8833
2.82M
    int i;
8834
8835
2.82M
    if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
8836
4.77M
    for (i = ctxt->nsNr - 2;i >= 0;i-=2)
8837
2.62M
        if (ctxt->nsTab[i] == prefix) {
8838
587k
      if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
8839
2.21k
          return(NULL);
8840
585k
      return(ctxt->nsTab[i + 1]);
8841
587k
  }
8842
2.15M
    return(NULL);
8843
2.73M
}
8844
8845
/**
8846
 * xmlParseQName:
8847
 * @ctxt:  an XML parser context
8848
 * @prefix:  pointer to store the prefix part
8849
 *
8850
 * parse an XML Namespace QName
8851
 *
8852
 * [6]  QName  ::= (Prefix ':')? LocalPart
8853
 * [7]  Prefix  ::= NCName
8854
 * [8]  LocalPart  ::= NCName
8855
 *
8856
 * Returns the Name parsed or NULL
8857
 */
8858
8859
static const xmlChar *
8860
4.15M
xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8861
4.15M
    const xmlChar *l, *p;
8862
8863
4.15M
    GROW;
8864
8865
4.15M
    l = xmlParseNCName(ctxt);
8866
4.15M
    if (l == NULL) {
8867
347k
        if (CUR == ':') {
8868
4.72k
      l = xmlParseName(ctxt);
8869
4.72k
      if (l != NULL) {
8870
4.72k
          xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8871
4.72k
             "Failed to parse QName '%s'\n", l, NULL, NULL);
8872
4.72k
    *prefix = NULL;
8873
4.72k
    return(l);
8874
4.72k
      }
8875
4.72k
  }
8876
342k
        return(NULL);
8877
347k
    }
8878
3.81M
    if (CUR == ':') {
8879
1.21M
        NEXT;
8880
1.21M
  p = l;
8881
1.21M
  l = xmlParseNCName(ctxt);
8882
1.21M
  if (l == NULL) {
8883
15.9k
      xmlChar *tmp;
8884
8885
15.9k
            if (ctxt->instate == XML_PARSER_EOF)
8886
0
                return(NULL);
8887
15.9k
            xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8888
15.9k
               "Failed to parse QName '%s:'\n", p, NULL, NULL);
8889
15.9k
      l = xmlParseNmtoken(ctxt);
8890
15.9k
      if (l == NULL) {
8891
9.22k
                if (ctxt->instate == XML_PARSER_EOF)
8892
0
                    return(NULL);
8893
9.22k
    tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
8894
9.22k
            } else {
8895
6.70k
    tmp = xmlBuildQName(l, p, NULL, 0);
8896
6.70k
    xmlFree((char *)l);
8897
6.70k
      }
8898
15.9k
      p = xmlDictLookup(ctxt->dict, tmp, -1);
8899
15.9k
      if (tmp != NULL) xmlFree(tmp);
8900
15.9k
      *prefix = NULL;
8901
15.9k
      return(p);
8902
15.9k
  }
8903
1.20M
  if (CUR == ':') {
8904
6.69k
      xmlChar *tmp;
8905
8906
6.69k
            xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8907
6.69k
               "Failed to parse QName '%s:%s:'\n", p, l, NULL);
8908
6.69k
      NEXT;
8909
6.69k
      tmp = (xmlChar *) xmlParseName(ctxt);
8910
6.69k
      if (tmp != NULL) {
8911
5.28k
          tmp = xmlBuildQName(tmp, l, NULL, 0);
8912
5.28k
    l = xmlDictLookup(ctxt->dict, tmp, -1);
8913
5.28k
    if (tmp != NULL) xmlFree(tmp);
8914
5.28k
    *prefix = p;
8915
5.28k
    return(l);
8916
5.28k
      }
8917
1.40k
            if (ctxt->instate == XML_PARSER_EOF)
8918
0
                return(NULL);
8919
1.40k
      tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
8920
1.40k
      l = xmlDictLookup(ctxt->dict, tmp, -1);
8921
1.40k
      if (tmp != NULL) xmlFree(tmp);
8922
1.40k
      *prefix = p;
8923
1.40k
      return(l);
8924
1.40k
  }
8925
1.19M
  *prefix = p;
8926
1.19M
    } else
8927
2.59M
        *prefix = NULL;
8928
3.78M
    return(l);
8929
3.81M
}
8930
8931
/**
8932
 * xmlParseQNameAndCompare:
8933
 * @ctxt:  an XML parser context
8934
 * @name:  the localname
8935
 * @prefix:  the prefix, if any.
8936
 *
8937
 * parse an XML name and compares for match
8938
 * (specialized for endtag parsing)
8939
 *
8940
 * Returns NULL for an illegal name, (xmlChar*) 1 for success
8941
 * and the name for mismatch
8942
 */
8943
8944
static const xmlChar *
8945
xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8946
140k
                        xmlChar const *prefix) {
8947
140k
    const xmlChar *cmp;
8948
140k
    const xmlChar *in;
8949
140k
    const xmlChar *ret;
8950
140k
    const xmlChar *prefix2;
8951
8952
140k
    if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8953
8954
140k
    GROW;
8955
140k
    in = ctxt->input->cur;
8956
8957
140k
    cmp = prefix;
8958
458k
    while (*in != 0 && *in == *cmp) {
8959
318k
  ++in;
8960
318k
  ++cmp;
8961
318k
    }
8962
140k
    if ((*cmp == 0) && (*in == ':')) {
8963
126k
        in++;
8964
126k
  cmp = name;
8965
1.00M
  while (*in != 0 && *in == *cmp) {
8966
878k
      ++in;
8967
878k
      ++cmp;
8968
878k
  }
8969
126k
  if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
8970
      /* success */
8971
112k
            ctxt->input->col += in - ctxt->input->cur;
8972
112k
      ctxt->input->cur = in;
8973
112k
      return((const xmlChar*) 1);
8974
112k
  }
8975
126k
    }
8976
    /*
8977
     * all strings coms from the dictionary, equality can be done directly
8978
     */
8979
27.6k
    ret = xmlParseQName (ctxt, &prefix2);
8980
27.6k
    if ((ret == name) && (prefix == prefix2))
8981
580
  return((const xmlChar*) 1);
8982
27.0k
    return ret;
8983
27.6k
}
8984
8985
/**
8986
 * xmlParseAttValueInternal:
8987
 * @ctxt:  an XML parser context
8988
 * @len:  attribute len result
8989
 * @alloc:  whether the attribute was reallocated as a new string
8990
 * @normalize:  if 1 then further non-CDATA normalization must be done
8991
 *
8992
 * parse a value for an attribute.
8993
 * NOTE: if no normalization is needed, the routine will return pointers
8994
 *       directly from the data buffer.
8995
 *
8996
 * 3.3.3 Attribute-Value Normalization:
8997
 * Before the value of an attribute is passed to the application or
8998
 * checked for validity, the XML processor must normalize it as follows:
8999
 * - a character reference is processed by appending the referenced
9000
 *   character to the attribute value
9001
 * - an entity reference is processed by recursively processing the
9002
 *   replacement text of the entity
9003
 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
9004
 *   appending #x20 to the normalized value, except that only a single
9005
 *   #x20 is appended for a "#xD#xA" sequence that is part of an external
9006
 *   parsed entity or the literal entity value of an internal parsed entity
9007
 * - other characters are processed by appending them to the normalized value
9008
 * If the declared value is not CDATA, then the XML processor must further
9009
 * process the normalized attribute value by discarding any leading and
9010
 * trailing space (#x20) characters, and by replacing sequences of space
9011
 * (#x20) characters by a single space (#x20) character.
9012
 * All attributes for which no declaration has been read should be treated
9013
 * by a non-validating parser as if declared CDATA.
9014
 *
9015
 * Returns the AttValue parsed or NULL. The value has to be freed by the
9016
 *     caller if it was copied, this can be detected by val[*len] == 0.
9017
 */
9018
9019
/*
 * Helper for xmlParseAttValueInternal(): grow the input, return NULL from
 * the calling function if the parser reached XML_PARSER_EOF, rebase @in and
 * @start if the input base pointer changed, and refresh @end.
 * It declares a local variable and expands to several statements, so it
 * must be the first thing in its own brace-enclosed block and is written
 * without a trailing semicolon.
 */
#define GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end) \
    const xmlChar *baseBeforeGrow = ctxt->input->base;\
    GROW;\
    if (ctxt->instate == XML_PARSER_EOF)\
        return(NULL);\
    if (baseBeforeGrow != ctxt->input->base) {\
        ptrdiff_t baseShift = ctxt->input->base - baseBeforeGrow;\
        start += baseShift;\
        in += baseShift;\
    }\
    end = ctxt->input->end;
9030
9031
static xmlChar *
9032
xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
9033
                         int normalize)
9034
10.8M
{
9035
10.8M
    xmlChar limit = 0;
9036
10.8M
    const xmlChar *in = NULL, *start, *end, *last;
9037
10.8M
    xmlChar *ret = NULL;
9038
10.8M
    int line, col;
9039
10.8M
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
9040
9.20M
                    XML_MAX_HUGE_LENGTH :
9041
10.8M
                    XML_MAX_TEXT_LENGTH;
9042
9043
10.8M
    GROW;
9044
10.8M
    in = (xmlChar *) CUR_PTR;
9045
10.8M
    line = ctxt->input->line;
9046
10.8M
    col = ctxt->input->col;
9047
10.8M
    if (*in != '"' && *in != '\'') {
9048
194k
        xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
9049
194k
        return (NULL);
9050
194k
    }
9051
10.6M
    ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
9052
9053
    /*
9054
     * try to handle in this routine the most common case where no
9055
     * allocation of a new string is required and where content is
9056
     * pure ASCII.
9057
     */
9058
10.6M
    limit = *in++;
9059
10.6M
    col++;
9060
10.6M
    end = ctxt->input->end;
9061
10.6M
    start = in;
9062
10.6M
    if (in >= end) {
9063
3.73k
        GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9064
3.73k
    }
9065
10.6M
    if (normalize) {
9066
        /*
9067
   * Skip any leading spaces
9068
   */
9069
277k
  while ((in < end) && (*in != limit) &&
9070
277k
         ((*in == 0x20) || (*in == 0x9) ||
9071
275k
          (*in == 0xA) || (*in == 0xD))) {
9072
209k
      if (*in == 0xA) {
9073
23.8k
          line++; col = 1;
9074
185k
      } else {
9075
185k
          col++;
9076
185k
      }
9077
209k
      in++;
9078
209k
      start = in;
9079
209k
      if (in >= end) {
9080
159
                GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9081
159
                if ((in - start) > maxLength) {
9082
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9083
0
                                   "AttValue length too long\n");
9084
0
                    return(NULL);
9085
0
                }
9086
159
      }
9087
209k
  }
9088
372k
  while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9089
372k
         (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
9090
307k
      col++;
9091
307k
      if ((*in++ == 0x20) && (*in == 0x20)) break;
9092
304k
      if (in >= end) {
9093
250
                GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9094
250
                if ((in - start) > maxLength) {
9095
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9096
0
                                   "AttValue length too long\n");
9097
0
                    return(NULL);
9098
0
                }
9099
250
      }
9100
304k
  }
9101
67.9k
  last = in;
9102
  /*
9103
   * skip the trailing blanks
9104
   */
9105
72.1k
  while ((last[-1] == 0x20) && (last > start)) last--;
9106
309k
  while ((in < end) && (*in != limit) &&
9107
309k
         ((*in == 0x20) || (*in == 0x9) ||
9108
268k
          (*in == 0xA) || (*in == 0xD))) {
9109
241k
      if (*in == 0xA) {
9110
16.7k
          line++, col = 1;
9111
225k
      } else {
9112
225k
          col++;
9113
225k
      }
9114
241k
      in++;
9115
241k
      if (in >= end) {
9116
193
    const xmlChar *oldbase = ctxt->input->base;
9117
193
    GROW;
9118
193
                if (ctxt->instate == XML_PARSER_EOF)
9119
0
                    return(NULL);
9120
193
    if (oldbase != ctxt->input->base) {
9121
0
        ptrdiff_t delta = ctxt->input->base - oldbase;
9122
0
        start = start + delta;
9123
0
        in = in + delta;
9124
0
        last = last + delta;
9125
0
    }
9126
193
    end = ctxt->input->end;
9127
193
                if ((in - start) > maxLength) {
9128
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9129
0
                                   "AttValue length too long\n");
9130
0
                    return(NULL);
9131
0
                }
9132
193
      }
9133
241k
  }
9134
67.9k
        if ((in - start) > maxLength) {
9135
0
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9136
0
                           "AttValue length too long\n");
9137
0
            return(NULL);
9138
0
        }
9139
67.9k
  if (*in != limit) goto need_complex;
9140
10.5M
    } else {
9141
148M
  while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9142
148M
         (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
9143
137M
      in++;
9144
137M
      col++;
9145
137M
      if (in >= end) {
9146
22.4k
                GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9147
22.4k
                if ((in - start) > maxLength) {
9148
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9149
0
                                   "AttValue length too long\n");
9150
0
                    return(NULL);
9151
0
                }
9152
22.4k
      }
9153
137M
  }
9154
10.5M
  last = in;
9155
10.5M
        if ((in - start) > maxLength) {
9156
0
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9157
0
                           "AttValue length too long\n");
9158
0
            return(NULL);
9159
0
        }
9160
10.5M
  if (*in != limit) goto need_complex;
9161
10.5M
    }
9162
7.43M
    in++;
9163
7.43M
    col++;
9164
7.43M
    if (len != NULL) {
9165
973k
        if (alloc) *alloc = 0;
9166
973k
        *len = last - start;
9167
973k
        ret = (xmlChar *) start;
9168
6.45M
    } else {
9169
6.45M
        if (alloc) *alloc = 1;
9170
6.45M
        ret = xmlStrndup(start, last - start);
9171
6.45M
    }
9172
7.43M
    CUR_PTR = in;
9173
7.43M
    ctxt->input->line = line;
9174
7.43M
    ctxt->input->col = col;
9175
7.43M
    return ret;
9176
3.19M
need_complex:
9177
3.19M
    if (alloc) *alloc = 1;
9178
3.19M
    return xmlParseAttValueComplex(ctxt, len, normalize);
9179
10.6M
}
9180
9181
/**
9182
 * xmlParseAttribute2:
9183
 * @ctxt:  an XML parser context
9184
 * @pref:  the element prefix
9185
 * @elem:  the element name
9186
 * @prefix:  a xmlChar ** used to store the value of the attribute prefix
9187
 * @value:  a xmlChar ** used to store the value of the attribute
9188
 * @len:  an int * to save the length of the attribute
9189
 * @alloc:  an int * to indicate if the attribute was allocated
9190
 *
9191
 * parse an attribute in the new SAX2 framework.
9192
 *
9193
 * Returns the attribute name, and the value in *value.
9194
 */
9195
9196
static const xmlChar *
9197
xmlParseAttribute2(xmlParserCtxtPtr ctxt,
9198
                   const xmlChar * pref, const xmlChar * elem,
9199
                   const xmlChar ** prefix, xmlChar ** value,
9200
                   int *len, int *alloc)
9201
1.34M
{
9202
1.34M
    const xmlChar *name;
9203
1.34M
    xmlChar *val, *internal_val = NULL;
9204
1.34M
    int normalize = 0;
9205
9206
1.34M
    *value = NULL;
9207
1.34M
    GROW;
9208
1.34M
    name = xmlParseQName(ctxt, prefix);
9209
1.34M
    if (name == NULL) {
9210
94.1k
        xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9211
94.1k
                       "error parsing attribute name\n");
9212
94.1k
        return (NULL);
9213
94.1k
    }
9214
9215
    /*
9216
     * get the type if needed
9217
     */
9218
1.25M
    if (ctxt->attsSpecial != NULL) {
9219
254k
        int type;
9220
9221
254k
        type = (int) (ptrdiff_t) xmlHashQLookup2(ctxt->attsSpecial,
9222
254k
                                                 pref, elem, *prefix, name);
9223
254k
        if (type != 0)
9224
68.5k
            normalize = 1;
9225
254k
    }
9226
9227
    /*
9228
     * read the value
9229
     */
9230
1.25M
    SKIP_BLANKS;
9231
1.25M
    if (RAW == '=') {
9232
1.20M
        NEXT;
9233
1.20M
        SKIP_BLANKS;
9234
1.20M
        val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
9235
1.20M
  if (normalize) {
9236
      /*
9237
       * Sometimes a second normalisation pass for spaces is needed
9238
       * but that only happens if charrefs or entities references
9239
       * have been used in the attribute value, i.e. the attribute
9240
       * value have been extracted in an allocated string already.
9241
       */
9242
68.2k
      if (*alloc) {
9243
27.1k
          const xmlChar *val2;
9244
9245
27.1k
          val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
9246
27.1k
    if ((val2 != NULL) && (val2 != val)) {
9247
3.43k
        xmlFree(val);
9248
3.43k
        val = (xmlChar *) val2;
9249
3.43k
    }
9250
27.1k
      }
9251
68.2k
  }
9252
1.20M
        ctxt->instate = XML_PARSER_CONTENT;
9253
1.20M
    } else {
9254
46.7k
        xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
9255
46.7k
                          "Specification mandates value for attribute %s\n",
9256
46.7k
                          name);
9257
46.7k
        return (NULL);
9258
46.7k
    }
9259
9260
1.20M
    if (*prefix == ctxt->str_xml) {
9261
        /*
9262
         * Check that xml:lang conforms to the specification
9263
         * No more registered as an error, just generate a warning now
9264
         * since this was deprecated in XML second edition
9265
         */
9266
90.0k
        if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
9267
9.11k
            internal_val = xmlStrndup(val, *len);
9268
9.11k
            if (!xmlCheckLanguageID(internal_val)) {
9269
6.01k
                xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
9270
6.01k
                              "Malformed value for xml:lang : %s\n",
9271
6.01k
                              internal_val, NULL);
9272
6.01k
            }
9273
9.11k
        }
9274
9275
        /*
9276
         * Check that xml:space conforms to the specification
9277
         */
9278
90.0k
        if (xmlStrEqual(name, BAD_CAST "space")) {
9279
1.74k
            internal_val = xmlStrndup(val, *len);
9280
1.74k
            if (xmlStrEqual(internal_val, BAD_CAST "default"))
9281
0
                *(ctxt->space) = 0;
9282
1.74k
            else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
9283
667
                *(ctxt->space) = 1;
9284
1.07k
            else {
9285
1.07k
                xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
9286
1.07k
                              "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
9287
1.07k
                              internal_val, NULL);
9288
1.07k
            }
9289
1.74k
        }
9290
90.0k
        if (internal_val) {
9291
10.3k
            xmlFree(internal_val);
9292
10.3k
        }
9293
90.0k
    }
9294
9295
1.20M
    *value = val;
9296
1.20M
    return (name);
9297
1.25M
}
9298
/**
9299
 * xmlParseStartTag2:
9300
 * @ctxt:  an XML parser context
9301
 *
9302
 * parse a start of tag either for rule element or
9303
 * EmptyElement. In both case we don't parse the tag closing chars.
9304
 * This routine is called when running SAX2 parsing
9305
 *
9306
 * [40] STag ::= '<' Name (S Attribute)* S? '>'
9307
 *
9308
 * [ WFC: Unique Att Spec ]
9309
 * No attribute name may appear more than once in the same start-tag or
9310
 * empty-element tag.
9311
 *
9312
 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
9313
 *
9314
 * [ WFC: Unique Att Spec ]
9315
 * No attribute name may appear more than once in the same start-tag or
9316
 * empty-element tag.
9317
 *
9318
 * With namespace:
9319
 *
9320
 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
9321
 *
9322
 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
9323
 *
9324
 * Returns the element name parsed
9325
 */
9326
9327
static const xmlChar *
9328
xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
9329
2.78M
                  const xmlChar **URI, int *tlen) {
9330
2.78M
    const xmlChar *localname;
9331
2.78M
    const xmlChar *prefix;
9332
2.78M
    const xmlChar *attname;
9333
2.78M
    const xmlChar *aprefix;
9334
2.78M
    const xmlChar *nsname;
9335
2.78M
    xmlChar *attvalue;
9336
2.78M
    const xmlChar **atts = ctxt->atts;
9337
2.78M
    int maxatts = ctxt->maxatts;
9338
2.78M
    int nratts, nbatts, nbdef, inputid;
9339
2.78M
    int i, j, nbNs, attval;
9340
2.78M
    unsigned long cur;
9341
2.78M
    int nsNr = ctxt->nsNr;
9342
9343
2.78M
    if (RAW != '<') return(NULL);
9344
2.78M
    NEXT1;
9345
9346
    /*
9347
     * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
9348
     *       point since the attribute values may be stored as pointers to
9349
     *       the buffer and calling SHRINK would destroy them !
9350
     *       The Shrinking is only possible once the full set of attribute
9351
     *       callbacks have been done.
9352
     */
9353
2.78M
    SHRINK;
9354
2.78M
    cur = ctxt->input->cur - ctxt->input->base;
9355
2.78M
    inputid = ctxt->input->id;
9356
2.78M
    nbatts = 0;
9357
2.78M
    nratts = 0;
9358
2.78M
    nbdef = 0;
9359
2.78M
    nbNs = 0;
9360
2.78M
    attval = 0;
9361
    /* Forget any namespaces added during an earlier parse of this element. */
9362
2.78M
    ctxt->nsNr = nsNr;
9363
9364
2.78M
    localname = xmlParseQName(ctxt, &prefix);
9365
2.78M
    if (localname == NULL) {
9366
247k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9367
247k
           "StartTag: invalid element name\n");
9368
247k
        return(NULL);
9369
247k
    }
9370
2.53M
    *tlen = ctxt->input->cur - ctxt->input->base - cur;
9371
9372
    /*
9373
     * Now parse the attributes, it ends up with the ending
9374
     *
9375
     * (S Attribute)* S?
9376
     */
9377
2.53M
    SKIP_BLANKS;
9378
2.53M
    GROW;
9379
9380
2.76M
    while (((RAW != '>') &&
9381
2.76M
     ((RAW != '/') || (NXT(1) != '>')) &&
9382
2.76M
     (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
9383
1.34M
  int id = ctxt->input->id;
9384
1.34M
  unsigned long cons = CUR_CONSUMED;
9385
1.34M
  int len = -1, alloc = 0;
9386
9387
1.34M
  attname = xmlParseAttribute2(ctxt, prefix, localname,
9388
1.34M
                               &aprefix, &attvalue, &len, &alloc);
9389
1.34M
        if ((attname == NULL) || (attvalue == NULL))
9390
148k
            goto next_attr;
9391
1.19M
  if (len < 0) len = xmlStrlen(attvalue);
9392
9393
1.19M
        if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9394
23.9k
            const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9395
23.9k
            xmlURIPtr uri;
9396
9397
23.9k
            if (URL == NULL) {
9398
0
                xmlErrMemory(ctxt, "dictionary allocation failure");
9399
0
                if ((attvalue != NULL) && (alloc != 0))
9400
0
                    xmlFree(attvalue);
9401
0
                localname = NULL;
9402
0
                goto done;
9403
0
            }
9404
23.9k
            if (*URL != 0) {
9405
23.0k
                uri = xmlParseURI((const char *) URL);
9406
23.0k
                if (uri == NULL) {
9407
5.57k
                    xmlNsErr(ctxt, XML_WAR_NS_URI,
9408
5.57k
                             "xmlns: '%s' is not a valid URI\n",
9409
5.57k
                                       URL, NULL, NULL);
9410
17.4k
                } else {
9411
17.4k
                    if (uri->scheme == NULL) {
9412
5.32k
                        xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9413
5.32k
                                  "xmlns: URI %s is not absolute\n",
9414
5.32k
                                  URL, NULL, NULL);
9415
5.32k
                    }
9416
17.4k
                    xmlFreeURI(uri);
9417
17.4k
                }
9418
23.0k
                if (URL == ctxt->str_xml_ns) {
9419
0
                    if (attname != ctxt->str_xml) {
9420
0
                        xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9421
0
                     "xml namespace URI cannot be the default namespace\n",
9422
0
                                 NULL, NULL, NULL);
9423
0
                    }
9424
0
                    goto next_attr;
9425
0
                }
9426
23.0k
                if ((len == 29) &&
9427
23.0k
                    (xmlStrEqual(URL,
9428
280
                             BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9429
0
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9430
0
                         "reuse of the xmlns namespace name is forbidden\n",
9431
0
                             NULL, NULL, NULL);
9432
0
                    goto next_attr;
9433
0
                }
9434
23.0k
            }
9435
            /*
9436
             * check that it's not a defined namespace
9437
             */
9438
32.1k
            for (j = 1;j <= nbNs;j++)
9439
10.6k
                if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9440
2.43k
                    break;
9441
23.9k
            if (j <= nbNs)
9442
2.43k
                xmlErrAttributeDup(ctxt, NULL, attname);
9443
21.5k
            else
9444
21.5k
                if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
9445
9446
1.17M
        } else if (aprefix == ctxt->str_xmlns) {
9447
178k
            const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9448
178k
            xmlURIPtr uri;
9449
9450
178k
            if (attname == ctxt->str_xml) {
9451
365
                if (URL != ctxt->str_xml_ns) {
9452
365
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9453
365
                             "xml namespace prefix mapped to wrong URI\n",
9454
365
                             NULL, NULL, NULL);
9455
365
                }
9456
                /*
9457
                 * Do not keep a namespace definition node
9458
                 */
9459
365
                goto next_attr;
9460
365
            }
9461
178k
            if (URL == ctxt->str_xml_ns) {
9462
0
                if (attname != ctxt->str_xml) {
9463
0
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9464
0
                             "xml namespace URI mapped to wrong prefix\n",
9465
0
                             NULL, NULL, NULL);
9466
0
                }
9467
0
                goto next_attr;
9468
0
            }
9469
178k
            if (attname == ctxt->str_xmlns) {
9470
219
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9471
219
                         "redefinition of the xmlns prefix is forbidden\n",
9472
219
                         NULL, NULL, NULL);
9473
219
                goto next_attr;
9474
219
            }
9475
178k
            if ((len == 29) &&
9476
178k
                (xmlStrEqual(URL,
9477
9.16k
                             BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9478
0
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9479
0
                         "reuse of the xmlns namespace name is forbidden\n",
9480
0
                         NULL, NULL, NULL);
9481
0
                goto next_attr;
9482
0
            }
9483
178k
            if ((URL == NULL) || (URL[0] == 0)) {
9484
444
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9485
444
                         "xmlns:%s: Empty XML namespace is not allowed\n",
9486
444
                              attname, NULL, NULL);
9487
444
                goto next_attr;
9488
177k
            } else {
9489
177k
                uri = xmlParseURI((const char *) URL);
9490
177k
                if (uri == NULL) {
9491
15.6k
                    xmlNsErr(ctxt, XML_WAR_NS_URI,
9492
15.6k
                         "xmlns:%s: '%s' is not a valid URI\n",
9493
15.6k
                                       attname, URL, NULL);
9494
162k
                } else {
9495
162k
                    if ((ctxt->pedantic) && (uri->scheme == NULL)) {
9496
1.05k
                        xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9497
1.05k
                                  "xmlns:%s: URI %s is not absolute\n",
9498
1.05k
                                  attname, URL, NULL);
9499
1.05k
                    }
9500
162k
                    xmlFreeURI(uri);
9501
162k
                }
9502
177k
            }
9503
9504
            /*
9505
             * check that it's not a defined namespace
9506
             */
9507
208k
            for (j = 1;j <= nbNs;j++)
9508
33.5k
                if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9509
2.93k
                    break;
9510
177k
            if (j <= nbNs)
9511
2.93k
                xmlErrAttributeDup(ctxt, aprefix, attname);
9512
175k
            else
9513
175k
                if (nsPush(ctxt, attname, URL) > 0) nbNs++;
9514
9515
996k
        } else {
9516
            /*
9517
             * Add the pair to atts
9518
             */
9519
996k
            if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9520
216k
                if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9521
0
                    goto next_attr;
9522
0
                }
9523
216k
                maxatts = ctxt->maxatts;
9524
216k
                atts = ctxt->atts;
9525
216k
            }
9526
996k
            ctxt->attallocs[nratts++] = alloc;
9527
996k
            atts[nbatts++] = attname;
9528
996k
            atts[nbatts++] = aprefix;
9529
            /*
9530
             * The namespace URI field is used temporarily to point at the
9531
             * base of the current input buffer for non-alloced attributes.
9532
             * When the input buffer is reallocated, all the pointers become
9533
             * invalid, but they can be reconstructed later.
9534
             */
9535
996k
            if (alloc)
9536
206k
                atts[nbatts++] = NULL;
9537
789k
            else
9538
789k
                atts[nbatts++] = ctxt->input->base;
9539
996k
            atts[nbatts++] = attvalue;
9540
996k
            attvalue += len;
9541
996k
            atts[nbatts++] = attvalue;
9542
            /*
9543
             * tag if some deallocation is needed
9544
             */
9545
996k
            if (alloc != 0) attval = 1;
9546
996k
            attvalue = NULL; /* moved into atts */
9547
996k
        }
9548
9549
1.34M
next_attr:
9550
1.34M
        if ((attvalue != NULL) && (alloc != 0)) {
9551
19.1k
            xmlFree(attvalue);
9552
19.1k
            attvalue = NULL;
9553
19.1k
        }
9554
9555
1.34M
  GROW
9556
1.34M
        if (ctxt->instate == XML_PARSER_EOF)
9557
0
            break;
9558
1.34M
  if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9559
891k
      break;
9560
455k
  if (SKIP_BLANKS == 0) {
9561
224k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9562
224k
         "attributes construct error\n");
9563
224k
      break;
9564
224k
  }
9565
231k
        if ((cons == CUR_CONSUMED) && (id == ctxt->input->id) &&
9566
231k
            (attname == NULL) && (attvalue == NULL)) {
9567
0
      xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9568
0
           "xmlParseStartTag: problem parsing attributes\n");
9569
0
      break;
9570
0
  }
9571
231k
        GROW;
9572
231k
    }
9573
9574
2.53M
    if (ctxt->input->id != inputid) {
9575
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9576
0
                    "Unexpected change of input\n");
9577
0
        localname = NULL;
9578
0
        goto done;
9579
0
    }
9580
9581
    /* Reconstruct attribute value pointers. */
9582
3.53M
    for (i = 0, j = 0; j < nratts; i += 5, j++) {
9583
996k
        if (atts[i+2] != NULL) {
9584
            /*
9585
             * Arithmetic on dangling pointers is technically undefined
9586
             * behavior, but well...
9587
             */
9588
789k
            ptrdiff_t offset = ctxt->input->base - atts[i+2];
9589
789k
            atts[i+2]  = NULL;    /* Reset repurposed namespace URI */
9590
789k
            atts[i+3] += offset;  /* value */
9591
789k
            atts[i+4] += offset;  /* valuend */
9592
789k
        }
9593
996k
    }
9594
9595
    /*
9596
     * The attributes defaulting
9597
     */
9598
2.53M
    if (ctxt->attsDefault != NULL) {
9599
278k
        xmlDefAttrsPtr defaults;
9600
9601
278k
  defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9602
278k
  if (defaults != NULL) {
9603
69.1k
      for (i = 0;i < defaults->nbAttrs;i++) {
9604
46.4k
          attname = defaults->values[5 * i];
9605
46.4k
    aprefix = defaults->values[5 * i + 1];
9606
9607
                /*
9608
     * special work for namespaces defaulted defs
9609
     */
9610
46.4k
    if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9611
        /*
9612
         * check that it's not a defined namespace
9613
         */
9614
4.31k
        for (j = 1;j <= nbNs;j++)
9615
1.44k
            if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9616
634
          break;
9617
3.50k
              if (j <= nbNs) continue;
9618
9619
2.87k
        nsname = xmlGetNamespace(ctxt, NULL);
9620
2.87k
        if (nsname != defaults->values[5 * i + 2]) {
9621
1.24k
      if (nsPush(ctxt, NULL,
9622
1.24k
                 defaults->values[5 * i + 2]) > 0)
9623
1.19k
          nbNs++;
9624
1.24k
        }
9625
42.9k
    } else if (aprefix == ctxt->str_xmlns) {
9626
        /*
9627
         * check that it's not a defined namespace
9628
         */
9629
7.10k
        for (j = 1;j <= nbNs;j++)
9630
2.84k
            if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9631
2.07k
          break;
9632
6.34k
              if (j <= nbNs) continue;
9633
9634
4.26k
        nsname = xmlGetNamespace(ctxt, attname);
9635
4.26k
        if (nsname != defaults->values[2]) {
9636
3.00k
      if (nsPush(ctxt, attname,
9637
3.00k
                 defaults->values[5 * i + 2]) > 0)
9638
2.63k
          nbNs++;
9639
3.00k
        }
9640
36.5k
    } else {
9641
        /*
9642
         * check that it's not a defined attribute
9643
         */
9644
102k
        for (j = 0;j < nbatts;j+=5) {
9645
66.1k
      if ((attname == atts[j]) && (aprefix == atts[j+1]))
9646
612
          break;
9647
66.1k
        }
9648
36.5k
        if (j < nbatts) continue;
9649
9650
35.9k
        if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9651
3.23k
      if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9652
0
                            localname = NULL;
9653
0
                            goto done;
9654
0
      }
9655
3.23k
      maxatts = ctxt->maxatts;
9656
3.23k
      atts = ctxt->atts;
9657
3.23k
        }
9658
35.9k
        atts[nbatts++] = attname;
9659
35.9k
        atts[nbatts++] = aprefix;
9660
35.9k
        if (aprefix == NULL)
9661
30.0k
      atts[nbatts++] = NULL;
9662
5.91k
        else
9663
5.91k
            atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
9664
35.9k
        atts[nbatts++] = defaults->values[5 * i + 2];
9665
35.9k
        atts[nbatts++] = defaults->values[5 * i + 3];
9666
35.9k
        if ((ctxt->standalone == 1) &&
9667
35.9k
            (defaults->values[5 * i + 4] != NULL)) {
9668
0
      xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
9669
0
    "standalone: attribute %s on %s defaulted from external subset\n",
9670
0
                                   attname, localname);
9671
0
        }
9672
35.9k
        nbdef++;
9673
35.9k
    }
9674
46.4k
      }
9675
22.6k
  }
9676
278k
    }
9677
9678
    /*
9679
     * The attributes checkings
9680
     */
9681
3.56M
    for (i = 0; i < nbatts;i += 5) {
9682
        /*
9683
  * The default namespace does not apply to attribute names.
9684
  */
9685
1.03M
  if (atts[i + 1] != NULL) {
9686
280k
      nsname = xmlGetNamespace(ctxt, atts[i + 1]);
9687
280k
      if (nsname == NULL) {
9688
111k
    xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9689
111k
        "Namespace prefix %s for %s on %s is not defined\n",
9690
111k
        atts[i + 1], atts[i], localname);
9691
111k
      }
9692
280k
      atts[i + 2] = nsname;
9693
280k
  } else
9694
752k
      nsname = NULL;
9695
  /*
9696
   * [ WFC: Unique Att Spec ]
9697
   * No attribute name may appear more than once in the same
9698
   * start-tag or empty-element tag.
9699
   * As extended by the Namespace in XML REC.
9700
   */
9701
1.28M
        for (j = 0; j < i;j += 5) {
9702
256k
      if (atts[i] == atts[j]) {
9703
17.6k
          if (atts[i+1] == atts[j+1]) {
9704
6.43k
        xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
9705
6.43k
        break;
9706
6.43k
    }
9707
11.2k
    if ((nsname != NULL) && (atts[j + 2] == nsname)) {
9708
219
        xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9709
219
           "Namespaced Attribute %s in '%s' redefined\n",
9710
219
           atts[i], nsname, NULL);
9711
219
        break;
9712
219
    }
9713
11.2k
      }
9714
256k
  }
9715
1.03M
    }
9716
9717
2.53M
    nsname = xmlGetNamespace(ctxt, prefix);
9718
2.53M
    if ((prefix != NULL) && (nsname == NULL)) {
9719
285k
  xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9720
285k
           "Namespace prefix %s on %s is not defined\n",
9721
285k
     prefix, localname, NULL);
9722
285k
    }
9723
2.53M
    *pref = prefix;
9724
2.53M
    *URI = nsname;
9725
9726
    /*
9727
     * SAX: Start of Element !
9728
     */
9729
2.53M
    if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9730
2.53M
  (!ctxt->disableSAX)) {
9731
2.18M
  if (nbNs > 0)
9732
153k
      ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9733
153k
        nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
9734
153k
        nbatts / 5, nbdef, atts);
9735
2.03M
  else
9736
2.03M
      ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9737
2.03M
                    nsname, 0, NULL, nbatts / 5, nbdef, atts);
9738
2.18M
    }
9739
9740
2.53M
done:
9741
    /*
9742
     * Free up attribute allocated strings if needed
9743
     */
9744
2.53M
    if (attval != 0) {
9745
421k
  for (i = 3,j = 0; j < nratts;i += 5,j++)
9746
228k
      if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9747
206k
          xmlFree((xmlChar *) atts[i]);
9748
192k
    }
9749
9750
2.53M
    return(localname);
9751
2.53M
}
9752
9753
/**
9754
 * xmlParseEndTag2:
9755
 * @ctxt:  an XML parser context
9756
 * @line:  line of the start tag
9757
 * @nsNr:  number of namespaces on the start tag
9758
 *
9759
 * parse an end of tag
9760
 *
9761
 * [42] ETag ::= '</' Name S? '>'
9762
 *
9763
 * With namespace
9764
 *
9765
 * [NS 9] ETag ::= '</' QName S? '>'
9766
 */
9767
9768
static void
9769
761k
xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlStartTag *tag) {
9770
761k
    const xmlChar *name;
9771
9772
761k
    GROW;
9773
761k
    if ((RAW != '<') || (NXT(1) != '/')) {
9774
0
  xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
9775
0
  return;
9776
0
    }
9777
761k
    SKIP(2);
9778
9779
761k
    if (tag->prefix == NULL)
9780
621k
        name = xmlParseNameAndCompare(ctxt, ctxt->name);
9781
140k
    else
9782
140k
        name = xmlParseQNameAndCompare(ctxt, ctxt->name, tag->prefix);
9783
9784
    /*
9785
     * We should definitely be at the ending "S? '>'" part
9786
     */
9787
761k
    GROW;
9788
761k
    if (ctxt->instate == XML_PARSER_EOF)
9789
0
        return;
9790
761k
    SKIP_BLANKS;
9791
761k
    if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
9792
57.6k
  xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
9793
57.6k
    } else
9794
704k
  NEXT1;
9795
9796
    /*
9797
     * [ WFC: Element Type Match ]
9798
     * The Name in an element's end-tag must match the element type in the
9799
     * start-tag.
9800
     *
9801
     */
9802
761k
    if (name != (xmlChar*)1) {
9803
111k
        if (name == NULL) name = BAD_CAST "unparsable";
9804
111k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
9805
111k
         "Opening and ending tag mismatch: %s line %d and %s\n",
9806
111k
                    ctxt->name, tag->line, name);
9807
111k
    }
9808
9809
    /*
9810
     * SAX: End of Tag
9811
     */
9812
761k
    if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9813
761k
  (!ctxt->disableSAX))
9814
569k
  ctxt->sax->endElementNs(ctxt->userData, ctxt->name, tag->prefix,
9815
569k
                                tag->URI);
9816
9817
761k
    spacePop(ctxt);
9818
761k
    if (tag->nsNr != 0)
9819
36.9k
  nsPop(ctxt, tag->nsNr);
9820
761k
}
9821
9822
/**
9823
 * xmlParseCDSect:
9824
 * @ctxt:  an XML parser context
9825
 *
9826
 * DEPRECATED: Internal function, don't use.
9827
 *
9828
 * Parse escaped pure raw content.
9829
 *
9830
 * [18] CDSect ::= CDStart CData CDEnd
9831
 *
9832
 * [19] CDStart ::= '<![CDATA['
9833
 *
9834
 * [20] Data ::= (Char* - (Char* ']]>' Char*))
9835
 *
9836
 * [21] CDEnd ::= ']]>'
9837
 */
9838
void
9839
314k
xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9840
314k
    xmlChar *buf = NULL;
9841
314k
    int len = 0;
9842
314k
    int size = XML_PARSER_BUFFER_SIZE;
9843
314k
    int r, rl;
9844
314k
    int s, sl;
9845
314k
    int cur, l;
9846
314k
    int count = 0;
9847
314k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
9848
289k
                    XML_MAX_HUGE_LENGTH :
9849
314k
                    XML_MAX_TEXT_LENGTH;
9850
9851
    /* Check 2.6.0 was NXT(0) not RAW */
9852
314k
    if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9853
314k
  SKIP(9);
9854
314k
    } else
9855
0
        return;
9856
9857
314k
    ctxt->instate = XML_PARSER_CDATA_SECTION;
9858
314k
    r = CUR_CHAR(rl);
9859
314k
    if (!IS_CHAR(r)) {
9860
15.7k
  xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9861
15.7k
  ctxt->instate = XML_PARSER_CONTENT;
9862
15.7k
        return;
9863
15.7k
    }
9864
298k
    NEXTL(rl);
9865
298k
    s = CUR_CHAR(sl);
9866
298k
    if (!IS_CHAR(s)) {
9867
13.1k
  xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9868
13.1k
  ctxt->instate = XML_PARSER_CONTENT;
9869
13.1k
        return;
9870
13.1k
    }
9871
285k
    NEXTL(sl);
9872
285k
    cur = CUR_CHAR(l);
9873
285k
    buf = (xmlChar *) xmlMallocAtomic(size);
9874
285k
    if (buf == NULL) {
9875
0
  xmlErrMemory(ctxt, NULL);
9876
0
  return;
9877
0
    }
9878
30.6M
    while (IS_CHAR(cur) &&
9879
30.6M
           ((r != ']') || (s != ']') || (cur != '>'))) {
9880
30.3M
  if (len + 5 >= size) {
9881
122k
      xmlChar *tmp;
9882
9883
122k
      tmp = (xmlChar *) xmlRealloc(buf, size * 2);
9884
122k
      if (tmp == NULL) {
9885
0
          xmlFree(buf);
9886
0
    xmlErrMemory(ctxt, NULL);
9887
0
    return;
9888
0
      }
9889
122k
      buf = tmp;
9890
122k
      size *= 2;
9891
122k
  }
9892
30.3M
  COPY_BUF(rl,buf,len,r);
9893
30.3M
  r = s;
9894
30.3M
  rl = sl;
9895
30.3M
  s = cur;
9896
30.3M
  sl = l;
9897
30.3M
  count++;
9898
30.3M
  if (count > 50) {
9899
514k
      SHRINK;
9900
514k
      GROW;
9901
514k
            if (ctxt->instate == XML_PARSER_EOF) {
9902
0
    xmlFree(buf);
9903
0
    return;
9904
0
            }
9905
514k
      count = 0;
9906
514k
  }
9907
30.3M
  NEXTL(l);
9908
30.3M
  cur = CUR_CHAR(l);
9909
30.3M
        if (len > maxLength) {
9910
0
            xmlFatalErrMsg(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9911
0
                           "CData section too big found\n");
9912
0
            xmlFree(buf);
9913
0
            return;
9914
0
        }
9915
30.3M
    }
9916
285k
    buf[len] = 0;
9917
285k
    ctxt->instate = XML_PARSER_CONTENT;
9918
285k
    if (cur != '>') {
9919
92.1k
  xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9920
92.1k
                       "CData section not finished\n%.50s\n", buf);
9921
92.1k
  xmlFree(buf);
9922
92.1k
        return;
9923
92.1k
    }
9924
193k
    NEXTL(l);
9925
9926
    /*
9927
     * OK the buffer is to be consumed as cdata.
9928
     */
9929
193k
    if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9930
35.0k
  if (ctxt->sax->cdataBlock != NULL)
9931
24.8k
      ctxt->sax->cdataBlock(ctxt->userData, buf, len);
9932
10.2k
  else if (ctxt->sax->characters != NULL)
9933
10.2k
      ctxt->sax->characters(ctxt->userData, buf, len);
9934
35.0k
    }
9935
193k
    xmlFree(buf);
9936
193k
}
9937
9938
/**
9939
 * xmlParseContentInternal:
9940
 * @ctxt:  an XML parser context
9941
 *
9942
 * Parse a content sequence. Stops at EOF or '</'. Leaves checking of
9943
 * unexpected EOF to the caller.
9944
 */
9945
9946
static void
9947
2.82M
xmlParseContentInternal(xmlParserCtxtPtr ctxt) {
9948
2.82M
    int nameNr = ctxt->nameNr;
9949
9950
2.82M
    GROW;
9951
72.6M
    while ((RAW != 0) &&
9952
72.6M
     (ctxt->instate != XML_PARSER_EOF)) {
9953
70.1M
        int id = ctxt->input->id;
9954
70.1M
  unsigned long cons = CUR_CONSUMED;
9955
70.1M
  const xmlChar *cur = ctxt->input->cur;
9956
9957
  /*
9958
   * First case : a Processing Instruction.
9959
   */
9960
70.1M
  if ((*cur == '<') && (cur[1] == '?')) {
9961
586k
      xmlParsePI(ctxt);
9962
586k
  }
9963
9964
  /*
9965
   * Second case : a CDSection
9966
   */
9967
  /* 2.6.0 test was *cur not RAW */
9968
69.5M
  else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9969
314k
      xmlParseCDSect(ctxt);
9970
314k
  }
9971
9972
  /*
9973
   * Third case :  a comment
9974
   */
9975
69.2M
  else if ((*cur == '<') && (NXT(1) == '!') &&
9976
69.2M
     (NXT(2) == '-') && (NXT(3) == '-')) {
9977
1.31M
      xmlParseComment(ctxt);
9978
1.31M
      ctxt->instate = XML_PARSER_CONTENT;
9979
1.31M
  }
9980
9981
  /*
9982
   * Fourth case :  a sub-element.
9983
   */
9984
67.8M
  else if (*cur == '<') {
9985
26.5M
            if (NXT(1) == '/') {
9986
5.44M
                if (ctxt->nameNr <= nameNr)
9987
186k
                    break;
9988
5.26M
          xmlParseElementEnd(ctxt);
9989
21.1M
            } else {
9990
21.1M
          xmlParseElementStart(ctxt);
9991
21.1M
            }
9992
26.5M
  }
9993
9994
  /*
9995
   * Fifth case : a reference. If if has not been resolved,
9996
   *    parsing returns it's Name, create the node
9997
   */
9998
9999
41.3M
  else if (*cur == '&') {
10000
6.87M
      xmlParseReference(ctxt);
10001
6.87M
  }
10002
10003
  /*
10004
   * Last case, text. Note that References are handled directly.
10005
   */
10006
34.4M
  else {
10007
34.4M
      xmlParseCharData(ctxt, 0);
10008
34.4M
  }
10009
10010
69.9M
  GROW;
10011
69.9M
  SHRINK;
10012
10013
69.9M
  if ((cons == CUR_CONSUMED) && (id == ctxt->input->id)) {
10014
58.2k
      xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
10015
58.2k
                  "detected an error in element content\n");
10016
58.2k
      xmlHaltParser(ctxt);
10017
58.2k
            break;
10018
58.2k
  }
10019
69.9M
    }
10020
2.82M
}
10021
10022
/**
10023
 * xmlParseContent:
10024
 * @ctxt:  an XML parser context
10025
 *
10026
 * Parse a content sequence. Stops at EOF or '</'.
10027
 *
10028
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10029
 */
10030
10031
void
10032
2.69M
xmlParseContent(xmlParserCtxtPtr ctxt) {
10033
2.69M
    int nameNr = ctxt->nameNr;
10034
10035
2.69M
    xmlParseContentInternal(ctxt);
10036
10037
2.69M
    if ((ctxt->instate != XML_PARSER_EOF) && (ctxt->nameNr > nameNr)) {
10038
1.22M
        const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
10039
1.22M
        int line = ctxt->pushTab[ctxt->nameNr - 1].line;
10040
1.22M
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
10041
1.22M
                "Premature end of data in tag %s line %d\n",
10042
1.22M
    name, line, NULL);
10043
1.22M
    }
10044
2.69M
}
10045
10046
/**
10047
 * xmlParseElement:
10048
 * @ctxt:  an XML parser context
10049
 *
10050
 * DEPRECATED: Internal function, don't use.
10051
 *
10052
 * parse an XML element
10053
 *
10054
 * [39] element ::= EmptyElemTag | STag content ETag
10055
 *
10056
 * [ WFC: Element Type Match ]
10057
 * The Name in an element's end-tag must match the element type in the
10058
 * start-tag.
10059
 *
10060
 */
10061
10062
void
10063
168k
xmlParseElement(xmlParserCtxtPtr ctxt) {
10064
168k
    if (xmlParseElementStart(ctxt) != 0)
10065
38.6k
        return;
10066
10067
129k
    xmlParseContentInternal(ctxt);
10068
129k
    if (ctxt->instate == XML_PARSER_EOF)
10069
2.32k
  return;
10070
10071
127k
    if (CUR == 0) {
10072
77.6k
        const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
10073
77.6k
        int line = ctxt->pushTab[ctxt->nameNr - 1].line;
10074
77.6k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
10075
77.6k
                "Premature end of data in tag %s line %d\n",
10076
77.6k
    name, line, NULL);
10077
77.6k
        return;
10078
77.6k
    }
10079
10080
49.8k
    xmlParseElementEnd(ctxt);
10081
49.8k
}
10082
10083
/**
10084
 * xmlParseElementStart:
10085
 * @ctxt:  an XML parser context
10086
 *
10087
 * Parse the start of an XML element. Returns -1 in case of error, 0 if an
10088
 * opening tag was parsed, 1 if an empty element was parsed.
10089
 */
10090
static int
10091
21.2M
xmlParseElementStart(xmlParserCtxtPtr ctxt) {
10092
21.2M
    const xmlChar *name;
10093
21.2M
    const xmlChar *prefix = NULL;
10094
21.2M
    const xmlChar *URI = NULL;
10095
21.2M
    xmlParserNodeInfo node_info;
10096
21.2M
    int line, tlen = 0;
10097
21.2M
    xmlNodePtr ret;
10098
21.2M
    int nsNr = ctxt->nsNr;
10099
10100
21.2M
    if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) &&
10101
21.2M
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
10102
0
  xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
10103
0
     "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
10104
0
        xmlParserMaxDepth);
10105
0
  xmlHaltParser(ctxt);
10106
0
  return(-1);
10107
0
    }
10108
10109
    /* Capture start position */
10110
21.2M
    if (ctxt->record_info) {
10111
0
        node_info.begin_pos = ctxt->input->consumed +
10112
0
                          (CUR_PTR - ctxt->input->base);
10113
0
  node_info.begin_line = ctxt->input->line;
10114
0
    }
10115
10116
21.2M
    if (ctxt->spaceNr == 0)
10117
0
  spacePush(ctxt, -1);
10118
21.2M
    else if (*ctxt->space == -2)
10119
7.20M
  spacePush(ctxt, -1);
10120
14.0M
    else
10121
14.0M
  spacePush(ctxt, *ctxt->space);
10122
10123
21.2M
    line = ctxt->input->line;
10124
21.2M
#ifdef LIBXML_SAX1_ENABLED
10125
21.2M
    if (ctxt->sax2)
10126
1.79M
#endif /* LIBXML_SAX1_ENABLED */
10127
1.79M
        name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
10128
19.4M
#ifdef LIBXML_SAX1_ENABLED
10129
19.4M
    else
10130
19.4M
  name = xmlParseStartTag(ctxt);
10131
21.2M
#endif /* LIBXML_SAX1_ENABLED */
10132
21.2M
    if (ctxt->instate == XML_PARSER_EOF)
10133
3.06k
  return(-1);
10134
21.2M
    if (name == NULL) {
10135
4.76M
  spacePop(ctxt);
10136
4.76M
        return(-1);
10137
4.76M
    }
10138
16.4M
    nameNsPush(ctxt, name, prefix, URI, line, ctxt->nsNr - nsNr);
10139
16.4M
    ret = ctxt->node;
10140
10141
16.4M
#ifdef LIBXML_VALID_ENABLED
10142
    /*
10143
     * [ VC: Root Element Type ]
10144
     * The Name in the document type declaration must match the element
10145
     * type of the root element.
10146
     */
10147
16.4M
    if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
10148
16.4M
        ctxt->node && (ctxt->node == ctxt->myDoc->children))
10149
0
        ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
10150
16.4M
#endif /* LIBXML_VALID_ENABLED */
10151
10152
    /*
10153
     * Check for an Empty Element.
10154
     */
10155
16.4M
    if ((RAW == '/') && (NXT(1) == '>')) {
10156
5.24M
        SKIP(2);
10157
5.24M
  if (ctxt->sax2) {
10158
443k
      if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
10159
443k
    (!ctxt->disableSAX))
10160
378k
    ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
10161
443k
#ifdef LIBXML_SAX1_ENABLED
10162
4.79M
  } else {
10163
4.79M
      if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
10164
4.79M
    (!ctxt->disableSAX))
10165
1.95M
    ctxt->sax->endElement(ctxt->userData, name);
10166
4.79M
#endif /* LIBXML_SAX1_ENABLED */
10167
4.79M
  }
10168
5.24M
  namePop(ctxt);
10169
5.24M
  spacePop(ctxt);
10170
5.24M
  if (nsNr != ctxt->nsNr)
10171
19.2k
      nsPop(ctxt, ctxt->nsNr - nsNr);
10172
5.24M
  if ( ret != NULL && ctxt->record_info ) {
10173
0
     node_info.end_pos = ctxt->input->consumed +
10174
0
            (CUR_PTR - ctxt->input->base);
10175
0
     node_info.end_line = ctxt->input->line;
10176
0
     node_info.node = ret;
10177
0
     xmlParserAddNodeInfo(ctxt, &node_info);
10178
0
  }
10179
5.24M
  return(1);
10180
5.24M
    }
10181
11.2M
    if (RAW == '>') {
10182
8.88M
        NEXT1;
10183
8.88M
    } else {
10184
2.36M
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
10185
2.36M
         "Couldn't find end of Start Tag %s line %d\n",
10186
2.36M
                    name, line, NULL);
10187
10188
  /*
10189
   * end of parsing of this node.
10190
   */
10191
2.36M
  nodePop(ctxt);
10192
2.36M
  namePop(ctxt);
10193
2.36M
  spacePop(ctxt);
10194
2.36M
  if (nsNr != ctxt->nsNr)
10195
9.38k
      nsPop(ctxt, ctxt->nsNr - nsNr);
10196
10197
  /*
10198
   * Capture end position and add node
10199
   */
10200
2.36M
  if ( ret != NULL && ctxt->record_info ) {
10201
0
     node_info.end_pos = ctxt->input->consumed +
10202
0
            (CUR_PTR - ctxt->input->base);
10203
0
     node_info.end_line = ctxt->input->line;
10204
0
     node_info.node = ret;
10205
0
     xmlParserAddNodeInfo(ctxt, &node_info);
10206
0
  }
10207
2.36M
  return(-1);
10208
2.36M
    }
10209
10210
8.88M
    return(0);
10211
11.2M
}
10212
10213
/**
10214
 * xmlParseElementEnd:
10215
 * @ctxt:  an XML parser context
10216
 *
10217
 * Parse the end of an XML element.
10218
 */
10219
static void
10220
5.31M
xmlParseElementEnd(xmlParserCtxtPtr ctxt) {
10221
5.31M
    xmlParserNodeInfo node_info;
10222
5.31M
    xmlNodePtr ret = ctxt->node;
10223
10224
5.31M
    if (ctxt->nameNr <= 0)
10225
0
        return;
10226
10227
    /*
10228
     * parse the end of tag: '</' should be here.
10229
     */
10230
5.31M
    if (ctxt->sax2) {
10231
423k
  xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
10232
423k
  namePop(ctxt);
10233
423k
    }
10234
4.88M
#ifdef LIBXML_SAX1_ENABLED
10235
4.88M
    else
10236
4.88M
  xmlParseEndTag1(ctxt, 0);
10237
5.31M
#endif /* LIBXML_SAX1_ENABLED */
10238
10239
    /*
10240
     * Capture end position and add node
10241
     */
10242
5.31M
    if ( ret != NULL && ctxt->record_info ) {
10243
0
       node_info.end_pos = ctxt->input->consumed +
10244
0
                          (CUR_PTR - ctxt->input->base);
10245
0
       node_info.end_line = ctxt->input->line;
10246
0
       node_info.node = ret;
10247
0
       xmlParserAddNodeInfo(ctxt, &node_info);
10248
0
    }
10249
5.31M
}
10250
10251
/**
10252
 * xmlParseVersionNum:
10253
 * @ctxt:  an XML parser context
10254
 *
10255
 * DEPRECATED: Internal function, don't use.
10256
 *
10257
 * parse the XML version value.
10258
 *
10259
 * [26] VersionNum ::= '1.' [0-9]+
10260
 *
10261
 * In practice allow [0-9].[0-9]+ at that level
10262
 *
10263
 * Returns the string giving the XML version number, or NULL
10264
 */
10265
xmlChar *
10266
310k
xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
10267
310k
    xmlChar *buf = NULL;
10268
310k
    int len = 0;
10269
310k
    int size = 10;
10270
310k
    xmlChar cur;
10271
10272
310k
    buf = (xmlChar *) xmlMallocAtomic(size);
10273
310k
    if (buf == NULL) {
10274
0
  xmlErrMemory(ctxt, NULL);
10275
0
  return(NULL);
10276
0
    }
10277
310k
    cur = CUR;
10278
310k
    if (!((cur >= '0') && (cur <= '9'))) {
10279
3.60k
  xmlFree(buf);
10280
3.60k
  return(NULL);
10281
3.60k
    }
10282
306k
    buf[len++] = cur;
10283
306k
    NEXT;
10284
306k
    cur=CUR;
10285
306k
    if (cur != '.') {
10286
4.50k
  xmlFree(buf);
10287
4.50k
  return(NULL);
10288
4.50k
    }
10289
302k
    buf[len++] = cur;
10290
302k
    NEXT;
10291
302k
    cur=CUR;
10292
1.01M
    while ((cur >= '0') && (cur <= '9')) {
10293
715k
  if (len + 1 >= size) {
10294
2.66k
      xmlChar *tmp;
10295
10296
2.66k
      size *= 2;
10297
2.66k
      tmp = (xmlChar *) xmlRealloc(buf, size);
10298
2.66k
      if (tmp == NULL) {
10299
0
          xmlFree(buf);
10300
0
    xmlErrMemory(ctxt, NULL);
10301
0
    return(NULL);
10302
0
      }
10303
2.66k
      buf = tmp;
10304
2.66k
  }
10305
715k
  buf[len++] = cur;
10306
715k
  NEXT;
10307
715k
  cur=CUR;
10308
715k
    }
10309
302k
    buf[len] = 0;
10310
302k
    return(buf);
10311
302k
}
10312
10313
/**
10314
 * xmlParseVersionInfo:
10315
 * @ctxt:  an XML parser context
10316
 *
10317
 * DEPRECATED: Internal function, don't use.
10318
 *
10319
 * parse the XML version.
10320
 *
10321
 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
10322
 *
10323
 * [25] Eq ::= S? '=' S?
10324
 *
10325
 * Returns the version string, e.g. "1.0", or NULL if no version was parsed
10326
 */
10327
10328
xmlChar *
10329
407k
xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
10330
407k
    xmlChar *version = NULL;
10331
10332
407k
    if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
10333
318k
  SKIP(7);
10334
318k
  SKIP_BLANKS;
10335
318k
  if (RAW != '=') {
10336
4.67k
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10337
4.67k
      return(NULL);
10338
4.67k
        }
10339
314k
  NEXT;
10340
314k
  SKIP_BLANKS;
10341
314k
  if (RAW == '"') {
10342
273k
      NEXT;
10343
273k
      version = xmlParseVersionNum(ctxt);
10344
273k
      if (RAW != '"') {
10345
13.2k
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10346
13.2k
      } else
10347
260k
          NEXT;
10348
273k
  } else if (RAW == '\''){
10349
36.8k
      NEXT;
10350
36.8k
      version = xmlParseVersionNum(ctxt);
10351
36.8k
      if (RAW != '\'') {
10352
3.38k
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10353
3.38k
      } else
10354
33.5k
          NEXT;
10355
36.8k
  } else {
10356
4.09k
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10357
4.09k
  }
10358
314k
    }
10359
402k
    return(version);
10360
407k
}
10361
10362
/**
10363
 * xmlParseEncName:
10364
 * @ctxt:  an XML parser context
10365
 *
10366
 * DEPRECATED: Internal function, don't use.
10367
 *
10368
 * parse the XML encoding name
10369
 *
10370
 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
10371
 *
10372
 * Returns the encoding name value or NULL
10373
 */
10374
xmlChar *
10375
163k
xmlParseEncName(xmlParserCtxtPtr ctxt) {
10376
163k
    xmlChar *buf = NULL;
10377
163k
    int len = 0;
10378
163k
    int size = 10;
10379
163k
    xmlChar cur;
10380
10381
163k
    cur = CUR;
10382
163k
    if (((cur >= 'a') && (cur <= 'z')) ||
10383
163k
        ((cur >= 'A') && (cur <= 'Z'))) {
10384
160k
  buf = (xmlChar *) xmlMallocAtomic(size);
10385
160k
  if (buf == NULL) {
10386
0
      xmlErrMemory(ctxt, NULL);
10387
0
      return(NULL);
10388
0
  }
10389
10390
160k
  buf[len++] = cur;
10391
160k
  NEXT;
10392
160k
  cur = CUR;
10393
2.05M
  while (((cur >= 'a') && (cur <= 'z')) ||
10394
2.05M
         ((cur >= 'A') && (cur <= 'Z')) ||
10395
2.05M
         ((cur >= '0') && (cur <= '9')) ||
10396
2.05M
         (cur == '.') || (cur == '_') ||
10397
2.05M
         (cur == '-')) {
10398
1.89M
      if (len + 1 >= size) {
10399
94.7k
          xmlChar *tmp;
10400
10401
94.7k
    size *= 2;
10402
94.7k
    tmp = (xmlChar *) xmlRealloc(buf, size);
10403
94.7k
    if (tmp == NULL) {
10404
0
        xmlErrMemory(ctxt, NULL);
10405
0
        xmlFree(buf);
10406
0
        return(NULL);
10407
0
    }
10408
94.7k
    buf = tmp;
10409
94.7k
      }
10410
1.89M
      buf[len++] = cur;
10411
1.89M
      NEXT;
10412
1.89M
      cur = CUR;
10413
1.89M
      if (cur == 0) {
10414
869
          SHRINK;
10415
869
    GROW;
10416
869
    cur = CUR;
10417
869
      }
10418
1.89M
        }
10419
160k
  buf[len] = 0;
10420
160k
    } else {
10421
2.45k
  xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
10422
2.45k
    }
10423
163k
    return(buf);
10424
163k
}
10425
10426
/**
10427
 * xmlParseEncodingDecl:
10428
 * @ctxt:  an XML parser context
10429
 *
10430
 * DEPRECATED: Internal function, don't use.
10431
 *
10432
 * parse the XML encoding declaration
10433
 *
10434
 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' |  "'" EncName "'")
10435
 *
10436
 * this sets up the conversion filters.
10437
 *
10438
 * Returns the encoding value or NULL
10439
 */
10440
10441
const xmlChar *
10442
342k
xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
10443
342k
    xmlChar *encoding = NULL;
10444
10445
342k
    SKIP_BLANKS;
10446
342k
    if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
10447
173k
  SKIP(8);
10448
173k
  SKIP_BLANKS;
10449
173k
  if (RAW != '=') {
10450
1.68k
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10451
1.68k
      return(NULL);
10452
1.68k
        }
10453
171k
  NEXT;
10454
171k
  SKIP_BLANKS;
10455
171k
  if (RAW == '"') {
10456
148k
      NEXT;
10457
148k
      encoding = xmlParseEncName(ctxt);
10458
148k
      if (RAW != '"') {
10459
6.08k
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10460
6.08k
    xmlFree((xmlChar *) encoding);
10461
6.08k
    return(NULL);
10462
6.08k
      } else
10463
141k
          NEXT;
10464
148k
  } else if (RAW == '\''){
10465
15.1k
      NEXT;
10466
15.1k
      encoding = xmlParseEncName(ctxt);
10467
15.1k
      if (RAW != '\'') {
10468
2.08k
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10469
2.08k
    xmlFree((xmlChar *) encoding);
10470
2.08k
    return(NULL);
10471
2.08k
      } else
10472
13.1k
          NEXT;
10473
15.1k
  } else {
10474
8.38k
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10475
8.38k
  }
10476
10477
        /*
10478
         * Non standard parsing, allowing the user to ignore encoding
10479
         */
10480
163k
        if (ctxt->options & XML_PARSE_IGNORE_ENC) {
10481
72.1k
      xmlFree((xmlChar *) encoding);
10482
72.1k
            return(NULL);
10483
72.1k
  }
10484
10485
  /*
10486
   * UTF-16 encoding switch has already taken place at this stage,
10487
   * moreover the little-endian/big-endian selection is already done
10488
   */
10489
91.2k
        if ((encoding != NULL) &&
10490
91.2k
      ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
10491
84.3k
       (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
10492
      /*
10493
       * If no encoding was passed to the parser, that we are
10494
       * using UTF-16 and no decoder is present i.e. the
10495
       * document is apparently UTF-8 compatible, then raise an
10496
       * encoding mismatch fatal error
10497
       */
10498
1.03k
      if ((ctxt->encoding == NULL) &&
10499
1.03k
          (ctxt->input->buf != NULL) &&
10500
1.03k
          (ctxt->input->buf->encoder == NULL)) {
10501
945
    xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING,
10502
945
      "Document labelled UTF-16 but has UTF-8 content\n");
10503
945
      }
10504
1.03k
      if (ctxt->encoding != NULL)
10505
88
    xmlFree((xmlChar *) ctxt->encoding);
10506
1.03k
      ctxt->encoding = encoding;
10507
1.03k
  }
10508
  /*
10509
   * UTF-8 encoding is handled natively
10510
   */
10511
90.2k
        else if ((encoding != NULL) &&
10512
90.2k
      ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
10513
83.3k
       (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
10514
39.9k
      if (ctxt->encoding != NULL)
10515
322
    xmlFree((xmlChar *) ctxt->encoding);
10516
39.9k
      ctxt->encoding = encoding;
10517
39.9k
  }
10518
50.3k
  else if (encoding != NULL) {
10519
43.4k
      xmlCharEncodingHandlerPtr handler;
10520
10521
43.4k
      if (ctxt->input->encoding != NULL)
10522
0
    xmlFree((xmlChar *) ctxt->input->encoding);
10523
43.4k
      ctxt->input->encoding = encoding;
10524
10525
43.4k
            handler = xmlFindCharEncodingHandler((const char *) encoding);
10526
43.4k
      if (handler != NULL) {
10527
42.6k
    if (xmlSwitchToEncoding(ctxt, handler) < 0) {
10528
        /* failed to convert */
10529
350
        ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
10530
350
        return(NULL);
10531
350
    }
10532
42.6k
      } else {
10533
745
    xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
10534
745
      "Unsupported encoding %s\n", encoding);
10535
745
    return(NULL);
10536
745
      }
10537
43.4k
  }
10538
91.2k
    }
10539
259k
    return(encoding);
10540
342k
}
10541
10542
/**
10543
 * xmlParseSDDecl:
10544
 * @ctxt:  an XML parser context
10545
 *
10546
 * DEPRECATED: Internal function, don't use.
10547
 *
10548
 * parse the XML standalone declaration
10549
 *
10550
 * [32] SDDecl ::= S 'standalone' Eq
10551
 *                 (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
10552
 *
10553
 * [ VC: Standalone Document Declaration ]
10554
 * TODO The standalone document declaration must have the value "no"
10555
 * if any external markup declarations contain declarations of:
10556
 *  - attributes with default values, if elements to which these
10557
 *    attributes apply appear in the document without specifications
10558
 *    of values for these attributes, or
10559
 *  - entities (other than amp, lt, gt, apos, quot), if references
10560
 *    to those entities appear in the document, or
10561
 *  - attributes with values subject to normalization, where the
10562
 *    attribute appears in the document with a value which will change
10563
 *    as a result of normalization, or
10564
 *  - element types with element content, if white space occurs directly
10565
 *    within any instance of those types.
10566
 *
10567
 * Returns:
10568
 *   1 if standalone="yes"
10569
 *   0 if standalone="no"
10570
 *  -2 if standalone attribute is missing or invalid
10571
 *    (A standalone value of -2 means that the XML declaration was found,
10572
 *     but no value was specified for the standalone attribute).
10573
 */
10574
10575
int
10576
136k
xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
10577
136k
    int standalone = -2;
10578
10579
136k
    SKIP_BLANKS;
10580
136k
    if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
10581
19.2k
  SKIP(10);
10582
19.2k
        SKIP_BLANKS;
10583
19.2k
  if (RAW != '=') {
10584
399
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10585
399
      return(standalone);
10586
399
        }
10587
18.8k
  NEXT;
10588
18.8k
  SKIP_BLANKS;
10589
18.8k
        if (RAW == '\''){
10590
6.63k
      NEXT;
10591
6.63k
      if ((RAW == 'n') && (NXT(1) == 'o')) {
10592
4.16k
          standalone = 0;
10593
4.16k
                SKIP(2);
10594
4.16k
      } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10595
2.47k
                 (NXT(2) == 's')) {
10596
1.84k
          standalone = 1;
10597
1.84k
    SKIP(3);
10598
1.84k
            } else {
10599
628
    xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10600
628
      }
10601
6.63k
      if (RAW != '\'') {
10602
992
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10603
992
      } else
10604
5.64k
          NEXT;
10605
12.2k
  } else if (RAW == '"'){
10606
11.9k
      NEXT;
10607
11.9k
      if ((RAW == 'n') && (NXT(1) == 'o')) {
10608
4.60k
          standalone = 0;
10609
4.60k
    SKIP(2);
10610
7.35k
      } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10611
7.35k
                 (NXT(2) == 's')) {
10612
6.53k
          standalone = 1;
10613
6.53k
                SKIP(3);
10614
6.53k
            } else {
10615
824
    xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10616
824
      }
10617
11.9k
      if (RAW != '"') {
10618
1.28k
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10619
1.28k
      } else
10620
10.6k
          NEXT;
10621
11.9k
  } else {
10622
288
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10623
288
        }
10624
18.8k
    }
10625
135k
    return(standalone);
10626
136k
}
10627
10628
/**
10629
 * xmlParseXMLDecl:
10630
 * @ctxt:  an XML parser context
10631
 *
10632
 * DEPRECATED: Internal function, don't use.
10633
 *
10634
 * parse an XML declaration header
10635
 *
10636
 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10637
 */
10638
10639
void
10640
228k
xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
10641
228k
    xmlChar *version;
10642
10643
    /*
10644
     * This value for standalone indicates that the document has an
10645
     * XML declaration but it does not have a standalone attribute.
10646
     * It will be overwritten later if a standalone attribute is found.
10647
     */
10648
228k
    ctxt->input->standalone = -2;
10649
10650
    /*
10651
     * We know that '<?xml' is here.
10652
     */
10653
228k
    SKIP(5);
10654
10655
228k
    if (!IS_BLANK_CH(RAW)) {
10656
0
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10657
0
                 "Blank needed after '<?xml'\n");
10658
0
    }
10659
228k
    SKIP_BLANKS;
10660
10661
    /*
10662
     * We must have the VersionInfo here.
10663
     */
10664
228k
    version = xmlParseVersionInfo(ctxt);
10665
228k
    if (version == NULL) {
10666
25.9k
  xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
10667
202k
    } else {
10668
202k
  if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10669
      /*
10670
       * Changed here for XML-1.0 5th edition
10671
       */
10672
3.70k
      if (ctxt->options & XML_PARSE_OLD10) {
10673
1.37k
    xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10674
1.37k
                "Unsupported version '%s'\n",
10675
1.37k
                version);
10676
2.32k
      } else {
10677
2.32k
          if ((version[0] == '1') && ((version[1] == '.'))) {
10678
1.92k
        xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10679
1.92k
                      "Unsupported version '%s'\n",
10680
1.92k
          version, NULL);
10681
1.92k
    } else {
10682
407
        xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10683
407
              "Unsupported version '%s'\n",
10684
407
              version);
10685
407
    }
10686
2.32k
      }
10687
3.70k
  }
10688
202k
  if (ctxt->version != NULL)
10689
0
      xmlFree((void *) ctxt->version);
10690
202k
  ctxt->version = version;
10691
202k
    }
10692
10693
    /*
10694
     * We may have the encoding declaration
10695
     */
10696
228k
    if (!IS_BLANK_CH(RAW)) {
10697
95.8k
        if ((RAW == '?') && (NXT(1) == '>')) {
10698
64.6k
      SKIP(2);
10699
64.6k
      return;
10700
64.6k
  }
10701
31.2k
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10702
31.2k
    }
10703
164k
    xmlParseEncodingDecl(ctxt);
10704
164k
    if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10705
164k
         (ctxt->instate == XML_PARSER_EOF)) {
10706
  /*
10707
   * The XML REC instructs us to stop parsing right here
10708
   */
10709
682
        return;
10710
682
    }
10711
10712
    /*
10713
     * We may have the standalone status.
10714
     */
10715
163k
    if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
10716
28.2k
        if ((RAW == '?') && (NXT(1) == '>')) {
10717
27.0k
      SKIP(2);
10718
27.0k
      return;
10719
27.0k
  }
10720
1.18k
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10721
1.18k
    }
10722
10723
    /*
10724
     * We can grow the input buffer freely at that point
10725
     */
10726
136k
    GROW;
10727
10728
136k
    SKIP_BLANKS;
10729
136k
    ctxt->input->standalone = xmlParseSDDecl(ctxt);
10730
10731
136k
    SKIP_BLANKS;
10732
136k
    if ((RAW == '?') && (NXT(1) == '>')) {
10733
83.0k
        SKIP(2);
10734
83.0k
    } else if (RAW == '>') {
10735
        /* Deprecated old WD ... */
10736
743
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10737
743
  NEXT;
10738
52.4k
    } else {
10739
52.4k
        int c;
10740
10741
52.4k
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10742
3.31M
        while ((c = CUR) != 0) {
10743
3.30M
            NEXT;
10744
3.30M
            if (c == '>')
10745
46.5k
                break;
10746
3.30M
        }
10747
52.4k
    }
10748
136k
}
10749
10750
/**
10751
 * xmlParseMisc:
10752
 * @ctxt:  an XML parser context
10753
 *
10754
 * DEPRECATED: Internal function, don't use.
10755
 *
10756
 * parse an XML Misc* optional field.
10757
 *
10758
 * [27] Misc ::= Comment | PI |  S
10759
 */
10760
10761
void
10762
494k
xmlParseMisc(xmlParserCtxtPtr ctxt) {
10763
538k
    while (ctxt->instate != XML_PARSER_EOF) {
10764
538k
        SKIP_BLANKS;
10765
538k
        GROW;
10766
538k
        if ((RAW == '<') && (NXT(1) == '?')) {
10767
23.7k
      xmlParsePI(ctxt);
10768
514k
        } else if (CMP4(CUR_PTR, '<', '!', '-', '-')) {
10769
19.9k
      xmlParseComment(ctxt);
10770
494k
        } else {
10771
494k
            break;
10772
494k
        }
10773
538k
    }
10774
494k
}
10775
10776
/**
10777
 * xmlParseDocument:
10778
 * @ctxt:  an XML parser context
10779
 *
10780
 * parse an XML document (and build a tree if using the standard SAX
10781
 * interface).
10782
 *
10783
 * [1] document ::= prolog element Misc*
10784
 *
10785
 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
10786
 *
10787
 * Returns 0, or -1 in case of error. The parser context is augmented
10788
 *                as a result of the parsing.
10789
 */
10790
10791
int
10792
221k
xmlParseDocument(xmlParserCtxtPtr ctxt) {
10793
221k
    xmlChar start[4];
10794
221k
    xmlCharEncoding enc;
10795
10796
221k
    xmlInitParser();
10797
10798
221k
    if ((ctxt == NULL) || (ctxt->input == NULL))
10799
0
        return(-1);
10800
10801
221k
    GROW;
10802
10803
    /*
10804
     * SAX: detecting the level.
10805
     */
10806
221k
    xmlDetectSAX2(ctxt);
10807
10808
    /*
10809
     * SAX: beginning of the document processing.
10810
     */
10811
221k
    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10812
221k
        ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10813
221k
    if (ctxt->instate == XML_PARSER_EOF)
10814
0
  return(-1);
10815
10816
221k
    if ((ctxt->encoding == NULL) &&
10817
221k
        ((ctxt->input->end - ctxt->input->cur) >= 4)) {
10818
  /*
10819
   * Get the 4 first bytes and decode the charset
10820
   * if enc != XML_CHAR_ENCODING_NONE
10821
   * plug some encoding conversion routines.
10822
   */
10823
221k
  start[0] = RAW;
10824
221k
  start[1] = NXT(1);
10825
221k
  start[2] = NXT(2);
10826
221k
  start[3] = NXT(3);
10827
221k
  enc = xmlDetectCharEncoding(&start[0], 4);
10828
221k
  if (enc != XML_CHAR_ENCODING_NONE) {
10829
102k
      xmlSwitchEncoding(ctxt, enc);
10830
102k
  }
10831
221k
    }
10832
10833
10834
221k
    if (CUR == 0) {
10835
973
  xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10836
973
  return(-1);
10837
973
    }
10838
10839
    /*
10840
     * Check for the XMLDecl in the Prolog.
10841
     * do not GROW here, to keep the detected encoder from decoding more
10842
     * than just the first line, unless the amount of data is really
10843
     * too small to hold "<?xml version="1.0" encoding="foo"
10844
     */
10845
220k
    if ((ctxt->input->end - ctxt->input->cur) < 35) {
10846
8.71k
       GROW;
10847
8.71k
    }
10848
220k
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10849
10850
  /*
10851
   * Note that we will switch encoding on the fly.
10852
   */
10853
94.7k
  xmlParseXMLDecl(ctxt);
10854
94.7k
  if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10855
94.7k
      (ctxt->instate == XML_PARSER_EOF)) {
10856
      /*
10857
       * The XML REC instructs us to stop parsing right here
10858
       */
10859
400
      return(-1);
10860
400
  }
10861
94.3k
  ctxt->standalone = ctxt->input->standalone;
10862
94.3k
  SKIP_BLANKS;
10863
126k
    } else {
10864
126k
  ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10865
126k
    }
10866
220k
    if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10867
211k
        ctxt->sax->startDocument(ctxt->userData);
10868
220k
    if (ctxt->instate == XML_PARSER_EOF)
10869
0
  return(-1);
10870
220k
    if ((ctxt->myDoc != NULL) && (ctxt->input != NULL) &&
10871
220k
        (ctxt->input->buf != NULL) && (ctxt->input->buf->compressed >= 0)) {
10872
0
  ctxt->myDoc->compression = ctxt->input->buf->compressed;
10873
0
    }
10874
10875
    /*
10876
     * The Misc part of the Prolog
10877
     */
10878
220k
    xmlParseMisc(ctxt);
10879
10880
    /*
10881
     * Then possibly doc type declaration(s) and more Misc
10882
     * (doctypedecl Misc*)?
10883
     */
10884
220k
    GROW;
10885
220k
    if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
10886
10887
112k
  ctxt->inSubset = 1;
10888
112k
  xmlParseDocTypeDecl(ctxt);
10889
112k
  if (RAW == '[') {
10890
89.7k
      ctxt->instate = XML_PARSER_DTD;
10891
89.7k
      xmlParseInternalSubset(ctxt);
10892
89.7k
      if (ctxt->instate == XML_PARSER_EOF)
10893
5.82k
    return(-1);
10894
89.7k
  }
10895
10896
  /*
10897
   * Create and update the external subset.
10898
   */
10899
106k
  ctxt->inSubset = 2;
10900
106k
  if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10901
106k
      (!ctxt->disableSAX))
10902
84.4k
      ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10903
84.4k
                                ctxt->extSubSystem, ctxt->extSubURI);
10904
106k
  if (ctxt->instate == XML_PARSER_EOF)
10905
1.25k
      return(-1);
10906
105k
  ctxt->inSubset = 0;
10907
10908
105k
        xmlCleanSpecialAttr(ctxt);
10909
10910
105k
  ctxt->instate = XML_PARSER_PROLOG;
10911
105k
  xmlParseMisc(ctxt);
10912
105k
    }
10913
10914
    /*
10915
     * Time to start parsing the tree itself
10916
     */
10917
213k
    GROW;
10918
213k
    if (RAW != '<') {
10919
45.0k
  xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10920
45.0k
           "Start tag expected, '<' not found\n");
10921
168k
    } else {
10922
168k
  ctxt->instate = XML_PARSER_CONTENT;
10923
168k
  xmlParseElement(ctxt);
10924
168k
  ctxt->instate = XML_PARSER_EPILOG;
10925
10926
10927
  /*
10928
   * The Misc part at the end
10929
   */
10930
168k
  xmlParseMisc(ctxt);
10931
10932
168k
  if (RAW != 0) {
10933
41.6k
      xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
10934
41.6k
  }
10935
168k
  ctxt->instate = XML_PARSER_EOF;
10936
168k
    }
10937
10938
    /*
10939
     * SAX: end of the document processing.
10940
     */
10941
213k
    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10942
213k
        ctxt->sax->endDocument(ctxt->userData);
10943
10944
    /*
10945
     * Remove locally kept entity definitions if the tree was not built
10946
     */
10947
213k
    if ((ctxt->myDoc != NULL) &&
10948
213k
  (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
10949
877
  xmlFreeDoc(ctxt->myDoc);
10950
877
  ctxt->myDoc = NULL;
10951
877
    }
10952
10953
213k
    if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) {
10954
29.1k
        ctxt->myDoc->properties |= XML_DOC_WELLFORMED;
10955
29.1k
  if (ctxt->valid)
10956
17.4k
      ctxt->myDoc->properties |= XML_DOC_DTDVALID;
10957
29.1k
  if (ctxt->nsWellFormed)
10958
28.1k
      ctxt->myDoc->properties |= XML_DOC_NSVALID;
10959
29.1k
  if (ctxt->options & XML_PARSE_OLD10)
10960
10.4k
      ctxt->myDoc->properties |= XML_DOC_OLD10;
10961
29.1k
    }
10962
213k
    if (! ctxt->wellFormed) {
10963
184k
  ctxt->valid = 0;
10964
184k
  return(-1);
10965
184k
    }
10966
29.1k
    return(0);
10967
213k
}
10968
10969
/**
10970
 * xmlParseExtParsedEnt:
10971
 * @ctxt:  an XML parser context
10972
 *
10973
 * parse a general parsed entity
10974
 * An external general parsed entity is well-formed if it matches the
10975
 * production labeled extParsedEnt.
10976
 *
10977
 * [78] extParsedEnt ::= TextDecl? content
10978
 *
10979
 * Returns 0, or -1 in case of error. The parser context is augmented
10980
 *                as a result of the parsing.
10981
 */
10982
10983
int
10984
0
xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
10985
0
    xmlChar start[4];
10986
0
    xmlCharEncoding enc;
10987
10988
0
    if ((ctxt == NULL) || (ctxt->input == NULL))
10989
0
        return(-1);
10990
10991
0
    xmlDetectSAX2(ctxt);
10992
10993
0
    GROW;
10994
10995
    /*
10996
     * SAX: beginning of the document processing.
10997
     */
10998
0
    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10999
0
        ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
11000
11001
    /*
11002
     * Get the 4 first bytes and decode the charset
11003
     * if enc != XML_CHAR_ENCODING_NONE
11004
     * plug some encoding conversion routines.
11005
     */
11006
0
    if ((ctxt->input->end - ctxt->input->cur) >= 4) {
11007
0
  start[0] = RAW;
11008
0
  start[1] = NXT(1);
11009
0
  start[2] = NXT(2);
11010
0
  start[3] = NXT(3);
11011
0
  enc = xmlDetectCharEncoding(start, 4);
11012
0
  if (enc != XML_CHAR_ENCODING_NONE) {
11013
0
      xmlSwitchEncoding(ctxt, enc);
11014
0
  }
11015
0
    }
11016
11017
11018
0
    if (CUR == 0) {
11019
0
  xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11020
0
    }
11021
11022
    /*
11023
     * Check for the XMLDecl in the Prolog.
11024
     */
11025
0
    GROW;
11026
0
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
11027
11028
  /*
11029
   * Note that we will switch encoding on the fly.
11030
   */
11031
0
  xmlParseXMLDecl(ctxt);
11032
0
  if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
11033
      /*
11034
       * The XML REC instructs us to stop parsing right here
11035
       */
11036
0
      return(-1);
11037
0
  }
11038
0
  SKIP_BLANKS;
11039
0
    } else {
11040
0
  ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11041
0
    }
11042
0
    if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
11043
0
        ctxt->sax->startDocument(ctxt->userData);
11044
0
    if (ctxt->instate == XML_PARSER_EOF)
11045
0
  return(-1);
11046
11047
    /*
11048
     * Doing validity checking on a chunk doesn't make sense
11049
     */
11050
0
    ctxt->instate = XML_PARSER_CONTENT;
11051
0
    ctxt->validate = 0;
11052
0
    ctxt->loadsubset = 0;
11053
0
    ctxt->depth = 0;
11054
11055
0
    xmlParseContent(ctxt);
11056
0
    if (ctxt->instate == XML_PARSER_EOF)
11057
0
  return(-1);
11058
11059
0
    if ((RAW == '<') && (NXT(1) == '/')) {
11060
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11061
0
    } else if (RAW != 0) {
11062
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
11063
0
    }
11064
11065
    /*
11066
     * SAX: end of the document processing.
11067
     */
11068
0
    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11069
0
        ctxt->sax->endDocument(ctxt->userData);
11070
11071
0
    if (! ctxt->wellFormed) return(-1);
11072
0
    return(0);
11073
0
}
11074
11075
#ifdef LIBXML_PUSH_ENABLED
11076
/************************************************************************
11077
 *                  *
11078
 *    Progressive parsing interfaces        *
11079
 *                  *
11080
 ************************************************************************/
11081
11082
/**
11083
 * xmlParseLookupSequence:
11084
 * @ctxt:  an XML parser context
11085
 * @first:  the first char to lookup
11086
 * @next:  the next char to lookup or zero
11087
 * @third:  the next char to lookup or zero
11088
 *
11089
 * Try to find if a sequence (first, next, third) or just (first, next) or
11090
 * (first) is available in the input stream.
11091
 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
11092
 * to avoid rescanning sequences of bytes; it DOES change the state of the
11093
 * parser, do not use liberally.
11094
 *
11095
 * Returns the index to the current parsing point if the full sequence
11096
 *      is available, -1 otherwise.
11097
 */
11098
static int
11099
xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
11100
881k
                       xmlChar next, xmlChar third) {
11101
881k
    int base, len;
11102
881k
    xmlParserInputPtr in;
11103
881k
    const xmlChar *buf;
11104
11105
881k
    in = ctxt->input;
11106
881k
    if (in == NULL) return(-1);
11107
881k
    base = in->cur - in->base;
11108
881k
    if (base < 0) return(-1);
11109
881k
    if (ctxt->checkIndex > base)
11110
241k
        base = ctxt->checkIndex;
11111
881k
    if (in->buf == NULL) {
11112
0
  buf = in->base;
11113
0
  len = in->length;
11114
881k
    } else {
11115
881k
  buf = xmlBufContent(in->buf->buffer);
11116
881k
  len = xmlBufUse(in->buf->buffer);
11117
881k
    }
11118
    /* take into account the sequence length */
11119
881k
    if (third) len -= 2;
11120
685k
    else if (next) len --;
11121
762M
    for (;base < len;base++) {
11122
762M
        if (buf[base] == first) {
11123
999k
      if (third != 0) {
11124
280k
    if ((buf[base + 1] != next) ||
11125
280k
        (buf[base + 2] != third)) continue;
11126
719k
      } else if (next != 0) {
11127
425k
    if (buf[base + 1] != next) continue;
11128
425k
      }
11129
550k
      ctxt->checkIndex = 0;
11130
#ifdef DEBUG_PUSH
11131
      if (next == 0)
11132
    xmlGenericError(xmlGenericErrorContext,
11133
      "PP: lookup '%c' found at %d\n",
11134
      first, base);
11135
      else if (third == 0)
11136
    xmlGenericError(xmlGenericErrorContext,
11137
      "PP: lookup '%c%c' found at %d\n",
11138
      first, next, base);
11139
      else
11140
    xmlGenericError(xmlGenericErrorContext,
11141
      "PP: lookup '%c%c%c' found at %d\n",
11142
      first, next, third, base);
11143
#endif
11144
550k
      return(base - (in->cur - in->base));
11145
999k
  }
11146
762M
    }
11147
330k
    ctxt->checkIndex = base;
11148
#ifdef DEBUG_PUSH
11149
    if (next == 0)
11150
  xmlGenericError(xmlGenericErrorContext,
11151
    "PP: lookup '%c' failed\n", first);
11152
    else if (third == 0)
11153
  xmlGenericError(xmlGenericErrorContext,
11154
    "PP: lookup '%c%c' failed\n", first, next);
11155
    else
11156
  xmlGenericError(xmlGenericErrorContext,
11157
    "PP: lookup '%c%c%c' failed\n", first, next, third);
11158
#endif
11159
330k
    return(-1);
11160
881k
}
11161
11162
/**
11163
 * xmlParseGetLasts:
11164
 * @ctxt:  an XML parser context
11165
 * @lastlt:  pointer to store the last '<' from the input
11166
 * @lastgt:  pointer to store the last '>' from the input
11167
 *
11168
 * Lookup the last < and > in the current chunk
11169
 */
11170
static void
11171
xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
11172
1.65M
                 const xmlChar **lastgt) {
11173
1.65M
    const xmlChar *tmp;
11174
11175
1.65M
    if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
11176
0
  xmlGenericError(xmlGenericErrorContext,
11177
0
        "Internal error: xmlParseGetLasts\n");
11178
0
  return;
11179
0
    }
11180
1.65M
    if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
11181
1.03M
        tmp = ctxt->input->end;
11182
1.03M
  tmp--;
11183
225M
  while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
11184
1.03M
  if (tmp < ctxt->input->base) {
11185
28.4k
      *lastlt = NULL;
11186
28.4k
      *lastgt = NULL;
11187
1.00M
  } else {
11188
1.00M
      *lastlt = tmp;
11189
1.00M
      tmp++;
11190
64.4M
      while ((tmp < ctxt->input->end) && (*tmp != '>')) {
11191
63.4M
          if (*tmp == '\'') {
11192
44.1k
        tmp++;
11193
22.6M
        while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
11194
44.1k
        if (tmp < ctxt->input->end) tmp++;
11195
63.4M
    } else if (*tmp == '"') {
11196
303k
        tmp++;
11197
18.5M
        while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
11198
303k
        if (tmp < ctxt->input->end) tmp++;
11199
303k
    } else
11200
63.1M
        tmp++;
11201
63.4M
      }
11202
1.00M
      if (tmp < ctxt->input->end)
11203
443k
          *lastgt = tmp;
11204
559k
      else {
11205
559k
          tmp = *lastlt;
11206
559k
    tmp--;
11207
19.6M
    while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
11208
559k
    if (tmp >= ctxt->input->base)
11209
530k
        *lastgt = tmp;
11210
29.1k
    else
11211
29.1k
        *lastgt = NULL;
11212
559k
      }
11213
1.00M
  }
11214
1.03M
    } else {
11215
619k
        *lastlt = NULL;
11216
619k
  *lastgt = NULL;
11217
619k
    }
11218
1.65M
}
11219
/**
11220
 * xmlCheckCdataPush:
11221
 * @cur: pointer to the block of characters
11222
 * @len: length of the block in bytes
11223
 * @complete: 1 if complete CDATA block is passed in, 0 if partial block
11224
 *
11225
 * Check that the block of characters is okay as SCdata content [20]
11226
 *
11227
 * Returns the number of bytes to pass if okay, a negative index where an
11228
 *         UTF-8 error occurred otherwise
11229
 */
11230
static int
11231
91.0k
xmlCheckCdataPush(const xmlChar *utf, int len, int complete) {
11232
91.0k
    int ix;
11233
91.0k
    unsigned char c;
11234
91.0k
    int codepoint;
11235
11236
91.0k
    if ((utf == NULL) || (len <= 0))
11237
1.30k
        return(0);
11238
11239
5.35M
    for (ix = 0; ix < len;) {      /* string is 0-terminated */
11240
5.33M
        c = utf[ix];
11241
5.33M
        if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
11242
4.12M
      if (c >= 0x20)
11243
3.79M
    ix++;
11244
336k
      else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
11245
328k
          ix++;
11246
8.21k
      else
11247
8.21k
          return(-ix);
11248
4.12M
  } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
11249
397k
      if (ix + 2 > len) return(complete ? -ix : ix);
11250
394k
      if ((utf[ix+1] & 0xc0 ) != 0x80)
11251
5.90k
          return(-ix);
11252
388k
      codepoint = (utf[ix] & 0x1f) << 6;
11253
388k
      codepoint |= utf[ix+1] & 0x3f;
11254
388k
      if (!xmlIsCharQ(codepoint))
11255
4.33k
          return(-ix);
11256
383k
      ix += 2;
11257
807k
  } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
11258
352k
      if (ix + 3 > len) return(complete ? -ix : ix);
11259
348k
      if (((utf[ix+1] & 0xc0) != 0x80) ||
11260
348k
          ((utf[ix+2] & 0xc0) != 0x80))
11261
9.52k
        return(-ix);
11262
339k
      codepoint = (utf[ix] & 0xf) << 12;
11263
339k
      codepoint |= (utf[ix+1] & 0x3f) << 6;
11264
339k
      codepoint |= utf[ix+2] & 0x3f;
11265
339k
      if (!xmlIsCharQ(codepoint))
11266
2.95k
          return(-ix);
11267
336k
      ix += 3;
11268
454k
  } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
11269
446k
      if (ix + 4 > len) return(complete ? -ix : ix);
11270
443k
      if (((utf[ix+1] & 0xc0) != 0x80) ||
11271
443k
          ((utf[ix+2] & 0xc0) != 0x80) ||
11272
443k
    ((utf[ix+3] & 0xc0) != 0x80))
11273
15.5k
        return(-ix);
11274
427k
      codepoint = (utf[ix] & 0x7) << 18;
11275
427k
      codepoint |= (utf[ix+1] & 0x3f) << 12;
11276
427k
      codepoint |= (utf[ix+2] & 0x3f) << 6;
11277
427k
      codepoint |= utf[ix+3] & 0x3f;
11278
427k
      if (!xmlIsCharQ(codepoint))
11279
6.44k
          return(-ix);
11280
421k
      ix += 4;
11281
421k
  } else       /* unknown encoding */
11282
8.34k
      return(-ix);
11283
5.33M
      }
11284
17.9k
      return(ix);
11285
89.7k
}
11286
11287
/**
11288
 * xmlParseTryOrFinish:
11289
 * @ctxt:  an XML parser context
11290
 * @terminate:  last chunk indicator
11291
 *
11292
 * Try to progress on parsing
11293
 *
11294
 * Returns zero if no parsing was possible
11295
 */
11296
static int
11297
1.39M
xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
11298
1.39M
    int ret = 0;
11299
1.39M
    int avail, tlen;
11300
1.39M
    xmlChar cur, next;
11301
1.39M
    const xmlChar *lastlt, *lastgt;
11302
11303
1.39M
    if (ctxt->input == NULL)
11304
0
        return(0);
11305
11306
#ifdef DEBUG_PUSH
11307
    switch (ctxt->instate) {
11308
  case XML_PARSER_EOF:
11309
      xmlGenericError(xmlGenericErrorContext,
11310
        "PP: try EOF\n"); break;
11311
  case XML_PARSER_START:
11312
      xmlGenericError(xmlGenericErrorContext,
11313
        "PP: try START\n"); break;
11314
  case XML_PARSER_MISC:
11315
      xmlGenericError(xmlGenericErrorContext,
11316
        "PP: try MISC\n");break;
11317
  case XML_PARSER_COMMENT:
11318
      xmlGenericError(xmlGenericErrorContext,
11319
        "PP: try COMMENT\n");break;
11320
  case XML_PARSER_PROLOG:
11321
      xmlGenericError(xmlGenericErrorContext,
11322
        "PP: try PROLOG\n");break;
11323
  case XML_PARSER_START_TAG:
11324
      xmlGenericError(xmlGenericErrorContext,
11325
        "PP: try START_TAG\n");break;
11326
  case XML_PARSER_CONTENT:
11327
      xmlGenericError(xmlGenericErrorContext,
11328
        "PP: try CONTENT\n");break;
11329
  case XML_PARSER_CDATA_SECTION:
11330
      xmlGenericError(xmlGenericErrorContext,
11331
        "PP: try CDATA_SECTION\n");break;
11332
  case XML_PARSER_END_TAG:
11333
      xmlGenericError(xmlGenericErrorContext,
11334
        "PP: try END_TAG\n");break;
11335
  case XML_PARSER_ENTITY_DECL:
11336
      xmlGenericError(xmlGenericErrorContext,
11337
        "PP: try ENTITY_DECL\n");break;
11338
  case XML_PARSER_ENTITY_VALUE:
11339
      xmlGenericError(xmlGenericErrorContext,
11340
        "PP: try ENTITY_VALUE\n");break;
11341
  case XML_PARSER_ATTRIBUTE_VALUE:
11342
      xmlGenericError(xmlGenericErrorContext,
11343
        "PP: try ATTRIBUTE_VALUE\n");break;
11344
  case XML_PARSER_DTD:
11345
      xmlGenericError(xmlGenericErrorContext,
11346
        "PP: try DTD\n");break;
11347
  case XML_PARSER_EPILOG:
11348
      xmlGenericError(xmlGenericErrorContext,
11349
        "PP: try EPILOG\n");break;
11350
  case XML_PARSER_PI:
11351
      xmlGenericError(xmlGenericErrorContext,
11352
        "PP: try PI\n");break;
11353
        case XML_PARSER_IGNORE:
11354
            xmlGenericError(xmlGenericErrorContext,
11355
        "PP: try IGNORE\n");break;
11356
    }
11357
#endif
11358
11359
1.39M
    if ((ctxt->input != NULL) &&
11360
1.39M
        (ctxt->input->cur - ctxt->input->base > 4096)) {
11361
5.58k
  xmlSHRINK(ctxt);
11362
5.58k
  ctxt->checkIndex = 0;
11363
5.58k
    }
11364
1.39M
    xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11365
11366
9.34M
    while (ctxt->instate != XML_PARSER_EOF) {
11367
9.29M
  if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
11368
76.3k
      return(0);
11369
11370
9.21M
  if (ctxt->input == NULL) break;
11371
9.21M
  if (ctxt->input->buf == NULL)
11372
0
      avail = ctxt->input->length -
11373
0
              (ctxt->input->cur - ctxt->input->base);
11374
9.21M
  else {
11375
      /*
11376
       * If we are operating on converted input, try to flush
11377
       * remaining chars to avoid them stalling in the non-converted
11378
       * buffer. But do not do this in document start where
11379
       * encoding="..." may not have been read and we work on a
11380
       * guessed encoding.
11381
       */
11382
9.21M
      if ((ctxt->instate != XML_PARSER_START) &&
11383
9.21M
          (ctxt->input->buf->raw != NULL) &&
11384
9.21M
    (xmlBufIsEmpty(ctxt->input->buf->raw) == 0)) {
11385
59.5k
                size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
11386
59.5k
                                                 ctxt->input);
11387
59.5k
    size_t current = ctxt->input->cur - ctxt->input->base;
11388
11389
59.5k
    xmlParserInputBufferPush(ctxt->input->buf, 0, "");
11390
59.5k
                xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
11391
59.5k
                                      base, current);
11392
59.5k
      }
11393
9.21M
      avail = xmlBufUse(ctxt->input->buf->buffer) -
11394
9.21M
        (ctxt->input->cur - ctxt->input->base);
11395
9.21M
  }
11396
9.21M
        if (avail < 1)
11397
140k
      goto done;
11398
9.07M
        switch (ctxt->instate) {
11399
0
            case XML_PARSER_EOF:
11400
          /*
11401
     * Document parsing is done !
11402
     */
11403
0
          goto done;
11404
599k
            case XML_PARSER_START:
11405
599k
    if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
11406
160k
        xmlChar start[4];
11407
160k
        xmlCharEncoding enc;
11408
11409
        /*
11410
         * Very first chars read from the document flow.
11411
         */
11412
160k
        if (avail < 4)
11413
2.72k
      goto done;
11414
11415
        /*
11416
         * Get the 4 first bytes and decode the charset
11417
         * if enc != XML_CHAR_ENCODING_NONE
11418
         * plug some encoding conversion routines,
11419
         * else xmlSwitchEncoding will set to (default)
11420
         * UTF8.
11421
         */
11422
158k
        start[0] = RAW;
11423
158k
        start[1] = NXT(1);
11424
158k
        start[2] = NXT(2);
11425
158k
        start[3] = NXT(3);
11426
158k
        enc = xmlDetectCharEncoding(start, 4);
11427
158k
        xmlSwitchEncoding(ctxt, enc);
11428
158k
        break;
11429
160k
    }
11430
11431
438k
    if (avail < 2)
11432
236
        goto done;
11433
438k
    cur = ctxt->input->cur[0];
11434
438k
    next = ctxt->input->cur[1];
11435
438k
    if (cur == 0) {
11436
368
        if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11437
368
      ctxt->sax->setDocumentLocator(ctxt->userData,
11438
368
                  &xmlDefaultSAXLocator);
11439
368
        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11440
368
        xmlHaltParser(ctxt);
11441
#ifdef DEBUG_PUSH
11442
        xmlGenericError(xmlGenericErrorContext,
11443
          "PP: entering EOF\n");
11444
#endif
11445
368
        if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11446
368
      ctxt->sax->endDocument(ctxt->userData);
11447
368
        goto done;
11448
368
    }
11449
437k
          if ((cur == '<') && (next == '?')) {
11450
        /* PI or XML decl */
11451
271k
        if (avail < 5) return(ret);
11452
271k
        if ((!terminate) &&
11453
271k
            (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11454
122k
      return(ret);
11455
149k
        if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11456
149k
      ctxt->sax->setDocumentLocator(ctxt->userData,
11457
149k
                  &xmlDefaultSAXLocator);
11458
149k
        if ((ctxt->input->cur[2] == 'x') &&
11459
149k
      (ctxt->input->cur[3] == 'm') &&
11460
149k
      (ctxt->input->cur[4] == 'l') &&
11461
149k
      (IS_BLANK_CH(ctxt->input->cur[5]))) {
11462
133k
      ret += 5;
11463
#ifdef DEBUG_PUSH
11464
      xmlGenericError(xmlGenericErrorContext,
11465
        "PP: Parsing XML Decl\n");
11466
#endif
11467
133k
      xmlParseXMLDecl(ctxt);
11468
133k
      if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
11469
          /*
11470
           * The XML REC instructs us to stop parsing right
11471
           * here
11472
           */
11473
282
          xmlHaltParser(ctxt);
11474
282
          return(0);
11475
282
      }
11476
133k
      ctxt->standalone = ctxt->input->standalone;
11477
133k
      if ((ctxt->encoding == NULL) &&
11478
133k
          (ctxt->input->encoding != NULL))
11479
20.1k
          ctxt->encoding = xmlStrdup(ctxt->input->encoding);
11480
133k
      if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11481
133k
          (!ctxt->disableSAX))
11482
126k
          ctxt->sax->startDocument(ctxt->userData);
11483
133k
      ctxt->instate = XML_PARSER_MISC;
11484
#ifdef DEBUG_PUSH
11485
      xmlGenericError(xmlGenericErrorContext,
11486
        "PP: entering MISC\n");
11487
#endif
11488
133k
        } else {
11489
15.2k
      ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11490
15.2k
      if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11491
15.2k
          (!ctxt->disableSAX))
11492
15.2k
          ctxt->sax->startDocument(ctxt->userData);
11493
15.2k
      ctxt->instate = XML_PARSER_MISC;
11494
#ifdef DEBUG_PUSH
11495
      xmlGenericError(xmlGenericErrorContext,
11496
        "PP: entering MISC\n");
11497
#endif
11498
15.2k
        }
11499
166k
    } else {
11500
166k
        if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11501
166k
      ctxt->sax->setDocumentLocator(ctxt->userData,
11502
166k
                  &xmlDefaultSAXLocator);
11503
166k
        ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11504
166k
        if (ctxt->version == NULL) {
11505
0
            xmlErrMemory(ctxt, NULL);
11506
0
      break;
11507
0
        }
11508
166k
        if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11509
166k
            (!ctxt->disableSAX))
11510
166k
      ctxt->sax->startDocument(ctxt->userData);
11511
166k
        ctxt->instate = XML_PARSER_MISC;
11512
#ifdef DEBUG_PUSH
11513
        xmlGenericError(xmlGenericErrorContext,
11514
          "PP: entering MISC\n");
11515
#endif
11516
166k
    }
11517
315k
    break;
11518
2.00M
            case XML_PARSER_START_TAG: {
11519
2.00M
          const xmlChar *name;
11520
2.00M
    const xmlChar *prefix = NULL;
11521
2.00M
    const xmlChar *URI = NULL;
11522
2.00M
                int line = ctxt->input->line;
11523
2.00M
    int nsNr = ctxt->nsNr;
11524
11525
2.00M
    if ((avail < 2) && (ctxt->inputNr == 1))
11526
0
        goto done;
11527
2.00M
    cur = ctxt->input->cur[0];
11528
2.00M
          if (cur != '<') {
11529
21.5k
        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11530
21.5k
        xmlHaltParser(ctxt);
11531
21.5k
        if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11532
21.5k
      ctxt->sax->endDocument(ctxt->userData);
11533
21.5k
        goto done;
11534
21.5k
    }
11535
1.98M
    if (!terminate) {
11536
1.87M
        if (ctxt->progressive) {
11537
            /* > can be found unescaped in attribute values */
11538
1.87M
            if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
11539
241k
          goto done;
11540
1.87M
        } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11541
0
      goto done;
11542
0
        }
11543
1.87M
    }
11544
1.74M
    if (ctxt->spaceNr == 0)
11545
11.3k
        spacePush(ctxt, -1);
11546
1.73M
    else if (*ctxt->space == -2)
11547
115k
        spacePush(ctxt, -1);
11548
1.61M
    else
11549
1.61M
        spacePush(ctxt, *ctxt->space);
11550
1.74M
#ifdef LIBXML_SAX1_ENABLED
11551
1.74M
    if (ctxt->sax2)
11552
993k
#endif /* LIBXML_SAX1_ENABLED */
11553
993k
        name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
11554
750k
#ifdef LIBXML_SAX1_ENABLED
11555
750k
    else
11556
750k
        name = xmlParseStartTag(ctxt);
11557
1.74M
#endif /* LIBXML_SAX1_ENABLED */
11558
1.74M
    if (ctxt->instate == XML_PARSER_EOF)
11559
0
        goto done;
11560
1.74M
    if (name == NULL) {
11561
16.7k
        spacePop(ctxt);
11562
16.7k
        xmlHaltParser(ctxt);
11563
16.7k
        if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11564
16.7k
      ctxt->sax->endDocument(ctxt->userData);
11565
16.7k
        goto done;
11566
16.7k
    }
11567
1.72M
#ifdef LIBXML_VALID_ENABLED
11568
    /*
11569
     * [ VC: Root Element Type ]
11570
     * The Name in the document type declaration must match
11571
     * the element type of the root element.
11572
     */
11573
1.72M
    if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
11574
1.72M
        ctxt->node && (ctxt->node == ctxt->myDoc->children))
11575
0
        ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
11576
1.72M
#endif /* LIBXML_VALID_ENABLED */
11577
11578
    /*
11579
     * Check for an Empty Element.
11580
     */
11581
1.72M
    if ((RAW == '/') && (NXT(1) == '>')) {
11582
494k
        SKIP(2);
11583
11584
494k
        if (ctxt->sax2) {
11585
277k
      if ((ctxt->sax != NULL) &&
11586
277k
          (ctxt->sax->endElementNs != NULL) &&
11587
277k
          (!ctxt->disableSAX))
11588
276k
          ctxt->sax->endElementNs(ctxt->userData, name,
11589
276k
                                  prefix, URI);
11590
277k
      if (ctxt->nsNr - nsNr > 0)
11591
8.41k
          nsPop(ctxt, ctxt->nsNr - nsNr);
11592
277k
#ifdef LIBXML_SAX1_ENABLED
11593
277k
        } else {
11594
217k
      if ((ctxt->sax != NULL) &&
11595
217k
          (ctxt->sax->endElement != NULL) &&
11596
217k
          (!ctxt->disableSAX))
11597
216k
          ctxt->sax->endElement(ctxt->userData, name);
11598
217k
#endif /* LIBXML_SAX1_ENABLED */
11599
217k
        }
11600
494k
        if (ctxt->instate == XML_PARSER_EOF)
11601
0
      goto done;
11602
494k
        spacePop(ctxt);
11603
494k
        if (ctxt->nameNr == 0) {
11604
8.35k
      ctxt->instate = XML_PARSER_EPILOG;
11605
486k
        } else {
11606
486k
      ctxt->instate = XML_PARSER_CONTENT;
11607
486k
        }
11608
494k
                    ctxt->progressive = 1;
11609
494k
        break;
11610
494k
    }
11611
1.23M
    if (RAW == '>') {
11612
1.02M
        NEXT;
11613
1.02M
    } else {
11614
208k
        xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
11615
208k
           "Couldn't find end of Start Tag %s\n",
11616
208k
           name);
11617
208k
        nodePop(ctxt);
11618
208k
        spacePop(ctxt);
11619
208k
    }
11620
1.23M
                nameNsPush(ctxt, name, prefix, URI, line, ctxt->nsNr - nsNr);
11621
11622
1.23M
    ctxt->instate = XML_PARSER_CONTENT;
11623
1.23M
                ctxt->progressive = 1;
11624
1.23M
                break;
11625
1.72M
      }
11626
4.89M
            case XML_PARSER_CONTENT: {
11627
4.89M
    int id;
11628
4.89M
    unsigned long cons;
11629
4.89M
    if ((avail < 2) && (ctxt->inputNr == 1))
11630
29.5k
        goto done;
11631
4.86M
    cur = ctxt->input->cur[0];
11632
4.86M
    next = ctxt->input->cur[1];
11633
11634
4.86M
    id = ctxt->input->id;
11635
4.86M
          cons = CUR_CONSUMED;
11636
4.86M
    if ((cur == '<') && (next == '/')) {
11637
555k
        ctxt->instate = XML_PARSER_END_TAG;
11638
555k
        break;
11639
4.30M
          } else if ((cur == '<') && (next == '?')) {
11640
24.7k
        if ((!terminate) &&
11641
24.7k
            (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11642
8.80k
                        ctxt->progressive = XML_PARSER_PI;
11643
8.80k
      goto done;
11644
8.80k
                    }
11645
15.9k
        xmlParsePI(ctxt);
11646
15.9k
        ctxt->instate = XML_PARSER_CONTENT;
11647
15.9k
                    ctxt->progressive = 1;
11648
4.28M
    } else if ((cur == '<') && (next != '!')) {
11649
1.51M
        ctxt->instate = XML_PARSER_START_TAG;
11650
1.51M
        break;
11651
2.76M
    } else if ((cur == '<') && (next == '!') &&
11652
2.76M
               (ctxt->input->cur[2] == '-') &&
11653
2.76M
         (ctxt->input->cur[3] == '-')) {
11654
52.3k
        int term;
11655
11656
52.3k
              if (avail < 4)
11657
0
            goto done;
11658
52.3k
        ctxt->input->cur += 4;
11659
52.3k
        term = xmlParseLookupSequence(ctxt, '-', '-', '>');
11660
52.3k
        ctxt->input->cur -= 4;
11661
52.3k
        if ((!terminate) && (term < 0)) {
11662
17.2k
                        ctxt->progressive = XML_PARSER_COMMENT;
11663
17.2k
      goto done;
11664
17.2k
                    }
11665
35.1k
        xmlParseComment(ctxt);
11666
35.1k
        ctxt->instate = XML_PARSER_CONTENT;
11667
35.1k
                    ctxt->progressive = 1;
11668
2.70M
    } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
11669
2.70M
        (ctxt->input->cur[2] == '[') &&
11670
2.70M
        (ctxt->input->cur[3] == 'C') &&
11671
2.70M
        (ctxt->input->cur[4] == 'D') &&
11672
2.70M
        (ctxt->input->cur[5] == 'A') &&
11673
2.70M
        (ctxt->input->cur[6] == 'T') &&
11674
2.70M
        (ctxt->input->cur[7] == 'A') &&
11675
2.70M
        (ctxt->input->cur[8] == '[')) {
11676
13.7k
        SKIP(9);
11677
13.7k
        ctxt->instate = XML_PARSER_CDATA_SECTION;
11678
13.7k
        break;
11679
2.69M
    } else if ((cur == '<') && (next == '!') &&
11680
2.69M
               (avail < 9)) {
11681
16.7k
        goto done;
11682
2.67M
    } else if (cur == '&') {
11683
217k
        if ((!terminate) &&
11684
217k
            (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
11685
44.2k
      goto done;
11686
173k
        xmlParseReference(ctxt);
11687
2.46M
    } else {
11688
        /* TODO Avoid the extra copy, handle directly !!! */
11689
        /*
11690
         * Goal of the following test is:
11691
         *  - minimize calls to the SAX 'character' callback
11692
         *    when they are mergeable
11693
         *  - handle a problem for isBlank when we only parse
11694
         *    a sequence of blank chars and the next one is
11695
         *    not available to check against '<' presence.
11696
         *  - tries to homogenize the differences in SAX
11697
         *    callbacks between the push and pull versions
11698
         *    of the parser.
11699
         */
11700
2.46M
        if ((ctxt->inputNr == 1) &&
11701
2.46M
            (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
11702
2.03M
      if (!terminate) {
11703
1.94M
          if (ctxt->progressive) {
11704
1.94M
        if ((lastlt == NULL) ||
11705
1.94M
            (ctxt->input->cur > lastlt))
11706
152k
            goto done;
11707
1.94M
          } else if (xmlParseLookupSequence(ctxt,
11708
0
                                            '<', 0, 0) < 0) {
11709
0
        goto done;
11710
0
          }
11711
1.94M
      }
11712
2.03M
                    }
11713
2.30M
        ctxt->checkIndex = 0;
11714
2.30M
        xmlParseCharData(ctxt, 0);
11715
2.30M
    }
11716
2.53M
    if ((cons == CUR_CONSUMED) && (id == ctxt->input->id)) {
11717
51.2k
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
11718
51.2k
                    "detected an error in element content\n");
11719
51.2k
        xmlHaltParser(ctxt);
11720
51.2k
        break;
11721
51.2k
    }
11722
2.48M
    break;
11723
2.53M
      }
11724
2.48M
            case XML_PARSER_END_TAG:
11725
582k
    if (avail < 2)
11726
0
        goto done;
11727
582k
    if (!terminate) {
11728
552k
        if (ctxt->progressive) {
11729
            /* > can be found unescaped in attribute values */
11730
552k
            if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
11731
28.7k
          goto done;
11732
552k
        } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11733
0
      goto done;
11734
0
        }
11735
552k
    }
11736
554k
    if (ctxt->sax2) {
11737
338k
              xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
11738
338k
        nameNsPop(ctxt);
11739
338k
    }
11740
215k
#ifdef LIBXML_SAX1_ENABLED
11741
215k
      else
11742
215k
        xmlParseEndTag1(ctxt, 0);
11743
554k
#endif /* LIBXML_SAX1_ENABLED */
11744
554k
    if (ctxt->instate == XML_PARSER_EOF) {
11745
        /* Nothing */
11746
554k
    } else if (ctxt->nameNr == 0) {
11747
63.3k
        ctxt->instate = XML_PARSER_EPILOG;
11748
490k
    } else {
11749
490k
        ctxt->instate = XML_PARSER_CONTENT;
11750
490k
    }
11751
554k
    break;
11752
101k
            case XML_PARSER_CDATA_SECTION: {
11753
          /*
11754
     * The Push mode need to have the SAX callback for
11755
     * cdataBlock merge back contiguous callbacks.
11756
     */
11757
101k
    int base;
11758
11759
101k
    base = xmlParseLookupSequence(ctxt, ']', ']', '>');
11760
101k
    if (base < 0) {
11761
74.9k
        if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
11762
64.0k
            int tmp;
11763
11764
64.0k
      tmp = xmlCheckCdataPush(ctxt->input->cur,
11765
64.0k
                              XML_PARSER_BIG_BUFFER_SIZE, 0);
11766
64.0k
      if (tmp < 0) {
11767
2.29k
          tmp = -tmp;
11768
2.29k
          ctxt->input->cur += tmp;
11769
2.29k
          goto encoding_error;
11770
2.29k
      }
11771
61.7k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
11772
61.7k
          if (ctxt->sax->cdataBlock != NULL)
11773
29.4k
        ctxt->sax->cdataBlock(ctxt->userData,
11774
29.4k
                              ctxt->input->cur, tmp);
11775
32.2k
          else if (ctxt->sax->characters != NULL)
11776
32.2k
        ctxt->sax->characters(ctxt->userData,
11777
32.2k
                              ctxt->input->cur, tmp);
11778
61.7k
      }
11779
61.7k
      if (ctxt->instate == XML_PARSER_EOF)
11780
0
          goto done;
11781
61.7k
      SKIPL(tmp);
11782
61.7k
      ctxt->checkIndex = 0;
11783
61.7k
        }
11784
72.6k
        goto done;
11785
74.9k
    } else {
11786
27.0k
        int tmp;
11787
11788
27.0k
        tmp = xmlCheckCdataPush(ctxt->input->cur, base, 1);
11789
27.0k
        if ((tmp < 0) || (tmp != base)) {
11790
20.4k
      tmp = -tmp;
11791
20.4k
      ctxt->input->cur += tmp;
11792
20.4k
      goto encoding_error;
11793
20.4k
        }
11794
6.58k
        if ((ctxt->sax != NULL) && (base == 0) &&
11795
6.58k
            (ctxt->sax->cdataBlock != NULL) &&
11796
6.58k
            (!ctxt->disableSAX)) {
11797
      /*
11798
       * Special case to provide identical behaviour
11799
       * between pull and push parsers on enpty CDATA
11800
       * sections
11801
       */
11802
914
       if ((ctxt->input->cur - ctxt->input->base >= 9) &&
11803
914
           (!strncmp((const char *)&ctxt->input->cur[-9],
11804
914
                     "<![CDATA[", 9)))
11805
903
           ctxt->sax->cdataBlock(ctxt->userData,
11806
903
                                 BAD_CAST "", 0);
11807
5.67k
        } else if ((ctxt->sax != NULL) && (base > 0) &&
11808
5.67k
      (!ctxt->disableSAX)) {
11809
5.27k
      if (ctxt->sax->cdataBlock != NULL)
11810
3.96k
          ctxt->sax->cdataBlock(ctxt->userData,
11811
3.96k
              ctxt->input->cur, base);
11812
1.31k
      else if (ctxt->sax->characters != NULL)
11813
1.31k
          ctxt->sax->characters(ctxt->userData,
11814
1.31k
              ctxt->input->cur, base);
11815
5.27k
        }
11816
6.58k
        if (ctxt->instate == XML_PARSER_EOF)
11817
0
      goto done;
11818
6.58k
        SKIPL(base + 3);
11819
6.58k
        ctxt->checkIndex = 0;
11820
6.58k
        ctxt->instate = XML_PARSER_CONTENT;
11821
#ifdef DEBUG_PUSH
11822
        xmlGenericError(xmlGenericErrorContext,
11823
          "PP: entering CONTENT\n");
11824
#endif
11825
6.58k
    }
11826
6.58k
    break;
11827
101k
      }
11828
395k
            case XML_PARSER_MISC:
11829
395k
    SKIP_BLANKS;
11830
395k
    if (ctxt->input->buf == NULL)
11831
0
        avail = ctxt->input->length -
11832
0
                (ctxt->input->cur - ctxt->input->base);
11833
395k
    else
11834
395k
        avail = xmlBufUse(ctxt->input->buf->buffer) -
11835
395k
                (ctxt->input->cur - ctxt->input->base);
11836
395k
    if (avail < 2)
11837
5.59k
        goto done;
11838
389k
    cur = ctxt->input->cur[0];
11839
389k
    next = ctxt->input->cur[1];
11840
389k
          if ((cur == '<') && (next == '?')) {
11841
29.9k
        if ((!terminate) &&
11842
29.9k
            (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11843
5.39k
                        ctxt->progressive = XML_PARSER_PI;
11844
5.39k
      goto done;
11845
5.39k
                    }
11846
#ifdef DEBUG_PUSH
11847
        xmlGenericError(xmlGenericErrorContext,
11848
          "PP: Parsing PI\n");
11849
#endif
11850
24.5k
        xmlParsePI(ctxt);
11851
24.5k
        if (ctxt->instate == XML_PARSER_EOF)
11852
0
      goto done;
11853
24.5k
        ctxt->instate = XML_PARSER_MISC;
11854
24.5k
                    ctxt->progressive = 1;
11855
24.5k
        ctxt->checkIndex = 0;
11856
359k
    } else if ((cur == '<') && (next == '!') &&
11857
359k
        (ctxt->input->cur[2] == '-') &&
11858
359k
        (ctxt->input->cur[3] == '-')) {
11859
23.0k
        if ((!terminate) &&
11860
23.0k
            (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11861
10.7k
                        ctxt->progressive = XML_PARSER_COMMENT;
11862
10.7k
      goto done;
11863
10.7k
                    }
11864
#ifdef DEBUG_PUSH
11865
        xmlGenericError(xmlGenericErrorContext,
11866
          "PP: Parsing Comment\n");
11867
#endif
11868
12.3k
        xmlParseComment(ctxt);
11869
12.3k
        if (ctxt->instate == XML_PARSER_EOF)
11870
0
      goto done;
11871
12.3k
        ctxt->instate = XML_PARSER_MISC;
11872
12.3k
                    ctxt->progressive = 1;
11873
12.3k
        ctxt->checkIndex = 0;
11874
336k
    } else if ((cur == '<') && (next == '!') &&
11875
336k
        (ctxt->input->cur[2] == 'D') &&
11876
336k
        (ctxt->input->cur[3] == 'O') &&
11877
336k
        (ctxt->input->cur[4] == 'C') &&
11878
336k
        (ctxt->input->cur[5] == 'T') &&
11879
336k
        (ctxt->input->cur[6] == 'Y') &&
11880
336k
        (ctxt->input->cur[7] == 'P') &&
11881
336k
        (ctxt->input->cur[8] == 'E')) {
11882
178k
        if ((!terminate) &&
11883
178k
            (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) {
11884
27.9k
                        ctxt->progressive = XML_PARSER_DTD;
11885
27.9k
      goto done;
11886
27.9k
                    }
11887
#ifdef DEBUG_PUSH
11888
        xmlGenericError(xmlGenericErrorContext,
11889
          "PP: Parsing internal subset\n");
11890
#endif
11891
150k
        ctxt->inSubset = 1;
11892
150k
                    ctxt->progressive = 0;
11893
150k
        ctxt->checkIndex = 0;
11894
150k
        xmlParseDocTypeDecl(ctxt);
11895
150k
        if (ctxt->instate == XML_PARSER_EOF)
11896
0
      goto done;
11897
150k
        if (RAW == '[') {
11898
117k
      ctxt->instate = XML_PARSER_DTD;
11899
#ifdef DEBUG_PUSH
11900
      xmlGenericError(xmlGenericErrorContext,
11901
        "PP: entering DTD\n");
11902
#endif
11903
117k
        } else {
11904
      /*
11905
       * Create and update the external subset.
11906
       */
11907
33.1k
      ctxt->inSubset = 2;
11908
33.1k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11909
33.1k
          (ctxt->sax->externalSubset != NULL))
11910
31.2k
          ctxt->sax->externalSubset(ctxt->userData,
11911
31.2k
            ctxt->intSubName, ctxt->extSubSystem,
11912
31.2k
            ctxt->extSubURI);
11913
33.1k
      ctxt->inSubset = 0;
11914
33.1k
      xmlCleanSpecialAttr(ctxt);
11915
33.1k
      ctxt->instate = XML_PARSER_PROLOG;
11916
#ifdef DEBUG_PUSH
11917
      xmlGenericError(xmlGenericErrorContext,
11918
        "PP: entering PROLOG\n");
11919
#endif
11920
33.1k
        }
11921
157k
    } else if ((cur == '<') && (next == '!') &&
11922
157k
               (avail < 9)) {
11923
6.49k
        goto done;
11924
151k
    } else {
11925
151k
        ctxt->instate = XML_PARSER_START_TAG;
11926
151k
        ctxt->progressive = XML_PARSER_START_TAG;
11927
151k
        xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11928
#ifdef DEBUG_PUSH
11929
        xmlGenericError(xmlGenericErrorContext,
11930
          "PP: entering START_TAG\n");
11931
#endif
11932
151k
    }
11933
338k
    break;
11934
338k
            case XML_PARSER_PROLOG:
11935
137k
    SKIP_BLANKS;
11936
137k
    if (ctxt->input->buf == NULL)
11937
0
        avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11938
137k
    else
11939
137k
        avail = xmlBufUse(ctxt->input->buf->buffer) -
11940
137k
                            (ctxt->input->cur - ctxt->input->base);
11941
137k
    if (avail < 2)
11942
2.40k
        goto done;
11943
135k
    cur = ctxt->input->cur[0];
11944
135k
    next = ctxt->input->cur[1];
11945
135k
          if ((cur == '<') && (next == '?')) {
11946
7.32k
        if ((!terminate) &&
11947
7.32k
            (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11948
4.34k
                        ctxt->progressive = XML_PARSER_PI;
11949
4.34k
      goto done;
11950
4.34k
                    }
11951
#ifdef DEBUG_PUSH
11952
        xmlGenericError(xmlGenericErrorContext,
11953
          "PP: Parsing PI\n");
11954
#endif
11955
2.98k
        xmlParsePI(ctxt);
11956
2.98k
        if (ctxt->instate == XML_PARSER_EOF)
11957
0
      goto done;
11958
2.98k
        ctxt->instate = XML_PARSER_PROLOG;
11959
2.98k
                    ctxt->progressive = 1;
11960
127k
    } else if ((cur == '<') && (next == '!') &&
11961
127k
        (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11962
18.0k
        if ((!terminate) &&
11963
18.0k
            (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11964
5.34k
                        ctxt->progressive = XML_PARSER_COMMENT;
11965
5.34k
      goto done;
11966
5.34k
                    }
11967
#ifdef DEBUG_PUSH
11968
        xmlGenericError(xmlGenericErrorContext,
11969
          "PP: Parsing Comment\n");
11970
#endif
11971
12.6k
        xmlParseComment(ctxt);
11972
12.6k
        if (ctxt->instate == XML_PARSER_EOF)
11973
0
      goto done;
11974
12.6k
        ctxt->instate = XML_PARSER_PROLOG;
11975
12.6k
                    ctxt->progressive = 1;
11976
109k
    } else if ((cur == '<') && (next == '!') &&
11977
109k
               (avail < 4)) {
11978
251
        goto done;
11979
109k
    } else {
11980
109k
        ctxt->instate = XML_PARSER_START_TAG;
11981
109k
        if (ctxt->progressive == 0)
11982
98.5k
      ctxt->progressive = XML_PARSER_START_TAG;
11983
109k
        xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11984
#ifdef DEBUG_PUSH
11985
        xmlGenericError(xmlGenericErrorContext,
11986
          "PP: entering START_TAG\n");
11987
#endif
11988
109k
    }
11989
125k
    break;
11990
125k
            case XML_PARSER_EPILOG:
11991
82.9k
    SKIP_BLANKS;
11992
82.9k
    if (ctxt->input->buf == NULL)
11993
0
        avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11994
82.9k
    else
11995
82.9k
        avail = xmlBufUse(ctxt->input->buf->buffer) -
11996
82.9k
                            (ctxt->input->cur - ctxt->input->base);
11997
82.9k
    if (avail < 2)
11998
61.4k
        goto done;
11999
21.5k
    cur = ctxt->input->cur[0];
12000
21.5k
    next = ctxt->input->cur[1];
12001
21.5k
          if ((cur == '<') && (next == '?')) {
12002
6.87k
        if ((!terminate) &&
12003
6.87k
            (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
12004
4.62k
                        ctxt->progressive = XML_PARSER_PI;
12005
4.62k
      goto done;
12006
4.62k
                    }
12007
#ifdef DEBUG_PUSH
12008
        xmlGenericError(xmlGenericErrorContext,
12009
          "PP: Parsing PI\n");
12010
#endif
12011
2.25k
        xmlParsePI(ctxt);
12012
2.25k
        if (ctxt->instate == XML_PARSER_EOF)
12013
0
      goto done;
12014
2.25k
        ctxt->instate = XML_PARSER_EPILOG;
12015
2.25k
                    ctxt->progressive = 1;
12016
14.6k
    } else if ((cur == '<') && (next == '!') &&
12017
14.6k
        (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
12018
4.88k
        if ((!terminate) &&
12019
4.88k
            (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
12020
3.60k
                        ctxt->progressive = XML_PARSER_COMMENT;
12021
3.60k
      goto done;
12022
3.60k
                    }
12023
#ifdef DEBUG_PUSH
12024
        xmlGenericError(xmlGenericErrorContext,
12025
          "PP: Parsing Comment\n");
12026
#endif
12027
1.28k
        xmlParseComment(ctxt);
12028
1.28k
        if (ctxt->instate == XML_PARSER_EOF)
12029
0
      goto done;
12030
1.28k
        ctxt->instate = XML_PARSER_EPILOG;
12031
1.28k
                    ctxt->progressive = 1;
12032
9.76k
    } else if ((cur == '<') && (next == '!') &&
12033
9.76k
               (avail < 4)) {
12034
980
        goto done;
12035
8.78k
    } else {
12036
8.78k
        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
12037
8.78k
        xmlHaltParser(ctxt);
12038
#ifdef DEBUG_PUSH
12039
        xmlGenericError(xmlGenericErrorContext,
12040
          "PP: entering EOF\n");
12041
#endif
12042
8.78k
        if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
12043
8.78k
      ctxt->sax->endDocument(ctxt->userData);
12044
8.78k
        goto done;
12045
8.78k
    }
12046
3.53k
    break;
12047
277k
            case XML_PARSER_DTD: {
12048
          /*
12049
     * Sorry but progressive parsing of the internal subset
12050
     * is not expected to be supported. We first check that
12051
     * the full content of the internal subset is available and
12052
     * the parsing is launched only at that point.
12053
     * Internal subset ends up with "']' S? '>'" in an unescaped
12054
     * section and not in a ']]>' sequence which are conditional
12055
     * sections (whoever argued to keep that crap in XML deserve
12056
     * a place in hell !).
12057
     */
12058
277k
    int base, i;
12059
277k
    xmlChar *buf;
12060
277k
          xmlChar quote = 0;
12061
277k
                size_t use;
12062
12063
277k
    base = ctxt->input->cur - ctxt->input->base;
12064
277k
    if (base < 0) return(0);
12065
277k
    if (ctxt->checkIndex > base)
12066
114k
        base = ctxt->checkIndex;
12067
277k
    buf = xmlBufContent(ctxt->input->buf->buffer);
12068
277k
                use = xmlBufUse(ctxt->input->buf->buffer);
12069
287M
    for (;(unsigned int) base < use; base++) {
12070
287M
        if (quote != 0) {
12071
220M
            if (buf[base] == quote)
12072
833k
          quote = 0;
12073
220M
      continue;
12074
220M
        }
12075
66.4M
        if ((quote == 0) && (buf[base] == '<')) {
12076
1.67M
            int found  = 0;
12077
      /* special handling of comments */
12078
1.67M
            if (((unsigned int) base + 4 < use) &&
12079
1.67M
          (buf[base + 1] == '!') &&
12080
1.67M
          (buf[base + 2] == '-') &&
12081
1.67M
          (buf[base + 3] == '-')) {
12082
44.6M
          for (;(unsigned int) base + 3 < use; base++) {
12083
44.6M
        if ((buf[base] == '-') &&
12084
44.6M
            (buf[base + 1] == '-') &&
12085
44.6M
            (buf[base + 2] == '>')) {
12086
319k
            found = 1;
12087
319k
            base += 2;
12088
319k
            break;
12089
319k
        }
12090
44.6M
                }
12091
342k
          if (!found) {
12092
#if 0
12093
              fprintf(stderr, "unfinished comment\n");
12094
#endif
12095
23.1k
              break; /* for */
12096
23.1k
                }
12097
319k
                continue;
12098
342k
      }
12099
1.67M
        }
12100
66.1M
        if (buf[base] == '"') {
12101
789k
            quote = '"';
12102
789k
      continue;
12103
789k
        }
12104
65.3M
        if (buf[base] == '\'') {
12105
93.9k
            quote = '\'';
12106
93.9k
      continue;
12107
93.9k
        }
12108
65.2M
        if (buf[base] == ']') {
12109
#if 0
12110
            fprintf(stderr, "%c%c%c%c: ", buf[base],
12111
              buf[base + 1], buf[base + 2], buf[base + 3]);
12112
#endif
12113
129k
            if ((unsigned int) base +1 >= use)
12114
486
          break;
12115
129k
      if (buf[base + 1] == ']') {
12116
          /* conditional crap, skip both ']' ! */
12117
6.86k
          base++;
12118
6.86k
          continue;
12119
6.86k
      }
12120
203k
            for (i = 1; (unsigned int) base + i < use; i++) {
12121
203k
          if (buf[base + i] == '>') {
12122
#if 0
12123
              fprintf(stderr, "found\n");
12124
#endif
12125
106k
              goto found_end_int_subset;
12126
106k
          }
12127
96.8k
          if (!IS_BLANK_CH(buf[base + i])) {
12128
#if 0
12129
              fprintf(stderr, "not found\n");
12130
#endif
12131
15.7k
              goto not_end_of_int_subset;
12132
15.7k
          }
12133
96.8k
      }
12134
#if 0
12135
      fprintf(stderr, "end of stream\n");
12136
#endif
12137
179
            break;
12138
12139
122k
        }
12140
65.1M
not_end_of_int_subset:
12141
65.1M
                    continue; /* for */
12142
65.2M
    }
12143
    /*
12144
     * We didn't find the end of the internal subset
12145
     */
12146
170k
                if (quote == 0)
12147
121k
                    ctxt->checkIndex = base;
12148
49.6k
                else
12149
49.6k
                    ctxt->checkIndex = 0;
12150
#ifdef DEBUG_PUSH
12151
    if (next == 0)
12152
        xmlGenericError(xmlGenericErrorContext,
12153
          "PP: lookup of int subset end filed\n");
12154
#endif
12155
170k
          goto done;
12156
12157
106k
found_end_int_subset:
12158
106k
                ctxt->checkIndex = 0;
12159
106k
    xmlParseInternalSubset(ctxt);
12160
106k
    if (ctxt->instate == XML_PARSER_EOF)
12161
2.78k
        goto done;
12162
103k
    ctxt->inSubset = 2;
12163
103k
    if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
12164
103k
        (ctxt->sax->externalSubset != NULL))
12165
86.1k
        ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
12166
86.1k
          ctxt->extSubSystem, ctxt->extSubURI);
12167
103k
    ctxt->inSubset = 0;
12168
103k
    xmlCleanSpecialAttr(ctxt);
12169
103k
    if (ctxt->instate == XML_PARSER_EOF)
12170
885
        goto done;
12171
102k
    ctxt->instate = XML_PARSER_PROLOG;
12172
102k
    ctxt->checkIndex = 0;
12173
#ifdef DEBUG_PUSH
12174
    xmlGenericError(xmlGenericErrorContext,
12175
      "PP: entering PROLOG\n");
12176
#endif
12177
102k
                break;
12178
103k
      }
12179
0
            case XML_PARSER_COMMENT:
12180
0
    xmlGenericError(xmlGenericErrorContext,
12181
0
      "PP: internal error, state == COMMENT\n");
12182
0
    ctxt->instate = XML_PARSER_CONTENT;
12183
#ifdef DEBUG_PUSH
12184
    xmlGenericError(xmlGenericErrorContext,
12185
      "PP: entering CONTENT\n");
12186
#endif
12187
0
    break;
12188
0
            case XML_PARSER_IGNORE:
12189
0
    xmlGenericError(xmlGenericErrorContext,
12190
0
      "PP: internal error, state == IGNORE");
12191
0
          ctxt->instate = XML_PARSER_DTD;
12192
#ifdef DEBUG_PUSH
12193
    xmlGenericError(xmlGenericErrorContext,
12194
      "PP: entering DTD\n");
12195
#endif
12196
0
          break;
12197
0
            case XML_PARSER_PI:
12198
0
    xmlGenericError(xmlGenericErrorContext,
12199
0
      "PP: internal error, state == PI\n");
12200
0
    ctxt->instate = XML_PARSER_CONTENT;
12201
#ifdef DEBUG_PUSH
12202
    xmlGenericError(xmlGenericErrorContext,
12203
      "PP: entering CONTENT\n");
12204
#endif
12205
0
    break;
12206
0
            case XML_PARSER_ENTITY_DECL:
12207
0
    xmlGenericError(xmlGenericErrorContext,
12208
0
      "PP: internal error, state == ENTITY_DECL\n");
12209
0
    ctxt->instate = XML_PARSER_DTD;
12210
#ifdef DEBUG_PUSH
12211
    xmlGenericError(xmlGenericErrorContext,
12212
      "PP: entering DTD\n");
12213
#endif
12214
0
    break;
12215
0
            case XML_PARSER_ENTITY_VALUE:
12216
0
    xmlGenericError(xmlGenericErrorContext,
12217
0
      "PP: internal error, state == ENTITY_VALUE\n");
12218
0
    ctxt->instate = XML_PARSER_CONTENT;
12219
#ifdef DEBUG_PUSH
12220
    xmlGenericError(xmlGenericErrorContext,
12221
      "PP: entering DTD\n");
12222
#endif
12223
0
    break;
12224
0
            case XML_PARSER_ATTRIBUTE_VALUE:
12225
0
    xmlGenericError(xmlGenericErrorContext,
12226
0
      "PP: internal error, state == ATTRIBUTE_VALUE\n");
12227
0
    ctxt->instate = XML_PARSER_START_TAG;
12228
#ifdef DEBUG_PUSH
12229
    xmlGenericError(xmlGenericErrorContext,
12230
      "PP: entering START_TAG\n");
12231
#endif
12232
0
    break;
12233
0
            case XML_PARSER_SYSTEM_LITERAL:
12234
0
    xmlGenericError(xmlGenericErrorContext,
12235
0
      "PP: internal error, state == SYSTEM_LITERAL\n");
12236
0
    ctxt->instate = XML_PARSER_START_TAG;
12237
#ifdef DEBUG_PUSH
12238
    xmlGenericError(xmlGenericErrorContext,
12239
      "PP: entering START_TAG\n");
12240
#endif
12241
0
    break;
12242
0
            case XML_PARSER_PUBLIC_LITERAL:
12243
0
    xmlGenericError(xmlGenericErrorContext,
12244
0
      "PP: internal error, state == PUBLIC_LITERAL\n");
12245
0
    ctxt->instate = XML_PARSER_START_TAG;
12246
#ifdef DEBUG_PUSH
12247
    xmlGenericError(xmlGenericErrorContext,
12248
      "PP: entering START_TAG\n");
12249
#endif
12250
0
    break;
12251
9.07M
  }
12252
9.07M
    }
12253
1.16M
done:
12254
#ifdef DEBUG_PUSH
12255
    xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
12256
#endif
12257
1.16M
    return(ret);
12258
22.7k
encoding_error:
12259
22.7k
    {
12260
22.7k
        char buffer[150];
12261
12262
22.7k
  snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
12263
22.7k
      ctxt->input->cur[0], ctxt->input->cur[1],
12264
22.7k
      ctxt->input->cur[2], ctxt->input->cur[3]);
12265
22.7k
  __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
12266
22.7k
         "Input is not proper UTF-8, indicate encoding !\n%s",
12267
22.7k
         BAD_CAST buffer, NULL);
12268
22.7k
    }
12269
22.7k
    return(0);
12270
1.39M
}
12271
12272
/**
12273
 * xmlParseCheckTransition:
12274
 * @ctxt:  an XML parser context
12275
 * @chunk:  a char array
12276
 * @size:  the size in byte of the chunk
12277
 *
12278
 * Check depending on the current parser state if the chunk given must be
12279
 * processed immediately or one need more data to advance on parsing.
12280
 *
12281
 * Returns -1 in case of error, 0 if the push is not needed and 1 if needed
12282
 */
12283
static int
12284
1.39M
xmlParseCheckTransition(xmlParserCtxtPtr ctxt, const char *chunk, int size) {
12285
1.39M
    if ((ctxt == NULL) || (chunk == NULL) || (size < 0))
12286
0
        return(-1);
12287
1.39M
    if (ctxt->instate == XML_PARSER_START_TAG) {
12288
376k
        if (memchr(chunk, '>', size) != NULL)
12289
190k
            return(1);
12290
186k
        return(0);
12291
376k
    }
12292
1.01M
    if (ctxt->progressive == XML_PARSER_COMMENT) {
12293
54.8k
        if (memchr(chunk, '>', size) != NULL)
12294
32.1k
            return(1);
12295
22.7k
        return(0);
12296
54.8k
    }
12297
964k
    if (ctxt->instate == XML_PARSER_CDATA_SECTION) {
12298
168k
        if (memchr(chunk, '>', size) != NULL)
12299
83.7k
            return(1);
12300
84.3k
        return(0);
12301
168k
    }
12302
796k
    if (ctxt->progressive == XML_PARSER_PI) {
12303
30.8k
        if (memchr(chunk, '>', size) != NULL)
12304
19.9k
            return(1);
12305
10.8k
        return(0);
12306
30.8k
    }
12307
765k
    if (ctxt->instate == XML_PARSER_END_TAG) {
12308
25.7k
        if (memchr(chunk, '>', size) != NULL)
12309
22.5k
            return(1);
12310
3.15k
        return(0);
12311
25.7k
    }
12312
739k
    if ((ctxt->progressive == XML_PARSER_DTD) ||
12313
739k
        (ctxt->instate == XML_PARSER_DTD)) {
12314
248k
        if (memchr(chunk, '>', size) != NULL)
12315
171k
            return(1);
12316
77.3k
        return(0);
12317
248k
    }
12318
491k
    return(1);
12319
739k
}
12320
12321
/**
12322
 * xmlParseChunk:
12323
 * @ctxt:  an XML parser context
12324
 * @chunk:  an char array
12325
 * @size:  the size in byte of the chunk
12326
 * @terminate:  last chunk indicator
12327
 *
12328
 * Parse a Chunk of memory
12329
 *
12330
 * Returns zero if no error, the xmlParserErrors otherwise.
12331
 */
12332
int
12333
xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
12334
2.37M
              int terminate) {
12335
2.37M
    int end_in_lf = 0;
12336
2.37M
    int remain = 0;
12337
2.37M
    size_t old_avail = 0;
12338
2.37M
    size_t avail = 0;
12339
12340
2.37M
    if (ctxt == NULL)
12341
0
        return(XML_ERR_INTERNAL_ERROR);
12342
2.37M
    if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12343
613k
        return(ctxt->errNo);
12344
1.75M
    if (ctxt->instate == XML_PARSER_EOF)
12345
152
        return(-1);
12346
1.75M
    if (ctxt->instate == XML_PARSER_START)
12347
424k
        xmlDetectSAX2(ctxt);
12348
1.75M
    if ((size > 0) && (chunk != NULL) && (!terminate) &&
12349
1.75M
        (chunk[size - 1] == '\r')) {
12350
12.3k
  end_in_lf = 1;
12351
12.3k
  size--;
12352
12.3k
    }
12353
12354
1.77M
xmldecl_done:
12355
12356
1.77M
    if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
12357
1.77M
        (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF))  {
12358
1.57M
  size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12359
1.57M
  size_t cur = ctxt->input->cur - ctxt->input->base;
12360
1.57M
  int res;
12361
12362
1.57M
        old_avail = xmlBufUse(ctxt->input->buf->buffer);
12363
        /*
12364
         * Specific handling if we autodetected an encoding, we should not
12365
         * push more than the first line ... which depend on the encoding
12366
         * And only push the rest once the final encoding was detected
12367
         */
12368
1.57M
        if ((ctxt->instate == XML_PARSER_START) && (ctxt->input != NULL) &&
12369
1.57M
            (ctxt->input->buf != NULL) && (ctxt->input->buf->encoder != NULL)) {
12370
27.1k
            unsigned int len = 45;
12371
12372
27.1k
            if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12373
27.1k
                               BAD_CAST "UTF-16")) ||
12374
27.1k
                (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12375
5.72k
                               BAD_CAST "UTF16")))
12376
21.4k
                len = 90;
12377
5.72k
            else if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12378
5.72k
                                    BAD_CAST "UCS-4")) ||
12379
5.72k
                     (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12380
5.58k
                                    BAD_CAST "UCS4")))
12381
132
                len = 180;
12382
12383
27.1k
            if (ctxt->input->buf->rawconsumed < len)
12384
1.77k
                len -= ctxt->input->buf->rawconsumed;
12385
12386
            /*
12387
             * Change size for reading the initial declaration only
12388
             * if size is greater than len. Otherwise, memmove in xmlBufferAdd
12389
             * will blindly copy extra bytes from memory.
12390
             */
12391
27.1k
            if ((unsigned int) size > len) {
12392
18.0k
                remain = size - len;
12393
18.0k
                size = len;
12394
18.0k
            } else {
12395
9.09k
                remain = 0;
12396
9.09k
            }
12397
27.1k
        }
12398
1.57M
  res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12399
1.57M
        xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
12400
1.57M
  if (res < 0) {
12401
388
      ctxt->errNo = XML_PARSER_EOF;
12402
388
      xmlHaltParser(ctxt);
12403
388
      return (XML_PARSER_EOF);
12404
388
  }
12405
#ifdef DEBUG_PUSH
12406
  xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12407
#endif
12408
12409
1.57M
    } else if (ctxt->instate != XML_PARSER_EOF) {
12410
199k
  if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
12411
199k
      xmlParserInputBufferPtr in = ctxt->input->buf;
12412
199k
      if ((in->encoder != NULL) && (in->buffer != NULL) &&
12413
199k
        (in->raw != NULL)) {
12414
11.2k
    int nbchars;
12415
11.2k
    size_t base = xmlBufGetInputBase(in->buffer, ctxt->input);
12416
11.2k
    size_t current = ctxt->input->cur - ctxt->input->base;
12417
12418
11.2k
    nbchars = xmlCharEncInput(in, terminate);
12419
11.2k
    xmlBufSetInputBaseCur(in->buffer, ctxt->input, base, current);
12420
11.2k
    if (nbchars < 0) {
12421
        /* TODO 2.6.0 */
12422
301
        xmlGenericError(xmlGenericErrorContext,
12423
301
            "xmlParseChunk: encoder error\n");
12424
301
                    xmlHaltParser(ctxt);
12425
301
        return(XML_ERR_INVALID_ENCODING);
12426
301
    }
12427
11.2k
      }
12428
199k
  }
12429
199k
    }
12430
1.77M
    if (remain != 0) {
12431
17.9k
        xmlParseTryOrFinish(ctxt, 0);
12432
1.75M
    } else {
12433
1.75M
        if ((ctxt->input != NULL) && (ctxt->input->buf != NULL))
12434
1.75M
            avail = xmlBufUse(ctxt->input->buf->buffer);
12435
        /*
12436
         * Depending on the current state it may not be such
12437
         * a good idea to try parsing if there is nothing in the chunk
12438
         * which would be worth doing a parser state transition and we
12439
         * need to wait for more data
12440
         */
12441
1.75M
        if ((terminate) || (avail > XML_MAX_TEXT_LENGTH) ||
12442
1.75M
            (old_avail == 0) || (avail == 0) ||
12443
1.75M
            (xmlParseCheckTransition(ctxt,
12444
1.39M
                       (const char *)&ctxt->input->base[old_avail],
12445
1.39M
                                     avail - old_avail)))
12446
1.37M
            xmlParseTryOrFinish(ctxt, terminate);
12447
1.75M
    }
12448
1.77M
    if (ctxt->instate == XML_PARSER_EOF)
12449
104k
        return(ctxt->errNo);
12450
12451
1.67M
    if ((ctxt->input != NULL) &&
12452
1.67M
         (((ctxt->input->end - ctxt->input->cur) > XML_MAX_LOOKUP_LIMIT) ||
12453
1.67M
         ((ctxt->input->cur - ctxt->input->base) > XML_MAX_LOOKUP_LIMIT)) &&
12454
1.67M
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
12455
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
12456
0
        xmlHaltParser(ctxt);
12457
0
    }
12458
1.67M
    if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12459
77.8k
        return(ctxt->errNo);
12460
12461
1.59M
    if (remain != 0) {
12462
17.6k
        chunk += size;
12463
17.6k
        size = remain;
12464
17.6k
        remain = 0;
12465
17.6k
        goto xmldecl_done;
12466
17.6k
    }
12467
1.57M
    if ((end_in_lf == 1) && (ctxt->input != NULL) &&
12468
1.57M
        (ctxt->input->buf != NULL)) {
12469
11.7k
  size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
12470
11.7k
           ctxt->input);
12471
11.7k
  size_t current = ctxt->input->cur - ctxt->input->base;
12472
12473
11.7k
  xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
12474
12475
11.7k
  xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
12476
11.7k
            base, current);
12477
11.7k
    }
12478
1.57M
    if (terminate) {
12479
  /*
12480
   * Check for termination
12481
   */
12482
100k
  int cur_avail = 0;
12483
12484
100k
  if (ctxt->input != NULL) {
12485
100k
      if (ctxt->input->buf == NULL)
12486
0
    cur_avail = ctxt->input->length -
12487
0
          (ctxt->input->cur - ctxt->input->base);
12488
100k
      else
12489
100k
    cur_avail = xmlBufUse(ctxt->input->buf->buffer) -
12490
100k
                    (ctxt->input->cur - ctxt->input->base);
12491
100k
  }
12492
12493
100k
  if ((ctxt->instate != XML_PARSER_EOF) &&
12494
100k
      (ctxt->instate != XML_PARSER_EPILOG)) {
12495
43.1k
      xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
12496
43.1k
  }
12497
100k
  if ((ctxt->instate == XML_PARSER_EPILOG) && (cur_avail > 0)) {
12498
458
      xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
12499
458
  }
12500
100k
  if (ctxt->instate != XML_PARSER_EOF) {
12501
100k
      if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
12502
100k
    ctxt->sax->endDocument(ctxt->userData);
12503
100k
  }
12504
100k
  ctxt->instate = XML_PARSER_EOF;
12505
100k
    }
12506
1.57M
    if (ctxt->wellFormed == 0)
12507
462k
  return((xmlParserErrors) ctxt->errNo);
12508
1.11M
    else
12509
1.11M
        return(0);
12510
1.57M
}
12511
12512
/************************************************************************
12513
 *                  *
12514
 *    I/O front end functions to the parser     *
12515
 *                  *
12516
 ************************************************************************/
12517
12518
/**
12519
 * xmlCreatePushParserCtxt:
12520
 * @sax:  a SAX handler
12521
 * @user_data:  The user data returned on SAX callbacks
12522
 * @chunk:  a pointer to an array of chars
12523
 * @size:  number of chars in the array
12524
 * @filename:  an optional file name or URI
12525
 *
12526
 * Create a parser context for using the XML parser in push mode.
12527
 * If @buffer and @size are non-NULL, the data is used to detect
12528
 * the encoding.  The remaining characters will be parsed so they
12529
 * don't need to be fed in again through xmlParseChunk.
12530
 * To allow content encoding detection, @size should be >= 4
12531
 * The value of @filename is used for fetching external entities
12532
 * and error/warning reports.
12533
 *
12534
 * Returns the new parser context or NULL
12535
 */
12536
12537
xmlParserCtxtPtr
12538
xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12539
317k
                        const char *chunk, int size, const char *filename) {
12540
317k
    xmlParserCtxtPtr ctxt;
12541
317k
    xmlParserInputPtr inputStream;
12542
317k
    xmlParserInputBufferPtr buf;
12543
317k
    xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
12544
12545
    /*
12546
     * plug some encoding conversion routines
12547
     */
12548
317k
    if ((chunk != NULL) && (size >= 4))
12549
158k
  enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
12550
12551
317k
    buf = xmlAllocParserInputBuffer(enc);
12552
317k
    if (buf == NULL) return(NULL);
12553
12554
317k
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
12555
317k
    if (ctxt == NULL) {
12556
0
        xmlErrMemory(NULL, "creating parser: out of memory\n");
12557
0
  xmlFreeParserInputBuffer(buf);
12558
0
  return(NULL);
12559
0
    }
12560
317k
    ctxt->dictNames = 1;
12561
317k
    if (filename == NULL) {
12562
158k
  ctxt->directory = NULL;
12563
158k
    } else {
12564
158k
        ctxt->directory = xmlParserGetDirectory(filename);
12565
158k
    }
12566
12567
317k
    inputStream = xmlNewInputStream(ctxt);
12568
317k
    if (inputStream == NULL) {
12569
0
  xmlFreeParserCtxt(ctxt);
12570
0
  xmlFreeParserInputBuffer(buf);
12571
0
  return(NULL);
12572
0
    }
12573
12574
317k
    if (filename == NULL)
12575
158k
  inputStream->filename = NULL;
12576
158k
    else {
12577
158k
  inputStream->filename = (char *)
12578
158k
      xmlCanonicPath((const xmlChar *) filename);
12579
158k
  if (inputStream->filename == NULL) {
12580
0
      xmlFreeParserCtxt(ctxt);
12581
0
      xmlFreeParserInputBuffer(buf);
12582
0
      return(NULL);
12583
0
  }
12584
158k
    }
12585
317k
    inputStream->buf = buf;
12586
317k
    xmlBufResetInput(inputStream->buf->buffer, inputStream);
12587
317k
    inputPush(ctxt, inputStream);
12588
12589
    /*
12590
     * If the caller didn't provide an initial 'chunk' for determining
12591
     * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
12592
     * that it can be automatically determined later
12593
     */
12594
317k
    if ((size == 0) || (chunk == NULL)) {
12595
159k
  ctxt->charset = XML_CHAR_ENCODING_NONE;
12596
159k
    } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
12597
158k
  size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12598
158k
  size_t cur = ctxt->input->cur - ctxt->input->base;
12599
12600
158k
  xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12601
12602
158k
        xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
12603
#ifdef DEBUG_PUSH
12604
  xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12605
#endif
12606
158k
    }
12607
12608
317k
    if (enc != XML_CHAR_ENCODING_NONE) {
12609
72.1k
        xmlSwitchEncoding(ctxt, enc);
12610
72.1k
    }
12611
12612
317k
    return(ctxt);
12613
317k
}
12614
#endif /* LIBXML_PUSH_ENABLED */
12615
12616
/**
12617
 * xmlHaltParser:
12618
 * @ctxt:  an XML parser context
12619
 *
12620
 * Blocks further parser processing don't override error
12621
 * for internal use
12622
 */
12623
static void
12624
843k
xmlHaltParser(xmlParserCtxtPtr ctxt) {
12625
843k
    if (ctxt == NULL)
12626
0
        return;
12627
843k
    ctxt->instate = XML_PARSER_EOF;
12628
843k
    ctxt->disableSAX = 1;
12629
844k
    while (ctxt->inputNr > 1)
12630
1.61k
        xmlFreeInputStream(inputPop(ctxt));
12631
843k
    if (ctxt->input != NULL) {
12632
        /*
12633
   * in case there was a specific allocation deallocate before
12634
   * overriding base
12635
   */
12636
843k
        if (ctxt->input->free != NULL) {
12637
0
      ctxt->input->free((xmlChar *) ctxt->input->base);
12638
0
      ctxt->input->free = NULL;
12639
0
  }
12640
843k
        if (ctxt->input->buf != NULL) {
12641
768k
            xmlFreeParserInputBuffer(ctxt->input->buf);
12642
768k
            ctxt->input->buf = NULL;
12643
768k
        }
12644
843k
  ctxt->input->cur = BAD_CAST"";
12645
843k
        ctxt->input->length = 0;
12646
843k
  ctxt->input->base = ctxt->input->cur;
12647
843k
        ctxt->input->end = ctxt->input->cur;
12648
843k
    }
12649
843k
}
12650
12651
/**
12652
 * xmlStopParser:
12653
 * @ctxt:  an XML parser context
12654
 *
12655
 * Blocks further parser processing
12656
 */
12657
void
12658
159k
xmlStopParser(xmlParserCtxtPtr ctxt) {
12659
159k
    if (ctxt == NULL)
12660
0
        return;
12661
159k
    xmlHaltParser(ctxt);
12662
159k
    ctxt->errNo = XML_ERR_USER_STOP;
12663
159k
}
12664
12665
/**
12666
 * xmlCreateIOParserCtxt:
12667
 * @sax:  a SAX handler
12668
 * @user_data:  The user data returned on SAX callbacks
12669
 * @ioread:  an I/O read function
12670
 * @ioclose:  an I/O close function
12671
 * @ioctx:  an I/O handler
12672
 * @enc:  the charset encoding if known
12673
 *
12674
 * Create a parser context for using the XML parser with an existing
12675
 * I/O stream
12676
 *
12677
 * Returns the new parser context or NULL
12678
 */
12679
xmlParserCtxtPtr
12680
xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12681
  xmlInputReadCallback   ioread, xmlInputCloseCallback  ioclose,
12682
0
  void *ioctx, xmlCharEncoding enc) {
12683
0
    xmlParserCtxtPtr ctxt;
12684
0
    xmlParserInputPtr inputStream;
12685
0
    xmlParserInputBufferPtr buf;
12686
12687
0
    if (ioread == NULL) return(NULL);
12688
12689
0
    buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
12690
0
    if (buf == NULL) {
12691
0
        if (ioclose != NULL)
12692
0
            ioclose(ioctx);
12693
0
        return (NULL);
12694
0
    }
12695
12696
0
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
12697
0
    if (ctxt == NULL) {
12698
0
  xmlFreeParserInputBuffer(buf);
12699
0
  return(NULL);
12700
0
    }
12701
12702
0
    inputStream = xmlNewIOInputStream(ctxt, buf, enc);
12703
0
    if (inputStream == NULL) {
12704
0
  xmlFreeParserCtxt(ctxt);
12705
0
  return(NULL);
12706
0
    }
12707
0
    inputPush(ctxt, inputStream);
12708
12709
0
    return(ctxt);
12710
0
}
12711
12712
#ifdef LIBXML_VALID_ENABLED
12713
/************************************************************************
12714
 *                  *
12715
 *    Front ends when parsing a DTD       *
12716
 *                  *
12717
 ************************************************************************/
12718
12719
/**
12720
 * xmlIOParseDTD:
12721
 * @sax:  the SAX handler block or NULL
12722
 * @input:  an Input Buffer
12723
 * @enc:  the charset encoding if known
12724
 *
12725
 * Load and parse a DTD
12726
 *
12727
 * Returns the resulting xmlDtdPtr or NULL in case of error.
12728
 * @input will be freed by the function in any case.
12729
 */
12730
12731
xmlDtdPtr
12732
xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
12733
0
        xmlCharEncoding enc) {
12734
0
    xmlDtdPtr ret = NULL;
12735
0
    xmlParserCtxtPtr ctxt;
12736
0
    xmlParserInputPtr pinput = NULL;
12737
0
    xmlChar start[4];
12738
12739
0
    if (input == NULL)
12740
0
  return(NULL);
12741
12742
0
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
12743
0
    if (ctxt == NULL) {
12744
0
        xmlFreeParserInputBuffer(input);
12745
0
  return(NULL);
12746
0
    }
12747
12748
    /* We are loading a DTD */
12749
0
    ctxt->options |= XML_PARSE_DTDLOAD;
12750
12751
0
    xmlDetectSAX2(ctxt);
12752
12753
    /*
12754
     * generate a parser input from the I/O handler
12755
     */
12756
12757
0
    pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12758
0
    if (pinput == NULL) {
12759
0
        xmlFreeParserInputBuffer(input);
12760
0
  xmlFreeParserCtxt(ctxt);
12761
0
  return(NULL);
12762
0
    }
12763
12764
    /*
12765
     * plug some encoding conversion routines here.
12766
     */
12767
0
    if (xmlPushInput(ctxt, pinput) < 0) {
12768
0
  xmlFreeParserCtxt(ctxt);
12769
0
  return(NULL);
12770
0
    }
12771
0
    if (enc != XML_CHAR_ENCODING_NONE) {
12772
0
        xmlSwitchEncoding(ctxt, enc);
12773
0
    }
12774
12775
0
    pinput->filename = NULL;
12776
0
    pinput->line = 1;
12777
0
    pinput->col = 1;
12778
0
    pinput->base = ctxt->input->cur;
12779
0
    pinput->cur = ctxt->input->cur;
12780
0
    pinput->free = NULL;
12781
12782
    /*
12783
     * let's parse that entity knowing it's an external subset.
12784
     */
12785
0
    ctxt->inSubset = 2;
12786
0
    ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12787
0
    if (ctxt->myDoc == NULL) {
12788
0
  xmlErrMemory(ctxt, "New Doc failed");
12789
0
  return(NULL);
12790
0
    }
12791
0
    ctxt->myDoc->properties = XML_DOC_INTERNAL;
12792
0
    ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12793
0
                                 BAD_CAST "none", BAD_CAST "none");
12794
12795
0
    if ((enc == XML_CHAR_ENCODING_NONE) &&
12796
0
        ((ctxt->input->end - ctxt->input->cur) >= 4)) {
12797
  /*
12798
   * Get the 4 first bytes and decode the charset
12799
   * if enc != XML_CHAR_ENCODING_NONE
12800
   * plug some encoding conversion routines.
12801
   */
12802
0
  start[0] = RAW;
12803
0
  start[1] = NXT(1);
12804
0
  start[2] = NXT(2);
12805
0
  start[3] = NXT(3);
12806
0
  enc = xmlDetectCharEncoding(start, 4);
12807
0
  if (enc != XML_CHAR_ENCODING_NONE) {
12808
0
      xmlSwitchEncoding(ctxt, enc);
12809
0
  }
12810
0
    }
12811
12812
0
    xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
12813
12814
0
    if (ctxt->myDoc != NULL) {
12815
0
  if (ctxt->wellFormed) {
12816
0
      ret = ctxt->myDoc->extSubset;
12817
0
      ctxt->myDoc->extSubset = NULL;
12818
0
      if (ret != NULL) {
12819
0
    xmlNodePtr tmp;
12820
12821
0
    ret->doc = NULL;
12822
0
    tmp = ret->children;
12823
0
    while (tmp != NULL) {
12824
0
        tmp->doc = NULL;
12825
0
        tmp = tmp->next;
12826
0
    }
12827
0
      }
12828
0
  } else {
12829
0
      ret = NULL;
12830
0
  }
12831
0
        xmlFreeDoc(ctxt->myDoc);
12832
0
        ctxt->myDoc = NULL;
12833
0
    }
12834
0
    xmlFreeParserCtxt(ctxt);
12835
12836
0
    return(ret);
12837
0
}
12838
12839
/**
12840
 * xmlSAXParseDTD:
12841
 * @sax:  the SAX handler block
12842
 * @ExternalID:  a NAME* containing the External ID of the DTD
12843
 * @SystemID:  a NAME* containing the URL to the DTD
12844
 *
12845
 * DEPRECATED: Don't use.
12846
 *
12847
 * Load and parse an external subset.
12848
 *
12849
 * Returns the resulting xmlDtdPtr or NULL in case of error.
12850
 */
12851
12852
xmlDtdPtr
12853
xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
12854
0
                          const xmlChar *SystemID) {
12855
0
    xmlDtdPtr ret = NULL;
12856
0
    xmlParserCtxtPtr ctxt;
12857
0
    xmlParserInputPtr input = NULL;
12858
0
    xmlCharEncoding enc;
12859
0
    xmlChar* systemIdCanonic;
12860
12861
0
    if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
12862
12863
0
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
12864
0
    if (ctxt == NULL) {
12865
0
  return(NULL);
12866
0
    }
12867
12868
    /* We are loading a DTD */
12869
0
    ctxt->options |= XML_PARSE_DTDLOAD;
12870
12871
    /*
12872
     * Canonicalise the system ID
12873
     */
12874
0
    systemIdCanonic = xmlCanonicPath(SystemID);
12875
0
    if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
12876
0
  xmlFreeParserCtxt(ctxt);
12877
0
  return(NULL);
12878
0
    }
12879
12880
    /*
12881
     * Ask the Entity resolver to load the damn thing
12882
     */
12883
12884
0
    if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
12885
0
  input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
12886
0
                                   systemIdCanonic);
12887
0
    if (input == NULL) {
12888
0
  xmlFreeParserCtxt(ctxt);
12889
0
  if (systemIdCanonic != NULL)
12890
0
      xmlFree(systemIdCanonic);
12891
0
  return(NULL);
12892
0
    }
12893
12894
    /*
12895
     * plug some encoding conversion routines here.
12896
     */
12897
0
    if (xmlPushInput(ctxt, input) < 0) {
12898
0
  xmlFreeParserCtxt(ctxt);
12899
0
  if (systemIdCanonic != NULL)
12900
0
      xmlFree(systemIdCanonic);
12901
0
  return(NULL);
12902
0
    }
12903
0
    if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12904
0
  enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
12905
0
  xmlSwitchEncoding(ctxt, enc);
12906
0
    }
12907
12908
0
    if (input->filename == NULL)
12909
0
  input->filename = (char *) systemIdCanonic;
12910
0
    else
12911
0
  xmlFree(systemIdCanonic);
12912
0
    input->line = 1;
12913
0
    input->col = 1;
12914
0
    input->base = ctxt->input->cur;
12915
0
    input->cur = ctxt->input->cur;
12916
0
    input->free = NULL;
12917
12918
    /*
12919
     * let's parse that entity knowing it's an external subset.
12920
     */
12921
0
    ctxt->inSubset = 2;
12922
0
    ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12923
0
    if (ctxt->myDoc == NULL) {
12924
0
  xmlErrMemory(ctxt, "New Doc failed");
12925
0
  xmlFreeParserCtxt(ctxt);
12926
0
  return(NULL);
12927
0
    }
12928
0
    ctxt->myDoc->properties = XML_DOC_INTERNAL;
12929
0
    ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12930
0
                                 ExternalID, SystemID);
12931
0
    xmlParseExternalSubset(ctxt, ExternalID, SystemID);
12932
12933
0
    if (ctxt->myDoc != NULL) {
12934
0
  if (ctxt->wellFormed) {
12935
0
      ret = ctxt->myDoc->extSubset;
12936
0
      ctxt->myDoc->extSubset = NULL;
12937
0
      if (ret != NULL) {
12938
0
    xmlNodePtr tmp;
12939
12940
0
    ret->doc = NULL;
12941
0
    tmp = ret->children;
12942
0
    while (tmp != NULL) {
12943
0
        tmp->doc = NULL;
12944
0
        tmp = tmp->next;
12945
0
    }
12946
0
      }
12947
0
  } else {
12948
0
      ret = NULL;
12949
0
  }
12950
0
        xmlFreeDoc(ctxt->myDoc);
12951
0
        ctxt->myDoc = NULL;
12952
0
    }
12953
0
    xmlFreeParserCtxt(ctxt);
12954
12955
0
    return(ret);
12956
0
}
12957
12958
12959
/**
12960
 * xmlParseDTD:
12961
 * @ExternalID:  a NAME* containing the External ID of the DTD
12962
 * @SystemID:  a NAME* containing the URL to the DTD
12963
 *
12964
 * Load and parse an external subset.
12965
 *
12966
 * Returns the resulting xmlDtdPtr or NULL in case of error.
12967
 */
12968
12969
xmlDtdPtr
12970
0
xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
12971
0
    return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
12972
0
}
12973
#endif /* LIBXML_VALID_ENABLED */
12974
12975
/************************************************************************
12976
 *                  *
12977
 *    Front ends when parsing an Entity     *
12978
 *                  *
12979
 ************************************************************************/
12980
12981
/**
12982
 * xmlParseCtxtExternalEntity:
12983
 * @ctx:  the existing parsing context
12984
 * @URL:  the URL for the entity to load
12985
 * @ID:  the System ID for the entity to load
12986
 * @lst:  the return value for the set of parsed nodes
12987
 *
12988
 * Parse an external general entity within an existing parsing context
12989
 * An external general parsed entity is well-formed if it matches the
12990
 * production labeled extParsedEnt.
12991
 *
12992
 * [78] extParsedEnt ::= TextDecl? content
12993
 *
12994
 * Returns 0 if the entity is well formed, -1 in case of args problem and
12995
 *    the parser error code otherwise
12996
 */
12997
12998
int
12999
xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
13000
0
                 const xmlChar *ID, xmlNodePtr *lst) {
13001
0
    void *userData;
13002
13003
0
    if (ctx == NULL) return(-1);
13004
    /*
13005
     * If the user provided their own SAX callbacks, then reuse the
13006
     * userData callback field, otherwise the expected setup in a
13007
     * DOM builder is to have userData == ctxt
13008
     */
13009
0
    if (ctx->userData == ctx)
13010
0
        userData = NULL;
13011
0
    else
13012
0
        userData = ctx->userData;
13013
0
    return xmlParseExternalEntityPrivate(ctx->myDoc, ctx, ctx->sax,
13014
0
                                         userData, ctx->depth + 1,
13015
0
                                         URL, ID, lst);
13016
0
}
13017
13018
/**
13019
 * xmlParseExternalEntityPrivate:
13020
 * @doc:  the document the chunk pertains to
13021
 * @oldctxt:  the previous parser context if available
13022
 * @sax:  the SAX handler block (possibly NULL)
13023
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
13024
 * @depth:  Used for loop detection, use 0
13025
 * @URL:  the URL for the entity to load
13026
 * @ID:  the System ID for the entity to load
13027
 * @list:  the return value for the set of parsed nodes
13028
 *
13029
 * Private version of xmlParseExternalEntity()
13030
 *
13031
 * Returns 0 if the entity is well formed, -1 in case of args problem and
13032
 *    the parser error code otherwise
13033
 */
13034
13035
static xmlParserErrors
13036
xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
13037
                xmlSAXHandlerPtr sax,
13038
          void *user_data, int depth, const xmlChar *URL,
13039
2.49M
          const xmlChar *ID, xmlNodePtr *list) {
13040
2.49M
    xmlParserCtxtPtr ctxt;
13041
2.49M
    xmlDocPtr newDoc;
13042
2.49M
    xmlNodePtr newRoot;
13043
2.49M
    xmlParserErrors ret = XML_ERR_OK;
13044
2.49M
    xmlChar start[4];
13045
2.49M
    xmlCharEncoding enc;
13046
13047
2.49M
    if (((depth > 40) &&
13048
2.49M
  ((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13049
2.49M
  (depth > 1024)) {
13050
2.41k
  return(XML_ERR_ENTITY_LOOP);
13051
2.41k
    }
13052
13053
2.48M
    if (list != NULL)
13054
2.47M
        *list = NULL;
13055
2.48M
    if ((URL == NULL) && (ID == NULL))
13056
199
  return(XML_ERR_INTERNAL_ERROR);
13057
2.48M
    if (doc == NULL)
13058
0
  return(XML_ERR_INTERNAL_ERROR);
13059
13060
2.48M
    ctxt = xmlCreateEntityParserCtxtInternal(sax, user_data, URL, ID, NULL,
13061
2.48M
                                             oldctxt);
13062
2.48M
    if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
13063
2.44M
    xmlDetectSAX2(ctxt);
13064
13065
2.44M
    newDoc = xmlNewDoc(BAD_CAST "1.0");
13066
2.44M
    if (newDoc == NULL) {
13067
0
  xmlFreeParserCtxt(ctxt);
13068
0
  return(XML_ERR_INTERNAL_ERROR);
13069
0
    }
13070
2.44M
    newDoc->properties = XML_DOC_INTERNAL;
13071
2.44M
    if (doc) {
13072
2.44M
        newDoc->intSubset = doc->intSubset;
13073
2.44M
        newDoc->extSubset = doc->extSubset;
13074
2.44M
        if (doc->dict) {
13075
1.07M
            newDoc->dict = doc->dict;
13076
1.07M
            xmlDictReference(newDoc->dict);
13077
1.07M
        }
13078
2.44M
        if (doc->URL != NULL) {
13079
1.59M
            newDoc->URL = xmlStrdup(doc->URL);
13080
1.59M
        }
13081
2.44M
    }
13082
2.44M
    newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13083
2.44M
    if (newRoot == NULL) {
13084
0
  if (sax != NULL)
13085
0
  xmlFreeParserCtxt(ctxt);
13086
0
  newDoc->intSubset = NULL;
13087
0
  newDoc->extSubset = NULL;
13088
0
        xmlFreeDoc(newDoc);
13089
0
  return(XML_ERR_INTERNAL_ERROR);
13090
0
    }
13091
2.44M
    xmlAddChild((xmlNodePtr) newDoc, newRoot);
13092
2.44M
    nodePush(ctxt, newDoc->children);
13093
2.44M
    if (doc == NULL) {
13094
0
        ctxt->myDoc = newDoc;
13095
2.44M
    } else {
13096
2.44M
        ctxt->myDoc = doc;
13097
2.44M
        newRoot->doc = doc;
13098
2.44M
    }
13099
13100
    /*
13101
     * Get the 4 first bytes and decode the charset
13102
     * if enc != XML_CHAR_ENCODING_NONE
13103
     * plug some encoding conversion routines.
13104
     */
13105
2.44M
    GROW;
13106
2.44M
    if ((ctxt->input->end - ctxt->input->cur) >= 4) {
13107
2.44M
  start[0] = RAW;
13108
2.44M
  start[1] = NXT(1);
13109
2.44M
  start[2] = NXT(2);
13110
2.44M
  start[3] = NXT(3);
13111
2.44M
  enc = xmlDetectCharEncoding(start, 4);
13112
2.44M
  if (enc != XML_CHAR_ENCODING_NONE) {
13113
29.3k
      xmlSwitchEncoding(ctxt, enc);
13114
29.3k
  }
13115
2.44M
    }
13116
13117
    /*
13118
     * Parse a possible text declaration first
13119
     */
13120
2.44M
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
13121
28.4k
  xmlParseTextDecl(ctxt);
13122
        /*
13123
         * An XML-1.0 document can't reference an entity not XML-1.0
13124
         */
13125
28.4k
        if ((xmlStrEqual(oldctxt->version, BAD_CAST "1.0")) &&
13126
28.4k
            (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
13127
201
            xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
13128
201
                           "Version mismatch between document and entity\n");
13129
201
        }
13130
28.4k
    }
13131
13132
2.44M
    ctxt->instate = XML_PARSER_CONTENT;
13133
2.44M
    ctxt->depth = depth;
13134
2.44M
    if (oldctxt != NULL) {
13135
2.44M
  ctxt->_private = oldctxt->_private;
13136
2.44M
  ctxt->loadsubset = oldctxt->loadsubset;
13137
2.44M
  ctxt->validate = oldctxt->validate;
13138
2.44M
  ctxt->valid = oldctxt->valid;
13139
2.44M
  ctxt->replaceEntities = oldctxt->replaceEntities;
13140
2.44M
        if (oldctxt->validate) {
13141
2.33M
            ctxt->vctxt.error = oldctxt->vctxt.error;
13142
2.33M
            ctxt->vctxt.warning = oldctxt->vctxt.warning;
13143
2.33M
            ctxt->vctxt.userData = oldctxt->vctxt.userData;
13144
2.33M
        }
13145
2.44M
  ctxt->external = oldctxt->external;
13146
2.44M
        if (ctxt->dict) xmlDictFree(ctxt->dict);
13147
2.44M
        ctxt->dict = oldctxt->dict;
13148
2.44M
        ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13149
2.44M
        ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13150
2.44M
        ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13151
2.44M
        ctxt->dictNames = oldctxt->dictNames;
13152
2.44M
        ctxt->attsDefault = oldctxt->attsDefault;
13153
2.44M
        ctxt->attsSpecial = oldctxt->attsSpecial;
13154
2.44M
        ctxt->linenumbers = oldctxt->linenumbers;
13155
2.44M
  ctxt->record_info = oldctxt->record_info;
13156
2.44M
  ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
13157
2.44M
  ctxt->node_seq.length = oldctxt->node_seq.length;
13158
2.44M
  ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
13159
2.44M
    } else {
13160
  /*
13161
   * Doing validity checking on chunk without context
13162
   * doesn't make sense
13163
   */
13164
0
  ctxt->_private = NULL;
13165
0
  ctxt->validate = 0;
13166
0
  ctxt->external = 2;
13167
0
  ctxt->loadsubset = 0;
13168
0
    }
13169
13170
2.44M
    xmlParseContent(ctxt);
13171
13172
2.44M
    if ((RAW == '<') && (NXT(1) == '/')) {
13173
125k
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13174
2.32M
    } else if (RAW != 0) {
13175
2.86k
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13176
2.86k
    }
13177
2.44M
    if (ctxt->node != newDoc->children) {
13178
1.88M
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13179
1.88M
    }
13180
13181
2.44M
    if (!ctxt->wellFormed) {
13182
2.43M
        if (ctxt->errNo == 0)
13183
0
      ret = XML_ERR_INTERNAL_ERROR;
13184
2.43M
  else
13185
2.43M
      ret = (xmlParserErrors)ctxt->errNo;
13186
2.43M
    } else {
13187
7.48k
  if (list != NULL) {
13188
5.09k
      xmlNodePtr cur;
13189
13190
      /*
13191
       * Return the newly created nodeset after unlinking it from
13192
       * they pseudo parent.
13193
       */
13194
5.09k
      cur = newDoc->children->children;
13195
5.09k
      *list = cur;
13196
7.54k
      while (cur != NULL) {
13197
2.45k
    cur->parent = NULL;
13198
2.45k
    cur = cur->next;
13199
2.45k
      }
13200
5.09k
            newDoc->children->children = NULL;
13201
5.09k
  }
13202
7.48k
  ret = XML_ERR_OK;
13203
7.48k
    }
13204
13205
    /*
13206
     * Record in the parent context the number of entities replacement
13207
     * done when parsing that reference.
13208
     */
13209
2.44M
    if (oldctxt != NULL)
13210
2.44M
        oldctxt->nbentities += ctxt->nbentities;
13211
13212
    /*
13213
     * Also record the size of the entity parsed
13214
     */
13215
2.44M
    if (ctxt->input != NULL && oldctxt != NULL) {
13216
2.44M
  oldctxt->sizeentities += ctxt->input->consumed;
13217
2.44M
  oldctxt->sizeentities += (ctxt->input->cur - ctxt->input->base);
13218
2.44M
    }
13219
    /*
13220
     * And record the last error if any
13221
     */
13222
2.44M
    if ((oldctxt != NULL) && (ctxt->lastError.code != XML_ERR_OK))
13223
2.43M
        xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13224
13225
2.44M
    if (oldctxt != NULL) {
13226
2.44M
        ctxt->dict = NULL;
13227
2.44M
        ctxt->attsDefault = NULL;
13228
2.44M
        ctxt->attsSpecial = NULL;
13229
2.44M
        oldctxt->validate = ctxt->validate;
13230
2.44M
        oldctxt->valid = ctxt->valid;
13231
2.44M
        oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
13232
2.44M
        oldctxt->node_seq.length = ctxt->node_seq.length;
13233
2.44M
        oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
13234
2.44M
    }
13235
2.44M
    ctxt->node_seq.maximum = 0;
13236
2.44M
    ctxt->node_seq.length = 0;
13237
2.44M
    ctxt->node_seq.buffer = NULL;
13238
2.44M
    xmlFreeParserCtxt(ctxt);
13239
2.44M
    newDoc->intSubset = NULL;
13240
2.44M
    newDoc->extSubset = NULL;
13241
2.44M
    xmlFreeDoc(newDoc);
13242
13243
2.44M
    return(ret);
13244
2.44M
}
13245
13246
#ifdef LIBXML_SAX1_ENABLED
13247
/**
13248
 * xmlParseExternalEntity:
13249
 * @doc:  the document the chunk pertains to
13250
 * @sax:  the SAX handler block (possibly NULL)
13251
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
13252
 * @depth:  Used for loop detection, use 0
13253
 * @URL:  the URL for the entity to load
13254
 * @ID:  the System ID for the entity to load
13255
 * @lst:  the return value for the set of parsed nodes
13256
 *
13257
 * Parse an external general entity
13258
 * An external general parsed entity is well-formed if it matches the
13259
 * production labeled extParsedEnt.
13260
 *
13261
 * [78] extParsedEnt ::= TextDecl? content
13262
 *
13263
 * Returns 0 if the entity is well formed, -1 in case of args problem and
13264
 *    the parser error code otherwise
13265
 */
13266
13267
int
13268
xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
13269
0
    int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
13270
0
    return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
13271
0
                           ID, lst));
13272
0
}
13273
13274
/**
13275
 * xmlParseBalancedChunkMemory:
13276
 * @doc:  the document the chunk pertains to (must not be NULL)
13277
 * @sax:  the SAX handler block (possibly NULL)
13278
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
13279
 * @depth:  Used for loop detection, use 0
13280
 * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
13281
 * @lst:  the return value for the set of parsed nodes
13282
 *
13283
 * Parse a well-balanced chunk of an XML document
13284
 * called by the parser
13285
 * The allowed sequence for the Well Balanced Chunk is the one defined by
13286
 * the content production in the XML grammar:
13287
 *
13288
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13289
 *
13290
 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13291
 *    the parser error code otherwise
13292
 */
13293
13294
int
13295
xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13296
0
     void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
13297
0
    return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
13298
0
                                                depth, string, lst, 0 );
13299
0
}
13300
#endif /* LIBXML_SAX1_ENABLED */
13301
13302
/**
13303
 * xmlParseBalancedChunkMemoryInternal:
13304
 * @oldctxt:  the existing parsing context
13305
 * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
13306
 * @user_data:  the user data field for the parser context
13307
 * @lst:  the return value for the set of parsed nodes
13308
 *
13309
 *
13310
 * Parse a well-balanced chunk of an XML document
13311
 * called by the parser
13312
 * The allowed sequence for the Well Balanced Chunk is the one defined by
13313
 * the content production in the XML grammar:
13314
 *
13315
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13316
 *
13317
 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13318
 * error code otherwise
13319
 *
13320
 * In case recover is set to 1, the nodelist will not be empty even if
13321
 * the parsed chunk is not well balanced.
13322
 */
13323
static xmlParserErrors
13324
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
13325
253k
  const xmlChar *string, void *user_data, xmlNodePtr *lst) {
13326
253k
    xmlParserCtxtPtr ctxt;
13327
253k
    xmlDocPtr newDoc = NULL;
13328
253k
    xmlNodePtr newRoot;
13329
253k
    xmlSAXHandlerPtr oldsax = NULL;
13330
253k
    xmlNodePtr content = NULL;
13331
253k
    xmlNodePtr last = NULL;
13332
253k
    int size;
13333
253k
    xmlParserErrors ret = XML_ERR_OK;
13334
253k
#ifdef SAX2
13335
253k
    int i;
13336
253k
#endif
13337
13338
253k
    if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13339
253k
        (oldctxt->depth >  1024)) {
13340
1.33k
  return(XML_ERR_ENTITY_LOOP);
13341
1.33k
    }
13342
13343
13344
252k
    if (lst != NULL)
13345
251k
        *lst = NULL;
13346
252k
    if (string == NULL)
13347
74
        return(XML_ERR_INTERNAL_ERROR);
13348
13349
252k
    size = xmlStrlen(string);
13350
13351
252k
    ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13352
252k
    if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
13353
251k
    if (user_data != NULL)
13354
0
  ctxt->userData = user_data;
13355
251k
    else
13356
251k
  ctxt->userData = ctxt;
13357
251k
    if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
13358
251k
    ctxt->dict = oldctxt->dict;
13359
251k
    ctxt->input_id = oldctxt->input_id + 1;
13360
251k
    ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13361
251k
    ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13362
251k
    ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13363
13364
251k
#ifdef SAX2
13365
    /* propagate namespaces down the entity */
13366
1.43M
    for (i = 0;i < oldctxt->nsNr;i += 2) {
13367
1.18M
        nsPush(ctxt, oldctxt->nsTab[i], oldctxt->nsTab[i+1]);
13368
1.18M
    }
13369
251k
#endif
13370
13371
251k
    oldsax = ctxt->sax;
13372
251k
    ctxt->sax = oldctxt->sax;
13373
251k
    xmlDetectSAX2(ctxt);
13374
251k
    ctxt->replaceEntities = oldctxt->replaceEntities;
13375
251k
    ctxt->options = oldctxt->options;
13376
13377
251k
    ctxt->_private = oldctxt->_private;
13378
251k
    if (oldctxt->myDoc == NULL) {
13379
0
  newDoc = xmlNewDoc(BAD_CAST "1.0");
13380
0
  if (newDoc == NULL) {
13381
0
      ctxt->sax = oldsax;
13382
0
      ctxt->dict = NULL;
13383
0
      xmlFreeParserCtxt(ctxt);
13384
0
      return(XML_ERR_INTERNAL_ERROR);
13385
0
  }
13386
0
  newDoc->properties = XML_DOC_INTERNAL;
13387
0
  newDoc->dict = ctxt->dict;
13388
0
  xmlDictReference(newDoc->dict);
13389
0
  ctxt->myDoc = newDoc;
13390
251k
    } else {
13391
251k
  ctxt->myDoc = oldctxt->myDoc;
13392
251k
        content = ctxt->myDoc->children;
13393
251k
  last = ctxt->myDoc->last;
13394
251k
    }
13395
251k
    newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
13396
251k
    if (newRoot == NULL) {
13397
0
  ctxt->sax = oldsax;
13398
0
  ctxt->dict = NULL;
13399
0
  xmlFreeParserCtxt(ctxt);
13400
0
  if (newDoc != NULL) {
13401
0
      xmlFreeDoc(newDoc);
13402
0
  }
13403
0
  return(XML_ERR_INTERNAL_ERROR);
13404
0
    }
13405
251k
    ctxt->myDoc->children = NULL;
13406
251k
    ctxt->myDoc->last = NULL;
13407
251k
    xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
13408
251k
    nodePush(ctxt, ctxt->myDoc->children);
13409
251k
    ctxt->instate = XML_PARSER_CONTENT;
13410
251k
    ctxt->depth = oldctxt->depth + 1;
13411
13412
251k
    ctxt->validate = 0;
13413
251k
    ctxt->loadsubset = oldctxt->loadsubset;
13414
251k
    if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
13415
  /*
13416
   * ID/IDREF registration will be done in xmlValidateElement below
13417
   */
13418
101k
  ctxt->loadsubset |= XML_SKIP_IDS;
13419
101k
    }
13420
251k
    ctxt->dictNames = oldctxt->dictNames;
13421
251k
    ctxt->attsDefault = oldctxt->attsDefault;
13422
251k
    ctxt->attsSpecial = oldctxt->attsSpecial;
13423
13424
251k
    xmlParseContent(ctxt);
13425
251k
    if ((RAW == '<') && (NXT(1) == '/')) {
13426
11.1k
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13427
240k
    } else if (RAW != 0) {
13428
196
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13429
196
    }
13430
251k
    if (ctxt->node != ctxt->myDoc->children) {
13431
103k
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13432
103k
    }
13433
13434
251k
    if (!ctxt->wellFormed) {
13435
233k
        if (ctxt->errNo == 0)
13436
0
      ret = XML_ERR_INTERNAL_ERROR;
13437
233k
  else
13438
233k
      ret = (xmlParserErrors)ctxt->errNo;
13439
233k
    } else {
13440
18.1k
      ret = XML_ERR_OK;
13441
18.1k
    }
13442
13443
251k
    if ((lst != NULL) && (ret == XML_ERR_OK)) {
13444
18.1k
  xmlNodePtr cur;
13445
13446
  /*
13447
   * Return the newly created nodeset after unlinking it from
13448
   * they pseudo parent.
13449
   */
13450
18.1k
  cur = ctxt->myDoc->children->children;
13451
18.1k
  *lst = cur;
13452
66.8k
  while (cur != NULL) {
13453
48.7k
#ifdef LIBXML_VALID_ENABLED
13454
48.7k
      if ((oldctxt->validate) && (oldctxt->wellFormed) &&
13455
48.7k
    (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
13456
48.7k
    (cur->type == XML_ELEMENT_NODE)) {
13457
12.5k
    oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
13458
12.5k
      oldctxt->myDoc, cur);
13459
12.5k
      }
13460
48.7k
#endif /* LIBXML_VALID_ENABLED */
13461
48.7k
      cur->parent = NULL;
13462
48.7k
      cur = cur->next;
13463
48.7k
  }
13464
18.1k
  ctxt->myDoc->children->children = NULL;
13465
18.1k
    }
13466
251k
    if (ctxt->myDoc != NULL) {
13467
251k
  xmlFreeNode(ctxt->myDoc->children);
13468
251k
        ctxt->myDoc->children = content;
13469
251k
        ctxt->myDoc->last = last;
13470
251k
    }
13471
13472
    /*
13473
     * Record in the parent context the number of entities replacement
13474
     * done when parsing that reference.
13475
     */
13476
251k
    if (oldctxt != NULL)
13477
251k
        oldctxt->nbentities += ctxt->nbentities;
13478
13479
    /*
13480
     * Also record the last error if any
13481
     */
13482
251k
    if (ctxt->lastError.code != XML_ERR_OK)
13483
233k
        xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13484
13485
251k
    ctxt->sax = oldsax;
13486
251k
    ctxt->dict = NULL;
13487
251k
    ctxt->attsDefault = NULL;
13488
251k
    ctxt->attsSpecial = NULL;
13489
251k
    xmlFreeParserCtxt(ctxt);
13490
251k
    if (newDoc != NULL) {
13491
0
  xmlFreeDoc(newDoc);
13492
0
    }
13493
13494
251k
    return(ret);
13495
251k
}
13496
13497
/**
13498
 * xmlParseInNodeContext:
13499
 * @node:  the context node
13500
 * @data:  the input string
13501
 * @datalen:  the input string length in bytes
13502
 * @options:  a combination of xmlParserOption
13503
 * @lst:  the return value for the set of parsed nodes
13504
 *
13505
 * Parse a well-balanced chunk of an XML document
13506
 * within the context (DTD, namespaces, etc ...) of the given node.
13507
 *
13508
 * The allowed sequence for the data is a Well Balanced Chunk defined by
13509
 * the content production in the XML grammar:
13510
 *
13511
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13512
 *
13513
 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13514
 * error code otherwise
13515
 */
13516
xmlParserErrors
13517
xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
13518
0
                      int options, xmlNodePtr *lst) {
13519
0
#ifdef SAX2
13520
0
    xmlParserCtxtPtr ctxt;
13521
0
    xmlDocPtr doc = NULL;
13522
0
    xmlNodePtr fake, cur;
13523
0
    int nsnr = 0;
13524
13525
0
    xmlParserErrors ret = XML_ERR_OK;
13526
13527
    /*
13528
     * check all input parameters, grab the document
13529
     */
13530
0
    if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
13531
0
        return(XML_ERR_INTERNAL_ERROR);
13532
0
    switch (node->type) {
13533
0
        case XML_ELEMENT_NODE:
13534
0
        case XML_ATTRIBUTE_NODE:
13535
0
        case XML_TEXT_NODE:
13536
0
        case XML_CDATA_SECTION_NODE:
13537
0
        case XML_ENTITY_REF_NODE:
13538
0
        case XML_PI_NODE:
13539
0
        case XML_COMMENT_NODE:
13540
0
        case XML_DOCUMENT_NODE:
13541
0
        case XML_HTML_DOCUMENT_NODE:
13542
0
      break;
13543
0
  default:
13544
0
      return(XML_ERR_INTERNAL_ERROR);
13545
13546
0
    }
13547
0
    while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
13548
0
           (node->type != XML_DOCUMENT_NODE) &&
13549
0
     (node->type != XML_HTML_DOCUMENT_NODE))
13550
0
  node = node->parent;
13551
0
    if (node == NULL)
13552
0
  return(XML_ERR_INTERNAL_ERROR);
13553
0
    if (node->type == XML_ELEMENT_NODE)
13554
0
  doc = node->doc;
13555
0
    else
13556
0
        doc = (xmlDocPtr) node;
13557
0
    if (doc == NULL)
13558
0
  return(XML_ERR_INTERNAL_ERROR);
13559
13560
    /*
13561
     * allocate a context and set-up everything not related to the
13562
     * node position in the tree
13563
     */
13564
0
    if (doc->type == XML_DOCUMENT_NODE)
13565
0
  ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
13566
0
#ifdef LIBXML_HTML_ENABLED
13567
0
    else if (doc->type == XML_HTML_DOCUMENT_NODE) {
13568
0
  ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
13569
        /*
13570
         * When parsing in context, it makes no sense to add implied
13571
         * elements like html/body/etc...
13572
         */
13573
0
        options |= HTML_PARSE_NOIMPLIED;
13574
0
    }
13575
0
#endif
13576
0
    else
13577
0
        return(XML_ERR_INTERNAL_ERROR);
13578
13579
0
    if (ctxt == NULL)
13580
0
        return(XML_ERR_NO_MEMORY);
13581
13582
    /*
13583
     * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
13584
     * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
13585
     * we must wait until the last moment to free the original one.
13586
     */
13587
0
    if (doc->dict != NULL) {
13588
0
        if (ctxt->dict != NULL)
13589
0
      xmlDictFree(ctxt->dict);
13590
0
  ctxt->dict = doc->dict;
13591
0
    } else
13592
0
        options |= XML_PARSE_NODICT;
13593
13594
0
    if (doc->encoding != NULL) {
13595
0
        xmlCharEncodingHandlerPtr hdlr;
13596
13597
0
        if (ctxt->encoding != NULL)
13598
0
      xmlFree((xmlChar *) ctxt->encoding);
13599
0
        ctxt->encoding = xmlStrdup((const xmlChar *) doc->encoding);
13600
13601
0
        hdlr = xmlFindCharEncodingHandler((const char *) doc->encoding);
13602
0
        if (hdlr != NULL) {
13603
0
            xmlSwitchToEncoding(ctxt, hdlr);
13604
0
  } else {
13605
0
            return(XML_ERR_UNSUPPORTED_ENCODING);
13606
0
        }
13607
0
    }
13608
13609
0
    xmlCtxtUseOptionsInternal(ctxt, options, NULL);
13610
0
    xmlDetectSAX2(ctxt);
13611
0
    ctxt->myDoc = doc;
13612
    /* parsing in context, i.e. as within existing content */
13613
0
    ctxt->input_id = 2;
13614
0
    ctxt->instate = XML_PARSER_CONTENT;
13615
13616
0
    fake = xmlNewDocComment(node->doc, NULL);
13617
0
    if (fake == NULL) {
13618
0
        xmlFreeParserCtxt(ctxt);
13619
0
  return(XML_ERR_NO_MEMORY);
13620
0
    }
13621
0
    xmlAddChild(node, fake);
13622
13623
0
    if (node->type == XML_ELEMENT_NODE) {
13624
0
  nodePush(ctxt, node);
13625
  /*
13626
   * initialize the SAX2 namespaces stack
13627
   */
13628
0
  cur = node;
13629
0
  while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
13630
0
      xmlNsPtr ns = cur->nsDef;
13631
0
      const xmlChar *iprefix, *ihref;
13632
13633
0
      while (ns != NULL) {
13634
0
    if (ctxt->dict) {
13635
0
        iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
13636
0
        ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
13637
0
    } else {
13638
0
        iprefix = ns->prefix;
13639
0
        ihref = ns->href;
13640
0
    }
13641
13642
0
          if (xmlGetNamespace(ctxt, iprefix) == NULL) {
13643
0
        nsPush(ctxt, iprefix, ihref);
13644
0
        nsnr++;
13645
0
    }
13646
0
    ns = ns->next;
13647
0
      }
13648
0
      cur = cur->parent;
13649
0
  }
13650
0
    }
13651
13652
0
    if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
13653
  /*
13654
   * ID/IDREF registration will be done in xmlValidateElement below
13655
   */
13656
0
  ctxt->loadsubset |= XML_SKIP_IDS;
13657
0
    }
13658
13659
0
#ifdef LIBXML_HTML_ENABLED
13660
0
    if (doc->type == XML_HTML_DOCUMENT_NODE)
13661
0
        __htmlParseContent(ctxt);
13662
0
    else
13663
0
#endif
13664
0
  xmlParseContent(ctxt);
13665
13666
0
    nsPop(ctxt, nsnr);
13667
0
    if ((RAW == '<') && (NXT(1) == '/')) {
13668
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13669
0
    } else if (RAW != 0) {
13670
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13671
0
    }
13672
0
    if ((ctxt->node != NULL) && (ctxt->node != node)) {
13673
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13674
0
  ctxt->wellFormed = 0;
13675
0
    }
13676
13677
0
    if (!ctxt->wellFormed) {
13678
0
        if (ctxt->errNo == 0)
13679
0
      ret = XML_ERR_INTERNAL_ERROR;
13680
0
  else
13681
0
      ret = (xmlParserErrors)ctxt->errNo;
13682
0
    } else {
13683
0
        ret = XML_ERR_OK;
13684
0
    }
13685
13686
    /*
13687
     * Return the newly created nodeset after unlinking it from
13688
     * the pseudo sibling.
13689
     */
13690
13691
0
    cur = fake->next;
13692
0
    fake->next = NULL;
13693
0
    node->last = fake;
13694
13695
0
    if (cur != NULL) {
13696
0
  cur->prev = NULL;
13697
0
    }
13698
13699
0
    *lst = cur;
13700
13701
0
    while (cur != NULL) {
13702
0
  cur->parent = NULL;
13703
0
  cur = cur->next;
13704
0
    }
13705
13706
0
    xmlUnlinkNode(fake);
13707
0
    xmlFreeNode(fake);
13708
13709
13710
0
    if (ret != XML_ERR_OK) {
13711
0
        xmlFreeNodeList(*lst);
13712
0
  *lst = NULL;
13713
0
    }
13714
13715
0
    if (doc->dict != NULL)
13716
0
        ctxt->dict = NULL;
13717
0
    xmlFreeParserCtxt(ctxt);
13718
13719
0
    return(ret);
13720
#else /* !SAX2 */
13721
    return(XML_ERR_INTERNAL_ERROR);
13722
#endif
13723
0
}
13724
13725
#ifdef LIBXML_SAX1_ENABLED
13726
/**
13727
 * xmlParseBalancedChunkMemoryRecover:
13728
 * @doc:  the document the chunk pertains to (must not be NULL)
13729
 * @sax:  the SAX handler block (possibly NULL)
13730
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
13731
 * @depth:  Used for loop detection, use 0
13732
 * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
13733
 * @lst:  the return value for the set of parsed nodes
13734
 * @recover: return nodes even if the data is broken (use 0)
13735
 *
13736
 *
13737
 * Parse a well-balanced chunk of an XML document
13738
 * called by the parser
13739
 * The allowed sequence for the Well Balanced Chunk is the one defined by
13740
 * the content production in the XML grammar:
13741
 *
13742
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13743
 *
13744
 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13745
 *    the parser error code otherwise
13746
 *
13747
 * In case recover is set to 1, the nodelist will not be empty even if
13748
 * the parsed chunk is not well balanced, assuming the parsing succeeded to
13749
 * some extent.
13750
 */
13751
int
13752
xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13753
     void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
13754
0
     int recover) {
13755
0
    xmlParserCtxtPtr ctxt;
13756
0
    xmlDocPtr newDoc;
13757
0
    xmlSAXHandlerPtr oldsax = NULL;
13758
0
    xmlNodePtr content, newRoot;
13759
0
    int size;
13760
0
    int ret = 0;
13761
13762
0
    if (depth > 40) {
13763
0
  return(XML_ERR_ENTITY_LOOP);
13764
0
    }
13765
13766
13767
0
    if (lst != NULL)
13768
0
        *lst = NULL;
13769
0
    if (string == NULL)
13770
0
        return(-1);
13771
13772
0
    size = xmlStrlen(string);
13773
13774
0
    ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13775
0
    if (ctxt == NULL) return(-1);
13776
0
    ctxt->userData = ctxt;
13777
0
    if (sax != NULL) {
13778
0
  oldsax = ctxt->sax;
13779
0
        ctxt->sax = sax;
13780
0
  if (user_data != NULL)
13781
0
      ctxt->userData = user_data;
13782
0
    }
13783
0
    newDoc = xmlNewDoc(BAD_CAST "1.0");
13784
0
    if (newDoc == NULL) {
13785
0
  xmlFreeParserCtxt(ctxt);
13786
0
  return(-1);
13787
0
    }
13788
0
    newDoc->properties = XML_DOC_INTERNAL;
13789
0
    if ((doc != NULL) && (doc->dict != NULL)) {
13790
0
        xmlDictFree(ctxt->dict);
13791
0
  ctxt->dict = doc->dict;
13792
0
  xmlDictReference(ctxt->dict);
13793
0
  ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13794
0
  ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13795
0
  ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13796
0
  ctxt->dictNames = 1;
13797
0
    } else {
13798
0
  xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL);
13799
0
    }
13800
    /* doc == NULL is only supported for historic reasons */
13801
0
    if (doc != NULL) {
13802
0
  newDoc->intSubset = doc->intSubset;
13803
0
  newDoc->extSubset = doc->extSubset;
13804
0
    }
13805
0
    newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13806
0
    if (newRoot == NULL) {
13807
0
  if (sax != NULL)
13808
0
      ctxt->sax = oldsax;
13809
0
  xmlFreeParserCtxt(ctxt);
13810
0
  newDoc->intSubset = NULL;
13811
0
  newDoc->extSubset = NULL;
13812
0
        xmlFreeDoc(newDoc);
13813
0
  return(-1);
13814
0
    }
13815
0
    xmlAddChild((xmlNodePtr) newDoc, newRoot);
13816
0
    nodePush(ctxt, newRoot);
13817
    /* doc == NULL is only supported for historic reasons */
13818
0
    if (doc == NULL) {
13819
0
  ctxt->myDoc = newDoc;
13820
0
    } else {
13821
0
  ctxt->myDoc = newDoc;
13822
0
  newDoc->children->doc = doc;
13823
  /* Ensure that doc has XML spec namespace */
13824
0
  xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
13825
0
  newDoc->oldNs = doc->oldNs;
13826
0
    }
13827
0
    ctxt->instate = XML_PARSER_CONTENT;
13828
0
    ctxt->input_id = 2;
13829
0
    ctxt->depth = depth;
13830
13831
    /*
13832
     * Doing validity checking on chunk doesn't make sense
13833
     */
13834
0
    ctxt->validate = 0;
13835
0
    ctxt->loadsubset = 0;
13836
0
    xmlDetectSAX2(ctxt);
13837
13838
0
    if ( doc != NULL ){
13839
0
        content = doc->children;
13840
0
        doc->children = NULL;
13841
0
        xmlParseContent(ctxt);
13842
0
        doc->children = content;
13843
0
    }
13844
0
    else {
13845
0
        xmlParseContent(ctxt);
13846
0
    }
13847
0
    if ((RAW == '<') && (NXT(1) == '/')) {
13848
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13849
0
    } else if (RAW != 0) {
13850
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13851
0
    }
13852
0
    if (ctxt->node != newDoc->children) {
13853
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13854
0
    }
13855
13856
0
    if (!ctxt->wellFormed) {
13857
0
        if (ctxt->errNo == 0)
13858
0
      ret = 1;
13859
0
  else
13860
0
      ret = ctxt->errNo;
13861
0
    } else {
13862
0
      ret = 0;
13863
0
    }
13864
13865
0
    if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
13866
0
  xmlNodePtr cur;
13867
13868
  /*
13869
   * Return the newly created nodeset after unlinking it from
13870
   * they pseudo parent.
13871
   */
13872
0
  cur = newDoc->children->children;
13873
0
  *lst = cur;
13874
0
  while (cur != NULL) {
13875
0
      xmlSetTreeDoc(cur, doc);
13876
0
      cur->parent = NULL;
13877
0
      cur = cur->next;
13878
0
  }
13879
0
  newDoc->children->children = NULL;
13880
0
    }
13881
13882
0
    if (sax != NULL)
13883
0
  ctxt->sax = oldsax;
13884
0
    xmlFreeParserCtxt(ctxt);
13885
0
    newDoc->intSubset = NULL;
13886
0
    newDoc->extSubset = NULL;
13887
    /* This leaks the namespace list if doc == NULL */
13888
0
    newDoc->oldNs = NULL;
13889
0
    xmlFreeDoc(newDoc);
13890
13891
0
    return(ret);
13892
0
}
13893
13894
/**
13895
 * xmlSAXParseEntity:
13896
 * @sax:  the SAX handler block
13897
 * @filename:  the filename
13898
 *
13899
 * DEPRECATED: Don't use.
13900
 *
13901
 * parse an XML external entity out of context and build a tree.
13902
 * It use the given SAX function block to handle the parsing callback.
13903
 * If sax is NULL, fallback to the default DOM tree building routines.
13904
 *
13905
 * [78] extParsedEnt ::= TextDecl? content
13906
 *
13907
 * This correspond to a "Well Balanced" chunk
13908
 *
13909
 * Returns the resulting document tree
13910
 */
13911
13912
xmlDocPtr
13913
0
xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
13914
0
    xmlDocPtr ret;
13915
0
    xmlParserCtxtPtr ctxt;
13916
13917
0
    ctxt = xmlCreateFileParserCtxt(filename);
13918
0
    if (ctxt == NULL) {
13919
0
  return(NULL);
13920
0
    }
13921
0
    if (sax != NULL) {
13922
0
  if (ctxt->sax != NULL)
13923
0
      xmlFree(ctxt->sax);
13924
0
        ctxt->sax = sax;
13925
0
        ctxt->userData = NULL;
13926
0
    }
13927
13928
0
    xmlParseExtParsedEnt(ctxt);
13929
13930
0
    if (ctxt->wellFormed)
13931
0
  ret = ctxt->myDoc;
13932
0
    else {
13933
0
        ret = NULL;
13934
0
        xmlFreeDoc(ctxt->myDoc);
13935
0
        ctxt->myDoc = NULL;
13936
0
    }
13937
0
    if (sax != NULL)
13938
0
        ctxt->sax = NULL;
13939
0
    xmlFreeParserCtxt(ctxt);
13940
13941
0
    return(ret);
13942
0
}
13943
13944
/**
13945
 * xmlParseEntity:
13946
 * @filename:  the filename
13947
 *
13948
 * parse an XML external entity out of context and build a tree.
13949
 *
13950
 * [78] extParsedEnt ::= TextDecl? content
13951
 *
13952
 * This correspond to a "Well Balanced" chunk
13953
 *
13954
 * Returns the resulting document tree
13955
 */
13956
13957
xmlDocPtr
13958
0
xmlParseEntity(const char *filename) {
13959
0
    return(xmlSAXParseEntity(NULL, filename));
13960
0
}
13961
#endif /* LIBXML_SAX1_ENABLED */
13962
13963
/**
13964
 * xmlCreateEntityParserCtxtInternal:
13965
 * @URL:  the entity URL
13966
 * @ID:  the entity PUBLIC ID
13967
 * @base:  a possible base for the target URI
13968
 * @pctx:  parser context used to set options on new context
13969
 *
13970
 * Create a parser context for an external entity
13971
 * Automatic support for ZLIB/Compress compressed document is provided
13972
 * by default if found at compile-time.
13973
 *
13974
 * Returns the new parser context or NULL
13975
 */
13976
static xmlParserCtxtPtr
13977
xmlCreateEntityParserCtxtInternal(xmlSAXHandlerPtr sax, void *userData,
13978
        const xmlChar *URL, const xmlChar *ID, const xmlChar *base,
13979
2.48M
        xmlParserCtxtPtr pctx) {
13980
2.48M
    xmlParserCtxtPtr ctxt;
13981
2.48M
    xmlParserInputPtr inputStream;
13982
2.48M
    char *directory = NULL;
13983
2.48M
    xmlChar *uri;
13984
13985
2.48M
    ctxt = xmlNewSAXParserCtxt(sax, userData);
13986
2.48M
    if (ctxt == NULL) {
13987
0
  return(NULL);
13988
0
    }
13989
13990
2.48M
    if (pctx != NULL) {
13991
2.48M
        ctxt->options = pctx->options;
13992
2.48M
        ctxt->_private = pctx->_private;
13993
  /*
13994
   * this is a subparser of pctx, so the input_id should be
13995
   * incremented to distinguish from main entity
13996
   */
13997
2.48M
  ctxt->input_id = pctx->input_id + 1;
13998
2.48M
    }
13999
14000
    /* Don't read from stdin. */
14001
2.48M
    if (xmlStrcmp(URL, BAD_CAST "-") == 0)
14002
0
        URL = BAD_CAST "./-";
14003
14004
2.48M
    uri = xmlBuildURI(URL, base);
14005
14006
2.48M
    if (uri == NULL) {
14007
5.64k
  inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
14008
5.64k
  if (inputStream == NULL) {
14009
5.63k
      xmlFreeParserCtxt(ctxt);
14010
5.63k
      return(NULL);
14011
5.63k
  }
14012
14013
15
  inputPush(ctxt, inputStream);
14014
14015
15
  if ((ctxt->directory == NULL) && (directory == NULL))
14016
15
      directory = xmlParserGetDirectory((char *)URL);
14017
15
  if ((ctxt->directory == NULL) && (directory != NULL))
14018
15
      ctxt->directory = directory;
14019
2.48M
    } else {
14020
2.48M
  inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
14021
2.48M
  if (inputStream == NULL) {
14022
38.1k
      xmlFree(uri);
14023
38.1k
      xmlFreeParserCtxt(ctxt);
14024
38.1k
      return(NULL);
14025
38.1k
  }
14026
14027
2.44M
  inputPush(ctxt, inputStream);
14028
14029
2.44M
  if ((ctxt->directory == NULL) && (directory == NULL))
14030
2.44M
      directory = xmlParserGetDirectory((char *)uri);
14031
2.44M
  if ((ctxt->directory == NULL) && (directory != NULL))
14032
2.44M
      ctxt->directory = directory;
14033
2.44M
  xmlFree(uri);
14034
2.44M
    }
14035
2.44M
    return(ctxt);
14036
2.48M
}
14037
14038
/**
14039
 * xmlCreateEntityParserCtxt:
14040
 * @URL:  the entity URL
14041
 * @ID:  the entity PUBLIC ID
14042
 * @base:  a possible base for the target URI
14043
 *
14044
 * Create a parser context for an external entity
14045
 * Automatic support for ZLIB/Compress compressed document is provided
14046
 * by default if found at compile-time.
14047
 *
14048
 * Returns the new parser context or NULL
14049
 */
14050
xmlParserCtxtPtr
14051
xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
14052
0
                    const xmlChar *base) {
14053
0
    return xmlCreateEntityParserCtxtInternal(NULL, NULL, URL, ID, base, NULL);
14054
14055
0
}
14056
14057
/************************************************************************
14058
 *                  *
14059
 *    Front ends when parsing from a file     *
14060
 *                  *
14061
 ************************************************************************/
14062
14063
/**
14064
 * xmlCreateURLParserCtxt:
14065
 * @filename:  the filename or URL
14066
 * @options:  a combination of xmlParserOption
14067
 *
14068
 * Create a parser context for a file or URL content.
14069
 * Automatic support for ZLIB/Compress compressed document is provided
14070
 * by default if found at compile-time and for file accesses
14071
 *
14072
 * Returns the new parser context or NULL
14073
 */
14074
xmlParserCtxtPtr
14075
xmlCreateURLParserCtxt(const char *filename, int options)
14076
0
{
14077
0
    xmlParserCtxtPtr ctxt;
14078
0
    xmlParserInputPtr inputStream;
14079
0
    char *directory = NULL;
14080
14081
0
    ctxt = xmlNewParserCtxt();
14082
0
    if (ctxt == NULL) {
14083
0
  xmlErrMemory(NULL, "cannot allocate parser context");
14084
0
  return(NULL);
14085
0
    }
14086
14087
0
    if (options)
14088
0
  xmlCtxtUseOptionsInternal(ctxt, options, NULL);
14089
0
    ctxt->linenumbers = 1;
14090
14091
0
    inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
14092
0
    if (inputStream == NULL) {
14093
0
  xmlFreeParserCtxt(ctxt);
14094
0
  return(NULL);
14095
0
    }
14096
14097
0
    inputPush(ctxt, inputStream);
14098
0
    if ((ctxt->directory == NULL) && (directory == NULL))
14099
0
        directory = xmlParserGetDirectory(filename);
14100
0
    if ((ctxt->directory == NULL) && (directory != NULL))
14101
0
        ctxt->directory = directory;
14102
14103
0
    return(ctxt);
14104
0
}
14105
14106
/**
14107
 * xmlCreateFileParserCtxt:
14108
 * @filename:  the filename
14109
 *
14110
 * Create a parser context for a file content.
14111
 * Automatic support for ZLIB/Compress compressed document is provided
14112
 * by default if found at compile-time.
14113
 *
14114
 * Returns the new parser context or NULL
14115
 */
14116
xmlParserCtxtPtr
14117
xmlCreateFileParserCtxt(const char *filename)
14118
0
{
14119
0
    return(xmlCreateURLParserCtxt(filename, 0));
14120
0
}
14121
14122
#ifdef LIBXML_SAX1_ENABLED
14123
/**
14124
 * xmlSAXParseFileWithData:
14125
 * @sax:  the SAX handler block
14126
 * @filename:  the filename
14127
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
14128
 *             documents
14129
 * @data:  the userdata
14130
 *
14131
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
14132
 *
14133
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14134
 * compressed document is provided by default if found at compile-time.
14135
 * It use the given SAX function block to handle the parsing callback.
14136
 * If sax is NULL, fallback to the default DOM tree building routines.
14137
 *
14138
 * User data (void *) is stored within the parser context in the
14139
 * context's _private member, so it is available nearly everywhere in libxml
14140
 *
14141
 * Returns the resulting document tree
14142
 */
14143
14144
xmlDocPtr
14145
xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
14146
0
                        int recovery, void *data) {
14147
0
    xmlDocPtr ret;
14148
0
    xmlParserCtxtPtr ctxt;
14149
14150
0
    xmlInitParser();
14151
14152
0
    ctxt = xmlCreateFileParserCtxt(filename);
14153
0
    if (ctxt == NULL) {
14154
0
  return(NULL);
14155
0
    }
14156
0
    if (sax != NULL) {
14157
0
  if (ctxt->sax != NULL)
14158
0
      xmlFree(ctxt->sax);
14159
0
        ctxt->sax = sax;
14160
0
    }
14161
0
    xmlDetectSAX2(ctxt);
14162
0
    if (data!=NULL) {
14163
0
  ctxt->_private = data;
14164
0
    }
14165
14166
0
    if (ctxt->directory == NULL)
14167
0
        ctxt->directory = xmlParserGetDirectory(filename);
14168
14169
0
    ctxt->recovery = recovery;
14170
14171
0
    xmlParseDocument(ctxt);
14172
14173
0
    if ((ctxt->wellFormed) || recovery) {
14174
0
        ret = ctxt->myDoc;
14175
0
  if ((ret != NULL) && (ctxt->input->buf != NULL)) {
14176
0
      if (ctxt->input->buf->compressed > 0)
14177
0
    ret->compression = 9;
14178
0
      else
14179
0
    ret->compression = ctxt->input->buf->compressed;
14180
0
  }
14181
0
    }
14182
0
    else {
14183
0
       ret = NULL;
14184
0
       xmlFreeDoc(ctxt->myDoc);
14185
0
       ctxt->myDoc = NULL;
14186
0
    }
14187
0
    if (sax != NULL)
14188
0
        ctxt->sax = NULL;
14189
0
    xmlFreeParserCtxt(ctxt);
14190
14191
0
    return(ret);
14192
0
}
14193
14194
/**
14195
 * xmlSAXParseFile:
14196
 * @sax:  the SAX handler block
14197
 * @filename:  the filename
14198
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
14199
 *             documents
14200
 *
14201
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
14202
 *
14203
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14204
 * compressed document is provided by default if found at compile-time.
14205
 * It use the given SAX function block to handle the parsing callback.
14206
 * If sax is NULL, fallback to the default DOM tree building routines.
14207
 *
14208
 * Returns the resulting document tree
14209
 */
14210
14211
xmlDocPtr
14212
xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
14213
0
                          int recovery) {
14214
0
    return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
14215
0
}
14216
14217
/**
14218
 * xmlRecoverDoc:
14219
 * @cur:  a pointer to an array of xmlChar
14220
 *
14221
 * DEPRECATED: Use xmlReadDoc with XML_PARSE_RECOVER.
14222
 *
14223
 * parse an XML in-memory document and build a tree.
14224
 * In the case the document is not Well Formed, a attempt to build a
14225
 * tree is tried anyway
14226
 *
14227
 * Returns the resulting document tree or NULL in case of failure
14228
 */
14229
14230
xmlDocPtr
14231
0
xmlRecoverDoc(const xmlChar *cur) {
14232
0
    return(xmlSAXParseDoc(NULL, cur, 1));
14233
0
}
14234
14235
/**
14236
 * xmlParseFile:
14237
 * @filename:  the filename
14238
 *
14239
 * DEPRECATED: Use xmlReadFile.
14240
 *
14241
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14242
 * compressed document is provided by default if found at compile-time.
14243
 *
14244
 * Returns the resulting document tree if the file was wellformed,
14245
 * NULL otherwise.
14246
 */
14247
14248
xmlDocPtr
14249
0
xmlParseFile(const char *filename) {
14250
0
    return(xmlSAXParseFile(NULL, filename, 0));
14251
0
}
14252
14253
/**
14254
 * xmlRecoverFile:
14255
 * @filename:  the filename
14256
 *
14257
 * DEPRECATED: Use xmlReadFile with XML_PARSE_RECOVER.
14258
 *
14259
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14260
 * compressed document is provided by default if found at compile-time.
14261
 * In the case the document is not Well Formed, it attempts to build
14262
 * a tree anyway
14263
 *
14264
 * Returns the resulting document tree or NULL in case of failure
14265
 */
14266
14267
xmlDocPtr
14268
0
xmlRecoverFile(const char *filename) {
14269
0
    return(xmlSAXParseFile(NULL, filename, 1));
14270
0
}
14271
14272
14273
/**
14274
 * xmlSetupParserForBuffer:
14275
 * @ctxt:  an XML parser context
14276
 * @buffer:  a xmlChar * buffer
14277
 * @filename:  a file name
14278
 *
14279
 * DEPRECATED: Don't use.
14280
 *
14281
 * Setup the parser context to parse a new buffer; Clears any prior
14282
 * contents from the parser context. The buffer parameter must not be
14283
 * NULL, but the filename parameter can be
14284
 */
14285
void
14286
xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
14287
                             const char* filename)
14288
0
{
14289
0
    xmlParserInputPtr input;
14290
14291
0
    if ((ctxt == NULL) || (buffer == NULL))
14292
0
        return;
14293
14294
0
    input = xmlNewInputStream(ctxt);
14295
0
    if (input == NULL) {
14296
0
        xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
14297
0
        xmlClearParserCtxt(ctxt);
14298
0
        return;
14299
0
    }
14300
14301
0
    xmlClearParserCtxt(ctxt);
14302
0
    if (filename != NULL)
14303
0
        input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
14304
0
    input->base = buffer;
14305
0
    input->cur = buffer;
14306
0
    input->end = &buffer[xmlStrlen(buffer)];
14307
0
    inputPush(ctxt, input);
14308
0
}
14309
14310
/**
14311
 * xmlSAXUserParseFile:
14312
 * @sax:  a SAX handler
14313
 * @user_data:  The user data returned on SAX callbacks
14314
 * @filename:  a file name
14315
 *
14316
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
14317
 *
14318
 * parse an XML file and call the given SAX handler routines.
14319
 * Automatic support for ZLIB/Compress compressed document is provided
14320
 *
14321
 * Returns 0 in case of success or a error number otherwise
14322
 */
14323
int
14324
xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
14325
0
                    const char *filename) {
14326
0
    int ret = 0;
14327
0
    xmlParserCtxtPtr ctxt;
14328
14329
0
    ctxt = xmlCreateFileParserCtxt(filename);
14330
0
    if (ctxt == NULL) return -1;
14331
0
    if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14332
0
  xmlFree(ctxt->sax);
14333
0
    ctxt->sax = sax;
14334
0
    xmlDetectSAX2(ctxt);
14335
14336
0
    if (user_data != NULL)
14337
0
  ctxt->userData = user_data;
14338
14339
0
    xmlParseDocument(ctxt);
14340
14341
0
    if (ctxt->wellFormed)
14342
0
  ret = 0;
14343
0
    else {
14344
0
        if (ctxt->errNo != 0)
14345
0
      ret = ctxt->errNo;
14346
0
  else
14347
0
      ret = -1;
14348
0
    }
14349
0
    if (sax != NULL)
14350
0
  ctxt->sax = NULL;
14351
0
    if (ctxt->myDoc != NULL) {
14352
0
        xmlFreeDoc(ctxt->myDoc);
14353
0
  ctxt->myDoc = NULL;
14354
0
    }
14355
0
    xmlFreeParserCtxt(ctxt);
14356
14357
0
    return ret;
14358
0
}
14359
#endif /* LIBXML_SAX1_ENABLED */
14360
14361
/************************************************************************
14362
 *                  *
14363
 *    Front ends when parsing from memory     *
14364
 *                  *
14365
 ************************************************************************/
14366
14367
/**
14368
 * xmlCreateMemoryParserCtxt:
14369
 * @buffer:  a pointer to a char array
14370
 * @size:  the size of the array
14371
 *
14372
 * Create a parser context for an XML in-memory document.
14373
 *
14374
 * Returns the new parser context or NULL
14375
 */
14376
xmlParserCtxtPtr
14377
411k
xmlCreateMemoryParserCtxt(const char *buffer, int size) {
14378
411k
    xmlParserCtxtPtr ctxt;
14379
411k
    xmlParserInputPtr input;
14380
411k
    xmlParserInputBufferPtr buf;
14381
14382
411k
    if (buffer == NULL)
14383
0
  return(NULL);
14384
411k
    if (size <= 0)
14385
1.37k
  return(NULL);
14386
14387
410k
    ctxt = xmlNewParserCtxt();
14388
410k
    if (ctxt == NULL)
14389
0
  return(NULL);
14390
14391
    /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
14392
410k
    buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
14393
410k
    if (buf == NULL) {
14394
0
  xmlFreeParserCtxt(ctxt);
14395
0
  return(NULL);
14396
0
    }
14397
14398
410k
    input = xmlNewInputStream(ctxt);
14399
410k
    if (input == NULL) {
14400
0
  xmlFreeParserInputBuffer(buf);
14401
0
  xmlFreeParserCtxt(ctxt);
14402
0
  return(NULL);
14403
0
    }
14404
14405
410k
    input->filename = NULL;
14406
410k
    input->buf = buf;
14407
410k
    xmlBufResetInput(input->buf->buffer, input);
14408
14409
410k
    inputPush(ctxt, input);
14410
410k
    return(ctxt);
14411
410k
}
14412
14413
#ifdef LIBXML_SAX1_ENABLED
14414
/**
14415
 * xmlSAXParseMemoryWithData:
14416
 * @sax:  the SAX handler block
14417
 * @buffer:  an pointer to a char array
14418
 * @size:  the size of the array
14419
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
14420
 *             documents
14421
 * @data:  the userdata
14422
 *
14423
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
14424
 *
14425
 * parse an XML in-memory block and use the given SAX function block
14426
 * to handle the parsing callback. If sax is NULL, fallback to the default
14427
 * DOM tree building routines.
14428
 *
14429
 * User data (void *) is stored within the parser context in the
14430
 * context's _private member, so it is available nearly everywhere in libxml
14431
 *
14432
 * Returns the resulting document tree
14433
 */
14434
14435
xmlDocPtr
14436
xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
14437
0
            int size, int recovery, void *data) {
14438
0
    xmlDocPtr ret;
14439
0
    xmlParserCtxtPtr ctxt;
14440
14441
0
    xmlInitParser();
14442
14443
0
    ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14444
0
    if (ctxt == NULL) return(NULL);
14445
0
    if (sax != NULL) {
14446
0
  if (ctxt->sax != NULL)
14447
0
      xmlFree(ctxt->sax);
14448
0
        ctxt->sax = sax;
14449
0
    }
14450
0
    xmlDetectSAX2(ctxt);
14451
0
    if (data!=NULL) {
14452
0
  ctxt->_private=data;
14453
0
    }
14454
14455
0
    ctxt->recovery = recovery;
14456
14457
0
    xmlParseDocument(ctxt);
14458
14459
0
    if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14460
0
    else {
14461
0
       ret = NULL;
14462
0
       xmlFreeDoc(ctxt->myDoc);
14463
0
       ctxt->myDoc = NULL;
14464
0
    }
14465
0
    if (sax != NULL)
14466
0
  ctxt->sax = NULL;
14467
0
    xmlFreeParserCtxt(ctxt);
14468
14469
0
    return(ret);
14470
0
}
14471
14472
/**
14473
 * xmlSAXParseMemory:
14474
 * @sax:  the SAX handler block
14475
 * @buffer:  an pointer to a char array
14476
 * @size:  the size of the array
14477
 * @recovery:  work in recovery mode, i.e. tries to read not Well Formed
14478
 *             documents
14479
 *
14480
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
14481
 *
14482
 * parse an XML in-memory block and use the given SAX function block
14483
 * to handle the parsing callback. If sax is NULL, fallback to the default
14484
 * DOM tree building routines.
14485
 *
14486
 * Returns the resulting document tree
14487
 */
14488
xmlDocPtr
14489
xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
14490
0
            int size, int recovery) {
14491
0
    return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
14492
0
}
14493
14494
/**
14495
 * xmlParseMemory:
14496
 * @buffer:  an pointer to a char array
14497
 * @size:  the size of the array
14498
 *
14499
 * DEPRECATED: Use xmlReadMemory.
14500
 *
14501
 * parse an XML in-memory block and build a tree.
14502
 *
14503
 * Returns the resulting document tree
14504
 */
14505
14506
0
xmlDocPtr xmlParseMemory(const char *buffer, int size) {
14507
0
   return(xmlSAXParseMemory(NULL, buffer, size, 0));
14508
0
}
14509
14510
/**
14511
 * xmlRecoverMemory:
14512
 * @buffer:  an pointer to a char array
14513
 * @size:  the size of the array
14514
 *
14515
 * DEPRECATED: Use xmlReadMemory with XML_PARSE_RECOVER.
14516
 *
14517
 * parse an XML in-memory block and build a tree.
14518
 * In the case the document is not Well Formed, an attempt to
14519
 * build a tree is tried anyway
14520
 *
14521
 * Returns the resulting document tree or NULL in case of error
14522
 */
14523
14524
0
xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
14525
0
   return(xmlSAXParseMemory(NULL, buffer, size, 1));
14526
0
}
14527
14528
/**
14529
 * xmlSAXUserParseMemory:
14530
 * @sax:  a SAX handler
14531
 * @user_data:  The user data returned on SAX callbacks
14532
 * @buffer:  an in-memory XML document input
14533
 * @size:  the length of the XML document in bytes
14534
 *
14535
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
14536
 *
14537
 * parse an XML in-memory buffer and call the given SAX handler routines.
14538
 *
14539
 * Returns 0 in case of success or a error number otherwise
14540
 */
14541
int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
14542
0
        const char *buffer, int size) {
14543
0
    int ret = 0;
14544
0
    xmlParserCtxtPtr ctxt;
14545
14546
0
    xmlInitParser();
14547
14548
0
    ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14549
0
    if (ctxt == NULL) return -1;
14550
0
    if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14551
0
        xmlFree(ctxt->sax);
14552
0
    ctxt->sax = sax;
14553
0
    xmlDetectSAX2(ctxt);
14554
14555
0
    if (user_data != NULL)
14556
0
  ctxt->userData = user_data;
14557
14558
0
    xmlParseDocument(ctxt);
14559
14560
0
    if (ctxt->wellFormed)
14561
0
  ret = 0;
14562
0
    else {
14563
0
        if (ctxt->errNo != 0)
14564
0
      ret = ctxt->errNo;
14565
0
  else
14566
0
      ret = -1;
14567
0
    }
14568
0
    if (sax != NULL)
14569
0
        ctxt->sax = NULL;
14570
0
    if (ctxt->myDoc != NULL) {
14571
0
        xmlFreeDoc(ctxt->myDoc);
14572
0
  ctxt->myDoc = NULL;
14573
0
    }
14574
0
    xmlFreeParserCtxt(ctxt);
14575
14576
0
    return ret;
14577
0
}
14578
#endif /* LIBXML_SAX1_ENABLED */
14579
14580
/**
14581
 * xmlCreateDocParserCtxt:
14582
 * @cur:  a pointer to an array of xmlChar
14583
 *
14584
 * Creates a parser context for an XML in-memory document.
14585
 *
14586
 * Returns the new parser context or NULL
14587
 */
14588
xmlParserCtxtPtr
14589
0
xmlCreateDocParserCtxt(const xmlChar *cur) {
14590
0
    int len;
14591
14592
0
    if (cur == NULL)
14593
0
  return(NULL);
14594
0
    len = xmlStrlen(cur);
14595
0
    return(xmlCreateMemoryParserCtxt((const char *)cur, len));
14596
0
}
14597
14598
#ifdef LIBXML_SAX1_ENABLED
14599
/**
14600
 * xmlSAXParseDoc:
14601
 * @sax:  the SAX handler block
14602
 * @cur:  a pointer to an array of xmlChar
14603
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
14604
 *             documents
14605
 *
14606
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadDoc.
14607
 *
14608
 * parse an XML in-memory document and build a tree.
14609
 * It use the given SAX function block to handle the parsing callback.
14610
 * If sax is NULL, fallback to the default DOM tree building routines.
14611
 *
14612
 * Returns the resulting document tree
14613
 */
14614
14615
xmlDocPtr
14616
0
xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
14617
0
    xmlDocPtr ret;
14618
0
    xmlParserCtxtPtr ctxt;
14619
0
    xmlSAXHandlerPtr oldsax = NULL;
14620
14621
0
    if (cur == NULL) return(NULL);
14622
14623
14624
0
    ctxt = xmlCreateDocParserCtxt(cur);
14625
0
    if (ctxt == NULL) return(NULL);
14626
0
    if (sax != NULL) {
14627
0
        oldsax = ctxt->sax;
14628
0
        ctxt->sax = sax;
14629
0
        ctxt->userData = NULL;
14630
0
    }
14631
0
    xmlDetectSAX2(ctxt);
14632
14633
0
    xmlParseDocument(ctxt);
14634
0
    if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14635
0
    else {
14636
0
       ret = NULL;
14637
0
       xmlFreeDoc(ctxt->myDoc);
14638
0
       ctxt->myDoc = NULL;
14639
0
    }
14640
0
    if (sax != NULL)
14641
0
  ctxt->sax = oldsax;
14642
0
    xmlFreeParserCtxt(ctxt);
14643
14644
0
    return(ret);
14645
0
}
14646
14647
/**
14648
 * xmlParseDoc:
14649
 * @cur:  a pointer to an array of xmlChar
14650
 *
14651
 * DEPRECATED: Use xmlReadDoc.
14652
 *
14653
 * parse an XML in-memory document and build a tree.
14654
 *
14655
 * Returns the resulting document tree
14656
 */
14657
14658
xmlDocPtr
14659
0
xmlParseDoc(const xmlChar *cur) {
14660
0
    return(xmlSAXParseDoc(NULL, cur, 0));
14661
0
}
14662
#endif /* LIBXML_SAX1_ENABLED */
14663
14664
#ifdef LIBXML_LEGACY_ENABLED
14665
/************************************************************************
14666
 *                  *
14667
 *  Specific function to keep track of entities references    *
14668
 *  and used by the XSLT debugger         *
14669
 *                  *
14670
 ************************************************************************/
14671
14672
/*
 * Callback invoked by xmlAddEntityReference(); installed through
 * xmlSetEntityReferenceFunc(). NULL means no callback is registered.
 */
static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
14673
14674
/**
14675
 * xmlAddEntityReference:
14676
 * @ent : A valid entity
14677
 * @firstNode : A valid first node for children of entity
14678
 * @lastNode : A valid last node of children entity
14679
 *
14680
 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
14681
 */
14682
static void
14683
xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
14684
                      xmlNodePtr lastNode)
14685
{
14686
    if (xmlEntityRefFunc != NULL) {
14687
        (*xmlEntityRefFunc) (ent, firstNode, lastNode);
14688
    }
14689
}
14690
14691
14692
/**
14693
 * xmlSetEntityReferenceFunc:
14694
 * @func: A valid function
14695
 *
14696
 * Set the function to call call back when a xml reference has been made
14697
 */
14698
void
14699
xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
14700
{
14701
    xmlEntityRefFunc = func;
14702
}
14703
#endif /* LIBXML_LEGACY_ENABLED */
14704
14705
/************************************************************************
14706
 *                  *
14707
 *        Miscellaneous       *
14708
 *                  *
14709
 ************************************************************************/
14710
14711
/*
 * Non-zero once xmlInitParser() has run the library initialization;
 * reset to 0 at the end of xmlCleanupParser().
 */
static int xmlParserInitialized = 0;
14712
14713
/**
14714
 * xmlInitParser:
14715
 *
14716
 * Initialization function for the XML parser.
14717
 * This is not reentrant. Call once before processing in case of
14718
 * use in multithreaded programs.
14719
 */
14720
14721
void
14722
4.05M
xmlInitParser(void) {
14723
4.05M
    if (xmlParserInitialized != 0)
14724
4.05M
  return;
14725
14726
#if defined(_WIN32) && (!defined(LIBXML_STATIC) || defined(LIBXML_STATIC_FOR_DLL))
14727
    if (xmlFree == free)
14728
        atexit(xmlCleanupParser);
14729
#endif
14730
14731
3.11k
#ifdef LIBXML_THREAD_ENABLED
14732
3.11k
    __xmlGlobalInitMutexLock();
14733
3.11k
    if (xmlParserInitialized == 0) {
14734
3.11k
#endif
14735
3.11k
  xmlInitThreads();
14736
3.11k
  xmlInitGlobals();
14737
3.11k
  xmlInitMemory();
14738
3.11k
        xmlInitializeDict();
14739
3.11k
  xmlInitCharEncodingHandlers();
14740
3.11k
  xmlDefaultSAXHandlerInit();
14741
3.11k
  xmlRegisterDefaultInputCallbacks();
14742
3.11k
#ifdef LIBXML_OUTPUT_ENABLED
14743
3.11k
  xmlRegisterDefaultOutputCallbacks();
14744
3.11k
#endif /* LIBXML_OUTPUT_ENABLED */
14745
3.11k
#ifdef LIBXML_HTML_ENABLED
14746
3.11k
  htmlInitAutoClose();
14747
3.11k
  htmlDefaultSAXHandlerInit();
14748
3.11k
#endif
14749
3.11k
#if defined(LIBXML_XPATH_ENABLED) || defined(LIBXML_SCHEMAS_ENABLED)
14750
3.11k
  xmlXPathInit();
14751
3.11k
#endif
14752
3.11k
  xmlParserInitialized = 1;
14753
3.11k
#ifdef LIBXML_THREAD_ENABLED
14754
3.11k
    }
14755
3.11k
    __xmlGlobalInitMutexUnlock();
14756
3.11k
#endif
14757
3.11k
}
14758
14759
/**
14760
 * xmlCleanupParser:
14761
 *
14762
 * This function name is somewhat misleading. It does not clean up
14763
 * parser state, it cleans up memory allocated by the library itself.
14764
 * It is a cleanup function for the XML library. It tries to reclaim all
14765
 * related global memory allocated for the library processing.
14766
 * It doesn't deallocate any document related memory. One should
14767
 * call xmlCleanupParser() only when the process has finished using
14768
 * the library and all XML/HTML documents built with it.
14769
 * See also xmlInitParser() which has the opposite function of preparing
14770
 * the library for operations.
14771
 *
14772
 * WARNING: if your application is multithreaded or has plugin support
14773
 *          calling this may crash the application if another thread or
14774
 *          a plugin is still using libxml2. It's sometimes very hard to
14775
 *          guess if libxml2 is in use in the application, some libraries
14776
 *          or plugins may use it without notice. In case of doubt abstain
14777
 *          from calling this function or do it just before calling exit()
14778
 *          to avoid leak reports from valgrind !
14779
 */
14780
14781
void
14782
0
xmlCleanupParser(void) {
14783
0
    if (!xmlParserInitialized)
14784
0
  return;
14785
14786
0
    xmlCleanupCharEncodingHandlers();
14787
0
#ifdef LIBXML_CATALOG_ENABLED
14788
0
    xmlCatalogCleanup();
14789
0
#endif
14790
0
    xmlDictCleanup();
14791
0
    xmlCleanupInputCallbacks();
14792
0
#ifdef LIBXML_OUTPUT_ENABLED
14793
0
    xmlCleanupOutputCallbacks();
14794
0
#endif
14795
0
#ifdef LIBXML_SCHEMAS_ENABLED
14796
0
    xmlSchemaCleanupTypes();
14797
0
    xmlRelaxNGCleanupTypes();
14798
0
#endif
14799
0
    xmlCleanupGlobals();
14800
0
    xmlCleanupThreads(); /* must be last if called not from the main thread */
14801
0
    xmlCleanupMemory();
14802
0
    xmlParserInitialized = 0;
14803
0
}
14804
14805
#if defined(HAVE_ATTRIBUTE_DESTRUCTOR) && !defined(LIBXML_STATIC) && \
    !defined(_WIN32)
/*
 * Destructor hook that runs the library cleanup, but only while the
 * standard free() is still installed as xmlFree.
 */
static void
ATTRIBUTE_DESTRUCTOR
xmlDestructor(void) {
    /*
     * Calling custom deallocation functions in a destructor can cause
     * problems, for example with Nokogiri.
     */
    if (xmlFree != free)
        return;

    xmlCleanupParser();
}
#endif
14818
14819
/************************************************************************
14820
 *                  *
14821
 *  New set (2.6.0) of simpler and more flexible APIs   *
14822
 *                  *
14823
 ************************************************************************/
14824
14825
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A NULL @str is ignored, and when "dict" is NULL the
 * string is always freed.
 *
 * The expansion is wrapped in do { } while (0) so that it behaves as a
 * single statement: the call site supplies the terminating ';' and the
 * macro can be used as the body of an if/else without capturing the
 * else branch.
 */
#define DICT_FREE(str)                                          \
    do {                                                        \
        if ((str) && ((!dict) ||                                \
            (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))  \
            xmlFree((char *)(str));                             \
    } while (0)
14836
14837
/**
14838
 * xmlCtxtReset:
14839
 * @ctxt: an XML parser context
14840
 *
14841
 * Reset a parser context
14842
 */
14843
void
14844
xmlCtxtReset(xmlParserCtxtPtr ctxt)
14845
0
{
14846
0
    xmlParserInputPtr input;
14847
0
    xmlDictPtr dict;
14848
14849
0
    if (ctxt == NULL)
14850
0
        return;
14851
14852
0
    dict = ctxt->dict;
14853
14854
0
    while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
14855
0
        xmlFreeInputStream(input);
14856
0
    }
14857
0
    ctxt->inputNr = 0;
14858
0
    ctxt->input = NULL;
14859
14860
0
    ctxt->spaceNr = 0;
14861
0
    if (ctxt->spaceTab != NULL) {
14862
0
  ctxt->spaceTab[0] = -1;
14863
0
  ctxt->space = &ctxt->spaceTab[0];
14864
0
    } else {
14865
0
        ctxt->space = NULL;
14866
0
    }
14867
14868
14869
0
    ctxt->nodeNr = 0;
14870
0
    ctxt->node = NULL;
14871
14872
0
    ctxt->nameNr = 0;
14873
0
    ctxt->name = NULL;
14874
14875
0
    ctxt->nsNr = 0;
14876
14877
0
    DICT_FREE(ctxt->version);
14878
0
    ctxt->version = NULL;
14879
0
    DICT_FREE(ctxt->encoding);
14880
0
    ctxt->encoding = NULL;
14881
0
    DICT_FREE(ctxt->directory);
14882
0
    ctxt->directory = NULL;
14883
0
    DICT_FREE(ctxt->extSubURI);
14884
0
    ctxt->extSubURI = NULL;
14885
0
    DICT_FREE(ctxt->extSubSystem);
14886
0
    ctxt->extSubSystem = NULL;
14887
0
    if (ctxt->myDoc != NULL)
14888
0
        xmlFreeDoc(ctxt->myDoc);
14889
0
    ctxt->myDoc = NULL;
14890
14891
0
    ctxt->standalone = -1;
14892
0
    ctxt->hasExternalSubset = 0;
14893
0
    ctxt->hasPErefs = 0;
14894
0
    ctxt->html = 0;
14895
0
    ctxt->external = 0;
14896
0
    ctxt->instate = XML_PARSER_START;
14897
0
    ctxt->token = 0;
14898
14899
0
    ctxt->wellFormed = 1;
14900
0
    ctxt->nsWellFormed = 1;
14901
0
    ctxt->disableSAX = 0;
14902
0
    ctxt->valid = 1;
14903
#if 0
14904
    ctxt->vctxt.userData = ctxt;
14905
    ctxt->vctxt.error = xmlParserValidityError;
14906
    ctxt->vctxt.warning = xmlParserValidityWarning;
14907
#endif
14908
0
    ctxt->record_info = 0;
14909
0
    ctxt->checkIndex = 0;
14910
0
    ctxt->inSubset = 0;
14911
0
    ctxt->errNo = XML_ERR_OK;
14912
0
    ctxt->depth = 0;
14913
0
    ctxt->charset = XML_CHAR_ENCODING_UTF8;
14914
0
    ctxt->catalogs = NULL;
14915
0
    ctxt->nbentities = 0;
14916
0
    ctxt->sizeentities = 0;
14917
0
    ctxt->sizeentcopy = 0;
14918
0
    xmlInitNodeInfoSeq(&ctxt->node_seq);
14919
14920
0
    if (ctxt->attsDefault != NULL) {
14921
0
        xmlHashFree(ctxt->attsDefault, xmlHashDefaultDeallocator);
14922
0
        ctxt->attsDefault = NULL;
14923
0
    }
14924
0
    if (ctxt->attsSpecial != NULL) {
14925
0
        xmlHashFree(ctxt->attsSpecial, NULL);
14926
0
        ctxt->attsSpecial = NULL;
14927
0
    }
14928
14929
0
#ifdef LIBXML_CATALOG_ENABLED
14930
0
    if (ctxt->catalogs != NULL)
14931
0
  xmlCatalogFreeLocal(ctxt->catalogs);
14932
0
#endif
14933
0
    if (ctxt->lastError.code != XML_ERR_OK)
14934
0
        xmlResetError(&ctxt->lastError);
14935
0
}
14936
14937
/**
14938
 * xmlCtxtResetPush:
14939
 * @ctxt: an XML parser context
14940
 * @chunk:  a pointer to an array of chars
14941
 * @size:  number of chars in the array
14942
 * @filename:  an optional file name or URI
14943
 * @encoding:  the document encoding, or NULL
14944
 *
14945
 * Reset a push parser context
14946
 *
14947
 * Returns 0 in case of success and 1 in case of error
14948
 */
14949
int
14950
xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
14951
                 int size, const char *filename, const char *encoding)
14952
0
{
14953
0
    xmlParserInputPtr inputStream;
14954
0
    xmlParserInputBufferPtr buf;
14955
0
    xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
14956
14957
0
    if (ctxt == NULL)
14958
0
        return(1);
14959
14960
0
    if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
14961
0
        enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
14962
14963
0
    buf = xmlAllocParserInputBuffer(enc);
14964
0
    if (buf == NULL)
14965
0
        return(1);
14966
14967
0
    if (ctxt == NULL) {
14968
0
        xmlFreeParserInputBuffer(buf);
14969
0
        return(1);
14970
0
    }
14971
14972
0
    xmlCtxtReset(ctxt);
14973
14974
0
    if (filename == NULL) {
14975
0
        ctxt->directory = NULL;
14976
0
    } else {
14977
0
        ctxt->directory = xmlParserGetDirectory(filename);
14978
0
    }
14979
14980
0
    inputStream = xmlNewInputStream(ctxt);
14981
0
    if (inputStream == NULL) {
14982
0
        xmlFreeParserInputBuffer(buf);
14983
0
        return(1);
14984
0
    }
14985
14986
0
    if (filename == NULL)
14987
0
        inputStream->filename = NULL;
14988
0
    else
14989
0
        inputStream->filename = (char *)
14990
0
            xmlCanonicPath((const xmlChar *) filename);
14991
0
    inputStream->buf = buf;
14992
0
    xmlBufResetInput(buf->buffer, inputStream);
14993
14994
0
    inputPush(ctxt, inputStream);
14995
14996
0
    if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
14997
0
        (ctxt->input->buf != NULL)) {
14998
0
  size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
14999
0
        size_t cur = ctxt->input->cur - ctxt->input->base;
15000
15001
0
        xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
15002
15003
0
        xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
15004
#ifdef DEBUG_PUSH
15005
        xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
15006
#endif
15007
0
    }
15008
15009
0
    if (encoding != NULL) {
15010
0
        xmlCharEncodingHandlerPtr hdlr;
15011
15012
0
        if (ctxt->encoding != NULL)
15013
0
      xmlFree((xmlChar *) ctxt->encoding);
15014
0
        ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
15015
15016
0
        hdlr = xmlFindCharEncodingHandler(encoding);
15017
0
        if (hdlr != NULL) {
15018
0
            xmlSwitchToEncoding(ctxt, hdlr);
15019
0
  } else {
15020
0
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
15021
0
            "Unsupported encoding %s\n", BAD_CAST encoding);
15022
0
        }
15023
0
    } else if (enc != XML_CHAR_ENCODING_NONE) {
15024
0
        xmlSwitchEncoding(ctxt, enc);
15025
0
    }
15026
15027
0
    return(0);
15028
0
}
15029
15030
15031
/**
15032
 * xmlCtxtUseOptionsInternal:
15033
 * @ctxt: an XML parser context
15034
 * @options:  a combination of xmlParserOption
15035
 * @encoding:  the user provided encoding to use
15036
 *
15037
 * Applies the options to the parser context
15038
 *
15039
 * Returns 0 in case of success, the set of unknown or unimplemented options
15040
 *         in case of error.
15041
 */
15042
static int
15043
xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding)
15044
643k
{
15045
643k
    if (ctxt == NULL)
15046
0
        return(-1);
15047
643k
    if (encoding != NULL) {
15048
0
        if (ctxt->encoding != NULL)
15049
0
      xmlFree((xmlChar *) ctxt->encoding);
15050
0
        ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
15051
0
    }
15052
643k
    if (options & XML_PARSE_RECOVER) {
15053
400k
        ctxt->recovery = 1;
15054
400k
        options -= XML_PARSE_RECOVER;
15055
400k
  ctxt->options |= XML_PARSE_RECOVER;
15056
400k
    } else
15057
243k
        ctxt->recovery = 0;
15058
643k
    if (options & XML_PARSE_DTDLOAD) {
15059
464k
        ctxt->loadsubset = XML_DETECT_IDS;
15060
464k
        options -= XML_PARSE_DTDLOAD;
15061
464k
  ctxt->options |= XML_PARSE_DTDLOAD;
15062
464k
    } else
15063
179k
        ctxt->loadsubset = 0;
15064
643k
    if (options & XML_PARSE_DTDATTR) {
15065
279k
        ctxt->loadsubset |= XML_COMPLETE_ATTRS;
15066
279k
        options -= XML_PARSE_DTDATTR;
15067
279k
  ctxt->options |= XML_PARSE_DTDATTR;
15068
279k
    }
15069
643k
    if (options & XML_PARSE_NOENT) {
15070
344k
        ctxt->replaceEntities = 1;
15071
        /* ctxt->loadsubset |= XML_DETECT_IDS; */
15072
344k
        options -= XML_PARSE_NOENT;
15073
344k
  ctxt->options |= XML_PARSE_NOENT;
15074
344k
    } else
15075
299k
        ctxt->replaceEntities = 0;
15076
643k
    if (options & XML_PARSE_PEDANTIC) {
15077
105k
        ctxt->pedantic = 1;
15078
105k
        options -= XML_PARSE_PEDANTIC;
15079
105k
  ctxt->options |= XML_PARSE_PEDANTIC;
15080
105k
    } else
15081
538k
        ctxt->pedantic = 0;
15082
643k
    if (options & XML_PARSE_NOBLANKS) {
15083
288k
        ctxt->keepBlanks = 0;
15084
288k
        ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
15085
288k
        options -= XML_PARSE_NOBLANKS;
15086
288k
  ctxt->options |= XML_PARSE_NOBLANKS;
15087
288k
    } else
15088
355k
        ctxt->keepBlanks = 1;
15089
643k
    if (options & XML_PARSE_DTDVALID) {
15090
267k
        ctxt->validate = 1;
15091
267k
        if (options & XML_PARSE_NOWARNING)
15092
129k
            ctxt->vctxt.warning = NULL;
15093
267k
        if (options & XML_PARSE_NOERROR)
15094
203k
            ctxt->vctxt.error = NULL;
15095
267k
        options -= XML_PARSE_DTDVALID;
15096
267k
  ctxt->options |= XML_PARSE_DTDVALID;
15097
267k
    } else
15098
376k
        ctxt->validate = 0;
15099
643k
    if (options & XML_PARSE_NOWARNING) {
15100
304k
        ctxt->sax->warning = NULL;
15101
304k
        options -= XML_PARSE_NOWARNING;
15102
304k
    }
15103
643k
    if (options & XML_PARSE_NOERROR) {
15104
409k
        ctxt->sax->error = NULL;
15105
409k
        ctxt->sax->fatalError = NULL;
15106
409k
        options -= XML_PARSE_NOERROR;
15107
409k
    }
15108
643k
#ifdef LIBXML_SAX1_ENABLED
15109
643k
    if (options & XML_PARSE_SAX1) {
15110
311k
        ctxt->sax->startElement = xmlSAX2StartElement;
15111
311k
        ctxt->sax->endElement = xmlSAX2EndElement;
15112
311k
        ctxt->sax->startElementNs = NULL;
15113
311k
        ctxt->sax->endElementNs = NULL;
15114
311k
        ctxt->sax->initialized = 1;
15115
311k
        options -= XML_PARSE_SAX1;
15116
311k
  ctxt->options |= XML_PARSE_SAX1;
15117
311k
    }
15118
643k
#endif /* LIBXML_SAX1_ENABLED */
15119
643k
    if (options & XML_PARSE_NODICT) {
15120
244k
        ctxt->dictNames = 0;
15121
244k
        options -= XML_PARSE_NODICT;
15122
244k
  ctxt->options |= XML_PARSE_NODICT;
15123
399k
    } else {
15124
399k
        ctxt->dictNames = 1;
15125
399k
    }
15126
643k
    if (options & XML_PARSE_NOCDATA) {
15127
292k
        ctxt->sax->cdataBlock = NULL;
15128
292k
        options -= XML_PARSE_NOCDATA;
15129
292k
  ctxt->options |= XML_PARSE_NOCDATA;
15130
292k
    }
15131
643k
    if (options & XML_PARSE_NSCLEAN) {
15132
414k
  ctxt->options |= XML_PARSE_NSCLEAN;
15133
414k
        options -= XML_PARSE_NSCLEAN;
15134
414k
    }
15135
643k
    if (options & XML_PARSE_NONET) {
15136
300k
  ctxt->options |= XML_PARSE_NONET;
15137
300k
        options -= XML_PARSE_NONET;
15138
300k
    }
15139
643k
    if (options & XML_PARSE_COMPACT) {
15140
388k
  ctxt->options |= XML_PARSE_COMPACT;
15141
388k
        options -= XML_PARSE_COMPACT;
15142
388k
    }
15143
643k
    if (options & XML_PARSE_OLD10) {
15144
255k
  ctxt->options |= XML_PARSE_OLD10;
15145
255k
        options -= XML_PARSE_OLD10;
15146
255k
    }
15147
643k
    if (options & XML_PARSE_NOBASEFIX) {
15148
305k
  ctxt->options |= XML_PARSE_NOBASEFIX;
15149
305k
        options -= XML_PARSE_NOBASEFIX;
15150
305k
    }
15151
643k
    if (options & XML_PARSE_HUGE) {
15152
260k
  ctxt->options |= XML_PARSE_HUGE;
15153
260k
        options -= XML_PARSE_HUGE;
15154
260k
        if (ctxt->dict != NULL)
15155
260k
            xmlDictSetLimit(ctxt->dict, 0);
15156
260k
    }
15157
643k
    if (options & XML_PARSE_OLDSAX) {
15158
236k
  ctxt->options |= XML_PARSE_OLDSAX;
15159
236k
        options -= XML_PARSE_OLDSAX;
15160
236k
    }
15161
643k
    if (options & XML_PARSE_IGNORE_ENC) {
15162
387k
  ctxt->options |= XML_PARSE_IGNORE_ENC;
15163
387k
        options -= XML_PARSE_IGNORE_ENC;
15164
387k
    }
15165
643k
    if (options & XML_PARSE_BIG_LINES) {
15166
279k
  ctxt->options |= XML_PARSE_BIG_LINES;
15167
279k
        options -= XML_PARSE_BIG_LINES;
15168
279k
    }
15169
643k
    ctxt->linenumbers = 1;
15170
643k
    return (options);
15171
643k
}
15172
15173
/**
15174
 * xmlCtxtUseOptions:
15175
 * @ctxt: an XML parser context
15176
 * @options:  a combination of xmlParserOption
15177
 *
15178
 * Applies the options to the parser context
15179
 *
15180
 * Returns 0 in case of success, the set of unknown or unimplemented options
15181
 *         in case of error.
15182
 */
15183
int
15184
xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
15185
484k
{
15186
484k
   return(xmlCtxtUseOptionsInternal(ctxt, options, NULL));
15187
484k
}
15188
15189
/**
15190
 * xmlDoRead:
15191
 * @ctxt:  an XML parser context
15192
 * @URL:  the base URL to use for the document
15193
 * @encoding:  the document encoding, or NULL
15194
 * @options:  a combination of xmlParserOption
15195
 * @reuse:  keep the context for reuse
15196
 *
15197
 * Common front-end for the xmlRead functions
15198
 *
15199
 * Returns the resulting document tree or NULL
15200
 */
15201
static xmlDocPtr
15202
xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
15203
          int options, int reuse)
15204
158k
{
15205
158k
    xmlDocPtr ret;
15206
15207
158k
    xmlCtxtUseOptionsInternal(ctxt, options, encoding);
15208
158k
    if (encoding != NULL) {
15209
0
        xmlCharEncodingHandlerPtr hdlr;
15210
15211
0
  hdlr = xmlFindCharEncodingHandler(encoding);
15212
0
  if (hdlr != NULL)
15213
0
      xmlSwitchToEncoding(ctxt, hdlr);
15214
0
    }
15215
158k
    if ((URL != NULL) && (ctxt->input != NULL) &&
15216
158k
        (ctxt->input->filename == NULL))
15217
158k
        ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
15218
158k
    xmlParseDocument(ctxt);
15219
158k
    if ((ctxt->wellFormed) || ctxt->recovery)
15220
104k
        ret = ctxt->myDoc;
15221
54.6k
    else {
15222
54.6k
        ret = NULL;
15223
54.6k
  if (ctxt->myDoc != NULL) {
15224
51.0k
      xmlFreeDoc(ctxt->myDoc);
15225
51.0k
  }
15226
54.6k
    }
15227
158k
    ctxt->myDoc = NULL;
15228
158k
    if (!reuse) {
15229
158k
  xmlFreeParserCtxt(ctxt);
15230
158k
    }
15231
15232
158k
    return (ret);
15233
158k
}
15234
15235
/**
15236
 * xmlReadDoc:
15237
 * @cur:  a pointer to a zero terminated string
15238
 * @URL:  the base URL to use for the document
15239
 * @encoding:  the document encoding, or NULL
15240
 * @options:  a combination of xmlParserOption
15241
 *
15242
 * parse an XML in-memory document and build a tree.
15243
 *
15244
 * Returns the resulting document tree
15245
 */
15246
xmlDocPtr
15247
xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
15248
0
{
15249
0
    xmlParserCtxtPtr ctxt;
15250
15251
0
    if (cur == NULL)
15252
0
        return (NULL);
15253
0
    xmlInitParser();
15254
15255
0
    ctxt = xmlCreateDocParserCtxt(cur);
15256
0
    if (ctxt == NULL)
15257
0
        return (NULL);
15258
0
    return (xmlDoRead(ctxt, URL, encoding, options, 0));
15259
0
}
15260
15261
/**
15262
 * xmlReadFile:
15263
 * @filename:  a file or URL
15264
 * @encoding:  the document encoding, or NULL
15265
 * @options:  a combination of xmlParserOption
15266
 *
15267
 * parse an XML file from the filesystem or the network.
15268
 *
15269
 * Returns the resulting document tree
15270
 */
15271
xmlDocPtr
15272
xmlReadFile(const char *filename, const char *encoding, int options)
15273
0
{
15274
0
    xmlParserCtxtPtr ctxt;
15275
15276
0
    xmlInitParser();
15277
0
    ctxt = xmlCreateURLParserCtxt(filename, options);
15278
0
    if (ctxt == NULL)
15279
0
        return (NULL);
15280
0
    return (xmlDoRead(ctxt, NULL, encoding, options, 0));
15281
0
}
15282
15283
/**
15284
 * xmlReadMemory:
15285
 * @buffer:  a pointer to a char array
15286
 * @size:  the size of the array
15287
 * @URL:  the base URL to use for the document
15288
 * @encoding:  the document encoding, or NULL
15289
 * @options:  a combination of xmlParserOption
15290
 *
15291
 * parse an XML in-memory document and build a tree.
15292
 *
15293
 * Returns the resulting document tree
15294
 */
15295
xmlDocPtr
15296
xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
15297
158k
{
15298
158k
    xmlParserCtxtPtr ctxt;
15299
15300
158k
    xmlInitParser();
15301
158k
    ctxt = xmlCreateMemoryParserCtxt(buffer, size);
15302
158k
    if (ctxt == NULL)
15303
35
        return (NULL);
15304
158k
    return (xmlDoRead(ctxt, URL, encoding, options, 0));
15305
158k
}
15306
15307
/**
15308
 * xmlReadFd:
15309
 * @fd:  an open file descriptor
15310
 * @URL:  the base URL to use for the document
15311
 * @encoding:  the document encoding, or NULL
15312
 * @options:  a combination of xmlParserOption
15313
 *
15314
 * parse an XML from a file descriptor and build a tree.
15315
 * NOTE that the file descriptor will not be closed when the
15316
 *      reader is closed or reset.
15317
 *
15318
 * Returns the resulting document tree
15319
 */
15320
xmlDocPtr
15321
xmlReadFd(int fd, const char *URL, const char *encoding, int options)
15322
0
{
15323
0
    xmlParserCtxtPtr ctxt;
15324
0
    xmlParserInputBufferPtr input;
15325
0
    xmlParserInputPtr stream;
15326
15327
0
    if (fd < 0)
15328
0
        return (NULL);
15329
0
    xmlInitParser();
15330
15331
0
    input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15332
0
    if (input == NULL)
15333
0
        return (NULL);
15334
0
    input->closecallback = NULL;
15335
0
    ctxt = xmlNewParserCtxt();
15336
0
    if (ctxt == NULL) {
15337
0
        xmlFreeParserInputBuffer(input);
15338
0
        return (NULL);
15339
0
    }
15340
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15341
0
    if (stream == NULL) {
15342
0
        xmlFreeParserInputBuffer(input);
15343
0
  xmlFreeParserCtxt(ctxt);
15344
0
        return (NULL);
15345
0
    }
15346
0
    inputPush(ctxt, stream);
15347
0
    return (xmlDoRead(ctxt, URL, encoding, options, 0));
15348
0
}
15349
15350
/**
15351
 * xmlReadIO:
15352
 * @ioread:  an I/O read function
15353
 * @ioclose:  an I/O close function
15354
 * @ioctx:  an I/O handler
15355
 * @URL:  the base URL to use for the document
15356
 * @encoding:  the document encoding, or NULL
15357
 * @options:  a combination of xmlParserOption
15358
 *
15359
 * parse an XML document from I/O functions and source and build a tree.
15360
 *
15361
 * Returns the resulting document tree
15362
 */
15363
xmlDocPtr
15364
xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
15365
          void *ioctx, const char *URL, const char *encoding, int options)
15366
0
{
15367
0
    xmlParserCtxtPtr ctxt;
15368
0
    xmlParserInputBufferPtr input;
15369
0
    xmlParserInputPtr stream;
15370
15371
0
    if (ioread == NULL)
15372
0
        return (NULL);
15373
0
    xmlInitParser();
15374
15375
0
    input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15376
0
                                         XML_CHAR_ENCODING_NONE);
15377
0
    if (input == NULL) {
15378
0
        if (ioclose != NULL)
15379
0
            ioclose(ioctx);
15380
0
        return (NULL);
15381
0
    }
15382
0
    ctxt = xmlNewParserCtxt();
15383
0
    if (ctxt == NULL) {
15384
0
        xmlFreeParserInputBuffer(input);
15385
0
        return (NULL);
15386
0
    }
15387
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15388
0
    if (stream == NULL) {
15389
0
        xmlFreeParserInputBuffer(input);
15390
0
  xmlFreeParserCtxt(ctxt);
15391
0
        return (NULL);
15392
0
    }
15393
0
    inputPush(ctxt, stream);
15394
0
    return (xmlDoRead(ctxt, URL, encoding, options, 0));
15395
0
}
15396
15397
/**
15398
 * xmlCtxtReadDoc:
15399
 * @ctxt:  an XML parser context
15400
 * @cur:  a pointer to a zero terminated string
15401
 * @URL:  the base URL to use for the document
15402
 * @encoding:  the document encoding, or NULL
15403
 * @options:  a combination of xmlParserOption
15404
 *
15405
 * parse an XML in-memory document and build a tree.
15406
 * This reuses the existing @ctxt parser context
15407
 *
15408
 * Returns the resulting document tree
15409
 */
15410
xmlDocPtr
15411
xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
15412
               const char *URL, const char *encoding, int options)
15413
0
{
15414
0
    if (cur == NULL)
15415
0
        return (NULL);
15416
0
    return (xmlCtxtReadMemory(ctxt, (const char *) cur, xmlStrlen(cur), URL,
15417
0
                              encoding, options));
15418
0
}
15419
15420
/**
15421
 * xmlCtxtReadFile:
15422
 * @ctxt:  an XML parser context
15423
 * @filename:  a file or URL
15424
 * @encoding:  the document encoding, or NULL
15425
 * @options:  a combination of xmlParserOption
15426
 *
15427
 * parse an XML file from the filesystem or the network.
15428
 * This reuses the existing @ctxt parser context
15429
 *
15430
 * Returns the resulting document tree
15431
 */
15432
xmlDocPtr
15433
xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
15434
                const char *encoding, int options)
15435
0
{
15436
0
    xmlParserInputPtr stream;
15437
15438
0
    if (filename == NULL)
15439
0
        return (NULL);
15440
0
    if (ctxt == NULL)
15441
0
        return (NULL);
15442
0
    xmlInitParser();
15443
15444
0
    xmlCtxtReset(ctxt);
15445
15446
0
    stream = xmlLoadExternalEntity(filename, NULL, ctxt);
15447
0
    if (stream == NULL) {
15448
0
        return (NULL);
15449
0
    }
15450
0
    inputPush(ctxt, stream);
15451
0
    return (xmlDoRead(ctxt, NULL, encoding, options, 1));
15452
0
}
15453
15454
/**
15455
 * xmlCtxtReadMemory:
15456
 * @ctxt:  an XML parser context
15457
 * @buffer:  a pointer to a char array
15458
 * @size:  the size of the array
15459
 * @URL:  the base URL to use for the document
15460
 * @encoding:  the document encoding, or NULL
15461
 * @options:  a combination of xmlParserOption
15462
 *
15463
 * parse an XML in-memory document and build a tree.
15464
 * This reuses the existing @ctxt parser context
15465
 *
15466
 * Returns the resulting document tree
15467
 */
15468
xmlDocPtr
15469
xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
15470
                  const char *URL, const char *encoding, int options)
15471
0
{
15472
0
    xmlParserInputBufferPtr input;
15473
0
    xmlParserInputPtr stream;
15474
15475
0
    if (ctxt == NULL)
15476
0
        return (NULL);
15477
0
    if (buffer == NULL)
15478
0
        return (NULL);
15479
0
    xmlInitParser();
15480
15481
0
    xmlCtxtReset(ctxt);
15482
15483
0
    input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
15484
0
    if (input == NULL) {
15485
0
  return(NULL);
15486
0
    }
15487
15488
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15489
0
    if (stream == NULL) {
15490
0
  xmlFreeParserInputBuffer(input);
15491
0
  return(NULL);
15492
0
    }
15493
15494
0
    inputPush(ctxt, stream);
15495
0
    return (xmlDoRead(ctxt, URL, encoding, options, 1));
15496
0
}
15497
15498
/**
15499
 * xmlCtxtReadFd:
15500
 * @ctxt:  an XML parser context
15501
 * @fd:  an open file descriptor
15502
 * @URL:  the base URL to use for the document
15503
 * @encoding:  the document encoding, or NULL
15504
 * @options:  a combination of xmlParserOption
15505
 *
15506
 * parse an XML from a file descriptor and build a tree.
15507
 * This reuses the existing @ctxt parser context
15508
 * NOTE that the file descriptor will not be closed when the
15509
 *      reader is closed or reset.
15510
 *
15511
 * Returns the resulting document tree
15512
 */
15513
xmlDocPtr
15514
xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
15515
              const char *URL, const char *encoding, int options)
15516
0
{
15517
0
    xmlParserInputBufferPtr input;
15518
0
    xmlParserInputPtr stream;
15519
15520
0
    if (fd < 0)
15521
0
        return (NULL);
15522
0
    if (ctxt == NULL)
15523
0
        return (NULL);
15524
0
    xmlInitParser();
15525
15526
0
    xmlCtxtReset(ctxt);
15527
15528
15529
0
    input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15530
0
    if (input == NULL)
15531
0
        return (NULL);
15532
0
    input->closecallback = NULL;
15533
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15534
0
    if (stream == NULL) {
15535
0
        xmlFreeParserInputBuffer(input);
15536
0
        return (NULL);
15537
0
    }
15538
0
    inputPush(ctxt, stream);
15539
0
    return (xmlDoRead(ctxt, URL, encoding, options, 1));
15540
0
}
15541
15542
/**
15543
 * xmlCtxtReadIO:
15544
 * @ctxt:  an XML parser context
15545
 * @ioread:  an I/O read function
15546
 * @ioclose:  an I/O close function
15547
 * @ioctx:  an I/O handler
15548
 * @URL:  the base URL to use for the document
15549
 * @encoding:  the document encoding, or NULL
15550
 * @options:  a combination of xmlParserOption
15551
 *
15552
 * parse an XML document from I/O functions and source and build a tree.
15553
 * This reuses the existing @ctxt parser context
15554
 *
15555
 * Returns the resulting document tree
15556
 */
15557
xmlDocPtr
15558
xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
15559
              xmlInputCloseCallback ioclose, void *ioctx,
15560
        const char *URL,
15561
              const char *encoding, int options)
15562
0
{
15563
0
    xmlParserInputBufferPtr input;
15564
0
    xmlParserInputPtr stream;
15565
15566
0
    if (ioread == NULL)
15567
0
        return (NULL);
15568
0
    if (ctxt == NULL)
15569
0
        return (NULL);
15570
0
    xmlInitParser();
15571
15572
0
    xmlCtxtReset(ctxt);
15573
15574
0
    input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15575
0
                                         XML_CHAR_ENCODING_NONE);
15576
0
    if (input == NULL) {
15577
0
        if (ioclose != NULL)
15578
0
            ioclose(ioctx);
15579
0
        return (NULL);
15580
0
    }
15581
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15582
0
    if (stream == NULL) {
15583
0
        xmlFreeParserInputBuffer(input);
15584
0
        return (NULL);
15585
0
    }
15586
0
    inputPush(ctxt, stream);
15587
0
    return (xmlDoRead(ctxt, URL, encoding, options, 1));
15588
0
}
15589