Coverage Report

Created: 2023-12-13 20:03

/src/libxml2/parser.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3
 *            implemented on top of the SAX interfaces
4
 *
5
 * References:
6
 *   The XML specification:
7
 *     http://www.w3.org/TR/REC-xml
8
 *   Original 1.0 version:
9
 *     http://www.w3.org/TR/1998/REC-xml-19980210
10
 *   XML second edition working draft
11
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
12
 *
13
 * Okay this is a big file, the parser core is around 7000 lines, then it
14
 * is followed by the progressive parser top routines, then the various
15
 * high level APIs to call the parser and a few miscellaneous functions.
16
 * A number of helper functions and deprecated ones have been moved to
17
 * parserInternals.c to reduce this file size.
18
 * As much as possible the functions are associated with their relative
19
 * production in the XML specification. A few productions defining the
20
 * different ranges of character are actually implemented either in
21
 * parserInternals.h or parserInternals.c
22
 * The DOM tree build is realized from the default SAX callbacks in
23
 * the module SAX.c.
24
 * The routines doing the validation checks are in valid.c and called either
25
 * from the SAX callbacks or as standalone functions using a preparsed
26
 * document.
27
 *
28
 * See Copyright for the status of this software.
29
 *
30
 * daniel@veillard.com
31
 */
32
33
/* To avoid EBCDIC trouble when parsing on zOS */
34
#if defined(__MVS__)
35
#pragma convert("ISO8859-1")
36
#endif
37
38
#define IN_LIBXML
39
#include "libxml.h"
40
41
#if defined(_WIN32)
42
#define XML_DIR_SEP '\\'
43
#else
44
#define XML_DIR_SEP '/'
45
#endif
46
47
#include <stdlib.h>
48
#include <limits.h>
49
#include <string.h>
50
#include <stdarg.h>
51
#include <stddef.h>
52
#include <ctype.h>
53
#include <stdlib.h>
54
#include <libxml/xmlmemory.h>
55
#include <libxml/threads.h>
56
#include <libxml/globals.h>
57
#include <libxml/tree.h>
58
#include <libxml/parser.h>
59
#include <libxml/parserInternals.h>
60
#include <libxml/HTMLparser.h>
61
#include <libxml/valid.h>
62
#include <libxml/entities.h>
63
#include <libxml/xmlerror.h>
64
#include <libxml/encoding.h>
65
#include <libxml/xmlIO.h>
66
#include <libxml/uri.h>
67
#ifdef LIBXML_CATALOG_ENABLED
68
#include <libxml/catalog.h>
69
#endif
70
#ifdef LIBXML_SCHEMAS_ENABLED
71
#include <libxml/xmlschemastypes.h>
72
#include <libxml/relaxng.h>
73
#endif
74
#if defined(LIBXML_XPATH_ENABLED) || defined(LIBXML_SCHEMAS_ENABLED)
75
#include <libxml/xpath.h>
76
#endif
77
78
#include "private/buf.h"
79
#include "private/enc.h"
80
#include "private/error.h"
81
#include "private/html.h"
82
#include "private/io.h"
83
#include "private/parser.h"
84
#include "private/threads.h"
85
86
struct _xmlStartTag {
87
    const xmlChar *prefix;
88
    const xmlChar *URI;
89
    int line;
90
    int nsNr;
91
};
92
93
static void
94
xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info);
95
96
static xmlParserCtxtPtr
97
xmlCreateEntityParserCtxtInternal(xmlSAXHandlerPtr sax, void *userData,
98
        const xmlChar *URL, const xmlChar *ID, const xmlChar *base,
99
        xmlParserCtxtPtr pctx);
100
101
static void xmlHaltParser(xmlParserCtxtPtr ctxt);
102
103
static int
104
xmlParseElementStart(xmlParserCtxtPtr ctxt);
105
106
static void
107
xmlParseElementEnd(xmlParserCtxtPtr ctxt);
108
109
/************************************************************************
110
 *                  *
111
 *  Arbitrary limits set in the parser. See XML_PARSE_HUGE    *
112
 *                  *
113
 ************************************************************************/
114
115
93.8M
#define XML_MAX_HUGE_LENGTH 1000000000
116
117
64.8k
#define XML_PARSER_BIG_ENTITY 1000
118
#define XML_PARSER_LOT_ENTITY 5000
119
120
/*
121
 * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity
122
 *    replacement over the size in byte of the input indicates that you have
123
 *    an exponential behaviour. A value of 10 corresponds to at least 3 entity
124
 *    replacement per byte of input.
125
 */
126
1.89M
#define XML_PARSER_NON_LINEAR 10
127
128
/*
129
 * xmlParserEntityCheck
130
 *
131
 * Function to check non-linear entity expansion behaviour
132
 * This is here to detect and stop exponential (non-linear) entity expansion
133
 * This is not a limitation of the parser but a safety
134
 * boundary feature. It can be disabled with the XML_PARSE_HUGE
135
 * parser option.
136
 */
137
static int
138
xmlParserEntityCheck(xmlParserCtxtPtr ctxt, size_t size,
139
                     xmlEntityPtr ent, size_t replacement)
140
18.2M
{
141
18.2M
    size_t consumed = 0;
142
18.2M
    int i;
143
144
18.2M
    if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE))
145
15.6M
        return (0);
146
2.59M
    if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
147
342
        return (1);
148
149
    /*
150
     * This may look absurd but is needed to detect
151
     * entities problems
152
     */
153
2.59M
    if ((ent != NULL) && (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
154
2.59M
  (ent->content != NULL) && (ent->checked == 0) &&
155
2.59M
  (ctxt->errNo != XML_ERR_ENTITY_LOOP)) {
156
337k
  unsigned long oldnbent = ctxt->nbentities, diff;
157
337k
  xmlChar *rep;
158
159
337k
  ent->checked = 1;
160
161
337k
        ++ctxt->depth;
162
337k
  rep = xmlStringDecodeEntities(ctxt, ent->content,
163
337k
          XML_SUBSTITUTE_REF, 0, 0, 0);
164
337k
        --ctxt->depth;
165
337k
  if ((rep == NULL) || (ctxt->errNo == XML_ERR_ENTITY_LOOP)) {
166
984
      ent->content[0] = 0;
167
984
  }
168
169
337k
        diff = ctxt->nbentities - oldnbent + 1;
170
337k
        if (diff > INT_MAX / 2)
171
0
            diff = INT_MAX / 2;
172
337k
  ent->checked = diff * 2;
173
337k
  if (rep != NULL) {
174
336k
      if (xmlStrchr(rep, '<'))
175
6.84k
    ent->checked |= 1;
176
336k
      xmlFree(rep);
177
336k
      rep = NULL;
178
336k
  }
179
337k
    }
180
181
    /*
182
     * Prevent entity exponential check, not just replacement while
183
     * parsing the DTD
184
     * The check is potentially costly so do that only once every 1024 entities
185
     */
186
2.59M
    if ((ctxt->instate == XML_PARSER_DTD) && (ctxt->nbentities > 10000) &&
187
2.59M
        (ctxt->nbentities % 1024 == 0)) {
188
0
  for (i = 0;i < ctxt->inputNr;i++) {
189
0
      consumed += ctxt->inputTab[i]->consumed +
190
0
                 (ctxt->inputTab[i]->cur - ctxt->inputTab[i]->base);
191
0
  }
192
0
  if (ctxt->nbentities > consumed * XML_PARSER_NON_LINEAR) {
193
0
      xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
194
0
      ctxt->instate = XML_PARSER_EOF;
195
0
      return (1);
196
0
  }
197
0
  consumed = 0;
198
0
    }
199
200
201
202
2.59M
    if (replacement != 0) {
203
43.3k
  if (replacement < XML_MAX_TEXT_LENGTH)
204
43.3k
      return(0);
205
206
        /*
207
   * If the volume of entity copy reaches 10 times the
208
   * amount of parsed data and over the large text threshold
209
   * then that's very likely to be an abuse.
210
   */
211
0
        if (ctxt->input != NULL) {
212
0
      consumed = ctxt->input->consumed +
213
0
                 (ctxt->input->cur - ctxt->input->base);
214
0
  }
215
0
        consumed += ctxt->sizeentities;
216
217
0
        if (replacement < XML_PARSER_NON_LINEAR * consumed)
218
0
      return(0);
219
2.54M
    } else if (size != 0) {
220
        /*
221
         * Do the check based on the replacement size of the entity
222
         */
223
64.8k
        if (size < XML_PARSER_BIG_ENTITY)
224
61.3k
      return(0);
225
226
        /*
227
         * A limit on the amount of text data reasonably used
228
         */
229
3.46k
        if (ctxt->input != NULL) {
230
3.46k
            consumed = ctxt->input->consumed +
231
3.46k
                (ctxt->input->cur - ctxt->input->base);
232
3.46k
        }
233
3.46k
        consumed += ctxt->sizeentities;
234
235
3.46k
        if ((size < XML_PARSER_NON_LINEAR * consumed) &&
236
3.46k
      (ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed))
237
2.63k
            return (0);
238
2.48M
    } else if (ent != NULL) {
239
        /*
240
         * use the number of parsed entities in the replacement
241
         */
242
1.88M
        size = ent->checked / 2;
243
244
        /*
245
         * The amount of data parsed counting entities size only once
246
         */
247
1.88M
        if (ctxt->input != NULL) {
248
1.88M
            consumed = ctxt->input->consumed +
249
1.88M
                (ctxt->input->cur - ctxt->input->base);
250
1.88M
        }
251
1.88M
        consumed += ctxt->sizeentities;
252
253
        /*
254
         * Check the density of entities for the amount of data
255
   * knowing an entity reference will take at least 3 bytes
256
         */
257
1.88M
        if (size * 3 < consumed * XML_PARSER_NON_LINEAR)
258
1.88M
            return (0);
259
1.88M
    } else {
260
        /*
261
         * strange we got no data for checking
262
         */
263
599k
  if (((ctxt->lastError.code != XML_ERR_UNDECLARED_ENTITY) &&
264
599k
       (ctxt->lastError.code != XML_WAR_UNDECLARED_ENTITY)) ||
265
599k
      (ctxt->nbentities <= 10000))
266
598k
      return (0);
267
599k
    }
268
4.03k
    xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
269
4.03k
    return (1);
270
2.59M
}
271
272
/**
273
 * xmlParserMaxDepth:
274
 *
275
 * arbitrary depth limit for the XML documents that we allow to
276
 * process. This is not a limitation of the parser but a safety
277
 * boundary feature. It can be disabled with the XML_PARSE_HUGE
278
 * parser option.
279
 */
280
unsigned int xmlParserMaxDepth = 256;
281
282
283
284
#define SAX2 1
285
1.91G
#define XML_PARSER_BIG_BUFFER_SIZE 300
286
785M
#define XML_PARSER_BUFFER_SIZE 100
287
1.11M
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
288
289
/**
290
 * XML_PARSER_CHUNK_SIZE
291
 *
292
 * When calling GROW that's the minimal amount of data
293
 * the parser expects to have received. It is not a hard
294
 * limit but an optimization when reading strings like Names
295
 * It is not strictly needed as long as the input's available characters
296
 * are followed by 0, which should be provided by the I/O level
297
 */
298
310M
#define XML_PARSER_CHUNK_SIZE 100
299
300
/*
301
 * List of XML prefixed PI allowed by W3C specs
302
 */
303
304
static const char* const xmlW3CPIs[] = {
305
    "xml-stylesheet",
306
    "xml-model",
307
    NULL
308
};
309
310
311
/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
312
static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
313
                                              const xmlChar **str);
314
315
static xmlParserErrors
316
xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
317
                xmlSAXHandlerPtr sax,
318
          void *user_data, int depth, const xmlChar *URL,
319
          const xmlChar *ID, xmlNodePtr *list);
320
321
static int
322
xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
323
                          const char *encoding);
324
#ifdef LIBXML_LEGACY_ENABLED
325
static void
326
xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
327
                      xmlNodePtr lastNode);
328
#endif /* LIBXML_LEGACY_ENABLED */
329
330
static xmlParserErrors
331
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
332
          const xmlChar *string, void *user_data, xmlNodePtr *lst);
333
334
static int
335
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
336
337
/************************************************************************
338
 *                  *
339
 *    Some factorized error routines        *
340
 *                  *
341
 ************************************************************************/
342
343
/**
344
 * xmlErrAttributeDup:
345
 * @ctxt:  an XML parser context
346
 * @prefix:  the attribute prefix
347
 * @localname:  the attribute localname
348
 *
349
 * Handle an attribute redefinition error
350
 */
351
static void
352
xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
353
                   const xmlChar * localname)
354
79.1k
{
355
79.1k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
356
79.1k
        (ctxt->instate == XML_PARSER_EOF))
357
0
  return;
358
79.1k
    if (ctxt != NULL)
359
79.1k
  ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
360
361
79.1k
    if (prefix == NULL)
362
75.2k
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
363
75.2k
                        XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
364
75.2k
                        (const char *) localname, NULL, NULL, 0, 0,
365
75.2k
                        "Attribute %s redefined\n", localname);
366
3.86k
    else
367
3.86k
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
368
3.86k
                        XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
369
3.86k
                        (const char *) prefix, (const char *) localname,
370
3.86k
                        NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
371
3.86k
                        localname);
372
79.1k
    if (ctxt != NULL) {
373
79.1k
  ctxt->wellFormed = 0;
374
79.1k
  if (ctxt->recovery == 0)
375
67.0k
      ctxt->disableSAX = 1;
376
79.1k
    }
377
79.1k
}
378
379
/**
380
 * xmlFatalErr:
381
 * @ctxt:  an XML parser context
382
 * @error:  the error number
383
 * @info:  extra information string
384
 *
385
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
386
 */
387
static void
388
xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
389
18.6M
{
390
18.6M
    const char *errmsg;
391
392
18.6M
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
393
18.6M
        (ctxt->instate == XML_PARSER_EOF))
394
573k
  return;
395
18.0M
    switch (error) {
396
353k
        case XML_ERR_INVALID_HEX_CHARREF:
397
353k
            errmsg = "CharRef: invalid hexadecimal value";
398
353k
            break;
399
704k
        case XML_ERR_INVALID_DEC_CHARREF:
400
704k
            errmsg = "CharRef: invalid decimal value";
401
704k
            break;
402
0
        case XML_ERR_INVALID_CHARREF:
403
0
            errmsg = "CharRef: invalid value";
404
0
            break;
405
295k
        case XML_ERR_INTERNAL_ERROR:
406
295k
            errmsg = "internal error";
407
295k
            break;
408
0
        case XML_ERR_PEREF_AT_EOF:
409
0
            errmsg = "PEReference at end of document";
410
0
            break;
411
0
        case XML_ERR_PEREF_IN_PROLOG:
412
0
            errmsg = "PEReference in prolog";
413
0
            break;
414
0
        case XML_ERR_PEREF_IN_EPILOG:
415
0
            errmsg = "PEReference in epilog";
416
0
            break;
417
0
        case XML_ERR_PEREF_NO_NAME:
418
0
            errmsg = "PEReference: no name";
419
0
            break;
420
5.00k
        case XML_ERR_PEREF_SEMICOL_MISSING:
421
5.00k
            errmsg = "PEReference: expecting ';'";
422
5.00k
            break;
423
619k
        case XML_ERR_ENTITY_LOOP:
424
619k
            errmsg = "Detected an entity reference loop";
425
619k
            break;
426
0
        case XML_ERR_ENTITY_NOT_STARTED:
427
0
            errmsg = "EntityValue: \" or ' expected";
428
0
            break;
429
967
        case XML_ERR_ENTITY_PE_INTERNAL:
430
967
            errmsg = "PEReferences forbidden in internal subset";
431
967
            break;
432
2.63k
        case XML_ERR_ENTITY_NOT_FINISHED:
433
2.63k
            errmsg = "EntityValue: \" or ' expected";
434
2.63k
            break;
435
1.16M
        case XML_ERR_ATTRIBUTE_NOT_STARTED:
436
1.16M
            errmsg = "AttValue: \" or ' expected";
437
1.16M
            break;
438
2.79M
        case XML_ERR_LT_IN_ATTRIBUTE:
439
2.79M
            errmsg = "Unescaped '<' not allowed in attributes values";
440
2.79M
            break;
441
6.03k
        case XML_ERR_LITERAL_NOT_STARTED:
442
6.03k
            errmsg = "SystemLiteral \" or ' expected";
443
6.03k
            break;
444
7.76k
        case XML_ERR_LITERAL_NOT_FINISHED:
445
7.76k
            errmsg = "Unfinished System or Public ID \" or ' expected";
446
7.76k
            break;
447
834k
        case XML_ERR_MISPLACED_CDATA_END:
448
834k
            errmsg = "Sequence ']]>' not allowed in content";
449
834k
            break;
450
5.31k
        case XML_ERR_URI_REQUIRED:
451
5.31k
            errmsg = "SYSTEM or PUBLIC, the URI is missing";
452
5.31k
            break;
453
737
        case XML_ERR_PUBID_REQUIRED:
454
737
            errmsg = "PUBLIC, the Public Identifier is missing";
455
737
            break;
456
860k
        case XML_ERR_HYPHEN_IN_COMMENT:
457
860k
            errmsg = "Comment must not contain '--' (double-hyphen)";
458
860k
            break;
459
81.6k
        case XML_ERR_PI_NOT_STARTED:
460
81.6k
            errmsg = "xmlParsePI : no target name";
461
81.6k
            break;
462
9.71k
        case XML_ERR_RESERVED_XML_NAME:
463
9.71k
            errmsg = "Invalid PI name";
464
9.71k
            break;
465
394
        case XML_ERR_NOTATION_NOT_STARTED:
466
394
            errmsg = "NOTATION: Name expected here";
467
394
            break;
468
1.52k
        case XML_ERR_NOTATION_NOT_FINISHED:
469
1.52k
            errmsg = "'>' required to close NOTATION declaration";
470
1.52k
            break;
471
13.8k
        case XML_ERR_VALUE_REQUIRED:
472
13.8k
            errmsg = "Entity value required";
473
13.8k
            break;
474
896
        case XML_ERR_URI_FRAGMENT:
475
896
            errmsg = "Fragment not allowed";
476
896
            break;
477
8.84k
        case XML_ERR_ATTLIST_NOT_STARTED:
478
8.84k
            errmsg = "'(' required to start ATTLIST enumeration";
479
8.84k
            break;
480
3.35k
        case XML_ERR_NMTOKEN_REQUIRED:
481
3.35k
            errmsg = "NmToken expected in ATTLIST enumeration";
482
3.35k
            break;
483
1.43k
        case XML_ERR_ATTLIST_NOT_FINISHED:
484
1.43k
            errmsg = "')' required to finish ATTLIST enumeration";
485
1.43k
            break;
486
2.56k
        case XML_ERR_MIXED_NOT_STARTED:
487
2.56k
            errmsg = "MixedContentDecl : '|' or ')*' expected";
488
2.56k
            break;
489
0
        case XML_ERR_PCDATA_REQUIRED:
490
0
            errmsg = "MixedContentDecl : '#PCDATA' expected";
491
0
            break;
492
5.29k
        case XML_ERR_ELEMCONTENT_NOT_STARTED:
493
5.29k
            errmsg = "ContentDecl : Name or '(' expected";
494
5.29k
            break;
495
11.6k
        case XML_ERR_ELEMCONTENT_NOT_FINISHED:
496
11.6k
            errmsg = "ContentDecl : ',' '|' or ')' expected";
497
11.6k
            break;
498
0
        case XML_ERR_PEREF_IN_INT_SUBSET:
499
0
            errmsg =
500
0
                "PEReference: forbidden within markup decl in internal subset";
501
0
            break;
502
2.08M
        case XML_ERR_GT_REQUIRED:
503
2.08M
            errmsg = "expected '>'";
504
2.08M
            break;
505
439
        case XML_ERR_CONDSEC_INVALID:
506
439
            errmsg = "XML conditional section '[' expected";
507
439
            break;
508
29.9k
        case XML_ERR_EXT_SUBSET_NOT_FINISHED:
509
29.9k
            errmsg = "Content error in the external subset";
510
29.9k
            break;
511
2.53k
        case XML_ERR_CONDSEC_INVALID_KEYWORD:
512
2.53k
            errmsg =
513
2.53k
                "conditional section INCLUDE or IGNORE keyword expected";
514
2.53k
            break;
515
2.20k
        case XML_ERR_CONDSEC_NOT_FINISHED:
516
2.20k
            errmsg = "XML conditional section not closed";
517
2.20k
            break;
518
423
        case XML_ERR_XMLDECL_NOT_STARTED:
519
423
            errmsg = "Text declaration '<?xml' required";
520
423
            break;
521
138k
        case XML_ERR_XMLDECL_NOT_FINISHED:
522
138k
            errmsg = "parsing XML declaration: '?>' expected";
523
138k
            break;
524
0
        case XML_ERR_EXT_ENTITY_STANDALONE:
525
0
            errmsg = "external parsed entities cannot be standalone";
526
0
            break;
527
3.56M
        case XML_ERR_ENTITYREF_SEMICOL_MISSING:
528
3.56M
            errmsg = "EntityRef: expecting ';'";
529
3.56M
            break;
530
84.8k
        case XML_ERR_DOCTYPE_NOT_FINISHED:
531
84.8k
            errmsg = "DOCTYPE improperly terminated";
532
84.8k
            break;
533
0
        case XML_ERR_LTSLASH_REQUIRED:
534
0
            errmsg = "EndTag: '</' not found";
535
0
            break;
536
7.80k
        case XML_ERR_EQUAL_REQUIRED:
537
7.80k
            errmsg = "expected '='";
538
7.80k
            break;
539
29.5k
        case XML_ERR_STRING_NOT_CLOSED:
540
29.5k
            errmsg = "String not closed expecting \" or '";
541
29.5k
            break;
542
6.52k
        case XML_ERR_STRING_NOT_STARTED:
543
6.52k
            errmsg = "String not started expecting ' or \"";
544
6.52k
            break;
545
1.96k
        case XML_ERR_ENCODING_NAME:
546
1.96k
            errmsg = "Invalid XML encoding name";
547
1.96k
            break;
548
1.01k
        case XML_ERR_STANDALONE_VALUE:
549
1.01k
            errmsg = "standalone accepts only 'yes' or 'no'";
550
1.01k
            break;
551
44.9k
        case XML_ERR_DOCUMENT_EMPTY:
552
44.9k
            errmsg = "Document is empty";
553
44.9k
            break;
554
224k
        case XML_ERR_DOCUMENT_END:
555
224k
            errmsg = "Extra content at the end of the document";
556
224k
            break;
557
3.89M
        case XML_ERR_NOT_WELL_BALANCED:
558
3.89M
            errmsg = "chunk is not well balanced";
559
3.89M
            break;
560
0
        case XML_ERR_EXTRA_CONTENT:
561
0
            errmsg = "extra content at the end of well balanced chunk";
562
0
            break;
563
74.3k
        case XML_ERR_VERSION_MISSING:
564
74.3k
            errmsg = "Malformed declaration expecting version";
565
74.3k
            break;
566
166
        case XML_ERR_NAME_TOO_LONG:
567
166
            errmsg = "Name too long";
568
166
            break;
569
#if 0
570
        case:
571
            errmsg = "";
572
            break;
573
#endif
574
39.4k
        default:
575
39.4k
            errmsg = "Unregistered error message";
576
18.0M
    }
577
18.0M
    if (ctxt != NULL)
578
18.0M
  ctxt->errNo = error;
579
18.0M
    if (info == NULL) {
580
17.7M
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
581
17.7M
                        XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s\n",
582
17.7M
                        errmsg);
583
17.7M
    } else {
584
296k
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
585
296k
                        XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s: %s\n",
586
296k
                        errmsg, info);
587
296k
    }
588
18.0M
    if (ctxt != NULL) {
589
18.0M
  ctxt->wellFormed = 0;
590
18.0M
  if (ctxt->recovery == 0)
591
15.8M
      ctxt->disableSAX = 1;
592
18.0M
    }
593
18.0M
}
594
595
/**
596
 * xmlFatalErrMsg:
597
 * @ctxt:  an XML parser context
598
 * @error:  the error number
599
 * @msg:  the error message
600
 *
601
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
602
 */
603
static void LIBXML_ATTR_FORMAT(3,0)
604
xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
605
               const char *msg)
606
122M
{
607
122M
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
608
122M
        (ctxt->instate == XML_PARSER_EOF))
609
0
  return;
610
122M
    if (ctxt != NULL)
611
122M
  ctxt->errNo = error;
612
122M
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
613
122M
                    XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
614
122M
    if (ctxt != NULL) {
615
122M
  ctxt->wellFormed = 0;
616
122M
  if (ctxt->recovery == 0)
617
99.5M
      ctxt->disableSAX = 1;
618
122M
    }
619
122M
}
620
621
/**
622
 * xmlWarningMsg:
623
 * @ctxt:  an XML parser context
624
 * @error:  the error number
625
 * @msg:  the error message
626
 * @str1:  extra data
627
 * @str2:  extra data
628
 *
629
 * Handle a warning.
630
 */
631
static void LIBXML_ATTR_FORMAT(3,0)
632
xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
633
              const char *msg, const xmlChar *str1, const xmlChar *str2)
634
433k
{
635
433k
    xmlStructuredErrorFunc schannel = NULL;
636
637
433k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
638
433k
        (ctxt->instate == XML_PARSER_EOF))
639
0
  return;
640
433k
    if ((ctxt != NULL) && (ctxt->sax != NULL) &&
641
433k
        (ctxt->sax->initialized == XML_SAX2_MAGIC))
642
172k
        schannel = ctxt->sax->serror;
643
433k
    if (ctxt != NULL) {
644
433k
        __xmlRaiseError(schannel,
645
433k
                    (ctxt->sax) ? ctxt->sax->warning : NULL,
646
433k
                    ctxt->userData,
647
433k
                    ctxt, NULL, XML_FROM_PARSER, error,
648
433k
                    XML_ERR_WARNING, NULL, 0,
649
433k
        (const char *) str1, (const char *) str2, NULL, 0, 0,
650
433k
        msg, (const char *) str1, (const char *) str2);
651
433k
    } else {
652
0
        __xmlRaiseError(schannel, NULL, NULL,
653
0
                    ctxt, NULL, XML_FROM_PARSER, error,
654
0
                    XML_ERR_WARNING, NULL, 0,
655
0
        (const char *) str1, (const char *) str2, NULL, 0, 0,
656
0
        msg, (const char *) str1, (const char *) str2);
657
0
    }
658
433k
}
659
660
/**
661
 * xmlValidityError:
662
 * @ctxt:  an XML parser context
663
 * @error:  the error number
664
 * @msg:  the error message
665
 * @str1:  extra data
666
 *
667
 * Handle a validity error.
668
 */
669
static void LIBXML_ATTR_FORMAT(3,0)
670
xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
671
              const char *msg, const xmlChar *str1, const xmlChar *str2)
672
14.5k
{
673
14.5k
    xmlStructuredErrorFunc schannel = NULL;
674
675
14.5k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
676
14.5k
        (ctxt->instate == XML_PARSER_EOF))
677
0
  return;
678
14.5k
    if (ctxt != NULL) {
679
14.5k
  ctxt->errNo = error;
680
14.5k
  if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
681
11.4k
      schannel = ctxt->sax->serror;
682
14.5k
    }
683
14.5k
    if (ctxt != NULL) {
684
14.5k
        __xmlRaiseError(schannel,
685
14.5k
                    ctxt->vctxt.error, ctxt->vctxt.userData,
686
14.5k
                    ctxt, NULL, XML_FROM_DTD, error,
687
14.5k
                    XML_ERR_ERROR, NULL, 0, (const char *) str1,
688
14.5k
        (const char *) str2, NULL, 0, 0,
689
14.5k
        msg, (const char *) str1, (const char *) str2);
690
14.5k
  ctxt->valid = 0;
691
14.5k
    } else {
692
0
        __xmlRaiseError(schannel, NULL, NULL,
693
0
                    ctxt, NULL, XML_FROM_DTD, error,
694
0
                    XML_ERR_ERROR, NULL, 0, (const char *) str1,
695
0
        (const char *) str2, NULL, 0, 0,
696
0
        msg, (const char *) str1, (const char *) str2);
697
0
    }
698
14.5k
}
699
700
/**
701
 * xmlFatalErrMsgInt:
702
 * @ctxt:  an XML parser context
703
 * @error:  the error number
704
 * @msg:  the error message
705
 * @val:  an integer value
706
 *
707
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
708
 */
709
static void LIBXML_ATTR_FORMAT(3,0)
710
xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
711
                  const char *msg, int val)
712
97.3M
{
713
97.3M
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
714
97.3M
        (ctxt->instate == XML_PARSER_EOF))
715
0
  return;
716
97.3M
    if (ctxt != NULL)
717
97.3M
  ctxt->errNo = error;
718
97.3M
    __xmlRaiseError(NULL, NULL, NULL,
719
97.3M
                    ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
720
97.3M
                    NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
721
97.3M
    if (ctxt != NULL) {
722
97.3M
  ctxt->wellFormed = 0;
723
97.3M
  if (ctxt->recovery == 0)
724
85.6M
      ctxt->disableSAX = 1;
725
97.3M
    }
726
97.3M
}
727
728
/**
729
 * xmlFatalErrMsgStrIntStr:
730
 * @ctxt:  an XML parser context
731
 * @error:  the error number
732
 * @msg:  the error message
733
 * @str1:  a string info
734
 * @val:  an integer value
735
 * @str2:  a string info
736
 *
737
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
738
 */
739
static void LIBXML_ATTR_FORMAT(3,0)
740
xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
741
                  const char *msg, const xmlChar *str1, int val,
742
      const xmlChar *str2)
743
23.3M
{
744
23.3M
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
745
23.3M
        (ctxt->instate == XML_PARSER_EOF))
746
0
  return;
747
23.3M
    if (ctxt != NULL)
748
23.3M
  ctxt->errNo = error;
749
23.3M
    __xmlRaiseError(NULL, NULL, NULL,
750
23.3M
                    ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
751
23.3M
                    NULL, 0, (const char *) str1, (const char *) str2,
752
23.3M
        NULL, val, 0, msg, str1, val, str2);
753
23.3M
    if (ctxt != NULL) {
754
23.3M
  ctxt->wellFormed = 0;
755
23.3M
  if (ctxt->recovery == 0)
756
20.7M
      ctxt->disableSAX = 1;
757
23.3M
    }
758
23.3M
}
759
760
/**
761
 * xmlFatalErrMsgStr:
762
 * @ctxt:  an XML parser context
763
 * @error:  the error number
764
 * @msg:  the error message
765
 * @val:  a string value
766
 *
767
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
768
 */
769
static void LIBXML_ATTR_FORMAT(3,0)
770
xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
771
                  const char *msg, const xmlChar * val)
772
21.4M
{
773
21.4M
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
774
21.4M
        (ctxt->instate == XML_PARSER_EOF))
775
0
  return;
776
21.4M
    if (ctxt != NULL)
777
21.4M
  ctxt->errNo = error;
778
21.4M
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
779
21.4M
                    XML_FROM_PARSER, error, XML_ERR_FATAL,
780
21.4M
                    NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
781
21.4M
                    val);
782
21.4M
    if (ctxt != NULL) {
783
21.4M
  ctxt->wellFormed = 0;
784
21.4M
  if (ctxt->recovery == 0)
785
18.5M
      ctxt->disableSAX = 1;
786
21.4M
    }
787
21.4M
}
788
789
/**
790
 * xmlErrMsgStr:
791
 * @ctxt:  an XML parser context
792
 * @error:  the error number
793
 * @msg:  the error message
794
 * @val:  a string value
795
 *
796
 * Handle a non-fatal parser error
797
 */
798
static void LIBXML_ATTR_FORMAT(3,0)
799
xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
800
                  const char *msg, const xmlChar * val)
801
92.1k
{
802
92.1k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
803
92.1k
        (ctxt->instate == XML_PARSER_EOF))
804
0
  return;
805
92.1k
    if (ctxt != NULL)
806
92.1k
  ctxt->errNo = error;
807
92.1k
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
808
92.1k
                    XML_FROM_PARSER, error, XML_ERR_ERROR,
809
92.1k
                    NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
810
92.1k
                    val);
811
92.1k
}
812
813
/**
814
 * xmlNsErr:
815
 * @ctxt:  an XML parser context
816
 * @error:  the error number
817
 * @msg:  the message
818
 * @info1:  extra information string
819
 * @info2:  extra information string
820
 *
821
 * Handle a namespace error: raised at XML_ERR_ERROR level, clears nsWellFormed
822
 */
823
static void LIBXML_ATTR_FORMAT(3,0)
824
xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
825
         const char *msg,
826
         const xmlChar * info1, const xmlChar * info2,
827
         const xmlChar * info3)
828
1.25M
{
829
1.25M
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
830
1.25M
        (ctxt->instate == XML_PARSER_EOF))
831
0
  return;
832
1.25M
    if (ctxt != NULL)
833
1.25M
  ctxt->errNo = error;
834
1.25M
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
835
1.25M
                    XML_ERR_ERROR, NULL, 0, (const char *) info1,
836
1.25M
                    (const char *) info2, (const char *) info3, 0, 0, msg,
837
1.25M
                    info1, info2, info3);
838
1.25M
    if (ctxt != NULL)
839
1.25M
  ctxt->nsWellFormed = 0;
840
1.25M
}
841
842
/**
843
 * xmlNsWarn
844
 * @ctxt:  an XML parser context
845
 * @error:  the error number
846
 * @msg:  the message
847
 * @info1:  extra information string
848
 * @info2:  extra information string
849
 *
850
 * Handle a namespace warning error
851
 */
852
static void LIBXML_ATTR_FORMAT(3,0)
853
xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
854
         const char *msg,
855
         const xmlChar * info1, const xmlChar * info2,
856
         const xmlChar * info3)
857
60.9k
{
858
60.9k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
859
60.9k
        (ctxt->instate == XML_PARSER_EOF))
860
0
  return;
861
60.9k
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
862
60.9k
                    XML_ERR_WARNING, NULL, 0, (const char *) info1,
863
60.9k
                    (const char *) info2, (const char *) info3, 0, 0, msg,
864
60.9k
                    info1, info2, info3);
865
60.9k
}
866
867
/************************************************************************
868
 *                  *
869
 *    Library wide options          *
870
 *                  *
871
 ************************************************************************/
872
873
/**
874
  * xmlHasFeature:
875
  * @feature: the feature to be examined
876
  *
877
  * Examines if the library has been compiled with a given feature.
878
  *
879
  * Returns a non-zero value if the feature exist, otherwise zero.
880
  * Returns zero (0) if the feature does not exist or an unknown
881
  * unknown feature is requested, non-zero otherwise.
882
  */
883
int
884
xmlHasFeature(xmlFeature feature)
885
0
{
886
0
    switch (feature) {
887
0
  case XML_WITH_THREAD:
888
0
#ifdef LIBXML_THREAD_ENABLED
889
0
      return(1);
890
#else
891
      return(0);
892
#endif
893
0
        case XML_WITH_TREE:
894
0
#ifdef LIBXML_TREE_ENABLED
895
0
            return(1);
896
#else
897
            return(0);
898
#endif
899
0
        case XML_WITH_OUTPUT:
900
0
#ifdef LIBXML_OUTPUT_ENABLED
901
0
            return(1);
902
#else
903
            return(0);
904
#endif
905
0
        case XML_WITH_PUSH:
906
0
#ifdef LIBXML_PUSH_ENABLED
907
0
            return(1);
908
#else
909
            return(0);
910
#endif
911
0
        case XML_WITH_READER:
912
0
#ifdef LIBXML_READER_ENABLED
913
0
            return(1);
914
#else
915
            return(0);
916
#endif
917
0
        case XML_WITH_PATTERN:
918
0
#ifdef LIBXML_PATTERN_ENABLED
919
0
            return(1);
920
#else
921
            return(0);
922
#endif
923
0
        case XML_WITH_WRITER:
924
0
#ifdef LIBXML_WRITER_ENABLED
925
0
            return(1);
926
#else
927
            return(0);
928
#endif
929
0
        case XML_WITH_SAX1:
930
0
#ifdef LIBXML_SAX1_ENABLED
931
0
            return(1);
932
#else
933
            return(0);
934
#endif
935
0
        case XML_WITH_FTP:
936
#ifdef LIBXML_FTP_ENABLED
937
            return(1);
938
#else
939
0
            return(0);
940
0
#endif
941
0
        case XML_WITH_HTTP:
942
#ifdef LIBXML_HTTP_ENABLED
943
            return(1);
944
#else
945
0
            return(0);
946
0
#endif
947
0
        case XML_WITH_VALID:
948
0
#ifdef LIBXML_VALID_ENABLED
949
0
            return(1);
950
#else
951
            return(0);
952
#endif
953
0
        case XML_WITH_HTML:
954
0
#ifdef LIBXML_HTML_ENABLED
955
0
            return(1);
956
#else
957
            return(0);
958
#endif
959
0
        case XML_WITH_LEGACY:
960
#ifdef LIBXML_LEGACY_ENABLED
961
            return(1);
962
#else
963
0
            return(0);
964
0
#endif
965
0
        case XML_WITH_C14N:
966
0
#ifdef LIBXML_C14N_ENABLED
967
0
            return(1);
968
#else
969
            return(0);
970
#endif
971
0
        case XML_WITH_CATALOG:
972
0
#ifdef LIBXML_CATALOG_ENABLED
973
0
            return(1);
974
#else
975
            return(0);
976
#endif
977
0
        case XML_WITH_XPATH:
978
0
#ifdef LIBXML_XPATH_ENABLED
979
0
            return(1);
980
#else
981
            return(0);
982
#endif
983
0
        case XML_WITH_XPTR:
984
0
#ifdef LIBXML_XPTR_ENABLED
985
0
            return(1);
986
#else
987
            return(0);
988
#endif
989
0
        case XML_WITH_XINCLUDE:
990
0
#ifdef LIBXML_XINCLUDE_ENABLED
991
0
            return(1);
992
#else
993
            return(0);
994
#endif
995
0
        case XML_WITH_ICONV:
996
0
#ifdef LIBXML_ICONV_ENABLED
997
0
            return(1);
998
#else
999
            return(0);
1000
#endif
1001
0
        case XML_WITH_ISO8859X:
1002
0
#ifdef LIBXML_ISO8859X_ENABLED
1003
0
            return(1);
1004
#else
1005
            return(0);
1006
#endif
1007
0
        case XML_WITH_UNICODE:
1008
0
#ifdef LIBXML_UNICODE_ENABLED
1009
0
            return(1);
1010
#else
1011
            return(0);
1012
#endif
1013
0
        case XML_WITH_REGEXP:
1014
0
#ifdef LIBXML_REGEXP_ENABLED
1015
0
            return(1);
1016
#else
1017
            return(0);
1018
#endif
1019
0
        case XML_WITH_AUTOMATA:
1020
0
#ifdef LIBXML_AUTOMATA_ENABLED
1021
0
            return(1);
1022
#else
1023
            return(0);
1024
#endif
1025
0
        case XML_WITH_EXPR:
1026
#ifdef LIBXML_EXPR_ENABLED
1027
            return(1);
1028
#else
1029
0
            return(0);
1030
0
#endif
1031
0
        case XML_WITH_SCHEMAS:
1032
0
#ifdef LIBXML_SCHEMAS_ENABLED
1033
0
            return(1);
1034
#else
1035
            return(0);
1036
#endif
1037
0
        case XML_WITH_SCHEMATRON:
1038
0
#ifdef LIBXML_SCHEMATRON_ENABLED
1039
0
            return(1);
1040
#else
1041
            return(0);
1042
#endif
1043
0
        case XML_WITH_MODULES:
1044
0
#ifdef LIBXML_MODULES_ENABLED
1045
0
            return(1);
1046
#else
1047
            return(0);
1048
#endif
1049
0
        case XML_WITH_DEBUG:
1050
#ifdef LIBXML_DEBUG_ENABLED
1051
            return(1);
1052
#else
1053
0
            return(0);
1054
0
#endif
1055
0
        case XML_WITH_DEBUG_MEM:
1056
#ifdef DEBUG_MEMORY_LOCATION
1057
            return(1);
1058
#else
1059
0
            return(0);
1060
0
#endif
1061
0
        case XML_WITH_DEBUG_RUN:
1062
0
            return(0);
1063
0
        case XML_WITH_ZLIB:
1064
0
#ifdef LIBXML_ZLIB_ENABLED
1065
0
            return(1);
1066
#else
1067
            return(0);
1068
#endif
1069
0
        case XML_WITH_LZMA:
1070
0
#ifdef LIBXML_LZMA_ENABLED
1071
0
            return(1);
1072
#else
1073
            return(0);
1074
#endif
1075
0
        case XML_WITH_ICU:
1076
#ifdef LIBXML_ICU_ENABLED
1077
            return(1);
1078
#else
1079
0
            return(0);
1080
0
#endif
1081
0
        default:
1082
0
      break;
1083
0
     }
1084
0
     return(0);
1085
0
}
1086
1087
/************************************************************************
1088
 *                  *
1089
 *    SAX2 defaulted attributes handling      *
1090
 *                  *
1091
 ************************************************************************/
1092
1093
/**
1094
 * xmlDetectSAX2:
1095
 * @ctxt:  an XML parser context
1096
 *
1097
 * Do the SAX2 detection and specific initialization
1098
 */
1099
static void
1100
6.83M
xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
1101
6.83M
    xmlSAXHandlerPtr sax;
1102
1103
    /* Avoid unused variable warning if features are disabled. */
1104
6.83M
    (void) sax;
1105
1106
6.83M
    if (ctxt == NULL) return;
1107
6.83M
    sax = ctxt->sax;
1108
6.83M
#ifdef LIBXML_SAX1_ENABLED
1109
6.83M
    if ((sax) &&  (sax->initialized == XML_SAX2_MAGIC) &&
1110
6.83M
        ((sax->startElementNs != NULL) ||
1111
1.40M
         (sax->endElementNs != NULL) ||
1112
1.40M
         ((sax->startElement == NULL) && (sax->endElement == NULL))))
1113
1.40M
        ctxt->sax2 = 1;
1114
#else
1115
    ctxt->sax2 = 1;
1116
#endif /* LIBXML_SAX1_ENABLED */
1117
1118
6.83M
    ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
1119
6.83M
    ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
1120
6.83M
    ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
1121
6.83M
    if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
1122
6.83M
    (ctxt->str_xml_ns == NULL)) {
1123
0
        xmlErrMemory(ctxt, NULL);
1124
0
    }
1125
6.83M
}
1126
1127
typedef struct _xmlDefAttrs xmlDefAttrs;
1128
typedef xmlDefAttrs *xmlDefAttrsPtr;
1129
struct _xmlDefAttrs {
1130
    int nbAttrs;  /* number of defaulted attributes on that element */
1131
    int maxAttrs;       /* the size of the array */
1132
#if __STDC_VERSION__ >= 199901L
1133
    /* Using a C99 flexible array member avoids UBSan errors. */
1134
    const xmlChar *values[]; /* array of localname/prefix/values/external */
1135
#else
1136
    const xmlChar *values[5];
1137
#endif
1138
};
1139
1140
/**
1141
 * xmlAttrNormalizeSpace:
1142
 * @src: the source string
1143
 * @dst: the target string
1144
 *
1145
 * Normalize the space in non CDATA attribute values:
1146
 * If the attribute type is not CDATA, then the XML processor MUST further
1147
 * process the normalized attribute value by discarding any leading and
1148
 * trailing space (#x20) characters, and by replacing sequences of space
1149
 * (#x20) characters by a single space (#x20) character.
1150
 * Note that the size of dst need to be at least src, and if one doesn't need
1151
 * to preserve dst (and it doesn't come from a dictionary or read-only) then
1152
 * passing src as dst is just fine.
1153
 *
1154
 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1155
 *         is needed.
1156
 */
1157
static xmlChar *
1158
xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
1159
103k
{
1160
103k
    if ((src == NULL) || (dst == NULL))
1161
0
        return(NULL);
1162
1163
158k
    while (*src == 0x20) src++;
1164
4.50M
    while (*src != 0) {
1165
4.40M
  if (*src == 0x20) {
1166
452k
      while (*src == 0x20) src++;
1167
82.2k
      if (*src != 0)
1168
68.6k
    *dst++ = 0x20;
1169
4.32M
  } else {
1170
4.32M
      *dst++ = *src++;
1171
4.32M
  }
1172
4.40M
    }
1173
103k
    *dst = 0;
1174
103k
    if (dst == src)
1175
79.7k
       return(NULL);
1176
24.0k
    return(dst);
1177
103k
}
1178
1179
/**
1180
 * xmlAttrNormalizeSpace2:
1181
 * @src: the source string
1182
 *
1183
 * Normalize the space in non CDATA attribute values, a slightly more complex
1184
 * front end to avoid allocation problems when running on attribute values
1185
 * coming from the input.
1186
 *
1187
 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1188
 *         is needed.
1189
 */
1190
static const xmlChar *
1191
xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
1192
59.5k
{
1193
59.5k
    int i;
1194
59.5k
    int remove_head = 0;
1195
59.5k
    int need_realloc = 0;
1196
59.5k
    const xmlChar *cur;
1197
1198
59.5k
    if ((ctxt == NULL) || (src == NULL) || (len == NULL))
1199
0
        return(NULL);
1200
59.5k
    i = *len;
1201
59.5k
    if (i <= 0)
1202
1.86k
        return(NULL);
1203
1204
57.6k
    cur = src;
1205
73.2k
    while (*cur == 0x20) {
1206
15.6k
        cur++;
1207
15.6k
  remove_head++;
1208
15.6k
    }
1209
2.31M
    while (*cur != 0) {
1210
2.26M
  if (*cur == 0x20) {
1211
59.8k
      cur++;
1212
59.8k
      if ((*cur == 0x20) || (*cur == 0)) {
1213
10.8k
          need_realloc = 1;
1214
10.8k
    break;
1215
10.8k
      }
1216
59.8k
  } else
1217
2.20M
      cur++;
1218
2.26M
    }
1219
57.6k
    if (need_realloc) {
1220
10.8k
        xmlChar *ret;
1221
1222
10.8k
  ret = xmlStrndup(src + remove_head, i - remove_head + 1);
1223
10.8k
  if (ret == NULL) {
1224
0
      xmlErrMemory(ctxt, NULL);
1225
0
      return(NULL);
1226
0
  }
1227
10.8k
  xmlAttrNormalizeSpace(ret, ret);
1228
10.8k
  *len = strlen((const char *)ret);
1229
10.8k
        return(ret);
1230
46.7k
    } else if (remove_head) {
1231
1.41k
        *len -= remove_head;
1232
1.41k
        memmove(src, src + remove_head, 1 + *len);
1233
1.41k
  return(src);
1234
1.41k
    }
1235
45.3k
    return(NULL);
1236
57.6k
}
1237
1238
/**
1239
 * xmlAddDefAttrs:
1240
 * @ctxt:  an XML parser context
1241
 * @fullname:  the element fullname
1242
 * @fullattr:  the attribute fullname
1243
 * @value:  the attribute value
1244
 *
1245
 * Add a defaulted attribute for an element
1246
 */
1247
static void
1248
xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1249
               const xmlChar *fullname,
1250
               const xmlChar *fullattr,
1251
145k
               const xmlChar *value) {
1252
145k
    xmlDefAttrsPtr defaults;
1253
145k
    int len;
1254
145k
    const xmlChar *name;
1255
145k
    const xmlChar *prefix;
1256
1257
    /*
1258
     * Allows to detect attribute redefinitions
1259
     */
1260
145k
    if (ctxt->attsSpecial != NULL) {
1261
110k
        if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1262
4.10k
      return;
1263
110k
    }
1264
1265
141k
    if (ctxt->attsDefault == NULL) {
1266
40.2k
        ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
1267
40.2k
  if (ctxt->attsDefault == NULL)
1268
0
      goto mem_error;
1269
40.2k
    }
1270
1271
    /*
1272
     * split the element name into prefix:localname , the string found
1273
     * are within the DTD and then not associated to namespace names.
1274
     */
1275
141k
    name = xmlSplitQName3(fullname, &len);
1276
141k
    if (name == NULL) {
1277
120k
        name = xmlDictLookup(ctxt->dict, fullname, -1);
1278
120k
  prefix = NULL;
1279
120k
    } else {
1280
21.3k
        name = xmlDictLookup(ctxt->dict, name, -1);
1281
21.3k
  prefix = xmlDictLookup(ctxt->dict, fullname, len);
1282
21.3k
    }
1283
1284
    /*
1285
     * make sure there is some storage
1286
     */
1287
141k
    defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1288
141k
    if (defaults == NULL) {
1289
79.8k
        defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
1290
79.8k
                     (4 * 5) * sizeof(const xmlChar *));
1291
79.8k
  if (defaults == NULL)
1292
0
      goto mem_error;
1293
79.8k
  defaults->nbAttrs = 0;
1294
79.8k
  defaults->maxAttrs = 4;
1295
79.8k
  if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1296
79.8k
                          defaults, NULL) < 0) {
1297
0
      xmlFree(defaults);
1298
0
      goto mem_error;
1299
0
  }
1300
79.8k
    } else if (defaults->nbAttrs >= defaults->maxAttrs) {
1301
2.48k
        xmlDefAttrsPtr temp;
1302
1303
2.48k
        temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
1304
2.48k
           (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
1305
2.48k
  if (temp == NULL)
1306
0
      goto mem_error;
1307
2.48k
  defaults = temp;
1308
2.48k
  defaults->maxAttrs *= 2;
1309
2.48k
  if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1310
2.48k
                          defaults, NULL) < 0) {
1311
0
      xmlFree(defaults);
1312
0
      goto mem_error;
1313
0
  }
1314
2.48k
    }
1315
1316
    /*
1317
     * Split the element name into prefix:localname , the string found
1318
     * are within the DTD and hen not associated to namespace names.
1319
     */
1320
141k
    name = xmlSplitQName3(fullattr, &len);
1321
141k
    if (name == NULL) {
1322
94.2k
        name = xmlDictLookup(ctxt->dict, fullattr, -1);
1323
94.2k
  prefix = NULL;
1324
94.2k
    } else {
1325
47.1k
        name = xmlDictLookup(ctxt->dict, name, -1);
1326
47.1k
  prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1327
47.1k
    }
1328
1329
141k
    defaults->values[5 * defaults->nbAttrs] = name;
1330
141k
    defaults->values[5 * defaults->nbAttrs + 1] = prefix;
1331
    /* intern the string and precompute the end */
1332
141k
    len = xmlStrlen(value);
1333
141k
    value = xmlDictLookup(ctxt->dict, value, len);
1334
141k
    defaults->values[5 * defaults->nbAttrs + 2] = value;
1335
141k
    defaults->values[5 * defaults->nbAttrs + 3] = value + len;
1336
141k
    if (ctxt->external)
1337
75.3k
        defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
1338
66.1k
    else
1339
66.1k
        defaults->values[5 * defaults->nbAttrs + 4] = NULL;
1340
141k
    defaults->nbAttrs++;
1341
1342
141k
    return;
1343
1344
0
mem_error:
1345
0
    xmlErrMemory(ctxt, NULL);
1346
0
    return;
1347
141k
}
1348
1349
/**
1350
 * xmlAddSpecialAttr:
1351
 * @ctxt:  an XML parser context
1352
 * @fullname:  the element fullname
1353
 * @fullattr:  the attribute fullname
1354
 * @type:  the attribute type
1355
 *
1356
 * Register this attribute type
1357
 */
1358
static void
1359
xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1360
      const xmlChar *fullname,
1361
      const xmlChar *fullattr,
1362
      int type)
1363
1.55M
{
1364
1.55M
    if (ctxt->attsSpecial == NULL) {
1365
86.1k
        ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
1366
86.1k
  if (ctxt->attsSpecial == NULL)
1367
0
      goto mem_error;
1368
86.1k
    }
1369
1370
1.55M
    if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1371
50.3k
        return;
1372
1373
1.50M
    xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1374
1.50M
                     (void *) (ptrdiff_t) type);
1375
1.50M
    return;
1376
1377
0
mem_error:
1378
0
    xmlErrMemory(ctxt, NULL);
1379
0
    return;
1380
1.55M
}
1381
1382
/**
1383
 * xmlCleanSpecialAttrCallback:
1384
 *
1385
 * Removes CDATA attributes from the special attribute table
1386
 */
1387
static void
1388
xmlCleanSpecialAttrCallback(void *payload, void *data,
1389
                            const xmlChar *fullname, const xmlChar *fullattr,
1390
1.50M
                            const xmlChar *unused ATTRIBUTE_UNUSED) {
1391
1.50M
    xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1392
1393
1.50M
    if (((ptrdiff_t) payload) == XML_ATTRIBUTE_CDATA) {
1394
530k
        xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1395
530k
    }
1396
1.50M
}
1397
1398
/**
1399
 * xmlCleanSpecialAttr:
1400
 * @ctxt:  an XML parser context
1401
 *
1402
 * Trim the list of attributes defined to remove all those of type
1403
 * CDATA as they are not special. This call should be done when finishing
1404
 * to parse the DTD and before starting to parse the document root.
1405
 */
1406
static void
1407
xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1408
360k
{
1409
360k
    if (ctxt->attsSpecial == NULL)
1410
276k
        return;
1411
1412
84.5k
    xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1413
1414
84.5k
    if (xmlHashSize(ctxt->attsSpecial) == 0) {
1415
13.1k
        xmlHashFree(ctxt->attsSpecial, NULL);
1416
13.1k
        ctxt->attsSpecial = NULL;
1417
13.1k
    }
1418
84.5k
    return;
1419
360k
}
1420
1421
/**
1422
 * xmlCheckLanguageID:
1423
 * @lang:  pointer to the string value
1424
 *
1425
 * Checks that the value conforms to the LanguageID production:
1426
 *
1427
 * NOTE: this is somewhat deprecated, those productions were removed from
1428
 *       the XML Second edition.
1429
 *
1430
 * [33] LanguageID ::= Langcode ('-' Subcode)*
1431
 * [34] Langcode ::= ISO639Code |  IanaCode |  UserCode
1432
 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1433
 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1434
 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1435
 * [38] Subcode ::= ([a-z] | [A-Z])+
1436
 *
1437
 * The current REC reference the successors of RFC 1766, currently 5646
1438
 *
1439
 * http://www.rfc-editor.org/rfc/rfc5646.txt
1440
 * langtag       = language
1441
 *                 ["-" script]
1442
 *                 ["-" region]
1443
 *                 *("-" variant)
1444
 *                 *("-" extension)
1445
 *                 ["-" privateuse]
1446
 * language      = 2*3ALPHA            ; shortest ISO 639 code
1447
 *                 ["-" extlang]       ; sometimes followed by
1448
 *                                     ; extended language subtags
1449
 *               / 4ALPHA              ; or reserved for future use
1450
 *               / 5*8ALPHA            ; or registered language subtag
1451
 *
1452
 * extlang       = 3ALPHA              ; selected ISO 639 codes
1453
 *                 *2("-" 3ALPHA)      ; permanently reserved
1454
 *
1455
 * script        = 4ALPHA              ; ISO 15924 code
1456
 *
1457
 * region        = 2ALPHA              ; ISO 3166-1 code
1458
 *               / 3DIGIT              ; UN M.49 code
1459
 *
1460
 * variant       = 5*8alphanum         ; registered variants
1461
 *               / (DIGIT 3alphanum)
1462
 *
1463
 * extension     = singleton 1*("-" (2*8alphanum))
1464
 *
1465
 *                                     ; Single alphanumerics
1466
 *                                     ; "x" reserved for private use
1467
 * singleton     = DIGIT               ; 0 - 9
1468
 *               / %x41-57             ; A - W
1469
 *               / %x59-5A             ; Y - Z
1470
 *               / %x61-77             ; a - w
1471
 *               / %x79-7A             ; y - z
1472
 *
1473
 * it sounds right to still allow Irregular i-xxx IANA and user codes too
1474
 * The parser below doesn't try to cope with extension or privateuse
1475
 * that could be added but that's not interoperable anyway
1476
 *
1477
 * Returns 1 if correct 0 otherwise
1478
 **/
1479
int
1480
xmlCheckLanguageID(const xmlChar * lang)
1481
37.5k
{
1482
37.5k
    const xmlChar *cur = lang, *nxt;
1483
1484
37.5k
    if (cur == NULL)
1485
887
        return (0);
1486
36.6k
    if (((cur[0] == 'i') && (cur[1] == '-')) ||
1487
36.6k
        ((cur[0] == 'I') && (cur[1] == '-')) ||
1488
36.6k
        ((cur[0] == 'x') && (cur[1] == '-')) ||
1489
36.6k
        ((cur[0] == 'X') && (cur[1] == '-'))) {
1490
        /*
1491
         * Still allow IANA code and user code which were coming
1492
         * from the previous version of the XML-1.0 specification
1493
         * it's deprecated but we should not fail
1494
         */
1495
1.71k
        cur += 2;
1496
21.5k
        while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1497
21.5k
               ((cur[0] >= 'a') && (cur[0] <= 'z')))
1498
19.8k
            cur++;
1499
1.71k
        return(cur[0] == 0);
1500
1.71k
    }
1501
34.9k
    nxt = cur;
1502
131k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1503
131k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1504
96.2k
           nxt++;
1505
34.9k
    if (nxt - cur >= 4) {
1506
        /*
1507
         * Reserved
1508
         */
1509
1.57k
        if ((nxt - cur > 8) || (nxt[0] != 0))
1510
678
            return(0);
1511
892
        return(1);
1512
1.57k
    }
1513
33.3k
    if (nxt - cur < 2)
1514
1.29k
        return(0);
1515
    /* we got an ISO 639 code */
1516
32.0k
    if (nxt[0] == 0)
1517
21.2k
        return(1);
1518
10.8k
    if (nxt[0] != '-')
1519
1.25k
        return(0);
1520
1521
9.55k
    nxt++;
1522
9.55k
    cur = nxt;
1523
    /* now we can have extlang or script or region or variant */
1524
9.55k
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1525
1.45k
        goto region_m49;
1526
1527
46.9k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1528
46.9k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1529
38.8k
           nxt++;
1530
8.10k
    if (nxt - cur == 4)
1531
3.00k
        goto script;
1532
5.09k
    if (nxt - cur == 2)
1533
1.40k
        goto region;
1534
3.68k
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1535
563
        goto variant;
1536
3.12k
    if (nxt - cur != 3)
1537
973
        return(0);
1538
    /* we parsed an extlang */
1539
2.15k
    if (nxt[0] == 0)
1540
10
        return(1);
1541
2.14k
    if (nxt[0] != '-')
1542
617
        return(0);
1543
1544
1.52k
    nxt++;
1545
1.52k
    cur = nxt;
1546
    /* now we can have script or region or variant */
1547
1.52k
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1548
252
        goto region_m49;
1549
1550
10.9k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1551
10.9k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1552
9.63k
           nxt++;
1553
1.27k
    if (nxt - cur == 2)
1554
235
        goto region;
1555
1.03k
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1556
370
        goto variant;
1557
668
    if (nxt - cur != 4)
1558
497
        return(0);
1559
    /* we parsed a script */
1560
3.17k
script:
1561
3.17k
    if (nxt[0] == 0)
1562
191
        return(1);
1563
2.98k
    if (nxt[0] != '-')
1564
1.29k
        return(0);
1565
1566
1.69k
    nxt++;
1567
1.69k
    cur = nxt;
1568
    /* now we can have region or variant */
1569
1.69k
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1570
306
        goto region_m49;
1571
1572
13.6k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1573
13.6k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1574
12.2k
           nxt++;
1575
1576
1.39k
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1577
384
        goto variant;
1578
1.00k
    if (nxt - cur != 2)
1579
611
        return(0);
1580
    /* we parsed a region */
1581
2.91k
region:
1582
2.91k
    if (nxt[0] == 0)
1583
778
        return(1);
1584
2.13k
    if (nxt[0] != '-')
1585
1.23k
        return(0);
1586
1587
898
    nxt++;
1588
898
    cur = nxt;
1589
    /* now we can just have a variant */
1590
11.0k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1591
11.0k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1592
10.1k
           nxt++;
1593
1594
898
    if ((nxt - cur < 5) || (nxt - cur > 8))
1595
612
        return(0);
1596
1597
    /* we parsed a variant */
1598
1.60k
variant:
1599
1.60k
    if (nxt[0] == 0)
1600
355
        return(1);
1601
1.24k
    if (nxt[0] != '-')
1602
1.17k
        return(0);
1603
    /* extensions and private use subtags not checked */
1604
75
    return (1);
1605
1606
2.01k
region_m49:
1607
2.01k
    if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
1608
2.01k
        ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
1609
875
        nxt += 3;
1610
875
        goto region;
1611
875
    }
1612
1.13k
    return(0);
1613
2.01k
}
1614
1615
/************************************************************************
1616
 *                  *
1617
 *    Parser stacks related functions and macros    *
1618
 *                  *
1619
 ************************************************************************/
1620
1621
static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1622
                                            const xmlChar ** str);
1623
1624
#ifdef SAX2
1625
/**
1626
 * nsPush:
1627
 * @ctxt:  an XML parser context
1628
 * @prefix:  the namespace prefix or NULL
1629
 * @URL:  the namespace name
1630
 *
1631
 * Pushes a new parser namespace on top of the ns stack
1632
 *
1633
 * Returns -1 in case of error, -2 if the namespace should be discarded
1634
 *     and the index in the stack otherwise.
1635
 */
1636
static int
1637
nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1638
1.58M
{
1639
1.58M
    if (ctxt->options & XML_PARSE_NSCLEAN) {
1640
283k
        int i;
1641
407k
  for (i = ctxt->nsNr - 2;i >= 0;i -= 2) {
1642
174k
      if (ctxt->nsTab[i] == prefix) {
1643
    /* in scope */
1644
50.5k
          if (ctxt->nsTab[i + 1] == URL)
1645
22.9k
        return(-2);
1646
    /* out of scope keep it */
1647
27.5k
    break;
1648
50.5k
      }
1649
174k
  }
1650
283k
    }
1651
1.55M
    if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1652
219k
  ctxt->nsMax = 10;
1653
219k
  ctxt->nsNr = 0;
1654
219k
  ctxt->nsTab = (const xmlChar **)
1655
219k
                xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1656
219k
  if (ctxt->nsTab == NULL) {
1657
0
      xmlErrMemory(ctxt, NULL);
1658
0
      ctxt->nsMax = 0;
1659
0
            return (-1);
1660
0
  }
1661
1.33M
    } else if (ctxt->nsNr >= ctxt->nsMax) {
1662
33.3k
        const xmlChar ** tmp;
1663
33.3k
        ctxt->nsMax *= 2;
1664
33.3k
        tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1665
33.3k
            ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1666
33.3k
        if (tmp == NULL) {
1667
0
            xmlErrMemory(ctxt, NULL);
1668
0
      ctxt->nsMax /= 2;
1669
0
            return (-1);
1670
0
        }
1671
33.3k
  ctxt->nsTab = tmp;
1672
33.3k
    }
1673
1.55M
    ctxt->nsTab[ctxt->nsNr++] = prefix;
1674
1.55M
    ctxt->nsTab[ctxt->nsNr++] = URL;
1675
1.55M
    return (ctxt->nsNr);
1676
1.55M
}
1677
/**
1678
 * nsPop:
1679
 * @ctxt: an XML parser context
1680
 * @nr:  the number to pop
1681
 *
1682
 * Pops the top @nr parser prefix/namespace from the ns stack
1683
 *
1684
 * Returns the number of namespaces removed
1685
 */
1686
static int
1687
nsPop(xmlParserCtxtPtr ctxt, int nr)
1688
288k
{
1689
288k
    int i;
1690
1691
288k
    if (ctxt->nsTab == NULL) return(0);
1692
288k
    if (ctxt->nsNr < nr) {
1693
0
        xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1694
0
        nr = ctxt->nsNr;
1695
0
    }
1696
288k
    if (ctxt->nsNr <= 0)
1697
0
        return (0);
1698
1699
902k
    for (i = 0;i < nr;i++) {
1700
613k
         ctxt->nsNr--;
1701
613k
   ctxt->nsTab[ctxt->nsNr] = NULL;
1702
613k
    }
1703
288k
    return(nr);
1704
288k
}
1705
#endif
1706
1707
static int
1708
269k
xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1709
269k
    const xmlChar **atts;
1710
269k
    int *attallocs;
1711
269k
    int maxatts;
1712
1713
269k
    if (ctxt->atts == NULL) {
1714
269k
  maxatts = 55; /* allow for 10 attrs by default */
1715
269k
  atts = (const xmlChar **)
1716
269k
         xmlMalloc(maxatts * sizeof(xmlChar *));
1717
269k
  if (atts == NULL) goto mem_error;
1718
269k
  ctxt->atts = atts;
1719
269k
  attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1720
269k
  if (attallocs == NULL) goto mem_error;
1721
269k
  ctxt->attallocs = attallocs;
1722
269k
  ctxt->maxatts = maxatts;
1723
269k
    } else if (nr + 5 > ctxt->maxatts) {
1724
449
  maxatts = (nr + 5) * 2;
1725
449
  atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1726
449
             maxatts * sizeof(const xmlChar *));
1727
449
  if (atts == NULL) goto mem_error;
1728
449
  ctxt->atts = atts;
1729
449
  attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1730
449
                               (maxatts / 5) * sizeof(int));
1731
449
  if (attallocs == NULL) goto mem_error;
1732
449
  ctxt->attallocs = attallocs;
1733
449
  ctxt->maxatts = maxatts;
1734
449
    }
1735
269k
    return(ctxt->maxatts);
1736
0
mem_error:
1737
0
    xmlErrMemory(ctxt, NULL);
1738
0
    return(-1);
1739
269k
}
1740
1741
/**
1742
 * inputPush:
1743
 * @ctxt:  an XML parser context
1744
 * @value:  the parser input
1745
 *
1746
 * Pushes a new parser input on top of the input stack
1747
 *
1748
 * Returns -1 in case of error, the index in the stack otherwise
1749
 */
1750
int
1751
inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1752
7.51M
{
1753
7.51M
    if ((ctxt == NULL) || (value == NULL))
1754
0
        return(-1);
1755
7.51M
    if (ctxt->inputNr >= ctxt->inputMax) {
1756
2.52k
        ctxt->inputMax *= 2;
1757
2.52k
        ctxt->inputTab =
1758
2.52k
            (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1759
2.52k
                                             ctxt->inputMax *
1760
2.52k
                                             sizeof(ctxt->inputTab[0]));
1761
2.52k
        if (ctxt->inputTab == NULL) {
1762
0
            xmlErrMemory(ctxt, NULL);
1763
0
      ctxt->inputMax /= 2;
1764
0
            return (-1);
1765
0
        }
1766
2.52k
    }
1767
7.51M
    ctxt->inputTab[ctxt->inputNr] = value;
1768
7.51M
    ctxt->input = value;
1769
7.51M
    return (ctxt->inputNr++);
1770
7.51M
}
1771
/**
1772
 * inputPop:
1773
 * @ctxt: an XML parser context
1774
 *
1775
 * Pops the top parser input from the input stack
1776
 *
1777
 * Returns the input just removed
1778
 */
1779
xmlParserInputPtr
1780
inputPop(xmlParserCtxtPtr ctxt)
1781
20.5M
{
1782
20.5M
    xmlParserInputPtr ret;
1783
1784
20.5M
    if (ctxt == NULL)
1785
0
        return(NULL);
1786
20.5M
    if (ctxt->inputNr <= 0)
1787
13.0M
        return (NULL);
1788
7.44M
    ctxt->inputNr--;
1789
7.44M
    if (ctxt->inputNr > 0)
1790
1.11M
        ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1791
6.32M
    else
1792
6.32M
        ctxt->input = NULL;
1793
7.44M
    ret = ctxt->inputTab[ctxt->inputNr];
1794
7.44M
    ctxt->inputTab[ctxt->inputNr] = NULL;
1795
7.44M
    return (ret);
1796
20.5M
}
1797
/**
1798
 * nodePush:
1799
 * @ctxt:  an XML parser context
1800
 * @value:  the element node
1801
 *
1802
 * Pushes a new element node on top of the node stack
1803
 *
1804
 * Returns -1 in case of error, the index in the stack otherwise
1805
 */
1806
int
1807
nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1808
30.9M
{
1809
30.9M
    if (ctxt == NULL) return(0);
1810
30.9M
    if (ctxt->nodeNr >= ctxt->nodeMax) {
1811
78.3k
        xmlNodePtr *tmp;
1812
1813
78.3k
  tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1814
78.3k
                                      ctxt->nodeMax * 2 *
1815
78.3k
                                      sizeof(ctxt->nodeTab[0]));
1816
78.3k
        if (tmp == NULL) {
1817
0
            xmlErrMemory(ctxt, NULL);
1818
0
            return (-1);
1819
0
        }
1820
78.3k
        ctxt->nodeTab = tmp;
1821
78.3k
  ctxt->nodeMax *= 2;
1822
78.3k
    }
1823
30.9M
    if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1824
30.9M
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
1825
15
  xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
1826
15
     "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
1827
15
        xmlParserMaxDepth);
1828
15
  xmlHaltParser(ctxt);
1829
15
  return(-1);
1830
15
    }
1831
30.9M
    ctxt->nodeTab[ctxt->nodeNr] = value;
1832
30.9M
    ctxt->node = value;
1833
30.9M
    return (ctxt->nodeNr++);
1834
30.9M
}
1835
1836
/**
1837
 * nodePop:
1838
 * @ctxt: an XML parser context
1839
 *
1840
 * Pops the top element node from the node stack
1841
 *
1842
 * Returns the node just removed
1843
 */
1844
xmlNodePtr
1845
nodePop(xmlParserCtxtPtr ctxt)
1846
30.9M
{
1847
30.9M
    xmlNodePtr ret;
1848
1849
30.9M
    if (ctxt == NULL) return(NULL);
1850
30.9M
    if (ctxt->nodeNr <= 0)
1851
7.12M
        return (NULL);
1852
23.8M
    ctxt->nodeNr--;
1853
23.8M
    if (ctxt->nodeNr > 0)
1854
21.2M
        ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1855
2.53M
    else
1856
2.53M
        ctxt->node = NULL;
1857
23.8M
    ret = ctxt->nodeTab[ctxt->nodeNr];
1858
23.8M
    ctxt->nodeTab[ctxt->nodeNr] = NULL;
1859
23.8M
    return (ret);
1860
30.9M
}
1861
1862
/**
1863
 * nameNsPush:
1864
 * @ctxt:  an XML parser context
1865
 * @value:  the element name
1866
 * @prefix:  the element prefix
1867
 * @URI:  the element namespace name
1868
 * @line:  the current line number for error messages
1869
 * @nsNr:  the number of namespaces pushed on the namespace table
1870
 *
1871
 * Pushes a new element name/prefix/URL on top of the name stack
1872
 *
1873
 * Returns -1 in case of error, the index in the stack otherwise
1874
 */
1875
static int
1876
nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1877
           const xmlChar *prefix, const xmlChar *URI, int line, int nsNr)
1878
123M
{
1879
123M
    xmlStartTag *tag;
1880
1881
123M
    if (ctxt->nameNr >= ctxt->nameMax) {
1882
1.00M
        const xmlChar * *tmp;
1883
1.00M
        xmlStartTag *tmp2;
1884
1.00M
        ctxt->nameMax *= 2;
1885
1.00M
        tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1886
1.00M
                                    ctxt->nameMax *
1887
1.00M
                                    sizeof(ctxt->nameTab[0]));
1888
1.00M
        if (tmp == NULL) {
1889
0
      ctxt->nameMax /= 2;
1890
0
      goto mem_error;
1891
0
        }
1892
1.00M
  ctxt->nameTab = tmp;
1893
1.00M
        tmp2 = (xmlStartTag *) xmlRealloc((void * *)ctxt->pushTab,
1894
1.00M
                                    ctxt->nameMax *
1895
1.00M
                                    sizeof(ctxt->pushTab[0]));
1896
1.00M
        if (tmp2 == NULL) {
1897
0
      ctxt->nameMax /= 2;
1898
0
      goto mem_error;
1899
0
        }
1900
1.00M
  ctxt->pushTab = tmp2;
1901
122M
    } else if (ctxt->pushTab == NULL) {
1902
5.84M
        ctxt->pushTab = (xmlStartTag *) xmlMalloc(ctxt->nameMax *
1903
5.84M
                                            sizeof(ctxt->pushTab[0]));
1904
5.84M
        if (ctxt->pushTab == NULL)
1905
0
            goto mem_error;
1906
5.84M
    }
1907
123M
    ctxt->nameTab[ctxt->nameNr] = value;
1908
123M
    ctxt->name = value;
1909
123M
    tag = &ctxt->pushTab[ctxt->nameNr];
1910
123M
    tag->prefix = prefix;
1911
123M
    tag->URI = URI;
1912
123M
    tag->line = line;
1913
123M
    tag->nsNr = nsNr;
1914
123M
    return (ctxt->nameNr++);
1915
0
mem_error:
1916
0
    xmlErrMemory(ctxt, NULL);
1917
0
    return (-1);
1918
123M
}
1919
#ifdef LIBXML_PUSH_ENABLED
1920
/**
1921
 * nameNsPop:
1922
 * @ctxt: an XML parser context
1923
 *
1924
 * Pops the top element/prefix/URI name from the name stack
1925
 *
1926
 * Returns the name just removed
1927
 */
1928
static const xmlChar *
1929
nameNsPop(xmlParserCtxtPtr ctxt)
1930
1.09M
{
1931
1.09M
    const xmlChar *ret;
1932
1933
1.09M
    if (ctxt->nameNr <= 0)
1934
0
        return (NULL);
1935
1.09M
    ctxt->nameNr--;
1936
1.09M
    if (ctxt->nameNr > 0)
1937
1.06M
        ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1938
36.7k
    else
1939
36.7k
        ctxt->name = NULL;
1940
1.09M
    ret = ctxt->nameTab[ctxt->nameNr];
1941
1.09M
    ctxt->nameTab[ctxt->nameNr] = NULL;
1942
1.09M
    return (ret);
1943
1.09M
}
1944
#endif /* LIBXML_PUSH_ENABLED */
1945
1946
/**
1947
 * namePush:
1948
 * @ctxt:  an XML parser context
1949
 * @value:  the element name
1950
 *
1951
 * Pushes a new element name on top of the name stack
1952
 *
1953
 * Returns -1 in case of error, the index in the stack otherwise
1954
 */
1955
int
1956
namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
1957
0
{
1958
0
    if (ctxt == NULL) return (-1);
1959
1960
0
    if (ctxt->nameNr >= ctxt->nameMax) {
1961
0
        const xmlChar * *tmp;
1962
0
        tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1963
0
                                    ctxt->nameMax * 2 *
1964
0
                                    sizeof(ctxt->nameTab[0]));
1965
0
        if (tmp == NULL) {
1966
0
      goto mem_error;
1967
0
        }
1968
0
  ctxt->nameTab = tmp;
1969
0
        ctxt->nameMax *= 2;
1970
0
    }
1971
0
    ctxt->nameTab[ctxt->nameNr] = value;
1972
0
    ctxt->name = value;
1973
0
    return (ctxt->nameNr++);
1974
0
mem_error:
1975
0
    xmlErrMemory(ctxt, NULL);
1976
0
    return (-1);
1977
0
}
1978
/**
1979
 * namePop:
1980
 * @ctxt: an XML parser context
1981
 *
1982
 * Pops the top element name from the name stack
1983
 *
1984
 * Returns the name just removed
1985
 */
1986
const xmlChar *
1987
namePop(xmlParserCtxtPtr ctxt)
1988
103M
{
1989
103M
    const xmlChar *ret;
1990
1991
103M
    if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1992
0
        return (NULL);
1993
103M
    ctxt->nameNr--;
1994
103M
    if (ctxt->nameNr > 0)
1995
94.4M
        ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1996
8.76M
    else
1997
8.76M
        ctxt->name = NULL;
1998
103M
    ret = ctxt->nameTab[ctxt->nameNr];
1999
103M
    ctxt->nameTab[ctxt->nameNr] = NULL;
2000
103M
    return (ret);
2001
103M
}
2002
2003
160M
/*
 * spacePush:
 * @ctxt:  an XML parser context
 * @val:  the value to store
 *
 * Pushes @val on the space stack (ctxt->spaceTab) and points
 * ctxt->space at the new top entry. The stack is doubled when full.
 *
 * Returns -1 on allocation failure, the index in the stack otherwise
 */
static int spacePush(xmlParserCtxtPtr ctxt, int val) {
    int slot;

    if (ctxt->spaceNr >= ctxt->spaceMax) {
        int *grown;

        ctxt->spaceMax *= 2;
        grown = (int *) xmlRealloc(ctxt->spaceTab,
                                   ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
        if (grown == NULL) {
            xmlErrMemory(ctxt, NULL);
            /* Undo the capacity change, the old table is still in use. */
            ctxt->spaceMax /= 2;
            return(-1);
        }
        ctxt->spaceTab = grown;
    }

    slot = ctxt->spaceNr;
    ctxt->spaceTab[slot] = val;
    ctxt->space = &ctxt->spaceTab[slot];
    ctxt->spaceNr = slot + 1;
    return(slot);
}
2021
2022
142M
/*
 * spacePop:
 * @ctxt:  an XML parser context
 *
 * Removes the top entry of the space stack. ctxt->space is repointed
 * at the new top entry (or at slot 0 when the stack becomes empty) and
 * the vacated slot is overwritten with -1.
 *
 * Returns the value just removed, or 0 if the stack is empty
 */
static int spacePop(xmlParserCtxtPtr ctxt) {
    int popped;
    int top;

    if (ctxt->spaceNr <= 0)
        return(0);

    top = --ctxt->spaceNr;
    ctxt->space = &ctxt->spaceTab[(top > 0) ? (top - 1) : 0];

    popped = ctxt->spaceTab[top];
    ctxt->spaceTab[top] = -1;
    return(popped);
}
2034
2035
/*
2036
 * Macros for accessing the content. Those should be used only by the parser,
2037
 * and not exported.
2038
 *
2039
 * Dirty macros, i.e. one often need to make assumption on the context to
2040
 * use them
2041
 *
2042
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
2043
 *           To be used with extreme caution since operations consuming
2044
 *           characters may move the input buffer to a different location !
2045
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
2046
 *           This should be used internally by the parser
2047
 *           only to compare to ASCII values otherwise it would break when
2048
 *           running with UTF-8 encoding.
2049
 *   RAW     same as CUR but in the input buffer, bypass any token
2050
 *           extraction that may have been done
2051
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used only
2052
 *           to compare on ASCII based substring.
2053
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
2054
 *           strings without newlines within the parser.
2055
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII
2056
 *           defined char within the parser.
2057
 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
2058
 *
2059
 *   NEXT    Skip to the next character, this does the proper decoding
2060
 *           in UTF-8 mode. It also pop-up unfinished entities on the fly.
2061
 *   NEXTL(l) Skip the current unicode character of l xmlChars long.
2062
 *   CUR_CHAR(l) returns the current unicode character (int), set l
2063
 *           to the number of xmlChars used for the encoding [0-5].
2064
 *   CUR_SCHAR  same but operate on a string instead of the context
2065
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
2066
 *            the index
2067
 *   GROW, SHRINK  handling of input buffers
2068
 */
2069
2070
2.08G
/* Current byte of the current input; RAW and CUR expand identically. */
#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
/* Byte at offset val from the current position. */
#define NXT(val) (ctxt->input->cur[(val)])
#define CUR_PTR ctxt->input->cur
#define BASE_PTR ctxt->input->base

/*
 * CMPn(s, c1, ..., cn): true when the first n bytes at s equal c1..cn.
 * Every argument is parenthesized so that arbitrary expressions can be
 * passed safely. s may be evaluated several times.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((unsigned char *) (s))[ 0 ] == (c1) && \
    ((unsigned char *) (s))[ 1 ] == (c2) && \
    ((unsigned char *) (s))[ 2 ] == (c3) && \
    ((unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && \
    ((unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((unsigned char *) (s))[ 9 ] == (c10) )

/*
 * Advance the input by val bytes, bumping the column by the same
 * amount; the line counter is not touched. Refill the input when the
 * new position holds a zero byte.
 */
#define SKIP(val) do {							\
    ctxt->input->cur += (val),ctxt->input->col+=(val);			\
    if (*ctxt->input->cur == 0)						\
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);			\
  } while (0)

/*
 * Advance the input by val bytes one at a time, keeping both line and
 * column counters up to date. val is evaluated on every iteration.
 */
#define SKIPL(val) do {							\
    int skipl;								\
    for(skipl=0; skipl<(val); skipl++) {				\
	if (*(ctxt->input->cur) == '\n') {				\
	ctxt->input->line++; ctxt->input->col = 1;			\
	} else ctxt->input->col++;					\
	ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == 0)						\
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);			\
  } while (0)
2111
2112
1.24G
#define SHRINK if ((ctxt->progressive == 0) &&       \
2113
1.24G
       (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
2114
1.24G
       (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
2115
1.24G
  xmlSHRINK (ctxt);
2116
2117
2.40M
static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
2118
2.40M
    xmlParserInputShrink(ctxt->input);
2119
2.40M
    if (*ctxt->input->cur == 0)
2120
237k
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2121
2.40M
}
2122
2123
2.97G
#define GROW if ((ctxt->progressive == 0) &&       \
2124
2.97G
     (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
2125
2.97G
  xmlGROW (ctxt);
2126
2127
174M
static void xmlGROW (xmlParserCtxtPtr ctxt) {
2128
174M
    ptrdiff_t curEnd = ctxt->input->end - ctxt->input->cur;
2129
174M
    ptrdiff_t curBase = ctxt->input->cur - ctxt->input->base;
2130
2131
174M
    if (((curEnd > XML_MAX_LOOKUP_LIMIT) ||
2132
174M
         (curBase > XML_MAX_LOOKUP_LIMIT)) &&
2133
174M
         ((ctxt->input->buf) &&
2134
0
          (ctxt->input->buf->readcallback != xmlInputReadCallbackNop)) &&
2135
174M
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
2136
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
2137
0
        xmlHaltParser(ctxt);
2138
0
  return;
2139
0
    }
2140
174M
    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2141
174M
    if ((ctxt->input->cur > ctxt->input->end) ||
2142
174M
        (ctxt->input->cur < ctxt->input->base)) {
2143
0
        xmlHaltParser(ctxt);
2144
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "cur index out of bound");
2145
0
  return;
2146
0
    }
2147
174M
    if ((ctxt->input->cur != NULL) && (*ctxt->input->cur == 0))
2148
6.17M
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2149
174M
}
2150
2151
479M
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

#define NEXT xmlNextChar(ctxt)

/*
 * Advance by one byte and one column, refilling the input when the new
 * position holds a zero byte.
 * NOTE(review): expands to a bare { } block, so "NEXT1;" must not be
 * used as the body of an if that has an else branch.
 */
#define NEXT1 {								\
	ctxt->input->col++;						\
	ctxt->input->cur++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }

/*
 * Advance over the current character, l bytes long, updating the line
 * and column counters. The argument is parenthesized so expressions
 * can be passed safely.
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += (l);						\
  } while (0)

#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * Append character v to buffer b at index i and advance i: a plain
 * store when l is 1, xmlCopyCharMultiByte() otherwise.
 * NOTE(review): expands to an unbraced if/else whose last statement
 * relies on the caller's semicolon.
 */
#define COPY_BUF(l,b,i,v)						\
    if ((l) == 1) (b)[(i)++] = (v);					\
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v))

/* Bytes consumed so far: input->consumed plus the cursor offset. */
#define CUR_CONSUMED \
    (ctxt->input->consumed + (ctxt->input->cur - ctxt->input->base))
2178
2179
/**
2180
 * xmlSkipBlankChars:
2181
 * @ctxt:  the XML parser context
2182
 *
2183
 * skip all blanks character found at that point in the input streams.
2184
 * It pops up finished entities in the process if allowable at that point.
2185
 *
2186
 * Returns the number of space chars skipped
2187
 */
2188
2189
int
2190
479M
xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
2191
479M
    int res = 0;
2192
2193
    /*
2194
     * It's Okay to use CUR/NEXT here since all the blanks are on
2195
     * the ASCII range.
2196
     */
2197
479M
    if (((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) ||
2198
479M
        (ctxt->instate == XML_PARSER_START)) {
2199
441M
  const xmlChar *cur;
2200
  /*
2201
   * if we are in the document content, go really fast
2202
   */
2203
441M
  cur = ctxt->input->cur;
2204
441M
  while (IS_BLANK_CH(*cur)) {
2205
130M
      if (*cur == '\n') {
2206
5.01M
    ctxt->input->line++; ctxt->input->col = 1;
2207
125M
      } else {
2208
125M
    ctxt->input->col++;
2209
125M
      }
2210
130M
      cur++;
2211
130M
      if (res < INT_MAX)
2212
130M
    res++;
2213
130M
      if (*cur == 0) {
2214
169k
    ctxt->input->cur = cur;
2215
169k
    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2216
169k
    cur = ctxt->input->cur;
2217
169k
      }
2218
130M
  }
2219
441M
  ctxt->input->cur = cur;
2220
441M
    } else {
2221
37.7M
        int expandPE = ((ctxt->external != 0) || (ctxt->inputNr != 1));
2222
2223
153M
  while (1) {
2224
153M
            if (IS_BLANK_CH(CUR)) { /* CHECKED tstblanks.xml */
2225
113M
    NEXT;
2226
113M
      } else if (CUR == '%') {
2227
                /*
2228
                 * Need to handle support of entities branching here
2229
                 */
2230
2.02M
          if ((expandPE == 0) || (IS_BLANK_CH(NXT(1))) || (NXT(1) == 0))
2231
745k
                    break;
2232
1.28M
          xmlParsePEReference(ctxt);
2233
38.0M
            } else if (CUR == 0) {
2234
1.04M
                if (ctxt->inputNr <= 1)
2235
39.0k
                    break;
2236
1.00M
                xmlPopInput(ctxt);
2237
37.0M
            } else {
2238
37.0M
                break;
2239
37.0M
            }
2240
2241
            /*
2242
             * Also increase the counter when entering or exiting a PERef.
2243
             * The spec says: "When a parameter-entity reference is recognized
2244
             * in the DTD and included, its replacement text MUST be enlarged
2245
             * by the attachment of one leading and one following space (#x20)
2246
             * character."
2247
             */
2248
116M
      if (res < INT_MAX)
2249
116M
    res++;
2250
116M
        }
2251
37.7M
    }
2252
479M
    return(res);
2253
479M
}
2254
2255
/************************************************************************
2256
 *                  *
2257
 *    Commodity functions to handle entities      *
2258
 *                  *
2259
 ************************************************************************/
2260
2261
/**
2262
 * xmlPopInput:
2263
 * @ctxt:  an XML parser context
2264
 *
2265
 * xmlPopInput: the current input pointed by ctxt->input came to an end
2266
 *          pop it and return the next char.
2267
 *
2268
 * Returns the current xmlChar in the parser context
2269
 */
2270
xmlChar
2271
1.01M
xmlPopInput(xmlParserCtxtPtr ctxt) {
2272
1.01M
    if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
2273
1.01M
    if (xmlParserDebugEntities)
2274
0
  xmlGenericError(xmlGenericErrorContext,
2275
0
    "Popping input %d\n", ctxt->inputNr);
2276
1.01M
    if ((ctxt->inputNr > 1) && (ctxt->inSubset == 0) &&
2277
1.01M
        (ctxt->instate != XML_PARSER_EOF))
2278
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2279
0
                    "Unfinished entity outside the DTD");
2280
1.01M
    xmlFreeInputStream(inputPop(ctxt));
2281
1.01M
    if (*ctxt->input->cur == 0)
2282
422
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2283
1.01M
    return(CUR);
2284
1.01M
}
2285
2286
/**
2287
 * xmlPushInput:
2288
 * @ctxt:  an XML parser context
2289
 * @input:  an XML parser input fragment (entity, XML fragment ...).
2290
 *
2291
 * xmlPushInput: switch to a new input stream which is stacked on top
2292
 *               of the previous one(s).
2293
 * Returns -1 in case of error or the index in the input stack
2294
 */
2295
int
2296
1.17M
xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
2297
1.17M
    int ret;
2298
1.17M
    if (input == NULL) return(-1);
2299
2300
1.17M
    if (xmlParserDebugEntities) {
2301
0
  if ((ctxt->input != NULL) && (ctxt->input->filename))
2302
0
      xmlGenericError(xmlGenericErrorContext,
2303
0
        "%s(%d): ", ctxt->input->filename,
2304
0
        ctxt->input->line);
2305
0
  xmlGenericError(xmlGenericErrorContext,
2306
0
    "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
2307
0
    }
2308
1.17M
    if (((ctxt->inputNr > 40) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2309
1.17M
        (ctxt->inputNr > 1024)) {
2310
560
        xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2311
95.7k
        while (ctxt->inputNr > 1)
2312
95.2k
            xmlFreeInputStream(inputPop(ctxt));
2313
560
  return(-1);
2314
560
    }
2315
1.16M
    ret = inputPush(ctxt, input);
2316
1.16M
    if (ctxt->instate == XML_PARSER_EOF)
2317
0
        return(-1);
2318
1.16M
    GROW;
2319
1.16M
    return(ret);
2320
1.16M
}
2321
2322
/**
2323
 * xmlParseCharRef:
2324
 * @ctxt:  an XML parser context
2325
 *
2326
 * DEPRECATED: Internal function, don't use.
2327
 *
2328
 * parse Reference declarations
2329
 *
2330
 * [66] CharRef ::= '&#' [0-9]+ ';' |
2331
 *                  '&#x' [0-9a-fA-F]+ ';'
2332
 *
2333
 * [ WFC: Legal Character ]
2334
 * Characters referred to using character references must match the
2335
 * production for Char.
2336
 *
2337
 * Returns the value parsed (as an int), 0 in case of error
2338
 */
2339
int
2340
13.3M
xmlParseCharRef(xmlParserCtxtPtr ctxt) {
2341
13.3M
    int val = 0;
2342
13.3M
    int count = 0;
2343
2344
    /*
2345
     * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2346
     */
2347
13.3M
    if ((RAW == '&') && (NXT(1) == '#') &&
2348
13.3M
        (NXT(2) == 'x')) {
2349
4.14M
  SKIP(3);
2350
4.14M
  GROW;
2351
14.4M
  while (RAW != ';') { /* loop blocked by count */
2352
10.6M
      if (count++ > 20) {
2353
403k
    count = 0;
2354
403k
    GROW;
2355
403k
                if (ctxt->instate == XML_PARSER_EOF)
2356
0
                    return(0);
2357
403k
      }
2358
10.6M
      if ((RAW >= '0') && (RAW <= '9'))
2359
7.14M
          val = val * 16 + (CUR - '0');
2360
3.54M
      else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2361
3.12M
          val = val * 16 + (CUR - 'a') + 10;
2362
415k
      else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2363
64.2k
          val = val * 16 + (CUR - 'A') + 10;
2364
351k
      else {
2365
351k
    xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2366
351k
    val = 0;
2367
351k
    break;
2368
351k
      }
2369
10.3M
      if (val > 0x110000)
2370
4.47M
          val = 0x110000;
2371
2372
10.3M
      NEXT;
2373
10.3M
      count++;
2374
10.3M
  }
2375
4.14M
  if (RAW == ';') {
2376
      /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2377
3.79M
      ctxt->input->col++;
2378
3.79M
      ctxt->input->cur++;
2379
3.79M
  }
2380
9.23M
    } else if  ((RAW == '&') && (NXT(1) == '#')) {
2381
9.23M
  SKIP(2);
2382
9.23M
  GROW;
2383
30.8M
  while (RAW != ';') { /* loop blocked by count */
2384
22.3M
      if (count++ > 20) {
2385
75.4k
    count = 0;
2386
75.4k
    GROW;
2387
75.4k
                if (ctxt->instate == XML_PARSER_EOF)
2388
0
                    return(0);
2389
75.4k
      }
2390
22.3M
      if ((RAW >= '0') && (RAW <= '9'))
2391
21.6M
          val = val * 10 + (CUR - '0');
2392
700k
      else {
2393
700k
    xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2394
700k
    val = 0;
2395
700k
    break;
2396
700k
      }
2397
21.6M
      if (val > 0x110000)
2398
812k
          val = 0x110000;
2399
2400
21.6M
      NEXT;
2401
21.6M
      count++;
2402
21.6M
  }
2403
9.23M
  if (RAW == ';') {
2404
      /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2405
8.52M
      ctxt->input->col++;
2406
8.52M
      ctxt->input->cur++;
2407
8.52M
  }
2408
9.23M
    } else {
2409
0
        xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2410
0
    }
2411
2412
    /*
2413
     * [ WFC: Legal Character ]
2414
     * Characters referred to using character references must match the
2415
     * production for Char.
2416
     */
2417
13.3M
    if (val >= 0x110000) {
2418
2.40k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2419
2.40k
                "xmlParseCharRef: character reference out of bounds\n",
2420
2.40k
          val);
2421
13.3M
    } else if (IS_CHAR(val)) {
2422
12.3M
        return(val);
2423
12.3M
    } else {
2424
1.06M
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2425
1.06M
                          "xmlParseCharRef: invalid xmlChar value %d\n",
2426
1.06M
                    val);
2427
1.06M
    }
2428
1.06M
    return(0);
2429
13.3M
}
2430
2431
/**
2432
 * xmlParseStringCharRef:
2433
 * @ctxt:  an XML parser context
2434
 * @str:  a pointer to an index in the string
2435
 *
2436
 * parse Reference declarations, variant parsing from a string rather
2437
 * than an an input flow.
2438
 *
2439
 * [66] CharRef ::= '&#' [0-9]+ ';' |
2440
 *                  '&#x' [0-9a-fA-F]+ ';'
2441
 *
2442
 * [ WFC: Legal Character ]
2443
 * Characters referred to using character references must match the
2444
 * production for Char.
2445
 *
2446
 * Returns the value parsed (as an int), 0 in case of error, str will be
2447
 *         updated to the current value of the index
2448
 */
2449
static int
2450
1.34M
xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2451
1.34M
    const xmlChar *ptr;
2452
1.34M
    xmlChar cur;
2453
1.34M
    int val = 0;
2454
2455
1.34M
    if ((str == NULL) || (*str == NULL)) return(0);
2456
1.34M
    ptr = *str;
2457
1.34M
    cur = *ptr;
2458
1.34M
    if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2459
228k
  ptr += 3;
2460
228k
  cur = *ptr;
2461
574k
  while (cur != ';') { /* Non input consuming loop */
2462
347k
      if ((cur >= '0') && (cur <= '9'))
2463
84.7k
          val = val * 16 + (cur - '0');
2464
262k
      else if ((cur >= 'a') && (cur <= 'f'))
2465
201k
          val = val * 16 + (cur - 'a') + 10;
2466
61.6k
      else if ((cur >= 'A') && (cur <= 'F'))
2467
59.4k
          val = val * 16 + (cur - 'A') + 10;
2468
2.11k
      else {
2469
2.11k
    xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2470
2.11k
    val = 0;
2471
2.11k
    break;
2472
2.11k
      }
2473
345k
      if (val > 0x110000)
2474
71.5k
          val = 0x110000;
2475
2476
345k
      ptr++;
2477
345k
      cur = *ptr;
2478
345k
  }
2479
228k
  if (cur == ';')
2480
226k
      ptr++;
2481
1.11M
    } else if  ((cur == '&') && (ptr[1] == '#')){
2482
1.11M
  ptr += 2;
2483
1.11M
  cur = *ptr;
2484
4.20M
  while (cur != ';') { /* Non input consuming loops */
2485
3.10M
      if ((cur >= '0') && (cur <= '9'))
2486
3.09M
          val = val * 10 + (cur - '0');
2487
3.45k
      else {
2488
3.45k
    xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2489
3.45k
    val = 0;
2490
3.45k
    break;
2491
3.45k
      }
2492
3.09M
      if (val > 0x110000)
2493
1.75k
          val = 0x110000;
2494
2495
3.09M
      ptr++;
2496
3.09M
      cur = *ptr;
2497
3.09M
  }
2498
1.11M
  if (cur == ';')
2499
1.10M
      ptr++;
2500
1.11M
    } else {
2501
0
  xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2502
0
  return(0);
2503
0
    }
2504
1.34M
    *str = ptr;
2505
2506
    /*
2507
     * [ WFC: Legal Character ]
2508
     * Characters referred to using character references must match the
2509
     * production for Char.
2510
     */
2511
1.34M
    if (val >= 0x110000) {
2512
282
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2513
282
                "xmlParseStringCharRef: character reference out of bounds\n",
2514
282
                val);
2515
1.34M
    } else if (IS_CHAR(val)) {
2516
1.33M
        return(val);
2517
1.33M
    } else {
2518
5.97k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2519
5.97k
        "xmlParseStringCharRef: invalid xmlChar value %d\n",
2520
5.97k
        val);
2521
5.97k
    }
2522
6.25k
    return(0);
2523
1.34M
}
2524
2525
/**
2526
 * xmlParserHandlePEReference:
2527
 * @ctxt:  the parser context
2528
 *
2529
 * [69] PEReference ::= '%' Name ';'
2530
 *
2531
 * [ WFC: No Recursion ]
2532
 * A parsed entity must not contain a recursive
2533
 * reference to itself, either directly or indirectly.
2534
 *
2535
 * [ WFC: Entity Declared ]
2536
 * In a document without any DTD, a document with only an internal DTD
2537
 * subset which contains no parameter entity references, or a document
2538
 * with "standalone='yes'", ...  ... The declaration of a parameter
2539
 * entity must precede any reference to it...
2540
 *
2541
 * [ VC: Entity Declared ]
2542
 * In a document with an external subset or external parameter entities
2543
 * with "standalone='no'", ...  ... The declaration of a parameter entity
2544
 * must precede any reference to it...
2545
 *
2546
 * [ WFC: In DTD ]
2547
 * Parameter-entity references may only appear in the DTD.
2548
 * NOTE: misleading but this is handled.
2549
 *
2550
 * A PEReference may have been detected in the current input stream
2551
 * the handling is done accordingly to
2552
 *      http://www.w3.org/TR/REC-xml#entproc
2553
 * i.e.
2554
 *   - Included in literal in entity values
2555
 *   - Included as Parameter Entity reference within DTDs
2556
 */
2557
void
2558
0
xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
2559
0
    switch(ctxt->instate) {
2560
0
  case XML_PARSER_CDATA_SECTION:
2561
0
      return;
2562
0
        case XML_PARSER_COMMENT:
2563
0
      return;
2564
0
  case XML_PARSER_START_TAG:
2565
0
      return;
2566
0
  case XML_PARSER_END_TAG:
2567
0
      return;
2568
0
        case XML_PARSER_EOF:
2569
0
      xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
2570
0
      return;
2571
0
        case XML_PARSER_PROLOG:
2572
0
  case XML_PARSER_START:
2573
0
  case XML_PARSER_MISC:
2574
0
      xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
2575
0
      return;
2576
0
  case XML_PARSER_ENTITY_DECL:
2577
0
        case XML_PARSER_CONTENT:
2578
0
        case XML_PARSER_ATTRIBUTE_VALUE:
2579
0
        case XML_PARSER_PI:
2580
0
  case XML_PARSER_SYSTEM_LITERAL:
2581
0
  case XML_PARSER_PUBLIC_LITERAL:
2582
      /* we just ignore it there */
2583
0
      return;
2584
0
        case XML_PARSER_EPILOG:
2585
0
      xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
2586
0
      return;
2587
0
  case XML_PARSER_ENTITY_VALUE:
2588
      /*
2589
       * NOTE: in the case of entity values, we don't do the
2590
       *       substitution here since we need the literal
2591
       *       entity value to be able to save the internal
2592
       *       subset of the document.
2593
       *       This will be handled by xmlStringDecodeEntities
2594
       */
2595
0
      return;
2596
0
        case XML_PARSER_DTD:
2597
      /*
2598
       * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2599
       * In the internal DTD subset, parameter-entity references
2600
       * can occur only where markup declarations can occur, not
2601
       * within markup declarations.
2602
       * In that case this is handled in xmlParseMarkupDecl
2603
       */
2604
0
      if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2605
0
    return;
2606
0
      if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
2607
0
    return;
2608
0
            break;
2609
0
        case XML_PARSER_IGNORE:
2610
0
            return;
2611
0
    }
2612
2613
0
    xmlParsePEReference(ctxt);
2614
0
}
2615
2616
/*
 * Macro used to grow the current buffer.
 * buffer##_size is expected to be a size_t
 * mem_error: is expected to handle memory allocation failures
 *
 * The new size is buffer##_size * 2 + (n). The argument n is
 * parenthesized so that an expression can be passed safely. If the
 * computed size is smaller than the current one (size_t wrap-around)
 * or the reallocation fails, control jumps to mem_error and the buffer
 * and its size are left unchanged.
 */
#define growBuffer(buffer, n) {						\
    xmlChar *tmp;							\
    size_t new_size = buffer##_size * 2 + (n);                          \
    if (new_size < buffer##_size) goto mem_error;                       \
    tmp = (xmlChar *) xmlRealloc(buffer, new_size);                     \
    if (tmp == NULL) goto mem_error;					\
    buffer = tmp;							\
    buffer##_size = new_size;                                           \
}
2630
2631
/**
2632
 * xmlStringLenDecodeEntities:
2633
 * @ctxt:  the parser context
2634
 * @str:  the input string
2635
 * @len: the string length
2636
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2637
 * @end:  an end marker xmlChar, 0 if none
2638
 * @end2:  an end marker xmlChar, 0 if none
2639
 * @end3:  an end marker xmlChar, 0 if none
2640
 *
2641
 * Takes a entity string content and process to do the adequate substitutions.
2642
 *
2643
 * [67] Reference ::= EntityRef | CharRef
2644
 *
2645
 * [69] PEReference ::= '%' Name ';'
2646
 *
2647
 * Returns A newly allocated string with the substitution done. The caller
2648
 *      must deallocate it !
2649
 */
2650
xmlChar *
2651
xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2652
9.34M
          int what, xmlChar end, xmlChar  end2, xmlChar end3) {
2653
9.34M
    xmlChar *buffer = NULL;
2654
9.34M
    size_t buffer_size = 0;
2655
9.34M
    size_t nbchars = 0;
2656
2657
9.34M
    xmlChar *current = NULL;
2658
9.34M
    xmlChar *rep = NULL;
2659
9.34M
    const xmlChar *last;
2660
9.34M
    xmlEntityPtr ent;
2661
9.34M
    int c,l;
2662
2663
9.34M
    if ((ctxt == NULL) || (str == NULL) || (len < 0))
2664
0
  return(NULL);
2665
9.34M
    last = str + len;
2666
2667
9.34M
    if (((ctxt->depth > 40) &&
2668
9.34M
         ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2669
9.34M
  (ctxt->depth > 1024)) {
2670
11.1k
  xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2671
11.1k
  return(NULL);
2672
11.1k
    }
2673
2674
    /*
2675
     * allocate a translation buffer.
2676
     */
2677
9.33M
    buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
2678
9.33M
    buffer = (xmlChar *) xmlMallocAtomic(buffer_size);
2679
9.33M
    if (buffer == NULL) goto mem_error;
2680
2681
    /*
2682
     * OK loop until we reach one of the ending char or a size limit.
2683
     * we are operating on already parsed values.
2684
     */
2685
9.33M
    if (str < last)
2686
8.97M
  c = CUR_SCHAR(str, l);
2687
365k
    else
2688
365k
        c = 0;
2689
705M
    while ((c != 0) && (c != end) && /* non input consuming loop */
2690
705M
           (c != end2) && (c != end3) &&
2691
705M
           (ctxt->instate != XML_PARSER_EOF)) {
2692
2693
696M
  if (c == 0) break;
2694
696M
        if ((c == '&') && (str[1] == '#')) {
2695
1.34M
      int val = xmlParseStringCharRef(ctxt, &str);
2696
1.34M
      if (val == 0)
2697
6.25k
                goto int_error;
2698
1.33M
      COPY_BUF(0,buffer,nbchars,val);
2699
1.33M
      if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2700
6.31k
          growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2701
6.31k
      }
2702
695M
  } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2703
686k
      if (xmlParserDebugEntities)
2704
0
    xmlGenericError(xmlGenericErrorContext,
2705
0
      "String decoding Entity Reference: %.30s\n",
2706
0
      str);
2707
686k
      ent = xmlParseStringEntityRef(ctxt, &str);
2708
686k
      xmlParserEntityCheck(ctxt, 0, ent, 0);
2709
686k
      if (ent != NULL)
2710
562k
          ctxt->nbentities += ent->checked / 2;
2711
686k
      if ((ent != NULL) &&
2712
686k
    (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2713
51.3k
    if (ent->content != NULL) {
2714
51.3k
        COPY_BUF(0,buffer,nbchars,ent->content[0]);
2715
51.3k
        if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2716
7.69k
      growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2717
7.69k
        }
2718
51.3k
    } else {
2719
0
        xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2720
0
          "predefined entity has no content\n");
2721
0
                    goto int_error;
2722
0
    }
2723
635k
      } else if ((ent != NULL) && (ent->content != NULL)) {
2724
507k
    ctxt->depth++;
2725
507k
    rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2726
507k
                            0, 0, 0);
2727
507k
    ctxt->depth--;
2728
507k
    if (rep == NULL) {
2729
325k
                    ent->content[0] = 0;
2730
325k
                    goto int_error;
2731
325k
                }
2732
2733
182k
                current = rep;
2734
16.8M
                while (*current != 0) { /* non input consuming loop */
2735
16.6M
                    buffer[nbchars++] = *current++;
2736
16.6M
                    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2737
27.8k
                        if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
2738
0
                            goto int_error;
2739
83.5k
                        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2740
83.5k
                    }
2741
16.6M
                }
2742
182k
                xmlFree(rep);
2743
182k
                rep = NULL;
2744
182k
      } else if (ent != NULL) {
2745
4.03k
    int i = xmlStrlen(ent->name);
2746
4.03k
    const xmlChar *cur = ent->name;
2747
2748
4.03k
    buffer[nbchars++] = '&';
2749
4.03k
    if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) {
2750
194
        growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE);
2751
194
    }
2752
18.9k
    for (;i > 0;i--)
2753
14.9k
        buffer[nbchars++] = *cur++;
2754
4.03k
    buffer[nbchars++] = ';';
2755
4.03k
      }
2756
694M
  } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2757
2.22M
      if (xmlParserDebugEntities)
2758
0
    xmlGenericError(xmlGenericErrorContext,
2759
0
      "String decoding PE Reference: %.30s\n", str);
2760
2.22M
      ent = xmlParseStringPEReference(ctxt, &str);
2761
2.22M
      xmlParserEntityCheck(ctxt, 0, ent, 0);
2762
2.22M
      if (ent != NULL)
2763
941k
          ctxt->nbentities += ent->checked / 2;
2764
2.22M
      if (ent != NULL) {
2765
941k
                if (ent->content == NULL) {
2766
        /*
2767
         * Note: external parsed entities will not be loaded,
2768
         * it is not required for a non-validating parser to
2769
         * complete external PEReferences coming from the
2770
         * internal subset
2771
         */
2772
7.08k
        if (((ctxt->options & XML_PARSE_NOENT) != 0) ||
2773
7.08k
      ((ctxt->options & XML_PARSE_DTDVALID) != 0) ||
2774
7.08k
      (ctxt->validate != 0)) {
2775
6.91k
      xmlLoadEntityContent(ctxt, ent);
2776
6.91k
        } else {
2777
174
      xmlWarningMsg(ctxt, XML_ERR_ENTITY_PROCESSING,
2778
174
      "not validating will not read content for PE entity %s\n",
2779
174
                          ent->name, NULL);
2780
174
        }
2781
7.08k
    }
2782
941k
    ctxt->depth++;
2783
941k
    rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2784
941k
                            0, 0, 0);
2785
941k
    ctxt->depth--;
2786
941k
    if (rep == NULL) {
2787
214k
                    if (ent->content != NULL)
2788
211k
                        ent->content[0] = 0;
2789
214k
                    goto int_error;
2790
214k
                }
2791
727k
                current = rep;
2792
36.2M
                while (*current != 0) { /* non input consuming loop */
2793
35.4M
                    buffer[nbchars++] = *current++;
2794
35.4M
                    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2795
73.8k
                        if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
2796
828
                            goto int_error;
2797
219k
                        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2798
219k
                    }
2799
35.4M
                }
2800
726k
                xmlFree(rep);
2801
726k
                rep = NULL;
2802
726k
      }
2803
692M
  } else {
2804
692M
      COPY_BUF(l,buffer,nbchars,c);
2805
692M
      str += l;
2806
692M
      if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2807
1.22M
          growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2808
1.22M
      }
2809
692M
  }
2810
695M
  if (str < last)
2811
687M
      c = CUR_SCHAR(str, l);
2812
8.42M
  else
2813
8.42M
      c = 0;
2814
695M
    }
2815
8.79M
    buffer[nbchars] = 0;
2816
8.79M
    return(buffer);
2817
2818
0
mem_error:
2819
0
    xmlErrMemory(ctxt, NULL);
2820
546k
int_error:
2821
546k
    if (rep != NULL)
2822
828
        xmlFree(rep);
2823
546k
    if (buffer != NULL)
2824
546k
        xmlFree(buffer);
2825
546k
    return(NULL);
2826
0
}
2827
2828
/**
2829
 * xmlStringDecodeEntities:
2830
 * @ctxt:  the parser context
2831
 * @str:  the input string
2832
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2833
 * @end:  an end marker xmlChar, 0 if none
2834
 * @end2:  an end marker xmlChar, 0 if none
2835
 * @end3:  an end marker xmlChar, 0 if none
2836
 *
2837
 * Takes a entity string content and process to do the adequate substitutions.
2838
 *
2839
 * [67] Reference ::= EntityRef | CharRef
2840
 *
2841
 * [69] PEReference ::= '%' Name ';'
2842
 *
2843
 * Returns A newly allocated string with the substitution done. The caller
2844
 *      must deallocate it !
2845
 */
2846
xmlChar *
2847
xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2848
9.34M
            xmlChar end, xmlChar  end2, xmlChar end3) {
2849
9.34M
    if ((ctxt == NULL) || (str == NULL)) return(NULL);
2850
9.33M
    return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2851
9.33M
           end, end2, end3));
2852
9.34M
}
2853
2854
/************************************************************************
2855
 *                  *
2856
 *    Commodity functions, cleanup needed ?     *
2857
 *                  *
2858
 ************************************************************************/
2859
2860
/**
2861
 * areBlanks:
2862
 * @ctxt:  an XML parser context
2863
 * @str:  a xmlChar *
2864
 * @len:  the size of @str
2865
 * @blank_chars: we know the chars are blanks
2866
 *
2867
 * Is this a sequence of blank chars that one can ignore ?
2868
 *
2869
 * Returns 1 if ignorable 0 otherwise.
2870
 */
2871
2872
static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2873
91.6M
                     int blank_chars) {
2874
91.6M
    int i, ret;
2875
91.6M
    xmlNodePtr lastChild;
2876
2877
    /*
2878
     * Don't spend time trying to differentiate them, the same callback is
2879
     * used !
2880
     */
2881
91.6M
    if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
2882
1.61M
  return(0);
2883
2884
    /*
2885
     * Check for xml:space value.
2886
     */
2887
90.0M
    if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2888
90.0M
        (*(ctxt->space) == -2))
2889
61.6M
  return(0);
2890
2891
    /*
2892
     * Check that the string is made of blanks
2893
     */
2894
28.3M
    if (blank_chars == 0) {
2895
69.9M
  for (i = 0;i < len;i++)
2896
62.8M
      if (!(IS_BLANK_CH(str[i]))) return(0);
2897
12.5M
    }
2898
2899
    /*
2900
     * Look if the element is mixed content in the DTD if available
2901
     */
2902
22.9M
    if (ctxt->node == NULL) return(0);
2903
15.2M
    if (ctxt->myDoc != NULL) {
2904
15.2M
  ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2905
15.2M
        if (ret == 0) return(1);
2906
15.0M
        if (ret == 1) return(0);
2907
15.0M
    }
2908
2909
    /*
2910
     * Otherwise, heuristic :-\
2911
     */
2912
15.0M
    if ((RAW != '<') && (RAW != 0xD)) return(0);
2913
11.9M
    if ((ctxt->node->children == NULL) &&
2914
11.9M
  (RAW == '<') && (NXT(1) == '/')) return(0);
2915
2916
11.9M
    lastChild = xmlGetLastChild(ctxt->node);
2917
11.9M
    if (lastChild == NULL) {
2918
1.35M
        if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2919
1.35M
            (ctxt->node->content != NULL)) return(0);
2920
10.5M
    } else if (xmlNodeIsText(lastChild))
2921
6.53M
        return(0);
2922
4.04M
    else if ((ctxt->node->children != NULL) &&
2923
4.04M
             (xmlNodeIsText(ctxt->node->children)))
2924
571k
        return(0);
2925
4.82M
    return(1);
2926
11.9M
}
2927
2928
/************************************************************************
2929
 *                  *
2930
 *    Extra stuff for namespace support     *
2931
 *  Relates to http://www.w3.org/TR/WD-xml-names      *
2932
 *                  *
2933
 ************************************************************************/
2934
2935
/**
2936
 * xmlSplitQName:
2937
 * @ctxt:  an XML parser context
2938
 * @name:  an XML parser context
2939
 * @prefix:  a xmlChar **
2940
 *
2941
 * parse an UTF8 encoded XML qualified name string
2942
 *
2943
 * [NS 5] QName ::= (Prefix ':')? LocalPart
2944
 *
2945
 * [NS 6] Prefix ::= NCName
2946
 *
2947
 * [NS 7] LocalPart ::= NCName
2948
 *
2949
 * Returns the local part, and prefix is updated
2950
 *   to get the Prefix if any.
2951
 */
2952
2953
xmlChar *
2954
33.9M
xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2955
33.9M
    xmlChar buf[XML_MAX_NAMELEN + 5];
2956
33.9M
    xmlChar *buffer = NULL;
2957
33.9M
    int len = 0;
2958
33.9M
    int max = XML_MAX_NAMELEN;
2959
33.9M
    xmlChar *ret = NULL;
2960
33.9M
    const xmlChar *cur = name;
2961
33.9M
    int c;
2962
2963
33.9M
    if (prefix == NULL) return(NULL);
2964
33.9M
    *prefix = NULL;
2965
2966
33.9M
    if (cur == NULL) return(NULL);
2967
2968
#ifndef XML_XML_NAMESPACE
2969
    /* xml: prefix is not really a namespace */
2970
    if ((cur[0] == 'x') && (cur[1] == 'm') &&
2971
        (cur[2] == 'l') && (cur[3] == ':'))
2972
  return(xmlStrdup(name));
2973
#endif
2974
2975
    /* nasty but well=formed */
2976
33.9M
    if (cur[0] == ':')
2977
7.05k
  return(xmlStrdup(name));
2978
2979
33.9M
    c = *cur++;
2980
156M
    while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2981
122M
  buf[len++] = c;
2982
122M
  c = *cur++;
2983
122M
    }
2984
33.9M
    if (len >= max) {
2985
  /*
2986
   * Okay someone managed to make a huge name, so he's ready to pay
2987
   * for the processing speed.
2988
   */
2989
25.3k
  max = len * 2;
2990
2991
25.3k
  buffer = (xmlChar *) xmlMallocAtomic(max);
2992
25.3k
  if (buffer == NULL) {
2993
0
      xmlErrMemory(ctxt, NULL);
2994
0
      return(NULL);
2995
0
  }
2996
25.3k
  memcpy(buffer, buf, len);
2997
15.9M
  while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2998
15.8M
      if (len + 10 > max) {
2999
21.5k
          xmlChar *tmp;
3000
3001
21.5k
    max *= 2;
3002
21.5k
    tmp = (xmlChar *) xmlRealloc(buffer, max);
3003
21.5k
    if (tmp == NULL) {
3004
0
        xmlFree(buffer);
3005
0
        xmlErrMemory(ctxt, NULL);
3006
0
        return(NULL);
3007
0
    }
3008
21.5k
    buffer = tmp;
3009
21.5k
      }
3010
15.8M
      buffer[len++] = c;
3011
15.8M
      c = *cur++;
3012
15.8M
  }
3013
25.3k
  buffer[len] = 0;
3014
25.3k
    }
3015
3016
33.9M
    if ((c == ':') && (*cur == 0)) {
3017
62.8k
        if (buffer != NULL)
3018
15
      xmlFree(buffer);
3019
62.8k
  *prefix = NULL;
3020
62.8k
  return(xmlStrdup(name));
3021
62.8k
    }
3022
3023
33.9M
    if (buffer == NULL)
3024
33.8M
  ret = xmlStrndup(buf, len);
3025
25.3k
    else {
3026
25.3k
  ret = buffer;
3027
25.3k
  buffer = NULL;
3028
25.3k
  max = XML_MAX_NAMELEN;
3029
25.3k
    }
3030
3031
3032
33.9M
    if (c == ':') {
3033
11.6M
  c = *cur;
3034
11.6M
        *prefix = ret;
3035
11.6M
  if (c == 0) {
3036
0
      return(xmlStrndup(BAD_CAST "", 0));
3037
0
  }
3038
11.6M
  len = 0;
3039
3040
  /*
3041
   * Check that the first character is proper to start
3042
   * a new name
3043
   */
3044
11.6M
  if (!(((c >= 0x61) && (c <= 0x7A)) ||
3045
11.6M
        ((c >= 0x41) && (c <= 0x5A)) ||
3046
11.6M
        (c == '_') || (c == ':'))) {
3047
7.07k
      int l;
3048
7.07k
      int first = CUR_SCHAR(cur, l);
3049
3050
7.07k
      if (!IS_LETTER(first) && (first != '_')) {
3051
4.44k
    xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
3052
4.44k
          "Name %s is not XML Namespace compliant\n",
3053
4.44k
          name);
3054
4.44k
      }
3055
7.07k
  }
3056
11.6M
  cur++;
3057
3058
40.9M
  while ((c != 0) && (len < max)) { /* tested bigname2.xml */
3059
29.2M
      buf[len++] = c;
3060
29.2M
      c = *cur++;
3061
29.2M
  }
3062
11.6M
  if (len >= max) {
3063
      /*
3064
       * Okay someone managed to make a huge name, so he's ready to pay
3065
       * for the processing speed.
3066
       */
3067
9.14k
      max = len * 2;
3068
3069
9.14k
      buffer = (xmlChar *) xmlMallocAtomic(max);
3070
9.14k
      if (buffer == NULL) {
3071
0
          xmlErrMemory(ctxt, NULL);
3072
0
    return(NULL);
3073
0
      }
3074
9.14k
      memcpy(buffer, buf, len);
3075
6.81M
      while (c != 0) { /* tested bigname2.xml */
3076
6.80M
    if (len + 10 > max) {
3077
7.64k
        xmlChar *tmp;
3078
3079
7.64k
        max *= 2;
3080
7.64k
        tmp = (xmlChar *) xmlRealloc(buffer, max);
3081
7.64k
        if (tmp == NULL) {
3082
0
      xmlErrMemory(ctxt, NULL);
3083
0
      xmlFree(buffer);
3084
0
      return(NULL);
3085
0
        }
3086
7.64k
        buffer = tmp;
3087
7.64k
    }
3088
6.80M
    buffer[len++] = c;
3089
6.80M
    c = *cur++;
3090
6.80M
      }
3091
9.14k
      buffer[len] = 0;
3092
9.14k
  }
3093
3094
11.6M
  if (buffer == NULL)
3095
11.6M
      ret = xmlStrndup(buf, len);
3096
9.14k
  else {
3097
9.14k
      ret = buffer;
3098
9.14k
  }
3099
11.6M
    }
3100
3101
33.9M
    return(ret);
3102
33.9M
}
3103
3104
/************************************************************************
3105
 *                  *
3106
 *      The parser itself       *
3107
 *  Relates to http://www.w3.org/TR/REC-xml       *
3108
 *                  *
3109
 ************************************************************************/
3110
3111
/************************************************************************
3112
 *                  *
3113
 *  Routines to parse Name, NCName and NmToken      *
3114
 *                  *
3115
 ************************************************************************/
3116
#ifdef DEBUG
3117
static unsigned long nbParseName = 0;
3118
static unsigned long nbParseNmToken = 0;
3119
static unsigned long nbParseNCName = 0;
3120
static unsigned long nbParseNCNameComplex = 0;
3121
static unsigned long nbParseNameComplex = 0;
3122
static unsigned long nbParseStringName = 0;
3123
#endif
3124
3125
/*
3126
 * The two following functions are related to the change of accepted
3127
 * characters for Name and NmToken in the Revision 5 of XML-1.0
3128
 * They correspond to the modified production [4] and the new production [4a]
3129
 * changes in that revision. Also note that the macros used for the
3130
 * productions Letter, Digit, CombiningChar and Extender are not needed
3131
 * anymore.
3132
 * We still keep compatibility to pre-revision5 parsing semantic if the
3133
 * new XML_PARSE_OLD10 option is given to the parser.
3134
 */
3135
static int
3136
6.39M
xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
3137
6.39M
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3138
        /*
3139
   * Use the new checks of production [4] [4a] amd [5] of the
3140
   * Update 5 of XML-1.0
3141
   */
3142
3.42M
  if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3143
3.42M
      (((c >= 'a') && (c <= 'z')) ||
3144
3.37M
       ((c >= 'A') && (c <= 'Z')) ||
3145
3.37M
       (c == '_') || (c == ':') ||
3146
3.37M
       ((c >= 0xC0) && (c <= 0xD6)) ||
3147
3.37M
       ((c >= 0xD8) && (c <= 0xF6)) ||
3148
3.37M
       ((c >= 0xF8) && (c <= 0x2FF)) ||
3149
3.37M
       ((c >= 0x370) && (c <= 0x37D)) ||
3150
3.37M
       ((c >= 0x37F) && (c <= 0x1FFF)) ||
3151
3.37M
       ((c >= 0x200C) && (c <= 0x200D)) ||
3152
3.37M
       ((c >= 0x2070) && (c <= 0x218F)) ||
3153
3.37M
       ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3154
3.37M
       ((c >= 0x3001) && (c <= 0xD7FF)) ||
3155
3.37M
       ((c >= 0xF900) && (c <= 0xFDCF)) ||
3156
3.37M
       ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3157
3.37M
       ((c >= 0x10000) && (c <= 0xEFFFF))))
3158
1.74M
      return(1);
3159
3.42M
    } else {
3160
2.96M
        if (IS_LETTER(c) || (c == '_') || (c == ':'))
3161
1.27M
      return(1);
3162
2.96M
    }
3163
3.37M
    return(0);
3164
6.39M
}
3165
3166
static int
3167
56.5M
xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
3168
56.5M
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3169
        /*
3170
   * Use the new checks of production [4] [4a] amd [5] of the
3171
   * Update 5 of XML-1.0
3172
   */
3173
37.6M
  if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3174
37.6M
      (((c >= 'a') && (c <= 'z')) ||
3175
37.5M
       ((c >= 'A') && (c <= 'Z')) ||
3176
37.5M
       ((c >= '0') && (c <= '9')) || /* !start */
3177
37.5M
       (c == '_') || (c == ':') ||
3178
37.5M
       (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3179
37.5M
       ((c >= 0xC0) && (c <= 0xD6)) ||
3180
37.5M
       ((c >= 0xD8) && (c <= 0xF6)) ||
3181
37.5M
       ((c >= 0xF8) && (c <= 0x2FF)) ||
3182
37.5M
       ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3183
37.5M
       ((c >= 0x370) && (c <= 0x37D)) ||
3184
37.5M
       ((c >= 0x37F) && (c <= 0x1FFF)) ||
3185
37.5M
       ((c >= 0x200C) && (c <= 0x200D)) ||
3186
37.5M
       ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3187
37.5M
       ((c >= 0x2070) && (c <= 0x218F)) ||
3188
37.5M
       ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3189
37.5M
       ((c >= 0x3001) && (c <= 0xD7FF)) ||
3190
37.5M
       ((c >= 0xF900) && (c <= 0xFDCF)) ||
3191
37.5M
       ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3192
37.5M
       ((c >= 0x10000) && (c <= 0xEFFFF))))
3193
35.6M
       return(1);
3194
37.6M
    } else {
3195
18.8M
        if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3196
18.8M
            (c == '.') || (c == '-') ||
3197
18.8M
      (c == '_') || (c == ':') ||
3198
18.8M
      (IS_COMBINING(c)) ||
3199
18.8M
      (IS_EXTENDER(c)))
3200
17.3M
      return(1);
3201
18.8M
    }
3202
3.53M
    return(0);
3203
56.5M
}
3204
3205
static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
3206
                                          int *len, int *alloc, int normalize);
3207
3208
static const xmlChar *
3209
85.4M
xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3210
85.4M
    int len = 0, l;
3211
85.4M
    int c;
3212
85.4M
    int count = 0;
3213
85.4M
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3214
63.9M
                    XML_MAX_TEXT_LENGTH :
3215
85.4M
                    XML_MAX_NAME_LENGTH;
3216
3217
#ifdef DEBUG
3218
    nbParseNameComplex++;
3219
#endif
3220
3221
    /*
3222
     * Handler for more complex cases
3223
     */
3224
85.4M
    GROW;
3225
85.4M
    if (ctxt->instate == XML_PARSER_EOF)
3226
0
        return(NULL);
3227
85.4M
    c = CUR_CHAR(l);
3228
85.4M
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3229
        /*
3230
   * Use the new checks of production [4] [4a] amd [5] of the
3231
   * Update 5 of XML-1.0
3232
   */
3233
45.3M
  if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3234
45.3M
      (!(((c >= 'a') && (c <= 'z')) ||
3235
44.2M
         ((c >= 'A') && (c <= 'Z')) ||
3236
44.2M
         (c == '_') || (c == ':') ||
3237
44.2M
         ((c >= 0xC0) && (c <= 0xD6)) ||
3238
44.2M
         ((c >= 0xD8) && (c <= 0xF6)) ||
3239
44.2M
         ((c >= 0xF8) && (c <= 0x2FF)) ||
3240
44.2M
         ((c >= 0x370) && (c <= 0x37D)) ||
3241
44.2M
         ((c >= 0x37F) && (c <= 0x1FFF)) ||
3242
44.2M
         ((c >= 0x200C) && (c <= 0x200D)) ||
3243
44.2M
         ((c >= 0x2070) && (c <= 0x218F)) ||
3244
44.2M
         ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3245
44.2M
         ((c >= 0x3001) && (c <= 0xD7FF)) ||
3246
44.2M
         ((c >= 0xF900) && (c <= 0xFDCF)) ||
3247
44.2M
         ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3248
44.2M
         ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3249
43.0M
      return(NULL);
3250
43.0M
  }
3251
2.24M
  len += l;
3252
2.24M
  NEXTL(l);
3253
2.24M
  c = CUR_CHAR(l);
3254
104M
  while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3255
104M
         (((c >= 'a') && (c <= 'z')) ||
3256
104M
          ((c >= 'A') && (c <= 'Z')) ||
3257
104M
          ((c >= '0') && (c <= '9')) || /* !start */
3258
104M
          (c == '_') || (c == ':') ||
3259
104M
          (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3260
104M
          ((c >= 0xC0) && (c <= 0xD6)) ||
3261
104M
          ((c >= 0xD8) && (c <= 0xF6)) ||
3262
104M
          ((c >= 0xF8) && (c <= 0x2FF)) ||
3263
104M
          ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3264
104M
          ((c >= 0x370) && (c <= 0x37D)) ||
3265
104M
          ((c >= 0x37F) && (c <= 0x1FFF)) ||
3266
104M
          ((c >= 0x200C) && (c <= 0x200D)) ||
3267
104M
          ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3268
104M
          ((c >= 0x2070) && (c <= 0x218F)) ||
3269
104M
          ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3270
104M
          ((c >= 0x3001) && (c <= 0xD7FF)) ||
3271
104M
          ((c >= 0xF900) && (c <= 0xFDCF)) ||
3272
104M
          ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3273
104M
          ((c >= 0x10000) && (c <= 0xEFFFF))
3274
104M
    )) {
3275
102M
      if (count++ > XML_PARSER_CHUNK_SIZE) {
3276
842k
    count = 0;
3277
842k
    GROW;
3278
842k
                if (ctxt->instate == XML_PARSER_EOF)
3279
0
                    return(NULL);
3280
842k
      }
3281
102M
            if (len <= INT_MAX - l)
3282
102M
          len += l;
3283
102M
      NEXTL(l);
3284
102M
      c = CUR_CHAR(l);
3285
102M
  }
3286
40.0M
    } else {
3287
40.0M
  if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3288
40.0M
      (!IS_LETTER(c) && (c != '_') &&
3289
39.1M
       (c != ':'))) {
3290
36.9M
      return(NULL);
3291
36.9M
  }
3292
3.12M
  len += l;
3293
3.12M
  NEXTL(l);
3294
3.12M
  c = CUR_CHAR(l);
3295
3296
172M
  while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3297
172M
         ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3298
171M
    (c == '.') || (c == '-') ||
3299
171M
    (c == '_') || (c == ':') ||
3300
171M
    (IS_COMBINING(c)) ||
3301
171M
    (IS_EXTENDER(c)))) {
3302
169M
      if (count++ > XML_PARSER_CHUNK_SIZE) {
3303
1.45M
    count = 0;
3304
1.45M
    GROW;
3305
1.45M
                if (ctxt->instate == XML_PARSER_EOF)
3306
0
                    return(NULL);
3307
1.45M
      }
3308
169M
            if (len <= INT_MAX - l)
3309
169M
          len += l;
3310
169M
      NEXTL(l);
3311
169M
      c = CUR_CHAR(l);
3312
169M
  }
3313
3.12M
    }
3314
5.37M
    if (len > maxLength) {
3315
58
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3316
58
        return(NULL);
3317
58
    }
3318
5.37M
    if (ctxt->input->cur - ctxt->input->base < len) {
3319
        /*
3320
         * There were a couple of bugs where PERefs lead to to a change
3321
         * of the buffer. Check the buffer size to avoid passing an invalid
3322
         * pointer to xmlDictLookup.
3323
         */
3324
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
3325
0
                    "unexpected change of input buffer");
3326
0
        return (NULL);
3327
0
    }
3328
5.37M
    if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3329
8.85k
        return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
3330
5.36M
    return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3331
5.37M
}
3332
3333
/**
3334
 * xmlParseName:
3335
 * @ctxt:  an XML parser context
3336
 *
3337
 * DEPRECATED: Internal function, don't use.
3338
 *
3339
 * parse an XML name.
3340
 *
3341
 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3342
 *                  CombiningChar | Extender
3343
 *
3344
 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3345
 *
3346
 * [6] Names ::= Name (#x20 Name)*
3347
 *
3348
 * Returns the Name parsed or NULL
3349
 */
3350
3351
const xmlChar *
3352
343M
xmlParseName(xmlParserCtxtPtr ctxt) {
3353
343M
    const xmlChar *in;
3354
343M
    const xmlChar *ret;
3355
343M
    size_t count = 0;
3356
343M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3357
260M
                       XML_MAX_TEXT_LENGTH :
3358
343M
                       XML_MAX_NAME_LENGTH;
3359
3360
343M
    GROW;
3361
3362
#ifdef DEBUG
3363
    nbParseName++;
3364
#endif
3365
3366
    /*
3367
     * Accelerator for simple ASCII names
3368
     */
3369
343M
    in = ctxt->input->cur;
3370
343M
    if (((*in >= 0x61) && (*in <= 0x7A)) ||
3371
343M
  ((*in >= 0x41) && (*in <= 0x5A)) ||
3372
343M
  (*in == '_') || (*in == ':')) {
3373
261M
  in++;
3374
1.35G
  while (((*in >= 0x61) && (*in <= 0x7A)) ||
3375
1.35G
         ((*in >= 0x41) && (*in <= 0x5A)) ||
3376
1.35G
         ((*in >= 0x30) && (*in <= 0x39)) ||
3377
1.35G
         (*in == '_') || (*in == '-') ||
3378
1.35G
         (*in == ':') || (*in == '.'))
3379
1.09G
      in++;
3380
261M
  if ((*in > 0) && (*in < 0x80)) {
3381
257M
      count = in - ctxt->input->cur;
3382
257M
            if (count > maxLength) {
3383
25
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3384
25
                return(NULL);
3385
25
            }
3386
257M
      ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3387
257M
      ctxt->input->cur = in;
3388
257M
      ctxt->input->col += count;
3389
257M
      if (ret == NULL)
3390
0
          xmlErrMemory(ctxt, NULL);
3391
257M
      return(ret);
3392
257M
  }
3393
261M
    }
3394
    /* accelerator for special cases */
3395
85.4M
    return(xmlParseNameComplex(ctxt));
3396
343M
}
3397
3398
static const xmlChar *
3399
2.76M
xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3400
2.76M
    int len = 0, l;
3401
2.76M
    int c;
3402
2.76M
    int count = 0;
3403
2.76M
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3404
906k
                    XML_MAX_TEXT_LENGTH :
3405
2.76M
                    XML_MAX_NAME_LENGTH;
3406
2.76M
    size_t startPosition = 0;
3407
3408
#ifdef DEBUG
3409
    nbParseNCNameComplex++;
3410
#endif
3411
3412
    /*
3413
     * Handler for more complex cases
3414
     */
3415
2.76M
    GROW;
3416
2.76M
    startPosition = CUR_PTR - BASE_PTR;
3417
2.76M
    c = CUR_CHAR(l);
3418
2.76M
    if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3419
2.76M
  (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3420
2.35M
  return(NULL);
3421
2.35M
    }
3422
3423
16.2M
    while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3424
16.2M
     (xmlIsNameChar(ctxt, c) && (c != ':'))) {
3425
15.7M
  if (count++ > XML_PARSER_CHUNK_SIZE) {
3426
130k
      count = 0;
3427
130k
      GROW;
3428
130k
            if (ctxt->instate == XML_PARSER_EOF)
3429
0
                return(NULL);
3430
130k
  }
3431
15.7M
        if (len <= INT_MAX - l)
3432
15.7M
      len += l;
3433
15.7M
  NEXTL(l);
3434
15.7M
  c = CUR_CHAR(l);
3435
15.7M
  if (c == 0) {
3436
11.7k
      count = 0;
3437
      /*
3438
       * when shrinking to extend the buffer we really need to preserve
3439
       * the part of the name we already parsed. Hence rolling back
3440
       * by current length.
3441
       */
3442
11.7k
      ctxt->input->cur -= l;
3443
11.7k
      GROW;
3444
11.7k
            if (ctxt->instate == XML_PARSER_EOF)
3445
0
                return(NULL);
3446
11.7k
      ctxt->input->cur += l;
3447
11.7k
      c = CUR_CHAR(l);
3448
11.7k
  }
3449
15.7M
    }
3450
414k
    if (len > maxLength) {
3451
45
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3452
45
        return(NULL);
3453
45
    }
3454
414k
    return(xmlDictLookup(ctxt->dict, (BASE_PTR + startPosition), len));
3455
414k
}
3456
3457
/**
3458
 * xmlParseNCName:
3459
 * @ctxt:  an XML parser context
3460
 * @len:  length of the string parsed
3461
 *
3462
 * parse an XML name.
3463
 *
3464
 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3465
 *                      CombiningChar | Extender
3466
 *
3467
 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3468
 *
3469
 * Returns the Name parsed or NULL
3470
 */
3471
3472
static const xmlChar *
3473
25.7M
xmlParseNCName(xmlParserCtxtPtr ctxt) {
3474
25.7M
    const xmlChar *in, *e;
3475
25.7M
    const xmlChar *ret;
3476
25.7M
    size_t count = 0;
3477
25.7M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3478
7.01M
                       XML_MAX_TEXT_LENGTH :
3479
25.7M
                       XML_MAX_NAME_LENGTH;
3480
3481
#ifdef DEBUG
3482
    nbParseNCName++;
3483
#endif
3484
3485
    /*
3486
     * Accelerator for simple ASCII names
3487
     */
3488
25.7M
    in = ctxt->input->cur;
3489
25.7M
    e = ctxt->input->end;
3490
25.7M
    if ((((*in >= 0x61) && (*in <= 0x7A)) ||
3491
25.7M
   ((*in >= 0x41) && (*in <= 0x5A)) ||
3492
25.7M
   (*in == '_')) && (in < e)) {
3493
23.3M
  in++;
3494
97.6M
  while ((((*in >= 0x61) && (*in <= 0x7A)) ||
3495
97.6M
          ((*in >= 0x41) && (*in <= 0x5A)) ||
3496
97.6M
          ((*in >= 0x30) && (*in <= 0x39)) ||
3497
97.6M
          (*in == '_') || (*in == '-') ||
3498
97.6M
          (*in == '.')) && (in < e))
3499
74.2M
      in++;
3500
23.3M
  if (in >= e)
3501
1.73k
      goto complex;
3502
23.3M
  if ((*in > 0) && (*in < 0x80)) {
3503
23.0M
      count = in - ctxt->input->cur;
3504
23.0M
            if (count > maxLength) {
3505
10
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3506
10
                return(NULL);
3507
10
            }
3508
23.0M
      ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3509
23.0M
      ctxt->input->cur = in;
3510
23.0M
      ctxt->input->col += count;
3511
23.0M
      if (ret == NULL) {
3512
0
          xmlErrMemory(ctxt, NULL);
3513
0
      }
3514
23.0M
      return(ret);
3515
23.0M
  }
3516
23.3M
    }
3517
2.76M
complex:
3518
2.76M
    return(xmlParseNCNameComplex(ctxt));
3519
25.7M
}
3520
3521
/**
3522
 * xmlParseNameAndCompare:
3523
 * @ctxt:  an XML parser context
3524
 *
3525
 * parse an XML name and compares for match
3526
 * (specialized for endtag parsing)
3527
 *
3528
 * Returns NULL for an illegal name, (xmlChar*) 1 for success
3529
 * and the name for mismatch
3530
 */
3531
3532
static const xmlChar *
3533
43.6M
xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
3534
43.6M
    register const xmlChar *cmp = other;
3535
43.6M
    register const xmlChar *in;
3536
43.6M
    const xmlChar *ret;
3537
3538
43.6M
    GROW;
3539
43.6M
    if (ctxt->instate == XML_PARSER_EOF)
3540
0
        return(NULL);
3541
3542
43.6M
    in = ctxt->input->cur;
3543
231M
    while (*in != 0 && *in == *cmp) {
3544
187M
  ++in;
3545
187M
  ++cmp;
3546
187M
    }
3547
43.6M
    if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
3548
  /* success */
3549
37.1M
  ctxt->input->col += in - ctxt->input->cur;
3550
37.1M
  ctxt->input->cur = in;
3551
37.1M
  return (const xmlChar*) 1;
3552
37.1M
    }
3553
    /* failure (or end of input buffer), check with full function */
3554
6.53M
    ret = xmlParseName (ctxt);
3555
    /* strings coming from the dictionary direct compare possible */
3556
6.53M
    if (ret == other) {
3557
127k
  return (const xmlChar*) 1;
3558
127k
    }
3559
6.40M
    return ret;
3560
6.53M
}
3561
3562
/**
3563
 * xmlParseStringName:
3564
 * @ctxt:  an XML parser context
3565
 * @str:  a pointer to the string pointer (IN/OUT)
3566
 *
3567
 * parse an XML name.
3568
 *
3569
 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3570
 *                  CombiningChar | Extender
3571
 *
3572
 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3573
 *
3574
 * [6] Names ::= Name (#x20 Name)*
3575
 *
3576
 * Returns the Name parsed or NULL. The @str pointer
3577
 * is updated to the current location in the string.
3578
 */
3579
3580
static xmlChar *
3581
3.68M
xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3582
3.68M
    xmlChar buf[XML_MAX_NAMELEN + 5];
3583
3.68M
    const xmlChar *cur = *str;
3584
3.68M
    int len = 0, l;
3585
3.68M
    int c;
3586
3.68M
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3587
2.25M
                    XML_MAX_TEXT_LENGTH :
3588
3.68M
                    XML_MAX_NAME_LENGTH;
3589
3590
#ifdef DEBUG
3591
    nbParseStringName++;
3592
#endif
3593
3594
3.68M
    c = CUR_SCHAR(cur, l);
3595
3.68M
    if (!xmlIsNameStartChar(ctxt, c)) {
3596
1.08M
  return(NULL);
3597
1.08M
    }
3598
3599
2.59M
    COPY_BUF(l,buf,len,c);
3600
2.59M
    cur += l;
3601
2.59M
    c = CUR_SCHAR(cur, l);
3602
24.0M
    while (xmlIsNameChar(ctxt, c)) {
3603
21.4M
  COPY_BUF(l,buf,len,c);
3604
21.4M
  cur += l;
3605
21.4M
  c = CUR_SCHAR(cur, l);
3606
21.4M
  if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3607
      /*
3608
       * Okay someone managed to make a huge name, so he's ready to pay
3609
       * for the processing speed.
3610
       */
3611
7.14k
      xmlChar *buffer;
3612
7.14k
      int max = len * 2;
3613
3614
7.14k
      buffer = (xmlChar *) xmlMallocAtomic(max);
3615
7.14k
      if (buffer == NULL) {
3616
0
          xmlErrMemory(ctxt, NULL);
3617
0
    return(NULL);
3618
0
      }
3619
7.14k
      memcpy(buffer, buf, len);
3620
7.13M
      while (xmlIsNameChar(ctxt, c)) {
3621
7.12M
    if (len + 10 > max) {
3622
18.2k
        xmlChar *tmp;
3623
3624
18.2k
        max *= 2;
3625
18.2k
        tmp = (xmlChar *) xmlRealloc(buffer, max);
3626
18.2k
        if (tmp == NULL) {
3627
0
      xmlErrMemory(ctxt, NULL);
3628
0
      xmlFree(buffer);
3629
0
      return(NULL);
3630
0
        }
3631
18.2k
        buffer = tmp;
3632
18.2k
    }
3633
7.12M
    COPY_BUF(l,buffer,len,c);
3634
7.12M
    cur += l;
3635
7.12M
    c = CUR_SCHAR(cur, l);
3636
7.12M
                if (len > maxLength) {
3637
4
                    xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3638
4
                    xmlFree(buffer);
3639
4
                    return(NULL);
3640
4
                }
3641
7.12M
      }
3642
7.14k
      buffer[len] = 0;
3643
7.14k
      *str = cur;
3644
7.14k
      return(buffer);
3645
7.14k
  }
3646
21.4M
    }
3647
2.58M
    if (len > maxLength) {
3648
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3649
0
        return(NULL);
3650
0
    }
3651
2.58M
    *str = cur;
3652
2.58M
    return(xmlStrndup(buf, len));
3653
2.58M
}
3654
3655
/**
3656
 * xmlParseNmtoken:
3657
 * @ctxt:  an XML parser context
3658
 *
3659
 * DEPRECATED: Internal function, don't use.
3660
 *
3661
 * parse an XML Nmtoken.
3662
 *
3663
 * [7] Nmtoken ::= (NameChar)+
3664
 *
3665
 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
3666
 *
3667
 * Returns the Nmtoken parsed or NULL
3668
 */
3669
3670
xmlChar *
3671
665k
xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3672
665k
    xmlChar buf[XML_MAX_NAMELEN + 5];
3673
665k
    int len = 0, l;
3674
665k
    int c;
3675
665k
    int count = 0;
3676
665k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3677
210k
                    XML_MAX_TEXT_LENGTH :
3678
665k
                    XML_MAX_NAME_LENGTH;
3679
3680
#ifdef DEBUG
3681
    nbParseNmToken++;
3682
#endif
3683
3684
665k
    GROW;
3685
665k
    if (ctxt->instate == XML_PARSER_EOF)
3686
0
        return(NULL);
3687
665k
    c = CUR_CHAR(l);
3688
3689
4.07M
    while (xmlIsNameChar(ctxt, c)) {
3690
3.41M
  if (count++ > XML_PARSER_CHUNK_SIZE) {
3691
0
      count = 0;
3692
0
      GROW;
3693
0
  }
3694
3.41M
  COPY_BUF(l,buf,len,c);
3695
3.41M
  NEXTL(l);
3696
3.41M
  c = CUR_CHAR(l);
3697
3.41M
  if (c == 0) {
3698
755
      count = 0;
3699
755
      GROW;
3700
755
      if (ctxt->instate == XML_PARSER_EOF)
3701
0
    return(NULL);
3702
755
            c = CUR_CHAR(l);
3703
755
  }
3704
3.41M
  if (len >= XML_MAX_NAMELEN) {
3705
      /*
3706
       * Okay someone managed to make a huge token, so he's ready to pay
3707
       * for the processing speed.
3708
       */
3709
3.47k
      xmlChar *buffer;
3710
3.47k
      int max = len * 2;
3711
3712
3.47k
      buffer = (xmlChar *) xmlMallocAtomic(max);
3713
3.47k
      if (buffer == NULL) {
3714
0
          xmlErrMemory(ctxt, NULL);
3715
0
    return(NULL);
3716
0
      }
3717
3.47k
      memcpy(buffer, buf, len);
3718
5.18M
      while (xmlIsNameChar(ctxt, c)) {
3719
5.17M
    if (count++ > XML_PARSER_CHUNK_SIZE) {
3720
52.5k
        count = 0;
3721
52.5k
        GROW;
3722
52.5k
                    if (ctxt->instate == XML_PARSER_EOF) {
3723
0
                        xmlFree(buffer);
3724
0
                        return(NULL);
3725
0
                    }
3726
52.5k
    }
3727
5.17M
    if (len + 10 > max) {
3728
6.35k
        xmlChar *tmp;
3729
3730
6.35k
        max *= 2;
3731
6.35k
        tmp = (xmlChar *) xmlRealloc(buffer, max);
3732
6.35k
        if (tmp == NULL) {
3733
0
      xmlErrMemory(ctxt, NULL);
3734
0
      xmlFree(buffer);
3735
0
      return(NULL);
3736
0
        }
3737
6.35k
        buffer = tmp;
3738
6.35k
    }
3739
5.17M
    COPY_BUF(l,buffer,len,c);
3740
5.17M
    NEXTL(l);
3741
5.17M
    c = CUR_CHAR(l);
3742
5.17M
                if (len > maxLength) {
3743
12
                    xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3744
12
                    xmlFree(buffer);
3745
12
                    return(NULL);
3746
12
                }
3747
5.17M
      }
3748
3.46k
      buffer[len] = 0;
3749
3.46k
      return(buffer);
3750
3.47k
  }
3751
3.41M
    }
3752
662k
    if (len == 0)
3753
41.2k
        return(NULL);
3754
621k
    if (len > maxLength) {
3755
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3756
0
        return(NULL);
3757
0
    }
3758
621k
    return(xmlStrndup(buf, len));
3759
621k
}
3760
3761
/**
3762
 * xmlParseEntityValue:
3763
 * @ctxt:  an XML parser context
3764
 * @orig:  if non-NULL store a copy of the original entity value
3765
 *
3766
 * DEPRECATED: Internal function, don't use.
3767
 *
3768
 * parse a value for ENTITY declarations
3769
 *
3770
 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3771
 *                 "'" ([^%&'] | PEReference | Reference)* "'"
3772
 *
3773
 * Returns the EntityValue parsed with reference substituted or NULL
3774
 */
3775
3776
xmlChar *
3777
1.36M
xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3778
1.36M
    xmlChar *buf = NULL;
3779
1.36M
    int len = 0;
3780
1.36M
    int size = XML_PARSER_BUFFER_SIZE;
3781
1.36M
    int c, l;
3782
1.36M
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3783
254k
                    XML_MAX_HUGE_LENGTH :
3784
1.36M
                    XML_MAX_TEXT_LENGTH;
3785
1.36M
    xmlChar stop;
3786
1.36M
    xmlChar *ret = NULL;
3787
1.36M
    const xmlChar *cur = NULL;
3788
1.36M
    xmlParserInputPtr input;
3789
3790
1.36M
    if (RAW == '"') stop = '"';
3791
256k
    else if (RAW == '\'') stop = '\'';
3792
0
    else {
3793
0
  xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
3794
0
  return(NULL);
3795
0
    }
3796
1.36M
    buf = (xmlChar *) xmlMallocAtomic(size);
3797
1.36M
    if (buf == NULL) {
3798
0
  xmlErrMemory(ctxt, NULL);
3799
0
  return(NULL);
3800
0
    }
3801
3802
    /*
3803
     * The content of the entity definition is copied in a buffer.
3804
     */
3805
3806
1.36M
    ctxt->instate = XML_PARSER_ENTITY_VALUE;
3807
1.36M
    input = ctxt->input;
3808
1.36M
    GROW;
3809
1.36M
    if (ctxt->instate == XML_PARSER_EOF)
3810
0
        goto error;
3811
1.36M
    NEXT;
3812
1.36M
    c = CUR_CHAR(l);
3813
    /*
3814
     * NOTE: 4.4.5 Included in Literal
3815
     * When a parameter entity reference appears in a literal entity
3816
     * value, ... a single or double quote character in the replacement
3817
     * text is always treated as a normal data character and will not
3818
     * terminate the literal.
3819
     * In practice it means we stop the loop only when back at parsing
3820
     * the initial entity and the quote is found
3821
     */
3822
69.6M
    while (((IS_CHAR(c)) && ((c != stop) || /* checked */
3823
69.6M
      (ctxt->input != input))) && (ctxt->instate != XML_PARSER_EOF)) {
3824
68.3M
  if (len + 5 >= size) {
3825
211k
      xmlChar *tmp;
3826
3827
211k
      size *= 2;
3828
211k
      tmp = (xmlChar *) xmlRealloc(buf, size);
3829
211k
      if (tmp == NULL) {
3830
0
    xmlErrMemory(ctxt, NULL);
3831
0
                goto error;
3832
0
      }
3833
211k
      buf = tmp;
3834
211k
  }
3835
68.3M
  COPY_BUF(l,buf,len,c);
3836
68.3M
  NEXTL(l);
3837
3838
68.3M
  GROW;
3839
68.3M
  c = CUR_CHAR(l);
3840
68.3M
  if (c == 0) {
3841
1.47k
      GROW;
3842
1.47k
      c = CUR_CHAR(l);
3843
1.47k
  }
3844
3845
68.3M
        if (len > maxLength) {
3846
0
            xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
3847
0
                           "entity value too long\n");
3848
0
            goto error;
3849
0
        }
3850
68.3M
    }
3851
1.36M
    buf[len] = 0;
3852
1.36M
    if (ctxt->instate == XML_PARSER_EOF)
3853
0
        goto error;
3854
1.36M
    if (c != stop) {
3855
2.63k
        xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
3856
2.63k
        goto error;
3857
2.63k
    }
3858
1.36M
    NEXT;
3859
3860
    /*
3861
     * Raise problem w.r.t. '&' and '%' being used in non-entities
3862
     * reference constructs. Note Charref will be handled in
3863
     * xmlStringDecodeEntities()
3864
     */
3865
1.36M
    cur = buf;
3866
54.2M
    while (*cur != 0) { /* non input consuming */
3867
52.9M
  if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3868
771k
      xmlChar *name;
3869
771k
      xmlChar tmp = *cur;
3870
771k
            int nameOk = 0;
3871
3872
771k
      cur++;
3873
771k
      name = xmlParseStringName(ctxt, &cur);
3874
771k
            if (name != NULL) {
3875
768k
                nameOk = 1;
3876
768k
                xmlFree(name);
3877
768k
            }
3878
771k
            if ((nameOk == 0) || (*cur != ';')) {
3879
7.25k
    xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
3880
7.25k
      "EntityValue: '%c' forbidden except for entities references\n",
3881
7.25k
                            tmp);
3882
7.25k
                goto error;
3883
7.25k
      }
3884
763k
      if ((tmp == '%') && (ctxt->inSubset == 1) &&
3885
763k
    (ctxt->inputNr == 1)) {
3886
967
    xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
3887
967
                goto error;
3888
967
      }
3889
763k
      if (*cur == 0)
3890
0
          break;
3891
763k
  }
3892
52.9M
  cur++;
3893
52.9M
    }
3894
3895
    /*
3896
     * Then PEReference entities are substituted.
3897
     *
3898
     * NOTE: 4.4.7 Bypassed
3899
     * When a general entity reference appears in the EntityValue in
3900
     * an entity declaration, it is bypassed and left as is.
3901
     * so XML_SUBSTITUTE_REF is not set here.
3902
     */
3903
1.35M
    ++ctxt->depth;
3904
1.35M
    ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3905
1.35M
                                  0, 0, 0);
3906
1.35M
    --ctxt->depth;
3907
1.35M
    if (orig != NULL) {
3908
1.35M
        *orig = buf;
3909
1.35M
        buf = NULL;
3910
1.35M
    }
3911
3912
1.36M
error:
3913
1.36M
    if (buf != NULL)
3914
10.8k
        xmlFree(buf);
3915
1.36M
    return(ret);
3916
1.35M
}
3917
3918
/**
3919
 * xmlParseAttValueComplex:
3920
 * @ctxt:  an XML parser context
3921
 * @len:   the resulting attribute len
3922
 * @normalize:  whether to apply the inner normalization
3923
 *
3924
 * parse a value for an attribute, this is the fallback function
3925
 * of xmlParseAttValue() when the attribute parsing requires handling
3926
 * of non-ASCII characters, or normalization compaction.
3927
 *
3928
 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3929
 */
3930
static xmlChar *
3931
14.2M
xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
3932
14.2M
    xmlChar limit = 0;
3933
14.2M
    xmlChar *buf = NULL;
3934
14.2M
    xmlChar *rep = NULL;
3935
14.2M
    size_t len = 0;
3936
14.2M
    size_t buf_size = 0;
3937
14.2M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3938
12.5M
                       XML_MAX_HUGE_LENGTH :
3939
14.2M
                       XML_MAX_TEXT_LENGTH;
3940
14.2M
    int c, l, in_space = 0;
3941
14.2M
    xmlChar *current = NULL;
3942
14.2M
    xmlEntityPtr ent;
3943
3944
14.2M
    if (NXT(0) == '"') {
3945
5.21M
  ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3946
5.21M
  limit = '"';
3947
5.21M
        NEXT;
3948
9.00M
    } else if (NXT(0) == '\'') {
3949
9.00M
  limit = '\'';
3950
9.00M
  ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3951
9.00M
        NEXT;
3952
9.00M
    } else {
3953
0
  xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
3954
0
  return(NULL);
3955
0
    }
3956
3957
    /*
3958
     * allocate a translation buffer.
3959
     */
3960
14.2M
    buf_size = XML_PARSER_BUFFER_SIZE;
3961
14.2M
    buf = (xmlChar *) xmlMallocAtomic(buf_size);
3962
14.2M
    if (buf == NULL) goto mem_error;
3963
3964
    /*
3965
     * OK loop until we reach one of the ending char or a size limit.
3966
     */
3967
14.2M
    c = CUR_CHAR(l);
3968
517M
    while (((NXT(0) != limit) && /* checked */
3969
517M
            (IS_CHAR(c)) && (c != '<')) &&
3970
517M
            (ctxt->instate != XML_PARSER_EOF)) {
3971
503M
  if (c == '&') {
3972
21.1M
      in_space = 0;
3973
21.1M
      if (NXT(1) == '#') {
3974
4.83M
    int val = xmlParseCharRef(ctxt);
3975
3976
4.83M
    if (val == '&') {
3977
77.8k
        if (ctxt->replaceEntities) {
3978
6.12k
      if (len + 10 > buf_size) {
3979
278
          growBuffer(buf, 10);
3980
278
      }
3981
6.12k
      buf[len++] = '&';
3982
71.6k
        } else {
3983
      /*
3984
       * The reparsing will be done in xmlStringGetNodeList()
3985
       * called by the attribute() function in SAX.c
3986
       */
3987
71.6k
      if (len + 10 > buf_size) {
3988
288
          growBuffer(buf, 10);
3989
288
      }
3990
71.6k
      buf[len++] = '&';
3991
71.6k
      buf[len++] = '#';
3992
71.6k
      buf[len++] = '3';
3993
71.6k
      buf[len++] = '8';
3994
71.6k
      buf[len++] = ';';
3995
71.6k
        }
3996
4.75M
    } else if (val != 0) {
3997
4.37M
        if (len + 10 > buf_size) {
3998
3.85k
      growBuffer(buf, 10);
3999
3.85k
        }
4000
4.37M
        len += xmlCopyChar(0, &buf[len], val);
4001
4.37M
    }
4002
16.3M
      } else {
4003
16.3M
    ent = xmlParseEntityRef(ctxt);
4004
16.3M
    ctxt->nbentities++;
4005
16.3M
    if (ent != NULL)
4006
5.74M
        ctxt->nbentities += ent->owner;
4007
16.3M
    if ((ent != NULL) &&
4008
16.3M
        (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
4009
5.54M
        if (len + 10 > buf_size) {
4010
336
      growBuffer(buf, 10);
4011
336
        }
4012
5.54M
        if ((ctxt->replaceEntities == 0) &&
4013
5.54M
            (ent->content[0] == '&')) {
4014
2.30M
      buf[len++] = '&';
4015
2.30M
      buf[len++] = '#';
4016
2.30M
      buf[len++] = '3';
4017
2.30M
      buf[len++] = '8';
4018
2.30M
      buf[len++] = ';';
4019
3.23M
        } else {
4020
3.23M
      buf[len++] = ent->content[0];
4021
3.23M
        }
4022
10.7M
    } else if ((ent != NULL) &&
4023
10.7M
               (ctxt->replaceEntities != 0)) {
4024
71.4k
        if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
4025
71.4k
      ++ctxt->depth;
4026
71.4k
      rep = xmlStringDecodeEntities(ctxt, ent->content,
4027
71.4k
                  XML_SUBSTITUTE_REF,
4028
71.4k
                  0, 0, 0);
4029
71.4k
      --ctxt->depth;
4030
71.4k
      if (rep != NULL) {
4031
69.4k
          current = rep;
4032
6.06M
          while (*current != 0) { /* non input consuming */
4033
6.00M
                                if ((*current == 0xD) || (*current == 0xA) ||
4034
6.00M
                                    (*current == 0x9)) {
4035
91.7k
                                    buf[len++] = 0x20;
4036
91.7k
                                    current++;
4037
91.7k
                                } else
4038
5.90M
                                    buf[len++] = *current++;
4039
6.00M
        if (len + 10 > buf_size) {
4040
9.11k
            growBuffer(buf, 10);
4041
9.11k
        }
4042
6.00M
          }
4043
69.4k
          xmlFree(rep);
4044
69.4k
          rep = NULL;
4045
69.4k
      }
4046
71.4k
        } else {
4047
0
      if (len + 10 > buf_size) {
4048
0
          growBuffer(buf, 10);
4049
0
      }
4050
0
      if (ent->content != NULL)
4051
0
          buf[len++] = ent->content[0];
4052
0
        }
4053
10.7M
    } else if (ent != NULL) {
4054
130k
        int i = xmlStrlen(ent->name);
4055
130k
        const xmlChar *cur = ent->name;
4056
4057
        /*
4058
         * This may look absurd but is needed to detect
4059
         * entities problems
4060
         */
4061
130k
        if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
4062
130k
      (ent->content != NULL) && (ent->checked == 0)) {
4063
19.7k
      unsigned long oldnbent = ctxt->nbentities, diff;
4064
4065
19.7k
      ++ctxt->depth;
4066
19.7k
      rep = xmlStringDecodeEntities(ctxt, ent->content,
4067
19.7k
              XML_SUBSTITUTE_REF, 0, 0, 0);
4068
19.7k
      --ctxt->depth;
4069
4070
19.7k
                        diff = ctxt->nbentities - oldnbent + 1;
4071
19.7k
                        if (diff > INT_MAX / 2)
4072
0
                            diff = INT_MAX / 2;
4073
19.7k
                        ent->checked = diff * 2;
4074
19.7k
      if (rep != NULL) {
4075
19.4k
          if (xmlStrchr(rep, '<'))
4076
973
              ent->checked |= 1;
4077
19.4k
          xmlFree(rep);
4078
19.4k
          rep = NULL;
4079
19.4k
      } else {
4080
276
                            ent->content[0] = 0;
4081
276
                        }
4082
19.7k
        }
4083
4084
        /*
4085
         * Just output the reference
4086
         */
4087
130k
        buf[len++] = '&';
4088
131k
        while (len + i + 10 > buf_size) {
4089
786
      growBuffer(buf, i + 10);
4090
786
        }
4091
533k
        for (;i > 0;i--)
4092
402k
      buf[len++] = *cur++;
4093
130k
        buf[len++] = ';';
4094
130k
    }
4095
16.3M
      }
4096
482M
  } else {
4097
482M
      if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
4098
122M
          if ((len != 0) || (!normalize)) {
4099
122M
        if ((!normalize) || (!in_space)) {
4100
122M
      COPY_BUF(l,buf,len,0x20);
4101
122M
      while (len + 10 > buf_size) {
4102
401k
          growBuffer(buf, 10);
4103
401k
      }
4104
122M
        }
4105
122M
        in_space = 1;
4106
122M
    }
4107
360M
      } else {
4108
360M
          in_space = 0;
4109
360M
    COPY_BUF(l,buf,len,c);
4110
360M
    if (len + 10 > buf_size) {
4111
1.23M
        growBuffer(buf, 10);
4112
1.23M
    }
4113
360M
      }
4114
482M
      NEXTL(l);
4115
482M
  }
4116
503M
  GROW;
4117
503M
  c = CUR_CHAR(l);
4118
503M
        if (len > maxLength) {
4119
0
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4120
0
                           "AttValue length too long\n");
4121
0
            goto mem_error;
4122
0
        }
4123
503M
    }
4124
14.2M
    if (ctxt->instate == XML_PARSER_EOF)
4125
0
        goto error;
4126
4127
14.2M
    if ((in_space) && (normalize)) {
4128
52.3k
        while ((len > 0) && (buf[len - 1] == 0x20)) len--;
4129
25.6k
    }
4130
14.2M
    buf[len] = 0;
4131
14.2M
    if (RAW == '<') {
4132
2.79M
  xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
4133
11.4M
    } else if (RAW != limit) {
4134
1.13M
  if ((c != 0) && (!IS_CHAR(c))) {
4135
678k
      xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
4136
678k
         "invalid character in attribute value\n");
4137
678k
  } else {
4138
454k
      xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4139
454k
         "AttValue: ' expected\n");
4140
454k
        }
4141
1.13M
    } else
4142
10.2M
  NEXT;
4143
4144
14.2M
    if (attlen != NULL) *attlen = len;
4145
14.2M
    return(buf);
4146
4147
0
mem_error:
4148
0
    xmlErrMemory(ctxt, NULL);
4149
0
error:
4150
0
    if (buf != NULL)
4151
0
        xmlFree(buf);
4152
0
    if (rep != NULL)
4153
0
        xmlFree(rep);
4154
0
    return(NULL);
4155
0
}
4156
4157
/**
4158
 * xmlParseAttValue:
4159
 * @ctxt:  an XML parser context
4160
 *
4161
 * DEPRECATED: Internal function, don't use.
4162
 *
4163
 * parse a value for an attribute
4164
 * Note: the parser won't do substitution of entities here, this
4165
 * will be handled later in xmlStringGetNodeList
4166
 *
4167
 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
4168
 *                   "'" ([^<&'] | Reference)* "'"
4169
 *
4170
 * 3.3.3 Attribute-Value Normalization:
4171
 * Before the value of an attribute is passed to the application or
4172
 * checked for validity, the XML processor must normalize it as follows:
4173
 * - a character reference is processed by appending the referenced
4174
 *   character to the attribute value
4175
 * - an entity reference is processed by recursively processing the
4176
 *   replacement text of the entity
4177
 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4178
 *   appending #x20 to the normalized value, except that only a single
4179
 *   #x20 is appended for a "#xD#xA" sequence that is part of an external
4180
 *   parsed entity or the literal entity value of an internal parsed entity
4181
 * - other characters are processed by appending them to the normalized value
4182
 * If the declared value is not CDATA, then the XML processor must further
4183
 * process the normalized attribute value by discarding any leading and
4184
 * trailing space (#x20) characters, and by replacing sequences of space
4185
 * (#x20) characters by a single space (#x20) character.
4186
 * All attributes for which no declaration has been read should be treated
4187
 * by a non-validating parser as if declared CDATA.
4188
 *
4189
 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
4190
 */
4191
4192
4193
xmlChar *
4194
89.6M
xmlParseAttValue(xmlParserCtxtPtr ctxt) {
4195
89.6M
    if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
4196
89.6M
    return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
4197
89.6M
}
4198
4199
/**
4200
 * xmlParseSystemLiteral:
4201
 * @ctxt:  an XML parser context
4202
 *
4203
 * DEPRECATED: Internal function, don't use.
4204
 *
4205
 * parse an XML Literal
4206
 *
4207
 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4208
 *
4209
 * Returns the SystemLiteral parsed or NULL
4210
 */
4211
4212
xmlChar *
4213
255k
xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
4214
255k
    xmlChar *buf = NULL;
4215
255k
    int len = 0;
4216
255k
    int size = XML_PARSER_BUFFER_SIZE;
4217
255k
    int cur, l;
4218
255k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4219
56.4k
                    XML_MAX_TEXT_LENGTH :
4220
255k
                    XML_MAX_NAME_LENGTH;
4221
255k
    xmlChar stop;
4222
255k
    int state = ctxt->instate;
4223
255k
    int count = 0;
4224
4225
255k
    SHRINK;
4226
255k
    if (RAW == '"') {
4227
233k
        NEXT;
4228
233k
  stop = '"';
4229
233k
    } else if (RAW == '\'') {
4230
17.4k
        NEXT;
4231
17.4k
  stop = '\'';
4232
17.4k
    } else {
4233
5.30k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4234
5.30k
  return(NULL);
4235
5.30k
    }
4236
4237
250k
    buf = (xmlChar *) xmlMallocAtomic(size);
4238
250k
    if (buf == NULL) {
4239
0
        xmlErrMemory(ctxt, NULL);
4240
0
  return(NULL);
4241
0
    }
4242
250k
    ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
4243
250k
    cur = CUR_CHAR(l);
4244
9.16M
    while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
4245
8.91M
  if (len + 5 >= size) {
4246
9.06k
      xmlChar *tmp;
4247
4248
9.06k
      size *= 2;
4249
9.06k
      tmp = (xmlChar *) xmlRealloc(buf, size);
4250
9.06k
      if (tmp == NULL) {
4251
0
          xmlFree(buf);
4252
0
    xmlErrMemory(ctxt, NULL);
4253
0
    ctxt->instate = (xmlParserInputState) state;
4254
0
    return(NULL);
4255
0
      }
4256
9.06k
      buf = tmp;
4257
9.06k
  }
4258
8.91M
  count++;
4259
8.91M
  if (count > 50) {
4260
88.0k
      SHRINK;
4261
88.0k
      GROW;
4262
88.0k
      count = 0;
4263
88.0k
            if (ctxt->instate == XML_PARSER_EOF) {
4264
0
          xmlFree(buf);
4265
0
    return(NULL);
4266
0
            }
4267
88.0k
  }
4268
8.91M
  COPY_BUF(l,buf,len,cur);
4269
8.91M
  NEXTL(l);
4270
8.91M
  cur = CUR_CHAR(l);
4271
8.91M
  if (cur == 0) {
4272
3.36k
      GROW;
4273
3.36k
      SHRINK;
4274
3.36k
      cur = CUR_CHAR(l);
4275
3.36k
  }
4276
8.91M
        if (len > maxLength) {
4277
12
            xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
4278
12
            xmlFree(buf);
4279
12
            ctxt->instate = (xmlParserInputState) state;
4280
12
            return(NULL);
4281
12
        }
4282
8.91M
    }
4283
250k
    buf[len] = 0;
4284
250k
    ctxt->instate = (xmlParserInputState) state;
4285
250k
    if (!IS_CHAR(cur)) {
4286
4.75k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4287
245k
    } else {
4288
245k
  NEXT;
4289
245k
    }
4290
250k
    return(buf);
4291
250k
}
4292
4293
/**
4294
 * xmlParsePubidLiteral:
4295
 * @ctxt:  an XML parser context
4296
 *
4297
 * DEPRECATED: Internal function, don't use.
4298
 *
4299
 * parse an XML public literal
4300
 *
4301
 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4302
 *
4303
 * Returns the PubidLiteral parsed or NULL.
4304
 */
4305
4306
xmlChar *
4307
70.9k
xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
4308
70.9k
    xmlChar *buf = NULL;
4309
70.9k
    int len = 0;
4310
70.9k
    int size = XML_PARSER_BUFFER_SIZE;
4311
70.9k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4312
16.7k
                    XML_MAX_TEXT_LENGTH :
4313
70.9k
                    XML_MAX_NAME_LENGTH;
4314
70.9k
    xmlChar cur;
4315
70.9k
    xmlChar stop;
4316
70.9k
    int count = 0;
4317
70.9k
    xmlParserInputState oldstate = ctxt->instate;
4318
4319
70.9k
    SHRINK;
4320
70.9k
    if (RAW == '"') {
4321
62.5k
        NEXT;
4322
62.5k
  stop = '"';
4323
62.5k
    } else if (RAW == '\'') {
4324
7.72k
        NEXT;
4325
7.72k
  stop = '\'';
4326
7.72k
    } else {
4327
737
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4328
737
  return(NULL);
4329
737
    }
4330
70.2k
    buf = (xmlChar *) xmlMallocAtomic(size);
4331
70.2k
    if (buf == NULL) {
4332
0
  xmlErrMemory(ctxt, NULL);
4333
0
  return(NULL);
4334
0
    }
4335
70.2k
    ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
4336
70.2k
    cur = CUR;
4337
3.93M
    while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
4338
3.86M
  if (len + 1 >= size) {
4339
4.52k
      xmlChar *tmp;
4340
4341
4.52k
      size *= 2;
4342
4.52k
      tmp = (xmlChar *) xmlRealloc(buf, size);
4343
4.52k
      if (tmp == NULL) {
4344
0
    xmlErrMemory(ctxt, NULL);
4345
0
    xmlFree(buf);
4346
0
    return(NULL);
4347
0
      }
4348
4.52k
      buf = tmp;
4349
4.52k
  }
4350
3.86M
  buf[len++] = cur;
4351
3.86M
  count++;
4352
3.86M
  if (count > 50) {
4353
40.5k
      SHRINK;
4354
40.5k
      GROW;
4355
40.5k
      count = 0;
4356
40.5k
            if (ctxt->instate == XML_PARSER_EOF) {
4357
0
    xmlFree(buf);
4358
0
    return(NULL);
4359
0
            }
4360
40.5k
  }
4361
3.86M
  NEXT;
4362
3.86M
  cur = CUR;
4363
3.86M
  if (cur == 0) {
4364
766
      GROW;
4365
766
      SHRINK;
4366
766
      cur = CUR;
4367
766
  }
4368
3.86M
        if (len > maxLength) {
4369
0
            xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
4370
0
            xmlFree(buf);
4371
0
            return(NULL);
4372
0
        }
4373
3.86M
    }
4374
70.2k
    buf[len] = 0;
4375
70.2k
    if (cur != stop) {
4376
3.01k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4377
67.2k
    } else {
4378
67.2k
  NEXT;
4379
67.2k
    }
4380
70.2k
    ctxt->instate = oldstate;
4381
70.2k
    return(buf);
4382
70.2k
}
4383
4384
/* Forward declaration of xmlParseCharDataComplex(). */
static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
4385
4386
/*
 * Lookup table used for the test in the inner loop of the char data
 * testing. It is indexed by byte value: an entry equal to its own index
 * marks a byte listed as plain data, a zero entry marks every other
 * byte (control characters except 0x9, '&', '<', ']' and all bytes
 * above 0x7F).
 */
4389
static const unsigned char test_char_data[256] = {
4390
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4391
    0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x9 is kept; 0xA (LF) and 0xD (CR) are zero */
4392
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4393
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4394
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* 0x26 '&' is zero */
4395
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
4396
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
4397
    0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* 0x3C '<' is zero */
4398
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
4399
    0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
4400
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
4401
    0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* 0x5D ']' is zero */
4402
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
4403
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
4404
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
4405
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
4406
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* non-ASCII bytes (0x80-0xFF) are all zero */
4407
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4408
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4409
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4410
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4411
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4412
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4413
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4414
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4415
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4416
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4417
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4418
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4419
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4420
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4421
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
4422
};
4423
4424
/**
4425
 * xmlParseCharData:
4426
 * @ctxt:  an XML parser context
4427
 * @cdata:  int indicating whether we are within a CDATA section
4428
 *
4429
 * DEPRECATED: Internal function, don't use.
4430
 *
4431
 * parse a CharData section.
4432
 * if we are within a CDATA section ']]>' marks an end of section.
4433
 *
4434
 * The right angle bracket (>) may be represented using the string "&gt;",
4435
 * and must, for compatibility, be escaped using "&gt;" or a character
4436
 * reference when it appears in the string "]]>" in content, when that
4437
 * string is not marking the end of a CDATA section.
4438
 *
4439
 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4440
 */
4441
4442
void
4443
302M
xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
4444
302M
    const xmlChar *in;
4445
302M
    int nbchar = 0;
4446
302M
    int line = ctxt->input->line;
4447
302M
    int col = ctxt->input->col;
4448
302M
    int ccol;
4449
4450
302M
    SHRINK;
4451
302M
    GROW;
4452
    /*
4453
     * Accelerated common case where input don't need to be
4454
     * modified before passing it to the handler.
4455
     */
4456
302M
    if (!cdata) {
4457
302M
  in = ctxt->input->cur;
4458
328M
  do {
4459
413M
get_more_space:
4460
955M
      while (*in == 0x20) { in++; ctxt->input->col++; }
4461
413M
      if (*in == 0xA) {
4462
87.3M
    do {
4463
87.3M
        ctxt->input->line++; ctxt->input->col = 1;
4464
87.3M
        in++;
4465
87.3M
    } while (*in == 0xA);
4466
84.4M
    goto get_more_space;
4467
84.4M
      }
4468
328M
      if (*in == '<') {
4469
69.7M
    nbchar = in - ctxt->input->cur;
4470
69.7M
    if (nbchar > 0) {
4471
69.6M
        const xmlChar *tmp = ctxt->input->cur;
4472
69.6M
        ctxt->input->cur = in;
4473
4474
69.6M
        if ((ctxt->sax != NULL) &&
4475
69.6M
            (ctxt->sax->ignorableWhitespace !=
4476
69.6M
             ctxt->sax->characters)) {
4477
43.5M
      if (areBlanks(ctxt, tmp, nbchar, 1)) {
4478
3.20M
          if (ctxt->sax->ignorableWhitespace != NULL)
4479
3.20M
        ctxt->sax->ignorableWhitespace(ctxt->userData,
4480
3.20M
                   tmp, nbchar);
4481
40.3M
      } else {
4482
40.3M
          if (ctxt->sax->characters != NULL)
4483
40.3M
        ctxt->sax->characters(ctxt->userData,
4484
40.3M
                  tmp, nbchar);
4485
40.3M
          if (*ctxt->space == -1)
4486
12.6M
              *ctxt->space = -2;
4487
40.3M
      }
4488
43.5M
        } else if ((ctxt->sax != NULL) &&
4489
26.0M
                   (ctxt->sax->characters != NULL)) {
4490
26.0M
      ctxt->sax->characters(ctxt->userData,
4491
26.0M
                tmp, nbchar);
4492
26.0M
        }
4493
69.6M
    }
4494
69.7M
    return;
4495
69.7M
      }
4496
4497
328M
get_more:
4498
328M
            ccol = ctxt->input->col;
4499
3.80G
      while (test_char_data[*in]) {
4500
3.47G
    in++;
4501
3.47G
    ccol++;
4502
3.47G
      }
4503
328M
      ctxt->input->col = ccol;
4504
328M
      if (*in == 0xA) {
4505
164M
    do {
4506
164M
        ctxt->input->line++; ctxt->input->col = 1;
4507
164M
        in++;
4508
164M
    } while (*in == 0xA);
4509
66.5M
    goto get_more;
4510
66.5M
      }
4511
262M
      if (*in == ']') {
4512
3.28M
    if ((in[1] == ']') && (in[2] == '>')) {
4513
239k
        xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4514
239k
        ctxt->input->cur = in + 1;
4515
239k
        return;
4516
239k
    }
4517
3.04M
    in++;
4518
3.04M
    ctxt->input->col++;
4519
3.04M
    goto get_more;
4520
3.28M
      }
4521
259M
      nbchar = in - ctxt->input->cur;
4522
259M
      if (nbchar > 0) {
4523
137M
    if ((ctxt->sax != NULL) &&
4524
137M
        (ctxt->sax->ignorableWhitespace !=
4525
137M
         ctxt->sax->characters) &&
4526
137M
        (IS_BLANK_CH(*ctxt->input->cur))) {
4527
44.5M
        const xmlChar *tmp = ctxt->input->cur;
4528
44.5M
        ctxt->input->cur = in;
4529
4530
44.5M
        if (areBlanks(ctxt, tmp, nbchar, 0)) {
4531
1.87M
            if (ctxt->sax->ignorableWhitespace != NULL)
4532
1.87M
          ctxt->sax->ignorableWhitespace(ctxt->userData,
4533
1.87M
                 tmp, nbchar);
4534
42.6M
        } else {
4535
42.6M
            if (ctxt->sax->characters != NULL)
4536
42.6M
          ctxt->sax->characters(ctxt->userData,
4537
42.6M
              tmp, nbchar);
4538
42.6M
      if (*ctxt->space == -1)
4539
10.1M
          *ctxt->space = -2;
4540
42.6M
        }
4541
44.5M
                    line = ctxt->input->line;
4542
44.5M
                    col = ctxt->input->col;
4543
92.9M
    } else if (ctxt->sax != NULL) {
4544
92.9M
        if (ctxt->sax->characters != NULL)
4545
92.9M
      ctxt->sax->characters(ctxt->userData,
4546
92.9M
                ctxt->input->cur, nbchar);
4547
92.9M
                    line = ctxt->input->line;
4548
92.9M
                    col = ctxt->input->col;
4549
92.9M
    }
4550
                /* something really bad happened in the SAX callback */
4551
137M
                if (ctxt->instate != XML_PARSER_CONTENT)
4552
0
                    return;
4553
137M
      }
4554
259M
      ctxt->input->cur = in;
4555
259M
      if (*in == 0xD) {
4556
27.2M
    in++;
4557
27.2M
    if (*in == 0xA) {
4558
27.0M
        ctxt->input->cur = in;
4559
27.0M
        in++;
4560
27.0M
        ctxt->input->line++; ctxt->input->col = 1;
4561
27.0M
        continue; /* while */
4562
27.0M
    }
4563
221k
    in--;
4564
221k
      }
4565
232M
      if (*in == '<') {
4566
100M
    return;
4567
100M
      }
4568
131M
      if (*in == '&') {
4569
19.6M
    return;
4570
19.6M
      }
4571
112M
      SHRINK;
4572
112M
      GROW;
4573
112M
            if (ctxt->instate == XML_PARSER_EOF)
4574
0
    return;
4575
112M
      in = ctxt->input->cur;
4576
139M
  } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09) || (*in == 0x0a));
4577
112M
  nbchar = 0;
4578
112M
    }
4579
112M
    ctxt->input->line = line;
4580
112M
    ctxt->input->col = col;
4581
112M
    xmlParseCharDataComplex(ctxt, cdata);
4582
112M
}
4583
4584
/**
4585
 * xmlParseCharDataComplex:
4586
 * @ctxt:  an XML parser context
4587
 * @cdata:  int indicating whether we are within a CDATA section
4588
 *
4589
 * parse a CharData section.this is the fallback function
4590
 * of xmlParseCharData() when the parsing requires handling
4591
 * of non-ASCII characters.
4592
 */
4593
static void
4594
112M
xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
4595
112M
    xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4596
112M
    int nbchar = 0;
4597
112M
    int cur, l;
4598
112M
    int count = 0;
4599
4600
112M
    SHRINK;
4601
112M
    GROW;
4602
112M
    cur = CUR_CHAR(l);
4603
2.00G
    while ((cur != '<') && /* checked */
4604
2.00G
           (cur != '&') &&
4605
2.00G
     (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
4606
1.89G
  if ((cur == ']') && (NXT(1) == ']') &&
4607
1.89G
      (NXT(2) == '>')) {
4608
595k
      if (cdata) break;
4609
595k
      else {
4610
595k
    xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4611
595k
      }
4612
595k
  }
4613
1.89G
  COPY_BUF(l,buf,nbchar,cur);
4614
  /* move current position before possible calling of ctxt->sax->characters */
4615
1.89G
  NEXTL(l);
4616
1.89G
  cur = CUR_CHAR(l);
4617
1.89G
  if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
4618
4.08M
      buf[nbchar] = 0;
4619
4620
      /*
4621
       * OK the segment is to be consumed as chars.
4622
       */
4623
4.08M
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4624
746k
    if (areBlanks(ctxt, buf, nbchar, 0)) {
4625
2.79k
        if (ctxt->sax->ignorableWhitespace != NULL)
4626
2.79k
      ctxt->sax->ignorableWhitespace(ctxt->userData,
4627
2.79k
                                     buf, nbchar);
4628
743k
    } else {
4629
743k
        if (ctxt->sax->characters != NULL)
4630
743k
      ctxt->sax->characters(ctxt->userData, buf, nbchar);
4631
743k
        if ((ctxt->sax->characters !=
4632
743k
             ctxt->sax->ignorableWhitespace) &&
4633
743k
      (*ctxt->space == -1))
4634
14.9k
      *ctxt->space = -2;
4635
743k
    }
4636
746k
      }
4637
4.08M
      nbchar = 0;
4638
            /* something really bad happened in the SAX callback */
4639
4.08M
            if (ctxt->instate != XML_PARSER_CONTENT)
4640
0
                return;
4641
4.08M
  }
4642
1.89G
  count++;
4643
1.89G
  if (count > 50) {
4644
31.0M
      SHRINK;
4645
31.0M
      GROW;
4646
31.0M
      count = 0;
4647
31.0M
            if (ctxt->instate == XML_PARSER_EOF)
4648
0
    return;
4649
31.0M
  }
4650
1.89G
    }
4651
112M
    if (nbchar != 0) {
4652
18.3M
        buf[nbchar] = 0;
4653
  /*
4654
   * OK the segment is to be consumed as chars.
4655
   */
4656
18.3M
  if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4657
2.77M
      if (areBlanks(ctxt, buf, nbchar, 0)) {
4658
2.39k
    if (ctxt->sax->ignorableWhitespace != NULL)
4659
2.39k
        ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4660
2.77M
      } else {
4661
2.77M
    if (ctxt->sax->characters != NULL)
4662
2.77M
        ctxt->sax->characters(ctxt->userData, buf, nbchar);
4663
2.77M
    if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4664
2.77M
        (*ctxt->space == -1))
4665
512k
        *ctxt->space = -2;
4666
2.77M
      }
4667
2.77M
  }
4668
18.3M
    }
4669
112M
    if ((cur != 0) && (!IS_CHAR(cur))) {
4670
  /* Generate the error and skip the offending character */
4671
95.1M
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4672
95.1M
                          "PCDATA invalid Char value %d\n",
4673
95.1M
                    cur);
4674
95.1M
  NEXTL(l);
4675
95.1M
    }
4676
112M
}
4677
4678
/**
4679
 * xmlParseExternalID:
4680
 * @ctxt:  an XML parser context
4681
 * @publicID:  a xmlChar** receiving PubidLiteral
4682
 * @strict: indicate whether we should restrict parsing to only
4683
 *          production [75], see NOTE below
4684
 *
4685
 * DEPRECATED: Internal function, don't use.
4686
 *
4687
 * Parse an External ID or a Public ID
4688
 *
4689
 * NOTE: Productions [75] and [83] interact badly since [75] can generate
4690
 *       'PUBLIC' S PubidLiteral S SystemLiteral
4691
 *
4692
 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4693
 *                   | 'PUBLIC' S PubidLiteral S SystemLiteral
4694
 *
4695
 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4696
 *
4697
 * Returns the function returns SystemLiteral and in the second
4698
 *                case publicID receives PubidLiteral, is strict is off
4699
 *                it is possible to return NULL and have publicID set.
4700
 */
4701
4702
xmlChar *
4703
533k
xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4704
533k
    xmlChar *URI = NULL;
4705
4706
533k
    SHRINK;
4707
4708
533k
    *publicID = NULL;
4709
533k
    if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
4710
188k
        SKIP(6);
4711
188k
  if (SKIP_BLANKS == 0) {
4712
548
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4713
548
                     "Space required after 'SYSTEM'\n");
4714
548
  }
4715
188k
  URI = xmlParseSystemLiteral(ctxt);
4716
188k
  if (URI == NULL) {
4717
728
      xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4718
728
        }
4719
344k
    } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
4720
70.9k
        SKIP(6);
4721
70.9k
  if (SKIP_BLANKS == 0) {
4722
506
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4723
506
        "Space required after 'PUBLIC'\n");
4724
506
  }
4725
70.9k
  *publicID = xmlParsePubidLiteral(ctxt);
4726
70.9k
  if (*publicID == NULL) {
4727
737
      xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
4728
737
  }
4729
70.9k
  if (strict) {
4730
      /*
4731
       * We don't handle [83] so "S SystemLiteral" is required.
4732
       */
4733
67.3k
      if (SKIP_BLANKS == 0) {
4734
4.29k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4735
4.29k
      "Space required after the Public Identifier\n");
4736
4.29k
      }
4737
67.3k
  } else {
4738
      /*
4739
       * We handle [83] so we return immediately, if
4740
       * "S SystemLiteral" is not detected. We skip blanks if no
4741
             * system literal was found, but this is harmless since we must
4742
             * be at the end of a NotationDecl.
4743
       */
4744
3.65k
      if (SKIP_BLANKS == 0) return(NULL);
4745
297
      if ((CUR != '\'') && (CUR != '"')) return(NULL);
4746
297
  }
4747
67.4k
  URI = xmlParseSystemLiteral(ctxt);
4748
67.4k
  if (URI == NULL) {
4749
4.58k
      xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4750
4.58k
        }
4751
67.4k
    }
4752
529k
    return(URI);
4753
533k
}
4754
4755
/**
4756
 * xmlParseCommentComplex:
4757
 * @ctxt:  an XML parser context
4758
 * @buf:  the already parsed part of the buffer
4759
 * @len:  number of bytes in the buffer
4760
 * @size:  allocated size of the buffer
4761
 *
4762
 * Skip an XML (SGML) comment <!-- .... -->
4763
 *  The spec says that "For compatibility, the string "--" (double-hyphen)
4764
 *  must not occur within comments. "
4765
 * This is the slow routine in case the accelerator for ascii didn't work
4766
 *
4767
 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4768
 */
4769
static void
4770
xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf,
4771
3.13M
                       size_t len, size_t size) {
4772
3.13M
    int q, ql;
4773
3.13M
    int r, rl;
4774
3.13M
    int cur, l;
4775
3.13M
    size_t count = 0;
4776
3.13M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4777
2.86M
                       XML_MAX_HUGE_LENGTH :
4778
3.13M
                       XML_MAX_TEXT_LENGTH;
4779
3.13M
    int inputid;
4780
4781
3.13M
    inputid = ctxt->input->id;
4782
4783
3.13M
    if (buf == NULL) {
4784
74.1k
        len = 0;
4785
74.1k
  size = XML_PARSER_BUFFER_SIZE;
4786
74.1k
  buf = (xmlChar *) xmlMallocAtomic(size);
4787
74.1k
  if (buf == NULL) {
4788
0
      xmlErrMemory(ctxt, NULL);
4789
0
      return;
4790
0
  }
4791
74.1k
    }
4792
3.13M
    GROW; /* Assure there's enough input data */
4793
3.13M
    q = CUR_CHAR(ql);
4794
3.13M
    if (q == 0)
4795
287k
        goto not_terminated;
4796
2.84M
    if (!IS_CHAR(q)) {
4797
269k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4798
269k
                          "xmlParseComment: invalid xmlChar value %d\n",
4799
269k
                    q);
4800
269k
  xmlFree (buf);
4801
269k
  return;
4802
269k
    }
4803
2.57M
    NEXTL(ql);
4804
2.57M
    r = CUR_CHAR(rl);
4805
2.57M
    if (r == 0)
4806
67.5k
        goto not_terminated;
4807
2.51M
    if (!IS_CHAR(r)) {
4808
50.5k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4809
50.5k
                          "xmlParseComment: invalid xmlChar value %d\n",
4810
50.5k
                    q);
4811
50.5k
  xmlFree (buf);
4812
50.5k
  return;
4813
50.5k
    }
4814
2.46M
    NEXTL(rl);
4815
2.46M
    cur = CUR_CHAR(l);
4816
2.46M
    if (cur == 0)
4817
34.5k
        goto not_terminated;
4818
1.11G
    while (IS_CHAR(cur) && /* checked */
4819
1.11G
           ((cur != '>') ||
4820
1.11G
      (r != '-') || (q != '-'))) {
4821
1.10G
  if ((r == '-') && (q == '-')) {
4822
860k
      xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
4823
860k
  }
4824
1.10G
  if (len + 5 >= size) {
4825
3.43M
      xmlChar *new_buf;
4826
3.43M
            size_t new_size;
4827
4828
3.43M
      new_size = size * 2;
4829
3.43M
      new_buf = (xmlChar *) xmlRealloc(buf, new_size);
4830
3.43M
      if (new_buf == NULL) {
4831
0
    xmlFree (buf);
4832
0
    xmlErrMemory(ctxt, NULL);
4833
0
    return;
4834
0
      }
4835
3.43M
      buf = new_buf;
4836
3.43M
            size = new_size;
4837
3.43M
  }
4838
1.10G
  COPY_BUF(ql,buf,len,q);
4839
1.10G
  q = r;
4840
1.10G
  ql = rl;
4841
1.10G
  r = cur;
4842
1.10G
  rl = l;
4843
4844
1.10G
  count++;
4845
1.10G
  if (count > 50) {
4846
20.7M
      SHRINK;
4847
20.7M
      GROW;
4848
20.7M
      count = 0;
4849
20.7M
            if (ctxt->instate == XML_PARSER_EOF) {
4850
0
    xmlFree(buf);
4851
0
    return;
4852
0
            }
4853
20.7M
  }
4854
1.10G
  NEXTL(l);
4855
1.10G
  cur = CUR_CHAR(l);
4856
1.10G
  if (cur == 0) {
4857
475k
      SHRINK;
4858
475k
      GROW;
4859
475k
      cur = CUR_CHAR(l);
4860
475k
  }
4861
4862
1.10G
        if (len > maxLength) {
4863
0
            xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4864
0
                         "Comment too big found", NULL);
4865
0
            xmlFree (buf);
4866
0
            return;
4867
0
        }
4868
1.10G
    }
4869
2.42M
    buf[len] = 0;
4870
2.42M
    if (cur == 0) {
4871
475k
  xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4872
475k
                       "Comment not terminated \n<!--%.50s\n", buf);
4873
1.95M
    } else if (!IS_CHAR(cur)) {
4874
764k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4875
764k
                          "xmlParseComment: invalid xmlChar value %d\n",
4876
764k
                    cur);
4877
1.18M
    } else {
4878
1.18M
  if (inputid != ctxt->input->id) {
4879
0
      xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4880
0
               "Comment doesn't start and stop in the same"
4881
0
                           " entity\n");
4882
0
  }
4883
1.18M
        NEXT;
4884
1.18M
  if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4885
1.18M
      (!ctxt->disableSAX))
4886
113k
      ctxt->sax->comment(ctxt->userData, buf);
4887
1.18M
    }
4888
2.42M
    xmlFree(buf);
4889
2.42M
    return;
4890
389k
not_terminated:
4891
389k
    xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4892
389k
       "Comment not terminated\n", NULL);
4893
389k
    xmlFree(buf);
4894
389k
    return;
4895
2.42M
}
4896
4897
/**
4898
 * xmlParseComment:
4899
 * @ctxt:  an XML parser context
4900
 *
4901
 * DEPRECATED: Internal function, don't use.
4902
 *
4903
 * Skip an XML (SGML) comment <!-- .... -->
4904
 *  The spec says that "For compatibility, the string "--" (double-hyphen)
4905
 *  must not occur within comments. "
4906
 *
4907
 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4908
 */
4909
void
4910
11.6M
xmlParseComment(xmlParserCtxtPtr ctxt) {
4911
11.6M
    xmlChar *buf = NULL;
4912
11.6M
    size_t size = XML_PARSER_BUFFER_SIZE;
4913
11.6M
    size_t len = 0;
4914
11.6M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4915
9.46M
                       XML_MAX_HUGE_LENGTH :
4916
11.6M
                       XML_MAX_TEXT_LENGTH;
4917
11.6M
    xmlParserInputState state;
4918
11.6M
    const xmlChar *in;
4919
11.6M
    size_t nbchar = 0;
4920
11.6M
    int ccol;
4921
11.6M
    int inputid;
4922
4923
    /*
4924
     * Check that there is a comment right here.
4925
     */
4926
11.6M
    if ((RAW != '<') || (NXT(1) != '!') ||
4927
11.6M
        (NXT(2) != '-') || (NXT(3) != '-')) return;
4928
11.6M
    state = ctxt->instate;
4929
11.6M
    ctxt->instate = XML_PARSER_COMMENT;
4930
11.6M
    inputid = ctxt->input->id;
4931
11.6M
    SKIP(4);
4932
11.6M
    SHRINK;
4933
11.6M
    GROW;
4934
4935
    /*
4936
     * Accelerated common case where input don't need to be
4937
     * modified before passing it to the handler.
4938
     */
4939
11.6M
    in = ctxt->input->cur;
4940
11.6M
    do {
4941
11.6M
  if (*in == 0xA) {
4942
929k
      do {
4943
929k
    ctxt->input->line++; ctxt->input->col = 1;
4944
929k
    in++;
4945
929k
      } while (*in == 0xA);
4946
916k
  }
4947
35.6M
get_more:
4948
35.6M
        ccol = ctxt->input->col;
4949
844M
  while (((*in > '-') && (*in <= 0x7F)) ||
4950
844M
         ((*in >= 0x20) && (*in < '-')) ||
4951
844M
         (*in == 0x09)) {
4952
808M
        in++;
4953
808M
        ccol++;
4954
808M
  }
4955
35.6M
  ctxt->input->col = ccol;
4956
35.6M
  if (*in == 0xA) {
4957
12.4M
      do {
4958
12.4M
    ctxt->input->line++; ctxt->input->col = 1;
4959
12.4M
    in++;
4960
12.4M
      } while (*in == 0xA);
4961
12.0M
      goto get_more;
4962
12.0M
  }
4963
23.5M
  nbchar = in - ctxt->input->cur;
4964
  /*
4965
   * save current set of data
4966
   */
4967
23.5M
  if (nbchar > 0) {
4968
23.4M
      if ((ctxt->sax != NULL) &&
4969
23.4M
    (ctxt->sax->comment != NULL)) {
4970
23.4M
    if (buf == NULL) {
4971
11.6M
        if ((*in == '-') && (in[1] == '-'))
4972
6.81M
            size = nbchar + 1;
4973
4.80M
        else
4974
4.80M
            size = XML_PARSER_BUFFER_SIZE + nbchar;
4975
11.6M
        buf = (xmlChar *) xmlMallocAtomic(size);
4976
11.6M
        if (buf == NULL) {
4977
0
            xmlErrMemory(ctxt, NULL);
4978
0
      ctxt->instate = state;
4979
0
      return;
4980
0
        }
4981
11.6M
        len = 0;
4982
11.8M
    } else if (len + nbchar + 1 >= size) {
4983
1.25M
        xmlChar *new_buf;
4984
1.25M
        size  += len + nbchar + XML_PARSER_BUFFER_SIZE;
4985
1.25M
        new_buf = (xmlChar *) xmlRealloc(buf, size);
4986
1.25M
        if (new_buf == NULL) {
4987
0
            xmlFree (buf);
4988
0
      xmlErrMemory(ctxt, NULL);
4989
0
      ctxt->instate = state;
4990
0
      return;
4991
0
        }
4992
1.25M
        buf = new_buf;
4993
1.25M
    }
4994
23.4M
    memcpy(&buf[len], ctxt->input->cur, nbchar);
4995
23.4M
    len += nbchar;
4996
23.4M
    buf[len] = 0;
4997
23.4M
      }
4998
23.4M
  }
4999
23.5M
        if (len > maxLength) {
5000
0
            xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
5001
0
                         "Comment too big found", NULL);
5002
0
            xmlFree (buf);
5003
0
            return;
5004
0
        }
5005
23.5M
  ctxt->input->cur = in;
5006
23.5M
  if (*in == 0xA) {
5007
0
      in++;
5008
0
      ctxt->input->line++; ctxt->input->col = 1;
5009
0
  }
5010
23.5M
  if (*in == 0xD) {
5011
1.92M
      in++;
5012
1.92M
      if (*in == 0xA) {
5013
1.87M
    ctxt->input->cur = in;
5014
1.87M
    in++;
5015
1.87M
    ctxt->input->line++; ctxt->input->col = 1;
5016
1.87M
    goto get_more;
5017
1.87M
      }
5018
51.5k
      in--;
5019
51.5k
  }
5020
21.6M
  SHRINK;
5021
21.6M
  GROW;
5022
21.6M
        if (ctxt->instate == XML_PARSER_EOF) {
5023
0
            xmlFree(buf);
5024
0
            return;
5025
0
        }
5026
21.6M
  in = ctxt->input->cur;
5027
21.6M
  if (*in == '-') {
5028
18.5M
      if (in[1] == '-') {
5029
9.81M
          if (in[2] == '>') {
5030
8.55M
        if (ctxt->input->id != inputid) {
5031
0
      xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5032
0
                     "comment doesn't start and stop in the"
5033
0
                                       " same entity\n");
5034
0
        }
5035
8.55M
        SKIP(3);
5036
8.55M
        if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
5037
8.55M
            (!ctxt->disableSAX)) {
5038
1.67M
      if (buf != NULL)
5039
1.67M
          ctxt->sax->comment(ctxt->userData, buf);
5040
934
      else
5041
934
          ctxt->sax->comment(ctxt->userData, BAD_CAST "");
5042
1.67M
        }
5043
8.55M
        if (buf != NULL)
5044
8.54M
            xmlFree(buf);
5045
8.55M
        if (ctxt->instate != XML_PARSER_EOF)
5046
8.55M
      ctxt->instate = state;
5047
8.55M
        return;
5048
8.55M
    }
5049
1.26M
    if (buf != NULL) {
5050
1.25M
        xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5051
1.25M
                          "Double hyphen within comment: "
5052
1.25M
                                      "<!--%.50s\n",
5053
1.25M
              buf);
5054
1.25M
    } else
5055
4.56k
        xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5056
4.56k
                          "Double hyphen within comment\n", NULL);
5057
1.26M
                if (ctxt->instate == XML_PARSER_EOF) {
5058
0
                    xmlFree(buf);
5059
0
                    return;
5060
0
                }
5061
1.26M
    in++;
5062
1.26M
    ctxt->input->col++;
5063
1.26M
      }
5064
10.0M
      in++;
5065
10.0M
      ctxt->input->col++;
5066
10.0M
      goto get_more;
5067
18.5M
  }
5068
21.6M
    } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09) || (*in == 0x0a));
5069
3.13M
    xmlParseCommentComplex(ctxt, buf, len, size);
5070
3.13M
    ctxt->instate = state;
5071
3.13M
    return;
5072
11.6M
}
5073
5074
5075
/**
5076
 * xmlParsePITarget:
5077
 * @ctxt:  an XML parser context
5078
 *
5079
 * DEPRECATED: Internal function, don't use.
5080
 *
5081
 * parse the name of a PI
5082
 *
5083
 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
5084
 *
5085
 * Returns the PITarget name or NULL
5086
 */
5087
5088
const xmlChar *
5089
4.18M
xmlParsePITarget(xmlParserCtxtPtr ctxt) {
5090
4.18M
    const xmlChar *name;
5091
5092
4.18M
    name = xmlParseName(ctxt);
5093
4.18M
    if ((name != NULL) &&
5094
4.18M
        ((name[0] == 'x') || (name[0] == 'X')) &&
5095
4.18M
        ((name[1] == 'm') || (name[1] == 'M')) &&
5096
4.18M
        ((name[2] == 'l') || (name[2] == 'L'))) {
5097
2.04M
  int i;
5098
2.04M
  if ((name[0] == 'x') && (name[1] == 'm') &&
5099
2.04M
      (name[2] == 'l') && (name[3] == 0)) {
5100
1.51M
      xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5101
1.51M
     "XML declaration allowed only at the start of the document\n");
5102
1.51M
      return(name);
5103
1.51M
  } else if (name[3] == 0) {
5104
9.71k
      xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
5105
9.71k
      return(name);
5106
9.71k
  }
5107
956k
  for (i = 0;;i++) {
5108
956k
      if (xmlW3CPIs[i] == NULL) break;
5109
742k
      if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
5110
313k
          return(name);
5111
742k
  }
5112
214k
  xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5113
214k
          "xmlParsePITarget: invalid name prefix 'xml'\n",
5114
214k
          NULL, NULL);
5115
214k
    }
5116
2.35M
    if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
5117
125k
  xmlNsErr(ctxt, XML_NS_ERR_COLON,
5118
125k
     "colons are forbidden from PI names '%s'\n", name, NULL, NULL);
5119
125k
    }
5120
2.35M
    return(name);
5121
4.18M
}
5122
5123
#ifdef LIBXML_CATALOG_ENABLED
5124
/**
5125
 * xmlParseCatalogPI:
5126
 * @ctxt:  an XML parser context
5127
 * @catalog:  the PI value string
5128
 *
5129
 * parse an XML Catalog Processing Instruction.
5130
 *
5131
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
5132
 *
5133
 * Occurs only if allowed by the user and if happening in the Misc
5134
 * part of the document before any doctype information
5135
 * This will add the given catalog to the parsing context in order
5136
 * to be used if there is a resolution need further down in the document
5137
 */
5138
5139
static void
5140
0
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
5141
0
    xmlChar *URL = NULL;
5142
0
    const xmlChar *tmp, *base;
5143
0
    xmlChar marker;
5144
5145
0
    tmp = catalog;
5146
0
    while (IS_BLANK_CH(*tmp)) tmp++;
5147
0
    if (xmlStrncmp(tmp, BAD_CAST"catalog", 7))
5148
0
  goto error;
5149
0
    tmp += 7;
5150
0
    while (IS_BLANK_CH(*tmp)) tmp++;
5151
0
    if (*tmp != '=') {
5152
0
  return;
5153
0
    }
5154
0
    tmp++;
5155
0
    while (IS_BLANK_CH(*tmp)) tmp++;
5156
0
    marker = *tmp;
5157
0
    if ((marker != '\'') && (marker != '"'))
5158
0
  goto error;
5159
0
    tmp++;
5160
0
    base = tmp;
5161
0
    while ((*tmp != 0) && (*tmp != marker)) tmp++;
5162
0
    if (*tmp == 0)
5163
0
  goto error;
5164
0
    URL = xmlStrndup(base, tmp - base);
5165
0
    tmp++;
5166
0
    while (IS_BLANK_CH(*tmp)) tmp++;
5167
0
    if (*tmp != 0)
5168
0
  goto error;
5169
5170
0
    if (URL != NULL) {
5171
0
  ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
5172
0
  xmlFree(URL);
5173
0
    }
5174
0
    return;
5175
5176
0
error:
5177
0
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
5178
0
            "Catalog PI syntax error: %s\n",
5179
0
      catalog, NULL);
5180
0
    if (URL != NULL)
5181
0
  xmlFree(URL);
5182
0
}
5183
#endif
5184
5185
/**
5186
 * xmlParsePI:
5187
 * @ctxt:  an XML parser context
5188
 *
5189
 * DEPRECATED: Internal function, don't use.
5190
 *
5191
 * parse an XML Processing Instruction.
5192
 *
5193
 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
5194
 *
5195
 * The processing is transferred to SAX once parsed.
5196
 */
5197
5198
void
5199
4.18M
xmlParsePI(xmlParserCtxtPtr ctxt) {
5200
4.18M
    xmlChar *buf = NULL;
5201
4.18M
    size_t len = 0;
5202
4.18M
    size_t size = XML_PARSER_BUFFER_SIZE;
5203
4.18M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
5204
3.71M
                       XML_MAX_HUGE_LENGTH :
5205
4.18M
                       XML_MAX_TEXT_LENGTH;
5206
4.18M
    int cur, l;
5207
4.18M
    const xmlChar *target;
5208
4.18M
    xmlParserInputState state;
5209
4.18M
    int count = 0;
5210
5211
4.18M
    if ((RAW == '<') && (NXT(1) == '?')) {
5212
4.18M
  int inputid = ctxt->input->id;
5213
4.18M
  state = ctxt->instate;
5214
4.18M
        ctxt->instate = XML_PARSER_PI;
5215
  /*
5216
   * this is a Processing Instruction.
5217
   */
5218
4.18M
  SKIP(2);
5219
4.18M
  SHRINK;
5220
5221
  /*
5222
   * Parse the target name and check for special support like
5223
   * namespace.
5224
   */
5225
4.18M
        target = xmlParsePITarget(ctxt);
5226
4.18M
  if (target != NULL) {
5227
4.10M
      if ((RAW == '?') && (NXT(1) == '>')) {
5228
365k
    if (inputid != ctxt->input->id) {
5229
0
        xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5230
0
                             "PI declaration doesn't start and stop in"
5231
0
                                   " the same entity\n");
5232
0
    }
5233
365k
    SKIP(2);
5234
5235
    /*
5236
     * SAX: PI detected.
5237
     */
5238
365k
    if ((ctxt->sax) && (!ctxt->disableSAX) &&
5239
365k
        (ctxt->sax->processingInstruction != NULL))
5240
55.8k
        ctxt->sax->processingInstruction(ctxt->userData,
5241
55.8k
                                         target, NULL);
5242
365k
    if (ctxt->instate != XML_PARSER_EOF)
5243
365k
        ctxt->instate = state;
5244
365k
    return;
5245
365k
      }
5246
3.74M
      buf = (xmlChar *) xmlMallocAtomic(size);
5247
3.74M
      if (buf == NULL) {
5248
0
    xmlErrMemory(ctxt, NULL);
5249
0
    ctxt->instate = state;
5250
0
    return;
5251
0
      }
5252
3.74M
      if (SKIP_BLANKS == 0) {
5253
1.31M
    xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
5254
1.31M
        "ParsePI: PI %s space expected\n", target);
5255
1.31M
      }
5256
3.74M
      cur = CUR_CHAR(l);
5257
538M
      while (IS_CHAR(cur) && /* checked */
5258
538M
       ((cur != '?') || (NXT(1) != '>'))) {
5259
534M
    if (len + 5 >= size) {
5260
1.25M
        xmlChar *tmp;
5261
1.25M
                    size_t new_size = size * 2;
5262
1.25M
        tmp = (xmlChar *) xmlRealloc(buf, new_size);
5263
1.25M
        if (tmp == NULL) {
5264
0
      xmlErrMemory(ctxt, NULL);
5265
0
      xmlFree(buf);
5266
0
      ctxt->instate = state;
5267
0
      return;
5268
0
        }
5269
1.25M
        buf = tmp;
5270
1.25M
                    size = new_size;
5271
1.25M
    }
5272
534M
    count++;
5273
534M
    if (count > 50) {
5274
9.12M
        SHRINK;
5275
9.12M
        GROW;
5276
9.12M
                    if (ctxt->instate == XML_PARSER_EOF) {
5277
0
                        xmlFree(buf);
5278
0
                        return;
5279
0
                    }
5280
9.12M
        count = 0;
5281
9.12M
    }
5282
534M
    COPY_BUF(l,buf,len,cur);
5283
534M
    NEXTL(l);
5284
534M
    cur = CUR_CHAR(l);
5285
534M
    if (cur == 0) {
5286
263k
        SHRINK;
5287
263k
        GROW;
5288
263k
        cur = CUR_CHAR(l);
5289
263k
    }
5290
534M
                if (len > maxLength) {
5291
0
                    xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5292
0
                                      "PI %s too big found", target);
5293
0
                    xmlFree(buf);
5294
0
                    ctxt->instate = state;
5295
0
                    return;
5296
0
                }
5297
534M
      }
5298
3.74M
      buf[len] = 0;
5299
3.74M
      if (cur != '?') {
5300
812k
    xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5301
812k
          "ParsePI: PI %s never end ...\n", target);
5302
2.92M
      } else {
5303
2.92M
    if (inputid != ctxt->input->id) {
5304
0
        xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5305
0
                             "PI declaration doesn't start and stop in"
5306
0
                                   " the same entity\n");
5307
0
    }
5308
2.92M
    SKIP(2);
5309
5310
2.92M
#ifdef LIBXML_CATALOG_ENABLED
5311
2.92M
    if (((state == XML_PARSER_MISC) ||
5312
2.92M
               (state == XML_PARSER_START)) &&
5313
2.92M
        (xmlStrEqual(target, XML_CATALOG_PI))) {
5314
0
        xmlCatalogAllow allow = xmlCatalogGetDefaults();
5315
0
        if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5316
0
      (allow == XML_CATA_ALLOW_ALL))
5317
0
      xmlParseCatalogPI(ctxt, buf);
5318
0
    }
5319
2.92M
#endif
5320
5321
5322
    /*
5323
     * SAX: PI detected.
5324
     */
5325
2.92M
    if ((ctxt->sax) && (!ctxt->disableSAX) &&
5326
2.92M
        (ctxt->sax->processingInstruction != NULL))
5327
181k
        ctxt->sax->processingInstruction(ctxt->userData,
5328
181k
                                         target, buf);
5329
2.92M
      }
5330
3.74M
      xmlFree(buf);
5331
3.74M
  } else {
5332
81.6k
      xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
5333
81.6k
  }
5334
3.82M
  if (ctxt->instate != XML_PARSER_EOF)
5335
3.82M
      ctxt->instate = state;
5336
3.82M
    }
5337
4.18M
}
5338
5339
/**
5340
 * xmlParseNotationDecl:
5341
 * @ctxt:  an XML parser context
5342
 *
5343
 * DEPRECATED: Internal function, don't use.
5344
 *
5345
 * parse a notation declaration
5346
 *
5347
 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID |  PublicID) S? '>'
5348
 *
5349
 * Hence there are actually 3 choices:
5350
 *     'PUBLIC' S PubidLiteral
5351
 *     'PUBLIC' S PubidLiteral S SystemLiteral
5352
 * and 'SYSTEM' S SystemLiteral
5353
 *
5354
 * See the NOTE on xmlParseExternalID().
5355
 */
5356
5357
void
5358
10.1k
xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
5359
10.1k
    const xmlChar *name;
5360
10.1k
    xmlChar *Pubid;
5361
10.1k
    xmlChar *Systemid;
5362
5363
10.1k
    if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5364
8.00k
  int inputid = ctxt->input->id;
5365
8.00k
  SHRINK;
5366
8.00k
  SKIP(10);
5367
8.00k
  if (SKIP_BLANKS == 0) {
5368
272
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5369
272
         "Space required after '<!NOTATION'\n");
5370
272
      return;
5371
272
  }
5372
5373
7.73k
        name = xmlParseName(ctxt);
5374
7.73k
  if (name == NULL) {
5375
176
      xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5376
176
      return;
5377
176
  }
5378
7.55k
  if (xmlStrchr(name, ':') != NULL) {
5379
251
      xmlNsErr(ctxt, XML_NS_ERR_COLON,
5380
251
         "colons are forbidden from notation names '%s'\n",
5381
251
         name, NULL, NULL);
5382
251
  }
5383
7.55k
  if (SKIP_BLANKS == 0) {
5384
296
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5385
296
         "Space required after the NOTATION name'\n");
5386
296
      return;
5387
296
  }
5388
5389
  /*
5390
   * Parse the IDs.
5391
   */
5392
7.26k
  Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
5393
7.26k
  SKIP_BLANKS;
5394
5395
7.26k
  if (RAW == '>') {
5396
6.04k
      if (inputid != ctxt->input->id) {
5397
0
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5398
0
                         "Notation declaration doesn't start and stop"
5399
0
                               " in the same entity\n");
5400
0
      }
5401
6.04k
      NEXT;
5402
6.04k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5403
6.04k
    (ctxt->sax->notationDecl != NULL))
5404
5.14k
    ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5405
6.04k
  } else {
5406
1.22k
      xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5407
1.22k
  }
5408
7.26k
  if (Systemid != NULL) xmlFree(Systemid);
5409
7.26k
  if (Pubid != NULL) xmlFree(Pubid);
5410
7.26k
    }
5411
10.1k
}
5412
5413
/**
5414
 * xmlParseEntityDecl:
5415
 * @ctxt:  an XML parser context
5416
 *
5417
 * DEPRECATED: Internal function, don't use.
5418
 *
5419
 * parse <!ENTITY declarations
5420
 *
5421
 * [70] EntityDecl ::= GEDecl | PEDecl
5422
 *
5423
 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5424
 *
5425
 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5426
 *
5427
 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5428
 *
5429
 * [74] PEDef ::= EntityValue | ExternalID
5430
 *
5431
 * [76] NDataDecl ::= S 'NDATA' S Name
5432
 *
5433
 * [ VC: Notation Declared ]
5434
 * The Name must match the declared name of a notation.
5435
 */
5436
5437
void
5438
1.48M
xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
5439
1.48M
    const xmlChar *name = NULL;
5440
1.48M
    xmlChar *value = NULL;
5441
1.48M
    xmlChar *URI = NULL, *literal = NULL;
5442
1.48M
    const xmlChar *ndata = NULL;
5443
1.48M
    int isParameter = 0;
5444
1.48M
    xmlChar *orig = NULL;
5445
5446
    /* GROW; done in the caller */
5447
1.48M
    if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
5448
1.47M
  int inputid = ctxt->input->id;
5449
1.47M
  SHRINK;
5450
1.47M
  SKIP(8);
5451
1.47M
  if (SKIP_BLANKS == 0) {
5452
2.39k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5453
2.39k
         "Space required after '<!ENTITY'\n");
5454
2.39k
  }
5455
5456
1.47M
  if (RAW == '%') {
5457
700k
      NEXT;
5458
700k
      if (SKIP_BLANKS == 0) {
5459
409
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5460
409
             "Space required after '%%'\n");
5461
409
      }
5462
700k
      isParameter = 1;
5463
700k
  }
5464
5465
1.47M
        name = xmlParseName(ctxt);
5466
1.47M
  if (name == NULL) {
5467
4.70k
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5468
4.70k
                     "xmlParseEntityDecl: no name\n");
5469
4.70k
            return;
5470
4.70k
  }
5471
1.47M
  if (xmlStrchr(name, ':') != NULL) {
5472
464
      xmlNsErr(ctxt, XML_NS_ERR_COLON,
5473
464
         "colons are forbidden from entities names '%s'\n",
5474
464
         name, NULL, NULL);
5475
464
  }
5476
1.47M
  if (SKIP_BLANKS == 0) {
5477
5.25k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5478
5.25k
         "Space required after the entity name\n");
5479
5.25k
  }
5480
5481
1.47M
  ctxt->instate = XML_PARSER_ENTITY_DECL;
5482
  /*
5483
   * handle the various case of definitions...
5484
   */
5485
1.47M
  if (isParameter) {
5486
699k
      if ((RAW == '"') || (RAW == '\'')) {
5487
674k
          value = xmlParseEntityValue(ctxt, &orig);
5488
674k
    if (value) {
5489
669k
        if ((ctxt->sax != NULL) &&
5490
669k
      (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5491
629k
      ctxt->sax->entityDecl(ctxt->userData, name,
5492
629k
                        XML_INTERNAL_PARAMETER_ENTITY,
5493
629k
            NULL, NULL, value);
5494
669k
    }
5495
674k
      } else {
5496
25.0k
          URI = xmlParseExternalID(ctxt, &literal, 1);
5497
25.0k
    if ((URI == NULL) && (literal == NULL)) {
5498
1.82k
        xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5499
1.82k
    }
5500
25.0k
    if (URI) {
5501
23.0k
        xmlURIPtr uri;
5502
5503
23.0k
        uri = xmlParseURI((const char *) URI);
5504
23.0k
        if (uri == NULL) {
5505
962
            xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5506
962
             "Invalid URI: %s\n", URI);
5507
      /*
5508
       * This really ought to be a well formedness error
5509
       * but the XML Core WG decided otherwise c.f. issue
5510
       * E26 of the XML erratas.
5511
       */
5512
22.0k
        } else {
5513
22.0k
      if (uri->fragment != NULL) {
5514
          /*
5515
           * Okay this is foolish to block those but not
5516
           * invalid URIs.
5517
           */
5518
221
          xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5519
21.8k
      } else {
5520
21.8k
          if ((ctxt->sax != NULL) &&
5521
21.8k
        (!ctxt->disableSAX) &&
5522
21.8k
        (ctxt->sax->entityDecl != NULL))
5523
21.1k
        ctxt->sax->entityDecl(ctxt->userData, name,
5524
21.1k
              XML_EXTERNAL_PARAMETER_ENTITY,
5525
21.1k
              literal, URI, NULL);
5526
21.8k
      }
5527
22.0k
      xmlFreeURI(uri);
5528
22.0k
        }
5529
23.0k
    }
5530
25.0k
      }
5531
772k
  } else {
5532
772k
      if ((RAW == '"') || (RAW == '\'')) {
5533
694k
          value = xmlParseEntityValue(ctxt, &orig);
5534
694k
    if ((ctxt->sax != NULL) &&
5535
694k
        (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5536
600k
        ctxt->sax->entityDecl(ctxt->userData, name,
5537
600k
        XML_INTERNAL_GENERAL_ENTITY,
5538
600k
        NULL, NULL, value);
5539
    /*
5540
     * For expat compatibility in SAX mode.
5541
     */
5542
694k
    if ((ctxt->myDoc == NULL) ||
5543
694k
        (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5544
14.2k
        if (ctxt->myDoc == NULL) {
5545
1.82k
      ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5546
1.82k
      if (ctxt->myDoc == NULL) {
5547
0
          xmlErrMemory(ctxt, "New Doc failed");
5548
0
          return;
5549
0
      }
5550
1.82k
      ctxt->myDoc->properties = XML_DOC_INTERNAL;
5551
1.82k
        }
5552
14.2k
        if (ctxt->myDoc->intSubset == NULL)
5553
1.82k
      ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5554
1.82k
              BAD_CAST "fake", NULL, NULL);
5555
5556
14.2k
        xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5557
14.2k
                    NULL, NULL, value);
5558
14.2k
    }
5559
694k
      } else {
5560
77.8k
          URI = xmlParseExternalID(ctxt, &literal, 1);
5561
77.8k
    if ((URI == NULL) && (literal == NULL)) {
5562
12.0k
        xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5563
12.0k
    }
5564
77.8k
    if (URI) {
5565
64.7k
        xmlURIPtr uri;
5566
5567
64.7k
        uri = xmlParseURI((const char *)URI);
5568
64.7k
        if (uri == NULL) {
5569
2.94k
            xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5570
2.94k
             "Invalid URI: %s\n", URI);
5571
      /*
5572
       * This really ought to be a well formedness error
5573
       * but the XML Core WG decided otherwise c.f. issue
5574
       * E26 of the XML erratas.
5575
       */
5576
61.8k
        } else {
5577
61.8k
      if (uri->fragment != NULL) {
5578
          /*
5579
           * Okay this is foolish to block those but not
5580
           * invalid URIs.
5581
           */
5582
675
          xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5583
675
      }
5584
61.8k
      xmlFreeURI(uri);
5585
61.8k
        }
5586
64.7k
    }
5587
77.8k
    if ((RAW != '>') && (SKIP_BLANKS == 0)) {
5588
7.43k
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5589
7.43k
           "Space required before 'NDATA'\n");
5590
7.43k
    }
5591
77.8k
    if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
5592
17.4k
        SKIP(5);
5593
17.4k
        if (SKIP_BLANKS == 0) {
5594
386
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5595
386
               "Space required after 'NDATA'\n");
5596
386
        }
5597
17.4k
        ndata = xmlParseName(ctxt);
5598
17.4k
        if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5599
17.4k
            (ctxt->sax->unparsedEntityDecl != NULL))
5600
16.4k
      ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5601
16.4k
            literal, URI, ndata);
5602
60.4k
    } else {
5603
60.4k
        if ((ctxt->sax != NULL) &&
5604
60.4k
            (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5605
52.2k
      ctxt->sax->entityDecl(ctxt->userData, name,
5606
52.2k
            XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5607
52.2k
            literal, URI, NULL);
5608
        /*
5609
         * For expat compatibility in SAX mode.
5610
         * assuming the entity replacement was asked for
5611
         */
5612
60.4k
        if ((ctxt->replaceEntities != 0) &&
5613
60.4k
      ((ctxt->myDoc == NULL) ||
5614
28.6k
      (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5615
1.19k
      if (ctxt->myDoc == NULL) {
5616
568
          ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5617
568
          if (ctxt->myDoc == NULL) {
5618
0
              xmlErrMemory(ctxt, "New Doc failed");
5619
0
        return;
5620
0
          }
5621
568
          ctxt->myDoc->properties = XML_DOC_INTERNAL;
5622
568
      }
5623
5624
1.19k
      if (ctxt->myDoc->intSubset == NULL)
5625
568
          ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5626
568
            BAD_CAST "fake", NULL, NULL);
5627
1.19k
      xmlSAX2EntityDecl(ctxt, name,
5628
1.19k
                  XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5629
1.19k
                  literal, URI, NULL);
5630
1.19k
        }
5631
60.4k
    }
5632
77.8k
      }
5633
772k
  }
5634
1.47M
  if (ctxt->instate == XML_PARSER_EOF)
5635
0
      goto done;
5636
1.47M
  SKIP_BLANKS;
5637
1.47M
  if (RAW != '>') {
5638
18.2k
      xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
5639
18.2k
              "xmlParseEntityDecl: entity %s not terminated\n", name);
5640
18.2k
      xmlHaltParser(ctxt);
5641
1.45M
  } else {
5642
1.45M
      if (inputid != ctxt->input->id) {
5643
5.42k
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5644
5.42k
                         "Entity declaration doesn't start and stop in"
5645
5.42k
                               " the same entity\n");
5646
5.42k
      }
5647
1.45M
      NEXT;
5648
1.45M
  }
5649
1.47M
  if (orig != NULL) {
5650
      /*
5651
       * Ugly mechanism to save the raw entity value.
5652
       */
5653
1.35M
      xmlEntityPtr cur = NULL;
5654
5655
1.35M
      if (isParameter) {
5656
670k
          if ((ctxt->sax != NULL) &&
5657
670k
        (ctxt->sax->getParameterEntity != NULL))
5658
670k
        cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5659
687k
      } else {
5660
687k
          if ((ctxt->sax != NULL) &&
5661
687k
        (ctxt->sax->getEntity != NULL))
5662
687k
        cur = ctxt->sax->getEntity(ctxt->userData, name);
5663
687k
    if ((cur == NULL) && (ctxt->userData==ctxt)) {
5664
69.4k
        cur = xmlSAX2GetEntity(ctxt, name);
5665
69.4k
    }
5666
687k
      }
5667
1.35M
            if ((cur != NULL) && (cur->orig == NULL)) {
5668
1.13M
    cur->orig = orig;
5669
1.13M
                orig = NULL;
5670
1.13M
      }
5671
1.35M
  }
5672
5673
1.47M
done:
5674
1.47M
  if (value != NULL) xmlFree(value);
5675
1.47M
  if (URI != NULL) xmlFree(URI);
5676
1.47M
  if (literal != NULL) xmlFree(literal);
5677
1.47M
        if (orig != NULL) xmlFree(orig);
5678
1.47M
    }
5679
1.48M
}
5680
5681
/**
5682
 * xmlParseDefaultDecl:
5683
 * @ctxt:  an XML parser context
5684
 * @value:  Receive a possible fixed default value for the attribute
5685
 *
5686
 * DEPRECATED: Internal function, don't use.
5687
 *
5688
 * Parse an attribute default declaration
5689
 *
5690
 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5691
 *
5692
 * [ VC: Required Attribute ]
5693
 * if the default declaration is the keyword #REQUIRED, then the
5694
 * attribute must be specified for all elements of the type in the
5695
 * attribute-list declaration.
5696
 *
5697
 * [ VC: Attribute Default Legal ]
5698
 * The declared default value must meet the lexical constraints of
5699
 * the declared attribute type c.f. xmlValidateAttributeDecl()
5700
 *
5701
 * [ VC: Fixed Attribute Default ]
5702
 * if an attribute has a default value declared with the #FIXED
5703
 * keyword, instances of that attribute must match the default value.
5704
 *
5705
 * [ WFC: No < in Attribute Values ]
5706
 * handled in xmlParseAttValue()
5707
 *
5708
 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5709
 *          or XML_ATTRIBUTE_FIXED.
5710
 */
5711
5712
int
5713
2.52M
xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5714
2.52M
    int val;
5715
2.52M
    xmlChar *ret;
5716
5717
2.52M
    *value = NULL;
5718
2.52M
    if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
5719
476k
  SKIP(9);
5720
476k
  return(XML_ATTRIBUTE_REQUIRED);
5721
476k
    }
5722
2.05M
    if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
5723
1.81M
  SKIP(8);
5724
1.81M
  return(XML_ATTRIBUTE_IMPLIED);
5725
1.81M
    }
5726
236k
    val = XML_ATTRIBUTE_NONE;
5727
236k
    if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
5728
136k
  SKIP(6);
5729
136k
  val = XML_ATTRIBUTE_FIXED;
5730
136k
  if (SKIP_BLANKS == 0) {
5731
276
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5732
276
         "Space required after '#FIXED'\n");
5733
276
  }
5734
136k
    }
5735
236k
    ret = xmlParseAttValue(ctxt);
5736
236k
    ctxt->instate = XML_PARSER_DTD;
5737
236k
    if (ret == NULL) {
5738
6.02k
  xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
5739
6.02k
           "Attribute default value declaration error\n");
5740
6.02k
    } else
5741
230k
        *value = ret;
5742
236k
    return(val);
5743
2.05M
}
5744
5745
/**
5746
 * xmlParseNotationType:
5747
 * @ctxt:  an XML parser context
5748
 *
5749
 * DEPRECATED: Internal function, don't use.
5750
 *
5751
 * parse a Notation attribute type.
5752
 *
5753
 * Note: the leading 'NOTATION' S part has already been parsed...
5754
 *
5755
 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5756
 *
5757
 * [ VC: Notation Attributes ]
5758
 * Values of this type must match one of the notation names included
5759
 * in the declaration; all notation names in the declaration must be declared.
5760
 *
5761
 * Returns: the notation attribute tree built while parsing
5762
 */
5763
5764
xmlEnumerationPtr
5765
4.20k
xmlParseNotationType(xmlParserCtxtPtr ctxt) {
5766
4.20k
    const xmlChar *name;
5767
4.20k
    xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5768
5769
4.20k
    if (RAW != '(') {
5770
218
  xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5771
218
  return(NULL);
5772
218
    }
5773
3.98k
    SHRINK;
5774
4.73k
    do {
5775
4.73k
        NEXT;
5776
4.73k
  SKIP_BLANKS;
5777
4.73k
        name = xmlParseName(ctxt);
5778
4.73k
  if (name == NULL) {
5779
232
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5780
232
         "Name expected in NOTATION declaration\n");
5781
232
            xmlFreeEnumeration(ret);
5782
232
      return(NULL);
5783
232
  }
5784
4.49k
  tmp = ret;
5785
6.53k
  while (tmp != NULL) {
5786
2.08k
      if (xmlStrEqual(name, tmp->name)) {
5787
50
    xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5788
50
    "standalone: attribute notation value token %s duplicated\n",
5789
50
         name, NULL);
5790
50
    if (!xmlDictOwns(ctxt->dict, name))
5791
0
        xmlFree((xmlChar *) name);
5792
50
    break;
5793
50
      }
5794
2.03k
      tmp = tmp->next;
5795
2.03k
  }
5796
4.49k
  if (tmp == NULL) {
5797
4.44k
      cur = xmlCreateEnumeration(name);
5798
4.44k
      if (cur == NULL) {
5799
0
                xmlFreeEnumeration(ret);
5800
0
                return(NULL);
5801
0
            }
5802
4.44k
      if (last == NULL) ret = last = cur;
5803
644
      else {
5804
644
    last->next = cur;
5805
644
    last = cur;
5806
644
      }
5807
4.44k
  }
5808
4.49k
  SKIP_BLANKS;
5809
4.49k
    } while (RAW == '|');
5810
3.75k
    if (RAW != ')') {
5811
303
  xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5812
303
        xmlFreeEnumeration(ret);
5813
303
  return(NULL);
5814
303
    }
5815
3.44k
    NEXT;
5816
3.44k
    return(ret);
5817
3.75k
}
5818
5819
/**
5820
 * xmlParseEnumerationType:
5821
 * @ctxt:  an XML parser context
5822
 *
5823
 * DEPRECATED: Internal function, don't use.
5824
 *
5825
 * parse an Enumeration attribute type.
5826
 *
5827
 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5828
 *
5829
 * [ VC: Enumeration ]
5830
 * Values of this type must match one of the Nmtoken tokens in
5831
 * the declaration
5832
 *
5833
 * Returns: the enumeration attribute tree built while parsing
5834
 */
5835
5836
xmlEnumerationPtr
5837
213k
xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
5838
213k
    xmlChar *name;
5839
213k
    xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5840
5841
213k
    if (RAW != '(') {
5842
8.84k
  xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
5843
8.84k
  return(NULL);
5844
8.84k
    }
5845
204k
    SHRINK;
5846
618k
    do {
5847
618k
        NEXT;
5848
618k
  SKIP_BLANKS;
5849
618k
        name = xmlParseNmtoken(ctxt);
5850
618k
  if (name == NULL) {
5851
3.35k
      xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
5852
3.35k
      return(ret);
5853
3.35k
  }
5854
614k
  tmp = ret;
5855
1.54M
  while (tmp != NULL) {
5856
932k
      if (xmlStrEqual(name, tmp->name)) {
5857
504
    xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5858
504
    "standalone: attribute enumeration value token %s duplicated\n",
5859
504
         name, NULL);
5860
504
    if (!xmlDictOwns(ctxt->dict, name))
5861
504
        xmlFree(name);
5862
504
    break;
5863
504
      }
5864
932k
      tmp = tmp->next;
5865
932k
  }
5866
614k
  if (tmp == NULL) {
5867
614k
      cur = xmlCreateEnumeration(name);
5868
614k
      if (!xmlDictOwns(ctxt->dict, name))
5869
614k
    xmlFree(name);
5870
614k
      if (cur == NULL) {
5871
0
                xmlFreeEnumeration(ret);
5872
0
                return(NULL);
5873
0
            }
5874
614k
      if (last == NULL) ret = last = cur;
5875
410k
      else {
5876
410k
    last->next = cur;
5877
410k
    last = cur;
5878
410k
      }
5879
614k
  }
5880
614k
  SKIP_BLANKS;
5881
614k
    } while (RAW == '|');
5882
201k
    if (RAW != ')') {
5883
1.43k
  xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
5884
1.43k
  return(ret);
5885
1.43k
    }
5886
199k
    NEXT;
5887
199k
    return(ret);
5888
201k
}
5889
5890
/**
5891
 * xmlParseEnumeratedType:
5892
 * @ctxt:  an XML parser context
5893
 * @tree:  the enumeration tree built while parsing
5894
 *
5895
 * DEPRECATED: Internal function, don't use.
5896
 *
5897
 * parse an Enumerated attribute type.
5898
 *
5899
 * [57] EnumeratedType ::= NotationType | Enumeration
5900
 *
5901
 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5902
 *
5903
 *
5904
 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5905
 */
5906
5907
int
5908
217k
xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5909
217k
    if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5910
4.47k
  SKIP(8);
5911
4.47k
  if (SKIP_BLANKS == 0) {
5912
275
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5913
275
         "Space required after 'NOTATION'\n");
5914
275
      return(0);
5915
275
  }
5916
4.20k
  *tree = xmlParseNotationType(ctxt);
5917
4.20k
  if (*tree == NULL) return(0);
5918
3.44k
  return(XML_ATTRIBUTE_NOTATION);
5919
4.20k
    }
5920
213k
    *tree = xmlParseEnumerationType(ctxt);
5921
213k
    if (*tree == NULL) return(0);
5922
204k
    return(XML_ATTRIBUTE_ENUMERATION);
5923
213k
}
5924
5925
/**
5926
 * xmlParseAttributeType:
5927
 * @ctxt:  an XML parser context
5928
 * @tree:  the enumeration tree built while parsing
5929
 *
5930
 * DEPRECATED: Internal function, don't use.
5931
 *
5932
 * parse the type of an attribute in an attribute-list declaration
5933
 *
5934
 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5935
 *
5936
 * [55] StringType ::= 'CDATA'
5937
 *
5938
 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5939
 *                        'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5940
 *
5941
 * Validity constraints for attribute values syntax are checked in
5942
 * xmlValidateAttributeValue()
5943
 *
5944
 * [ VC: ID ]
5945
 * Values of type ID must match the Name production. A name must not
5946
 * appear more than once in an XML document as a value of this type;
5947
 * i.e., ID values must uniquely identify the elements which bear them.
5948
 *
5949
 * [ VC: One ID per Element Type ]
5950
 * No element type may have more than one ID attribute specified.
5951
 *
5952
 * [ VC: ID Attribute Default ]
5953
 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
5954
 *
5955
 * [ VC: IDREF ]
5956
 * Values of type IDREF must match the Name production, and values
5957
 * of type IDREFS must match Names; each IDREF Name must match the value
5958
 * of an ID attribute on some element in the XML document; i.e. IDREF
5959
 * values must match the value of some ID attribute.
5960
 *
5961
 * [ VC: Entity Name ]
5962
 * Values of type ENTITY must match the Name production, values
5963
 * of type ENTITIES must match Names; each Entity Name must match the
5964
 * name of an unparsed entity declared in the DTD.
5965
 *
5966
 * [ VC: Name Token ]
5967
 * Values of type NMTOKEN must match the Nmtoken production; values
5968
 * of type NMTOKENS must match Nmtokens.
5969
 *
5970
 * Returns the attribute type
5971
 */
5972
int
5973
2.54M
xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5974
2.54M
    SHRINK;
5975
2.54M
    if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
5976
914k
  SKIP(5);
5977
914k
  return(XML_ATTRIBUTE_CDATA);
5978
1.63M
     } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
5979
12.1k
  SKIP(6);
5980
12.1k
  return(XML_ATTRIBUTE_IDREFS);
5981
1.61M
     } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
5982
64.1k
  SKIP(5);
5983
64.1k
  return(XML_ATTRIBUTE_IDREF);
5984
1.55M
     } else if ((RAW == 'I') && (NXT(1) == 'D')) {
5985
639k
        SKIP(2);
5986
639k
  return(XML_ATTRIBUTE_ID);
5987
916k
     } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
5988
8.14k
  SKIP(6);
5989
8.14k
  return(XML_ATTRIBUTE_ENTITY);
5990
907k
     } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
5991
2.65k
  SKIP(8);
5992
2.65k
  return(XML_ATTRIBUTE_ENTITIES);
5993
905k
     } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
5994
85.1k
  SKIP(8);
5995
85.1k
  return(XML_ATTRIBUTE_NMTOKENS);
5996
820k
     } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
5997
602k
  SKIP(7);
5998
602k
  return(XML_ATTRIBUTE_NMTOKEN);
5999
602k
     }
6000
217k
     return(xmlParseEnumeratedType(ctxt, tree));
6001
2.54M
}
6002
6003
/**
6004
 * xmlParseAttributeListDecl:
6005
 * @ctxt:  an XML parser context
6006
 *
6007
 * DEPRECATED: Internal function, don't use.
6008
 *
6009
 * parse the Attribute list def for an element
6010
 *
6011
 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
6012
 *
6013
 * [53] AttDef ::= S Name S AttType S DefaultDecl
6014
 *
6015
 */
6016
void
6017
1.16M
xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
6018
1.16M
    const xmlChar *elemName;
6019
1.16M
    const xmlChar *attrName;
6020
1.16M
    xmlEnumerationPtr tree;
6021
6022
1.16M
    if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
6023
1.15M
  int inputid = ctxt->input->id;
6024
6025
1.15M
  SKIP(9);
6026
1.15M
  if (SKIP_BLANKS == 0) {
6027
2.08k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6028
2.08k
                     "Space required after '<!ATTLIST'\n");
6029
2.08k
  }
6030
1.15M
        elemName = xmlParseName(ctxt);
6031
1.15M
  if (elemName == NULL) {
6032
1.45k
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6033
1.45k
         "ATTLIST: no name for Element\n");
6034
1.45k
      return;
6035
1.45k
  }
6036
1.15M
  SKIP_BLANKS;
6037
1.15M
  GROW;
6038
3.67M
  while ((RAW != '>') && (ctxt->instate != XML_PARSER_EOF)) {
6039
2.55M
      int type;
6040
2.55M
      int def;
6041
2.55M
      xmlChar *defaultValue = NULL;
6042
6043
2.55M
      GROW;
6044
2.55M
            tree = NULL;
6045
2.55M
      attrName = xmlParseName(ctxt);
6046
2.55M
      if (attrName == NULL) {
6047
8.56k
    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6048
8.56k
             "ATTLIST: no name for Attribute\n");
6049
8.56k
    break;
6050
8.56k
      }
6051
2.55M
      GROW;
6052
2.55M
      if (SKIP_BLANKS == 0) {
6053
3.81k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6054
3.81k
            "Space required after the attribute name\n");
6055
3.81k
    break;
6056
3.81k
      }
6057
6058
2.54M
      type = xmlParseAttributeType(ctxt, &tree);
6059
2.54M
      if (type <= 0) {
6060
10.0k
          break;
6061
10.0k
      }
6062
6063
2.53M
      GROW;
6064
2.53M
      if (SKIP_BLANKS == 0) {
6065
7.25k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6066
7.25k
             "Space required after the attribute type\n");
6067
7.25k
          if (tree != NULL)
6068
4.97k
        xmlFreeEnumeration(tree);
6069
7.25k
    break;
6070
7.25k
      }
6071
6072
2.52M
      def = xmlParseDefaultDecl(ctxt, &defaultValue);
6073
2.52M
      if (def <= 0) {
6074
0
                if (defaultValue != NULL)
6075
0
        xmlFree(defaultValue);
6076
0
          if (tree != NULL)
6077
0
        xmlFreeEnumeration(tree);
6078
0
          break;
6079
0
      }
6080
2.52M
      if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
6081
92.9k
          xmlAttrNormalizeSpace(defaultValue, defaultValue);
6082
6083
2.52M
      GROW;
6084
2.52M
            if (RAW != '>') {
6085
2.11M
    if (SKIP_BLANKS == 0) {
6086
11.0k
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6087
11.0k
      "Space required after the attribute default value\n");
6088
11.0k
        if (defaultValue != NULL)
6089
4.70k
      xmlFree(defaultValue);
6090
11.0k
        if (tree != NULL)
6091
1.47k
      xmlFreeEnumeration(tree);
6092
11.0k
        break;
6093
11.0k
    }
6094
2.11M
      }
6095
2.51M
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6096
2.51M
    (ctxt->sax->attributeDecl != NULL))
6097
2.33M
    ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
6098
2.33M
                          type, def, defaultValue, tree);
6099
181k
      else if (tree != NULL)
6100
10.4k
    xmlFreeEnumeration(tree);
6101
6102
2.51M
      if ((ctxt->sax2) && (defaultValue != NULL) &&
6103
2.51M
          (def != XML_ATTRIBUTE_IMPLIED) &&
6104
2.51M
    (def != XML_ATTRIBUTE_REQUIRED)) {
6105
145k
    xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
6106
145k
      }
6107
2.51M
      if (ctxt->sax2) {
6108
1.55M
    xmlAddSpecialAttr(ctxt, elemName, attrName, type);
6109
1.55M
      }
6110
2.51M
      if (defaultValue != NULL)
6111
225k
          xmlFree(defaultValue);
6112
2.51M
      GROW;
6113
2.51M
  }
6114
1.15M
  if (RAW == '>') {
6115
1.11M
      if (inputid != ctxt->input->id) {
6116
1.54k
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6117
1.54k
                               "Attribute list declaration doesn't start and"
6118
1.54k
                               " stop in the same entity\n");
6119
1.54k
      }
6120
1.11M
      NEXT;
6121
1.11M
  }
6122
1.15M
    }
6123
1.16M
}
6124
6125
/**
6126
 * xmlParseElementMixedContentDecl:
6127
 * @ctxt:  an XML parser context
6128
 * @inputchk:  the input used for the current entity, needed for boundary checks
6129
 *
6130
 * DEPRECATED: Internal function, don't use.
6131
 *
6132
 * Parse the declaration for a Mixed Element content.
6133
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6134
 *
6135
 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
6136
 *                '(' S? '#PCDATA' S? ')'
6137
 *
6138
 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
6139
 *
6140
 * [ VC: No Duplicate Types ]
6141
 * The same name must not appear more than once in a single
6142
 * mixed-content declaration.
6143
 *
 * For '(#PCDATA)' a single PCDATA node is returned. For the list form the
 * result is a chain of OR nodes linked through c2: the first OR node has
 * the PCDATA node as c1, every following OR node has an element name as
 * c1, and the last element name is the c2 of the last OR node.
 *
6144
 * Returns the tree of xmlElementContentPtr describing the element choices,
 * or NULL if '#PCDATA' is missing, a name is missing, the list is not
 * closed by ')*', or a node allocation checked below fails.
6145
 */
6146
xmlElementContentPtr
6147
489k
xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6148
489k
    xmlElementContentPtr ret = NULL, cur = NULL, n;
6149
489k
    const xmlChar *elem = NULL;
6150
6151
489k
    GROW;
6152
489k
    if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6153
489k
  SKIP(7);
6154
489k
  SKIP_BLANKS;
6155
489k
  SHRINK;
  /* '(#PCDATA)' form, optionally followed by '*'. */
6156
489k
  if (RAW == ')') {
6157
355k
      if (ctxt->input->id != inputchk) {
6158
0
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6159
0
                               "Element content declaration doesn't start and"
6160
0
                               " stop in the same entity\n");
6161
0
      }
6162
355k
      NEXT;
6163
355k
      ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6164
355k
      if (ret == NULL)
6165
0
          return(NULL);
6166
355k
      if (RAW == '*') {
6167
10
    ret->ocur = XML_ELEMENT_CONTENT_MULT;
6168
10
    NEXT;
6169
10
      }
6170
355k
      return(ret);
6171
355k
  }
  /* List form: start with the PCDATA leaf. A '(' here is never consumed by
   * the loop below, so it ends in the error branch further down. */
6172
134k
  if ((RAW == '(') || (RAW == '|')) {
6173
133k
      ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6174
133k
      if (ret == NULL) return(NULL);
6175
133k
  }
  /* Each '|' adds one OR node; the name parsed after it is attached on the
   * next iteration, or after the loop for the last name. */
6176
1.52M
  while ((RAW == '|') && (ctxt->instate != XML_PARSER_EOF)) {
6177
1.39M
      NEXT;
      /* elem is NULL only on the first '|': a failed name parse below
       * returns from the function. */
6178
1.39M
      if (elem == NULL) {
6179
133k
          ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6180
133k
    if (ret == NULL) {
6181
0
        xmlFreeDocElementContent(ctxt->myDoc, cur);
6182
0
                    return(NULL);
6183
0
                }
6184
133k
    ret->c1 = cur;
6185
133k
    if (cur != NULL)
6186
133k
        cur->parent = ret;
6187
133k
    cur = ret;
6188
1.25M
      } else {
6189
1.25M
          n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6190
1.25M
    if (n == NULL) {
6191
0
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6192
0
                    return(NULL);
6193
0
                }
    /* NOTE(review): a NULL result for this leaf is not reported; the
     * name is then simply absent from the tree. */
6194
1.25M
    n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6195
1.25M
    if (n->c1 != NULL)
6196
1.25M
        n->c1->parent = n;
6197
1.25M
          cur->c2 = n;
    /* NOTE(review): n was checked and dereferenced above, so this test is
     * always true. */
6198
1.25M
    if (n != NULL)
6199
1.25M
        n->parent = cur;
6200
1.25M
    cur = n;
6201
1.25M
      }
6202
1.39M
      SKIP_BLANKS;
6203
1.39M
      elem = xmlParseName(ctxt);
6204
1.39M
      if (elem == NULL) {
6205
495
    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6206
495
      "xmlParseElementMixedContentDecl : Name expected\n");
6207
495
    xmlFreeDocElementContent(ctxt->myDoc, ret);
6208
495
    return(NULL);
6209
495
      }
6210
1.39M
      SKIP_BLANKS;
6211
1.39M
      GROW;
6212
1.39M
  }
  /* The list form must be closed by ")*"; anything else is an error. */
6213
133k
  if ((RAW == ')') && (NXT(1) == '*')) {
6214
131k
      if (elem != NULL) {
    /* Attach the last parsed name as c2 of the last OR node. */
6215
131k
    cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
6216
131k
                                   XML_ELEMENT_CONTENT_ELEMENT);
6217
131k
    if (cur->c2 != NULL)
6218
131k
        cur->c2->parent = cur;
6219
131k
            }
6220
131k
            if (ret != NULL)
6221
131k
                ret->ocur = XML_ELEMENT_CONTENT_MULT;
6222
131k
      if (ctxt->input->id != inputchk) {
6223
22
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6224
22
                               "Element content declaration doesn't start and"
6225
22
                               " stop in the same entity\n");
6226
22
      }
6227
131k
      SKIP(2);
6228
131k
  } else {
6229
2.56k
      xmlFreeDocElementContent(ctxt->myDoc, ret);
6230
2.56k
      xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
6231
2.56k
      return(NULL);
6232
2.56k
  }
6233
6234
133k
    } else {
  /* '#PCDATA' is missing: report it; ret is still NULL at this point. */
6235
0
  xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
6236
0
    }
6237
131k
    return(ret);
6238
489k
}
6239
6240
/**
6241
 * xmlParseElementChildrenContentDeclPriv:
6242
 * @ctxt:  an XML parser context
6243
 * @inputchk:  the input used for the current entity, needed for boundary checks
6244
 * @depth: the level of recursion; more than 128 is rejected unless
 *         XML_PARSE_HUGE is set, more than 2048 is always rejected
6245
 *
6246
 * Parse the declaration for an element-only (children) content model.
6247
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6248
 *
6249
 *
6250
 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6251
 *
6252
 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6253
 *
6254
 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6255
 *
6256
 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6257
 *
6258
 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6259
 * TODO Parameter-entity replacement text must be properly nested
6260
 *  with parenthesized groups. That is to say, if either of the
6261
 *  opening or closing parentheses in a choice, seq, or Mixed
6262
 *  construct is contained in the replacement text for a parameter
6263
 *  entity, both must be contained in the same replacement text. For
6264
 *  interoperability, if a parameter-entity reference appears in a
6265
 *  choice, seq, or Mixed construct, its replacement text should not
6266
 *  be empty, and neither the first nor last non-blank character of
6267
 *  the replacement text should be a connector (| or ,).
6268
 *
 * The function parses one group up to and including its closing ')' and
 * the optional occurrence indicator, recursing for nested groups. On any
 * error the partially built tree is freed.
 *
6269
 * Returns the tree of xmlElementContentPtr describing the element
6270
 *          hierarchy, or NULL in case of error.
6271
 */
6272
static xmlElementContentPtr
6273
xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
6274
920k
                                       int depth) {
    /*
     * ret: root of the tree built so far; cur: most recent operator node
     * (or the first cp while no separator was seen); last: operand parsed
     * after the latest separator, not yet attached; op: operator node
     * being created; type: first separator seen (',' or '|'), 0 if none.
     */
6275
920k
    xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
6276
920k
    const xmlChar *elem;
6277
920k
    xmlChar type = 0;
6278
    /* Bound the recursion on nested groups. */
6279
920k
    if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
6280
920k
        (depth >  2048)) {
6281
131
        xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED,
6282
131
"xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n",
6283
131
                          depth);
6284
131
  return(NULL);
6285
131
    }
6286
920k
    SKIP_BLANKS;
6287
920k
    GROW;
    /* First cp of the group: either a nested group or a Name. */
6288
920k
    if (RAW == '(') {
6289
142k
  int inputid = ctxt->input->id;
6290
6291
        /* Recurse on first child */
6292
142k
  NEXT;
6293
142k
  SKIP_BLANKS;
6294
142k
        cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6295
142k
                                                           depth + 1);
6296
142k
        if (cur == NULL)
6297
99.1k
            return(NULL);
6298
43.0k
  SKIP_BLANKS;
6299
43.0k
  GROW;
6300
777k
    } else {
6301
777k
  elem = xmlParseName(ctxt);
6302
777k
  if (elem == NULL) {
6303
4.22k
      xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6304
4.22k
      return(NULL);
6305
4.22k
  }
6306
773k
        cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6307
773k
  if (cur == NULL) {
6308
0
      xmlErrMemory(ctxt, NULL);
6309
0
      return(NULL);
6310
0
  }
6311
773k
  GROW;
  /* Optional occurrence indicator directly after the name. */
6312
773k
  if (RAW == '?') {
6313
34.8k
      cur->ocur = XML_ELEMENT_CONTENT_OPT;
6314
34.8k
      NEXT;
6315
738k
  } else if (RAW == '*') {
6316
64.8k
      cur->ocur = XML_ELEMENT_CONTENT_MULT;
6317
64.8k
      NEXT;
6318
673k
  } else if (RAW == '+') {
6319
108k
      cur->ocur = XML_ELEMENT_CONTENT_PLUS;
6320
108k
      NEXT;
6321
564k
  } else {
6322
564k
      cur->ocur = XML_ELEMENT_CONTENT_ONCE;
6323
564k
  }
6324
773k
  GROW;
6325
773k
    }
6326
816k
    SKIP_BLANKS;
6327
816k
    SHRINK;
6328
3.22M
    while ((RAW != ')') && (ctxt->instate != XML_PARSER_EOF)) {
6329
        /*
6330
   * Each loop we parse one separator and one element.
   * The first separator seen is remembered in type; a group may not
   * mix ',' and '|'.
6331
   */
6332
2.41M
        if (RAW == ',') {
6333
604k
      if (type == 0) type = CUR;
6334
6335
      /*
6336
       * Detect "Name | Name , Name" error
6337
       */
6338
351k
      else if (type != CUR) {
6339
159
    xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6340
159
        "xmlParseElementChildrenContentDecl : '%c' expected\n",
6341
159
                      type);
6342
159
    if ((last != NULL) && (last != ret))
6343
159
        xmlFreeDocElementContent(ctxt->myDoc, last);
6344
159
    if (ret != NULL)
6345
159
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6346
159
    return(NULL);
6347
159
      }
6348
604k
      NEXT;
6349
6350
604k
      op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
6351
604k
      if (op == NULL) {
6352
0
    if ((last != NULL) && (last != ret))
6353
0
        xmlFreeDocElementContent(ctxt->myDoc, last);
6354
0
          xmlFreeDocElementContent(ctxt->myDoc, ret);
6355
0
    return(NULL);
6356
0
      }
      /*
       * First separator: the new operator becomes the root, with the
       * first cp as c1. Later separators: the new operator becomes c2
       * of the previous one and takes the pending operand as its c1.
       */
6357
604k
      if (last == NULL) {
6358
253k
    op->c1 = ret;
6359
253k
    if (ret != NULL)
6360
253k
        ret->parent = op;
6361
253k
    ret = cur = op;
6362
351k
      } else {
6363
351k
          cur->c2 = op;
6364
351k
    if (op != NULL)
6365
351k
        op->parent = cur;
6366
351k
    op->c1 = last;
6367
351k
    if (last != NULL)
6368
351k
        last->parent = op;
6369
351k
    cur =op;
6370
351k
    last = NULL;
6371
351k
      }
6372
1.81M
  } else if (RAW == '|') {
6373
1.80M
      if (type == 0) type = CUR;
6374
6375
      /*
6376
       * Detect "Name , Name | Name" error
6377
       */
6378
1.55M
      else if (type != CUR) {
6379
170
    xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6380
170
        "xmlParseElementChildrenContentDecl : '%c' expected\n",
6381
170
          type);
6382
170
    if ((last != NULL) && (last != ret))
6383
170
        xmlFreeDocElementContent(ctxt->myDoc, last);
6384
170
    if (ret != NULL)
6385
170
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6386
170
    return(NULL);
6387
170
      }
6388
1.80M
      NEXT;
6389
6390
1.80M
      op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6391
1.80M
      if (op == NULL) {
6392
0
    if ((last != NULL) && (last != ret))
6393
0
        xmlFreeDocElementContent(ctxt->myDoc, last);
6394
0
    if (ret != NULL)
6395
0
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6396
0
    return(NULL);
6397
0
      }
      /* Same linking scheme as for ',' above, with an OR operator. */
6398
1.80M
      if (last == NULL) {
6399
243k
    op->c1 = ret;
6400
243k
    if (ret != NULL)
6401
243k
        ret->parent = op;
6402
243k
    ret = cur = op;
6403
1.55M
      } else {
6404
1.55M
          cur->c2 = op;
6405
1.55M
    if (op != NULL)
6406
1.55M
        op->parent = cur;
6407
1.55M
    op->c1 = last;
6408
1.55M
    if (last != NULL)
6409
1.55M
        last->parent = op;
6410
1.55M
    cur =op;
6411
1.55M
    last = NULL;
6412
1.55M
      }
6413
1.80M
  } else {
      /* Neither ')' nor a separator: the group is malformed. */
6414
11.6k
      xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
6415
11.6k
      if ((last != NULL) && (last != ret))
6416
5.15k
          xmlFreeDocElementContent(ctxt->myDoc, last);
6417
11.6k
      if (ret != NULL)
6418
11.6k
    xmlFreeDocElementContent(ctxt->myDoc, ret);
6419
11.6k
      return(NULL);
6420
11.6k
  }
6421
2.40M
  GROW;
6422
2.40M
  SKIP_BLANKS;
6423
2.40M
  GROW;
  /* Operand following the separator; kept in last until it is attached. */
6424
2.40M
  if (RAW == '(') {
6425
110k
      int inputid = ctxt->input->id;
6426
      /* Recurse on second child */
6427
110k
      NEXT;
6428
110k
      SKIP_BLANKS;
6429
110k
      last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6430
110k
                                                          depth + 1);
6431
110k
            if (last == NULL) {
6432
1.64k
    if (ret != NULL)
6433
1.64k
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6434
1.64k
    return(NULL);
6435
1.64k
            }
6436
109k
      SKIP_BLANKS;
6437
2.29M
  } else {
6438
2.29M
      elem = xmlParseName(ctxt);
6439
2.29M
      if (elem == NULL) {
6440
1.07k
    xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6441
1.07k
    if (ret != NULL)
6442
1.07k
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6443
1.07k
    return(NULL);
6444
1.07k
      }
6445
2.29M
      last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6446
2.29M
      if (last == NULL) {
6447
0
    if (ret != NULL)
6448
0
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6449
0
    return(NULL);
6450
0
      }
6451
2.29M
      if (RAW == '?') {
6452
234k
    last->ocur = XML_ELEMENT_CONTENT_OPT;
6453
234k
    NEXT;
6454
2.05M
      } else if (RAW == '*') {
6455
101k
    last->ocur = XML_ELEMENT_CONTENT_MULT;
6456
101k
    NEXT;
6457
1.95M
      } else if (RAW == '+') {
6458
29.3k
    last->ocur = XML_ELEMENT_CONTENT_PLUS;
6459
29.3k
    NEXT;
6460
1.92M
      } else {
6461
1.92M
    last->ocur = XML_ELEMENT_CONTENT_ONCE;
6462
1.92M
      }
6463
2.29M
  }
6464
2.40M
  SKIP_BLANKS;
6465
2.40M
  GROW;
6466
2.40M
    }
    /* Attach the operand still pending as c2 of the last operator. */
6467
801k
    if ((cur != NULL) && (last != NULL)) {
6468
488k
        cur->c2 = last;
6469
488k
  if (last != NULL)
6470
488k
      last->parent = cur;
6471
488k
    }
6472
801k
    if (ctxt->input->id != inputchk) {
6473
473
  xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6474
473
                       "Element content declaration doesn't start and stop in"
6475
473
                       " the same entity\n");
6476
473
    }
    /* Consume the current character: ')' unless the loop stopped on EOF. */
6477
801k
    NEXT;
    /* Occurrence indicator applying to the whole group. */
6478
801k
    if (RAW == '?') {
6479
11.9k
  if (ret != NULL) {
6480
11.9k
      if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6481
11.9k
          (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6482
55
          ret->ocur = XML_ELEMENT_CONTENT_MULT;
6483
11.9k
      else
6484
11.9k
          ret->ocur = XML_ELEMENT_CONTENT_OPT;
6485
11.9k
  }
6486
11.9k
  NEXT;
6487
790k
    } else if (RAW == '*') {
6488
281k
  if (ret != NULL) {
6489
281k
      ret->ocur = XML_ELEMENT_CONTENT_MULT;
6490
281k
      cur = ret;
6491
      /*
6492
       * Some normalization:
6493
       * (a | b* | c?)* == (a | b | c)*
       * The walk starts at the root and follows the c2 links of
       * consecutive OR nodes.
6494
       */
6495
1.40M
      while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6496
1.12M
    if ((cur->c1 != NULL) &&
6497
1.12M
              ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6498
1.12M
         (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6499
51.2k
        cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6500
1.12M
    if ((cur->c2 != NULL) &&
6501
1.12M
              ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6502
1.12M
         (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6503
8.47k
        cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6504
1.12M
    cur = cur->c2;
6505
1.12M
      }
6506
281k
  }
6507
281k
  NEXT;
6508
508k
    } else if (RAW == '+') {
6509
86.3k
  if (ret != NULL) {
6510
86.3k
      int found = 0;
6511
6512
86.3k
      if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6513
86.3k
          (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6514
16
          ret->ocur = XML_ELEMENT_CONTENT_MULT;
6515
86.3k
      else
6516
86.3k
          ret->ocur = XML_ELEMENT_CONTENT_PLUS;
6517
      /*
6518
       * Some normalization:
6519
       * (a | b*)+ == (a | b)*
6520
       * (a | b?)+ == (a | b)*
       *
       * NOTE(review): unlike the '*' branch above, cur is not reset
       * to ret here, so the walk starts at the most recent operator
       * node (or the first cp) rather than at the root. Choices
       * attached to earlier OR nodes are not visited - confirm
       * whether this is intended.
6521
       */
6522
147k
      while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6523
61.4k
    if ((cur->c1 != NULL) &&
6524
61.4k
              ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6525
61.4k
         (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6526
244
        cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6527
244
        found = 1;
6528
244
    }
6529
61.4k
    if ((cur->c2 != NULL) &&
6530
61.4k
              ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6531
61.4k
         (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6532
147
        cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6533
147
        found = 1;
6534
147
    }
6535
61.4k
    cur = cur->c2;
6536
61.4k
      }
      /* An optional or repeated choice turns the whole group into '*'. */
6537
86.3k
      if (found)
6538
258
    ret->ocur = XML_ELEMENT_CONTENT_MULT;
6539
86.3k
  }
6540
86.3k
  NEXT;
6541
86.3k
    }
6542
801k
    return(ret);
6543
816k
}
6544
6545
/**
6546
 * xmlParseElementChildrenContentDecl:
6547
 * @ctxt:  an XML parser context
6548
 * @inputchk:  the input used for the current entity, needed for boundary checks
6549
 *
6550
 * DEPRECATED: Internal function, don't use.
6551
 *
6552
 * Parse the declaration for an element-only (children) content model.
6553
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6554
 *
6555
 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6556
 *
6557
 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6558
 *
6559
 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6560
 *
6561
 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6562
 *
6563
 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6564
 * TODO Parameter-entity replacement text must be properly nested
6565
 *  with parenthesized groups. That is to say, if either of the
6566
 *  opening or closing parentheses in a choice, seq, or Mixed
6567
 *  construct is contained in the replacement text for a parameter
6568
 *  entity, both must be contained in the same replacement text. For
6569
 *  interoperability, if a parameter-entity reference appears in a
6570
 *  choice, seq, or Mixed construct, its replacement text should not
6571
 *  be empty, and neither the first nor last non-blank character of
6572
 *  the replacement text should be a connector (| or ,).
6573
 *
 * This is a thin wrapper calling xmlParseElementChildrenContentDeclPriv()
 * with an initial recursion depth of 1.
 *
6574
 * Returns the tree of xmlElementContentPtr describing the element
6575
 *          hierarchy.
6576
 */
6577
xmlElementContentPtr
6578
0
xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6579
    /* stub left for API/ABI compat */
6580
0
    return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6581
0
}
6582
6583
/**
6584
 * xmlParseElementContentDecl:
6585
 * @ctxt:  an XML parser context
6586
 * @name:  the name of the element being defined.
6587
 * @result:  the Element Content pointer will be stored here if any
6588
 *
6589
 * DEPRECATED: Internal function, don't use.
6590
 *
6591
 * Parse the declaration for an Element content either Mixed or Children,
6592
 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
6593
 *
6594
 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6595
 *
 * The current character must be '('. @name is only used in the error
 * message. *result is reset to NULL first and receives the parsed tree,
 * which is NULL when the sub-parser failed.
 *
6596
 * Returns XML_ELEMENT_TYPE_MIXED or XML_ELEMENT_TYPE_ELEMENT depending on
 * which form was recognized (even if *result ends up NULL), or -1 if '('
 * is missing or the parser reached the EOF state.
6597
 */
6598
6599
int
6600
xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
6601
1.15M
                           xmlElementContentPtr *result) {
6602
6603
1.15M
    xmlElementContentPtr tree = NULL;
    /* Input holding the opening '(', passed down for the boundary check. */
6604
1.15M
    int inputid = ctxt->input->id;
6605
1.15M
    int res;
6606
6607
1.15M
    *result = NULL;
6608
6609
1.15M
    if (RAW != '(') {
6610
0
  xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6611
0
    "xmlParseElementContentDecl : %s '(' expected\n", name);
6612
0
  return(-1);
6613
0
    }
6614
1.15M
    NEXT;
6615
1.15M
    GROW;
6616
1.15M
    if (ctxt->instate == XML_PARSER_EOF)
6617
0
        return(-1);
6618
1.15M
    SKIP_BLANKS;
    /* '#PCDATA' right after the '(' selects the Mixed form. */
6619
1.15M
    if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6620
489k
        tree = xmlParseElementMixedContentDecl(ctxt, inputid);
6621
489k
  res = XML_ELEMENT_TYPE_MIXED;
6622
667k
    } else {
6623
667k
        tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
6624
667k
  res = XML_ELEMENT_TYPE_ELEMENT;
6625
667k
    }
6626
1.15M
    SKIP_BLANKS;
6627
1.15M
    *result = tree;
6628
1.15M
    return(res);
6629
1.15M
}
6630
6631
/**
6632
 * xmlParseElementDecl:
6633
 * @ctxt:  an XML parser context
6634
 *
6635
 * DEPRECATED: Internal function, don't use.
6636
 *
6637
 * Parse an Element declaration.
6638
 *
6639
 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6640
 *
6641
 * [ VC: Unique Element Type Declaration ]
6642
 * No element type may be declared more than once
6643
 *
 * The declaration is reported through the elementDecl SAX callback only
 * when the closing '>' is found, a SAX handler is set and SAX is not
 * disabled. A missing '>' is reported as an error, but the type parsed so
 * far is still returned.
 *
6644
 * Returns the type of the element, or -1 in case of error (including the
 * case where the input does not start with '<!ELEMENT')
6645
 */
6646
int
6647
1.53M
xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
6648
1.53M
    const xmlChar *name;
6649
1.53M
    int ret = -1;
6650
1.53M
    xmlElementContentPtr content  = NULL;
6651
6652
    /* GROW; done in the caller */
6653
1.53M
    if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
  /* Remembered to check that '>' is found in the same input (entity). */
6654
1.52M
  int inputid = ctxt->input->id;
6655
6656
1.52M
  SKIP(9);
6657
1.52M
  if (SKIP_BLANKS == 0) {
6658
1.91k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6659
1.91k
               "Space required after 'ELEMENT'\n");
6660
1.91k
      return(-1);
6661
1.91k
  }
6662
1.52M
        name = xmlParseName(ctxt);
6663
1.52M
  if (name == NULL) {
6664
1.18k
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6665
1.18k
         "xmlParseElementDecl: no name for Element\n");
6666
1.18k
      return(-1);
6667
1.18k
  }
  /* Unlike the check after the keyword, a missing blank here is only
   * reported; parsing continues. */
6668
1.52M
  if (SKIP_BLANKS == 0) {
6669
6.82k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6670
6.82k
         "Space required after the element name\n");
6671
6.82k
  }
  /* contentspec: 'EMPTY', 'ANY', or a parenthesized content model. */
6672
1.52M
  if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
6673
347k
      SKIP(5);
6674
      /*
6675
       * Element must always be empty.
6676
       */
6677
347k
      ret = XML_ELEMENT_TYPE_EMPTY;
6678
1.17M
  } else if ((RAW == 'A') && (NXT(1) == 'N') &&
6679
1.17M
             (NXT(2) == 'Y')) {
6680
8.80k
      SKIP(3);
6681
      /*
6682
       * Element is a generic container.
6683
       */
6684
8.80k
      ret = XML_ELEMENT_TYPE_ANY;
6685
1.16M
  } else if (RAW == '(') {
6686
1.15M
      ret = xmlParseElementContentDecl(ctxt, name, &content);
6687
1.15M
  } else {
6688
      /*
6689
       * [ WFC: PEs in Internal Subset ] error handling.
6690
       */
6691
9.62k
      if ((RAW == '%') && (ctxt->external == 0) &&
6692
9.62k
          (ctxt->inputNr == 1)) {
6693
259
    xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
6694
259
    "PEReference: forbidden within markup decl in internal subset\n");
6695
9.36k
      } else {
6696
9.36k
    xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6697
9.36k
          "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6698
9.36k
            }
6699
9.62k
      return(-1);
6700
9.62k
  }
6701
6702
1.51M
  SKIP_BLANKS;
6703
  /* Without the closing '>' the content tree is dropped and no SAX event
   * is sent; ret keeps the type parsed above. */
6704
1.51M
  if (RAW != '>') {
6705
19.5k
      xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
6706
19.5k
      if (content != NULL) {
6707
1.71k
    xmlFreeDocElementContent(ctxt->myDoc, content);
6708
1.71k
      }
6709
1.49M
  } else {
6710
1.49M
      if (inputid != ctxt->input->id) {
6711
301
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6712
301
                               "Element declaration doesn't start and stop in"
6713
301
                               " the same entity\n");
6714
301
      }
6715
6716
1.49M
      NEXT;
6717
1.49M
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6718
1.49M
    (ctxt->sax->elementDecl != NULL)) {
    /* Clear parent before the callback; it is tested afterwards to
     * tell whether the tree was plugged in by the callee. */
6719
1.35M
    if (content != NULL)
6720
1.02M
        content->parent = NULL;
6721
1.35M
          ctxt->sax->elementDecl(ctxt->userData, name, ret,
6722
1.35M
                           content);
6723
1.35M
    if ((content != NULL) && (content->parent == NULL)) {
6724
        /*
6725
         * this is a trick: if xmlAddElementDecl is called,
6726
         * instead of copying the full tree it is plugged directly
6727
         * if called from the parser. Avoid duplicating the
6728
         * interfaces or change the API/ABI
6729
         */
6730
31.7k
        xmlFreeDocElementContent(ctxt->myDoc, content);
6731
31.7k
    }
6732
1.35M
      } else if (content != NULL) {
    /* No callback was invoked: the tree is released here. */
6733
105k
    xmlFreeDocElementContent(ctxt->myDoc, content);
6734
105k
      }
6735
1.49M
  }
6736
1.51M
    }
6737
1.52M
    return(ret);
6738
1.53M
}
6739
6740
/**
6741
 * xmlParseConditionalSections
6742
 * @ctxt:  an XML parser context
6743
 *
6744
 * [61] conditionalSect ::= includeSect | ignoreSect
6745
 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6746
 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6747
 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6748
 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6749
 */
6750
6751
static void
6752
12.9k
xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
6753
12.9k
    int *inputIds = NULL;
6754
12.9k
    size_t inputIdsSize = 0;
6755
12.9k
    size_t depth = 0;
6756
6757
71.6k
    while (ctxt->instate != XML_PARSER_EOF) {
6758
71.4k
        if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6759
38.2k
            int id = ctxt->input->id;
6760
6761
38.2k
            SKIP(3);
6762
38.2k
            SKIP_BLANKS;
6763
6764
38.2k
            if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
6765
31.2k
                SKIP(7);
6766
31.2k
                SKIP_BLANKS;
6767
31.2k
                if (RAW != '[') {
6768
265
                    xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6769
265
                    xmlHaltParser(ctxt);
6770
265
                    goto error;
6771
265
                }
6772
31.0k
                if (ctxt->input->id != id) {
6773
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6774
0
                                   "All markup of the conditional section is"
6775
0
                                   " not in the same entity\n");
6776
0
                }
6777
31.0k
                NEXT;
6778
6779
31.0k
                if (inputIdsSize <= depth) {
6780
9.16k
                    int *tmp;
6781
6782
9.16k
                    inputIdsSize = (inputIdsSize == 0 ? 4 : inputIdsSize * 2);
6783
9.16k
                    tmp = (int *) xmlRealloc(inputIds,
6784
9.16k
                            inputIdsSize * sizeof(int));
6785
9.16k
                    if (tmp == NULL) {
6786
0
                        xmlErrMemory(ctxt, NULL);
6787
0
                        goto error;
6788
0
                    }
6789
9.16k
                    inputIds = tmp;
6790
9.16k
                }
6791
31.0k
                inputIds[depth] = id;
6792
31.0k
                depth++;
6793
31.0k
            } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
6794
4.40k
                int state;
6795
4.40k
                xmlParserInputState instate;
6796
4.40k
                size_t ignoreDepth = 0;
6797
6798
4.40k
                SKIP(6);
6799
4.40k
                SKIP_BLANKS;
6800
4.40k
                if (RAW != '[') {
6801
174
                    xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6802
174
                    xmlHaltParser(ctxt);
6803
174
                    goto error;
6804
174
                }
6805
4.23k
                if (ctxt->input->id != id) {
6806
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6807
0
                                   "All markup of the conditional section is"
6808
0
                                   " not in the same entity\n");
6809
0
                }
6810
4.23k
                NEXT;
6811
6812
                /*
6813
                 * Parse up to the end of the conditional section but disable
6814
                 * SAX event generation (and thus DTD building) in the meantime
6815
                 */
6816
4.23k
                state = ctxt->disableSAX;
6817
4.23k
                instate = ctxt->instate;
6818
4.23k
                if (ctxt->recovery == 0) ctxt->disableSAX = 1;
6819
4.23k
                ctxt->instate = XML_PARSER_IGNORE;
6820
6821
4.22M
                while (RAW != 0) {
6822
4.22M
                    if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6823
11.6k
                        SKIP(3);
6824
11.6k
                        ignoreDepth++;
6825
                        /* Check for integer overflow */
6826
11.6k
                        if (ignoreDepth == 0) {
6827
0
                            xmlErrMemory(ctxt, NULL);
6828
0
                            goto error;
6829
0
                        }
6830
4.21M
                    } else if ((RAW == ']') && (NXT(1) == ']') &&
6831
4.21M
                               (NXT(2) == '>')) {
6832
9.39k
                        if (ignoreDepth == 0)
6833
2.03k
                            break;
6834
7.35k
                        SKIP(3);
6835
7.35k
                        ignoreDepth--;
6836
4.20M
                    } else {
6837
4.20M
                        NEXT;
6838
4.20M
                    }
6839
4.22M
                }
6840
6841
4.23k
                ctxt->disableSAX = state;
6842
4.23k
                ctxt->instate = instate;
6843
6844
4.23k
    if (RAW == 0) {
6845
2.20k
        xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
6846
2.20k
                    goto error;
6847
2.20k
    }
6848
2.03k
                if (ctxt->input->id != id) {
6849
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6850
0
                                   "All markup of the conditional section is"
6851
0
                                   " not in the same entity\n");
6852
0
                }
6853
2.03k
                SKIP(3);
6854
2.53k
            } else {
6855
2.53k
                xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
6856
2.53k
                xmlHaltParser(ctxt);
6857
2.53k
                goto error;
6858
2.53k
            }
6859
38.2k
        } else if ((depth > 0) &&
6860
33.1k
                   (RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6861
16.8k
            depth--;
6862
16.8k
            if (ctxt->input->id != inputIds[depth]) {
6863
372
                xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6864
372
                               "All markup of the conditional section is not"
6865
372
                               " in the same entity\n");
6866
372
            }
6867
16.8k
            SKIP(3);
6868
16.8k
        } else {
6869
16.3k
            int id = ctxt->input->id;
6870
16.3k
            unsigned long cons = CUR_CONSUMED;
6871
6872
16.3k
            xmlParseMarkupDecl(ctxt);
6873
6874
16.3k
            if ((id == ctxt->input->id) && (cons == CUR_CONSUMED)) {
6875
2.77k
                xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6876
2.77k
                xmlHaltParser(ctxt);
6877
2.77k
                goto error;
6878
2.77k
            }
6879
16.3k
        }
6880
6881
63.4k
        if (depth == 0)
6882
4.75k
            break;
6883
6884
58.7k
        SKIP_BLANKS;
6885
58.7k
        GROW;
6886
58.7k
    }
6887
6888
12.9k
error:
6889
12.9k
    xmlFree(inputIds);
6890
12.9k
}
6891
6892
/**
6893
 * xmlParseMarkupDecl:
6894
 * @ctxt:  an XML parser context
6895
 *
6896
 * DEPRECATED: Internal function, don't use.
6897
 *
6898
 * parse Markup declarations
6899
 *
6900
 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6901
 *                     NotationDecl | PI | Comment
6902
 *
6903
 * [ VC: Proper Declaration/PE Nesting ]
6904
 * Parameter-entity replacement text must be properly nested with
6905
 * markup declarations. That is to say, if either the first character
6906
 * or the last character of a markup declaration (markupdecl above) is
6907
 * contained in the replacement text for a parameter-entity reference,
6908
 * both must be contained in the same replacement text.
6909
 *
6910
 * [ WFC: PEs in Internal Subset ]
6911
 * In the internal DTD subset, parameter-entity references can occur
6912
 * only where markup declarations can occur, not within markup declarations.
6913
 * (This does not apply to references that occur in external parameter
6914
 * entities or to the external subset.)
6915
 */
6916
void
6917
5.60M
xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
6918
5.60M
    GROW;
6919
5.60M
    if (CUR == '<') {
6920
5.33M
        if (NXT(1) == '!') {
6921
5.31M
      switch (NXT(2)) {
6922
3.01M
          case 'E':
6923
3.01M
        if (NXT(3) == 'L')
6924
1.53M
      xmlParseElementDecl(ctxt);
6925
1.48M
        else if (NXT(3) == 'N')
6926
1.48M
      xmlParseEntityDecl(ctxt);
6927
3.01M
        break;
6928
1.16M
          case 'A':
6929
1.16M
        xmlParseAttributeListDecl(ctxt);
6930
1.16M
        break;
6931
10.1k
          case 'N':
6932
10.1k
        xmlParseNotationDecl(ctxt);
6933
10.1k
        break;
6934
1.12M
          case '-':
6935
1.12M
        xmlParseComment(ctxt);
6936
1.12M
        break;
6937
4.71k
    default:
6938
        /* there is an error but it will be detected later */
6939
4.71k
        break;
6940
5.31M
      }
6941
5.31M
  } else if (NXT(1) == '?') {
6942
5.37k
      xmlParsePI(ctxt);
6943
5.37k
  }
6944
5.33M
    }
6945
6946
    /*
6947
     * detect requirement to exit there and act accordingly
6948
     * and avoid having instate overridden later on
6949
     */
6950
5.60M
    if (ctxt->instate == XML_PARSER_EOF)
6951
18.2k
        return;
6952
6953
5.58M
    ctxt->instate = XML_PARSER_DTD;
6954
5.58M
}
6955
6956
/**
6957
 * xmlParseTextDecl:
6958
 * @ctxt:  an XML parser context
6959
 *
6960
 * DEPRECATED: Internal function, don't use.
6961
 *
6962
 * parse an XML declaration header for external entities
6963
 *
6964
 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
6965
 */
6966
6967
void
6968
22.5k
xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
6969
22.5k
    xmlChar *version;
6970
22.5k
    const xmlChar *encoding;
6971
22.5k
    int oldstate;
6972
6973
    /*
6974
     * We know that '<?xml' is here.
6975
     */
6976
22.5k
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
6977
22.1k
  SKIP(5);
6978
22.1k
    } else {
6979
423
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
6980
423
  return;
6981
423
    }
6982
6983
    /* Avoid expansion of parameter entities when skipping blanks. */
6984
22.1k
    oldstate = ctxt->instate;
6985
22.1k
    ctxt->instate = XML_PARSER_START;
6986
6987
22.1k
    if (SKIP_BLANKS == 0) {
6988
0
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6989
0
           "Space needed after '<?xml'\n");
6990
0
    }
6991
6992
    /*
6993
     * We may have the VersionInfo here.
6994
     */
6995
22.1k
    version = xmlParseVersionInfo(ctxt);
6996
22.1k
    if (version == NULL)
6997
1.84k
  version = xmlCharStrdup(XML_DEFAULT_VERSION);
6998
20.3k
    else {
6999
20.3k
  if (SKIP_BLANKS == 0) {
7000
932
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7001
932
               "Space needed here\n");
7002
932
  }
7003
20.3k
    }
7004
22.1k
    ctxt->input->version = version;
7005
7006
    /*
7007
     * We must have the encoding declaration
7008
     */
7009
22.1k
    encoding = xmlParseEncodingDecl(ctxt);
7010
22.1k
    if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7011
  /*
7012
   * The XML REC instructs us to stop parsing right here
7013
   */
7014
216
        ctxt->instate = oldstate;
7015
216
        return;
7016
216
    }
7017
21.9k
    if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
7018
2.76k
  xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
7019
2.76k
           "Missing encoding in text declaration\n");
7020
2.76k
    }
7021
7022
21.9k
    SKIP_BLANKS;
7023
21.9k
    if ((RAW == '?') && (NXT(1) == '>')) {
7024
18.0k
        SKIP(2);
7025
18.0k
    } else if (RAW == '>') {
7026
        /* Deprecated old WD ... */
7027
162
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
7028
162
  NEXT;
7029
3.71k
    } else {
7030
3.71k
        int c;
7031
7032
3.71k
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
7033
350k
        while ((c = CUR) != 0) {
7034
350k
            NEXT;
7035
350k
            if (c == '>')
7036
3.43k
                break;
7037
350k
        }
7038
3.71k
    }
7039
7040
21.9k
    ctxt->instate = oldstate;
7041
21.9k
}
7042
7043
/**
7044
 * xmlParseExternalSubset:
7045
 * @ctxt:  an XML parser context
7046
 * @ExternalID: the external identifier
7047
 * @SystemID: the system identifier (or URL)
7048
 *
7049
 * parse Markup declarations from an external subset
7050
 *
7051
 * [30] extSubset ::= textDecl? extSubsetDecl
7052
 *
7053
 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
7054
 */
7055
void
7056
xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
7057
56.7k
                       const xmlChar *SystemID) {
7058
56.7k
    xmlDetectSAX2(ctxt);
7059
56.7k
    GROW;
7060
7061
56.7k
    if ((ctxt->encoding == NULL) &&
7062
56.7k
        (ctxt->input->end - ctxt->input->cur >= 4)) {
7063
56.5k
        xmlChar start[4];
7064
56.5k
  xmlCharEncoding enc;
7065
7066
56.5k
  start[0] = RAW;
7067
56.5k
  start[1] = NXT(1);
7068
56.5k
  start[2] = NXT(2);
7069
56.5k
  start[3] = NXT(3);
7070
56.5k
  enc = xmlDetectCharEncoding(start, 4);
7071
56.5k
  if (enc != XML_CHAR_ENCODING_NONE)
7072
12.8k
      xmlSwitchEncoding(ctxt, enc);
7073
56.5k
    }
7074
7075
56.7k
    if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
7076
12.4k
  xmlParseTextDecl(ctxt);
7077
12.4k
  if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7078
      /*
7079
       * The XML REC instructs us to stop parsing right here
7080
       */
7081
176
      xmlHaltParser(ctxt);
7082
176
      return;
7083
176
  }
7084
12.4k
    }
7085
56.5k
    if (ctxt->myDoc == NULL) {
7086
0
        ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
7087
0
  if (ctxt->myDoc == NULL) {
7088
0
      xmlErrMemory(ctxt, "New Doc failed");
7089
0
      return;
7090
0
  }
7091
0
  ctxt->myDoc->properties = XML_DOC_INTERNAL;
7092
0
    }
7093
56.5k
    if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
7094
0
        xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
7095
7096
56.5k
    ctxt->instate = XML_PARSER_DTD;
7097
56.5k
    ctxt->external = 1;
7098
56.5k
    SKIP_BLANKS;
7099
2.65M
    while (((RAW == '<') && (NXT(1) == '?')) ||
7100
2.65M
           ((RAW == '<') && (NXT(1) == '!')) ||
7101
2.65M
     (RAW == '%')) {
7102
2.60M
  int id = ctxt->input->id;
7103
2.60M
  unsigned long cons = CUR_CONSUMED;
7104
7105
2.60M
  GROW;
7106
2.60M
        if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7107
12.9k
      xmlParseConditionalSections(ctxt);
7108
12.9k
  } else
7109
2.59M
      xmlParseMarkupDecl(ctxt);
7110
2.60M
        SKIP_BLANKS;
7111
7112
2.60M
  if ((id == ctxt->input->id) && (cons == CUR_CONSUMED)) {
7113
7.31k
      xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7114
7.31k
      break;
7115
7.31k
  }
7116
2.60M
    }
7117
7118
56.5k
    if (RAW != 0) {
7119
22.7k
  xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7120
22.7k
    }
7121
7122
56.5k
}
7123
7124
/**
7125
 * xmlParseReference:
7126
 * @ctxt:  an XML parser context
7127
 *
7128
 * DEPRECATED: Internal function, don't use.
7129
 *
7130
 * parse and handle entity references in content, depending on the SAX
7131
 * interface, this may end up in a call to characters() if this is a
7132
 * CharRef, a predefined entity, if there is no reference() callback,
7133
 * or if the parser was asked to switch to that mode.
7134
 *
7135
 * [67] Reference ::= EntityRef | CharRef
7136
 */
7137
void
7138
55.5M
xmlParseReference(xmlParserCtxtPtr ctxt) {
7139
55.5M
    xmlEntityPtr ent;
7140
55.5M
    xmlChar *val;
7141
55.5M
    int was_checked;
7142
55.5M
    xmlNodePtr list = NULL;
7143
55.5M
    xmlParserErrors ret = XML_ERR_OK;
7144
7145
7146
55.5M
    if (RAW != '&')
7147
0
        return;
7148
7149
    /*
7150
     * Simple case of a CharRef
7151
     */
7152
55.5M
    if (NXT(1) == '#') {
7153
8.54M
  int i = 0;
7154
8.54M
  xmlChar out[16];
7155
8.54M
  int hex = NXT(2);
7156
8.54M
  int value = xmlParseCharRef(ctxt);
7157
7158
8.54M
  if (value == 0)
7159
690k
      return;
7160
7.85M
  if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
7161
      /*
7162
       * So we are using non-UTF-8 buffers
7163
       * Check that the char fits in 8 bits, if not
7164
       * generate a CharRef.
7165
       */
7166
6.78M
      if (value <= 0xFF) {
7167
6.71M
    out[0] = value;
7168
6.71M
    out[1] = 0;
7169
6.71M
    if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7170
6.71M
        (!ctxt->disableSAX))
7171
768k
        ctxt->sax->characters(ctxt->userData, out, 1);
7172
6.71M
      } else {
7173
66.9k
    if ((hex == 'x') || (hex == 'X'))
7174
1.01k
        snprintf((char *)out, sizeof(out), "#x%X", value);
7175
65.9k
    else
7176
65.9k
        snprintf((char *)out, sizeof(out), "#%d", value);
7177
66.9k
    if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7178
66.9k
        (!ctxt->disableSAX))
7179
8.17k
        ctxt->sax->reference(ctxt->userData, out);
7180
66.9k
      }
7181
6.78M
  } else {
7182
      /*
7183
       * Just encode the value in UTF-8
7184
       */
7185
1.06M
      COPY_BUF(0 ,out, i, value);
7186
1.06M
      out[i] = 0;
7187
1.06M
      if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7188
1.06M
    (!ctxt->disableSAX))
7189
820k
    ctxt->sax->characters(ctxt->userData, out, i);
7190
1.06M
  }
7191
7.85M
  return;
7192
8.54M
    }
7193
7194
    /*
7195
     * We are seeing an entity reference
7196
     */
7197
46.9M
    ent = xmlParseEntityRef(ctxt);
7198
46.9M
    if (ent == NULL) return;
7199
14.1M
    if (!ctxt->wellFormed)
7200
8.74M
  return;
7201
5.44M
    was_checked = ent->checked;
7202
7203
    /* special case of predefined entities */
7204
5.44M
    if ((ent->name == NULL) ||
7205
5.44M
        (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
7206
118k
  val = ent->content;
7207
118k
  if (val == NULL) return;
7208
  /*
7209
   * inline the entity.
7210
   */
7211
118k
  if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7212
118k
      (!ctxt->disableSAX))
7213
118k
      ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
7214
118k
  return;
7215
118k
    }
7216
7217
    /*
7218
     * The first reference to the entity triggers a parsing phase
7219
     * where the ent->children is filled with the result from
7220
     * the parsing.
7221
     * Note: external parsed entities will not be loaded, it is not
7222
     * required for a non-validating parser, unless the parsing option
7223
     * of validating, or substituting entities were given. Doing so is
7224
     * far more secure as the parser will only process data coming from
7225
     * the document entity by default.
7226
     */
7227
5.32M
    if (((ent->checked == 0) ||
7228
5.32M
         ((ent->children == NULL) && (ctxt->options & XML_PARSE_NOENT))) &&
7229
5.32M
        ((ent->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY) ||
7230
5.23M
         (ctxt->options & (XML_PARSE_NOENT | XML_PARSE_DTDVALID)))) {
7231
5.23M
  unsigned long oldnbent = ctxt->nbentities, diff;
7232
7233
  /*
7234
   * This is a bit hackish but this seems the best
7235
   * way to make sure both SAX and DOM entity support
7236
   * behaves okay.
7237
   */
7238
5.23M
  void *user_data;
7239
5.23M
  if (ctxt->userData == ctxt)
7240
5.23M
      user_data = NULL;
7241
0
  else
7242
0
      user_data = ctxt->userData;
7243
7244
  /*
7245
   * Check that this entity is well formed
7246
   * 4.3.2: An internal general parsed entity is well-formed
7247
   * if its replacement text matches the production labeled
7248
   * content.
7249
   */
7250
5.23M
  if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7251
276k
      ctxt->depth++;
7252
276k
      ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content,
7253
276k
                                                user_data, &list);
7254
276k
      ctxt->depth--;
7255
7256
4.95M
  } else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7257
4.95M
      ctxt->depth++;
7258
4.95M
      ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax,
7259
4.95M
                                     user_data, ctxt->depth, ent->URI,
7260
4.95M
             ent->ExternalID, &list);
7261
4.95M
      ctxt->depth--;
7262
4.95M
  } else {
7263
0
      ret = XML_ERR_ENTITY_PE_INTERNAL;
7264
0
      xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7265
0
       "invalid entity type found\n", NULL);
7266
0
  }
7267
7268
  /*
7269
   * Store the number of entities needing parsing for this entity
7270
   * content and perform the checks
7271
   */
7272
5.23M
        diff = ctxt->nbentities - oldnbent + 1;
7273
5.23M
        if (diff > INT_MAX / 2)
7274
0
            diff = INT_MAX / 2;
7275
5.23M
        ent->checked = diff * 2;
7276
5.23M
  if ((ent->content != NULL) && (xmlStrchr(ent->content, '<')))
7277
67.1k
      ent->checked |= 1;
7278
5.23M
  if (ret == XML_ERR_ENTITY_LOOP) {
7279
603k
      xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7280
603k
            xmlHaltParser(ctxt);
7281
603k
      xmlFreeNodeList(list);
7282
603k
      return;
7283
603k
  }
7284
4.62M
  if (xmlParserEntityCheck(ctxt, 0, ent, 0)) {
7285
1.97k
      xmlFreeNodeList(list);
7286
1.97k
      return;
7287
1.97k
  }
7288
7289
4.62M
  if ((ret == XML_ERR_OK) && (list != NULL)) {
7290
38.5k
      if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
7291
38.5k
       (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
7292
38.5k
    (ent->children == NULL)) {
7293
38.1k
    ent->children = list;
7294
                /*
7295
                 * Prune it directly in the generated document
7296
                 * except for single text nodes.
7297
                 */
7298
38.1k
                if ((ctxt->replaceEntities == 0) ||
7299
38.1k
                    (ctxt->parseMode == XML_PARSE_READER) ||
7300
38.1k
                    ((list->type == XML_TEXT_NODE) &&
7301
35.4k
                     (list->next == NULL))) {
7302
35.4k
                    ent->owner = 1;
7303
81.3k
                    while (list != NULL) {
7304
45.8k
                        list->parent = (xmlNodePtr) ent;
7305
45.8k
                        if (list->doc != ent->doc)
7306
0
                            xmlSetTreeDoc(list, ent->doc);
7307
45.8k
                        if (list->next == NULL)
7308
35.4k
                            ent->last = list;
7309
45.8k
                        list = list->next;
7310
45.8k
                    }
7311
35.4k
                    list = NULL;
7312
35.4k
                } else {
7313
2.69k
                    ent->owner = 0;
7314
9.41k
                    while (list != NULL) {
7315
6.71k
                        list->parent = (xmlNodePtr) ctxt->node;
7316
6.71k
                        list->doc = ctxt->myDoc;
7317
6.71k
                        if (list->next == NULL)
7318
2.69k
                            ent->last = list;
7319
6.71k
                        list = list->next;
7320
6.71k
                    }
7321
2.69k
                    list = ent->children;
7322
#ifdef LIBXML_LEGACY_ENABLED
7323
                    if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7324
                        xmlAddEntityReference(ent, list, NULL);
7325
#endif /* LIBXML_LEGACY_ENABLED */
7326
2.69k
                }
7327
38.1k
      } else {
7328
369
    xmlFreeNodeList(list);
7329
369
    list = NULL;
7330
369
      }
7331
4.58M
  } else if ((ret != XML_ERR_OK) &&
7332
4.58M
       (ret != XML_WAR_UNDECLARED_ENTITY)) {
7333
4.55M
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7334
4.55M
         "Entity '%s' failed to parse\n", ent->name);
7335
4.55M
            if (ent->content != NULL)
7336
139k
                ent->content[0] = 0;
7337
4.55M
      xmlParserEntityCheck(ctxt, 0, ent, 0);
7338
4.55M
  } else if (list != NULL) {
7339
0
      xmlFreeNodeList(list);
7340
0
      list = NULL;
7341
0
  }
7342
4.62M
  if (ent->checked == 0)
7343
0
      ent->checked = 2;
7344
7345
        /* Prevent entity from being parsed and expanded twice (Bug 760367). */
7346
4.62M
        was_checked = 0;
7347
4.62M
    } else if (ent->checked != 1) {
7348
94.1k
  ctxt->nbentities += ent->checked / 2;
7349
94.1k
    }
7350
7351
    /*
7352
     * Now that the entity content has been gathered
7353
     * provide it to the application, this can take different forms based
7354
     * on the parsing modes.
7355
     */
7356
4.72M
    if (ent->children == NULL) {
7357
  /*
7358
   * Probably running in SAX mode and the callbacks don't
7359
   * build the entity content. So unless we already went
7360
   * though parsing for first checking go though the entity
7361
   * content to generate callbacks associated to the entity
7362
   */
7363
4.61M
  if (was_checked != 0) {
7364
34.4k
      void *user_data;
7365
      /*
7366
       * This is a bit hackish but this seems the best
7367
       * way to make sure both SAX and DOM entity support
7368
       * behaves okay.
7369
       */
7370
34.4k
      if (ctxt->userData == ctxt)
7371
34.4k
    user_data = NULL;
7372
0
      else
7373
0
    user_data = ctxt->userData;
7374
7375
34.4k
      if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7376
22.2k
    ctxt->depth++;
7377
22.2k
    ret = xmlParseBalancedChunkMemoryInternal(ctxt,
7378
22.2k
           ent->content, user_data, NULL);
7379
22.2k
    ctxt->depth--;
7380
22.2k
      } else if (ent->etype ==
7381
12.1k
           XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7382
12.1k
    ctxt->depth++;
7383
12.1k
    ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
7384
12.1k
         ctxt->sax, user_data, ctxt->depth,
7385
12.1k
         ent->URI, ent->ExternalID, NULL);
7386
12.1k
    ctxt->depth--;
7387
12.1k
      } else {
7388
0
    ret = XML_ERR_ENTITY_PE_INTERNAL;
7389
0
    xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7390
0
           "invalid entity type found\n", NULL);
7391
0
      }
7392
34.4k
      if (ret == XML_ERR_ENTITY_LOOP) {
7393
189
    xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7394
189
    return;
7395
189
      }
7396
34.4k
  }
7397
4.61M
  if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7398
4.61M
      (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7399
      /*
7400
       * Entity reference callback comes second, it's somewhat
7401
       * superfluous but a compatibility to historical behaviour
7402
       */
7403
50.4k
      ctxt->sax->reference(ctxt->userData, ent->name);
7404
50.4k
  }
7405
4.61M
  return;
7406
4.61M
    }
7407
7408
    /*
7409
     * If we didn't get any children for the entity being built
7410
     */
7411
107k
    if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7412
107k
  (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7413
  /*
7414
   * Create a node.
7415
   */
7416
50.9k
  ctxt->sax->reference(ctxt->userData, ent->name);
7417
50.9k
  return;
7418
50.9k
    }
7419
7420
56.6k
    if ((ctxt->replaceEntities) || (ent->children == NULL))  {
7421
  /*
7422
   * There is a problem on the handling of _private for entities
7423
   * (bug 155816): Should we copy the content of the field from
7424
   * the entity (possibly overwriting some value set by the user
7425
   * when a copy is created), should we leave it alone, or should
7426
   * we try to take care of different situations?  The problem
7427
   * is exacerbated by the usage of this field by the xmlReader.
7428
   * To fix this bug, we look at _private on the created node
7429
   * and, if it's NULL, we copy in whatever was in the entity.
7430
   * If it's not NULL we leave it alone.  This is somewhat of a
7431
   * hack - maybe we should have further tests to determine
7432
   * what to do.
7433
   */
7434
45.4k
  if ((ctxt->node != NULL) && (ent->children != NULL)) {
7435
      /*
7436
       * Seems we are generating the DOM content, do
7437
       * a simple tree copy for all references except the first
7438
       * In the first occurrence list contains the replacement.
7439
       */
7440
45.4k
      if (((list == NULL) && (ent->owner == 0)) ||
7441
45.4k
    (ctxt->parseMode == XML_PARSE_READER)) {
7442
14.1k
    xmlNodePtr nw = NULL, cur, firstChild = NULL;
7443
7444
    /*
7445
     * We are copying here, make sure there is no abuse
7446
     */
7447
14.1k
    ctxt->sizeentcopy += ent->length + 5;
7448
14.1k
    if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
7449
0
        return;
7450
7451
    /*
7452
     * when operating on a reader, the entities definitions
7453
     * are always owning the entities subtree.
7454
    if (ctxt->parseMode == XML_PARSE_READER)
7455
        ent->owner = 1;
7456
     */
7457
7458
14.1k
    cur = ent->children;
7459
18.5k
    while (cur != NULL) {
7460
18.5k
        nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7461
18.5k
        if (nw != NULL) {
7462
18.5k
      if (nw->_private == NULL)
7463
18.5k
          nw->_private = cur->_private;
7464
18.5k
      if (firstChild == NULL){
7465
14.1k
          firstChild = nw;
7466
14.1k
      }
7467
18.5k
      nw = xmlAddChild(ctxt->node, nw);
7468
18.5k
        }
7469
18.5k
        if (cur == ent->last) {
7470
      /*
7471
       * needed to detect some strange empty
7472
       * node cases in the reader tests
7473
       */
7474
14.1k
      if ((ctxt->parseMode == XML_PARSE_READER) &&
7475
14.1k
          (nw != NULL) &&
7476
14.1k
          (nw->type == XML_ELEMENT_NODE) &&
7477
14.1k
          (nw->children == NULL))
7478
558
          nw->extra = 1;
7479
7480
14.1k
      break;
7481
14.1k
        }
7482
4.38k
        cur = cur->next;
7483
4.38k
    }
7484
#ifdef LIBXML_LEGACY_ENABLED
7485
    if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7486
      xmlAddEntityReference(ent, firstChild, nw);
7487
#endif /* LIBXML_LEGACY_ENABLED */
7488
31.2k
      } else if ((list == NULL) || (ctxt->inputNr > 0)) {
7489
31.2k
    xmlNodePtr nw = NULL, cur, next, last,
7490
31.2k
         firstChild = NULL;
7491
7492
    /*
7493
     * We are copying here, make sure there is no abuse
7494
     */
7495
31.2k
    ctxt->sizeentcopy += ent->length + 5;
7496
31.2k
    if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
7497
0
        return;
7498
7499
    /*
7500
     * Copy the entity child list and make it the new
7501
     * entity child list. The goal is to make sure any
7502
     * ID or REF referenced will be the one from the
7503
     * document content and not the entity copy.
7504
     */
7505
31.2k
    cur = ent->children;
7506
31.2k
    ent->children = NULL;
7507
31.2k
    last = ent->last;
7508
31.2k
    ent->last = NULL;
7509
43.1k
    while (cur != NULL) {
7510
43.1k
        next = cur->next;
7511
43.1k
        cur->next = NULL;
7512
43.1k
        cur->parent = NULL;
7513
43.1k
        nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7514
43.1k
        if (nw != NULL) {
7515
43.1k
      if (nw->_private == NULL)
7516
43.1k
          nw->_private = cur->_private;
7517
43.1k
      if (firstChild == NULL){
7518
31.2k
          firstChild = cur;
7519
31.2k
      }
7520
43.1k
      xmlAddChild((xmlNodePtr) ent, nw);
7521
43.1k
      xmlAddChild(ctxt->node, cur);
7522
43.1k
        }
7523
43.1k
        if (cur == last)
7524
31.2k
      break;
7525
11.9k
        cur = next;
7526
11.9k
    }
7527
31.2k
    if (ent->owner == 0)
7528
2.69k
        ent->owner = 1;
7529
#ifdef LIBXML_LEGACY_ENABLED
7530
    if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7531
      xmlAddEntityReference(ent, firstChild, nw);
7532
#endif /* LIBXML_LEGACY_ENABLED */
7533
31.2k
      } else {
7534
0
    const xmlChar *nbktext;
7535
7536
    /*
7537
     * the name change is to avoid coalescing of the
7538
     * node with a possible previous text one which
7539
     * would make ent->children a dangling pointer
7540
     */
7541
0
    nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
7542
0
          -1);
7543
0
    if (ent->children->type == XML_TEXT_NODE)
7544
0
        ent->children->name = nbktext;
7545
0
    if ((ent->last != ent->children) &&
7546
0
        (ent->last->type == XML_TEXT_NODE))
7547
0
        ent->last->name = nbktext;
7548
0
    xmlAddChildList(ctxt->node, ent->children);
7549
0
      }
7550
7551
      /*
7552
       * This is to avoid a nasty side effect, see
7553
       * characters() in SAX.c
7554
       */
7555
45.4k
      ctxt->nodemem = 0;
7556
45.4k
      ctxt->nodelen = 0;
7557
45.4k
      return;
7558
45.4k
  }
7559
45.4k
    }
7560
56.6k
}
7561
7562
/**
7563
 * xmlParseEntityRef:
7564
 * @ctxt:  an XML parser context
7565
 *
7566
 * DEPRECATED: Internal function, don't use.
7567
 *
7568
 * parse ENTITY references declarations
7569
 *
7570
 * [68] EntityRef ::= '&' Name ';'
7571
 *
7572
 * [ WFC: Entity Declared ]
7573
 * In a document without any DTD, a document with only an internal DTD
7574
 * subset which contains no parameter entity references, or a document
7575
 * with "standalone='yes'", the Name given in the entity reference
7576
 * must match that in an entity declaration, except that well-formed
7577
 * documents need not declare any of the following entities: amp, lt,
7578
 * gt, apos, quot.  The declaration of a parameter entity must precede
7579
 * any reference to it.  Similarly, the declaration of a general entity
7580
 * must precede any reference to it which appears in a default value in an
7581
 * attribute-list declaration. Note that if entities are declared in the
7582
 * external subset or in external parameter entities, a non-validating
7583
 * processor is not obligated to read and process their declarations;
7584
 * for such documents, the rule that an entity must be declared is a
7585
 * well-formedness constraint only if standalone='yes'.
7586
 *
7587
 * [ WFC: Parsed Entity ]
7588
 * An entity reference must not contain the name of an unparsed entity
7589
 *
7590
 * Returns the xmlEntityPtr if found, or NULL otherwise.
7591
 */
7592
xmlEntityPtr
7593
63.3M
xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
7594
63.3M
    const xmlChar *name;
7595
63.3M
    xmlEntityPtr ent = NULL;
7596
7597
63.3M
    GROW;
7598
63.3M
    if (ctxt->instate == XML_PARSER_EOF)
7599
0
        return(NULL);
7600
7601
63.3M
    if (RAW != '&')
7602
0
        return(NULL);
7603
63.3M
    NEXT;
7604
63.3M
    name = xmlParseName(ctxt);
7605
63.3M
    if (name == NULL) {
7606
35.3M
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7607
35.3M
           "xmlParseEntityRef: no name\n");
7608
35.3M
        return(NULL);
7609
35.3M
    }
7610
27.9M
    if (RAW != ';') {
7611
3.45M
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7612
3.45M
  return(NULL);
7613
3.45M
    }
7614
24.5M
    NEXT;
7615
7616
    /*
7617
     * Predefined entities override any extra definition
7618
     */
7619
24.5M
    if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7620
8.89M
        ent = xmlGetPredefinedEntity(name);
7621
8.89M
        if (ent != NULL)
7622
3.60M
            return(ent);
7623
8.89M
    }
7624
7625
    /*
7626
     * Increase the number of entity references parsed
7627
     */
7628
20.9M
    ctxt->nbentities++;
7629
7630
    /*
7631
     * Ask first SAX for entity resolution, otherwise try the
7632
     * entities which may have stored in the parser context.
7633
     */
7634
20.9M
    if (ctxt->sax != NULL) {
7635
20.9M
  if (ctxt->sax->getEntity != NULL)
7636
20.9M
      ent = ctxt->sax->getEntity(ctxt->userData, name);
7637
20.9M
  if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7638
20.9M
      (ctxt->options & XML_PARSE_OLDSAX))
7639
4.29k
      ent = xmlGetPredefinedEntity(name);
7640
20.9M
  if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7641
20.9M
      (ctxt->userData==ctxt)) {
7642
17.1k
      ent = xmlSAX2GetEntity(ctxt, name);
7643
17.1k
  }
7644
20.9M
    }
7645
20.9M
    if (ctxt->instate == XML_PARSER_EOF)
7646
0
  return(NULL);
7647
    /*
7648
     * [ WFC: Entity Declared ]
7649
     * In a document without any DTD, a document with only an
7650
     * internal DTD subset which contains no parameter entity
7651
     * references, or a document with "standalone='yes'", the
7652
     * Name given in the entity reference must match that in an
7653
     * entity declaration, except that well-formed documents
7654
     * need not declare any of the following entities: amp, lt,
7655
     * gt, apos, quot.
7656
     * The declaration of a parameter entity must precede any
7657
     * reference to it.
7658
     * Similarly, the declaration of a general entity must
7659
     * precede any reference to it which appears in a default
7660
     * value in an attribute-list declaration. Note that if
7661
     * entities are declared in the external subset or in
7662
     * external parameter entities, a non-validating processor
7663
     * is not obligated to read and process their declarations;
7664
     * for such documents, the rule that an entity must be
7665
     * declared is a well-formedness constraint only if
7666
     * standalone='yes'.
7667
     */
7668
20.9M
    if (ent == NULL) {
7669
4.60M
  if ((ctxt->standalone == 1) ||
7670
4.60M
      ((ctxt->hasExternalSubset == 0) &&
7671
4.59M
       (ctxt->hasPErefs == 0))) {
7672
4.51M
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7673
4.51M
         "Entity '%s' not defined\n", name);
7674
4.51M
  } else {
7675
84.6k
      xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7676
84.6k
         "Entity '%s' not defined\n", name);
7677
84.6k
      if ((ctxt->inSubset == 0) &&
7678
84.6k
    (ctxt->sax != NULL) &&
7679
84.6k
    (ctxt->sax->reference != NULL)) {
7680
83.9k
    ctxt->sax->reference(ctxt->userData, name);
7681
83.9k
      }
7682
84.6k
  }
7683
4.60M
  xmlParserEntityCheck(ctxt, 0, ent, 0);
7684
4.60M
  ctxt->valid = 0;
7685
4.60M
    }
7686
7687
    /*
7688
     * [ WFC: Parsed Entity ]
7689
     * An entity reference must not contain the name of an
7690
     * unparsed entity
7691
     */
7692
16.3M
    else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7693
916
  xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7694
916
     "Entity reference to unparsed entity %s\n", name);
7695
916
    }
7696
7697
    /*
7698
     * [ WFC: No External Entity References ]
7699
     * Attribute values cannot contain direct or indirect
7700
     * entity references to external entities.
7701
     */
7702
16.3M
    else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7703
16.3M
       (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7704
34.5k
  xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7705
34.5k
       "Attribute references external entity '%s'\n", name);
7706
34.5k
    }
7707
    /*
7708
     * [ WFC: No < in Attribute Values ]
7709
     * The replacement text of any entity referred to directly or
7710
     * indirectly in an attribute value (other than "&lt;") must
7711
     * not contain a <.
7712
     */
7713
16.2M
    else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7714
16.2M
       (ent != NULL) && 
7715
16.2M
       (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
7716
167k
  if (((ent->checked & 1) || (ent->checked == 0)) &&
7717
167k
       (ent->content != NULL) && (xmlStrchr(ent->content, '<'))) {
7718
4.23k
      xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7719
4.23k
  "'<' in entity '%s' is not allowed in attributes values\n", name);
7720
4.23k
        }
7721
167k
    }
7722
7723
    /*
7724
     * Internal check, no parameter entities here ...
7725
     */
7726
16.1M
    else {
7727
16.1M
  switch (ent->etype) {
7728
0
      case XML_INTERNAL_PARAMETER_ENTITY:
7729
0
      case XML_EXTERNAL_PARAMETER_ENTITY:
7730
0
      xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7731
0
       "Attempt to reference the parameter entity '%s'\n",
7732
0
            name);
7733
0
      break;
7734
16.1M
      default:
7735
16.1M
      break;
7736
16.1M
  }
7737
16.1M
    }
7738
7739
    /*
7740
     * [ WFC: No Recursion ]
7741
     * A parsed entity must not contain a recursive reference
7742
     * to itself, either directly or indirectly.
7743
     * Done somewhere else
7744
     */
7745
20.9M
    return(ent);
7746
20.9M
}
7747
7748
/**
7749
 * xmlParseStringEntityRef:
7750
 * @ctxt:  an XML parser context
7751
 * @str:  a pointer to an index in the string
7752
 *
7753
 * parse ENTITY references declarations, but this version parses it from
7754
 * a string value.
7755
 *
7756
 * [68] EntityRef ::= '&' Name ';'
7757
 *
7758
 * [ WFC: Entity Declared ]
7759
 * In a document without any DTD, a document with only an internal DTD
7760
 * subset which contains no parameter entity references, or a document
7761
 * with "standalone='yes'", the Name given in the entity reference
7762
 * must match that in an entity declaration, except that well-formed
7763
 * documents need not declare any of the following entities: amp, lt,
7764
 * gt, apos, quot.  The declaration of a parameter entity must precede
7765
 * any reference to it.  Similarly, the declaration of a general entity
7766
 * must precede any reference to it which appears in a default value in an
7767
 * attribute-list declaration. Note that if entities are declared in the
7768
 * external subset or in external parameter entities, a non-validating
7769
 * processor is not obligated to read and process their declarations;
7770
 * for such documents, the rule that an entity must be declared is a
7771
 * well-formedness constraint only if standalone='yes'.
7772
 *
7773
 * [ WFC: Parsed Entity ]
7774
 * An entity reference must not contain the name of an unparsed entity
7775
 *
7776
 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7777
 * is updated to the current location in the string.
7778
 */
7779
static xmlEntityPtr
7780
686k
xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7781
686k
    xmlChar *name;
7782
686k
    const xmlChar *ptr;
7783
686k
    xmlChar cur;
7784
686k
    xmlEntityPtr ent = NULL;
7785
7786
686k
    if ((str == NULL) || (*str == NULL))
7787
0
        return(NULL);
7788
686k
    ptr = *str;
7789
686k
    cur = *ptr;
7790
686k
    if (cur != '&')
7791
0
  return(NULL);
7792
7793
686k
    ptr++;
7794
686k
    name = xmlParseStringName(ctxt, &ptr);
7795
686k
    if (name == NULL) {
7796
26.0k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7797
26.0k
           "xmlParseStringEntityRef: no name\n");
7798
26.0k
  *str = ptr;
7799
26.0k
  return(NULL);
7800
26.0k
    }
7801
660k
    if (*ptr != ';') {
7802
20.6k
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7803
20.6k
        xmlFree(name);
7804
20.6k
  *str = ptr;
7805
20.6k
  return(NULL);
7806
20.6k
    }
7807
639k
    ptr++;
7808
7809
7810
    /*
7811
     * Predefined entities override any extra definition
7812
     */
7813
639k
    if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7814
223k
        ent = xmlGetPredefinedEntity(name);
7815
223k
        if (ent != NULL) {
7816
25.8k
            xmlFree(name);
7817
25.8k
            *str = ptr;
7818
25.8k
            return(ent);
7819
25.8k
        }
7820
223k
    }
7821
7822
    /*
7823
     * Increase the number of entity references parsed
7824
     */
7825
614k
    ctxt->nbentities++;
7826
7827
    /*
7828
     * Ask first SAX for entity resolution, otherwise try the
7829
     * entities which may have stored in the parser context.
7830
     */
7831
614k
    if (ctxt->sax != NULL) {
7832
614k
  if (ctxt->sax->getEntity != NULL)
7833
614k
      ent = ctxt->sax->getEntity(ctxt->userData, name);
7834
614k
  if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX))
7835
51.4k
      ent = xmlGetPredefinedEntity(name);
7836
614k
  if ((ent == NULL) && (ctxt->userData==ctxt)) {
7837
77.0k
      ent = xmlSAX2GetEntity(ctxt, name);
7838
77.0k
  }
7839
614k
    }
7840
614k
    if (ctxt->instate == XML_PARSER_EOF) {
7841
0
  xmlFree(name);
7842
0
  return(NULL);
7843
0
    }
7844
7845
    /*
7846
     * [ WFC: Entity Declared ]
7847
     * In a document without any DTD, a document with only an
7848
     * internal DTD subset which contains no parameter entity
7849
     * references, or a document with "standalone='yes'", the
7850
     * Name given in the entity reference must match that in an
7851
     * entity declaration, except that well-formed documents
7852
     * need not declare any of the following entities: amp, lt,
7853
     * gt, apos, quot.
7854
     * The declaration of a parameter entity must precede any
7855
     * reference to it.
7856
     * Similarly, the declaration of a general entity must
7857
     * precede any reference to it which appears in a default
7858
     * value in an attribute-list declaration. Note that if
7859
     * entities are declared in the external subset or in
7860
     * external parameter entities, a non-validating processor
7861
     * is not obligated to read and process their declarations;
7862
     * for such documents, the rule that an entity must be
7863
     * declared is a well-formedness constraint only if
7864
     * standalone='yes'.
7865
     */
7866
614k
    if (ent == NULL) {
7867
77.0k
  if ((ctxt->standalone == 1) ||
7868
77.0k
      ((ctxt->hasExternalSubset == 0) &&
7869
76.5k
       (ctxt->hasPErefs == 0))) {
7870
73.4k
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7871
73.4k
         "Entity '%s' not defined\n", name);
7872
73.4k
  } else {
7873
3.62k
      xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7874
3.62k
        "Entity '%s' not defined\n",
7875
3.62k
        name);
7876
3.62k
  }
7877
77.0k
  xmlParserEntityCheck(ctxt, 0, ent, 0);
7878
  /* TODO ? check regressions ctxt->valid = 0; */
7879
77.0k
    }
7880
7881
    /*
7882
     * [ WFC: Parsed Entity ]
7883
     * An entity reference must not contain the name of an
7884
     * unparsed entity
7885
     */
7886
537k
    else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7887
18.8k
  xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7888
18.8k
     "Entity reference to unparsed entity %s\n", name);
7889
18.8k
    }
7890
7891
    /*
7892
     * [ WFC: No External Entity References ]
7893
     * Attribute values cannot contain direct or indirect
7894
     * entity references to external entities.
7895
     */
7896
518k
    else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7897
518k
       (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7898
2.87k
  xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7899
2.87k
   "Attribute references external entity '%s'\n", name);
7900
2.87k
    }
7901
    /*
7902
     * [ WFC: No < in Attribute Values ]
7903
     * The replacement text of any entity referred to directly or
7904
     * indirectly in an attribute value (other than "&lt;") must
7905
     * not contain a <.
7906
     */
7907
515k
    else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7908
515k
       (ent != NULL) && (ent->content != NULL) &&
7909
515k
       (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
7910
515k
       (xmlStrchr(ent->content, '<'))) {
7911
94.6k
  xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7912
94.6k
     "'<' in entity '%s' is not allowed in attributes values\n",
7913
94.6k
        name);
7914
94.6k
    }
7915
7916
    /*
7917
     * Internal check, no parameter entities here ...
7918
     */
7919
420k
    else {
7920
420k
  switch (ent->etype) {
7921
0
      case XML_INTERNAL_PARAMETER_ENTITY:
7922
0
      case XML_EXTERNAL_PARAMETER_ENTITY:
7923
0
    xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7924
0
       "Attempt to reference the parameter entity '%s'\n",
7925
0
          name);
7926
0
      break;
7927
420k
      default:
7928
420k
      break;
7929
420k
  }
7930
420k
    }
7931
7932
    /*
7933
     * [ WFC: No Recursion ]
7934
     * A parsed entity must not contain a recursive reference
7935
     * to itself, either directly or indirectly.
7936
     * Done somewhere else
7937
     */
7938
7939
614k
    xmlFree(name);
7940
614k
    *str = ptr;
7941
614k
    return(ent);
7942
614k
}
7943
7944
/**
7945
 * xmlParsePEReference:
7946
 * @ctxt:  an XML parser context
7947
 *
7948
 * DEPRECATED: Internal function, don't use.
7949
 *
7950
 * parse PEReference declarations
7951
 * The entity content is handled directly by pushing it's content as
7952
 * a new input stream.
7953
 *
7954
 * [69] PEReference ::= '%' Name ';'
7955
 *
7956
 * [ WFC: No Recursion ]
7957
 * A parsed entity must not contain a recursive
7958
 * reference to itself, either directly or indirectly.
7959
 *
7960
 * [ WFC: Entity Declared ]
7961
 * In a document without any DTD, a document with only an internal DTD
7962
 * subset which contains no parameter entity references, or a document
7963
 * with "standalone='yes'", ...  ... The declaration of a parameter
7964
 * entity must precede any reference to it...
7965
 *
7966
 * [ VC: Entity Declared ]
7967
 * In a document with an external subset or external parameter entities
7968
 * with "standalone='no'", ...  ... The declaration of a parameter entity
7969
 * must precede any reference to it...
7970
 *
7971
 * [ WFC: In DTD ]
7972
 * Parameter-entity references may only appear in the DTD.
7973
 * NOTE: misleading but this is handled.
7974
 */
7975
void
7976
xmlParsePEReference(xmlParserCtxtPtr ctxt)
7977
4.27M
{
7978
4.27M
    const xmlChar *name;
7979
4.27M
    xmlEntityPtr entity = NULL;
7980
4.27M
    xmlParserInputPtr input;
7981
7982
4.27M
    if (RAW != '%')
7983
2.95M
        return;
7984
1.32M
    NEXT;
7985
1.32M
    name = xmlParseName(ctxt);
7986
1.32M
    if (name == NULL) {
7987
132k
  xmlFatalErrMsg(ctxt, XML_ERR_PEREF_NO_NAME, "PEReference: no name\n");
7988
132k
  return;
7989
132k
    }
7990
1.19M
    if (xmlParserDebugEntities)
7991
0
  xmlGenericError(xmlGenericErrorContext,
7992
0
    "PEReference: %s\n", name);
7993
1.19M
    if (RAW != ';') {
7994
5.00k
  xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
7995
5.00k
        return;
7996
5.00k
    }
7997
7998
1.18M
    NEXT;
7999
8000
    /*
8001
     * Increase the number of entity references parsed
8002
     */
8003
1.18M
    ctxt->nbentities++;
8004
8005
    /*
8006
     * Request the entity from SAX
8007
     */
8008
1.18M
    if ((ctxt->sax != NULL) &&
8009
1.18M
  (ctxt->sax->getParameterEntity != NULL))
8010
1.18M
  entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8011
1.18M
    if (ctxt->instate == XML_PARSER_EOF)
8012
0
  return;
8013
1.18M
    if (entity == NULL) {
8014
  /*
8015
   * [ WFC: Entity Declared ]
8016
   * In a document without any DTD, a document with only an
8017
   * internal DTD subset which contains no parameter entity
8018
   * references, or a document with "standalone='yes'", ...
8019
   * ... The declaration of a parameter entity must precede
8020
   * any reference to it...
8021
   */
8022
79.2k
  if ((ctxt->standalone == 1) ||
8023
79.2k
      ((ctxt->hasExternalSubset == 0) &&
8024
79.1k
       (ctxt->hasPErefs == 0))) {
8025
1.42k
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8026
1.42k
            "PEReference: %%%s; not found\n",
8027
1.42k
            name);
8028
77.8k
  } else {
8029
      /*
8030
       * [ VC: Entity Declared ]
8031
       * In a document with an external subset or external
8032
       * parameter entities with "standalone='no'", ...
8033
       * ... The declaration of a parameter entity must
8034
       * precede any reference to it...
8035
       */
8036
77.8k
            if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
8037
14.0k
                xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
8038
14.0k
                                 "PEReference: %%%s; not found\n",
8039
14.0k
                                 name, NULL);
8040
14.0k
            } else
8041
63.8k
                xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8042
63.8k
                              "PEReference: %%%s; not found\n",
8043
63.8k
                              name, NULL);
8044
77.8k
            ctxt->valid = 0;
8045
77.8k
  }
8046
79.2k
  xmlParserEntityCheck(ctxt, 0, NULL, 0);
8047
1.10M
    } else {
8048
  /*
8049
   * Internal checking in case the entity quest barfed
8050
   */
8051
1.10M
  if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8052
1.10M
      (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8053
0
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8054
0
      "Internal: %%%s; is not a parameter entity\n",
8055
0
        name, NULL);
8056
1.10M
  } else {
8057
1.10M
            xmlChar start[4];
8058
1.10M
            xmlCharEncoding enc;
8059
8060
1.10M
      if (xmlParserEntityCheck(ctxt, 0, entity, 0))
8061
1
          return;
8062
8063
1.10M
      if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
8064
1.10M
          ((ctxt->options & XML_PARSE_NOENT) == 0) &&
8065
1.10M
    ((ctxt->options & XML_PARSE_DTDVALID) == 0) &&
8066
1.10M
    ((ctxt->options & XML_PARSE_DTDLOAD) == 0) &&
8067
1.10M
    ((ctxt->options & XML_PARSE_DTDATTR) == 0) &&
8068
1.10M
    (ctxt->replaceEntities == 0) &&
8069
1.10M
    (ctxt->validate == 0))
8070
63
    return;
8071
8072
1.10M
      input = xmlNewEntityInputStream(ctxt, entity);
8073
1.10M
      if (xmlPushInput(ctxt, input) < 0) {
8074
1.14k
                xmlFreeInputStream(input);
8075
1.14k
    return;
8076
1.14k
            }
8077
8078
1.10M
      if (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) {
8079
                /*
8080
                 * Get the 4 first bytes and decode the charset
8081
                 * if enc != XML_CHAR_ENCODING_NONE
8082
                 * plug some encoding conversion routines.
8083
                 * Note that, since we may have some non-UTF8
8084
                 * encoding (like UTF16, bug 135229), the 'length'
8085
                 * is not known, but we can calculate based upon
8086
                 * the amount of data in the buffer.
8087
                 */
8088
23.6k
                GROW
8089
23.6k
                if (ctxt->instate == XML_PARSER_EOF)
8090
0
                    return;
8091
23.6k
                if ((ctxt->input->end - ctxt->input->cur)>=4) {
8092
23.6k
                    start[0] = RAW;
8093
23.6k
                    start[1] = NXT(1);
8094
23.6k
                    start[2] = NXT(2);
8095
23.6k
                    start[3] = NXT(3);
8096
23.6k
                    enc = xmlDetectCharEncoding(start, 4);
8097
23.6k
                    if (enc != XML_CHAR_ENCODING_NONE) {
8098
1.15k
                        xmlSwitchEncoding(ctxt, enc);
8099
1.15k
                    }
8100
23.6k
                }
8101
8102
23.6k
                if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
8103
23.6k
                    (IS_BLANK_CH(NXT(5)))) {
8104
1.13k
                    xmlParseTextDecl(ctxt);
8105
1.13k
                }
8106
23.6k
            }
8107
1.10M
  }
8108
1.10M
    }
8109
1.18M
    ctxt->hasPErefs = 1;
8110
1.18M
}
8111
8112
/**
8113
 * xmlLoadEntityContent:
8114
 * @ctxt:  an XML parser context
8115
 * @entity: an unloaded system entity
8116
 *
8117
 * Load the original content of the given system entity from the
8118
 * ExternalID/SystemID given. This is to be used for Included in Literal
8119
 * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
8120
 *
8121
 * Returns 0 in case of success and -1 in case of failure
8122
 */
8123
static int
8124
6.91k
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
8125
6.91k
    xmlParserInputPtr input;
8126
6.91k
    xmlBufferPtr buf;
8127
6.91k
    int l, c;
8128
6.91k
    int count = 0;
8129
8130
6.91k
    if ((ctxt == NULL) || (entity == NULL) ||
8131
6.91k
        ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
8132
6.91k
   (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
8133
6.91k
  (entity->content != NULL)) {
8134
0
  xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8135
0
              "xmlLoadEntityContent parameter error");
8136
0
        return(-1);
8137
0
    }
8138
8139
6.91k
    if (xmlParserDebugEntities)
8140
0
  xmlGenericError(xmlGenericErrorContext,
8141
0
    "Reading %s entity content input\n", entity->name);
8142
8143
6.91k
    buf = xmlBufferCreate();
8144
6.91k
    if (buf == NULL) {
8145
0
  xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8146
0
              "xmlLoadEntityContent parameter error");
8147
0
        return(-1);
8148
0
    }
8149
6.91k
    xmlBufferSetAllocationScheme(buf, XML_BUFFER_ALLOC_DOUBLEIT);
8150
8151
6.91k
    input = xmlNewEntityInputStream(ctxt, entity);
8152
6.91k
    if (input == NULL) {
8153
449
  xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8154
449
              "xmlLoadEntityContent input error");
8155
449
  xmlBufferFree(buf);
8156
449
        return(-1);
8157
449
    }
8158
8159
    /*
8160
     * Push the entity as the current input, read char by char
8161
     * saving to the buffer until the end of the entity or an error
8162
     */
8163
6.46k
    if (xmlPushInput(ctxt, input) < 0) {
8164
0
        xmlBufferFree(buf);
8165
0
  xmlFreeInputStream(input);
8166
0
  return(-1);
8167
0
    }
8168
8169
6.46k
    GROW;
8170
6.46k
    c = CUR_CHAR(l);
8171
14.3M
    while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
8172
14.3M
           (IS_CHAR(c))) {
8173
14.3M
        xmlBufferAdd(buf, ctxt->input->cur, l);
8174
14.3M
  if (count++ > XML_PARSER_CHUNK_SIZE) {
8175
137k
      count = 0;
8176
137k
      GROW;
8177
137k
            if (ctxt->instate == XML_PARSER_EOF) {
8178
0
                xmlBufferFree(buf);
8179
0
                return(-1);
8180
0
            }
8181
137k
  }
8182
14.3M
  NEXTL(l);
8183
14.3M
  c = CUR_CHAR(l);
8184
14.3M
  if (c == 0) {
8185
5.14k
      count = 0;
8186
5.14k
      GROW;
8187
5.14k
            if (ctxt->instate == XML_PARSER_EOF) {
8188
0
                xmlBufferFree(buf);
8189
0
                return(-1);
8190
0
            }
8191
5.14k
      c = CUR_CHAR(l);
8192
5.14k
  }
8193
14.3M
    }
8194
8195
6.46k
    if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
8196
4.07k
        xmlPopInput(ctxt);
8197
4.07k
    } else if (!IS_CHAR(c)) {
8198
2.38k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
8199
2.38k
                          "xmlLoadEntityContent: invalid char value %d\n",
8200
2.38k
                    c);
8201
2.38k
  xmlBufferFree(buf);
8202
2.38k
  return(-1);
8203
2.38k
    }
8204
4.07k
    entity->content = buf->content;
8205
4.07k
    buf->content = NULL;
8206
4.07k
    xmlBufferFree(buf);
8207
8208
4.07k
    return(0);
8209
6.46k
}
8210
8211
/**
8212
 * xmlParseStringPEReference:
8213
 * @ctxt:  an XML parser context
8214
 * @str:  a pointer to an index in the string
8215
 *
8216
 * parse PEReference declarations
8217
 *
8218
 * [69] PEReference ::= '%' Name ';'
8219
 *
8220
 * [ WFC: No Recursion ]
8221
 * A parsed entity must not contain a recursive
8222
 * reference to itself, either directly or indirectly.
8223
 *
8224
 * [ WFC: Entity Declared ]
8225
 * In a document without any DTD, a document with only an internal DTD
8226
 * subset which contains no parameter entity references, or a document
8227
 * with "standalone='yes'", ...  ... The declaration of a parameter
8228
 * entity must precede any reference to it...
8229
 *
8230
 * [ VC: Entity Declared ]
8231
 * In a document with an external subset or external parameter entities
8232
 * with "standalone='no'", ...  ... The declaration of a parameter entity
8233
 * must precede any reference to it...
8234
 *
8235
 * [ WFC: In DTD ]
8236
 * Parameter-entity references may only appear in the DTD.
8237
 * NOTE: misleading but this is handled.
8238
 *
8239
 * Returns the string of the entity content.
8240
 *         str is updated to the current value of the index
8241
 */
8242
static xmlEntityPtr
8243
2.22M
xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
8244
2.22M
    const xmlChar *ptr;
8245
2.22M
    xmlChar cur;
8246
2.22M
    xmlChar *name;
8247
2.22M
    xmlEntityPtr entity = NULL;
8248
8249
2.22M
    if ((str == NULL) || (*str == NULL)) return(NULL);
8250
2.22M
    ptr = *str;
8251
2.22M
    cur = *ptr;
8252
2.22M
    if (cur != '%')
8253
0
        return(NULL);
8254
2.22M
    ptr++;
8255
2.22M
    name = xmlParseStringName(ctxt, &ptr);
8256
2.22M
    if (name == NULL) {
8257
1.06M
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8258
1.06M
           "xmlParseStringPEReference: no name\n");
8259
1.06M
  *str = ptr;
8260
1.06M
  return(NULL);
8261
1.06M
    }
8262
1.16M
    cur = *ptr;
8263
1.16M
    if (cur != ';') {
8264
89.0k
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
8265
89.0k
  xmlFree(name);
8266
89.0k
  *str = ptr;
8267
89.0k
  return(NULL);
8268
89.0k
    }
8269
1.07M
    ptr++;
8270
8271
    /*
8272
     * Increase the number of entity references parsed
8273
     */
8274
1.07M
    ctxt->nbentities++;
8275
8276
    /*
8277
     * Request the entity from SAX
8278
     */
8279
1.07M
    if ((ctxt->sax != NULL) &&
8280
1.07M
  (ctxt->sax->getParameterEntity != NULL))
8281
1.07M
  entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8282
1.07M
    if (ctxt->instate == XML_PARSER_EOF) {
8283
0
  xmlFree(name);
8284
0
  *str = ptr;
8285
0
  return(NULL);
8286
0
    }
8287
1.07M
    if (entity == NULL) {
8288
  /*
8289
   * [ WFC: Entity Declared ]
8290
   * In a document without any DTD, a document with only an
8291
   * internal DTD subset which contains no parameter entity
8292
   * references, or a document with "standalone='yes'", ...
8293
   * ... The declaration of a parameter entity must precede
8294
   * any reference to it...
8295
   */
8296
135k
  if ((ctxt->standalone == 1) ||
8297
135k
      ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) {
8298
0
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8299
0
     "PEReference: %%%s; not found\n", name);
8300
135k
  } else {
8301
      /*
8302
       * [ VC: Entity Declared ]
8303
       * In a document with an external subset or external
8304
       * parameter entities with "standalone='no'", ...
8305
       * ... The declaration of a parameter entity must
8306
       * precede any reference to it...
8307
       */
8308
135k
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8309
135k
        "PEReference: %%%s; not found\n",
8310
135k
        name, NULL);
8311
135k
      ctxt->valid = 0;
8312
135k
  }
8313
135k
  xmlParserEntityCheck(ctxt, 0, NULL, 0);
8314
941k
    } else {
8315
  /*
8316
   * Internal checking in case the entity quest barfed
8317
   */
8318
941k
  if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8319
941k
      (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8320
0
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8321
0
        "%%%s; is not a parameter entity\n",
8322
0
        name, NULL);
8323
0
  }
8324
941k
    }
8325
1.07M
    ctxt->hasPErefs = 1;
8326
1.07M
    xmlFree(name);
8327
1.07M
    *str = ptr;
8328
1.07M
    return(entity);
8329
1.07M
}
8330
8331
/**
8332
 * xmlParseDocTypeDecl:
8333
 * @ctxt:  an XML parser context
8334
 *
8335
 * DEPRECATED: Internal function, don't use.
8336
 *
8337
 * parse a DOCTYPE declaration
8338
 *
8339
 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
8340
 *                      ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8341
 *
8342
 * [ VC: Root Element Type ]
8343
 * The Name in the document type declaration must match the element
8344
 * type of the root element.
8345
 */
8346
8347
void
8348
423k
xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
8349
423k
    const xmlChar *name = NULL;
8350
423k
    xmlChar *ExternalID = NULL;
8351
423k
    xmlChar *URI = NULL;
8352
8353
    /*
8354
     * We know that '<!DOCTYPE' has been detected.
8355
     */
8356
423k
    SKIP(9);
8357
8358
423k
    SKIP_BLANKS;
8359
8360
    /*
8361
     * Parse the DOCTYPE name.
8362
     */
8363
423k
    name = xmlParseName(ctxt);
8364
423k
    if (name == NULL) {
8365
2.41k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8366
2.41k
           "xmlParseDocTypeDecl : no DOCTYPE name !\n");
8367
2.41k
    }
8368
423k
    ctxt->intSubName = name;
8369
8370
423k
    SKIP_BLANKS;
8371
8372
    /*
8373
     * Check for SystemID and ExternalID
8374
     */
8375
423k
    URI = xmlParseExternalID(ctxt, &ExternalID, 1);
8376
8377
423k
    if ((URI != NULL) || (ExternalID != NULL)) {
8378
162k
        ctxt->hasExternalSubset = 1;
8379
162k
    }
8380
423k
    ctxt->extSubURI = URI;
8381
423k
    ctxt->extSubSystem = ExternalID;
8382
8383
423k
    SKIP_BLANKS;
8384
8385
    /*
8386
     * Create and update the internal subset.
8387
     */
8388
423k
    if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
8389
423k
  (!ctxt->disableSAX))
8390
408k
  ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
8391
423k
    if (ctxt->instate == XML_PARSER_EOF)
8392
0
  return;
8393
8394
    /*
8395
     * Is there any internal subset declarations ?
8396
     * they are handled separately in xmlParseInternalSubset()
8397
     */
8398
423k
    if (RAW == '[')
8399
305k
  return;
8400
8401
    /*
8402
     * We should be at the end of the DOCTYPE declaration.
8403
     */
8404
117k
    if (RAW != '>') {
8405
16.0k
  xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8406
16.0k
    }
8407
117k
    NEXT;
8408
117k
}
8409
8410
/**
8411
 * xmlParseInternalSubset:
8412
 * @ctxt:  an XML parser context
8413
 *
8414
 * parse the internal subset declaration
8415
 *
8416
 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8417
 */
8418
8419
static void
8420
262k
xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
8421
    /*
8422
     * Is there any DTD definition ?
8423
     */
8424
262k
    if (RAW == '[') {
8425
262k
        int baseInputNr = ctxt->inputNr;
8426
262k
        ctxt->instate = XML_PARSER_DTD;
8427
262k
        NEXT;
8428
  /*
8429
   * Parse the succession of Markup declarations and
8430
   * PEReferences.
8431
   * Subsequence (markupdecl | PEReference | S)*
8432
   */
8433
3.18M
  while (((RAW != ']') || (ctxt->inputNr > baseInputNr)) &&
8434
3.18M
               (ctxt->instate != XML_PARSER_EOF)) {
8435
2.99M
      int id = ctxt->input->id;
8436
2.99M
      unsigned long cons = CUR_CONSUMED;
8437
8438
2.99M
      SKIP_BLANKS;
8439
2.99M
      xmlParseMarkupDecl(ctxt);
8440
2.99M
      xmlParsePEReference(ctxt);
8441
8442
            /*
8443
             * Conditional sections are allowed from external entities included
8444
             * by PE References in the internal subset.
8445
             */
8446
2.99M
            if ((ctxt->inputNr > 1) && (ctxt->input->filename != NULL) &&
8447
2.99M
                (RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
8448
0
                xmlParseConditionalSections(ctxt);
8449
0
            }
8450
8451
2.99M
      if ((id == ctxt->input->id) && (cons == CUR_CONSUMED)) {
8452
72.1k
    xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8453
72.1k
       "xmlParseInternalSubset: error detected in Markup declaration\n");
8454
72.1k
                if (ctxt->inputNr > baseInputNr)
8455
3.41k
                    xmlPopInput(ctxt);
8456
68.7k
                else
8457
68.7k
        break;
8458
72.1k
      }
8459
2.99M
  }
8460
262k
  if (RAW == ']') {
8461
180k
      NEXT;
8462
180k
      SKIP_BLANKS;
8463
180k
  }
8464
262k
    }
8465
8466
    /*
8467
     * We should be at the end of the DOCTYPE declaration.
8468
     */
8469
262k
    if (RAW != '>') {
8470
81.8k
  xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8471
81.8k
  return;
8472
81.8k
    }
8473
180k
    NEXT;
8474
180k
}
8475
8476
#ifdef LIBXML_SAX1_ENABLED
8477
/**
8478
 * xmlParseAttribute:
8479
 * @ctxt:  an XML parser context
8480
 * @value:  a xmlChar ** used to store the value of the attribute
8481
 *
8482
 * DEPRECATED: Internal function, don't use.
8483
 *
8484
 * parse an attribute
8485
 *
8486
 * [41] Attribute ::= Name Eq AttValue
8487
 *
8488
 * [ WFC: No External Entity References ]
8489
 * Attribute values cannot contain direct or indirect entity references
8490
 * to external entities.
8491
 *
8492
 * [ WFC: No < in Attribute Values ]
8493
 * The replacement text of any entity referred to directly or indirectly in
8494
 * an attribute value (other than "&lt;") must not contain a <.
8495
 *
8496
 * [ VC: Attribute Value Type ]
8497
 * The attribute must have been declared; the value must be of the type
8498
 * declared for it.
8499
 *
8500
 * [25] Eq ::= S? '=' S?
8501
 *
8502
 * With namespace:
8503
 *
8504
 * [NS 11] Attribute ::= QName Eq AttValue
8505
 *
8506
 * Also the case QName == xmlns:??? is handled independently as a namespace
8507
 * definition.
8508
 *
8509
 * Returns the attribute name, and the value in *value.
8510
 */
8511
8512
const xmlChar *
8513
107M
xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
8514
107M
    const xmlChar *name;
8515
107M
    xmlChar *val;
8516
8517
107M
    *value = NULL;
8518
107M
    GROW;
8519
107M
    name = xmlParseName(ctxt);
8520
107M
    if (name == NULL) {
8521
11.6M
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8522
11.6M
                 "error parsing attribute name\n");
8523
11.6M
        return(NULL);
8524
11.6M
    }
8525
8526
    /*
8527
     * read the value
8528
     */
8529
95.7M
    SKIP_BLANKS;
8530
95.7M
    if (RAW == '=') {
8531
89.4M
        NEXT;
8532
89.4M
  SKIP_BLANKS;
8533
89.4M
  val = xmlParseAttValue(ctxt);
8534
89.4M
  ctxt->instate = XML_PARSER_CONTENT;
8535
89.4M
    } else {
8536
6.33M
  xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8537
6.33M
         "Specification mandates value for attribute %s\n", name);
8538
6.33M
  return(NULL);
8539
6.33M
    }
8540
8541
    /*
8542
     * Check that xml:lang conforms to the specification
8543
     * No more registered as an error, just generate a warning now
8544
     * since this was deprecated in XML second edition
8545
     */
8546
89.4M
    if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8547
25.6k
  if (!xmlCheckLanguageID(val)) {
8548
6.93k
      xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8549
6.93k
              "Malformed value for xml:lang : %s\n",
8550
6.93k
        val, NULL);
8551
6.93k
  }
8552
25.6k
    }
8553
8554
    /*
8555
     * Check that xml:space conforms to the specification
8556
     */
8557
89.4M
    if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8558
905
  if (xmlStrEqual(val, BAD_CAST "default"))
8559
0
      *(ctxt->space) = 0;
8560
905
  else if (xmlStrEqual(val, BAD_CAST "preserve"))
8561
430
      *(ctxt->space) = 1;
8562
475
  else {
8563
475
    xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8564
475
"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8565
475
                                 val, NULL);
8566
475
  }
8567
905
    }
8568
8569
89.4M
    *value = val;
8570
89.4M
    return(name);
8571
95.7M
}
8572
8573
/**
8574
 * xmlParseStartTag:
8575
 * @ctxt:  an XML parser context
8576
 *
8577
 * DEPRECATED: Internal function, don't use.
8578
 *
8579
 * parse a start of tag either for rule element or
8580
 * EmptyElement. In both case we don't parse the tag closing chars.
8581
 *
8582
 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8583
 *
8584
 * [ WFC: Unique Att Spec ]
8585
 * No attribute name may appear more than once in the same start-tag or
8586
 * empty-element tag.
8587
 *
8588
 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8589
 *
8590
 * [ WFC: Unique Att Spec ]
8591
 * No attribute name may appear more than once in the same start-tag or
8592
 * empty-element tag.
8593
 *
8594
 * With namespace:
8595
 *
8596
 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8597
 *
8598
 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8599
 *
8600
 * Returns the element name parsed
8601
 */
8602
8603
const xmlChar *
8604
148M
xmlParseStartTag(xmlParserCtxtPtr ctxt) {
8605
148M
    const xmlChar *name;
8606
148M
    const xmlChar *attname;
8607
148M
    xmlChar *attvalue;
8608
148M
    const xmlChar **atts = ctxt->atts;
8609
148M
    int nbatts = 0;
8610
148M
    int maxatts = ctxt->maxatts;
8611
148M
    int i;
8612
8613
148M
    if (RAW != '<') return(NULL);
8614
148M
    NEXT1;
8615
8616
148M
    name = xmlParseName(ctxt);
8617
148M
    if (name == NULL) {
8618
31.9M
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8619
31.9M
       "xmlParseStartTag: invalid element name\n");
8620
31.9M
        return(NULL);
8621
31.9M
    }
8622
8623
    /*
8624
     * Now parse the attributes, it ends up with the ending
8625
     *
8626
     * (S Attribute)* S?
8627
     */
8628
116M
    SKIP_BLANKS;
8629
116M
    GROW;
8630
8631
154M
    while (((RAW != '>') &&
8632
154M
     ((RAW != '/') || (NXT(1) != '>')) &&
8633
154M
     (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
8634
107M
        int id = ctxt->input->id;
8635
107M
  unsigned long cons = CUR_CONSUMED;
8636
8637
107M
  attname = xmlParseAttribute(ctxt, &attvalue);
8638
107M
        if ((attname != NULL) && (attvalue != NULL)) {
8639
      /*
8640
       * [ WFC: Unique Att Spec ]
8641
       * No attribute name may appear more than once in the same
8642
       * start-tag or empty-element tag.
8643
       */
8644
113M
      for (i = 0; i < nbatts;i += 2) {
8645
25.5M
          if (xmlStrEqual(atts[i], attname)) {
8646
67.8k
        xmlErrAttributeDup(ctxt, NULL, attname);
8647
67.8k
        xmlFree(attvalue);
8648
67.8k
        goto failed;
8649
67.8k
    }
8650
25.5M
      }
8651
      /*
8652
       * Add the pair to atts
8653
       */
8654
88.2M
      if (atts == NULL) {
8655
4.86M
          maxatts = 22; /* allow for 10 attrs by default */
8656
4.86M
          atts = (const xmlChar **)
8657
4.86M
           xmlMalloc(maxatts * sizeof(xmlChar *));
8658
4.86M
    if (atts == NULL) {
8659
0
        xmlErrMemory(ctxt, NULL);
8660
0
        if (attvalue != NULL)
8661
0
      xmlFree(attvalue);
8662
0
        goto failed;
8663
0
    }
8664
4.86M
    ctxt->atts = atts;
8665
4.86M
    ctxt->maxatts = maxatts;
8666
83.3M
      } else if (nbatts + 4 > maxatts) {
8667
2.34k
          const xmlChar **n;
8668
8669
2.34k
          maxatts *= 2;
8670
2.34k
          n = (const xmlChar **) xmlRealloc((void *) atts,
8671
2.34k
               maxatts * sizeof(const xmlChar *));
8672
2.34k
    if (n == NULL) {
8673
0
        xmlErrMemory(ctxt, NULL);
8674
0
        if (attvalue != NULL)
8675
0
      xmlFree(attvalue);
8676
0
        goto failed;
8677
0
    }
8678
2.34k
    atts = n;
8679
2.34k
    ctxt->atts = atts;
8680
2.34k
    ctxt->maxatts = maxatts;
8681
2.34k
      }
8682
88.2M
      atts[nbatts++] = attname;
8683
88.2M
      atts[nbatts++] = attvalue;
8684
88.2M
      atts[nbatts] = NULL;
8685
88.2M
      atts[nbatts + 1] = NULL;
8686
88.2M
  } else {
8687
19.1M
      if (attvalue != NULL)
8688
0
    xmlFree(attvalue);
8689
19.1M
  }
8690
8691
107M
failed:
8692
8693
107M
  GROW
8694
107M
  if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8695
58.3M
      break;
8696
49.0M
  if (SKIP_BLANKS == 0) {
8697
23.9M
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8698
23.9M
         "attributes construct error\n");
8699
23.9M
  }
8700
49.0M
        if ((cons == CUR_CONSUMED) && (id == ctxt->input->id) &&
8701
49.0M
            (attname == NULL) && (attvalue == NULL)) {
8702
11.6M
      xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
8703
11.6M
         "xmlParseStartTag: problem parsing attributes\n");
8704
11.6M
      break;
8705
11.6M
  }
8706
37.4M
  SHRINK;
8707
37.4M
        GROW;
8708
37.4M
    }
8709
8710
    /*
8711
     * SAX: Start of Element !
8712
     */
8713
116M
    if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
8714
116M
  (!ctxt->disableSAX)) {
8715
16.7M
  if (nbatts > 0)
8716
10.7M
      ctxt->sax->startElement(ctxt->userData, name, atts);
8717
6.03M
  else
8718
6.03M
      ctxt->sax->startElement(ctxt->userData, name, NULL);
8719
16.7M
    }
8720
8721
116M
    if (atts != NULL) {
8722
        /* Free only the content strings */
8723
203M
        for (i = 1;i < nbatts;i+=2)
8724
88.2M
      if (atts[i] != NULL)
8725
88.2M
         xmlFree((xmlChar *) atts[i]);
8726
115M
    }
8727
116M
    return(name);
8728
116M
}
8729
8730
/**
8731
 * xmlParseEndTag1:
8732
 * @ctxt:  an XML parser context
8733
 * @line:  line of the start tag
8734
 * @nsNr:  number of namespaces on the start tag
8735
 *
8736
 * parse an end of tag
8737
 *
8738
 * [42] ETag ::= '</' Name S? '>'
8739
 *
8740
 * With namespace
8741
 *
8742
 * [NS 9] ETag ::= '</' QName S? '>'
8743
 */
8744
8745
static void
8746
41.0M
xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
8747
41.0M
    const xmlChar *name;
8748
8749
41.0M
    GROW;
8750
41.0M
    if ((RAW != '<') || (NXT(1) != '/')) {
8751
0
  xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
8752
0
           "xmlParseEndTag: '</' not found\n");
8753
0
  return;
8754
0
    }
8755
41.0M
    SKIP(2);
8756
8757
41.0M
    name = xmlParseNameAndCompare(ctxt,ctxt->name);
8758
8759
    /*
8760
     * We should definitely be at the ending "S? '>'" part
8761
     */
8762
41.0M
    GROW;
8763
41.0M
    SKIP_BLANKS;
8764
41.0M
    if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
8765
1.93M
  xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
8766
1.93M
    } else
8767
39.1M
  NEXT1;
8768
8769
    /*
8770
     * [ WFC: Element Type Match ]
8771
     * The Name in an element's end-tag must match the element type in the
8772
     * start-tag.
8773
     *
8774
     */
8775
41.0M
    if (name != (xmlChar*)1) {
8776
6.17M
        if (name == NULL) name = BAD_CAST "unparsable";
8777
6.17M
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
8778
6.17M
         "Opening and ending tag mismatch: %s line %d and %s\n",
8779
6.17M
                    ctxt->name, line, name);
8780
6.17M
    }
8781
8782
    /*
8783
     * SAX: End of Tag
8784
     */
8785
41.0M
    if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8786
41.0M
  (!ctxt->disableSAX))
8787
3.58M
        ctxt->sax->endElement(ctxt->userData, ctxt->name);
8788
8789
41.0M
    namePop(ctxt);
8790
41.0M
    spacePop(ctxt);
8791
41.0M
    return;
8792
41.0M
}
8793
8794
/**
8795
 * xmlParseEndTag:
8796
 * @ctxt:  an XML parser context
8797
 *
8798
 * DEPRECATED: Internal function, don't use.
8799
 *
8800
 * parse an end of tag
8801
 *
8802
 * [42] ETag ::= '</' Name S? '>'
8803
 *
8804
 * With namespace
8805
 *
8806
 * [NS 9] ETag ::= '</' QName S? '>'
8807
 */
8808
8809
void
8810
0
xmlParseEndTag(xmlParserCtxtPtr ctxt) {
8811
0
    xmlParseEndTag1(ctxt, 0);
8812
0
}
8813
#endif /* LIBXML_SAX1_ENABLED */
8814
8815
/************************************************************************
8816
 *                  *
8817
 *          SAX 2 specific operations       *
8818
 *                  *
8819
 ************************************************************************/
8820
8821
/*
8822
 * xmlGetNamespace:
8823
 * @ctxt:  an XML parser context
8824
 * @prefix:  the prefix to lookup
8825
 *
8826
 * Lookup the namespace name for the @prefix (which ca be NULL)
8827
 * The prefix must come from the @ctxt->dict dictionary
8828
 *
8829
 * Returns the namespace name or NULL if not bound
8830
 */
8831
static const xmlChar *
8832
10.7M
xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
8833
10.7M
    int i;
8834
8835
10.7M
    if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
8836
15.1M
    for (i = ctxt->nsNr - 2;i >= 0;i-=2)
8837
6.51M
        if (ctxt->nsTab[i] == prefix) {
8838
2.02M
      if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
8839
3.02k
          return(NULL);
8840
2.02M
      return(ctxt->nsTab[i + 1]);
8841
2.02M
  }
8842
8.64M
    return(NULL);
8843
10.6M
}
8844
8845
/**
8846
 * xmlParseQName:
8847
 * @ctxt:  an XML parser context
8848
 * @prefix:  pointer to store the prefix part
8849
 *
8850
 * parse an XML Namespace QName
8851
 *
8852
 * [6]  QName  ::= (Prefix ':')? LocalPart
8853
 * [7]  Prefix  ::= NCName
8854
 * [8]  LocalPart  ::= NCName
8855
 *
8856
 * Returns the Name parsed or NULL
8857
 */
8858
8859
static const xmlChar *
8860
22.3M
xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8861
22.3M
    const xmlChar *l, *p;
8862
8863
22.3M
    GROW;
8864
8865
22.3M
    l = xmlParseNCName(ctxt);
8866
22.3M
    if (l == NULL) {
8867
2.30M
        if (CUR == ':') {
8868
5.54k
      l = xmlParseName(ctxt);
8869
5.54k
      if (l != NULL) {
8870
5.54k
          xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8871
5.54k
             "Failed to parse QName '%s'\n", l, NULL, NULL);
8872
5.54k
    *prefix = NULL;
8873
5.54k
    return(l);
8874
5.54k
      }
8875
5.54k
  }
8876
2.30M
        return(NULL);
8877
2.30M
    }
8878
20.0M
    if (CUR == ':') {
8879
3.38M
        NEXT;
8880
3.38M
  p = l;
8881
3.38M
  l = xmlParseNCName(ctxt);
8882
3.38M
  if (l == NULL) {
8883
47.7k
      xmlChar *tmp;
8884
8885
47.7k
            if (ctxt->instate == XML_PARSER_EOF)
8886
0
                return(NULL);
8887
47.7k
            xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8888
47.7k
               "Failed to parse QName '%s:'\n", p, NULL, NULL);
8889
47.7k
      l = xmlParseNmtoken(ctxt);
8890
47.7k
      if (l == NULL) {
8891
37.8k
                if (ctxt->instate == XML_PARSER_EOF)
8892
0
                    return(NULL);
8893
37.8k
    tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
8894
37.8k
            } else {
8895
9.84k
    tmp = xmlBuildQName(l, p, NULL, 0);
8896
9.84k
    xmlFree((char *)l);
8897
9.84k
      }
8898
47.7k
      p = xmlDictLookup(ctxt->dict, tmp, -1);
8899
47.7k
      if (tmp != NULL) xmlFree(tmp);
8900
47.7k
      *prefix = NULL;
8901
47.7k
      return(p);
8902
47.7k
  }
8903
3.33M
  if (CUR == ':') {
8904
18.3k
      xmlChar *tmp;
8905
8906
18.3k
            xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8907
18.3k
               "Failed to parse QName '%s:%s:'\n", p, l, NULL);
8908
18.3k
      NEXT;
8909
18.3k
      tmp = (xmlChar *) xmlParseName(ctxt);
8910
18.3k
      if (tmp != NULL) {
8911
13.5k
          tmp = xmlBuildQName(tmp, l, NULL, 0);
8912
13.5k
    l = xmlDictLookup(ctxt->dict, tmp, -1);
8913
13.5k
    if (tmp != NULL) xmlFree(tmp);
8914
13.5k
    *prefix = p;
8915
13.5k
    return(l);
8916
13.5k
      }
8917
4.80k
            if (ctxt->instate == XML_PARSER_EOF)
8918
0
                return(NULL);
8919
4.80k
      tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
8920
4.80k
      l = xmlDictLookup(ctxt->dict, tmp, -1);
8921
4.80k
      if (tmp != NULL) xmlFree(tmp);
8922
4.80k
      *prefix = p;
8923
4.80k
      return(l);
8924
4.80k
  }
8925
3.32M
  *prefix = p;
8926
3.32M
    } else
8927
16.6M
        *prefix = NULL;
8928
20.0M
    return(l);
8929
20.0M
}
8930
8931
/**
8932
 * xmlParseQNameAndCompare:
8933
 * @ctxt:  an XML parser context
8934
 * @name:  the localname
8935
 * @prefix:  the prefix, if any.
8936
 *
8937
 * parse an XML name and compares for match
8938
 * (specialized for endtag parsing)
8939
 *
8940
 * Returns NULL for an illegal name, (xmlChar*) 1 for success
8941
 * and the name for mismatch
8942
 */
8943
8944
static const xmlChar *
8945
xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8946
623k
                        xmlChar const *prefix) {
8947
623k
    const xmlChar *cmp;
8948
623k
    const xmlChar *in;
8949
623k
    const xmlChar *ret;
8950
623k
    const xmlChar *prefix2;
8951
8952
623k
    if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8953
8954
623k
    GROW;
8955
623k
    in = ctxt->input->cur;
8956
8957
623k
    cmp = prefix;
8958
2.05M
    while (*in != 0 && *in == *cmp) {
8959
1.43M
  ++in;
8960
1.43M
  ++cmp;
8961
1.43M
    }
8962
623k
    if ((*cmp == 0) && (*in == ':')) {
8963
557k
        in++;
8964
557k
  cmp = name;
8965
4.17M
  while (*in != 0 && *in == *cmp) {
8966
3.61M
      ++in;
8967
3.61M
      ++cmp;
8968
3.61M
  }
8969
557k
  if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
8970
      /* success */
8971
453k
            ctxt->input->col += in - ctxt->input->cur;
8972
453k
      ctxt->input->cur = in;
8973
453k
      return((const xmlChar*) 1);
8974
453k
  }
8975
557k
    }
8976
    /*
8977
     * all strings coms from the dictionary, equality can be done directly
8978
     */
8979
170k
    ret = xmlParseQName (ctxt, &prefix2);
8980
170k
    if ((ret == name) && (prefix == prefix2))
8981
2.15k
  return((const xmlChar*) 1);
8982
168k
    return ret;
8983
170k
}
8984
8985
/**
8986
 * xmlParseAttValueInternal:
8987
 * @ctxt:  an XML parser context
8988
 * @len:  attribute len result
8989
 * @alloc:  whether the attribute was reallocated as a new string
8990
 * @normalize:  if 1 then further non-CDATA normalization must be done
8991
 *
8992
 * parse a value for an attribute.
8993
 * NOTE: if no normalization is needed, the routine will return pointers
8994
 *       directly from the data buffer.
8995
 *
8996
 * 3.3.3 Attribute-Value Normalization:
8997
 * Before the value of an attribute is passed to the application or
8998
 * checked for validity, the XML processor must normalize it as follows:
8999
 * - a character reference is processed by appending the referenced
9000
 *   character to the attribute value
9001
 * - an entity reference is processed by recursively processing the
9002
 *   replacement text of the entity
9003
 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
9004
 *   appending #x20 to the normalized value, except that only a single
9005
 *   #x20 is appended for a "#xD#xA" sequence that is part of an external
9006
 *   parsed entity or the literal entity value of an internal parsed entity
9007
 * - other characters are processed by appending them to the normalized value
9008
 * If the declared value is not CDATA, then the XML processor must further
9009
 * process the normalized attribute value by discarding any leading and
9010
 * trailing space (#x20) characters, and by replacing sequences of space
9011
 * (#x20) characters by a single space (#x20) character.
9012
 * All attributes for which no declaration has been read should be treated
9013
 * by a non-validating parser as if declared CDATA.
9014
 *
9015
 * Returns the AttValue parsed or NULL. The value has to be freed by the
9016
 *     caller if it was copied, this can be detected by val[*len] == 0.
9017
 */
9018
9019
#define GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end) \
9020
27.0k
    const xmlChar *oldbase = ctxt->input->base;\
9021
27.0k
    GROW;\
9022
27.0k
    if (ctxt->instate == XML_PARSER_EOF)\
9023
27.0k
        return(NULL);\
9024
27.0k
    if (oldbase != ctxt->input->base) {\
9025
0
        ptrdiff_t delta = ctxt->input->base - oldbase;\
9026
0
        start = start + delta;\
9027
0
        in = in + delta;\
9028
0
    }\
9029
27.0k
    end = ctxt->input->end;
9030
9031
static xmlChar *
9032
xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
9033
                         int normalize)
9034
99.4M
{
9035
99.4M
    xmlChar limit = 0;
9036
99.4M
    const xmlChar *in = NULL, *start, *end, *last;
9037
99.4M
    xmlChar *ret = NULL;
9038
99.4M
    int line, col;
9039
99.4M
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
9040
62.9M
                    XML_MAX_HUGE_LENGTH :
9041
99.4M
                    XML_MAX_TEXT_LENGTH;
9042
9043
99.4M
    GROW;
9044
99.4M
    in = (xmlChar *) CUR_PTR;
9045
99.4M
    line = ctxt->input->line;
9046
99.4M
    col = ctxt->input->col;
9047
99.4M
    if (*in != '"' && *in != '\'') {
9048
1.16M
        xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
9049
1.16M
        return (NULL);
9050
1.16M
    }
9051
98.2M
    ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
9052
9053
    /*
9054
     * try to handle in this routine the most common case where no
9055
     * allocation of a new string is required and where content is
9056
     * pure ASCII.
9057
     */
9058
98.2M
    limit = *in++;
9059
98.2M
    col++;
9060
98.2M
    end = ctxt->input->end;
9061
98.2M
    start = in;
9062
98.2M
    if (in >= end) {
9063
2.86k
        GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9064
2.86k
    }
9065
98.2M
    if (normalize) {
9066
        /*
9067
   * Skip any leading spaces
9068
   */
9069
415k
  while ((in < end) && (*in != limit) &&
9070
415k
         ((*in == 0x20) || (*in == 0x9) ||
9071
412k
          (*in == 0xA) || (*in == 0xD))) {
9072
128k
      if (*in == 0xA) {
9073
49.5k
          line++; col = 1;
9074
78.8k
      } else {
9075
78.8k
          col++;
9076
78.8k
      }
9077
128k
      in++;
9078
128k
      start = in;
9079
128k
      if (in >= end) {
9080
102
                GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9081
102
                if ((in - start) > maxLength) {
9082
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9083
0
                                   "AttValue length too long\n");
9084
0
                    return(NULL);
9085
0
                }
9086
102
      }
9087
128k
  }
9088
1.67M
  while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9089
1.67M
         (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
9090
1.39M
      col++;
9091
1.39M
      if ((*in++ == 0x20) && (*in == 0x20)) break;
9092
1.39M
      if (in >= end) {
9093
200
                GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9094
200
                if ((in - start) > maxLength) {
9095
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9096
0
                                   "AttValue length too long\n");
9097
0
                    return(NULL);
9098
0
                }
9099
200
      }
9100
1.39M
  }
9101
287k
  last = in;
9102
  /*
9103
   * skip the trailing blanks
9104
   */
9105
297k
  while ((last[-1] == 0x20) && (last > start)) last--;
9106
380k
  while ((in < end) && (*in != limit) &&
9107
380k
         ((*in == 0x20) || (*in == 0x9) ||
9108
152k
          (*in == 0xA) || (*in == 0xD))) {
9109
93.5k
      if (*in == 0xA) {
9110
35.4k
          line++, col = 1;
9111
58.0k
      } else {
9112
58.0k
          col++;
9113
58.0k
      }
9114
93.5k
      in++;
9115
93.5k
      if (in >= end) {
9116
186
    const xmlChar *oldbase = ctxt->input->base;
9117
186
    GROW;
9118
186
                if (ctxt->instate == XML_PARSER_EOF)
9119
0
                    return(NULL);
9120
186
    if (oldbase != ctxt->input->base) {
9121
0
        ptrdiff_t delta = ctxt->input->base - oldbase;
9122
0
        start = start + delta;
9123
0
        in = in + delta;
9124
0
        last = last + delta;
9125
0
    }
9126
186
    end = ctxt->input->end;
9127
186
                if ((in - start) > maxLength) {
9128
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9129
0
                                   "AttValue length too long\n");
9130
0
                    return(NULL);
9131
0
                }
9132
186
      }
9133
93.5k
  }
9134
287k
        if ((in - start) > maxLength) {
9135
0
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9136
0
                           "AttValue length too long\n");
9137
0
            return(NULL);
9138
0
        }
9139
287k
  if (*in != limit) goto need_complex;
9140
97.9M
    } else {
9141
935M
  while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9142
935M
         (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
9143
837M
      in++;
9144
837M
      col++;
9145
837M
      if (in >= end) {
9146
23.8k
                GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9147
23.8k
                if ((in - start) > maxLength) {
9148
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9149
0
                                   "AttValue length too long\n");
9150
0
                    return(NULL);
9151
0
                }
9152
23.8k
      }
9153
837M
  }
9154
97.9M
  last = in;
9155
97.9M
        if ((in - start) > maxLength) {
9156
0
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9157
0
                           "AttValue length too long\n");
9158
0
            return(NULL);
9159
0
        }
9160
97.9M
  if (*in != limit) goto need_complex;
9161
97.9M
    }
9162
84.0M
    in++;
9163
84.0M
    col++;
9164
84.0M
    if (len != NULL) {
9165
8.84M
        if (alloc) *alloc = 0;
9166
8.84M
        *len = last - start;
9167
8.84M
        ret = (xmlChar *) start;
9168
75.1M
    } else {
9169
75.1M
        if (alloc) *alloc = 1;
9170
75.1M
        ret = xmlStrndup(start, last - start);
9171
75.1M
    }
9172
84.0M
    CUR_PTR = in;
9173
84.0M
    ctxt->input->line = line;
9174
84.0M
    ctxt->input->col = col;
9175
84.0M
    return ret;
9176
14.2M
need_complex:
9177
14.2M
    if (alloc) *alloc = 1;
9178
14.2M
    return xmlParseAttValueComplex(ctxt, len, normalize);
9179
98.2M
}
9180
9181
/**
9182
 * xmlParseAttribute2:
9183
 * @ctxt:  an XML parser context
9184
 * @pref:  the element prefix
9185
 * @elem:  the element name
9186
 * @prefix:  a xmlChar ** used to store the value of the attribute prefix
9187
 * @value:  a xmlChar ** used to store the value of the attribute
9188
 * @len:  an int * to save the length of the attribute
9189
 * @alloc:  an int * to indicate if the attribute was allocated
9190
 *
9191
 * parse an attribute in the new SAX2 framework.
9192
 *
9193
 * Returns the attribute name, and the value in *value, .
9194
 */
9195
9196
static const xmlChar *
9197
xmlParseAttribute2(xmlParserCtxtPtr ctxt,
9198
                   const xmlChar * pref, const xmlChar * elem,
9199
                   const xmlChar ** prefix, xmlChar ** value,
9200
                   int *len, int *alloc)
9201
10.4M
{
9202
10.4M
    const xmlChar *name;
9203
10.4M
    xmlChar *val, *internal_val = NULL;
9204
10.4M
    int normalize = 0;
9205
9206
10.4M
    *value = NULL;
9207
10.4M
    GROW;
9208
10.4M
    name = xmlParseQName(ctxt, prefix);
9209
10.4M
    if (name == NULL) {
9210
436k
        xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9211
436k
                       "error parsing attribute name\n");
9212
436k
        return (NULL);
9213
436k
    }
9214
9215
    /*
9216
     * get the type if needed
9217
     */
9218
10.0M
    if (ctxt->attsSpecial != NULL) {
9219
1.79M
        int type;
9220
9221
1.79M
        type = (int) (ptrdiff_t) xmlHashQLookup2(ctxt->attsSpecial,
9222
1.79M
                                                 pref, elem, *prefix, name);
9223
1.79M
        if (type != 0)
9224
289k
            normalize = 1;
9225
1.79M
    }
9226
9227
    /*
9228
     * read the value
9229
     */
9230
10.0M
    SKIP_BLANKS;
9231
10.0M
    if (RAW == '=') {
9232
9.74M
        NEXT;
9233
9.74M
        SKIP_BLANKS;
9234
9.74M
        val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
9235
9.74M
  if (normalize) {
9236
      /*
9237
       * Sometimes a second normalisation pass for spaces is needed
9238
       * but that only happens if charrefs or entities references
9239
       * have been used in the attribute value, i.e. the attribute
9240
       * value have been extracted in an allocated string already.
9241
       */
9242
288k
      if (*alloc) {
9243
59.5k
          const xmlChar *val2;
9244
9245
59.5k
          val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
9246
59.5k
    if ((val2 != NULL) && (val2 != val)) {
9247
10.8k
        xmlFree(val);
9248
10.8k
        val = (xmlChar *) val2;
9249
10.8k
    }
9250
59.5k
      }
9251
288k
  }
9252
9.74M
        ctxt->instate = XML_PARSER_CONTENT;
9253
9.74M
    } else {
9254
263k
        xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
9255
263k
                          "Specification mandates value for attribute %s\n",
9256
263k
                          name);
9257
263k
        return (NULL);
9258
263k
    }
9259
9260
9.74M
    if (*prefix == ctxt->str_xml) {
9261
        /*
9262
         * Check that xml:lang conforms to the specification
9263
         * No more registered as an error, just generate a warning now
9264
         * since this was deprecated in XML second edition
9265
         */
9266
67.4k
        if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
9267
11.8k
            internal_val = xmlStrndup(val, *len);
9268
11.8k
            if (!xmlCheckLanguageID(internal_val)) {
9269
6.20k
                xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
9270
6.20k
                              "Malformed value for xml:lang : %s\n",
9271
6.20k
                              internal_val, NULL);
9272
6.20k
            }
9273
11.8k
        }
9274
9275
        /*
9276
         * Check that xml:space conforms to the specification
9277
         */
9278
67.4k
        if (xmlStrEqual(name, BAD_CAST "space")) {
9279
905
            internal_val = xmlStrndup(val, *len);
9280
905
            if (xmlStrEqual(internal_val, BAD_CAST "default"))
9281
0
                *(ctxt->space) = 0;
9282
905
            else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
9283
326
                *(ctxt->space) = 1;
9284
579
            else {
9285
579
                xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
9286
579
                              "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
9287
579
                              internal_val, NULL);
9288
579
            }
9289
905
        }
9290
67.4k
        if (internal_val) {
9291
12.1k
            xmlFree(internal_val);
9292
12.1k
        }
9293
67.4k
    }
9294
9295
9.74M
    *value = val;
9296
9.74M
    return (name);
9297
10.0M
}
9298
/**
9299
 * xmlParseStartTag2:
9300
 * @ctxt:  an XML parser context
9301
 *
9302
 * parse a start of tag either for rule element or
9303
 * EmptyElement. In both case we don't parse the tag closing chars.
9304
 * This routine is called when running SAX2 parsing
9305
 *
9306
 * [40] STag ::= '<' Name (S Attribute)* S? '>'
9307
 *
9308
 * [ WFC: Unique Att Spec ]
9309
 * No attribute name may appear more than once in the same start-tag or
9310
 * empty-element tag.
9311
 *
9312
 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
9313
 *
9314
 * [ WFC: Unique Att Spec ]
9315
 * No attribute name may appear more than once in the same start-tag or
9316
 * empty-element tag.
9317
 *
9318
 * With namespace:
9319
 *
9320
 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
9321
 *
9322
 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
9323
 *
9324
 * Returns the element name parsed
9325
 */
9326
9327
static const xmlChar *
9328
xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
9329
11.7M
                  const xmlChar **URI, int *tlen) {
9330
11.7M
    const xmlChar *localname;
9331
11.7M
    const xmlChar *prefix;
9332
11.7M
    const xmlChar *attname;
9333
11.7M
    const xmlChar *aprefix;
9334
11.7M
    const xmlChar *nsname;
9335
11.7M
    xmlChar *attvalue;
9336
11.7M
    const xmlChar **atts = ctxt->atts;
9337
11.7M
    int maxatts = ctxt->maxatts;
9338
11.7M
    int nratts, nbatts, nbdef, inputid;
9339
11.7M
    int i, j, nbNs, attval;
9340
11.7M
    unsigned long cur;
9341
11.7M
    int nsNr = ctxt->nsNr;
9342
9343
11.7M
    if (RAW != '<') return(NULL);
9344
11.7M
    NEXT1;
9345
9346
    /*
9347
     * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
9348
     *       point since the attribute values may be stored as pointers to
9349
     *       the buffer and calling SHRINK would destroy them !
9350
     *       The Shrinking is only possible once the full set of attribute
9351
     *       callbacks have been done.
9352
     */
9353
11.7M
    SHRINK;
9354
11.7M
    cur = ctxt->input->cur - ctxt->input->base;
9355
11.7M
    inputid = ctxt->input->id;
9356
11.7M
    nbatts = 0;
9357
11.7M
    nratts = 0;
9358
11.7M
    nbdef = 0;
9359
11.7M
    nbNs = 0;
9360
11.7M
    attval = 0;
9361
    /* Forget any namespaces added during an earlier parse of this element. */
9362
11.7M
    ctxt->nsNr = nsNr;
9363
9364
11.7M
    localname = xmlParseQName(ctxt, &prefix);
9365
11.7M
    if (localname == NULL) {
9366
1.85M
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9367
1.85M
           "StartTag: invalid element name\n");
9368
1.85M
        return(NULL);
9369
1.85M
    }
9370
9.90M
    *tlen = ctxt->input->cur - ctxt->input->base - cur;
9371
9372
    /*
9373
     * Now parse the attributes, it ends up with the ending
9374
     *
9375
     * (S Attribute)* S?
9376
     */
9377
9.90M
    SKIP_BLANKS;
9378
9.90M
    GROW;
9379
9380
13.5M
    while (((RAW != '>') &&
9381
13.5M
     ((RAW != '/') || (NXT(1) != '>')) &&
9382
13.5M
     (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
9383
10.4M
  int id = ctxt->input->id;
9384
10.4M
  unsigned long cons = CUR_CONSUMED;
9385
10.4M
  int len = -1, alloc = 0;
9386
9387
10.4M
  attname = xmlParseAttribute2(ctxt, prefix, localname,
9388
10.4M
                               &aprefix, &attvalue, &len, &alloc);
9389
10.4M
        if ((attname == NULL) || (attvalue == NULL))
9390
739k
            goto next_attr;
9391
9.70M
  if (len < 0) len = xmlStrlen(attvalue);
9392
9393
9.70M
        if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9394
62.9k
            const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9395
62.9k
            xmlURIPtr uri;
9396
9397
62.9k
            if (URL == NULL) {
9398
0
                xmlErrMemory(ctxt, "dictionary allocation failure");
9399
0
                if ((attvalue != NULL) && (alloc != 0))
9400
0
                    xmlFree(attvalue);
9401
0
                localname = NULL;
9402
0
                goto done;
9403
0
            }
9404
62.9k
            if (*URL != 0) {
9405
61.7k
                uri = xmlParseURI((const char *) URL);
9406
61.7k
                if (uri == NULL) {
9407
19.1k
                    xmlNsErr(ctxt, XML_WAR_NS_URI,
9408
19.1k
                             "xmlns: '%s' is not a valid URI\n",
9409
19.1k
                                       URL, NULL, NULL);
9410
42.5k
                } else {
9411
42.5k
                    if (uri->scheme == NULL) {
9412
19.7k
                        xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9413
19.7k
                                  "xmlns: URI %s is not absolute\n",
9414
19.7k
                                  URL, NULL, NULL);
9415
19.7k
                    }
9416
42.5k
                    xmlFreeURI(uri);
9417
42.5k
                }
9418
61.7k
                if (URL == ctxt->str_xml_ns) {
9419
0
                    if (attname != ctxt->str_xml) {
9420
0
                        xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9421
0
                     "xml namespace URI cannot be the default namespace\n",
9422
0
                                 NULL, NULL, NULL);
9423
0
                    }
9424
0
                    goto next_attr;
9425
0
                }
9426
61.7k
                if ((len == 29) &&
9427
61.7k
                    (xmlStrEqual(URL,
9428
401
                             BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9429
0
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9430
0
                         "reuse of the xmlns namespace name is forbidden\n",
9431
0
                             NULL, NULL, NULL);
9432
0
                    goto next_attr;
9433
0
                }
9434
61.7k
            }
9435
            /*
9436
             * check that it's not a defined namespace
9437
             */
9438
76.0k
            for (j = 1;j <= nbNs;j++)
9439
15.3k
                if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9440
2.14k
                    break;
9441
62.9k
            if (j <= nbNs)
9442
2.14k
                xmlErrAttributeDup(ctxt, NULL, attname);
9443
60.7k
            else
9444
60.7k
                if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
9445
9446
9.64M
        } else if (aprefix == ctxt->str_xmlns) {
9447
470k
            const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9448
470k
            xmlURIPtr uri;
9449
9450
470k
            if (attname == ctxt->str_xml) {
9451
817
                if (URL != ctxt->str_xml_ns) {
9452
817
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9453
817
                             "xml namespace prefix mapped to wrong URI\n",
9454
817
                             NULL, NULL, NULL);
9455
817
                }
9456
                /*
9457
                 * Do not keep a namespace definition node
9458
                 */
9459
817
                goto next_attr;
9460
817
            }
9461
470k
            if (URL == ctxt->str_xml_ns) {
9462
0
                if (attname != ctxt->str_xml) {
9463
0
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9464
0
                             "xml namespace URI mapped to wrong prefix\n",
9465
0
                             NULL, NULL, NULL);
9466
0
                }
9467
0
                goto next_attr;
9468
0
            }
9469
470k
            if (attname == ctxt->str_xmlns) {
9470
373
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9471
373
                         "redefinition of the xmlns prefix is forbidden\n",
9472
373
                         NULL, NULL, NULL);
9473
373
                goto next_attr;
9474
373
            }
9475
469k
            if ((len == 29) &&
9476
469k
                (xmlStrEqual(URL,
9477
2.25k
                             BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9478
0
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9479
0
                         "reuse of the xmlns namespace name is forbidden\n",
9480
0
                         NULL, NULL, NULL);
9481
0
                goto next_attr;
9482
0
            }
9483
469k
            if ((URL == NULL) || (URL[0] == 0)) {
9484
1.39k
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9485
1.39k
                         "xmlns:%s: Empty XML namespace is not allowed\n",
9486
1.39k
                              attname, NULL, NULL);
9487
1.39k
                goto next_attr;
9488
468k
            } else {
9489
468k
                uri = xmlParseURI((const char *) URL);
9490
468k
                if (uri == NULL) {
9491
159k
                    xmlNsErr(ctxt, XML_WAR_NS_URI,
9492
159k
                         "xmlns:%s: '%s' is not a valid URI\n",
9493
159k
                                       attname, URL, NULL);
9494
309k
                } else {
9495
309k
                    if ((ctxt->pedantic) && (uri->scheme == NULL)) {
9496
41.2k
                        xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9497
41.2k
                                  "xmlns:%s: URI %s is not absolute\n",
9498
41.2k
                                  attname, URL, NULL);
9499
41.2k
                    }
9500
309k
                    xmlFreeURI(uri);
9501
309k
                }
9502
468k
            }
9503
9504
            /*
9505
             * check that it's not a defined namespace
9506
             */
9507
521k
            for (j = 1;j <= nbNs;j++)
9508
55.6k
                if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9509
2.31k
                    break;
9510
468k
            if (j <= nbNs)
9511
2.31k
                xmlErrAttributeDup(ctxt, aprefix, attname);
9512
465k
            else
9513
465k
                if (nsPush(ctxt, attname, URL) > 0) nbNs++;
9514
9515
9.17M
        } else {
9516
            /*
9517
             * Add the pair to atts
9518
             */
9519
9.17M
            if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9520
267k
                if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9521
0
                    goto next_attr;
9522
0
                }
9523
267k
                maxatts = ctxt->maxatts;
9524
267k
                atts = ctxt->atts;
9525
267k
            }
9526
9.17M
            ctxt->attallocs[nratts++] = alloc;
9527
9.17M
            atts[nbatts++] = attname;
9528
9.17M
            atts[nbatts++] = aprefix;
9529
            /*
9530
             * The namespace URI field is used temporarily to point at the
9531
             * base of the current input buffer for non-alloced attributes.
9532
             * When the input buffer is reallocated, all the pointers become
9533
             * invalid, but they can be reconstructed later.
9534
             */
9535
9.17M
            if (alloc)
9536
685k
                atts[nbatts++] = NULL;
9537
8.48M
            else
9538
8.48M
                atts[nbatts++] = ctxt->input->base;
9539
9.17M
            atts[nbatts++] = attvalue;
9540
9.17M
            attvalue += len;
9541
9.17M
            atts[nbatts++] = attvalue;
9542
            /*
9543
             * tag if some deallocation is needed
9544
             */
9545
9.17M
            if (alloc != 0) attval = 1;
9546
9.17M
            attvalue = NULL; /* moved into atts */
9547
9.17M
        }
9548
9549
10.4M
next_attr:
9550
10.4M
        if ((attvalue != NULL) && (alloc != 0)) {
9551
178k
            xmlFree(attvalue);
9552
178k
            attvalue = NULL;
9553
178k
        }
9554
9555
10.4M
  GROW
9556
10.4M
        if (ctxt->instate == XML_PARSER_EOF)
9557
0
            break;
9558
10.4M
  if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9559
5.57M
      break;
9560
4.87M
  if (SKIP_BLANKS == 0) {
9561
1.19M
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9562
1.19M
         "attributes construct error\n");
9563
1.19M
      break;
9564
1.19M
  }
9565
3.67M
        if ((cons == CUR_CONSUMED) && (id == ctxt->input->id) &&
9566
3.67M
            (attname == NULL) && (attvalue == NULL)) {
9567
0
      xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9568
0
           "xmlParseStartTag: problem parsing attributes\n");
9569
0
      break;
9570
0
  }
9571
3.67M
        GROW;
9572
3.67M
    }
9573
9574
9.90M
    if (ctxt->input->id != inputid) {
9575
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9576
0
                    "Unexpected change of input\n");
9577
0
        localname = NULL;
9578
0
        goto done;
9579
0
    }
9580
9581
    /* Reconstruct attribute value pointers. */
9582
19.0M
    for (i = 0, j = 0; j < nratts; i += 5, j++) {
9583
9.17M
        if (atts[i+2] != NULL) {
9584
            /*
9585
             * Arithmetic on dangling pointers is technically undefined
9586
             * behavior, but well...
9587
             */
9588
8.48M
            ptrdiff_t offset = ctxt->input->base - atts[i+2];
9589
8.48M
            atts[i+2]  = NULL;    /* Reset repurposed namespace URI */
9590
8.48M
            atts[i+3] += offset;  /* value */
9591
8.48M
            atts[i+4] += offset;  /* valuend */
9592
8.48M
        }
9593
9.17M
    }
9594
9595
    /*
9596
     * The attributes defaulting
9597
     */
9598
9.90M
    if (ctxt->attsDefault != NULL) {
9599
1.04M
        xmlDefAttrsPtr defaults;
9600
9601
1.04M
  defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9602
1.04M
  if (defaults != NULL) {
9603
181k
      for (i = 0;i < defaults->nbAttrs;i++) {
9604
123k
          attname = defaults->values[5 * i];
9605
123k
    aprefix = defaults->values[5 * i + 1];
9606
9607
                /*
9608
     * special work for namespaces defaulted defs
9609
     */
9610
123k
    if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9611
        /*
9612
         * check that it's not a defined namespace
9613
         */
9614
1.74k
        for (j = 1;j <= nbNs;j++)
9615
1.17k
            if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9616
604
          break;
9617
1.17k
              if (j <= nbNs) continue;
9618
9619
570
        nsname = xmlGetNamespace(ctxt, NULL);
9620
570
        if (nsname != defaults->values[5 * i + 2]) {
9621
521
      if (nsPush(ctxt, NULL,
9622
521
                 defaults->values[5 * i + 2]) > 0)
9623
521
          nbNs++;
9624
521
        }
9625
122k
    } else if (aprefix == ctxt->str_xmlns) {
9626
        /*
9627
         * check that it's not a defined namespace
9628
         */
9629
13.8k
        for (j = 1;j <= nbNs;j++)
9630
10.9k
            if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9631
10.2k
          break;
9632
13.1k
              if (j <= nbNs) continue;
9633
9634
2.94k
        nsname = xmlGetNamespace(ctxt, attname);
9635
2.94k
        if (nsname != defaults->values[2]) {
9636
2.35k
      if (nsPush(ctxt, attname,
9637
2.35k
                 defaults->values[5 * i + 2]) > 0)
9638
2.22k
          nbNs++;
9639
2.35k
        }
9640
109k
    } else {
9641
        /*
9642
         * check that it's not a defined attribute
9643
         */
9644
304k
        for (j = 0;j < nbatts;j+=5) {
9645
196k
      if ((attname == atts[j]) && (aprefix == atts[j+1]))
9646
719
          break;
9647
196k
        }
9648
109k
        if (j < nbatts) continue;
9649
9650
108k
        if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9651
2.35k
      if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9652
0
                            localname = NULL;
9653
0
                            goto done;
9654
0
      }
9655
2.35k
      maxatts = ctxt->maxatts;
9656
2.35k
      atts = ctxt->atts;
9657
2.35k
        }
9658
108k
        atts[nbatts++] = attname;
9659
108k
        atts[nbatts++] = aprefix;
9660
108k
        if (aprefix == NULL)
9661
79.2k
      atts[nbatts++] = NULL;
9662
29.2k
        else
9663
29.2k
            atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
9664
108k
        atts[nbatts++] = defaults->values[5 * i + 2];
9665
108k
        atts[nbatts++] = defaults->values[5 * i + 3];
9666
108k
        if ((ctxt->standalone == 1) &&
9667
108k
            (defaults->values[5 * i + 4] != NULL)) {
9668
10
      xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
9669
10
    "standalone: attribute %s on %s defaulted from external subset\n",
9670
10
                                   attname, localname);
9671
10
        }
9672
108k
        nbdef++;
9673
108k
    }
9674
123k
      }
9675
57.7k
  }
9676
1.04M
    }
9677
9678
    /*
9679
     * The attributes checkings
9680
     */
9681
19.1M
    for (i = 0; i < nbatts;i += 5) {
9682
        /*
9683
  * The default namespace does not apply to attribute names.
9684
  */
9685
9.28M
  if (atts[i + 1] != NULL) {
9686
852k
      nsname = xmlGetNamespace(ctxt, atts[i + 1]);
9687
852k
      if (nsname == NULL) {
9688
226k
    xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9689
226k
        "Namespace prefix %s for %s on %s is not defined\n",
9690
226k
        atts[i + 1], atts[i], localname);
9691
226k
      }
9692
852k
      atts[i + 2] = nsname;
9693
852k
  } else
9694
8.42M
      nsname = NULL;
9695
  /*
9696
   * [ WFC: Unique Att Spec ]
9697
   * No attribute name may appear more than once in the same
9698
   * start-tag or empty-element tag.
9699
   * As extended by the Namespace in XML REC.
9700
   */
9701
13.0M
        for (j = 0; j < i;j += 5) {
9702
3.82M
      if (atts[i] == atts[j]) {
9703
19.3k
          if (atts[i+1] == atts[j+1]) {
9704
6.80k
        xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
9705
6.80k
        break;
9706
6.80k
    }
9707
12.5k
    if ((nsname != NULL) && (atts[j + 2] == nsname)) {
9708
120
        xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9709
120
           "Namespaced Attribute %s in '%s' redefined\n",
9710
120
           atts[i], nsname, NULL);
9711
120
        break;
9712
120
    }
9713
12.5k
      }
9714
3.82M
  }
9715
9.28M
    }
9716
9717
9.90M
    nsname = xmlGetNamespace(ctxt, prefix);
9718
9.90M
    if ((prefix != NULL) && (nsname == NULL)) {
9719
649k
  xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9720
649k
           "Namespace prefix %s on %s is not defined\n",
9721
649k
     prefix, localname, NULL);
9722
649k
    }
9723
9.90M
    *pref = prefix;
9724
9.90M
    *URI = nsname;
9725
9726
    /*
9727
     * SAX: Start of Element !
9728
     */
9729
9.90M
    if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9730
9.90M
  (!ctxt->disableSAX)) {
9731
8.96M
  if (nbNs > 0)
9732
435k
      ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9733
435k
        nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
9734
435k
        nbatts / 5, nbdef, atts);
9735
8.53M
  else
9736
8.53M
      ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9737
8.53M
                    nsname, 0, NULL, nbatts / 5, nbdef, atts);
9738
8.96M
    }
9739
9740
9.90M
done:
9741
    /*
9742
     * Free up attribute allocated strings if needed
9743
     */
9744
9.90M
    if (attval != 0) {
9745
1.38M
  for (i = 3,j = 0; j < nratts;i += 5,j++)
9746
742k
      if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9747
685k
          xmlFree((xmlChar *) atts[i]);
9748
643k
    }
9749
9750
9.90M
    return(localname);
9751
9.90M
}
9752
9753
/**
9754
 * xmlParseEndTag2:
9755
 * @ctxt:  an XML parser context
9756
 * @line:  line of the start tag
9757
 * @nsNr:  number of namespaces on the start tag
9758
 *
9759
 * parse an end of tag
9760
 *
9761
 * [42] ETag ::= '</' Name S? '>'
9762
 *
9763
 * With namespace
9764
 *
9765
 * [NS 9] ETag ::= '</' QName S? '>'
9766
 */
9767
9768
static void
9769
3.17M
xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlStartTag *tag) {
9770
3.17M
    const xmlChar *name;
9771
9772
3.17M
    GROW;
9773
3.17M
    if ((RAW != '<') || (NXT(1) != '/')) {
9774
0
  xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
9775
0
  return;
9776
0
    }
9777
3.17M
    SKIP(2);
9778
9779
3.17M
    if (tag->prefix == NULL)
9780
2.54M
        name = xmlParseNameAndCompare(ctxt, ctxt->name);
9781
623k
    else
9782
623k
        name = xmlParseQNameAndCompare(ctxt, ctxt->name, tag->prefix);
9783
9784
    /*
9785
     * We should definitely be at the ending "S? '>'" part
9786
     */
9787
3.17M
    GROW;
9788
3.17M
    if (ctxt->instate == XML_PARSER_EOF)
9789
0
        return;
9790
3.17M
    SKIP_BLANKS;
9791
3.17M
    if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
9792
127k
  xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
9793
127k
    } else
9794
3.04M
  NEXT1;
9795
9796
    /*
9797
     * [ WFC: Element Type Match ]
9798
     * The Name in an element's end-tag must match the element type in the
9799
     * start-tag.
9800
     *
9801
     */
9802
3.17M
    if (name != (xmlChar*)1) {
9803
398k
        if (name == NULL) name = BAD_CAST "unparsable";
9804
398k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
9805
398k
         "Opening and ending tag mismatch: %s line %d and %s\n",
9806
398k
                    ctxt->name, tag->line, name);
9807
398k
    }
9808
9809
    /*
9810
     * SAX: End of Tag
9811
     */
9812
3.17M
    if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9813
3.17M
  (!ctxt->disableSAX))
9814
2.71M
  ctxt->sax->endElementNs(ctxt->userData, ctxt->name, tag->prefix,
9815
2.71M
                                tag->URI);
9816
9817
3.17M
    spacePop(ctxt);
9818
3.17M
    if (tag->nsNr != 0)
9819
101k
  nsPop(ctxt, tag->nsNr);
9820
3.17M
}
9821
9822
/**
9823
 * xmlParseCDSect:
9824
 * @ctxt:  an XML parser context
9825
 *
9826
 * DEPRECATED: Internal function, don't use.
9827
 *
9828
 * Parse escaped pure raw content.
9829
 *
9830
 * [18] CDSect ::= CDStart CData CDEnd
9831
 *
9832
 * [19] CDStart ::= '<![CDATA['
9833
 *
9834
 * [20] Data ::= (Char* - (Char* ']]>' Char*))
9835
 *
9836
 * [21] CDEnd ::= ']]>'
9837
 */
9838
void
9839
2.15M
xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9840
2.15M
    xmlChar *buf = NULL;
9841
2.15M
    int len = 0;
9842
2.15M
    int size = XML_PARSER_BUFFER_SIZE;
9843
2.15M
    int r, rl;
9844
2.15M
    int s, sl;
9845
2.15M
    int cur, l;
9846
2.15M
    int count = 0;
9847
2.15M
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
9848
2.03M
                    XML_MAX_HUGE_LENGTH :
9849
2.15M
                    XML_MAX_TEXT_LENGTH;
9850
9851
    /* Check 2.6.0 was NXT(0) not RAW */
9852
2.15M
    if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9853
2.15M
  SKIP(9);
9854
2.15M
    } else
9855
0
        return;
9856
9857
2.15M
    ctxt->instate = XML_PARSER_CDATA_SECTION;
9858
2.15M
    r = CUR_CHAR(rl);
9859
2.15M
    if (!IS_CHAR(r)) {
9860
27.8k
  xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9861
27.8k
  ctxt->instate = XML_PARSER_CONTENT;
9862
27.8k
        return;
9863
27.8k
    }
9864
2.12M
    NEXTL(rl);
9865
2.12M
    s = CUR_CHAR(sl);
9866
2.12M
    if (!IS_CHAR(s)) {
9867
11.5k
  xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9868
11.5k
  ctxt->instate = XML_PARSER_CONTENT;
9869
11.5k
        return;
9870
11.5k
    }
9871
2.11M
    NEXTL(sl);
9872
2.11M
    cur = CUR_CHAR(l);
9873
2.11M
    buf = (xmlChar *) xmlMallocAtomic(size);
9874
2.11M
    if (buf == NULL) {
9875
0
  xmlErrMemory(ctxt, NULL);
9876
0
  return;
9877
0
    }
9878
515M
    while (IS_CHAR(cur) &&
9879
515M
           ((r != ']') || (s != ']') || (cur != '>'))) {
9880
513M
  if (len + 5 >= size) {
9881
2.17M
      xmlChar *tmp;
9882
9883
2.17M
      tmp = (xmlChar *) xmlRealloc(buf, size * 2);
9884
2.17M
      if (tmp == NULL) {
9885
0
          xmlFree(buf);
9886
0
    xmlErrMemory(ctxt, NULL);
9887
0
    return;
9888
0
      }
9889
2.17M
      buf = tmp;
9890
2.17M
      size *= 2;
9891
2.17M
  }
9892
513M
  COPY_BUF(rl,buf,len,r);
9893
513M
  r = s;
9894
513M
  rl = sl;
9895
513M
  s = cur;
9896
513M
  sl = l;
9897
513M
  count++;
9898
513M
  if (count > 50) {
9899
9.64M
      SHRINK;
9900
9.64M
      GROW;
9901
9.64M
            if (ctxt->instate == XML_PARSER_EOF) {
9902
0
    xmlFree(buf);
9903
0
    return;
9904
0
            }
9905
9.64M
      count = 0;
9906
9.64M
  }
9907
513M
  NEXTL(l);
9908
513M
  cur = CUR_CHAR(l);
9909
513M
        if (len > maxLength) {
9910
0
            xmlFatalErrMsg(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9911
0
                           "CData section too big found\n");
9912
0
            xmlFree(buf);
9913
0
            return;
9914
0
        }
9915
513M
    }
9916
2.11M
    buf[len] = 0;
9917
2.11M
    ctxt->instate = XML_PARSER_CONTENT;
9918
2.11M
    if (cur != '>') {
9919
360k
  xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9920
360k
                       "CData section not finished\n%.50s\n", buf);
9921
360k
  xmlFree(buf);
9922
360k
        return;
9923
360k
    }
9924
1.75M
    NEXTL(l);
9925
9926
    /*
9927
     * OK the buffer is to be consumed as cdata.
9928
     */
9929
1.75M
    if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9930
86.8k
  if (ctxt->sax->cdataBlock != NULL)
9931
54.9k
      ctxt->sax->cdataBlock(ctxt->userData, buf, len);
9932
31.8k
  else if (ctxt->sax->characters != NULL)
9933
31.8k
      ctxt->sax->characters(ctxt->userData, buf, len);
9934
86.8k
    }
9935
1.75M
    xmlFree(buf);
9936
1.75M
}
9937
9938
/**
9939
 * xmlParseContentInternal:
9940
 * @ctxt:  an XML parser context
9941
 *
9942
 * Parse a content sequence. Stops at EOF or '</'. Leaves checking of
9943
 * unexpected EOF to the caller.
9944
 */
9945
9946
static void
9947
5.46M
xmlParseContentInternal(xmlParserCtxtPtr ctxt) {
9948
5.46M
    int nameNr = ctxt->nameNr;
9949
9950
5.46M
    GROW;
9951
554M
    while ((RAW != 0) &&
9952
554M
     (ctxt->instate != XML_PARSER_EOF)) {
9953
549M
        int id = ctxt->input->id;
9954
549M
  unsigned long cons = CUR_CONSUMED;
9955
549M
  const xmlChar *cur = ctxt->input->cur;
9956
9957
  /*
9958
   * First case : a Processing Instruction.
9959
   */
9960
549M
  if ((*cur == '<') && (cur[1] == '?')) {
9961
4.01M
      xmlParsePI(ctxt);
9962
4.01M
  }
9963
9964
  /*
9965
   * Second case : a CDSection
9966
   */
9967
  /* 2.6.0 test was *cur not RAW */
9968
545M
  else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9969
2.15M
      xmlParseCDSect(ctxt);
9970
2.15M
  }
9971
9972
  /*
9973
   * Third case :  a comment
9974
   */
9975
543M
  else if ((*cur == '<') && (NXT(1) == '!') &&
9976
543M
     (NXT(2) == '-') && (NXT(3) == '-')) {
9977
10.3M
      xmlParseComment(ctxt);
9978
10.3M
      ctxt->instate = XML_PARSER_CONTENT;
9979
10.3M
  }
9980
9981
  /*
9982
   * Fourth case :  a sub-element.
9983
   */
9984
533M
  else if (*cur == '<') {
9985
196M
            if (NXT(1) == '/') {
9986
42.8M
                if (ctxt->nameNr <= nameNr)
9987
690k
                    break;
9988
42.1M
          xmlParseElementEnd(ctxt);
9989
153M
            } else {
9990
153M
          xmlParseElementStart(ctxt);
9991
153M
            }
9992
196M
  }
9993
9994
  /*
9995
   * Fifth case : a reference. If if has not been resolved,
9996
   *    parsing returns it's Name, create the node
9997
   */
9998
9999
337M
  else if (*cur == '&') {
10000
46.1M
      xmlParseReference(ctxt);
10001
46.1M
  }
10002
10003
  /*
10004
   * Last case, text. Note that References are handled directly.
10005
   */
10006
291M
  else {
10007
291M
      xmlParseCharData(ctxt, 0);
10008
291M
  }
10009
10010
549M
  GROW;
10011
549M
  SHRINK;
10012
10013
549M
  if ((cons == CUR_CONSUMED) && (id == ctxt->input->id)) {
10014
92.5k
      xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
10015
92.5k
                  "detected an error in element content\n");
10016
92.5k
      xmlHaltParser(ctxt);
10017
92.5k
            break;
10018
92.5k
  }
10019
549M
    }
10020
5.46M
}
10021
10022
/**
10023
 * xmlParseContent:
10024
 * @ctxt:  an XML parser context
10025
 *
10026
 * Parse a content sequence. Stops at EOF or '</'.
10027
 *
10028
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10029
 */
10030
10031
void
10032
5.20M
xmlParseContent(xmlParserCtxtPtr ctxt) {
10033
5.20M
    int nameNr = ctxt->nameNr;
10034
10035
5.20M
    xmlParseContentInternal(ctxt);
10036
10037
5.20M
    if ((ctxt->instate != XML_PARSER_EOF) && (ctxt->nameNr > nameNr)) {
10038
2.89M
        const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
10039
2.89M
        int line = ctxt->pushTab[ctxt->nameNr - 1].line;
10040
2.89M
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
10041
2.89M
                "Premature end of data in tag %s line %d\n",
10042
2.89M
    name, line, NULL);
10043
2.89M
    }
10044
5.20M
}
10045
10046
/**
10047
 * xmlParseElement:
10048
 * @ctxt:  an XML parser context
10049
 *
10050
 * DEPRECATED: Internal function, don't use.
10051
 *
10052
 * parse an XML element
10053
 *
10054
 * [39] element ::= EmptyElemTag | STag content ETag
10055
 *
10056
 * [ WFC: Element Type Match ]
10057
 * The Name in an element's end-tag must match the element type in the
10058
 * start-tag.
10059
 *
10060
 */
10061
10062
void
10063
337k
xmlParseElement(xmlParserCtxtPtr ctxt) {
10064
337k
    if (xmlParseElementStart(ctxt) != 0)
10065
85.1k
        return;
10066
10067
251k
    xmlParseContentInternal(ctxt);
10068
251k
    if (ctxt->instate == XML_PARSER_EOF)
10069
2.33k
  return;
10070
10071
249k
    if (CUR == 0) {
10072
148k
        const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
10073
148k
        int line = ctxt->pushTab[ctxt->nameNr - 1].line;
10074
148k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
10075
148k
                "Premature end of data in tag %s line %d\n",
10076
148k
    name, line, NULL);
10077
148k
        return;
10078
148k
    }
10079
10080
100k
    xmlParseElementEnd(ctxt);
10081
100k
}
10082
10083
/**
10084
 * xmlParseElementStart:
10085
 * @ctxt:  an XML parser context
10086
 *
10087
 * Parse the start of an XML element. Returns -1 in case of error, 0 if an
10088
 * opening tag was parsed, 1 if an empty element was parsed.
10089
 */
10090
static int
10091
153M
xmlParseElementStart(xmlParserCtxtPtr ctxt) {
10092
153M
    const xmlChar *name;
10093
153M
    const xmlChar *prefix = NULL;
10094
153M
    const xmlChar *URI = NULL;
10095
153M
    xmlParserNodeInfo node_info;
10096
153M
    int line, tlen = 0;
10097
153M
    xmlNodePtr ret;
10098
153M
    int nsNr = ctxt->nsNr;
10099
10100
153M
    if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) &&
10101
153M
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
10102
21
  xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
10103
21
     "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
10104
21
        xmlParserMaxDepth);
10105
21
  xmlHaltParser(ctxt);
10106
21
  return(-1);
10107
21
    }
10108
10109
    /* Capture start position */
10110
153M
    if (ctxt->record_info) {
10111
0
        node_info.begin_pos = ctxt->input->consumed +
10112
0
                          (CUR_PTR - ctxt->input->base);
10113
0
  node_info.begin_line = ctxt->input->line;
10114
0
    }
10115
10116
153M
    if (ctxt->spaceNr == 0)
10117
0
  spacePush(ctxt, -1);
10118
153M
    else if (*ctxt->space == -2)
10119
93.9M
  spacePush(ctxt, -1);
10120
59.7M
    else
10121
59.7M
  spacePush(ctxt, *ctxt->space);
10122
10123
153M
    line = ctxt->input->line;
10124
153M
#ifdef LIBXML_SAX1_ENABLED
10125
153M
    if (ctxt->sax2)
10126
8.17M
#endif /* LIBXML_SAX1_ENABLED */
10127
8.17M
        name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
10128
145M
#ifdef LIBXML_SAX1_ENABLED
10129
145M
    else
10130
145M
  name = xmlParseStartTag(ctxt);
10131
153M
#endif /* LIBXML_SAX1_ENABLED */
10132
153M
    if (ctxt->instate == XML_PARSER_EOF)
10133
8.14k
  return(-1);
10134
153M
    if (name == NULL) {
10135
33.7M
  spacePop(ctxt);
10136
33.7M
        return(-1);
10137
33.7M
    }
10138
119M
    nameNsPush(ctxt, name, prefix, URI, line, ctxt->nsNr - nsNr);
10139
119M
    ret = ctxt->node;
10140
10141
119M
#ifdef LIBXML_VALID_ENABLED
10142
    /*
10143
     * [ VC: Root Element Type ]
10144
     * The Name in the document type declaration must match the element
10145
     * type of the root element.
10146
     */
10147
119M
    if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
10148
119M
        ctxt->node && (ctxt->node == ctxt->myDoc->children))
10149
0
        ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
10150
119M
#endif /* LIBXML_VALID_ENABLED */
10151
10152
    /*
10153
     * Check for an Empty Element.
10154
     */
10155
119M
    if ((RAW == '/') && (NXT(1) == '>')) {
10156
46.3M
        SKIP(2);
10157
46.3M
  if (ctxt->sax2) {
10158
2.59M
      if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
10159
2.59M
    (!ctxt->disableSAX))
10160
2.24M
    ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
10161
2.59M
#ifdef LIBXML_SAX1_ENABLED
10162
43.7M
  } else {
10163
43.7M
      if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
10164
43.7M
    (!ctxt->disableSAX))
10165
4.57M
    ctxt->sax->endElement(ctxt->userData, name);
10166
43.7M
#endif /* LIBXML_SAX1_ENABLED */
10167
43.7M
  }
10168
46.3M
  namePop(ctxt);
10169
46.3M
  spacePop(ctxt);
10170
46.3M
  if (nsNr != ctxt->nsNr)
10171
40.0k
      nsPop(ctxt, ctxt->nsNr - nsNr);
10172
46.3M
  if ( ret != NULL && ctxt->record_info ) {
10173
0
     node_info.end_pos = ctxt->input->consumed +
10174
0
            (CUR_PTR - ctxt->input->base);
10175
0
     node_info.end_line = ctxt->input->line;
10176
0
     node_info.node = ret;
10177
0
     xmlParserAddNodeInfo(ctxt, &node_info);
10178
0
  }
10179
46.3M
  return(1);
10180
46.3M
    }
10181
73.5M
    if (RAW == '>') {
10182
59.8M
        NEXT1;
10183
59.8M
    } else {
10184
13.6M
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
10185
13.6M
         "Couldn't find end of Start Tag %s line %d\n",
10186
13.6M
                    name, line, NULL);
10187
10188
  /*
10189
   * end of parsing of this node.
10190
   */
10191
13.6M
  nodePop(ctxt);
10192
13.6M
  namePop(ctxt);
10193
13.6M
  spacePop(ctxt);
10194
13.6M
  if (nsNr != ctxt->nsNr)
10195
137k
      nsPop(ctxt, ctxt->nsNr - nsNr);
10196
10197
  /*
10198
   * Capture end position and add node
10199
   */
10200
13.6M
  if ( ret != NULL && ctxt->record_info ) {
10201
0
     node_info.end_pos = ctxt->input->consumed +
10202
0
            (CUR_PTR - ctxt->input->base);
10203
0
     node_info.end_line = ctxt->input->line;
10204
0
     node_info.node = ret;
10205
0
     xmlParserAddNodeInfo(ctxt, &node_info);
10206
0
  }
10207
13.6M
  return(-1);
10208
13.6M
    }
10209
10210
59.8M
    return(0);
10211
73.5M
}
10212
10213
/**
10214
 * xmlParseElementEnd:
10215
 * @ctxt:  an XML parser context
10216
 *
10217
 * Parse the end of an XML element.
10218
 */
10219
static void
10220
42.2M
xmlParseElementEnd(xmlParserCtxtPtr ctxt) {
10221
42.2M
    xmlParserNodeInfo node_info;
10222
42.2M
    xmlNodePtr ret = ctxt->node;
10223
10224
42.2M
    if (ctxt->nameNr <= 0)
10225
0
        return;
10226
10227
    /*
10228
     * parse the end of tag: '</' should be here.
10229
     */
10230
42.2M
    if (ctxt->sax2) {
10231
2.07M
  xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
10232
2.07M
  namePop(ctxt);
10233
2.07M
    }
10234
40.2M
#ifdef LIBXML_SAX1_ENABLED
10235
40.2M
    else
10236
40.2M
  xmlParseEndTag1(ctxt, 0);
10237
42.2M
#endif /* LIBXML_SAX1_ENABLED */
10238
10239
    /*
10240
     * Capture end position and add node
10241
     */
10242
42.2M
    if ( ret != NULL && ctxt->record_info ) {
10243
0
       node_info.end_pos = ctxt->input->consumed +
10244
0
                          (CUR_PTR - ctxt->input->base);
10245
0
       node_info.end_line = ctxt->input->line;
10246
0
       node_info.node = ret;
10247
0
       xmlParserAddNodeInfo(ctxt, &node_info);
10248
0
    }
10249
42.2M
}
10250
10251
/**
10252
 * xmlParseVersionNum:
10253
 * @ctxt:  an XML parser context
10254
 *
10255
 * DEPRECATED: Internal function, don't use.
10256
 *
10257
 * parse the XML version value.
10258
 *
10259
 * [26] VersionNum ::= '1.' [0-9]+
10260
 *
10261
 * In practice allow [0-9].[0-9]+ at that level
10262
 *
10263
 * Returns the string giving the XML version number, or NULL
10264
 */
10265
xmlChar *
10266
317k
xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
10267
317k
    xmlChar *buf = NULL;
10268
317k
    int len = 0;
10269
317k
    int size = 10;
10270
317k
    xmlChar cur;
10271
10272
317k
    buf = (xmlChar *) xmlMallocAtomic(size);
10273
317k
    if (buf == NULL) {
10274
0
  xmlErrMemory(ctxt, NULL);
10275
0
  return(NULL);
10276
0
    }
10277
317k
    cur = CUR;
10278
317k
    if (!((cur >= '0') && (cur <= '9'))) {
10279
4.11k
  xmlFree(buf);
10280
4.11k
  return(NULL);
10281
4.11k
    }
10282
313k
    buf[len++] = cur;
10283
313k
    NEXT;
10284
313k
    cur=CUR;
10285
313k
    if (cur != '.') {
10286
4.90k
  xmlFree(buf);
10287
4.90k
  return(NULL);
10288
4.90k
    }
10289
308k
    buf[len++] = cur;
10290
308k
    NEXT;
10291
308k
    cur=CUR;
10292
1.05M
    while ((cur >= '0') && (cur <= '9')) {
10293
747k
  if (len + 1 >= size) {
10294
1.46k
      xmlChar *tmp;
10295
10296
1.46k
      size *= 2;
10297
1.46k
      tmp = (xmlChar *) xmlRealloc(buf, size);
10298
1.46k
      if (tmp == NULL) {
10299
0
          xmlFree(buf);
10300
0
    xmlErrMemory(ctxt, NULL);
10301
0
    return(NULL);
10302
0
      }
10303
1.46k
      buf = tmp;
10304
1.46k
  }
10305
747k
  buf[len++] = cur;
10306
747k
  NEXT;
10307
747k
  cur=CUR;
10308
747k
    }
10309
308k
    buf[len] = 0;
10310
308k
    return(buf);
10311
308k
}
10312
10313
/**
10314
 * xmlParseVersionInfo:
10315
 * @ctxt:  an XML parser context
10316
 *
10317
 * DEPRECATED: Internal function, don't use.
10318
 *
10319
 * parse the XML version.
10320
 *
10321
 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
10322
 *
10323
 * [25] Eq ::= S? '=' S?
10324
 *
10325
 * Returns the version string, e.g. "1.0"
10326
 */
10327
10328
xmlChar *
10329
384k
xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
10330
384k
    xmlChar *version = NULL;
10331
10332
384k
    if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
10333
327k
  SKIP(7);
10334
327k
  SKIP_BLANKS;
10335
327k
  if (RAW != '=') {
10336
5.59k
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10337
5.59k
      return(NULL);
10338
5.59k
        }
10339
322k
  NEXT;
10340
322k
  SKIP_BLANKS;
10341
322k
  if (RAW == '"') {
10342
273k
      NEXT;
10343
273k
      version = xmlParseVersionNum(ctxt);
10344
273k
      if (RAW != '"') {
10345
15.7k
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10346
15.7k
      } else
10347
257k
          NEXT;
10348
273k
  } else if (RAW == '\''){
10349
44.4k
      NEXT;
10350
44.4k
      version = xmlParseVersionNum(ctxt);
10351
44.4k
      if (RAW != '\'') {
10352
2.34k
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10353
2.34k
      } else
10354
42.1k
          NEXT;
10355
44.4k
  } else {
10356
4.58k
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10357
4.58k
  }
10358
322k
    }
10359
379k
    return(version);
10360
384k
}
10361
10362
/**
10363
 * xmlParseEncName:
10364
 * @ctxt:  an XML parser context
10365
 *
10366
 * DEPRECATED: Internal function, don't use.
10367
 *
10368
 * parse the XML encoding name
10369
 *
10370
 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
10371
 *
10372
 * Returns the encoding name value or NULL
10373
 */
10374
xmlChar *
10375
161k
xmlParseEncName(xmlParserCtxtPtr ctxt) {
10376
161k
    xmlChar *buf = NULL;
10377
161k
    int len = 0;
10378
161k
    int size = 10;
10379
161k
    xmlChar cur;
10380
10381
161k
    cur = CUR;
10382
161k
    if (((cur >= 'a') && (cur <= 'z')) ||
10383
161k
        ((cur >= 'A') && (cur <= 'Z'))) {
10384
159k
  buf = (xmlChar *) xmlMallocAtomic(size);
10385
159k
  if (buf == NULL) {
10386
0
      xmlErrMemory(ctxt, NULL);
10387
0
      return(NULL);
10388
0
  }
10389
10390
159k
  buf[len++] = cur;
10391
159k
  NEXT;
10392
159k
  cur = CUR;
10393
2.82M
  while (((cur >= 'a') && (cur <= 'z')) ||
10394
2.82M
         ((cur >= 'A') && (cur <= 'Z')) ||
10395
2.82M
         ((cur >= '0') && (cur <= '9')) ||
10396
2.82M
         (cur == '.') || (cur == '_') ||
10397
2.82M
         (cur == '-')) {
10398
2.66M
      if (len + 1 >= size) {
10399
54.6k
          xmlChar *tmp;
10400
10401
54.6k
    size *= 2;
10402
54.6k
    tmp = (xmlChar *) xmlRealloc(buf, size);
10403
54.6k
    if (tmp == NULL) {
10404
0
        xmlErrMemory(ctxt, NULL);
10405
0
        xmlFree(buf);
10406
0
        return(NULL);
10407
0
    }
10408
54.6k
    buf = tmp;
10409
54.6k
      }
10410
2.66M
      buf[len++] = cur;
10411
2.66M
      NEXT;
10412
2.66M
      cur = CUR;
10413
2.66M
      if (cur == 0) {
10414
933
          SHRINK;
10415
933
    GROW;
10416
933
    cur = CUR;
10417
933
      }
10418
2.66M
        }
10419
159k
  buf[len] = 0;
10420
159k
    } else {
10421
1.96k
  xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
10422
1.96k
    }
10423
161k
    return(buf);
10424
161k
}
10425
10426
/**
10427
 * xmlParseEncodingDecl:
10428
 * @ctxt:  an XML parser context
10429
 *
10430
 * DEPRECATED: Internal function, don't use.
10431
 *
10432
 * parse the XML encoding declaration
10433
 *
10434
 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' |  "'" EncName "'")
10435
 *
10436
 * this setups the conversion filters.
10437
 *
10438
 * Returns the encoding value or NULL
10439
 */
10440
10441
const xmlChar *
10442
302k
xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
10443
302k
    xmlChar *encoding = NULL;
10444
10445
302k
    SKIP_BLANKS;
10446
302k
    if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
10447
164k
  SKIP(8);
10448
164k
  SKIP_BLANKS;
10449
164k
  if (RAW != '=') {
10450
1.85k
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10451
1.85k
      return(NULL);
10452
1.85k
        }
10453
162k
  NEXT;
10454
162k
  SKIP_BLANKS;
10455
162k
  if (RAW == '"') {
10456
138k
      NEXT;
10457
138k
      encoding = xmlParseEncName(ctxt);
10458
138k
      if (RAW != '"') {
10459
8.42k
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10460
8.42k
    xmlFree((xmlChar *) encoding);
10461
8.42k
    return(NULL);
10462
8.42k
      } else
10463
130k
          NEXT;
10464
138k
  } else if (RAW == '\''){
10465
22.9k
      NEXT;
10466
22.9k
      encoding = xmlParseEncName(ctxt);
10467
22.9k
      if (RAW != '\'') {
10468
1.49k
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10469
1.49k
    xmlFree((xmlChar *) encoding);
10470
1.49k
    return(NULL);
10471
1.49k
      } else
10472
21.4k
          NEXT;
10473
22.9k
  } else {
10474
1.54k
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10475
1.54k
  }
10476
10477
        /*
10478
         * Non standard parsing, allowing the user to ignore encoding
10479
         */
10480
153k
        if (ctxt->options & XML_PARSE_IGNORE_ENC) {
10481
22.1k
      xmlFree((xmlChar *) encoding);
10482
22.1k
            return(NULL);
10483
22.1k
  }
10484
10485
  /*
10486
   * UTF-16 encoding switch has already taken place at this stage,
10487
   * more over the little-endian/big-endian selection is already done
10488
   */
10489
130k
        if ((encoding != NULL) &&
10490
130k
      ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
10491
129k
       (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
10492
      /*
10493
       * If no encoding was passed to the parser, that we are
10494
       * using UTF-16 and no decoder is present i.e. the
10495
       * document is apparently UTF-8 compatible, then raise an
10496
       * encoding mismatch fatal error
10497
       */
10498
977
      if ((ctxt->encoding == NULL) &&
10499
977
          (ctxt->input->buf != NULL) &&
10500
977
          (ctxt->input->buf->encoder == NULL)) {
10501
977
    xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING,
10502
977
      "Document labelled UTF-16 but has UTF-8 content\n");
10503
977
      }
10504
977
      if (ctxt->encoding != NULL)
10505
0
    xmlFree((xmlChar *) ctxt->encoding);
10506
977
      ctxt->encoding = encoding;
10507
977
  }
10508
  /*
10509
   * UTF-8 encoding is handled natively
10510
   */
10511
129k
        else if ((encoding != NULL) &&
10512
129k
      ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
10513
128k
       (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
10514
77.6k
      if (ctxt->encoding != NULL)
10515
0
    xmlFree((xmlChar *) ctxt->encoding);
10516
77.6k
      ctxt->encoding = encoding;
10517
77.6k
  }
10518
52.3k
  else if (encoding != NULL) {
10519
51.0k
      xmlCharEncodingHandlerPtr handler;
10520
10521
51.0k
      if (ctxt->input->encoding != NULL)
10522
0
    xmlFree((xmlChar *) ctxt->input->encoding);
10523
51.0k
      ctxt->input->encoding = encoding;
10524
10525
51.0k
            handler = xmlFindCharEncodingHandler((const char *) encoding);
10526
51.0k
      if (handler != NULL) {
10527
49.6k
    if (xmlSwitchToEncoding(ctxt, handler) < 0) {
10528
        /* failed to convert */
10529
154
        ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
10530
154
        return(NULL);
10531
154
    }
10532
49.6k
      } else {
10533
1.44k
    xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
10534
1.44k
      "Unsupported encoding %s\n", encoding);
10535
1.44k
    return(NULL);
10536
1.44k
      }
10537
51.0k
  }
10538
130k
    }
10539
266k
    return(encoding);
10540
302k
}
10541
10542
/**
10543
 * xmlParseSDDecl:
10544
 * @ctxt:  an XML parser context
10545
 *
10546
 * DEPRECATED: Internal function, don't use.
10547
 *
10548
 * parse the XML standalone declaration
10549
 *
10550
 * [32] SDDecl ::= S 'standalone' Eq
10551
 *                 (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
10552
 *
10553
 * [ VC: Standalone Document Declaration ]
10554
 * TODO The standalone document declaration must have the value "no"
10555
 * if any external markup declarations contain declarations of:
10556
 *  - attributes with default values, if elements to which these
10557
 *    attributes apply appear in the document without specifications
10558
 *    of values for these attributes, or
10559
 *  - entities (other than amp, lt, gt, apos, quot), if references
10560
 *    to those entities appear in the document, or
10561
 *  - attributes with values subject to normalization, where the
10562
 *    attribute appears in the document with a value which will change
10563
 *    as a result of normalization, or
10564
 *  - element types with element content, if white space occurs directly
10565
 *    within any instance of those types.
10566
 *
10567
 * Returns:
10568
 *   1 if standalone="yes"
10569
 *   0 if standalone="no"
10570
 *  -2 if standalone attribute is missing or invalid
10571
 *    (A standalone value of -2 means that the XML declaration was found,
10572
 *     but no value was specified for the standalone attribute).
10573
 */
10574
10575
int
10576
246k
xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
10577
246k
    int standalone = -2;
10578
10579
246k
    SKIP_BLANKS;
10580
246k
    if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
10581
31.0k
  SKIP(10);
10582
31.0k
        SKIP_BLANKS;
10583
31.0k
  if (RAW != '=') {
10584
348
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10585
348
      return(standalone);
10586
348
        }
10587
30.6k
  NEXT;
10588
30.6k
  SKIP_BLANKS;
10589
30.6k
        if (RAW == '\''){
10590
16.8k
      NEXT;
10591
16.8k
      if ((RAW == 'n') && (NXT(1) == 'o')) {
10592
13.4k
          standalone = 0;
10593
13.4k
                SKIP(2);
10594
13.4k
      } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10595
3.41k
                 (NXT(2) == 's')) {
10596
3.00k
          standalone = 1;
10597
3.00k
    SKIP(3);
10598
3.00k
            } else {
10599
401
    xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10600
401
      }
10601
16.8k
      if (RAW != '\'') {
10602
672
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10603
672
      } else
10604
16.1k
          NEXT;
10605
16.8k
  } else if (RAW == '"'){
10606
13.4k
      NEXT;
10607
13.4k
      if ((RAW == 'n') && (NXT(1) == 'o')) {
10608
6.18k
          standalone = 0;
10609
6.18k
    SKIP(2);
10610
7.22k
      } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10611
7.22k
                 (NXT(2) == 's')) {
10612
6.60k
          standalone = 1;
10613
6.60k
                SKIP(3);
10614
6.60k
            } else {
10615
617
    xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10616
617
      }
10617
13.4k
      if (RAW != '"') {
10618
841
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10619
841
      } else
10620
12.5k
          NEXT;
10621
13.4k
  } else {
10622
388
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10623
388
        }
10624
30.6k
    }
10625
246k
    return(standalone);
10626
246k
}
10627
10628
/**
10629
 * xmlParseXMLDecl:
10630
 * @ctxt:  an XML parser context
10631
 *
10632
 * DEPRECATED: Internal function, don't use.
10633
 *
10634
 * parse an XML declaration header
10635
 *
10636
 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10637
 */
10638
10639
void
10640
362k
xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
10641
362k
    xmlChar *version;
10642
10643
    /*
10644
     * This value for standalone indicates that the document has an
10645
     * XML declaration but it does not have a standalone attribute.
10646
     * It will be overwritten later if a standalone attribute is found.
10647
     */
10648
362k
    ctxt->input->standalone = -2;
10649
10650
    /*
10651
     * We know that '<?xml' is here.
10652
     */
10653
362k
    SKIP(5);
10654
10655
362k
    if (!IS_BLANK_CH(RAW)) {
10656
0
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10657
0
                 "Blank needed after '<?xml'\n");
10658
0
    }
10659
362k
    SKIP_BLANKS;
10660
10661
    /*
10662
     * We must have the VersionInfo here.
10663
     */
10664
362k
    version = xmlParseVersionInfo(ctxt);
10665
362k
    if (version == NULL) {
10666
74.3k
  xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
10667
288k
    } else {
10668
288k
  if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10669
      /*
10670
       * Changed here for XML-1.0 5th edition
10671
       */
10672
6.91k
      if (ctxt->options & XML_PARSE_OLD10) {
10673
1.29k
    xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10674
1.29k
                "Unsupported version '%s'\n",
10675
1.29k
                version);
10676
5.62k
      } else {
10677
5.62k
          if ((version[0] == '1') && ((version[1] == '.'))) {
10678
5.12k
        xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10679
5.12k
                      "Unsupported version '%s'\n",
10680
5.12k
          version, NULL);
10681
5.12k
    } else {
10682
499
        xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10683
499
              "Unsupported version '%s'\n",
10684
499
              version);
10685
499
    }
10686
5.62k
      }
10687
6.91k
  }
10688
288k
  if (ctxt->version != NULL)
10689
0
      xmlFree((void *) ctxt->version);
10690
288k
  ctxt->version = version;
10691
288k
    }
10692
10693
    /*
10694
     * We may have the encoding declaration
10695
     */
10696
362k
    if (!IS_BLANK_CH(RAW)) {
10697
167k
        if ((RAW == '?') && (NXT(1) == '>')) {
10698
82.2k
      SKIP(2);
10699
82.2k
      return;
10700
82.2k
  }
10701
85.5k
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10702
85.5k
    }
10703
280k
    xmlParseEncodingDecl(ctxt);
10704
280k
    if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10705
280k
         (ctxt->instate == XML_PARSER_EOF)) {
10706
  /*
10707
   * The XML REC instructs us to stop parsing right here
10708
   */
10709
1.38k
        return;
10710
1.38k
    }
10711
10712
    /*
10713
     * We may have the standalone status.
10714
     */
10715
278k
    if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
10716
34.0k
        if ((RAW == '?') && (NXT(1) == '>')) {
10717
31.9k
      SKIP(2);
10718
31.9k
      return;
10719
31.9k
  }
10720
2.09k
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10721
2.09k
    }
10722
10723
    /*
10724
     * We can grow the input buffer freely at that point
10725
     */
10726
246k
    GROW;
10727
10728
246k
    SKIP_BLANKS;
10729
246k
    ctxt->input->standalone = xmlParseSDDecl(ctxt);
10730
10731
246k
    SKIP_BLANKS;
10732
246k
    if ((RAW == '?') && (NXT(1) == '>')) {
10733
112k
        SKIP(2);
10734
134k
    } else if (RAW == '>') {
10735
        /* Deprecated old WD ... */
10736
595
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10737
595
  NEXT;
10738
133k
    } else {
10739
133k
        int c;
10740
10741
133k
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10742
6.72M
        while ((c = CUR) != 0) {
10743
6.71M
            NEXT;
10744
6.71M
            if (c == '>')
10745
122k
                break;
10746
6.71M
        }
10747
133k
    }
10748
246k
}
10749
10750
/**
10751
 * xmlParseMisc:
10752
 * @ctxt:  an XML parser context
10753
 *
10754
 * DEPRECATED: Internal function, don't use.
10755
 *
10756
 * parse an XML Misc* optional field.
10757
 *
10758
 * [27] Misc ::= Comment | PI |  S
10759
 */
10760
10761
void
10762
900k
xmlParseMisc(xmlParserCtxtPtr ctxt) {
10763
974k
    while (ctxt->instate != XML_PARSER_EOF) {
10764
974k
        SKIP_BLANKS;
10765
974k
        GROW;
10766
974k
        if ((RAW == '<') && (NXT(1) == '?')) {
10767
43.7k
      xmlParsePI(ctxt);
10768
930k
        } else if (CMP4(CUR_PTR, '<', '!', '-', '-')) {
10769
29.7k
      xmlParseComment(ctxt);
10770
900k
        } else {
10771
900k
            break;
10772
900k
        }
10773
974k
    }
10774
900k
}
10775
10776
/**
10777
 * xmlParseDocument:
10778
 * @ctxt:  an XML parser context
10779
 *
10780
 * parse an XML document (and build a tree if using the standard SAX
10781
 * interface).
10782
 *
10783
 * [1] document ::= prolog element Misc*
10784
 *
10785
 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
10786
 *
10787
 * Returns 0, -1 in case of error. the parser context is augmented
10788
 *                as a result of the parsing.
10789
 */
10790
10791
int
10792
425k
xmlParseDocument(xmlParserCtxtPtr ctxt) {
10793
425k
    xmlChar start[4];
10794
425k
    xmlCharEncoding enc;
10795
10796
425k
    xmlInitParser();
10797
10798
425k
    if ((ctxt == NULL) || (ctxt->input == NULL))
10799
0
        return(-1);
10800
10801
425k
    GROW;
10802
10803
    /*
10804
     * SAX: detecting the level.
10805
     */
10806
425k
    xmlDetectSAX2(ctxt);
10807
10808
    /*
10809
     * SAX: beginning of the document processing.
10810
     */
10811
425k
    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10812
425k
        ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10813
425k
    if (ctxt->instate == XML_PARSER_EOF)
10814
0
  return(-1);
10815
10816
425k
    if ((ctxt->encoding == NULL) &&
10817
425k
        ((ctxt->input->end - ctxt->input->cur) >= 4)) {
10818
  /*
10819
   * Get the 4 first bytes and decode the charset
10820
   * if enc != XML_CHAR_ENCODING_NONE
10821
   * plug some encoding conversion routines.
10822
   */
10823
420k
  start[0] = RAW;
10824
420k
  start[1] = NXT(1);
10825
420k
  start[2] = NXT(2);
10826
420k
  start[3] = NXT(3);
10827
420k
  enc = xmlDetectCharEncoding(&start[0], 4);
10828
420k
  if (enc != XML_CHAR_ENCODING_NONE) {
10829
146k
      xmlSwitchEncoding(ctxt, enc);
10830
146k
  }
10831
420k
    }
10832
10833
10834
425k
    if (CUR == 0) {
10835
3.75k
  xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10836
3.75k
  return(-1);
10837
3.75k
    }
10838
10839
    /*
10840
     * Check for the XMLDecl in the Prolog.
10841
     * do not GROW here to avoid the detected encoder to decode more
10842
     * than just the first line, unless the amount of data is really
10843
     * too small to hold "<?xml version="1.0" encoding="foo"
10844
     */
10845
421k
    if ((ctxt->input->end - ctxt->input->cur) < 35) {
10846
17.5k
       GROW;
10847
17.5k
    }
10848
421k
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10849
10850
  /*
10851
   * Note that we will switch encoding on the fly.
10852
   */
10853
132k
  xmlParseXMLDecl(ctxt);
10854
132k
  if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10855
132k
      (ctxt->instate == XML_PARSER_EOF)) {
10856
      /*
10857
       * The XML REC instructs us to stop parsing right here
10858
       */
10859
557
      return(-1);
10860
557
  }
10861
132k
  ctxt->standalone = ctxt->input->standalone;
10862
132k
  SKIP_BLANKS;
10863
288k
    } else {
10864
288k
  ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10865
288k
    }
10866
421k
    if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10867
400k
        ctxt->sax->startDocument(ctxt->userData);
10868
421k
    if (ctxt->instate == XML_PARSER_EOF)
10869
0
  return(-1);
10870
421k
    if ((ctxt->myDoc != NULL) && (ctxt->input != NULL) &&
10871
421k
        (ctxt->input->buf != NULL) && (ctxt->input->buf->compressed >= 0)) {
10872
0
  ctxt->myDoc->compression = ctxt->input->buf->compressed;
10873
0
    }
10874
10875
    /*
10876
     * The Misc part of the Prolog
10877
     */
10878
421k
    xmlParseMisc(ctxt);
10879
10880
    /*
10881
     * Then possibly doc type declaration(s) and more Misc
10882
     * (doctypedecl Misc*)?
10883
     */
10884
421k
    GROW;
10885
421k
    if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
10886
10887
154k
  ctxt->inSubset = 1;
10888
154k
  xmlParseDocTypeDecl(ctxt);
10889
154k
  if (RAW == '[') {
10890
113k
      ctxt->instate = XML_PARSER_DTD;
10891
113k
      xmlParseInternalSubset(ctxt);
10892
113k
      if (ctxt->instate == XML_PARSER_EOF)
10893
7.93k
    return(-1);
10894
113k
  }
10895
10896
  /*
10897
   * Create and update the external subset.
10898
   */
10899
146k
  ctxt->inSubset = 2;
10900
146k
  if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10901
146k
      (!ctxt->disableSAX))
10902
112k
      ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10903
112k
                                ctxt->extSubSystem, ctxt->extSubURI);
10904
146k
  if (ctxt->instate == XML_PARSER_EOF)
10905
3.50k
      return(-1);
10906
142k
  ctxt->inSubset = 0;
10907
10908
142k
        xmlCleanSpecialAttr(ctxt);
10909
10910
142k
  ctxt->instate = XML_PARSER_PROLOG;
10911
142k
  xmlParseMisc(ctxt);
10912
142k
    }
10913
10914
    /*
10915
     * Time to start parsing the tree itself
10916
     */
10917
409k
    GROW;
10918
409k
    if (RAW != '<') {
10919
72.6k
  xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10920
72.6k
           "Start tag expected, '<' not found\n");
10921
337k
    } else {
10922
337k
  ctxt->instate = XML_PARSER_CONTENT;
10923
337k
  xmlParseElement(ctxt);
10924
337k
  ctxt->instate = XML_PARSER_EPILOG;
10925
10926
10927
  /*
10928
   * The Misc part at the end
10929
   */
10930
337k
  xmlParseMisc(ctxt);
10931
10932
337k
  if (RAW != 0) {
10933
114k
      xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
10934
114k
  }
10935
337k
  ctxt->instate = XML_PARSER_EOF;
10936
337k
    }
10937
10938
    /*
10939
     * SAX: end of the document processing.
10940
     */
10941
409k
    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10942
409k
        ctxt->sax->endDocument(ctxt->userData);
10943
10944
    /*
10945
     * Remove locally kept entity definitions if the tree was not built
10946
     */
10947
409k
    if ((ctxt->myDoc != NULL) &&
10948
409k
  (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
10949
1.64k
  xmlFreeDoc(ctxt->myDoc);
10950
1.64k
  ctxt->myDoc = NULL;
10951
1.64k
    }
10952
10953
409k
    if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) {
10954
26.8k
        ctxt->myDoc->properties |= XML_DOC_WELLFORMED;
10955
26.8k
  if (ctxt->valid)
10956
17.1k
      ctxt->myDoc->properties |= XML_DOC_DTDVALID;
10957
26.8k
  if (ctxt->nsWellFormed)
10958
25.7k
      ctxt->myDoc->properties |= XML_DOC_NSVALID;
10959
26.8k
  if (ctxt->options & XML_PARSE_OLD10)
10960
5.98k
      ctxt->myDoc->properties |= XML_DOC_OLD10;
10961
26.8k
    }
10962
409k
    if (! ctxt->wellFormed) {
10963
382k
  ctxt->valid = 0;
10964
382k
  return(-1);
10965
382k
    }
10966
26.8k
    return(0);
10967
409k
}
10968
10969
/**
10970
 * xmlParseExtParsedEnt:
10971
 * @ctxt:  an XML parser context
10972
 *
10973
 * parse a general parsed entity
10974
 * An external general parsed entity is well-formed if it matches the
10975
 * production labeled extParsedEnt.
10976
 *
10977
 * [78] extParsedEnt ::= TextDecl? content
10978
 *
10979
 * Returns 0, -1 in case of error. the parser context is augmented
10980
 *                as a result of the parsing.
10981
 */
10982
10983
int
10984
0
xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
10985
0
    xmlChar start[4];
10986
0
    xmlCharEncoding enc;
10987
10988
0
    if ((ctxt == NULL) || (ctxt->input == NULL))
10989
0
        return(-1);
10990
10991
0
    xmlDetectSAX2(ctxt);
10992
10993
0
    GROW;
10994
10995
    /*
10996
     * SAX: beginning of the document processing.
10997
     */
10998
0
    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10999
0
        ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
11000
11001
    /*
11002
     * Get the 4 first bytes and decode the charset
11003
     * if enc != XML_CHAR_ENCODING_NONE
11004
     * plug some encoding conversion routines.
11005
     */
11006
0
    if ((ctxt->input->end - ctxt->input->cur) >= 4) {
11007
0
  start[0] = RAW;
11008
0
  start[1] = NXT(1);
11009
0
  start[2] = NXT(2);
11010
0
  start[3] = NXT(3);
11011
0
  enc = xmlDetectCharEncoding(start, 4);
11012
0
  if (enc != XML_CHAR_ENCODING_NONE) {
11013
0
      xmlSwitchEncoding(ctxt, enc);
11014
0
  }
11015
0
    }
11016
11017
11018
0
    if (CUR == 0) {
11019
0
  xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11020
0
    }
11021
11022
    /*
11023
     * Check for the XMLDecl in the Prolog.
11024
     */
11025
0
    GROW;
11026
0
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
11027
11028
  /*
11029
   * Note that we will switch encoding on the fly.
11030
   */
11031
0
  xmlParseXMLDecl(ctxt);
11032
0
  if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
11033
      /*
11034
       * The XML REC instructs us to stop parsing right here
11035
       */
11036
0
      return(-1);
11037
0
  }
11038
0
  SKIP_BLANKS;
11039
0
    } else {
11040
0
  ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11041
0
    }
11042
0
    if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
11043
0
        ctxt->sax->startDocument(ctxt->userData);
11044
0
    if (ctxt->instate == XML_PARSER_EOF)
11045
0
  return(-1);
11046
11047
    /*
11048
     * Doing validity checking on chunk doesn't make sense
11049
     */
11050
0
    ctxt->instate = XML_PARSER_CONTENT;
11051
0
    ctxt->validate = 0;
11052
0
    ctxt->loadsubset = 0;
11053
0
    ctxt->depth = 0;
11054
11055
0
    xmlParseContent(ctxt);
11056
0
    if (ctxt->instate == XML_PARSER_EOF)
11057
0
  return(-1);
11058
11059
0
    if ((RAW == '<') && (NXT(1) == '/')) {
11060
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11061
0
    } else if (RAW != 0) {
11062
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
11063
0
    }
11064
11065
    /*
11066
     * SAX: end of the document processing.
11067
     */
11068
0
    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11069
0
        ctxt->sax->endDocument(ctxt->userData);
11070
11071
0
    if (! ctxt->wellFormed) return(-1);
11072
0
    return(0);
11073
0
}
11074
11075
#ifdef LIBXML_PUSH_ENABLED
11076
/************************************************************************
11077
 *                  *
11078
 *    Progressive parsing interfaces        *
11079
 *                  *
11080
 ************************************************************************/
11081
11082
/**
11083
 * xmlParseLookupSequence:
11084
 * @ctxt:  an XML parser context
11085
 * @first:  the first char to lookup
11086
 * @next:  the next char to lookup or zero
11087
 * @third:  the next char to lookup or zero
11088
 *
11089
 * Try to find if a sequence (first, next, third) or  just (first next) or
11090
 * (first) is available in the input stream.
11091
 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
11092
 * to avoid rescanning sequences of bytes, it DOES change the state of the
11093
 * parser, do not use liberally.
11094
 *
11095
 * Returns the index to the current parsing point if the full sequence
11096
 *      is available, -1 otherwise.
11097
 */
11098
static int
11099
xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
11100
12.7M
                       xmlChar next, xmlChar third) {
11101
12.7M
    int base, len;
11102
12.7M
    xmlParserInputPtr in;
11103
12.7M
    const xmlChar *buf;
11104
11105
12.7M
    in = ctxt->input;
11106
12.7M
    if (in == NULL) return(-1);
11107
12.7M
    base = in->cur - in->base;
11108
12.7M
    if (base < 0) return(-1);
11109
12.7M
    if (ctxt->checkIndex > base)
11110
1.32M
        base = ctxt->checkIndex;
11111
12.7M
    if (in->buf == NULL) {
11112
0
  buf = in->base;
11113
0
  len = in->length;
11114
12.7M
    } else {
11115
12.7M
  buf = xmlBufContent(in->buf->buffer);
11116
12.7M
  len = xmlBufUse(in->buf->buffer);
11117
12.7M
    }
11118
    /* take into account the sequence length */
11119
12.7M
    if (third) len -= 2;
11120
10.9M
    else if (next) len --;
11121
259G
    for (;base < len;base++) {
11122
259G
        if (buf[base] == first) {
11123
16.1M
      if (third != 0) {
11124
5.57M
    if ((buf[base + 1] != next) ||
11125
5.57M
        (buf[base + 2] != third)) continue;
11126
10.5M
      } else if (next != 0) {
11127
1.04M
    if (buf[base + 1] != next) continue;
11128
1.04M
      }
11129
10.4M
      ctxt->checkIndex = 0;
11130
#ifdef DEBUG_PUSH
11131
      if (next == 0)
11132
    xmlGenericError(xmlGenericErrorContext,
11133
      "PP: lookup '%c' found at %d\n",
11134
      first, base);
11135
      else if (third == 0)
11136
    xmlGenericError(xmlGenericErrorContext,
11137
      "PP: lookup '%c%c' found at %d\n",
11138
      first, next, base);
11139
      else
11140
    xmlGenericError(xmlGenericErrorContext,
11141
      "PP: lookup '%c%c%c' found at %d\n",
11142
      first, next, third, base);
11143
#endif
11144
10.4M
      return(base - (in->cur - in->base));
11145
16.1M
  }
11146
259G
    }
11147
2.35M
    ctxt->checkIndex = base;
11148
#ifdef DEBUG_PUSH
11149
    if (next == 0)
11150
  xmlGenericError(xmlGenericErrorContext,
11151
    "PP: lookup '%c' failed\n", first);
11152
    else if (third == 0)
11153
  xmlGenericError(xmlGenericErrorContext,
11154
    "PP: lookup '%c%c' failed\n", first, next);
11155
    else
11156
  xmlGenericError(xmlGenericErrorContext,
11157
    "PP: lookup '%c%c%c' failed\n", first, next, third);
11158
#endif
11159
2.35M
    return(-1);
11160
12.7M
}
11161
11162
/**
11163
 * xmlParseGetLasts:
11164
 * @ctxt:  an XML parser context
11165
 * @lastlt:  pointer to store the last '<' from the input
11166
 * @lastgt:  pointer to store the last '>' from the input
11167
 *
11168
 * Lookup the last < and > in the current chunk
11169
 */
11170
static void
11171
xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
11172
10.1M
                 const xmlChar **lastgt) {
11173
10.1M
    const xmlChar *tmp;
11174
11175
10.1M
    if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
11176
0
  xmlGenericError(xmlGenericErrorContext,
11177
0
        "Internal error: xmlParseGetLasts\n");
11178
0
  return;
11179
0
    }
11180
10.1M
    if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
11181
4.74M
        tmp = ctxt->input->end;
11182
4.74M
  tmp--;
11183
1.59G
  while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
11184
4.74M
  if (tmp < ctxt->input->base) {
11185
108k
      *lastlt = NULL;
11186
108k
      *lastgt = NULL;
11187
4.63M
  } else {
11188
4.63M
      *lastlt = tmp;
11189
4.63M
      tmp++;
11190
274M
      while ((tmp < ctxt->input->end) && (*tmp != '>')) {
11191
269M
          if (*tmp == '\'') {
11192
225k
        tmp++;
11193
87.8M
        while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
11194
225k
        if (tmp < ctxt->input->end) tmp++;
11195
269M
    } else if (*tmp == '"') {
11196
3.29M
        tmp++;
11197
178M
        while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
11198
3.29M
        if (tmp < ctxt->input->end) tmp++;
11199
3.29M
    } else
11200
266M
        tmp++;
11201
269M
      }
11202
4.63M
      if (tmp < ctxt->input->end)
11203
1.90M
          *lastgt = tmp;
11204
2.73M
      else {
11205
2.73M
          tmp = *lastlt;
11206
2.73M
    tmp--;
11207
74.1M
    while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
11208
2.73M
    if (tmp >= ctxt->input->base)
11209
2.68M
        *lastgt = tmp;
11210
48.7k
    else
11211
48.7k
        *lastgt = NULL;
11212
2.73M
      }
11213
4.63M
  }
11214
5.36M
    } else {
11215
5.36M
        *lastlt = NULL;
11216
5.36M
  *lastgt = NULL;
11217
5.36M
    }
11218
10.1M
}
11219
/**
11220
 * xmlCheckCdataPush:
11221
 * @cur: pointer to the block of characters
11222
 * @len: length of the block in bytes
11223
 * @complete: 1 if complete CDATA block is passed in, 0 if partial block
11224
 *
11225
 * Check that the block of characters is okay as SCdata content [20]
11226
 *
11227
 * Returns the number of bytes to pass if okay, a negative index where an
11228
 *         UTF-8 error occurred otherwise
11229
 */
11230
static int
11231
1.30M
xmlCheckCdataPush(const xmlChar *utf, int len, int complete) {
11232
1.30M
    int ix;
11233
1.30M
    unsigned char c;
11234
1.30M
    int codepoint;
11235
11236
1.30M
    if ((utf == NULL) || (len <= 0))
11237
1.34k
        return(0);
11238
11239
18.3M
    for (ix = 0; ix < len;) {      /* string is 0-terminated */
11240
18.2M
        c = utf[ix];
11241
18.2M
        if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
11242
14.6M
      if (c >= 0x20)
11243
12.2M
    ix++;
11244
2.33M
      else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
11245
1.61M
          ix++;
11246
719k
      else
11247
719k
          return(-ix);
11248
14.6M
  } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
11249
1.60M
      if (ix + 2 > len) return(complete ? -ix : ix);
11250
1.59M
      if ((utf[ix+1] & 0xc0 ) != 0x80)
11251
172k
          return(-ix);
11252
1.42M
      codepoint = (utf[ix] & 0x1f) << 6;
11253
1.42M
      codepoint |= utf[ix+1] & 0x3f;
11254
1.42M
      if (!xmlIsCharQ(codepoint))
11255
12.4k
          return(-ix);
11256
1.41M
      ix += 2;
11257
2.01M
  } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
11258
760k
      if (ix + 3 > len) return(complete ? -ix : ix);
11259
754k
      if (((utf[ix+1] & 0xc0) != 0x80) ||
11260
754k
          ((utf[ix+2] & 0xc0) != 0x80))
11261
57.0k
        return(-ix);
11262
697k
      codepoint = (utf[ix] & 0xf) << 12;
11263
697k
      codepoint |= (utf[ix+1] & 0x3f) << 6;
11264
697k
      codepoint |= utf[ix+2] & 0x3f;
11265
697k
      if (!xmlIsCharQ(codepoint))
11266
3.22k
          return(-ix);
11267
693k
      ix += 3;
11268
1.25M
  } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
11269
1.12M
      if (ix + 4 > len) return(complete ? -ix : ix);
11270
1.11M
      if (((utf[ix+1] & 0xc0) != 0x80) ||
11271
1.11M
          ((utf[ix+2] & 0xc0) != 0x80) ||
11272
1.11M
    ((utf[ix+3] & 0xc0) != 0x80))
11273
94.6k
        return(-ix);
11274
1.02M
      codepoint = (utf[ix] & 0x7) << 18;
11275
1.02M
      codepoint |= (utf[ix+1] & 0x3f) << 12;
11276
1.02M
      codepoint |= (utf[ix+2] & 0x3f) << 6;
11277
1.02M
      codepoint |= utf[ix+3] & 0x3f;
11278
1.02M
      if (!xmlIsCharQ(codepoint))
11279
22.2k
          return(-ix);
11280
1.00M
      ix += 4;
11281
1.00M
  } else       /* unknown encoding */
11282
129k
      return(-ix);
11283
18.2M
      }
11284
67.2k
      return(ix);
11285
1.30M
}
11286
11287
/**
11288
 * xmlParseTryOrFinish:
11289
 * @ctxt:  an XML parser context
11290
 * @terminate:  last chunk indicator
11291
 *
11292
 * Try to progress on parsing
11293
 *
11294
 * Returns zero if no parsing was possible
11295
 */
11296
static int
11297
9.58M
xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
11298
9.58M
    int ret = 0;
11299
9.58M
    int avail, tlen;
11300
9.58M
    xmlChar cur, next;
11301
9.58M
    const xmlChar *lastlt, *lastgt;
11302
11303
9.58M
    if (ctxt->input == NULL)
11304
0
        return(0);
11305
11306
#ifdef DEBUG_PUSH
11307
    switch (ctxt->instate) {
11308
  case XML_PARSER_EOF:
11309
      xmlGenericError(xmlGenericErrorContext,
11310
        "PP: try EOF\n"); break;
11311
  case XML_PARSER_START:
11312
      xmlGenericError(xmlGenericErrorContext,
11313
        "PP: try START\n"); break;
11314
  case XML_PARSER_MISC:
11315
      xmlGenericError(xmlGenericErrorContext,
11316
        "PP: try MISC\n");break;
11317
  case XML_PARSER_COMMENT:
11318
      xmlGenericError(xmlGenericErrorContext,
11319
        "PP: try COMMENT\n");break;
11320
  case XML_PARSER_PROLOG:
11321
      xmlGenericError(xmlGenericErrorContext,
11322
        "PP: try PROLOG\n");break;
11323
  case XML_PARSER_START_TAG:
11324
      xmlGenericError(xmlGenericErrorContext,
11325
        "PP: try START_TAG\n");break;
11326
  case XML_PARSER_CONTENT:
11327
      xmlGenericError(xmlGenericErrorContext,
11328
        "PP: try CONTENT\n");break;
11329
  case XML_PARSER_CDATA_SECTION:
11330
      xmlGenericError(xmlGenericErrorContext,
11331
        "PP: try CDATA_SECTION\n");break;
11332
  case XML_PARSER_END_TAG:
11333
      xmlGenericError(xmlGenericErrorContext,
11334
        "PP: try END_TAG\n");break;
11335
  case XML_PARSER_ENTITY_DECL:
11336
      xmlGenericError(xmlGenericErrorContext,
11337
        "PP: try ENTITY_DECL\n");break;
11338
  case XML_PARSER_ENTITY_VALUE:
11339
      xmlGenericError(xmlGenericErrorContext,
11340
        "PP: try ENTITY_VALUE\n");break;
11341
  case XML_PARSER_ATTRIBUTE_VALUE:
11342
      xmlGenericError(xmlGenericErrorContext,
11343
        "PP: try ATTRIBUTE_VALUE\n");break;
11344
  case XML_PARSER_DTD:
11345
      xmlGenericError(xmlGenericErrorContext,
11346
        "PP: try DTD\n");break;
11347
  case XML_PARSER_EPILOG:
11348
      xmlGenericError(xmlGenericErrorContext,
11349
        "PP: try EPILOG\n");break;
11350
  case XML_PARSER_PI:
11351
      xmlGenericError(xmlGenericErrorContext,
11352
        "PP: try PI\n");break;
11353
        case XML_PARSER_IGNORE:
11354
            xmlGenericError(xmlGenericErrorContext,
11355
        "PP: try IGNORE\n");break;
11356
    }
11357
#endif
11358
11359
9.58M
    if ((ctxt->input != NULL) &&
11360
9.58M
        (ctxt->input->cur - ctxt->input->base > 4096)) {
11361
49.2k
  xmlSHRINK(ctxt);
11362
49.2k
  ctxt->checkIndex = 0;
11363
49.2k
    }
11364
9.58M
    xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11365
11366
49.6M
    while (ctxt->instate != XML_PARSER_EOF) {
11367
49.5M
  if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
11368
193k
      return(0);
11369
11370
49.3M
  if (ctxt->input == NULL) break;
11371
49.3M
  if (ctxt->input->buf == NULL)
11372
0
      avail = ctxt->input->length -
11373
0
              (ctxt->input->cur - ctxt->input->base);
11374
49.3M
  else {
11375
      /*
11376
       * If we are operating on converted input, try to flush
11377
       * remaining chars to avoid them stalling in the non-converted
11378
       * buffer. But do not do this in document start where
11379
       * encoding="..." may not have been read and we work on a
11380
       * guessed encoding.
11381
       */
11382
49.3M
      if ((ctxt->instate != XML_PARSER_START) &&
11383
49.3M
          (ctxt->input->buf->raw != NULL) &&
11384
49.3M
    (xmlBufIsEmpty(ctxt->input->buf->raw) == 0)) {
11385
196k
                size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
11386
196k
                                                 ctxt->input);
11387
196k
    size_t current = ctxt->input->cur - ctxt->input->base;
11388
11389
196k
    xmlParserInputBufferPush(ctxt->input->buf, 0, "");
11390
196k
                xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
11391
196k
                                      base, current);
11392
196k
      }
11393
49.3M
      avail = xmlBufUse(ctxt->input->buf->buffer) -
11394
49.3M
        (ctxt->input->cur - ctxt->input->base);
11395
49.3M
  }
11396
49.3M
        if (avail < 1)
11397
254k
      goto done;
11398
49.1M
        switch (ctxt->instate) {
11399
0
            case XML_PARSER_EOF:
11400
          /*
11401
     * Document parsing is done !
11402
     */
11403
0
          goto done;
11404
1.53M
            case XML_PARSER_START:
11405
1.53M
    if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
11406
369k
        xmlChar start[4];
11407
369k
        xmlCharEncoding enc;
11408
11409
        /*
11410
         * Very first chars read from the document flow.
11411
         */
11412
369k
        if (avail < 4)
11413
20.6k
      goto done;
11414
11415
        /*
11416
         * Get the 4 first bytes and decode the charset
11417
         * if enc != XML_CHAR_ENCODING_NONE
11418
         * plug some encoding conversion routines,
11419
         * else xmlSwitchEncoding will set to (default)
11420
         * UTF8.
11421
         */
11422
349k
        start[0] = RAW;
11423
349k
        start[1] = NXT(1);
11424
349k
        start[2] = NXT(2);
11425
349k
        start[3] = NXT(3);
11426
349k
        enc = xmlDetectCharEncoding(start, 4);
11427
349k
        xmlSwitchEncoding(ctxt, enc);
11428
349k
        break;
11429
369k
    }
11430
11431
1.16M
    if (avail < 2)
11432
368
        goto done;
11433
1.16M
    cur = ctxt->input->cur[0];
11434
1.16M
    next = ctxt->input->cur[1];
11435
1.16M
    if (cur == 0) {
11436
4.97k
        if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11437
4.97k
      ctxt->sax->setDocumentLocator(ctxt->userData,
11438
4.97k
                  &xmlDefaultSAXLocator);
11439
4.97k
        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11440
4.97k
        xmlHaltParser(ctxt);
11441
#ifdef DEBUG_PUSH
11442
        xmlGenericError(xmlGenericErrorContext,
11443
          "PP: entering EOF\n");
11444
#endif
11445
4.97k
        if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11446
4.97k
      ctxt->sax->endDocument(ctxt->userData);
11447
4.97k
        goto done;
11448
4.97k
    }
11449
1.15M
          if ((cur == '<') && (next == '?')) {
11450
        /* PI or XML decl */
11451
734k
        if (avail < 5) return(ret);
11452
734k
        if ((!terminate) &&
11453
734k
            (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11454
470k
      return(ret);
11455
264k
        if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11456
264k
      ctxt->sax->setDocumentLocator(ctxt->userData,
11457
264k
                  &xmlDefaultSAXLocator);
11458
264k
        if ((ctxt->input->cur[2] == 'x') &&
11459
264k
      (ctxt->input->cur[3] == 'm') &&
11460
264k
      (ctxt->input->cur[4] == 'l') &&
11461
264k
      (IS_BLANK_CH(ctxt->input->cur[5]))) {
11462
229k
      ret += 5;
11463
#ifdef DEBUG_PUSH
11464
      xmlGenericError(xmlGenericErrorContext,
11465
        "PP: Parsing XML Decl\n");
11466
#endif
11467
229k
      xmlParseXMLDecl(ctxt);
11468
229k
      if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
11469
          /*
11470
           * The XML REC instructs us to stop parsing right
11471
           * here
11472
           */
11473
825
          xmlHaltParser(ctxt);
11474
825
          return(0);
11475
825
      }
11476
228k
      ctxt->standalone = ctxt->input->standalone;
11477
228k
      if ((ctxt->encoding == NULL) &&
11478
228k
          (ctxt->input->encoding != NULL))
11479
31.9k
          ctxt->encoding = xmlStrdup(ctxt->input->encoding);
11480
228k
      if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11481
228k
          (!ctxt->disableSAX))
11482
193k
          ctxt->sax->startDocument(ctxt->userData);
11483
228k
      ctxt->instate = XML_PARSER_MISC;
11484
#ifdef DEBUG_PUSH
11485
      xmlGenericError(xmlGenericErrorContext,
11486
        "PP: entering MISC\n");
11487
#endif
11488
228k
        } else {
11489
34.5k
      ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11490
34.5k
      if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11491
34.5k
          (!ctxt->disableSAX))
11492
34.5k
          ctxt->sax->startDocument(ctxt->userData);
11493
34.5k
      ctxt->instate = XML_PARSER_MISC;
11494
#ifdef DEBUG_PUSH
11495
      xmlGenericError(xmlGenericErrorContext,
11496
        "PP: entering MISC\n");
11497
#endif
11498
34.5k
        }
11499
422k
    } else {
11500
422k
        if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11501
422k
      ctxt->sax->setDocumentLocator(ctxt->userData,
11502
422k
                  &xmlDefaultSAXLocator);
11503
422k
        ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11504
422k
        if (ctxt->version == NULL) {
11505
0
            xmlErrMemory(ctxt, NULL);
11506
0
      break;
11507
0
        }
11508
422k
        if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11509
422k
            (!ctxt->disableSAX))
11510
422k
      ctxt->sax->startDocument(ctxt->userData);
11511
422k
        ctxt->instate = XML_PARSER_MISC;
11512
#ifdef DEBUG_PUSH
11513
        xmlGenericError(xmlGenericErrorContext,
11514
          "PP: entering MISC\n");
11515
#endif
11516
422k
    }
11517
685k
    break;
11518
8.00M
            case XML_PARSER_START_TAG: {
11519
8.00M
          const xmlChar *name;
11520
8.00M
    const xmlChar *prefix = NULL;
11521
8.00M
    const xmlChar *URI = NULL;
11522
8.00M
                int line = ctxt->input->line;
11523
8.00M
    int nsNr = ctxt->nsNr;
11524
11525
8.00M
    if ((avail < 2) && (ctxt->inputNr == 1))
11526
0
        goto done;
11527
8.00M
    cur = ctxt->input->cur[0];
11528
8.00M
          if (cur != '<') {
11529
36.3k
        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11530
36.3k
        xmlHaltParser(ctxt);
11531
36.3k
        if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11532
36.3k
      ctxt->sax->endDocument(ctxt->userData);
11533
36.3k
        goto done;
11534
36.3k
    }
11535
7.96M
    if (!terminate) {
11536
7.28M
        if (ctxt->progressive) {
11537
            /* > can be found unescaped in attribute values */
11538
7.28M
            if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
11539
940k
          goto done;
11540
7.28M
        } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11541
0
      goto done;
11542
0
        }
11543
7.28M
    }
11544
7.02M
    if (ctxt->spaceNr == 0)
11545
96.2k
        spacePush(ctxt, -1);
11546
6.92M
    else if (*ctxt->space == -2)
11547
1.12M
        spacePush(ctxt, -1);
11548
5.80M
    else
11549
5.80M
        spacePush(ctxt, *ctxt->space);
11550
7.02M
#ifdef LIBXML_SAX1_ENABLED
11551
7.02M
    if (ctxt->sax2)
11552
3.58M
#endif /* LIBXML_SAX1_ENABLED */
11553
3.58M
        name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
11554
3.43M
#ifdef LIBXML_SAX1_ENABLED
11555
3.43M
    else
11556
3.43M
        name = xmlParseStartTag(ctxt);
11557
7.02M
#endif /* LIBXML_SAX1_ENABLED */
11558
7.02M
    if (ctxt->instate == XML_PARSER_EOF)
11559
15
        goto done;
11560
7.02M
    if (name == NULL) {
11561
54.4k
        spacePop(ctxt);
11562
54.4k
        xmlHaltParser(ctxt);
11563
54.4k
        if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11564
54.4k
      ctxt->sax->endDocument(ctxt->userData);
11565
54.4k
        goto done;
11566
54.4k
    }
11567
6.96M
#ifdef LIBXML_VALID_ENABLED
11568
    /*
11569
     * [ VC: Root Element Type ]
11570
     * The Name in the document type declaration must match
11571
     * the element type of the root element.
11572
     */
11573
6.96M
    if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
11574
6.96M
        ctxt->node && (ctxt->node == ctxt->myDoc->children))
11575
0
        ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
11576
6.96M
#endif /* LIBXML_VALID_ENABLED */
11577
11578
    /*
11579
     * Check for an Empty Element.
11580
     */
11581
6.96M
    if ((RAW == '/') && (NXT(1) == '>')) {
11582
3.24M
        SKIP(2);
11583
11584
3.24M
        if (ctxt->sax2) {
11585
1.70M
      if ((ctxt->sax != NULL) &&
11586
1.70M
          (ctxt->sax->endElementNs != NULL) &&
11587
1.70M
          (!ctxt->disableSAX))
11588
1.69M
          ctxt->sax->endElementNs(ctxt->userData, name,
11589
1.69M
                                  prefix, URI);
11590
1.70M
      if (ctxt->nsNr - nsNr > 0)
11591
8.80k
          nsPop(ctxt, ctxt->nsNr - nsNr);
11592
1.70M
#ifdef LIBXML_SAX1_ENABLED
11593
1.70M
        } else {
11594
1.54M
      if ((ctxt->sax != NULL) &&
11595
1.54M
          (ctxt->sax->endElement != NULL) &&
11596
1.54M
          (!ctxt->disableSAX))
11597
1.54M
          ctxt->sax->endElement(ctxt->userData, name);
11598
1.54M
#endif /* LIBXML_SAX1_ENABLED */
11599
1.54M
        }
11600
3.24M
        if (ctxt->instate == XML_PARSER_EOF)
11601
0
      goto done;
11602
3.24M
        spacePop(ctxt);
11603
3.24M
        if (ctxt->nameNr == 0) {
11604
10.6k
      ctxt->instate = XML_PARSER_EPILOG;
11605
3.23M
        } else {
11606
3.23M
      ctxt->instate = XML_PARSER_CONTENT;
11607
3.23M
        }
11608
3.24M
                    ctxt->progressive = 1;
11609
3.24M
        break;
11610
3.24M
    }
11611
3.72M
    if (RAW == '>') {
11612
2.85M
        NEXT;
11613
2.85M
    } else {
11614
874k
        xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
11615
874k
           "Couldn't find end of Start Tag %s\n",
11616
874k
           name);
11617
874k
        nodePop(ctxt);
11618
874k
        spacePop(ctxt);
11619
874k
    }
11620
3.72M
                nameNsPush(ctxt, name, prefix, URI, line, ctxt->nsNr - nsNr);
11621
11622
3.72M
    ctxt->instate = XML_PARSER_CONTENT;
11623
3.72M
                ctxt->progressive = 1;
11624
3.72M
                break;
11625
6.96M
      }
11626
30.7M
            case XML_PARSER_CONTENT: {
11627
30.7M
    int id;
11628
30.7M
    unsigned long cons;
11629
30.7M
    if ((avail < 2) && (ctxt->inputNr == 1))
11630
106k
        goto done;
11631
30.6M
    cur = ctxt->input->cur[0];
11632
30.6M
    next = ctxt->input->cur[1];
11633
11634
30.6M
    id = ctxt->input->id;
11635
30.6M
          cons = CUR_CONSUMED;
11636
30.6M
    if ((cur == '<') && (next == '/')) {
11637
1.97M
        ctxt->instate = XML_PARSER_END_TAG;
11638
1.97M
        break;
11639
28.6M
          } else if ((cur == '<') && (next == '?')) {
11640
209k
        if ((!terminate) &&
11641
209k
            (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11642
144k
                        ctxt->progressive = XML_PARSER_PI;
11643
144k
      goto done;
11644
144k
                    }
11645
65.0k
        xmlParsePI(ctxt);
11646
65.0k
        ctxt->instate = XML_PARSER_CONTENT;
11647
65.0k
                    ctxt->progressive = 1;
11648
28.4M
    } else if ((cur == '<') && (next != '!')) {
11649
6.56M
        ctxt->instate = XML_PARSER_START_TAG;
11650
6.56M
        break;
11651
21.8M
    } else if ((cur == '<') && (next == '!') &&
11652
21.8M
               (ctxt->input->cur[2] == '-') &&
11653
21.8M
         (ctxt->input->cur[3] == '-')) {
11654
388k
        int term;
11655
11656
388k
              if (avail < 4)
11657
0
            goto done;
11658
388k
        ctxt->input->cur += 4;
11659
388k
        term = xmlParseLookupSequence(ctxt, '-', '-', '>');
11660
388k
        ctxt->input->cur -= 4;
11661
388k
        if ((!terminate) && (term < 0)) {
11662
230k
                        ctxt->progressive = XML_PARSER_COMMENT;
11663
230k
      goto done;
11664
230k
                    }
11665
158k
        xmlParseComment(ctxt);
11666
158k
        ctxt->instate = XML_PARSER_CONTENT;
11667
158k
                    ctxt->progressive = 1;
11668
21.4M
    } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
11669
21.4M
        (ctxt->input->cur[2] == '[') &&
11670
21.4M
        (ctxt->input->cur[3] == 'C') &&
11671
21.4M
        (ctxt->input->cur[4] == 'D') &&
11672
21.4M
        (ctxt->input->cur[5] == 'A') &&
11673
21.4M
        (ctxt->input->cur[6] == 'T') &&
11674
21.4M
        (ctxt->input->cur[7] == 'A') &&
11675
21.4M
        (ctxt->input->cur[8] == '[')) {
11676
40.9k
        SKIP(9);
11677
40.9k
        ctxt->instate = XML_PARSER_CDATA_SECTION;
11678
40.9k
        break;
11679
21.4M
    } else if ((cur == '<') && (next == '!') &&
11680
21.4M
               (avail < 9)) {
11681
20.8k
        goto done;
11682
21.4M
    } else if (cur == '&') {
11683
9.75M
        if ((!terminate) &&
11684
9.75M
            (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
11685
377k
      goto done;
11686
9.37M
        xmlParseReference(ctxt);
11687
11.6M
    } else {
11688
        /* TODO Avoid the extra copy, handle directly !!! */
11689
        /*
11690
         * Goal of the following test is:
11691
         *  - minimize calls to the SAX 'character' callback
11692
         *    when they are mergeable
11693
         *  - handle an problem for isBlank when we only parse
11694
         *    a sequence of blank chars and the next one is
11695
         *    not available to check against '<' presence.
11696
         *  - tries to homogenize the differences in SAX
11697
         *    callbacks between the push and pull versions
11698
         *    of the parser.
11699
         */
11700
11.6M
        if ((ctxt->inputNr == 1) &&
11701
11.6M
            (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
11702
8.03M
      if (!terminate) {
11703
7.93M
          if (ctxt->progressive) {
11704
7.93M
        if ((lastlt == NULL) ||
11705
7.93M
            (ctxt->input->cur > lastlt))
11706
764k
            goto done;
11707
7.93M
          } else if (xmlParseLookupSequence(ctxt,
11708
0
                                            '<', 0, 0) < 0) {
11709
0
        goto done;
11710
0
          }
11711
7.93M
      }
11712
8.03M
                    }
11713
10.8M
        ctxt->checkIndex = 0;
11714
10.8M
        xmlParseCharData(ctxt, 0);
11715
10.8M
    }
11716
20.4M
    if ((cons == CUR_CONSUMED) && (id == ctxt->input->id)) {
11717
144k
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
11718
144k
                    "detected an error in element content\n");
11719
144k
        xmlHaltParser(ctxt);
11720
144k
        break;
11721
144k
    }
11722
20.3M
    break;
11723
20.4M
      }
11724
20.3M
            case XML_PARSER_END_TAG:
11725
2.05M
    if (avail < 2)
11726
0
        goto done;
11727
2.05M
    if (!terminate) {
11728
1.92M
        if (ctxt->progressive) {
11729
            /* > can be found unescaped in attribute values */
11730
1.92M
            if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
11731
84.1k
          goto done;
11732
1.92M
        } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11733
0
      goto done;
11734
0
        }
11735
1.92M
    }
11736
1.96M
    if (ctxt->sax2) {
11737
1.09M
              xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
11738
1.09M
        nameNsPop(ctxt);
11739
1.09M
    }
11740
867k
#ifdef LIBXML_SAX1_ENABLED
11741
867k
      else
11742
867k
        xmlParseEndTag1(ctxt, 0);
11743
1.96M
#endif /* LIBXML_SAX1_ENABLED */
11744
1.96M
    if (ctxt->instate == XML_PARSER_EOF) {
11745
        /* Nothing */
11746
1.96M
    } else if (ctxt->nameNr == 0) {
11747
68.7k
        ctxt->instate = XML_PARSER_EPILOG;
11748
1.89M
    } else {
11749
1.89M
        ctxt->instate = XML_PARSER_CONTENT;
11750
1.89M
    }
11751
1.96M
    break;
11752
1.35M
            case XML_PARSER_CDATA_SECTION: {
11753
          /*
11754
     * The Push mode need to have the SAX callback for
11755
     * cdataBlock merge back contiguous callbacks.
11756
     */
11757
1.35M
    int base;
11758
11759
1.35M
    base = xmlParseLookupSequence(ctxt, ']', ']', '>');
11760
1.35M
    if (base < 0) {
11761
1.01M
        if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
11762
967k
            int tmp;
11763
11764
967k
      tmp = xmlCheckCdataPush(ctxt->input->cur,
11765
967k
                              XML_PARSER_BIG_BUFFER_SIZE, 0);
11766
967k
      if (tmp < 0) {
11767
5.89k
          tmp = -tmp;
11768
5.89k
          ctxt->input->cur += tmp;
11769
5.89k
          goto encoding_error;
11770
5.89k
      }
11771
961k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
11772
961k
          if (ctxt->sax->cdataBlock != NULL)
11773
584k
        ctxt->sax->cdataBlock(ctxt->userData,
11774
584k
                              ctxt->input->cur, tmp);
11775
377k
          else if (ctxt->sax->characters != NULL)
11776
377k
        ctxt->sax->characters(ctxt->userData,
11777
377k
                              ctxt->input->cur, tmp);
11778
961k
      }
11779
961k
      if (ctxt->instate == XML_PARSER_EOF)
11780
0
          goto done;
11781
961k
      SKIPL(tmp);
11782
961k
      ctxt->checkIndex = 0;
11783
961k
        }
11784
1.01M
        goto done;
11785
1.01M
    } else {
11786
334k
        int tmp;
11787
11788
334k
        tmp = xmlCheckCdataPush(ctxt->input->cur, base, 1);
11789
334k
        if ((tmp < 0) || (tmp != base)) {
11790
310k
      tmp = -tmp;
11791
310k
      ctxt->input->cur += tmp;
11792
310k
      goto encoding_error;
11793
310k
        }
11794
23.6k
        if ((ctxt->sax != NULL) && (base == 0) &&
11795
23.6k
            (ctxt->sax->cdataBlock != NULL) &&
11796
23.6k
            (!ctxt->disableSAX)) {
11797
      /*
11798
       * Special case to provide identical behaviour
11799
       * between pull and push parsers on enpty CDATA
11800
       * sections
11801
       */
11802
842
       if ((ctxt->input->cur - ctxt->input->base >= 9) &&
11803
842
           (!strncmp((const char *)&ctxt->input->cur[-9],
11804
842
                     "<![CDATA[", 9)))
11805
842
           ctxt->sax->cdataBlock(ctxt->userData,
11806
842
                                 BAD_CAST "", 0);
11807
22.8k
        } else if ((ctxt->sax != NULL) && (base > 0) &&
11808
22.8k
      (!ctxt->disableSAX)) {
11809
22.3k
      if (ctxt->sax->cdataBlock != NULL)
11810
16.6k
          ctxt->sax->cdataBlock(ctxt->userData,
11811
16.6k
              ctxt->input->cur, base);
11812
5.66k
      else if (ctxt->sax->characters != NULL)
11813
5.66k
          ctxt->sax->characters(ctxt->userData,
11814
5.66k
              ctxt->input->cur, base);
11815
22.3k
        }
11816
23.6k
        if (ctxt->instate == XML_PARSER_EOF)
11817
0
      goto done;
11818
23.6k
        SKIPL(base + 3);
11819
23.6k
        ctxt->checkIndex = 0;
11820
23.6k
        ctxt->instate = XML_PARSER_CONTENT;
11821
#ifdef DEBUG_PUSH
11822
        xmlGenericError(xmlGenericErrorContext,
11823
          "PP: entering CONTENT\n");
11824
#endif
11825
23.6k
    }
11826
23.6k
    break;
11827
1.35M
      }
11828
774k
            case XML_PARSER_MISC:
11829
774k
    SKIP_BLANKS;
11830
774k
    if (ctxt->input->buf == NULL)
11831
0
        avail = ctxt->input->length -
11832
0
                (ctxt->input->cur - ctxt->input->base);
11833
774k
    else
11834
774k
        avail = xmlBufUse(ctxt->input->buf->buffer) -
11835
774k
                (ctxt->input->cur - ctxt->input->base);
11836
774k
    if (avail < 2)
11837
9.76k
        goto done;
11838
764k
    cur = ctxt->input->cur[0];
11839
764k
    next = ctxt->input->cur[1];
11840
764k
          if ((cur == '<') && (next == '?')) {
11841
46.6k
        if ((!terminate) &&
11842
46.6k
            (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11843
7.43k
                        ctxt->progressive = XML_PARSER_PI;
11844
7.43k
      goto done;
11845
7.43k
                    }
11846
#ifdef DEBUG_PUSH
11847
        xmlGenericError(xmlGenericErrorContext,
11848
          "PP: Parsing PI\n");
11849
#endif
11850
39.2k
        xmlParsePI(ctxt);
11851
39.2k
        if (ctxt->instate == XML_PARSER_EOF)
11852
0
      goto done;
11853
39.2k
        ctxt->instate = XML_PARSER_MISC;
11854
39.2k
                    ctxt->progressive = 1;
11855
39.2k
        ctxt->checkIndex = 0;
11856
718k
    } else if ((cur == '<') && (next == '!') &&
11857
718k
        (ctxt->input->cur[2] == '-') &&
11858
718k
        (ctxt->input->cur[3] == '-')) {
11859
43.4k
        if ((!terminate) &&
11860
43.4k
            (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11861
25.6k
                        ctxt->progressive = XML_PARSER_COMMENT;
11862
25.6k
      goto done;
11863
25.6k
                    }
11864
#ifdef DEBUG_PUSH
11865
        xmlGenericError(xmlGenericErrorContext,
11866
          "PP: Parsing Comment\n");
11867
#endif
11868
17.7k
        xmlParseComment(ctxt);
11869
17.7k
        if (ctxt->instate == XML_PARSER_EOF)
11870
0
      goto done;
11871
17.7k
        ctxt->instate = XML_PARSER_MISC;
11872
17.7k
                    ctxt->progressive = 1;
11873
17.7k
        ctxt->checkIndex = 0;
11874
674k
    } else if ((cur == '<') && (next == '!') &&
11875
674k
        (ctxt->input->cur[2] == 'D') &&
11876
674k
        (ctxt->input->cur[3] == 'O') &&
11877
674k
        (ctxt->input->cur[4] == 'C') &&
11878
674k
        (ctxt->input->cur[5] == 'T') &&
11879
674k
        (ctxt->input->cur[6] == 'Y') &&
11880
674k
        (ctxt->input->cur[7] == 'P') &&
11881
674k
        (ctxt->input->cur[8] == 'E')) {
11882
307k
        if ((!terminate) &&
11883
307k
            (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) {
11884
37.9k
                        ctxt->progressive = XML_PARSER_DTD;
11885
37.9k
      goto done;
11886
37.9k
                    }
11887
#ifdef DEBUG_PUSH
11888
        xmlGenericError(xmlGenericErrorContext,
11889
          "PP: Parsing internal subset\n");
11890
#endif
11891
269k
        ctxt->inSubset = 1;
11892
269k
                    ctxt->progressive = 0;
11893
269k
        ctxt->checkIndex = 0;
11894
269k
        xmlParseDocTypeDecl(ctxt);
11895
269k
        if (ctxt->instate == XML_PARSER_EOF)
11896
0
      goto done;
11897
269k
        if (RAW == '[') {
11898
194k
      ctxt->instate = XML_PARSER_DTD;
11899
#ifdef DEBUG_PUSH
11900
      xmlGenericError(xmlGenericErrorContext,
11901
        "PP: entering DTD\n");
11902
#endif
11903
194k
        } else {
11904
      /*
11905
       * Create and update the external subset.
11906
       */
11907
74.1k
      ctxt->inSubset = 2;
11908
74.1k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11909
74.1k
          (ctxt->sax->externalSubset != NULL))
11910
68.7k
          ctxt->sax->externalSubset(ctxt->userData,
11911
68.7k
            ctxt->intSubName, ctxt->extSubSystem,
11912
68.7k
            ctxt->extSubURI);
11913
74.1k
      ctxt->inSubset = 0;
11914
74.1k
      xmlCleanSpecialAttr(ctxt);
11915
74.1k
      ctxt->instate = XML_PARSER_PROLOG;
11916
#ifdef DEBUG_PUSH
11917
      xmlGenericError(xmlGenericErrorContext,
11918
        "PP: entering PROLOG\n");
11919
#endif
11920
74.1k
        }
11921
367k
    } else if ((cur == '<') && (next == '!') &&
11922
367k
               (avail < 9)) {
11923
4.43k
        goto done;
11924
363k
    } else {
11925
363k
        ctxt->instate = XML_PARSER_START_TAG;
11926
363k
        ctxt->progressive = XML_PARSER_START_TAG;
11927
363k
        xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11928
#ifdef DEBUG_PUSH
11929
        xmlGenericError(xmlGenericErrorContext,
11930
          "PP: entering START_TAG\n");
11931
#endif
11932
363k
    }
11933
689k
    break;
11934
689k
            case XML_PARSER_PROLOG:
11935
247k
    SKIP_BLANKS;
11936
247k
    if (ctxt->input->buf == NULL)
11937
0
        avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11938
247k
    else
11939
247k
        avail = xmlBufUse(ctxt->input->buf->buffer) -
11940
247k
                            (ctxt->input->cur - ctxt->input->base);
11941
247k
    if (avail < 2)
11942
5.35k
        goto done;
11943
242k
    cur = ctxt->input->cur[0];
11944
242k
    next = ctxt->input->cur[1];
11945
242k
          if ((cur == '<') && (next == '?')) {
11946
38.5k
        if ((!terminate) &&
11947
38.5k
            (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11948
16.5k
                        ctxt->progressive = XML_PARSER_PI;
11949
16.5k
      goto done;
11950
16.5k
                    }
11951
#ifdef DEBUG_PUSH
11952
        xmlGenericError(xmlGenericErrorContext,
11953
          "PP: Parsing PI\n");
11954
#endif
11955
22.0k
        xmlParsePI(ctxt);
11956
22.0k
        if (ctxt->instate == XML_PARSER_EOF)
11957
0
      goto done;
11958
22.0k
        ctxt->instate = XML_PARSER_PROLOG;
11959
22.0k
                    ctxt->progressive = 1;
11960
203k
    } else if ((cur == '<') && (next == '!') &&
11961
203k
        (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11962
41.6k
        if ((!terminate) &&
11963
41.6k
            (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11964
13.8k
                        ctxt->progressive = XML_PARSER_COMMENT;
11965
13.8k
      goto done;
11966
13.8k
                    }
11967
#ifdef DEBUG_PUSH
11968
        xmlGenericError(xmlGenericErrorContext,
11969
          "PP: Parsing Comment\n");
11970
#endif
11971
27.8k
        xmlParseComment(ctxt);
11972
27.8k
        if (ctxt->instate == XML_PARSER_EOF)
11973
0
      goto done;
11974
27.8k
        ctxt->instate = XML_PARSER_PROLOG;
11975
27.8k
                    ctxt->progressive = 1;
11976
161k
    } else if ((cur == '<') && (next == '!') &&
11977
161k
               (avail < 4)) {
11978
345
        goto done;
11979
161k
    } else {
11980
161k
        ctxt->instate = XML_PARSER_START_TAG;
11981
161k
        if (ctxt->progressive == 0)
11982
135k
      ctxt->progressive = XML_PARSER_START_TAG;
11983
161k
        xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11984
#ifdef DEBUG_PUSH
11985
        xmlGenericError(xmlGenericErrorContext,
11986
          "PP: entering START_TAG\n");
11987
#endif
11988
161k
    }
11989
211k
    break;
11990
211k
            case XML_PARSER_EPILOG:
11991
89.0k
    SKIP_BLANKS;
11992
89.0k
    if (ctxt->input->buf == NULL)
11993
0
        avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11994
89.0k
    else
11995
89.0k
        avail = xmlBufUse(ctxt->input->buf->buffer) -
11996
89.0k
                            (ctxt->input->cur - ctxt->input->base);
11997
89.0k
    if (avail < 2)
11998
62.7k
        goto done;
11999
26.3k
    cur = ctxt->input->cur[0];
12000
26.3k
    next = ctxt->input->cur[1];
12001
26.3k
          if ((cur == '<') && (next == '?')) {
12002
7.67k
        if ((!terminate) &&
12003
7.67k
            (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
12004
5.74k
                        ctxt->progressive = XML_PARSER_PI;
12005
5.74k
      goto done;
12006
5.74k
                    }
12007
#ifdef DEBUG_PUSH
12008
        xmlGenericError(xmlGenericErrorContext,
12009
          "PP: Parsing PI\n");
12010
#endif
12011
1.93k
        xmlParsePI(ctxt);
12012
1.93k
        if (ctxt->instate == XML_PARSER_EOF)
12013
0
      goto done;
12014
1.93k
        ctxt->instate = XML_PARSER_EPILOG;
12015
1.93k
                    ctxt->progressive = 1;
12016
18.6k
    } else if ((cur == '<') && (next == '!') &&
12017
18.6k
        (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
12018
5.81k
        if ((!terminate) &&
12019
5.81k
            (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
12020
4.98k
                        ctxt->progressive = XML_PARSER_COMMENT;
12021
4.98k
      goto done;
12022
4.98k
                    }
12023
#ifdef DEBUG_PUSH
12024
        xmlGenericError(xmlGenericErrorContext,
12025
          "PP: Parsing Comment\n");
12026
#endif
12027
825
        xmlParseComment(ctxt);
12028
825
        if (ctxt->instate == XML_PARSER_EOF)
12029
0
      goto done;
12030
825
        ctxt->instate = XML_PARSER_EPILOG;
12031
825
                    ctxt->progressive = 1;
12032
12.8k
    } else if ((cur == '<') && (next == '!') &&
12033
12.8k
               (avail < 4)) {
12034
566
        goto done;
12035
12.2k
    } else {
12036
12.2k
        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
12037
12.2k
        xmlHaltParser(ctxt);
12038
#ifdef DEBUG_PUSH
12039
        xmlGenericError(xmlGenericErrorContext,
12040
          "PP: entering EOF\n");
12041
#endif
12042
12.2k
        if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
12043
12.2k
      ctxt->sax->endDocument(ctxt->userData);
12044
12.2k
        goto done;
12045
12.2k
    }
12046
2.75k
    break;
12047
4.34M
            case XML_PARSER_DTD: {
12048
          /*
12049
     * Sorry but progressive parsing of the internal subset
12050
     * is not expected to be supported. We first check that
12051
     * the full content of the internal subset is available and
12052
     * the parsing is launched only at that point.
12053
     * Internal subset ends up with "']' S? '>'" in an unescaped
12054
     * section and not in a ']]>' sequence which are conditional
12055
     * sections (whoever argued to keep that crap in XML deserve
12056
     * a place in hell !).
12057
     */
12058
4.34M
    int base, i;
12059
4.34M
    xmlChar *buf;
12060
4.34M
          xmlChar quote = 0;
12061
4.34M
                size_t use;
12062
12063
4.34M
    base = ctxt->input->cur - ctxt->input->base;
12064
4.34M
    if (base < 0) return(0);
12065
4.34M
    if (ctxt->checkIndex > base)
12066
1.78M
        base = ctxt->checkIndex;
12067
4.34M
    buf = xmlBufContent(ctxt->input->buf->buffer);
12068
4.34M
                use = xmlBufUse(ctxt->input->buf->buffer);
12069
314G
    for (;(unsigned int) base < use; base++) {
12070
314G
        if (quote != 0) {
12071
209G
            if (buf[base] == quote)
12072
12.0G
          quote = 0;
12073
209G
      continue;
12074
209G
        }
12075
105G
        if ((quote == 0) && (buf[base] == '<')) {
12076
3.19G
            int found  = 0;
12077
      /* special handling of comments */
12078
3.19G
            if (((unsigned int) base + 4 < use) &&
12079
3.19G
          (buf[base + 1] == '!') &&
12080
3.19G
          (buf[base + 2] == '-') &&
12081
3.19G
          (buf[base + 3] == '-')) {
12082
3.72G
          for (;(unsigned int) base + 3 < use; base++) {
12083
3.72G
        if ((buf[base] == '-') &&
12084
3.72G
            (buf[base + 1] == '-') &&
12085
3.72G
            (buf[base + 2] == '>')) {
12086
6.25M
            found = 1;
12087
6.25M
            base += 2;
12088
6.25M
            break;
12089
6.25M
        }
12090
3.72G
                }
12091
6.39M
          if (!found) {
12092
#if 0
12093
              fprintf(stderr, "unfinished comment\n");
12094
#endif
12095
138k
              break; /* for */
12096
138k
                }
12097
6.25M
                continue;
12098
6.39M
      }
12099
3.19G
        }
12100
105G
        if (buf[base] == '"') {
12101
12.0G
            quote = '"';
12102
12.0G
      continue;
12103
12.0G
        }
12104
93.2G
        if (buf[base] == '\'') {
12105
68.4M
            quote = '\'';
12106
68.4M
      continue;
12107
68.4M
        }
12108
93.2G
        if (buf[base] == ']') {
12109
#if 0
12110
            fprintf(stderr, "%c%c%c%c: ", buf[base],
12111
              buf[base + 1], buf[base + 2], buf[base + 3]);
12112
#endif
12113
7.66M
            if ((unsigned int) base +1 >= use)
12114
724
          break;
12115
7.66M
      if (buf[base + 1] == ']') {
12116
          /* conditional crap, skip both ']' ! */
12117
4.41M
          base++;
12118
4.41M
          continue;
12119
4.41M
      }
12120
5.48M
            for (i = 1; (unsigned int) base + i < use; i++) {
12121
5.48M
          if (buf[base + i] == '>') {
12122
#if 0
12123
              fprintf(stderr, "found\n");
12124
#endif
12125
148k
              goto found_end_int_subset;
12126
148k
          }
12127
5.33M
          if (!IS_BLANK_CH(buf[base + i])) {
12128
#if 0
12129
              fprintf(stderr, "not found\n");
12130
#endif
12131
3.10M
              goto not_end_of_int_subset;
12132
3.10M
          }
12133
5.33M
      }
12134
#if 0
12135
      fprintf(stderr, "end of stream\n");
12136
#endif
12137
374
            break;
12138
12139
3.25M
        }
12140
93.1G
not_end_of_int_subset:
12141
93.1G
                    continue; /* for */
12142
93.2G
    }
12143
    /*
12144
     * We didn't found the end of the Internal subset
12145
     */
12146
4.19M
                if (quote == 0)
12147
1.81M
                    ctxt->checkIndex = base;
12148
2.38M
                else
12149
2.38M
                    ctxt->checkIndex = 0;
12150
#ifdef DEBUG_PUSH
12151
    if (next == 0)
12152
        xmlGenericError(xmlGenericErrorContext,
12153
          "PP: lookup of int subset end filed\n");
12154
#endif
12155
4.19M
          goto done;
12156
12157
148k
found_end_int_subset:
12158
148k
                ctxt->checkIndex = 0;
12159
148k
    xmlParseInternalSubset(ctxt);
12160
148k
    if (ctxt->instate == XML_PARSER_EOF)
12161
5.02k
        goto done;
12162
143k
    ctxt->inSubset = 2;
12163
143k
    if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
12164
143k
        (ctxt->sax->externalSubset != NULL))
12165
119k
        ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
12166
119k
          ctxt->extSubSystem, ctxt->extSubURI);
12167
143k
    ctxt->inSubset = 0;
12168
143k
    xmlCleanSpecialAttr(ctxt);
12169
143k
    if (ctxt->instate == XML_PARSER_EOF)
12170
3.42k
        goto done;
12171
140k
    ctxt->instate = XML_PARSER_PROLOG;
12172
140k
    ctxt->checkIndex = 0;
12173
#ifdef DEBUG_PUSH
12174
    xmlGenericError(xmlGenericErrorContext,
12175
      "PP: entering PROLOG\n");
12176
#endif
12177
140k
                break;
12178
143k
      }
12179
0
            case XML_PARSER_COMMENT:
12180
0
    xmlGenericError(xmlGenericErrorContext,
12181
0
      "PP: internal error, state == COMMENT\n");
12182
0
    ctxt->instate = XML_PARSER_CONTENT;
12183
#ifdef DEBUG_PUSH
12184
    xmlGenericError(xmlGenericErrorContext,
12185
      "PP: entering CONTENT\n");
12186
#endif
12187
0
    break;
12188
0
            case XML_PARSER_IGNORE:
12189
0
    xmlGenericError(xmlGenericErrorContext,
12190
0
      "PP: internal error, state == IGNORE");
12191
0
          ctxt->instate = XML_PARSER_DTD;
12192
#ifdef DEBUG_PUSH
12193
    xmlGenericError(xmlGenericErrorContext,
12194
      "PP: entering DTD\n");
12195
#endif
12196
0
          break;
12197
0
            case XML_PARSER_PI:
12198
0
    xmlGenericError(xmlGenericErrorContext,
12199
0
      "PP: internal error, state == PI\n");
12200
0
    ctxt->instate = XML_PARSER_CONTENT;
12201
#ifdef DEBUG_PUSH
12202
    xmlGenericError(xmlGenericErrorContext,
12203
      "PP: entering CONTENT\n");
12204
#endif
12205
0
    break;
12206
0
            case XML_PARSER_ENTITY_DECL:
12207
0
    xmlGenericError(xmlGenericErrorContext,
12208
0
      "PP: internal error, state == ENTITY_DECL\n");
12209
0
    ctxt->instate = XML_PARSER_DTD;
12210
#ifdef DEBUG_PUSH
12211
    xmlGenericError(xmlGenericErrorContext,
12212
      "PP: entering DTD\n");
12213
#endif
12214
0
    break;
12215
0
            case XML_PARSER_ENTITY_VALUE:
12216
0
    xmlGenericError(xmlGenericErrorContext,
12217
0
      "PP: internal error, state == ENTITY_VALUE\n");
12218
0
    ctxt->instate = XML_PARSER_CONTENT;
12219
#ifdef DEBUG_PUSH
12220
    xmlGenericError(xmlGenericErrorContext,
12221
      "PP: entering DTD\n");
12222
#endif
12223
0
    break;
12224
0
            case XML_PARSER_ATTRIBUTE_VALUE:
12225
0
    xmlGenericError(xmlGenericErrorContext,
12226
0
      "PP: internal error, state == ATTRIBUTE_VALUE\n");
12227
0
    ctxt->instate = XML_PARSER_START_TAG;
12228
#ifdef DEBUG_PUSH
12229
    xmlGenericError(xmlGenericErrorContext,
12230
      "PP: entering START_TAG\n");
12231
#endif
12232
0
    break;
12233
0
            case XML_PARSER_SYSTEM_LITERAL:
12234
0
    xmlGenericError(xmlGenericErrorContext,
12235
0
      "PP: internal error, state == SYSTEM_LITERAL\n");
12236
0
    ctxt->instate = XML_PARSER_START_TAG;
12237
#ifdef DEBUG_PUSH
12238
    xmlGenericError(xmlGenericErrorContext,
12239
      "PP: entering START_TAG\n");
12240
#endif
12241
0
    break;
12242
0
            case XML_PARSER_PUBLIC_LITERAL:
12243
0
    xmlGenericError(xmlGenericErrorContext,
12244
0
      "PP: internal error, state == PUBLIC_LITERAL\n");
12245
0
    ctxt->instate = XML_PARSER_START_TAG;
12246
#ifdef DEBUG_PUSH
12247
    xmlGenericError(xmlGenericErrorContext,
12248
      "PP: entering START_TAG\n");
12249
#endif
12250
0
    break;
12251
49.1M
  }
12252
49.1M
    }
12253
8.60M
done:
12254
#ifdef DEBUG_PUSH
12255
    xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
12256
#endif
12257
8.60M
    return(ret);
12258
316k
encoding_error:
12259
316k
    {
12260
316k
        char buffer[150];
12261
12262
316k
  snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
12263
316k
      ctxt->input->cur[0], ctxt->input->cur[1],
12264
316k
      ctxt->input->cur[2], ctxt->input->cur[3]);
12265
316k
  __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
12266
316k
         "Input is not proper UTF-8, indicate encoding !\n%s",
12267
316k
         BAD_CAST buffer, NULL);
12268
316k
    }
12269
316k
    return(0);
12270
9.58M
}
12271
12272
/**
12273
 * xmlParseCheckTransition:
12274
 * @ctxt:  an XML parser context
12275
 * @chunk:  a char array
12276
 * @size:  the size in byte of the chunk
12277
 *
12278
 * Check depending on the current parser state if the chunk given must be
12279
 * processed immediately or one need more data to advance on parsing.
12280
 *
12281
 * Returns -1 in case of error, 0 if the push is not needed and 1 if needed
12282
 */
12283
static int
12284
11.0M
xmlParseCheckTransition(xmlParserCtxtPtr ctxt, const char *chunk, int size) {
12285
11.0M
    if ((ctxt == NULL) || (chunk == NULL) || (size < 0))
12286
0
        return(-1);
12287
11.0M
    if (ctxt->instate == XML_PARSER_START_TAG) {
12288
1.58M
        if (memchr(chunk, '>', size) != NULL)
12289
879k
            return(1);
12290
707k
        return(0);
12291
1.58M
    }
12292
9.41M
    if (ctxt->progressive == XML_PARSER_COMMENT) {
12293
362k
        if (memchr(chunk, '>', size) != NULL)
12294
264k
            return(1);
12295
97.7k
        return(0);
12296
362k
    }
12297
9.05M
    if (ctxt->instate == XML_PARSER_CDATA_SECTION) {
12298
1.66M
        if (memchr(chunk, '>', size) != NULL)
12299
1.30M
            return(1);
12300
358k
        return(0);
12301
1.66M
    }
12302
7.39M
    if (ctxt->progressive == XML_PARSER_PI) {
12303
214k
        if (memchr(chunk, '>', size) != NULL)
12304
168k
            return(1);
12305
46.3k
        return(0);
12306
214k
    }
12307
7.17M
    if (ctxt->instate == XML_PARSER_END_TAG) {
12308
86.8k
        if (memchr(chunk, '>', size) != NULL)
12309
73.7k
            return(1);
12310
13.0k
        return(0);
12311
86.8k
    }
12312
7.08M
    if ((ctxt->progressive == XML_PARSER_DTD) ||
12313
7.08M
        (ctxt->instate == XML_PARSER_DTD)) {
12314
5.00M
        if (memchr(chunk, '>', size) != NULL)
12315
4.12M
            return(1);
12316
882k
        return(0);
12317
5.00M
    }
12318
2.08M
    return(1);
12319
7.08M
}
12320
12321
/**
12322
 * xmlParseChunk:
12323
 * @ctxt:  an XML parser context
12324
 * @chunk:  an char array
12325
 * @size:  the size in byte of the chunk
12326
 * @terminate:  last chunk indicator
12327
 *
12328
 * Parse a Chunk of memory
12329
 *
12330
 * Returns zero if no error, the xmlParserErrors otherwise.
12331
 */
12332
int
12333
xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
12334
20.0M
              int terminate) {
12335
20.0M
    int end_in_lf = 0;
12336
20.0M
    int remain = 0;
12337
20.0M
    size_t old_avail = 0;
12338
20.0M
    size_t avail = 0;
12339
12340
20.0M
    if (ctxt == NULL)
12341
0
        return(XML_ERR_INTERNAL_ERROR);
12342
20.0M
    if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12343
8.36M
        return(ctxt->errNo);
12344
11.6M
    if (ctxt->instate == XML_PARSER_EOF)
12345
1.39k
        return(-1);
12346
11.6M
    if (ctxt->instate == XML_PARSER_START)
12347
1.14M
        xmlDetectSAX2(ctxt);
12348
11.6M
    if ((size > 0) && (chunk != NULL) && (!terminate) &&
12349
11.6M
        (chunk[size - 1] == '\r')) {
12350
175k
  end_in_lf = 1;
12351
175k
  size--;
12352
175k
    }
12353
12354
11.7M
xmldecl_done:
12355
12356
11.7M
    if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
12357
11.7M
        (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF))  {
12358
11.4M
  size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12359
11.4M
  size_t cur = ctxt->input->cur - ctxt->input->base;
12360
11.4M
  int res;
12361
12362
11.4M
        old_avail = xmlBufUse(ctxt->input->buf->buffer);
12363
        /*
12364
         * Specific handling if we autodetected an encoding, we should not
12365
         * push more than the first line ... which depend on the encoding
12366
         * And only push the rest once the final encoding was detected
12367
         */
12368
11.4M
        if ((ctxt->instate == XML_PARSER_START) && (ctxt->input != NULL) &&
12369
11.4M
            (ctxt->input->buf != NULL) && (ctxt->input->buf->encoder != NULL)) {
12370
69.6k
            unsigned int len = 45;
12371
12372
69.6k
            if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12373
69.6k
                               BAD_CAST "UTF-16")) ||
12374
69.6k
                (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12375
26.1k
                               BAD_CAST "UTF16")))
12376
43.5k
                len = 90;
12377
26.1k
            else if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12378
26.1k
                                    BAD_CAST "UCS-4")) ||
12379
26.1k
                     (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12380
25.7k
                                    BAD_CAST "UCS4")))
12381
355
                len = 180;
12382
12383
69.6k
            if (ctxt->input->buf->rawconsumed < len)
12384
9.07k
                len -= ctxt->input->buf->rawconsumed;
12385
12386
            /*
12387
             * Change size for reading the initial declaration only
12388
             * if size is greater than len. Otherwise, memmove in xmlBufferAdd
12389
             * will blindly copy extra bytes from memory.
12390
             */
12391
69.6k
            if ((unsigned int) size > len) {
12392
47.5k
                remain = size - len;
12393
47.5k
                size = len;
12394
47.5k
            } else {
12395
22.1k
                remain = 0;
12396
22.1k
            }
12397
69.6k
        }
12398
11.4M
  res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12399
11.4M
        xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
12400
11.4M
  if (res < 0) {
12401
3.63k
      ctxt->errNo = XML_PARSER_EOF;
12402
3.63k
      xmlHaltParser(ctxt);
12403
3.63k
      return (XML_PARSER_EOF);
12404
3.63k
  }
12405
#ifdef DEBUG_PUSH
12406
  xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12407
#endif
12408
12409
11.4M
    } else if (ctxt->instate != XML_PARSER_EOF) {
12410
284k
  if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
12411
284k
      xmlParserInputBufferPtr in = ctxt->input->buf;
12412
284k
      if ((in->encoder != NULL) && (in->buffer != NULL) &&
12413
284k
        (in->raw != NULL)) {
12414
22.3k
    int nbchars;
12415
22.3k
    size_t base = xmlBufGetInputBase(in->buffer, ctxt->input);
12416
22.3k
    size_t current = ctxt->input->cur - ctxt->input->base;
12417
12418
22.3k
    nbchars = xmlCharEncInput(in, terminate);
12419
22.3k
    xmlBufSetInputBaseCur(in->buffer, ctxt->input, base, current);
12420
22.3k
    if (nbchars < 0) {
12421
        /* TODO 2.6.0 */
12422
3.39k
        xmlGenericError(xmlGenericErrorContext,
12423
3.39k
            "xmlParseChunk: encoder error\n");
12424
3.39k
                    xmlHaltParser(ctxt);
12425
3.39k
        return(XML_ERR_INVALID_ENCODING);
12426
3.39k
    }
12427
22.3k
      }
12428
284k
  }
12429
284k
    }
12430
11.6M
    if (remain != 0) {
12431
46.4k
        xmlParseTryOrFinish(ctxt, 0);
12432
11.6M
    } else {
12433
11.6M
        if ((ctxt->input != NULL) && (ctxt->input->buf != NULL))
12434
11.6M
            avail = xmlBufUse(ctxt->input->buf->buffer);
12435
        /*
12436
         * Depending on the current state it may not be such
12437
         * a good idea to try parsing if there is nothing in the chunk
12438
         * which would be worth doing a parser state transition and we
12439
         * need to wait for more data
12440
         */
12441
11.6M
        if ((terminate) || (avail > XML_MAX_TEXT_LENGTH) ||
12442
11.6M
            (old_avail == 0) || (avail == 0) ||
12443
11.6M
            (xmlParseCheckTransition(ctxt,
12444
11.0M
                       (const char *)&ctxt->input->base[old_avail],
12445
11.0M
                                     avail - old_avail)))
12446
9.54M
            xmlParseTryOrFinish(ctxt, terminate);
12447
11.6M
    }
12448
11.6M
    if (ctxt->instate == XML_PARSER_EOF)
12449
265k
        return(ctxt->errNo);
12450
12451
11.4M
    if ((ctxt->input != NULL) &&
12452
11.4M
         (((ctxt->input->end - ctxt->input->cur) > XML_MAX_LOOKUP_LIMIT) ||
12453
11.4M
         ((ctxt->input->cur - ctxt->input->base) > XML_MAX_LOOKUP_LIMIT)) &&
12454
11.4M
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
12455
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
12456
0
        xmlHaltParser(ctxt);
12457
0
    }
12458
11.4M
    if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12459
199k
        return(ctxt->errNo);
12460
12461
11.2M
    if (remain != 0) {
12462
44.7k
        chunk += size;
12463
44.7k
        size = remain;
12464
44.7k
        remain = 0;
12465
44.7k
        goto xmldecl_done;
12466
44.7k
    }
12467
11.1M
    if ((end_in_lf == 1) && (ctxt->input != NULL) &&
12468
11.1M
        (ctxt->input->buf != NULL)) {
12469
174k
  size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
12470
174k
           ctxt->input);
12471
174k
  size_t current = ctxt->input->cur - ctxt->input->base;
12472
12473
174k
  xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
12474
12475
174k
  xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
12476
174k
            base, current);
12477
174k
    }
12478
11.1M
    if (terminate) {
12479
  /*
12480
   * Check for termination
12481
   */
12482
149k
  int cur_avail = 0;
12483
12484
149k
  if (ctxt->input != NULL) {
12485
149k
      if (ctxt->input->buf == NULL)
12486
0
    cur_avail = ctxt->input->length -
12487
0
          (ctxt->input->cur - ctxt->input->base);
12488
149k
      else
12489
149k
    cur_avail = xmlBufUse(ctxt->input->buf->buffer) -
12490
149k
                    (ctxt->input->cur - ctxt->input->base);
12491
149k
  }
12492
12493
149k
  if ((ctxt->instate != XML_PARSER_EOF) &&
12494
149k
      (ctxt->instate != XML_PARSER_EPILOG)) {
12495
96.9k
      xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
12496
96.9k
  }
12497
149k
  if ((ctxt->instate == XML_PARSER_EPILOG) && (cur_avail > 0)) {
12498
974
      xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
12499
974
  }
12500
149k
  if (ctxt->instate != XML_PARSER_EOF) {
12501
149k
      if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
12502
149k
    ctxt->sax->endDocument(ctxt->userData);
12503
149k
  }
12504
149k
  ctxt->instate = XML_PARSER_EOF;
12505
149k
    }
12506
11.1M
    if (ctxt->wellFormed == 0)
12507
4.74M
  return((xmlParserErrors) ctxt->errNo);
12508
6.43M
    else
12509
6.43M
        return(0);
12510
11.1M
}
12511
12512
/************************************************************************
12513
 *                  *
12514
 *    I/O front end functions to the parser     *
12515
 *                  *
12516
 ************************************************************************/
12517
12518
/**
12519
 * xmlCreatePushParserCtxt:
12520
 * @sax:  a SAX handler
12521
 * @user_data:  The user data returned on SAX callbacks
12522
 * @chunk:  a pointer to an array of chars
12523
 * @size:  number of chars in the array
12524
 * @filename:  an optional file name or URI
12525
 *
12526
 * Create a parser context for using the XML parser in push mode.
12527
 * If @buffer and @size are non-NULL, the data is used to detect
12528
 * the encoding.  The remaining characters will be parsed so they
12529
 * don't need to be fed in again through xmlParseChunk.
12530
 * To allow content encoding detection, @size should be >= 4
12531
 * The value of @filename is used for fetching external entities
12532
 * and error/warning reports.
12533
 *
12534
 * Returns the new parser context or NULL
12535
 */
12536
12537
xmlParserCtxtPtr
12538
xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12539
706k
                        const char *chunk, int size, const char *filename) {
12540
706k
    xmlParserCtxtPtr ctxt;
12541
706k
    xmlParserInputPtr inputStream;
12542
706k
    xmlParserInputBufferPtr buf;
12543
706k
    xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
12544
12545
    /*
12546
     * plug some encoding conversion routines
12547
     */
12548
706k
    if ((chunk != NULL) && (size >= 4))
12549
348k
  enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
12550
12551
706k
    buf = xmlAllocParserInputBuffer(enc);
12552
706k
    if (buf == NULL) return(NULL);
12553
12554
706k
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
12555
706k
    if (ctxt == NULL) {
12556
0
        xmlErrMemory(NULL, "creating parser: out of memory\n");
12557
0
  xmlFreeParserInputBuffer(buf);
12558
0
  return(NULL);
12559
0
    }
12560
706k
    ctxt->dictNames = 1;
12561
706k
    if (filename == NULL) {
12562
353k
  ctxt->directory = NULL;
12563
353k
    } else {
12564
353k
        ctxt->directory = xmlParserGetDirectory(filename);
12565
353k
    }
12566
12567
706k
    inputStream = xmlNewInputStream(ctxt);
12568
706k
    if (inputStream == NULL) {
12569
0
  xmlFreeParserCtxt(ctxt);
12570
0
  xmlFreeParserInputBuffer(buf);
12571
0
  return(NULL);
12572
0
    }
12573
12574
706k
    if (filename == NULL)
12575
353k
  inputStream->filename = NULL;
12576
353k
    else {
12577
353k
  inputStream->filename = (char *)
12578
353k
      xmlCanonicPath((const xmlChar *) filename);
12579
353k
  if (inputStream->filename == NULL) {
12580
0
      xmlFreeParserCtxt(ctxt);
12581
0
      xmlFreeParserInputBuffer(buf);
12582
0
      return(NULL);
12583
0
  }
12584
353k
    }
12585
706k
    inputStream->buf = buf;
12586
706k
    xmlBufResetInput(inputStream->buf->buffer, inputStream);
12587
706k
    inputPush(ctxt, inputStream);
12588
12589
    /*
12590
     * If the caller didn't provide an initial 'chunk' for determining
12591
     * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
12592
     * that it can be automatically determined later
12593
     */
12594
706k
    if ((size == 0) || (chunk == NULL)) {
12595
358k
  ctxt->charset = XML_CHAR_ENCODING_NONE;
12596
358k
    } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
12597
348k
  size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12598
348k
  size_t cur = ctxt->input->cur - ctxt->input->base;
12599
12600
348k
  xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12601
12602
348k
        xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
12603
#ifdef DEBUG_PUSH
12604
  xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12605
#endif
12606
348k
    }
12607
12608
706k
    if (enc != XML_CHAR_ENCODING_NONE) {
12609
128k
        xmlSwitchEncoding(ctxt, enc);
12610
128k
    }
12611
12612
706k
    return(ctxt);
12613
706k
}
12614
#endif /* LIBXML_PUSH_ENABLED */
12615
12616
/**
12617
 * xmlHaltParser:
12618
 * @ctxt:  an XML parser context
12619
 *
12620
 * Blocks further parser processing don't override error
12621
 * for internal use
12622
 */
12623
static void
12624
1.33M
xmlHaltParser(xmlParserCtxtPtr ctxt) {
12625
1.33M
    if (ctxt == NULL)
12626
0
        return;
12627
1.33M
    ctxt->instate = XML_PARSER_EOF;
12628
1.33M
    ctxt->disableSAX = 1;
12629
1.33M
    while (ctxt->inputNr > 1)
12630
3.08k
        xmlFreeInputStream(inputPop(ctxt));
12631
1.33M
    if (ctxt->input != NULL) {
12632
        /*
12633
   * in case there was a specific allocation deallocate before
12634
   * overriding base
12635
   */
12636
1.33M
        if (ctxt->input->free != NULL) {
12637
0
      ctxt->input->free((xmlChar *) ctxt->input->base);
12638
0
      ctxt->input->free = NULL;
12639
0
  }
12640
1.33M
        if (ctxt->input->buf != NULL) {
12641
1.20M
            xmlFreeParserInputBuffer(ctxt->input->buf);
12642
1.20M
            ctxt->input->buf = NULL;
12643
1.20M
        }
12644
1.33M
  ctxt->input->cur = BAD_CAST"";
12645
1.33M
        ctxt->input->length = 0;
12646
1.33M
  ctxt->input->base = ctxt->input->cur;
12647
1.33M
        ctxt->input->end = ctxt->input->cur;
12648
1.33M
    }
12649
1.33M
}
12650
12651
/**
12652
 * xmlStopParser:
12653
 * @ctxt:  an XML parser context
12654
 *
12655
 * Blocks further parser processing
12656
 */
12657
void
12658
353k
xmlStopParser(xmlParserCtxtPtr ctxt) {
12659
353k
    if (ctxt == NULL)
12660
0
        return;
12661
353k
    xmlHaltParser(ctxt);
12662
353k
    ctxt->errNo = XML_ERR_USER_STOP;
12663
353k
}
12664
12665
/**
12666
 * xmlCreateIOParserCtxt:
12667
 * @sax:  a SAX handler
12668
 * @user_data:  The user data returned on SAX callbacks
12669
 * @ioread:  an I/O read function
12670
 * @ioclose:  an I/O close function
12671
 * @ioctx:  an I/O handler
12672
 * @enc:  the charset encoding if known
12673
 *
12674
 * Create a parser context for using the XML parser with an existing
12675
 * I/O stream
12676
 *
12677
 * Returns the new parser context or NULL
12678
 */
12679
xmlParserCtxtPtr
12680
xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12681
  xmlInputReadCallback   ioread, xmlInputCloseCallback  ioclose,
12682
0
  void *ioctx, xmlCharEncoding enc) {
12683
0
    xmlParserCtxtPtr ctxt;
12684
0
    xmlParserInputPtr inputStream;
12685
0
    xmlParserInputBufferPtr buf;
12686
12687
0
    if (ioread == NULL) return(NULL);
12688
12689
0
    buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
12690
0
    if (buf == NULL) {
12691
0
        if (ioclose != NULL)
12692
0
            ioclose(ioctx);
12693
0
        return (NULL);
12694
0
    }
12695
12696
0
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
12697
0
    if (ctxt == NULL) {
12698
0
  xmlFreeParserInputBuffer(buf);
12699
0
  return(NULL);
12700
0
    }
12701
12702
0
    inputStream = xmlNewIOInputStream(ctxt, buf, enc);
12703
0
    if (inputStream == NULL) {
12704
0
  xmlFreeParserCtxt(ctxt);
12705
0
  return(NULL);
12706
0
    }
12707
0
    inputPush(ctxt, inputStream);
12708
12709
0
    return(ctxt);
12710
0
}
12711
12712
#ifdef LIBXML_VALID_ENABLED
12713
/************************************************************************
12714
 *                  *
12715
 *    Front ends when parsing a DTD       *
12716
 *                  *
12717
 ************************************************************************/
12718
12719
/**
12720
 * xmlIOParseDTD:
12721
 * @sax:  the SAX handler block or NULL
12722
 * @input:  an Input Buffer
12723
 * @enc:  the charset encoding if known
12724
 *
12725
 * Load and parse a DTD
12726
 *
12727
 * Returns the resulting xmlDtdPtr or NULL in case of error.
12728
 * @input will be freed by the function in any case.
12729
 */
12730
12731
xmlDtdPtr
12732
xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
12733
0
        xmlCharEncoding enc) {
12734
0
    xmlDtdPtr ret = NULL;
12735
0
    xmlParserCtxtPtr ctxt;
12736
0
    xmlParserInputPtr pinput = NULL;
12737
0
    xmlChar start[4];
12738
12739
0
    if (input == NULL)
12740
0
  return(NULL);
12741
12742
0
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
12743
0
    if (ctxt == NULL) {
12744
0
        xmlFreeParserInputBuffer(input);
12745
0
  return(NULL);
12746
0
    }
12747
12748
    /* We are loading a DTD */
12749
0
    ctxt->options |= XML_PARSE_DTDLOAD;
12750
12751
0
    xmlDetectSAX2(ctxt);
12752
12753
    /*
12754
     * generate a parser input from the I/O handler
12755
     */
12756
12757
0
    pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12758
0
    if (pinput == NULL) {
12759
0
        xmlFreeParserInputBuffer(input);
12760
0
  xmlFreeParserCtxt(ctxt);
12761
0
  return(NULL);
12762
0
    }
12763
12764
    /*
12765
     * plug some encoding conversion routines here.
12766
     */
12767
0
    if (xmlPushInput(ctxt, pinput) < 0) {
12768
0
  xmlFreeParserCtxt(ctxt);
12769
0
  return(NULL);
12770
0
    }
12771
0
    if (enc != XML_CHAR_ENCODING_NONE) {
12772
0
        xmlSwitchEncoding(ctxt, enc);
12773
0
    }
12774
12775
0
    pinput->filename = NULL;
12776
0
    pinput->line = 1;
12777
0
    pinput->col = 1;
12778
0
    pinput->base = ctxt->input->cur;
12779
0
    pinput->cur = ctxt->input->cur;
12780
0
    pinput->free = NULL;
12781
12782
    /*
12783
     * let's parse that entity knowing it's an external subset.
12784
     */
12785
0
    ctxt->inSubset = 2;
12786
0
    ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12787
0
    if (ctxt->myDoc == NULL) {
12788
0
  xmlErrMemory(ctxt, "New Doc failed");
12789
0
  return(NULL);
12790
0
    }
12791
0
    ctxt->myDoc->properties = XML_DOC_INTERNAL;
12792
0
    ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12793
0
                                 BAD_CAST "none", BAD_CAST "none");
12794
12795
0
    if ((enc == XML_CHAR_ENCODING_NONE) &&
12796
0
        ((ctxt->input->end - ctxt->input->cur) >= 4)) {
12797
  /*
12798
   * Get the 4 first bytes and decode the charset
12799
   * if enc != XML_CHAR_ENCODING_NONE
12800
   * plug some encoding conversion routines.
12801
   */
12802
0
  start[0] = RAW;
12803
0
  start[1] = NXT(1);
12804
0
  start[2] = NXT(2);
12805
0
  start[3] = NXT(3);
12806
0
  enc = xmlDetectCharEncoding(start, 4);
12807
0
  if (enc != XML_CHAR_ENCODING_NONE) {
12808
0
      xmlSwitchEncoding(ctxt, enc);
12809
0
  }
12810
0
    }
12811
12812
0
    xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
12813
12814
0
    if (ctxt->myDoc != NULL) {
12815
0
  if (ctxt->wellFormed) {
12816
0
      ret = ctxt->myDoc->extSubset;
12817
0
      ctxt->myDoc->extSubset = NULL;
12818
0
      if (ret != NULL) {
12819
0
    xmlNodePtr tmp;
12820
12821
0
    ret->doc = NULL;
12822
0
    tmp = ret->children;
12823
0
    while (tmp != NULL) {
12824
0
        tmp->doc = NULL;
12825
0
        tmp = tmp->next;
12826
0
    }
12827
0
      }
12828
0
  } else {
12829
0
      ret = NULL;
12830
0
  }
12831
0
        xmlFreeDoc(ctxt->myDoc);
12832
0
        ctxt->myDoc = NULL;
12833
0
    }
12834
0
    xmlFreeParserCtxt(ctxt);
12835
12836
0
    return(ret);
12837
0
}
12838
12839
/**
12840
 * xmlSAXParseDTD:
12841
 * @sax:  the SAX handler block
12842
 * @ExternalID:  a NAME* containing the External ID of the DTD
12843
 * @SystemID:  a NAME* containing the URL to the DTD
12844
 *
12845
 * DEPRECATED: Don't use.
12846
 *
12847
 * Load and parse an external subset.
12848
 *
12849
 * Returns the resulting xmlDtdPtr or NULL in case of error.
12850
 */
12851
12852
xmlDtdPtr
12853
xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
12854
0
                          const xmlChar *SystemID) {
12855
0
    xmlDtdPtr ret = NULL;
12856
0
    xmlParserCtxtPtr ctxt;
12857
0
    xmlParserInputPtr input = NULL;
12858
0
    xmlCharEncoding enc;
12859
0
    xmlChar* systemIdCanonic;
12860
12861
0
    if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
12862
12863
0
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
12864
0
    if (ctxt == NULL) {
12865
0
  return(NULL);
12866
0
    }
12867
12868
    /* We are loading a DTD */
12869
0
    ctxt->options |= XML_PARSE_DTDLOAD;
12870
12871
    /*
12872
     * Canonicalise the system ID
12873
     */
12874
0
    systemIdCanonic = xmlCanonicPath(SystemID);
12875
0
    if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
12876
0
  xmlFreeParserCtxt(ctxt);
12877
0
  return(NULL);
12878
0
    }
12879
12880
    /*
12881
     * Ask the Entity resolver to load the damn thing
12882
     */
12883
12884
0
    if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
12885
0
  input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
12886
0
                                   systemIdCanonic);
12887
0
    if (input == NULL) {
12888
0
  xmlFreeParserCtxt(ctxt);
12889
0
  if (systemIdCanonic != NULL)
12890
0
      xmlFree(systemIdCanonic);
12891
0
  return(NULL);
12892
0
    }
12893
12894
    /*
12895
     * plug some encoding conversion routines here.
12896
     */
12897
0
    if (xmlPushInput(ctxt, input) < 0) {
12898
0
  xmlFreeParserCtxt(ctxt);
12899
0
  if (systemIdCanonic != NULL)
12900
0
      xmlFree(systemIdCanonic);
12901
0
  return(NULL);
12902
0
    }
12903
0
    if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12904
0
  enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
12905
0
  xmlSwitchEncoding(ctxt, enc);
12906
0
    }
12907
12908
0
    if (input->filename == NULL)
12909
0
  input->filename = (char *) systemIdCanonic;
12910
0
    else
12911
0
  xmlFree(systemIdCanonic);
12912
0
    input->line = 1;
12913
0
    input->col = 1;
12914
0
    input->base = ctxt->input->cur;
12915
0
    input->cur = ctxt->input->cur;
12916
0
    input->free = NULL;
12917
12918
    /*
12919
     * let's parse that entity knowing it's an external subset.
12920
     */
12921
0
    ctxt->inSubset = 2;
12922
0
    ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12923
0
    if (ctxt->myDoc == NULL) {
12924
0
  xmlErrMemory(ctxt, "New Doc failed");
12925
0
  xmlFreeParserCtxt(ctxt);
12926
0
  return(NULL);
12927
0
    }
12928
0
    ctxt->myDoc->properties = XML_DOC_INTERNAL;
12929
0
    ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12930
0
                                 ExternalID, SystemID);
12931
0
    xmlParseExternalSubset(ctxt, ExternalID, SystemID);
12932
12933
0
    if (ctxt->myDoc != NULL) {
12934
0
  if (ctxt->wellFormed) {
12935
0
      ret = ctxt->myDoc->extSubset;
12936
0
      ctxt->myDoc->extSubset = NULL;
12937
0
      if (ret != NULL) {
12938
0
    xmlNodePtr tmp;
12939
12940
0
    ret->doc = NULL;
12941
0
    tmp = ret->children;
12942
0
    while (tmp != NULL) {
12943
0
        tmp->doc = NULL;
12944
0
        tmp = tmp->next;
12945
0
    }
12946
0
      }
12947
0
  } else {
12948
0
      ret = NULL;
12949
0
  }
12950
0
        xmlFreeDoc(ctxt->myDoc);
12951
0
        ctxt->myDoc = NULL;
12952
0
    }
12953
0
    xmlFreeParserCtxt(ctxt);
12954
12955
0
    return(ret);
12956
0
}
12957
12958
12959
/**
12960
 * xmlParseDTD:
12961
 * @ExternalID:  a NAME* containing the External ID of the DTD
12962
 * @SystemID:  a NAME* containing the URL to the DTD
12963
 *
12964
 * Load and parse an external subset.
12965
 *
12966
 * Returns the resulting xmlDtdPtr or NULL in case of error.
12967
 */
12968
12969
xmlDtdPtr
12970
0
xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
12971
0
    return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
12972
0
}
12973
#endif /* LIBXML_VALID_ENABLED */
12974
12975
/************************************************************************
12976
 *                  *
12977
 *    Front ends when parsing an Entity     *
12978
 *                  *
12979
 ************************************************************************/
12980
12981
/**
12982
 * xmlParseCtxtExternalEntity:
12983
 * @ctx:  the existing parsing context
12984
 * @URL:  the URL for the entity to load
12985
 * @ID:  the System ID for the entity to load
12986
 * @lst:  the return value for the set of parsed nodes
12987
 *
12988
 * Parse an external general entity within an existing parsing context
12989
 * An external general parsed entity is well-formed if it matches the
12990
 * production labeled extParsedEnt.
12991
 *
12992
 * [78] extParsedEnt ::= TextDecl? content
12993
 *
12994
 * Returns 0 if the entity is well formed, -1 in case of args problem and
12995
 *    the parser error code otherwise
12996
 */
12997
12998
int
12999
xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
13000
0
                 const xmlChar *ID, xmlNodePtr *lst) {
13001
0
    void *userData;
13002
13003
0
    if (ctx == NULL) return(-1);
13004
    /*
13005
     * If the user provided their own SAX callbacks, then reuse the
13006
     * userData callback field, otherwise the expected setup in a
13007
     * DOM builder is to have userData == ctxt
13008
     */
13009
0
    if (ctx->userData == ctx)
13010
0
        userData = NULL;
13011
0
    else
13012
0
        userData = ctx->userData;
13013
0
    return xmlParseExternalEntityPrivate(ctx->myDoc, ctx, ctx->sax,
13014
0
                                         userData, ctx->depth + 1,
13015
0
                                         URL, ID, lst);
13016
0
}
13017
13018
/**
13019
 * xmlParseExternalEntityPrivate:
13020
 * @doc:  the document the chunk pertains to
13021
 * @oldctxt:  the previous parser context if available
13022
 * @sax:  the SAX handler block (possibly NULL)
13023
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
13024
 * @depth:  Used for loop detection, use 0
13025
 * @URL:  the URL for the entity to load
13026
 * @ID:  the System ID for the entity to load
13027
 * @list:  the return value for the set of parsed nodes
13028
 *
13029
 * Private version of xmlParseExternalEntity()
13030
 *
13031
 * Returns 0 if the entity is well formed, -1 in case of args problem and
13032
 *    the parser error code otherwise
13033
 */
13034
13035
static xmlParserErrors
13036
xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
13037
                xmlSAXHandlerPtr sax,
13038
          void *user_data, int depth, const xmlChar *URL,
13039
4.96M
          const xmlChar *ID, xmlNodePtr *list) {
13040
4.96M
    xmlParserCtxtPtr ctxt;
13041
4.96M
    xmlDocPtr newDoc;
13042
4.96M
    xmlNodePtr newRoot;
13043
4.96M
    xmlParserErrors ret = XML_ERR_OK;
13044
4.96M
    xmlChar start[4];
13045
4.96M
    xmlCharEncoding enc;
13046
13047
4.96M
    if (((depth > 40) &&
13048
4.96M
  ((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13049
4.96M
  (depth > 1024)) {
13050
3.42k
  return(XML_ERR_ENTITY_LOOP);
13051
3.42k
    }
13052
13053
4.96M
    if (list != NULL)
13054
4.95M
        *list = NULL;
13055
4.96M
    if ((URL == NULL) && (ID == NULL))
13056
272
  return(XML_ERR_INTERNAL_ERROR);
13057
4.96M
    if (doc == NULL)
13058
0
  return(XML_ERR_INTERNAL_ERROR);
13059
13060
4.96M
    ctxt = xmlCreateEntityParserCtxtInternal(sax, user_data, URL, ID, NULL,
13061
4.96M
                                             oldctxt);
13062
4.96M
    if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
13063
4.93M
    xmlDetectSAX2(ctxt);
13064
13065
4.93M
    newDoc = xmlNewDoc(BAD_CAST "1.0");
13066
4.93M
    if (newDoc == NULL) {
13067
0
  xmlFreeParserCtxt(ctxt);
13068
0
  return(XML_ERR_INTERNAL_ERROR);
13069
0
    }
13070
4.93M
    newDoc->properties = XML_DOC_INTERNAL;
13071
4.93M
    if (doc) {
13072
4.93M
        newDoc->intSubset = doc->intSubset;
13073
4.93M
        newDoc->extSubset = doc->extSubset;
13074
4.93M
        if (doc->dict) {
13075
2.21M
            newDoc->dict = doc->dict;
13076
2.21M
            xmlDictReference(newDoc->dict);
13077
2.21M
        }
13078
4.93M
        if (doc->URL != NULL) {
13079
2.81M
            newDoc->URL = xmlStrdup(doc->URL);
13080
2.81M
        }
13081
4.93M
    }
13082
4.93M
    newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13083
4.93M
    if (newRoot == NULL) {
13084
0
  if (sax != NULL)
13085
0
  xmlFreeParserCtxt(ctxt);
13086
0
  newDoc->intSubset = NULL;
13087
0
  newDoc->extSubset = NULL;
13088
0
        xmlFreeDoc(newDoc);
13089
0
  return(XML_ERR_INTERNAL_ERROR);
13090
0
    }
13091
4.93M
    xmlAddChild((xmlNodePtr) newDoc, newRoot);
13092
4.93M
    nodePush(ctxt, newDoc->children);
13093
4.93M
    if (doc == NULL) {
13094
0
        ctxt->myDoc = newDoc;
13095
4.93M
    } else {
13096
4.93M
        ctxt->myDoc = doc;
13097
4.93M
        newRoot->doc = doc;
13098
4.93M
    }
13099
13100
    /*
13101
     * Get the 4 first bytes and decode the charset
13102
     * if enc != XML_CHAR_ENCODING_NONE
13103
     * plug some encoding conversion routines.
13104
     */
13105
4.93M
    GROW;
13106
4.93M
    if ((ctxt->input->end - ctxt->input->cur) >= 4) {
13107
4.93M
  start[0] = RAW;
13108
4.93M
  start[1] = NXT(1);
13109
4.93M
  start[2] = NXT(2);
13110
4.93M
  start[3] = NXT(3);
13111
4.93M
  enc = xmlDetectCharEncoding(start, 4);
13112
4.93M
  if (enc != XML_CHAR_ENCODING_NONE) {
13113
9.96k
      xmlSwitchEncoding(ctxt, enc);
13114
9.96k
  }
13115
4.93M
    }
13116
13117
    /*
13118
     * Parse a possible text declaration first
13119
     */
13120
4.93M
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
13121
9.01k
  xmlParseTextDecl(ctxt);
13122
        /*
13123
         * An XML-1.0 document can't reference an entity not XML-1.0
13124
         */
13125
9.01k
        if ((xmlStrEqual(oldctxt->version, BAD_CAST "1.0")) &&
13126
9.01k
            (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
13127
154
            xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
13128
154
                           "Version mismatch between document and entity\n");
13129
154
        }
13130
9.01k
    }
13131
13132
4.93M
    ctxt->instate = XML_PARSER_CONTENT;
13133
4.93M
    ctxt->depth = depth;
13134
4.93M
    if (oldctxt != NULL) {
13135
4.93M
  ctxt->_private = oldctxt->_private;
13136
4.93M
  ctxt->loadsubset = oldctxt->loadsubset;
13137
4.93M
  ctxt->validate = oldctxt->validate;
13138
4.93M
  ctxt->valid = oldctxt->valid;
13139
4.93M
  ctxt->replaceEntities = oldctxt->replaceEntities;
13140
4.93M
        if (oldctxt->validate) {
13141
4.65M
            ctxt->vctxt.error = oldctxt->vctxt.error;
13142
4.65M
            ctxt->vctxt.warning = oldctxt->vctxt.warning;
13143
4.65M
            ctxt->vctxt.userData = oldctxt->vctxt.userData;
13144
4.65M
        }
13145
4.93M
  ctxt->external = oldctxt->external;
13146
4.93M
        if (ctxt->dict) xmlDictFree(ctxt->dict);
13147
4.93M
        ctxt->dict = oldctxt->dict;
13148
4.93M
        ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13149
4.93M
        ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13150
4.93M
        ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13151
4.93M
        ctxt->dictNames = oldctxt->dictNames;
13152
4.93M
        ctxt->attsDefault = oldctxt->attsDefault;
13153
4.93M
        ctxt->attsSpecial = oldctxt->attsSpecial;
13154
4.93M
        ctxt->linenumbers = oldctxt->linenumbers;
13155
4.93M
  ctxt->record_info = oldctxt->record_info;
13156
4.93M
  ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
13157
4.93M
  ctxt->node_seq.length = oldctxt->node_seq.length;
13158
4.93M
  ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
13159
4.93M
    } else {
13160
  /*
13161
   * Doing validity checking on chunk without context
13162
   * doesn't make sense
13163
   */
13164
0
  ctxt->_private = NULL;
13165
0
  ctxt->validate = 0;
13166
0
  ctxt->external = 2;
13167
0
  ctxt->loadsubset = 0;
13168
0
    }
13169
13170
4.93M
    xmlParseContent(ctxt);
13171
13172
4.93M
    if ((RAW == '<') && (NXT(1) == '/')) {
13173
526k
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13174
4.40M
    } else if (RAW != 0) {
13175
7.44k
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13176
7.44k
    }
13177
4.93M
    if (ctxt->node != newDoc->children) {
13178
3.75M
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13179
3.75M
    }
13180
13181
4.93M
    if (!ctxt->wellFormed) {
13182
4.91M
        if (ctxt->errNo == 0)
13183
0
      ret = XML_ERR_INTERNAL_ERROR;
13184
4.91M
  else
13185
4.91M
      ret = (xmlParserErrors)ctxt->errNo;
13186
4.91M
    } else {
13187
17.3k
  if (list != NULL) {
13188
4.97k
      xmlNodePtr cur;
13189
13190
      /*
13191
       * Return the newly created nodeset after unlinking it from
13192
       * they pseudo parent.
13193
       */
13194
4.97k
      cur = newDoc->children->children;
13195
4.97k
      *list = cur;
13196
9.40k
      while (cur != NULL) {
13197
4.42k
    cur->parent = NULL;
13198
4.42k
    cur = cur->next;
13199
4.42k
      }
13200
4.97k
            newDoc->children->children = NULL;
13201
4.97k
  }
13202
17.3k
  ret = XML_ERR_OK;
13203
17.3k
    }
13204
13205
    /*
13206
     * Record in the parent context the number of entities replacement
13207
     * done when parsing that reference.
13208
     */
13209
4.93M
    if (oldctxt != NULL)
13210
4.92M
        oldctxt->nbentities += ctxt->nbentities;
13211
13212
    /*
13213
     * Also record the size of the entity parsed
13214
     */
13215
4.93M
    if (ctxt->input != NULL && oldctxt != NULL) {
13216
4.92M
  oldctxt->sizeentities += ctxt->input->consumed;
13217
4.92M
  oldctxt->sizeentities += (ctxt->input->cur - ctxt->input->base);
13218
4.92M
    }
13219
    /*
13220
     * And record the last error if any
13221
     */
13222
4.93M
    if ((oldctxt != NULL) && (ctxt->lastError.code != XML_ERR_OK))
13223
4.91M
        xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13224
13225
4.93M
    if (oldctxt != NULL) {
13226
4.92M
        ctxt->dict = NULL;
13227
4.92M
        ctxt->attsDefault = NULL;
13228
4.92M
        ctxt->attsSpecial = NULL;
13229
4.92M
        oldctxt->validate = ctxt->validate;
13230
4.92M
        oldctxt->valid = ctxt->valid;
13231
4.92M
        oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
13232
4.92M
        oldctxt->node_seq.length = ctxt->node_seq.length;
13233
4.92M
        oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
13234
4.92M
    }
13235
4.93M
    ctxt->node_seq.maximum = 0;
13236
4.93M
    ctxt->node_seq.length = 0;
13237
4.93M
    ctxt->node_seq.buffer = NULL;
13238
4.93M
    xmlFreeParserCtxt(ctxt);
13239
4.93M
    newDoc->intSubset = NULL;
13240
4.93M
    newDoc->extSubset = NULL;
13241
4.93M
    xmlFreeDoc(newDoc);
13242
13243
4.93M
    return(ret);
13244
4.93M
}
13245
13246
#ifdef LIBXML_SAX1_ENABLED
13247
/**
13248
 * xmlParseExternalEntity:
13249
 * @doc:  the document the chunk pertains to
13250
 * @sax:  the SAX handler block (possibly NULL)
13251
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
13252
 * @depth:  Used for loop detection, use 0
13253
 * @URL:  the URL for the entity to load
13254
 * @ID:  the System ID for the entity to load
13255
 * @lst:  the return value for the set of parsed nodes
13256
 *
13257
 * Parse an external general entity
13258
 * An external general parsed entity is well-formed if it matches the
13259
 * production labeled extParsedEnt.
13260
 *
13261
 * [78] extParsedEnt ::= TextDecl? content
13262
 *
13263
 * Returns 0 if the entity is well formed, -1 in case of args problem and
13264
 *    the parser error code otherwise
13265
 */
13266
13267
int
13268
xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
13269
0
    int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
13270
0
    return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
13271
0
                           ID, lst));
13272
0
}
13273
13274
/**
13275
 * xmlParseBalancedChunkMemory:
13276
 * @doc:  the document the chunk pertains to (must not be NULL)
13277
 * @sax:  the SAX handler block (possibly NULL)
13278
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
13279
 * @depth:  Used for loop detection, use 0
13280
 * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
13281
 * @lst:  the return value for the set of parsed nodes
13282
 *
13283
 * Parse a well-balanced chunk of an XML document
13284
 * called by the parser
13285
 * The allowed sequence for the Well Balanced Chunk is the one defined by
13286
 * the content production in the XML grammar:
13287
 *
13288
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13289
 *
13290
 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13291
 *    the parser error code otherwise
13292
 */
13293
13294
int
13295
xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13296
0
     void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
13297
0
    return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
13298
0
                                                depth, string, lst, 0 );
13299
0
}
13300
#endif /* LIBXML_SAX1_ENABLED */
13301
13302
/**
13303
 * xmlParseBalancedChunkMemoryInternal:
13304
 * @oldctxt:  the existing parsing context
13305
 * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
13306
 * @user_data:  the user data field for the parser context
13307
 * @lst:  the return value for the set of parsed nodes
13308
 *
13309
 *
13310
 * Parse a well-balanced chunk of an XML document
13311
 * called by the parser
13312
 * The allowed sequence for the Well Balanced Chunk is the one defined by
13313
 * the content production in the XML grammar:
13314
 *
13315
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13316
 *
13317
 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13318
 * error code otherwise
13319
 *
13320
 * In case recover is set to 1, the nodelist will not be empty even if
13321
 * the parsed chunk is not well balanced.
13322
 */
13323
static xmlParserErrors
13324
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
13325
298k
  const xmlChar *string, void *user_data, xmlNodePtr *lst) {
13326
298k
    xmlParserCtxtPtr ctxt;
13327
298k
    xmlDocPtr newDoc = NULL;
13328
298k
    xmlNodePtr newRoot;
13329
298k
    xmlSAXHandlerPtr oldsax = NULL;
13330
298k
    xmlNodePtr content = NULL;
13331
298k
    xmlNodePtr last = NULL;
13332
298k
    int size;
13333
298k
    xmlParserErrors ret = XML_ERR_OK;
13334
298k
#ifdef SAX2
13335
298k
    int i;
13336
298k
#endif
13337
13338
298k
    if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13339
298k
        (oldctxt->depth >  1024)) {
13340
1.00k
  return(XML_ERR_ENTITY_LOOP);
13341
1.00k
    }
13342
13343
13344
297k
    if (lst != NULL)
13345
275k
        *lst = NULL;
13346
297k
    if (string == NULL)
13347
110
        return(XML_ERR_INTERNAL_ERROR);
13348
13349
297k
    size = xmlStrlen(string);
13350
13351
297k
    ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13352
297k
    if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
13353
273k
    if (user_data != NULL)
13354
0
  ctxt->userData = user_data;
13355
273k
    else
13356
273k
  ctxt->userData = ctxt;
13357
273k
    if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
13358
273k
    ctxt->dict = oldctxt->dict;
13359
273k
    ctxt->input_id = oldctxt->input_id + 1;
13360
273k
    ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13361
273k
    ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13362
273k
    ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13363
13364
273k
#ifdef SAX2
13365
    /* propagate namespaces down the entity */
13366
1.32M
    for (i = 0;i < oldctxt->nsNr;i += 2) {
13367
1.05M
        nsPush(ctxt, oldctxt->nsTab[i], oldctxt->nsTab[i+1]);
13368
1.05M
    }
13369
273k
#endif
13370
13371
273k
    oldsax = ctxt->sax;
13372
273k
    ctxt->sax = oldctxt->sax;
13373
273k
    xmlDetectSAX2(ctxt);
13374
273k
    ctxt->replaceEntities = oldctxt->replaceEntities;
13375
273k
    ctxt->options = oldctxt->options;
13376
13377
273k
    ctxt->_private = oldctxt->_private;
13378
273k
    if (oldctxt->myDoc == NULL) {
13379
0
  newDoc = xmlNewDoc(BAD_CAST "1.0");
13380
0
  if (newDoc == NULL) {
13381
0
      ctxt->sax = oldsax;
13382
0
      ctxt->dict = NULL;
13383
0
      xmlFreeParserCtxt(ctxt);
13384
0
      return(XML_ERR_INTERNAL_ERROR);
13385
0
  }
13386
0
  newDoc->properties = XML_DOC_INTERNAL;
13387
0
  newDoc->dict = ctxt->dict;
13388
0
  xmlDictReference(newDoc->dict);
13389
0
  ctxt->myDoc = newDoc;
13390
273k
    } else {
13391
273k
  ctxt->myDoc = oldctxt->myDoc;
13392
273k
        content = ctxt->myDoc->children;
13393
273k
  last = ctxt->myDoc->last;
13394
273k
    }
13395
273k
    newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
13396
273k
    if (newRoot == NULL) {
13397
0
  ctxt->sax = oldsax;
13398
0
  ctxt->dict = NULL;
13399
0
  xmlFreeParserCtxt(ctxt);
13400
0
  if (newDoc != NULL) {
13401
0
      xmlFreeDoc(newDoc);
13402
0
  }
13403
0
  return(XML_ERR_INTERNAL_ERROR);
13404
0
    }
13405
273k
    ctxt->myDoc->children = NULL;
13406
273k
    ctxt->myDoc->last = NULL;
13407
273k
    xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
13408
273k
    nodePush(ctxt, ctxt->myDoc->children);
13409
273k
    ctxt->instate = XML_PARSER_CONTENT;
13410
273k
    ctxt->depth = oldctxt->depth + 1;
13411
13412
273k
    ctxt->validate = 0;
13413
273k
    ctxt->loadsubset = oldctxt->loadsubset;
13414
273k
    if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
13415
  /*
13416
   * ID/IDREF registration will be done in xmlValidateElement below
13417
   */
13418
77.4k
  ctxt->loadsubset |= XML_SKIP_IDS;
13419
77.4k
    }
13420
273k
    ctxt->dictNames = oldctxt->dictNames;
13421
273k
    ctxt->attsDefault = oldctxt->attsDefault;
13422
273k
    ctxt->attsSpecial = oldctxt->attsSpecial;
13423
13424
273k
    xmlParseContent(ctxt);
13425
273k
    if ((RAW == '<') && (NXT(1) == '/')) {
13426
63.4k
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13427
210k
    } else if (RAW != 0) {
13428
702
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13429
702
    }
13430
273k
    if (ctxt->node != ctxt->myDoc->children) {
13431
90.3k
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13432
90.3k
    }
13433
13434
273k
    if (!ctxt->wellFormed) {
13435
236k
        if (ctxt->errNo == 0)
13436
0
      ret = XML_ERR_INTERNAL_ERROR;
13437
236k
  else
13438
236k
      ret = (xmlParserErrors)ctxt->errNo;
13439
236k
    } else {
13440
36.7k
      ret = XML_ERR_OK;
13441
36.7k
    }
13442
13443
273k
    if ((lst != NULL) && (ret == XML_ERR_OK)) {
13444
36.6k
  xmlNodePtr cur;
13445
13446
  /*
13447
   * Return the newly created nodeset after unlinking it from
13448
   * they pseudo parent.
13449
   */
13450
36.6k
  cur = ctxt->myDoc->children->children;
13451
36.6k
  *lst = cur;
13452
85.2k
  while (cur != NULL) {
13453
48.5k
#ifdef LIBXML_VALID_ENABLED
13454
48.5k
      if ((oldctxt->validate) && (oldctxt->wellFormed) &&
13455
48.5k
    (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
13456
48.5k
    (cur->type == XML_ELEMENT_NODE)) {
13457
4.90k
    oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
13458
4.90k
      oldctxt->myDoc, cur);
13459
4.90k
      }
13460
48.5k
#endif /* LIBXML_VALID_ENABLED */
13461
48.5k
      cur->parent = NULL;
13462
48.5k
      cur = cur->next;
13463
48.5k
  }
13464
36.6k
  ctxt->myDoc->children->children = NULL;
13465
36.6k
    }
13466
273k
    if (ctxt->myDoc != NULL) {
13467
273k
  xmlFreeNode(ctxt->myDoc->children);
13468
273k
        ctxt->myDoc->children = content;
13469
273k
        ctxt->myDoc->last = last;
13470
273k
    }
13471
13472
    /*
13473
     * Record in the parent context the number of entities replacement
13474
     * done when parsing that reference.
13475
     */
13476
273k
    if (oldctxt != NULL)
13477
273k
        oldctxt->nbentities += ctxt->nbentities;
13478
13479
    /*
13480
     * Also record the last error if any
13481
     */
13482
273k
    if (ctxt->lastError.code != XML_ERR_OK)
13483
238k
        xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13484
13485
273k
    ctxt->sax = oldsax;
13486
273k
    ctxt->dict = NULL;
13487
273k
    ctxt->attsDefault = NULL;
13488
273k
    ctxt->attsSpecial = NULL;
13489
273k
    xmlFreeParserCtxt(ctxt);
13490
273k
    if (newDoc != NULL) {
13491
0
  xmlFreeDoc(newDoc);
13492
0
    }
13493
13494
273k
    return(ret);
13495
273k
}
13496
13497
/**
13498
 * xmlParseInNodeContext:
13499
 * @node:  the context node
13500
 * @data:  the input string
13501
 * @datalen:  the input string length in bytes
13502
 * @options:  a combination of xmlParserOption
13503
 * @lst:  the return value for the set of parsed nodes
13504
 *
13505
 * Parse a well-balanced chunk of an XML document
13506
 * within the context (DTD, namespaces, etc ...) of the given node.
13507
 *
13508
 * The allowed sequence for the data is a Well Balanced Chunk defined by
13509
 * the content production in the XML grammar:
13510
 *
13511
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13512
 *
13513
 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13514
 * error code otherwise
13515
 */
13516
xmlParserErrors
13517
xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
13518
0
                      int options, xmlNodePtr *lst) {
13519
0
#ifdef SAX2
13520
0
    xmlParserCtxtPtr ctxt;
13521
0
    xmlDocPtr doc = NULL;
13522
0
    xmlNodePtr fake, cur;
13523
0
    int nsnr = 0;
13524
13525
0
    xmlParserErrors ret = XML_ERR_OK;
13526
13527
    /*
13528
     * check all input parameters, grab the document
13529
     */
13530
0
    if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
13531
0
        return(XML_ERR_INTERNAL_ERROR);
13532
0
    switch (node->type) {
13533
0
        case XML_ELEMENT_NODE:
13534
0
        case XML_ATTRIBUTE_NODE:
13535
0
        case XML_TEXT_NODE:
13536
0
        case XML_CDATA_SECTION_NODE:
13537
0
        case XML_ENTITY_REF_NODE:
13538
0
        case XML_PI_NODE:
13539
0
        case XML_COMMENT_NODE:
13540
0
        case XML_DOCUMENT_NODE:
13541
0
        case XML_HTML_DOCUMENT_NODE:
13542
0
      break;
13543
0
  default:
13544
0
      return(XML_ERR_INTERNAL_ERROR);
13545
13546
0
    }
13547
0
    while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
13548
0
           (node->type != XML_DOCUMENT_NODE) &&
13549
0
     (node->type != XML_HTML_DOCUMENT_NODE))
13550
0
  node = node->parent;
13551
0
    if (node == NULL)
13552
0
  return(XML_ERR_INTERNAL_ERROR);
13553
0
    if (node->type == XML_ELEMENT_NODE)
13554
0
  doc = node->doc;
13555
0
    else
13556
0
        doc = (xmlDocPtr) node;
13557
0
    if (doc == NULL)
13558
0
  return(XML_ERR_INTERNAL_ERROR);
13559
13560
    /*
13561
     * allocate a context and set-up everything not related to the
13562
     * node position in the tree
13563
     */
13564
0
    if (doc->type == XML_DOCUMENT_NODE)
13565
0
  ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
13566
0
#ifdef LIBXML_HTML_ENABLED
13567
0
    else if (doc->type == XML_HTML_DOCUMENT_NODE) {
13568
0
  ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
13569
        /*
13570
         * When parsing in context, it makes no sense to add implied
13571
         * elements like html/body/etc...
13572
         */
13573
0
        options |= HTML_PARSE_NOIMPLIED;
13574
0
    }
13575
0
#endif
13576
0
    else
13577
0
        return(XML_ERR_INTERNAL_ERROR);
13578
13579
0
    if (ctxt == NULL)
13580
0
        return(XML_ERR_NO_MEMORY);
13581
13582
    /*
13583
     * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
13584
     * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
13585
     * we must wait until the last moment to free the original one.
13586
     */
13587
0
    if (doc->dict != NULL) {
13588
0
        if (ctxt->dict != NULL)
13589
0
      xmlDictFree(ctxt->dict);
13590
0
  ctxt->dict = doc->dict;
13591
0
    } else
13592
0
        options |= XML_PARSE_NODICT;
13593
13594
0
    if (doc->encoding != NULL) {
13595
0
        xmlCharEncodingHandlerPtr hdlr;
13596
13597
0
        if (ctxt->encoding != NULL)
13598
0
      xmlFree((xmlChar *) ctxt->encoding);
13599
0
        ctxt->encoding = xmlStrdup((const xmlChar *) doc->encoding);
13600
13601
0
        hdlr = xmlFindCharEncodingHandler((const char *) doc->encoding);
13602
0
        if (hdlr != NULL) {
13603
0
            xmlSwitchToEncoding(ctxt, hdlr);
13604
0
  } else {
13605
0
            return(XML_ERR_UNSUPPORTED_ENCODING);
13606
0
        }
13607
0
    }
13608
13609
0
    xmlCtxtUseOptionsInternal(ctxt, options, NULL);
13610
0
    xmlDetectSAX2(ctxt);
13611
0
    ctxt->myDoc = doc;
13612
    /* parsing in context, i.e. as within existing content */
13613
0
    ctxt->input_id = 2;
13614
0
    ctxt->instate = XML_PARSER_CONTENT;
13615
13616
0
    fake = xmlNewDocComment(node->doc, NULL);
13617
0
    if (fake == NULL) {
13618
0
        xmlFreeParserCtxt(ctxt);
13619
0
  return(XML_ERR_NO_MEMORY);
13620
0
    }
13621
0
    xmlAddChild(node, fake);
13622
13623
0
    if (node->type == XML_ELEMENT_NODE) {
13624
0
  nodePush(ctxt, node);
13625
  /*
13626
   * initialize the SAX2 namespaces stack
13627
   */
13628
0
  cur = node;
13629
0
  while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
13630
0
      xmlNsPtr ns = cur->nsDef;
13631
0
      const xmlChar *iprefix, *ihref;
13632
13633
0
      while (ns != NULL) {
13634
0
    if (ctxt->dict) {
13635
0
        iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
13636
0
        ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
13637
0
    } else {
13638
0
        iprefix = ns->prefix;
13639
0
        ihref = ns->href;
13640
0
    }
13641
13642
0
          if (xmlGetNamespace(ctxt, iprefix) == NULL) {
13643
0
        nsPush(ctxt, iprefix, ihref);
13644
0
        nsnr++;
13645
0
    }
13646
0
    ns = ns->next;
13647
0
      }
13648
0
      cur = cur->parent;
13649
0
  }
13650
0
    }
13651
13652
0
    if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
13653
  /*
13654
   * ID/IDREF registration will be done in xmlValidateElement below
13655
   */
13656
0
  ctxt->loadsubset |= XML_SKIP_IDS;
13657
0
    }
13658
13659
0
#ifdef LIBXML_HTML_ENABLED
13660
0
    if (doc->type == XML_HTML_DOCUMENT_NODE)
13661
0
        __htmlParseContent(ctxt);
13662
0
    else
13663
0
#endif
13664
0
  xmlParseContent(ctxt);
13665
13666
0
    nsPop(ctxt, nsnr);
13667
0
    if ((RAW == '<') && (NXT(1) == '/')) {
13668
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13669
0
    } else if (RAW != 0) {
13670
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13671
0
    }
13672
0
    if ((ctxt->node != NULL) && (ctxt->node != node)) {
13673
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13674
0
  ctxt->wellFormed = 0;
13675
0
    }
13676
13677
0
    if (!ctxt->wellFormed) {
13678
0
        if (ctxt->errNo == 0)
13679
0
      ret = XML_ERR_INTERNAL_ERROR;
13680
0
  else
13681
0
      ret = (xmlParserErrors)ctxt->errNo;
13682
0
    } else {
13683
0
        ret = XML_ERR_OK;
13684
0
    }
13685
13686
    /*
13687
     * Return the newly created nodeset after unlinking it from
13688
     * the pseudo sibling.
13689
     */
13690
13691
0
    cur = fake->next;
13692
0
    fake->next = NULL;
13693
0
    node->last = fake;
13694
13695
0
    if (cur != NULL) {
13696
0
  cur->prev = NULL;
13697
0
    }
13698
13699
0
    *lst = cur;
13700
13701
0
    while (cur != NULL) {
13702
0
  cur->parent = NULL;
13703
0
  cur = cur->next;
13704
0
    }
13705
13706
0
    xmlUnlinkNode(fake);
13707
0
    xmlFreeNode(fake);
13708
13709
13710
0
    if (ret != XML_ERR_OK) {
13711
0
        xmlFreeNodeList(*lst);
13712
0
  *lst = NULL;
13713
0
    }
13714
13715
0
    if (doc->dict != NULL)
13716
0
        ctxt->dict = NULL;
13717
0
    xmlFreeParserCtxt(ctxt);
13718
13719
0
    return(ret);
13720
#else /* !SAX2 */
13721
    return(XML_ERR_INTERNAL_ERROR);
13722
#endif
13723
0
}
13724
13725
#ifdef LIBXML_SAX1_ENABLED
13726
/**
13727
 * xmlParseBalancedChunkMemoryRecover:
13728
 * @doc:  the document the chunk pertains to (must not be NULL)
13729
 * @sax:  the SAX handler block (possibly NULL)
13730
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
13731
 * @depth:  Used for loop detection, use 0
13732
 * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
13733
 * @lst:  the return value for the set of parsed nodes
13734
 * @recover: return nodes even if the data is broken (use 0)
13735
 *
13736
 *
13737
 * Parse a well-balanced chunk of an XML document
13738
 * called by the parser
13739
 * The allowed sequence for the Well Balanced Chunk is the one defined by
13740
 * the content production in the XML grammar:
13741
 *
13742
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13743
 *
13744
 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13745
 *    the parser error code otherwise
13746
 *
13747
 * In case recover is set to 1, the nodelist will not be empty even if
13748
 * the parsed chunk is not well balanced, assuming the parsing succeeded to
13749
 * some extent.
13750
 */
13751
int
13752
xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13753
     void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
13754
0
     int recover) {
13755
0
    xmlParserCtxtPtr ctxt;
13756
0
    xmlDocPtr newDoc;
13757
0
    xmlSAXHandlerPtr oldsax = NULL;
13758
0
    xmlNodePtr content, newRoot;
13759
0
    int size;
13760
0
    int ret = 0;
13761
13762
0
    if (depth > 40) {
13763
0
  return(XML_ERR_ENTITY_LOOP);
13764
0
    }
13765
13766
13767
0
    if (lst != NULL)
13768
0
        *lst = NULL;
13769
0
    if (string == NULL)
13770
0
        return(-1);
13771
13772
0
    size = xmlStrlen(string);
13773
13774
0
    ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13775
0
    if (ctxt == NULL) return(-1);
13776
0
    ctxt->userData = ctxt;
13777
0
    if (sax != NULL) {
13778
0
  oldsax = ctxt->sax;
13779
0
        ctxt->sax = sax;
13780
0
  if (user_data != NULL)
13781
0
      ctxt->userData = user_data;
13782
0
    }
13783
0
    newDoc = xmlNewDoc(BAD_CAST "1.0");
13784
0
    if (newDoc == NULL) {
13785
0
  xmlFreeParserCtxt(ctxt);
13786
0
  return(-1);
13787
0
    }
13788
0
    newDoc->properties = XML_DOC_INTERNAL;
13789
0
    if ((doc != NULL) && (doc->dict != NULL)) {
13790
0
        xmlDictFree(ctxt->dict);
13791
0
  ctxt->dict = doc->dict;
13792
0
  xmlDictReference(ctxt->dict);
13793
0
  ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13794
0
  ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13795
0
  ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13796
0
  ctxt->dictNames = 1;
13797
0
    } else {
13798
0
  xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL);
13799
0
    }
13800
    /* doc == NULL is only supported for historic reasons */
13801
0
    if (doc != NULL) {
13802
0
  newDoc->intSubset = doc->intSubset;
13803
0
  newDoc->extSubset = doc->extSubset;
13804
0
    }
13805
0
    newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13806
0
    if (newRoot == NULL) {
13807
0
  if (sax != NULL)
13808
0
      ctxt->sax = oldsax;
13809
0
  xmlFreeParserCtxt(ctxt);
13810
0
  newDoc->intSubset = NULL;
13811
0
  newDoc->extSubset = NULL;
13812
0
        xmlFreeDoc(newDoc);
13813
0
  return(-1);
13814
0
    }
13815
0
    xmlAddChild((xmlNodePtr) newDoc, newRoot);
13816
0
    nodePush(ctxt, newRoot);
13817
    /* doc == NULL is only supported for historic reasons */
13818
0
    if (doc == NULL) {
13819
0
  ctxt->myDoc = newDoc;
13820
0
    } else {
13821
0
  ctxt->myDoc = newDoc;
13822
0
  newDoc->children->doc = doc;
13823
  /* Ensure that doc has XML spec namespace */
13824
0
  xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
13825
0
  newDoc->oldNs = doc->oldNs;
13826
0
    }
13827
0
    ctxt->instate = XML_PARSER_CONTENT;
13828
0
    ctxt->input_id = 2;
13829
0
    ctxt->depth = depth;
13830
13831
    /*
13832
     * Doing validity checking on chunk doesn't make sense
13833
     */
13834
0
    ctxt->validate = 0;
13835
0
    ctxt->loadsubset = 0;
13836
0
    xmlDetectSAX2(ctxt);
13837
13838
0
    if ( doc != NULL ){
13839
0
        content = doc->children;
13840
0
        doc->children = NULL;
13841
0
        xmlParseContent(ctxt);
13842
0
        doc->children = content;
13843
0
    }
13844
0
    else {
13845
0
        xmlParseContent(ctxt);
13846
0
    }
13847
0
    if ((RAW == '<') && (NXT(1) == '/')) {
13848
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13849
0
    } else if (RAW != 0) {
13850
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13851
0
    }
13852
0
    if (ctxt->node != newDoc->children) {
13853
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13854
0
    }
13855
13856
0
    if (!ctxt->wellFormed) {
13857
0
        if (ctxt->errNo == 0)
13858
0
      ret = 1;
13859
0
  else
13860
0
      ret = ctxt->errNo;
13861
0
    } else {
13862
0
      ret = 0;
13863
0
    }
13864
13865
0
    if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
13866
0
  xmlNodePtr cur;
13867
13868
  /*
13869
   * Return the newly created nodeset after unlinking it from
13870
   * they pseudo parent.
13871
   */
13872
0
  cur = newDoc->children->children;
13873
0
  *lst = cur;
13874
0
  while (cur != NULL) {
13875
0
      xmlSetTreeDoc(cur, doc);
13876
0
      cur->parent = NULL;
13877
0
      cur = cur->next;
13878
0
  }
13879
0
  newDoc->children->children = NULL;
13880
0
    }
13881
13882
0
    if (sax != NULL)
13883
0
  ctxt->sax = oldsax;
13884
0
    xmlFreeParserCtxt(ctxt);
13885
0
    newDoc->intSubset = NULL;
13886
0
    newDoc->extSubset = NULL;
13887
    /* This leaks the namespace list if doc == NULL */
13888
0
    newDoc->oldNs = NULL;
13889
0
    xmlFreeDoc(newDoc);
13890
13891
0
    return(ret);
13892
0
}
13893
13894
/**
13895
 * xmlSAXParseEntity:
13896
 * @sax:  the SAX handler block
13897
 * @filename:  the filename
13898
 *
13899
 * DEPRECATED: Don't use.
13900
 *
13901
 * parse an XML external entity out of context and build a tree.
13902
 * It use the given SAX function block to handle the parsing callback.
13903
 * If sax is NULL, fallback to the default DOM tree building routines.
13904
 *
13905
 * [78] extParsedEnt ::= TextDecl? content
13906
 *
13907
 * This correspond to a "Well Balanced" chunk
13908
 *
13909
 * Returns the resulting document tree
13910
 */
13911
13912
xmlDocPtr
13913
0
xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
13914
0
    xmlDocPtr ret;
13915
0
    xmlParserCtxtPtr ctxt;
13916
13917
0
    ctxt = xmlCreateFileParserCtxt(filename);
13918
0
    if (ctxt == NULL) {
13919
0
  return(NULL);
13920
0
    }
13921
0
    if (sax != NULL) {
13922
0
  if (ctxt->sax != NULL)
13923
0
      xmlFree(ctxt->sax);
13924
0
        ctxt->sax = sax;
13925
0
        ctxt->userData = NULL;
13926
0
    }
13927
13928
0
    xmlParseExtParsedEnt(ctxt);
13929
13930
0
    if (ctxt->wellFormed)
13931
0
  ret = ctxt->myDoc;
13932
0
    else {
13933
0
        ret = NULL;
13934
0
        xmlFreeDoc(ctxt->myDoc);
13935
0
        ctxt->myDoc = NULL;
13936
0
    }
13937
0
    if (sax != NULL)
13938
0
        ctxt->sax = NULL;
13939
0
    xmlFreeParserCtxt(ctxt);
13940
13941
0
    return(ret);
13942
0
}
13943
13944
/**
13945
 * xmlParseEntity:
13946
 * @filename:  the filename
13947
 *
13948
 * parse an XML external entity out of context and build a tree.
13949
 *
13950
 * [78] extParsedEnt ::= TextDecl? content
13951
 *
13952
 * This correspond to a "Well Balanced" chunk
13953
 *
13954
 * Returns the resulting document tree
13955
 */
13956
13957
xmlDocPtr
13958
0
xmlParseEntity(const char *filename) {
13959
0
    return(xmlSAXParseEntity(NULL, filename));
13960
0
}
13961
#endif /* LIBXML_SAX1_ENABLED */
13962
13963
/**
13964
 * xmlCreateEntityParserCtxtInternal:
13965
 * @URL:  the entity URL
13966
 * @ID:  the entity PUBLIC ID
13967
 * @base:  a possible base for the target URI
13968
 * @pctx:  parser context used to set options on new context
13969
 *
13970
 * Create a parser context for an external entity
13971
 * Automatic support for ZLIB/Compress compressed document is provided
13972
 * by default if found at compile-time.
13973
 *
13974
 * Returns the new parser context or NULL
13975
 */
13976
static xmlParserCtxtPtr
13977
xmlCreateEntityParserCtxtInternal(xmlSAXHandlerPtr sax, void *userData,
13978
        const xmlChar *URL, const xmlChar *ID, const xmlChar *base,
13979
4.96M
        xmlParserCtxtPtr pctx) {
13980
4.96M
    xmlParserCtxtPtr ctxt;
13981
4.96M
    xmlParserInputPtr inputStream;
13982
4.96M
    char *directory = NULL;
13983
4.96M
    xmlChar *uri;
13984
13985
4.96M
    ctxt = xmlNewSAXParserCtxt(sax, userData);
13986
4.96M
    if (ctxt == NULL) {
13987
0
  return(NULL);
13988
0
    }
13989
13990
4.96M
    if (pctx != NULL) {
13991
4.96M
        ctxt->options = pctx->options;
13992
4.96M
        ctxt->_private = pctx->_private;
13993
  /*
13994
   * this is a subparser of pctx, so the input_id should be
13995
   * incremented to distinguish from main entity
13996
   */
13997
4.96M
  ctxt->input_id = pctx->input_id + 1;
13998
4.96M
    }
13999
14000
    /* Don't read from stdin. */
14001
4.96M
    if (xmlStrcmp(URL, BAD_CAST "-") == 0)
14002
0
        URL = BAD_CAST "./-";
14003
14004
4.96M
    uri = xmlBuildURI(URL, base);
14005
14006
4.96M
    if (uri == NULL) {
14007
2.88k
  inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
14008
2.88k
  if (inputStream == NULL) {
14009
2.87k
      xmlFreeParserCtxt(ctxt);
14010
2.87k
      return(NULL);
14011
2.87k
  }
14012
14013
14
  inputPush(ctxt, inputStream);
14014
14015
14
  if ((ctxt->directory == NULL) && (directory == NULL))
14016
14
      directory = xmlParserGetDirectory((char *)URL);
14017
14
  if ((ctxt->directory == NULL) && (directory != NULL))
14018
14
      ctxt->directory = directory;
14019
4.96M
    } else {
14020
4.96M
  inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
14021
4.96M
  if (inputStream == NULL) {
14022
25.9k
      xmlFree(uri);
14023
25.9k
      xmlFreeParserCtxt(ctxt);
14024
25.9k
      return(NULL);
14025
25.9k
  }
14026
14027
4.93M
  inputPush(ctxt, inputStream);
14028
14029
4.93M
  if ((ctxt->directory == NULL) && (directory == NULL))
14030
4.93M
      directory = xmlParserGetDirectory((char *)uri);
14031
4.93M
  if ((ctxt->directory == NULL) && (directory != NULL))
14032
4.93M
      ctxt->directory = directory;
14033
4.93M
  xmlFree(uri);
14034
4.93M
    }
14035
4.93M
    return(ctxt);
14036
4.96M
}
14037
14038
/**
14039
 * xmlCreateEntityParserCtxt:
14040
 * @URL:  the entity URL
14041
 * @ID:  the entity PUBLIC ID
14042
 * @base:  a possible base for the target URI
14043
 *
14044
 * Create a parser context for an external entity
14045
 * Automatic support for ZLIB/Compress compressed document is provided
14046
 * by default if found at compile-time.
14047
 *
14048
 * Returns the new parser context or NULL
14049
 */
14050
xmlParserCtxtPtr
14051
xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
14052
0
                    const xmlChar *base) {
14053
0
    return xmlCreateEntityParserCtxtInternal(NULL, NULL, URL, ID, base, NULL);
14054
14055
0
}
14056
14057
/************************************************************************
14058
 *                  *
14059
 *    Front ends when parsing from a file     *
14060
 *                  *
14061
 ************************************************************************/
14062
14063
/**
14064
 * xmlCreateURLParserCtxt:
14065
 * @filename:  the filename or URL
14066
 * @options:  a combination of xmlParserOption
14067
 *
14068
 * Create a parser context for a file or URL content.
14069
 * Automatic support for ZLIB/Compress compressed document is provided
14070
 * by default if found at compile-time and for file accesses
14071
 *
14072
 * Returns the new parser context or NULL
14073
 */
14074
xmlParserCtxtPtr
14075
xmlCreateURLParserCtxt(const char *filename, int options)
14076
0
{
14077
0
    xmlParserCtxtPtr ctxt;
14078
0
    xmlParserInputPtr inputStream;
14079
0
    char *directory = NULL;
14080
14081
0
    ctxt = xmlNewParserCtxt();
14082
0
    if (ctxt == NULL) {
14083
0
  xmlErrMemory(NULL, "cannot allocate parser context");
14084
0
  return(NULL);
14085
0
    }
14086
14087
0
    if (options)
14088
0
  xmlCtxtUseOptionsInternal(ctxt, options, NULL);
14089
0
    ctxt->linenumbers = 1;
14090
14091
0
    inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
14092
0
    if (inputStream == NULL) {
14093
0
  xmlFreeParserCtxt(ctxt);
14094
0
  return(NULL);
14095
0
    }
14096
14097
0
    inputPush(ctxt, inputStream);
14098
0
    if ((ctxt->directory == NULL) && (directory == NULL))
14099
0
        directory = xmlParserGetDirectory(filename);
14100
0
    if ((ctxt->directory == NULL) && (directory != NULL))
14101
0
        ctxt->directory = directory;
14102
14103
0
    return(ctxt);
14104
0
}
14105
14106
/**
14107
 * xmlCreateFileParserCtxt:
14108
 * @filename:  the filename
14109
 *
14110
 * Create a parser context for a file content.
14111
 * Automatic support for ZLIB/Compress compressed document is provided
14112
 * by default if found at compile-time.
14113
 *
14114
 * Returns the new parser context or NULL
14115
 */
14116
xmlParserCtxtPtr
14117
xmlCreateFileParserCtxt(const char *filename)
14118
0
{
14119
0
    return(xmlCreateURLParserCtxt(filename, 0));
14120
0
}
14121
14122
#ifdef LIBXML_SAX1_ENABLED
14123
/**
14124
 * xmlSAXParseFileWithData:
14125
 * @sax:  the SAX handler block
14126
 * @filename:  the filename
14127
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
14128
 *             documents
14129
 * @data:  the userdata
14130
 *
14131
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
14132
 *
14133
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14134
 * compressed document is provided by default if found at compile-time.
14135
 * It use the given SAX function block to handle the parsing callback.
14136
 * If sax is NULL, fallback to the default DOM tree building routines.
14137
 *
14138
 * User data (void *) is stored within the parser context in the
14139
 * context's _private member, so it is available nearly everywhere in libxml
14140
 *
14141
 * Returns the resulting document tree
14142
 */
14143
14144
xmlDocPtr
14145
xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
14146
0
                        int recovery, void *data) {
14147
0
    xmlDocPtr ret;
14148
0
    xmlParserCtxtPtr ctxt;
14149
14150
0
    xmlInitParser();
14151
14152
0
    ctxt = xmlCreateFileParserCtxt(filename);
14153
0
    if (ctxt == NULL) {
14154
0
  return(NULL);
14155
0
    }
14156
0
    if (sax != NULL) {
14157
0
  if (ctxt->sax != NULL)
14158
0
      xmlFree(ctxt->sax);
14159
0
        ctxt->sax = sax;
14160
0
    }
14161
0
    xmlDetectSAX2(ctxt);
14162
0
    if (data!=NULL) {
14163
0
  ctxt->_private = data;
14164
0
    }
14165
14166
0
    if (ctxt->directory == NULL)
14167
0
        ctxt->directory = xmlParserGetDirectory(filename);
14168
14169
0
    ctxt->recovery = recovery;
14170
14171
0
    xmlParseDocument(ctxt);
14172
14173
0
    if ((ctxt->wellFormed) || recovery) {
14174
0
        ret = ctxt->myDoc;
14175
0
  if ((ret != NULL) && (ctxt->input->buf != NULL)) {
14176
0
      if (ctxt->input->buf->compressed > 0)
14177
0
    ret->compression = 9;
14178
0
      else
14179
0
    ret->compression = ctxt->input->buf->compressed;
14180
0
  }
14181
0
    }
14182
0
    else {
14183
0
       ret = NULL;
14184
0
       xmlFreeDoc(ctxt->myDoc);
14185
0
       ctxt->myDoc = NULL;
14186
0
    }
14187
0
    if (sax != NULL)
14188
0
        ctxt->sax = NULL;
14189
0
    xmlFreeParserCtxt(ctxt);
14190
14191
0
    return(ret);
14192
0
}
14193
14194
/**
14195
 * xmlSAXParseFile:
14196
 * @sax:  the SAX handler block
14197
 * @filename:  the filename
14198
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
14199
 *             documents
14200
 *
14201
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
14202
 *
14203
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14204
 * compressed document is provided by default if found at compile-time.
14205
 * It use the given SAX function block to handle the parsing callback.
14206
 * If sax is NULL, fallback to the default DOM tree building routines.
14207
 *
14208
 * Returns the resulting document tree
14209
 */
14210
14211
xmlDocPtr
14212
xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
14213
0
                          int recovery) {
14214
0
    return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
14215
0
}
14216
14217
/**
14218
 * xmlRecoverDoc:
14219
 * @cur:  a pointer to an array of xmlChar
14220
 *
14221
 * DEPRECATED: Use xmlReadDoc with XML_PARSE_RECOVER.
14222
 *
14223
 * parse an XML in-memory document and build a tree.
14224
 * In the case the document is not Well Formed, a attempt to build a
14225
 * tree is tried anyway
14226
 *
14227
 * Returns the resulting document tree or NULL in case of failure
14228
 */
14229
14230
xmlDocPtr
14231
0
xmlRecoverDoc(const xmlChar *cur) {
14232
0
    return(xmlSAXParseDoc(NULL, cur, 1));
14233
0
}
14234
14235
/**
14236
 * xmlParseFile:
14237
 * @filename:  the filename
14238
 *
14239
 * DEPRECATED: Use xmlReadFile.
14240
 *
14241
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14242
 * compressed document is provided by default if found at compile-time.
14243
 *
14244
 * Returns the resulting document tree if the file was wellformed,
14245
 * NULL otherwise.
14246
 */
14247
14248
xmlDocPtr
14249
0
xmlParseFile(const char *filename) {
14250
0
    return(xmlSAXParseFile(NULL, filename, 0));
14251
0
}
14252
14253
/**
14254
 * xmlRecoverFile:
14255
 * @filename:  the filename
14256
 *
14257
 * DEPRECATED: Use xmlReadFile with XML_PARSE_RECOVER.
14258
 *
14259
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14260
 * compressed document is provided by default if found at compile-time.
14261
 * In the case the document is not Well Formed, it attempts to build
14262
 * a tree anyway
14263
 *
14264
 * Returns the resulting document tree or NULL in case of failure
14265
 */
14266
14267
xmlDocPtr
14268
0
xmlRecoverFile(const char *filename) {
14269
0
    return(xmlSAXParseFile(NULL, filename, 1));
14270
0
}
14271
14272
14273
/**
14274
 * xmlSetupParserForBuffer:
14275
 * @ctxt:  an XML parser context
14276
 * @buffer:  a xmlChar * buffer
14277
 * @filename:  a file name
14278
 *
14279
 * DEPRECATED: Don't use.
14280
 *
14281
 * Setup the parser context to parse a new buffer; Clears any prior
14282
 * contents from the parser context. The buffer parameter must not be
14283
 * NULL, but the filename parameter can be
14284
 */
14285
void
14286
xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
14287
                             const char* filename)
14288
0
{
14289
0
    xmlParserInputPtr input;
14290
14291
0
    if ((ctxt == NULL) || (buffer == NULL))
14292
0
        return;
14293
14294
0
    input = xmlNewInputStream(ctxt);
14295
0
    if (input == NULL) {
14296
0
        xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
14297
0
        xmlClearParserCtxt(ctxt);
14298
0
        return;
14299
0
    }
14300
14301
0
    xmlClearParserCtxt(ctxt);
14302
0
    if (filename != NULL)
14303
0
        input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
14304
0
    input->base = buffer;
14305
0
    input->cur = buffer;
14306
0
    input->end = &buffer[xmlStrlen(buffer)];
14307
0
    inputPush(ctxt, input);
14308
0
}
14309
14310
/**
14311
 * xmlSAXUserParseFile:
14312
 * @sax:  a SAX handler
14313
 * @user_data:  The user data returned on SAX callbacks
14314
 * @filename:  a file name
14315
 *
14316
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
14317
 *
14318
 * parse an XML file and call the given SAX handler routines.
14319
 * Automatic support for ZLIB/Compress compressed document is provided
14320
 *
14321
 * Returns 0 in case of success or a error number otherwise
14322
 */
14323
int
14324
xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
14325
0
                    const char *filename) {
14326
0
    int ret = 0;
14327
0
    xmlParserCtxtPtr ctxt;
14328
14329
0
    ctxt = xmlCreateFileParserCtxt(filename);
14330
0
    if (ctxt == NULL) return -1;
14331
0
    if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14332
0
  xmlFree(ctxt->sax);
14333
0
    ctxt->sax = sax;
14334
0
    xmlDetectSAX2(ctxt);
14335
14336
0
    if (user_data != NULL)
14337
0
  ctxt->userData = user_data;
14338
14339
0
    xmlParseDocument(ctxt);
14340
14341
0
    if (ctxt->wellFormed)
14342
0
  ret = 0;
14343
0
    else {
14344
0
        if (ctxt->errNo != 0)
14345
0
      ret = ctxt->errNo;
14346
0
  else
14347
0
      ret = -1;
14348
0
    }
14349
0
    if (sax != NULL)
14350
0
  ctxt->sax = NULL;
14351
0
    if (ctxt->myDoc != NULL) {
14352
0
        xmlFreeDoc(ctxt->myDoc);
14353
0
  ctxt->myDoc = NULL;
14354
0
    }
14355
0
    xmlFreeParserCtxt(ctxt);
14356
14357
0
    return ret;
14358
0
}
14359
#endif /* LIBXML_SAX1_ENABLED */
14360
14361
/************************************************************************
14362
 *                  *
14363
 *    Front ends when parsing from memory     *
14364
 *                  *
14365
 ************************************************************************/
14366
14367
/**
14368
 * xmlCreateMemoryParserCtxt:
14369
 * @buffer:  a pointer to a char array
14370
 * @size:  the size of the array
14371
 *
14372
 * Create a parser context for an XML in-memory document.
14373
 *
14374
 * Returns the new parser context or NULL
14375
 */
14376
xmlParserCtxtPtr
14377
657k
xmlCreateMemoryParserCtxt(const char *buffer, int size) {
14378
657k
    xmlParserCtxtPtr ctxt;
14379
657k
    xmlParserInputPtr input;
14380
657k
    xmlParserInputBufferPtr buf;
14381
14382
657k
    if (buffer == NULL)
14383
0
  return(NULL);
14384
657k
    if (size <= 0)
14385
24.4k
  return(NULL);
14386
14387
632k
    ctxt = xmlNewParserCtxt();
14388
632k
    if (ctxt == NULL)
14389
0
  return(NULL);
14390
14391
    /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
14392
632k
    buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
14393
632k
    if (buf == NULL) {
14394
0
  xmlFreeParserCtxt(ctxt);
14395
0
  return(NULL);
14396
0
    }
14397
14398
632k
    input = xmlNewInputStream(ctxt);
14399
632k
    if (input == NULL) {
14400
0
  xmlFreeParserInputBuffer(buf);
14401
0
  xmlFreeParserCtxt(ctxt);
14402
0
  return(NULL);
14403
0
    }
14404
14405
632k
    input->filename = NULL;
14406
632k
    input->buf = buf;
14407
632k
    xmlBufResetInput(input->buf->buffer, input);
14408
14409
632k
    inputPush(ctxt, input);
14410
632k
    return(ctxt);
14411
632k
}
14412
14413
#ifdef LIBXML_SAX1_ENABLED
14414
/**
14415
 * xmlSAXParseMemoryWithData:
14416
 * @sax:  the SAX handler block
14417
 * @buffer:  an pointer to a char array
14418
 * @size:  the size of the array
14419
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
14420
 *             documents
14421
 * @data:  the userdata
14422
 *
14423
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
14424
 *
14425
 * parse an XML in-memory block and use the given SAX function block
14426
 * to handle the parsing callback. If sax is NULL, fallback to the default
14427
 * DOM tree building routines.
14428
 *
14429
 * User data (void *) is stored within the parser context in the
14430
 * context's _private member, so it is available nearly everywhere in libxml
14431
 *
14432
 * Returns the resulting document tree
14433
 */
14434
14435
xmlDocPtr
14436
xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
14437
0
            int size, int recovery, void *data) {
14438
0
    xmlDocPtr ret;
14439
0
    xmlParserCtxtPtr ctxt;
14440
14441
0
    xmlInitParser();
14442
14443
0
    ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14444
0
    if (ctxt == NULL) return(NULL);
14445
0
    if (sax != NULL) {
14446
0
  if (ctxt->sax != NULL)
14447
0
      xmlFree(ctxt->sax);
14448
0
        ctxt->sax = sax;
14449
0
    }
14450
0
    xmlDetectSAX2(ctxt);
14451
0
    if (data!=NULL) {
14452
0
  ctxt->_private=data;
14453
0
    }
14454
14455
0
    ctxt->recovery = recovery;
14456
14457
0
    xmlParseDocument(ctxt);
14458
14459
0
    if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14460
0
    else {
14461
0
       ret = NULL;
14462
0
       xmlFreeDoc(ctxt->myDoc);
14463
0
       ctxt->myDoc = NULL;
14464
0
    }
14465
0
    if (sax != NULL)
14466
0
  ctxt->sax = NULL;
14467
0
    xmlFreeParserCtxt(ctxt);
14468
14469
0
    return(ret);
14470
0
}
14471
14472
/**
14473
 * xmlSAXParseMemory:
14474
 * @sax:  the SAX handler block
14475
 * @buffer:  an pointer to a char array
14476
 * @size:  the size of the array
14477
 * @recovery:  work in recovery mode, i.e. tries to read not Well Formed
14478
 *             documents
14479
 *
14480
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
14481
 *
14482
 * parse an XML in-memory block and use the given SAX function block
14483
 * to handle the parsing callback. If sax is NULL, fallback to the default
14484
 * DOM tree building routines.
14485
 *
14486
 * Returns the resulting document tree
14487
 */
14488
xmlDocPtr
14489
xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
14490
0
            int size, int recovery) {
14491
0
    return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
14492
0
}
14493
14494
/**
14495
 * xmlParseMemory:
14496
 * @buffer:  an pointer to a char array
14497
 * @size:  the size of the array
14498
 *
14499
 * DEPRECATED: Use xmlReadMemory.
14500
 *
14501
 * parse an XML in-memory block and build a tree.
14502
 *
14503
 * Returns the resulting document tree
14504
 */
14505
14506
0
xmlDocPtr xmlParseMemory(const char *buffer, int size) {
14507
0
   return(xmlSAXParseMemory(NULL, buffer, size, 0));
14508
0
}
14509
14510
/**
14511
 * xmlRecoverMemory:
14512
 * @buffer:  an pointer to a char array
14513
 * @size:  the size of the array
14514
 *
14515
 * DEPRECATED: Use xmlReadMemory with XML_PARSE_RECOVER.
14516
 *
14517
 * parse an XML in-memory block and build a tree.
14518
 * In the case the document is not Well Formed, an attempt to
14519
 * build a tree is tried anyway
14520
 *
14521
 * Returns the resulting document tree or NULL in case of error
14522
 */
14523
14524
0
xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
14525
0
   return(xmlSAXParseMemory(NULL, buffer, size, 1));
14526
0
}
14527
14528
/**
14529
 * xmlSAXUserParseMemory:
14530
 * @sax:  a SAX handler
14531
 * @user_data:  The user data returned on SAX callbacks
14532
 * @buffer:  an in-memory XML document input
14533
 * @size:  the length of the XML document in bytes
14534
 *
14535
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
14536
 *
14537
 * parse an XML in-memory buffer and call the given SAX handler routines.
14538
 *
14539
 * Returns 0 in case of success or a error number otherwise
14540
 */
14541
int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
14542
0
        const char *buffer, int size) {
14543
0
    int ret = 0;
14544
0
    xmlParserCtxtPtr ctxt;
14545
14546
0
    xmlInitParser();
14547
14548
0
    ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14549
0
    if (ctxt == NULL) return -1;
14550
0
    if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14551
0
        xmlFree(ctxt->sax);
14552
0
    ctxt->sax = sax;
14553
0
    xmlDetectSAX2(ctxt);
14554
14555
0
    if (user_data != NULL)
14556
0
  ctxt->userData = user_data;
14557
14558
0
    xmlParseDocument(ctxt);
14559
14560
0
    if (ctxt->wellFormed)
14561
0
  ret = 0;
14562
0
    else {
14563
0
        if (ctxt->errNo != 0)
14564
0
      ret = ctxt->errNo;
14565
0
  else
14566
0
      ret = -1;
14567
0
    }
14568
0
    if (sax != NULL)
14569
0
        ctxt->sax = NULL;
14570
0
    if (ctxt->myDoc != NULL) {
14571
0
        xmlFreeDoc(ctxt->myDoc);
14572
0
  ctxt->myDoc = NULL;
14573
0
    }
14574
0
    xmlFreeParserCtxt(ctxt);
14575
14576
0
    return ret;
14577
0
}
14578
#endif /* LIBXML_SAX1_ENABLED */
14579
14580
/**
14581
 * xmlCreateDocParserCtxt:
14582
 * @cur:  a pointer to an array of xmlChar
14583
 *
14584
 * Creates a parser context for an XML in-memory document.
14585
 *
14586
 * Returns the new parser context or NULL
14587
 */
14588
xmlParserCtxtPtr
14589
0
xmlCreateDocParserCtxt(const xmlChar *cur) {
14590
0
    int len;
14591
14592
0
    if (cur == NULL)
14593
0
  return(NULL);
14594
0
    len = xmlStrlen(cur);
14595
0
    return(xmlCreateMemoryParserCtxt((const char *)cur, len));
14596
0
}
14597
14598
#ifdef LIBXML_SAX1_ENABLED
14599
/**
14600
 * xmlSAXParseDoc:
14601
 * @sax:  the SAX handler block
14602
 * @cur:  a pointer to an array of xmlChar
14603
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
14604
 *             documents
14605
 *
14606
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadDoc.
14607
 *
14608
 * parse an XML in-memory document and build a tree.
14609
 * It use the given SAX function block to handle the parsing callback.
14610
 * If sax is NULL, fallback to the default DOM tree building routines.
14611
 *
14612
 * Returns the resulting document tree
14613
 */
14614
14615
xmlDocPtr
14616
0
xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
14617
0
    xmlDocPtr ret;
14618
0
    xmlParserCtxtPtr ctxt;
14619
0
    xmlSAXHandlerPtr oldsax = NULL;
14620
14621
0
    if (cur == NULL) return(NULL);
14622
14623
14624
0
    ctxt = xmlCreateDocParserCtxt(cur);
14625
0
    if (ctxt == NULL) return(NULL);
14626
0
    if (sax != NULL) {
14627
0
        oldsax = ctxt->sax;
14628
0
        ctxt->sax = sax;
14629
0
        ctxt->userData = NULL;
14630
0
    }
14631
0
    xmlDetectSAX2(ctxt);
14632
14633
0
    xmlParseDocument(ctxt);
14634
0
    if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14635
0
    else {
14636
0
       ret = NULL;
14637
0
       xmlFreeDoc(ctxt->myDoc);
14638
0
       ctxt->myDoc = NULL;
14639
0
    }
14640
0
    if (sax != NULL)
14641
0
  ctxt->sax = oldsax;
14642
0
    xmlFreeParserCtxt(ctxt);
14643
14644
0
    return(ret);
14645
0
}
14646
14647
/**
14648
 * xmlParseDoc:
14649
 * @cur:  a pointer to an array of xmlChar
14650
 *
14651
 * DEPRECATED: Use xmlReadDoc.
14652
 *
14653
 * parse an XML in-memory document and build a tree.
14654
 *
14655
 * Returns the resulting document tree
14656
 */
14657
14658
xmlDocPtr
14659
0
xmlParseDoc(const xmlChar *cur) {
14660
0
    return(xmlSAXParseDoc(NULL, cur, 0));
14661
0
}
14662
#endif /* LIBXML_SAX1_ENABLED */
14663
14664
#ifdef LIBXML_LEGACY_ENABLED
14665
/************************************************************************
14666
 *                  *
14667
 *  Specific function to keep track of entities references    *
14668
 *  and used by the XSLT debugger         *
14669
 *                  *
14670
 ************************************************************************/
14671
14672
static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
14673
14674
/**
14675
 * xmlAddEntityReference:
14676
 * @ent : A valid entity
14677
 * @firstNode : A valid first node for children of entity
14678
 * @lastNode : A valid last node of children entity
14679
 *
14680
 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
14681
 */
14682
static void
14683
xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
14684
                      xmlNodePtr lastNode)
14685
{
14686
    if (xmlEntityRefFunc != NULL) {
14687
        (*xmlEntityRefFunc) (ent, firstNode, lastNode);
14688
    }
14689
}
14690
14691
14692
/**
14693
 * xmlSetEntityReferenceFunc:
14694
 * @func: A valid function
14695
 *
14696
 * Set the function to call call back when a xml reference has been made
14697
 */
14698
void
14699
xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
14700
{
14701
    xmlEntityRefFunc = func;
14702
}
14703
#endif /* LIBXML_LEGACY_ENABLED */
14704
14705
/************************************************************************
14706
 *                  *
14707
 *        Miscellaneous       *
14708
 *                  *
14709
 ************************************************************************/
14710
14711
/* Non-zero once xmlInitParser() has completed; cleared by xmlCleanupParser(). */
static int xmlParserInitialized = 0;

/*
 * Run the library's global initializers in the fixed order below and
 * then mark the library as initialized.
 */
static void
xmlInitParserSubsystems(void) {
    xmlInitThreads();
    xmlInitGlobals();
    xmlInitMemory();
    xmlInitializeDict();
    xmlInitCharEncodingHandlers();
    xmlDefaultSAXHandlerInit();
    xmlRegisterDefaultInputCallbacks();
#ifdef LIBXML_OUTPUT_ENABLED
    xmlRegisterDefaultOutputCallbacks();
#endif /* LIBXML_OUTPUT_ENABLED */
#ifdef LIBXML_HTML_ENABLED
    htmlInitAutoClose();
    htmlDefaultSAXHandlerInit();
#endif
#if defined(LIBXML_XPATH_ENABLED) || defined(LIBXML_SCHEMAS_ENABLED)
    xmlXPathInit();
#endif
    xmlParserInitialized = 1;
}

/**
 * xmlInitParser:
 *
 * Initialization function for the XML parser.
 * This is not reentrant. Call once before processing in case of
 * use in multithreaded programs.
 *
 * Returns immediately when the library is already initialized. With
 * thread support enabled, the flag is tested again while holding the
 * global init mutex before the initializers run.
 */

void
xmlInitParser(void) {
    if (xmlParserInitialized != 0)
        return;

#if defined(_WIN32) && (!defined(LIBXML_STATIC) || defined(LIBXML_STATIC_FOR_DLL))
    if (xmlFree == free)
        atexit(xmlCleanupParser);
#endif

#ifdef LIBXML_THREAD_ENABLED
    __xmlGlobalInitMutexLock();
    if (xmlParserInitialized == 0)
        xmlInitParserSubsystems();
    __xmlGlobalInitMutexUnlock();
#else
    xmlInitParserSubsystems();
#endif
}
14758
14759
/**
14760
 * xmlCleanupParser:
14761
 *
14762
 * This function name is somewhat misleading. It does not clean up
14763
 * parser state, it cleans up memory allocated by the library itself.
14764
 * It is a cleanup function for the XML library. It tries to reclaim all
14765
 * related global memory allocated for the library processing.
14766
 * It doesn't deallocate any document related memory. One should
14767
 * call xmlCleanupParser() only when the process has finished using
14768
 * the library and all XML/HTML documents built with it.
14769
 * See also xmlInitParser() which has the opposite function of preparing
14770
 * the library for operations.
14771
 *
14772
 * WARNING: if your application is multithreaded or has plugin support
14773
 *          calling this may crash the application if another thread or
14774
 *          a plugin is still using libxml2. It's sometimes very hard to
14775
 *          guess if libxml2 is in use in the application, some libraries
14776
 *          or plugins may use it without notice. In case of doubt abstain
14777
 *          from calling this function or do it just before calling exit()
14778
 *          to avoid leak reports from valgrind !
14779
 */
14780
14781
void
14782
0
xmlCleanupParser(void) {
14783
0
    if (!xmlParserInitialized)
14784
0
  return;
14785
14786
0
    xmlCleanupCharEncodingHandlers();
14787
0
#ifdef LIBXML_CATALOG_ENABLED
14788
0
    xmlCatalogCleanup();
14789
0
#endif
14790
0
    xmlDictCleanup();
14791
0
    xmlCleanupInputCallbacks();
14792
0
#ifdef LIBXML_OUTPUT_ENABLED
14793
0
    xmlCleanupOutputCallbacks();
14794
0
#endif
14795
0
#ifdef LIBXML_SCHEMAS_ENABLED
14796
0
    xmlSchemaCleanupTypes();
14797
0
    xmlRelaxNGCleanupTypes();
14798
0
#endif
14799
0
    xmlCleanupGlobals();
14800
0
    xmlCleanupThreads(); /* must be last if called not from the main thread */
14801
0
    xmlCleanupMemory();
14802
0
    xmlParserInitialized = 0;
14803
0
}
14804
14805
#if defined(HAVE_ATTRIBUTE_DESTRUCTOR) && !defined(LIBXML_STATIC) && \
14806
    !defined(_WIN32)
14807
static void
14808
ATTRIBUTE_DESTRUCTOR
14809
xmlDestructor(void) {
14810
    /*
14811
     * Calling custom deallocation functions in a destructor can cause
14812
     * problems, for example with Nokogiri.
14813
     */
14814
    if (xmlFree == free)
14815
        xmlCleanupParser();
14816
}
14817
#endif
14818
14819
/************************************************************************
14820
 *                  *
14821
 *  New set (2.6.0) of simpler and more flexible APIs   *
14822
 *                  *
14823
 ************************************************************************/
14824
14825
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A variable named "dict" must be visible where the
 * macro is used; @str is evaluated more than once.
 *
 * The expansion is wrapped in do { } while (0) so that the macro acts
 * as a single statement: it can be followed by ';' and used as the body
 * of an unbraced if/else without capturing the else.
 */
#define DICT_FREE(str)                                              \
    do {                                                            \
        if ((str) && ((!dict) ||                                    \
            (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))      \
            xmlFree((char *)(str));                                 \
    } while (0)
14836
14837
/**
14838
 * xmlCtxtReset:
14839
 * @ctxt: an XML parser context
14840
 *
14841
 * Reset a parser context
14842
 */
14843
void
14844
xmlCtxtReset(xmlParserCtxtPtr ctxt)
14845
0
{
14846
0
    xmlParserInputPtr input;
14847
0
    xmlDictPtr dict;
14848
14849
0
    if (ctxt == NULL)
14850
0
        return;
14851
14852
0
    dict = ctxt->dict;
14853
14854
0
    while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
14855
0
        xmlFreeInputStream(input);
14856
0
    }
14857
0
    ctxt->inputNr = 0;
14858
0
    ctxt->input = NULL;
14859
14860
0
    ctxt->spaceNr = 0;
14861
0
    if (ctxt->spaceTab != NULL) {
14862
0
  ctxt->spaceTab[0] = -1;
14863
0
  ctxt->space = &ctxt->spaceTab[0];
14864
0
    } else {
14865
0
        ctxt->space = NULL;
14866
0
    }
14867
14868
14869
0
    ctxt->nodeNr = 0;
14870
0
    ctxt->node = NULL;
14871
14872
0
    ctxt->nameNr = 0;
14873
0
    ctxt->name = NULL;
14874
14875
0
    ctxt->nsNr = 0;
14876
14877
0
    DICT_FREE(ctxt->version);
14878
0
    ctxt->version = NULL;
14879
0
    DICT_FREE(ctxt->encoding);
14880
0
    ctxt->encoding = NULL;
14881
0
    DICT_FREE(ctxt->directory);
14882
0
    ctxt->directory = NULL;
14883
0
    DICT_FREE(ctxt->extSubURI);
14884
0
    ctxt->extSubURI = NULL;
14885
0
    DICT_FREE(ctxt->extSubSystem);
14886
0
    ctxt->extSubSystem = NULL;
14887
0
    if (ctxt->myDoc != NULL)
14888
0
        xmlFreeDoc(ctxt->myDoc);
14889
0
    ctxt->myDoc = NULL;
14890
14891
0
    ctxt->standalone = -1;
14892
0
    ctxt->hasExternalSubset = 0;
14893
0
    ctxt->hasPErefs = 0;
14894
0
    ctxt->html = 0;
14895
0
    ctxt->external = 0;
14896
0
    ctxt->instate = XML_PARSER_START;
14897
0
    ctxt->token = 0;
14898
14899
0
    ctxt->wellFormed = 1;
14900
0
    ctxt->nsWellFormed = 1;
14901
0
    ctxt->disableSAX = 0;
14902
0
    ctxt->valid = 1;
14903
#if 0
14904
    ctxt->vctxt.userData = ctxt;
14905
    ctxt->vctxt.error = xmlParserValidityError;
14906
    ctxt->vctxt.warning = xmlParserValidityWarning;
14907
#endif
14908
0
    ctxt->record_info = 0;
14909
0
    ctxt->checkIndex = 0;
14910
0
    ctxt->inSubset = 0;
14911
0
    ctxt->errNo = XML_ERR_OK;
14912
0
    ctxt->depth = 0;
14913
0
    ctxt->charset = XML_CHAR_ENCODING_UTF8;
14914
0
    ctxt->catalogs = NULL;
14915
0
    ctxt->nbentities = 0;
14916
0
    ctxt->sizeentities = 0;
14917
0
    ctxt->sizeentcopy = 0;
14918
0
    xmlInitNodeInfoSeq(&ctxt->node_seq);
14919
14920
0
    if (ctxt->attsDefault != NULL) {
14921
0
        xmlHashFree(ctxt->attsDefault, xmlHashDefaultDeallocator);
14922
0
        ctxt->attsDefault = NULL;
14923
0
    }
14924
0
    if (ctxt->attsSpecial != NULL) {
14925
0
        xmlHashFree(ctxt->attsSpecial, NULL);
14926
0
        ctxt->attsSpecial = NULL;
14927
0
    }
14928
14929
0
#ifdef LIBXML_CATALOG_ENABLED
14930
0
    if (ctxt->catalogs != NULL)
14931
0
  xmlCatalogFreeLocal(ctxt->catalogs);
14932
0
#endif
14933
0
    if (ctxt->lastError.code != XML_ERR_OK)
14934
0
        xmlResetError(&ctxt->lastError);
14935
0
}
14936
14937
/**
14938
 * xmlCtxtResetPush:
14939
 * @ctxt: an XML parser context
14940
 * @chunk:  a pointer to an array of chars
14941
 * @size:  number of chars in the array
14942
 * @filename:  an optional file name or URI
14943
 * @encoding:  the document encoding, or NULL
14944
 *
14945
 * Reset a push parser context
14946
 *
14947
 * Returns 0 in case of success and 1 in case of error
14948
 */
14949
int
14950
xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
14951
                 int size, const char *filename, const char *encoding)
14952
0
{
14953
0
    xmlParserInputPtr inputStream;
14954
0
    xmlParserInputBufferPtr buf;
14955
0
    xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
14956
14957
0
    if (ctxt == NULL)
14958
0
        return(1);
14959
14960
0
    if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
14961
0
        enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
14962
14963
0
    buf = xmlAllocParserInputBuffer(enc);
14964
0
    if (buf == NULL)
14965
0
        return(1);
14966
14967
0
    if (ctxt == NULL) {
14968
0
        xmlFreeParserInputBuffer(buf);
14969
0
        return(1);
14970
0
    }
14971
14972
0
    xmlCtxtReset(ctxt);
14973
14974
0
    if (filename == NULL) {
14975
0
        ctxt->directory = NULL;
14976
0
    } else {
14977
0
        ctxt->directory = xmlParserGetDirectory(filename);
14978
0
    }
14979
14980
0
    inputStream = xmlNewInputStream(ctxt);
14981
0
    if (inputStream == NULL) {
14982
0
        xmlFreeParserInputBuffer(buf);
14983
0
        return(1);
14984
0
    }
14985
14986
0
    if (filename == NULL)
14987
0
        inputStream->filename = NULL;
14988
0
    else
14989
0
        inputStream->filename = (char *)
14990
0
            xmlCanonicPath((const xmlChar *) filename);
14991
0
    inputStream->buf = buf;
14992
0
    xmlBufResetInput(buf->buffer, inputStream);
14993
14994
0
    inputPush(ctxt, inputStream);
14995
14996
0
    if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
14997
0
        (ctxt->input->buf != NULL)) {
14998
0
  size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
14999
0
        size_t cur = ctxt->input->cur - ctxt->input->base;
15000
15001
0
        xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
15002
15003
0
        xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
15004
#ifdef DEBUG_PUSH
15005
        xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
15006
#endif
15007
0
    }
15008
15009
0
    if (encoding != NULL) {
15010
0
        xmlCharEncodingHandlerPtr hdlr;
15011
15012
0
        if (ctxt->encoding != NULL)
15013
0
      xmlFree((xmlChar *) ctxt->encoding);
15014
0
        ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
15015
15016
0
        hdlr = xmlFindCharEncodingHandler(encoding);
15017
0
        if (hdlr != NULL) {
15018
0
            xmlSwitchToEncoding(ctxt, hdlr);
15019
0
  } else {
15020
0
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
15021
0
            "Unsupported encoding %s\n", BAD_CAST encoding);
15022
0
        }
15023
0
    } else if (enc != XML_CHAR_ENCODING_NONE) {
15024
0
        xmlSwitchEncoding(ctxt, enc);
15025
0
    }
15026
15027
0
    return(0);
15028
0
}
15029
15030
15031
/**
15032
 * xmlCtxtUseOptionsInternal:
15033
 * @ctxt: an XML parser context
15034
 * @options:  a combination of xmlParserOption
15035
 * @encoding:  the user provided encoding to use
15036
 *
15037
 * Applies the options to the parser context
15038
 *
15039
 * Returns 0 in case of success, the set of unknown or unimplemented options
15040
 *         in case of error.
15041
 */
15042
static int
15043
xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding)
15044
1.31M
{
15045
1.31M
    if (ctxt == NULL)
15046
0
        return(-1);
15047
1.31M
    if (encoding != NULL) {
15048
0
        if (ctxt->encoding != NULL)
15049
0
      xmlFree((xmlChar *) ctxt->encoding);
15050
0
        ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
15051
0
    }
15052
1.31M
    if (options & XML_PARSE_RECOVER) {
15053
802k
        ctxt->recovery = 1;
15054
802k
        options -= XML_PARSE_RECOVER;
15055
802k
  ctxt->options |= XML_PARSE_RECOVER;
15056
802k
    } else
15057
510k
        ctxt->recovery = 0;
15058
1.31M
    if (options & XML_PARSE_DTDLOAD) {
15059
916k
        ctxt->loadsubset = XML_DETECT_IDS;
15060
916k
        options -= XML_PARSE_DTDLOAD;
15061
916k
  ctxt->options |= XML_PARSE_DTDLOAD;
15062
916k
    } else
15063
396k
        ctxt->loadsubset = 0;
15064
1.31M
    if (options & XML_PARSE_DTDATTR) {
15065
549k
        ctxt->loadsubset |= XML_COMPLETE_ATTRS;
15066
549k
        options -= XML_PARSE_DTDATTR;
15067
549k
  ctxt->options |= XML_PARSE_DTDATTR;
15068
549k
    }
15069
1.31M
    if (options & XML_PARSE_NOENT) {
15070
706k
        ctxt->replaceEntities = 1;
15071
        /* ctxt->loadsubset |= XML_DETECT_IDS; */
15072
706k
        options -= XML_PARSE_NOENT;
15073
706k
  ctxt->options |= XML_PARSE_NOENT;
15074
706k
    } else
15075
607k
        ctxt->replaceEntities = 0;
15076
1.31M
    if (options & XML_PARSE_PEDANTIC) {
15077
443k
        ctxt->pedantic = 1;
15078
443k
        options -= XML_PARSE_PEDANTIC;
15079
443k
  ctxt->options |= XML_PARSE_PEDANTIC;
15080
443k
    } else
15081
869k
        ctxt->pedantic = 0;
15082
1.31M
    if (options & XML_PARSE_NOBLANKS) {
15083
603k
        ctxt->keepBlanks = 0;
15084
603k
        ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
15085
603k
        options -= XML_PARSE_NOBLANKS;
15086
603k
  ctxt->options |= XML_PARSE_NOBLANKS;
15087
603k
    } else
15088
709k
        ctxt->keepBlanks = 1;
15089
1.31M
    if (options & XML_PARSE_DTDVALID) {
15090
514k
        ctxt->validate = 1;
15091
514k
        if (options & XML_PARSE_NOWARNING)
15092
396k
            ctxt->vctxt.warning = NULL;
15093
514k
        if (options & XML_PARSE_NOERROR)
15094
440k
            ctxt->vctxt.error = NULL;
15095
514k
        options -= XML_PARSE_DTDVALID;
15096
514k
  ctxt->options |= XML_PARSE_DTDVALID;
15097
514k
    } else
15098
798k
        ctxt->validate = 0;
15099
1.31M
    if (options & XML_PARSE_NOWARNING) {
15100
515k
        ctxt->sax->warning = NULL;
15101
515k
        options -= XML_PARSE_NOWARNING;
15102
515k
    }
15103
1.31M
    if (options & XML_PARSE_NOERROR) {
15104
587k
        ctxt->sax->error = NULL;
15105
587k
        ctxt->sax->fatalError = NULL;
15106
587k
        options -= XML_PARSE_NOERROR;
15107
587k
    }
15108
1.31M
#ifdef LIBXML_SAX1_ENABLED
15109
1.31M
    if (options & XML_PARSE_SAX1) {
15110
587k
        ctxt->sax->startElement = xmlSAX2StartElement;
15111
587k
        ctxt->sax->endElement = xmlSAX2EndElement;
15112
587k
        ctxt->sax->startElementNs = NULL;
15113
587k
        ctxt->sax->endElementNs = NULL;
15114
587k
        ctxt->sax->initialized = 1;
15115
587k
        options -= XML_PARSE_SAX1;
15116
587k
  ctxt->options |= XML_PARSE_SAX1;
15117
587k
    }
15118
1.31M
#endif /* LIBXML_SAX1_ENABLED */
15119
1.31M
    if (options & XML_PARSE_NODICT) {
15120
531k
        ctxt->dictNames = 0;
15121
531k
        options -= XML_PARSE_NODICT;
15122
531k
  ctxt->options |= XML_PARSE_NODICT;
15123
782k
    } else {
15124
782k
        ctxt->dictNames = 1;
15125
782k
    }
15126
1.31M
    if (options & XML_PARSE_NOCDATA) {
15127
509k
        ctxt->sax->cdataBlock = NULL;
15128
509k
        options -= XML_PARSE_NOCDATA;
15129
509k
  ctxt->options |= XML_PARSE_NOCDATA;
15130
509k
    }
15131
1.31M
    if (options & XML_PARSE_NSCLEAN) {
15132
603k
  ctxt->options |= XML_PARSE_NSCLEAN;
15133
603k
        options -= XML_PARSE_NSCLEAN;
15134
603k
    }
15135
1.31M
    if (options & XML_PARSE_NONET) {
15136
592k
  ctxt->options |= XML_PARSE_NONET;
15137
592k
        options -= XML_PARSE_NONET;
15138
592k
    }
15139
1.31M
    if (options & XML_PARSE_COMPACT) {
15140
656k
  ctxt->options |= XML_PARSE_COMPACT;
15141
656k
        options -= XML_PARSE_COMPACT;
15142
656k
    }
15143
1.31M
    if (options & XML_PARSE_OLD10) {
15144
424k
  ctxt->options |= XML_PARSE_OLD10;
15145
424k
        options -= XML_PARSE_OLD10;
15146
424k
    }
15147
1.31M
    if (options & XML_PARSE_NOBASEFIX) {
15148
438k
  ctxt->options |= XML_PARSE_NOBASEFIX;
15149
438k
        options -= XML_PARSE_NOBASEFIX;
15150
438k
    }
15151
1.31M
    if (options & XML_PARSE_HUGE) {
15152
372k
  ctxt->options |= XML_PARSE_HUGE;
15153
372k
        options -= XML_PARSE_HUGE;
15154
372k
        if (ctxt->dict != NULL)
15155
372k
            xmlDictSetLimit(ctxt->dict, 0);
15156
372k
    }
15157
1.31M
    if (options & XML_PARSE_OLDSAX) {
15158
368k
  ctxt->options |= XML_PARSE_OLDSAX;
15159
368k
        options -= XML_PARSE_OLDSAX;
15160
368k
    }
15161
1.31M
    if (options & XML_PARSE_IGNORE_ENC) {
15162
491k
  ctxt->options |= XML_PARSE_IGNORE_ENC;
15163
491k
        options -= XML_PARSE_IGNORE_ENC;
15164
491k
    }
15165
1.31M
    if (options & XML_PARSE_BIG_LINES) {
15166
396k
  ctxt->options |= XML_PARSE_BIG_LINES;
15167
396k
        options -= XML_PARSE_BIG_LINES;
15168
396k
    }
15169
1.31M
    ctxt->linenumbers = 1;
15170
1.31M
    return (options);
15171
1.31M
}
15172
15173
/**
15174
 * xmlCtxtUseOptions:
15175
 * @ctxt: an XML parser context
15176
 * @options:  a combination of xmlParserOption
15177
 *
15178
 * Applies the options to the parser context
15179
 *
15180
 * Returns 0 in case of success, the set of unknown or unimplemented options
15181
 *         in case of error.
15182
 */
15183
int
15184
xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
15185
953k
{
15186
953k
   return(xmlCtxtUseOptionsInternal(ctxt, options, NULL));
15187
953k
}
15188
15189
/**
15190
 * xmlDoRead:
15191
 * @ctxt:  an XML parser context
15192
 * @URL:  the base URL to use for the document
15193
 * @encoding:  the document encoding, or NULL
15194
 * @options:  a combination of xmlParserOption
15195
 * @reuse:  keep the context for reuse
15196
 *
15197
 * Common front-end for the xmlRead functions
15198
 *
15199
 * Returns the resulting document tree or NULL
15200
 */
15201
static xmlDocPtr
15202
xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
15203
          int options, int reuse)
15204
359k
{
15205
359k
    xmlDocPtr ret;
15206
15207
359k
    xmlCtxtUseOptionsInternal(ctxt, options, encoding);
15208
359k
    if (encoding != NULL) {
15209
0
        xmlCharEncodingHandlerPtr hdlr;
15210
15211
0
  hdlr = xmlFindCharEncodingHandler(encoding);
15212
0
  if (hdlr != NULL)
15213
0
      xmlSwitchToEncoding(ctxt, hdlr);
15214
0
    }
15215
359k
    if ((URL != NULL) && (ctxt->input != NULL) &&
15216
359k
        (ctxt->input->filename == NULL))
15217
359k
        ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
15218
359k
    xmlParseDocument(ctxt);
15219
359k
    if ((ctxt->wellFormed) || ctxt->recovery)
15220
211k
        ret = ctxt->myDoc;
15221
147k
    else {
15222
147k
        ret = NULL;
15223
147k
  if (ctxt->myDoc != NULL) {
15224
126k
      xmlFreeDoc(ctxt->myDoc);
15225
126k
  }
15226
147k
    }
15227
359k
    ctxt->myDoc = NULL;
15228
359k
    if (!reuse) {
15229
359k
  xmlFreeParserCtxt(ctxt);
15230
359k
    }
15231
15232
359k
    return (ret);
15233
359k
}
15234
15235
/**
15236
 * xmlReadDoc:
15237
 * @cur:  a pointer to a zero terminated string
15238
 * @URL:  the base URL to use for the document
15239
 * @encoding:  the document encoding, or NULL
15240
 * @options:  a combination of xmlParserOption
15241
 *
15242
 * parse an XML in-memory document and build a tree.
15243
 *
15244
 * Returns the resulting document tree
15245
 */
15246
xmlDocPtr
15247
xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
15248
0
{
15249
0
    xmlParserCtxtPtr ctxt;
15250
15251
0
    if (cur == NULL)
15252
0
        return (NULL);
15253
0
    xmlInitParser();
15254
15255
0
    ctxt = xmlCreateDocParserCtxt(cur);
15256
0
    if (ctxt == NULL)
15257
0
        return (NULL);
15258
0
    return (xmlDoRead(ctxt, URL, encoding, options, 0));
15259
0
}
15260
15261
/**
15262
 * xmlReadFile:
15263
 * @filename:  a file or URL
15264
 * @encoding:  the document encoding, or NULL
15265
 * @options:  a combination of xmlParserOption
15266
 *
15267
 * parse an XML file from the filesystem or the network.
15268
 *
15269
 * Returns the resulting document tree
15270
 */
15271
xmlDocPtr
15272
xmlReadFile(const char *filename, const char *encoding, int options)
15273
0
{
15274
0
    xmlParserCtxtPtr ctxt;
15275
15276
0
    xmlInitParser();
15277
0
    ctxt = xmlCreateURLParserCtxt(filename, options);
15278
0
    if (ctxt == NULL)
15279
0
        return (NULL);
15280
0
    return (xmlDoRead(ctxt, NULL, encoding, options, 0));
15281
0
}
15282
15283
/**
15284
 * xmlReadMemory:
15285
 * @buffer:  a pointer to a char array
15286
 * @size:  the size of the array
15287
 * @URL:  the base URL to use for the document
15288
 * @encoding:  the document encoding, or NULL
15289
 * @options:  a combination of xmlParserOption
15290
 *
15291
 * parse an XML in-memory document and build a tree.
15292
 *
15293
 * Returns the resulting document tree
15294
 */
15295
xmlDocPtr
15296
xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
15297
359k
{
15298
359k
    xmlParserCtxtPtr ctxt;
15299
15300
359k
    xmlInitParser();
15301
359k
    ctxt = xmlCreateMemoryParserCtxt(buffer, size);
15302
359k
    if (ctxt == NULL)
15303
526
        return (NULL);
15304
359k
    return (xmlDoRead(ctxt, URL, encoding, options, 0));
15305
359k
}
15306
15307
/**
15308
 * xmlReadFd:
15309
 * @fd:  an open file descriptor
15310
 * @URL:  the base URL to use for the document
15311
 * @encoding:  the document encoding, or NULL
15312
 * @options:  a combination of xmlParserOption
15313
 *
15314
 * parse an XML from a file descriptor and build a tree.
15315
 * NOTE that the file descriptor will not be closed when the
15316
 *      reader is closed or reset.
15317
 *
15318
 * Returns the resulting document tree
15319
 */
15320
xmlDocPtr
15321
xmlReadFd(int fd, const char *URL, const char *encoding, int options)
15322
0
{
15323
0
    xmlParserCtxtPtr ctxt;
15324
0
    xmlParserInputBufferPtr input;
15325
0
    xmlParserInputPtr stream;
15326
15327
0
    if (fd < 0)
15328
0
        return (NULL);
15329
0
    xmlInitParser();
15330
15331
0
    input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15332
0
    if (input == NULL)
15333
0
        return (NULL);
15334
0
    input->closecallback = NULL;
15335
0
    ctxt = xmlNewParserCtxt();
15336
0
    if (ctxt == NULL) {
15337
0
        xmlFreeParserInputBuffer(input);
15338
0
        return (NULL);
15339
0
    }
15340
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15341
0
    if (stream == NULL) {
15342
0
        xmlFreeParserInputBuffer(input);
15343
0
  xmlFreeParserCtxt(ctxt);
15344
0
        return (NULL);
15345
0
    }
15346
0
    inputPush(ctxt, stream);
15347
0
    return (xmlDoRead(ctxt, URL, encoding, options, 0));
15348
0
}
15349
15350
/**
15351
 * xmlReadIO:
15352
 * @ioread:  an I/O read function
15353
 * @ioclose:  an I/O close function
15354
 * @ioctx:  an I/O handler
15355
 * @URL:  the base URL to use for the document
15356
 * @encoding:  the document encoding, or NULL
15357
 * @options:  a combination of xmlParserOption
15358
 *
15359
 * parse an XML document from I/O functions and source and build a tree.
15360
 *
15361
 * Returns the resulting document tree
15362
 */
15363
xmlDocPtr
15364
xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
15365
          void *ioctx, const char *URL, const char *encoding, int options)
15366
0
{
15367
0
    xmlParserCtxtPtr ctxt;
15368
0
    xmlParserInputBufferPtr input;
15369
0
    xmlParserInputPtr stream;
15370
15371
0
    if (ioread == NULL)
15372
0
        return (NULL);
15373
0
    xmlInitParser();
15374
15375
0
    input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15376
0
                                         XML_CHAR_ENCODING_NONE);
15377
0
    if (input == NULL) {
15378
0
        if (ioclose != NULL)
15379
0
            ioclose(ioctx);
15380
0
        return (NULL);
15381
0
    }
15382
0
    ctxt = xmlNewParserCtxt();
15383
0
    if (ctxt == NULL) {
15384
0
        xmlFreeParserInputBuffer(input);
15385
0
        return (NULL);
15386
0
    }
15387
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15388
0
    if (stream == NULL) {
15389
0
        xmlFreeParserInputBuffer(input);
15390
0
  xmlFreeParserCtxt(ctxt);
15391
0
        return (NULL);
15392
0
    }
15393
0
    inputPush(ctxt, stream);
15394
0
    return (xmlDoRead(ctxt, URL, encoding, options, 0));
15395
0
}
15396
15397
/**
15398
 * xmlCtxtReadDoc:
15399
 * @ctxt:  an XML parser context
15400
 * @cur:  a pointer to a zero terminated string
15401
 * @URL:  the base URL to use for the document
15402
 * @encoding:  the document encoding, or NULL
15403
 * @options:  a combination of xmlParserOption
15404
 *
15405
 * parse an XML in-memory document and build a tree.
15406
 * This reuses the existing @ctxt parser context
15407
 *
15408
 * Returns the resulting document tree
15409
 */
15410
xmlDocPtr
15411
xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
15412
               const char *URL, const char *encoding, int options)
15413
0
{
15414
0
    if (cur == NULL)
15415
0
        return (NULL);
15416
0
    return (xmlCtxtReadMemory(ctxt, (const char *) cur, xmlStrlen(cur), URL,
15417
0
                              encoding, options));
15418
0
}
15419
15420
/**
15421
 * xmlCtxtReadFile:
15422
 * @ctxt:  an XML parser context
15423
 * @filename:  a file or URL
15424
 * @encoding:  the document encoding, or NULL
15425
 * @options:  a combination of xmlParserOption
15426
 *
15427
 * parse an XML file from the filesystem or the network.
15428
 * This reuses the existing @ctxt parser context
15429
 *
15430
 * Returns the resulting document tree
15431
 */
15432
xmlDocPtr
15433
xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
15434
                const char *encoding, int options)
15435
0
{
15436
0
    xmlParserInputPtr stream;
15437
15438
0
    if (filename == NULL)
15439
0
        return (NULL);
15440
0
    if (ctxt == NULL)
15441
0
        return (NULL);
15442
0
    xmlInitParser();
15443
15444
0
    xmlCtxtReset(ctxt);
15445
15446
0
    stream = xmlLoadExternalEntity(filename, NULL, ctxt);
15447
0
    if (stream == NULL) {
15448
0
        return (NULL);
15449
0
    }
15450
0
    inputPush(ctxt, stream);
15451
0
    return (xmlDoRead(ctxt, NULL, encoding, options, 1));
15452
0
}
15453
15454
/**
15455
 * xmlCtxtReadMemory:
15456
 * @ctxt:  an XML parser context
15457
 * @buffer:  a pointer to a char array
15458
 * @size:  the size of the array
15459
 * @URL:  the base URL to use for the document
15460
 * @encoding:  the document encoding, or NULL
15461
 * @options:  a combination of xmlParserOption
15462
 *
15463
 * parse an XML in-memory document and build a tree.
15464
 * This reuses the existing @ctxt parser context
15465
 *
15466
 * Returns the resulting document tree
15467
 */
15468
xmlDocPtr
15469
xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
15470
                  const char *URL, const char *encoding, int options)
15471
0
{
15472
0
    xmlParserInputBufferPtr input;
15473
0
    xmlParserInputPtr stream;
15474
15475
0
    if (ctxt == NULL)
15476
0
        return (NULL);
15477
0
    if (buffer == NULL)
15478
0
        return (NULL);
15479
0
    xmlInitParser();
15480
15481
0
    xmlCtxtReset(ctxt);
15482
15483
0
    input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
15484
0
    if (input == NULL) {
15485
0
  return(NULL);
15486
0
    }
15487
15488
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15489
0
    if (stream == NULL) {
15490
0
  xmlFreeParserInputBuffer(input);
15491
0
  return(NULL);
15492
0
    }
15493
15494
0
    inputPush(ctxt, stream);
15495
0
    return (xmlDoRead(ctxt, URL, encoding, options, 1));
15496
0
}
15497
15498
/**
15499
 * xmlCtxtReadFd:
15500
 * @ctxt:  an XML parser context
15501
 * @fd:  an open file descriptor
15502
 * @URL:  the base URL to use for the document
15503
 * @encoding:  the document encoding, or NULL
15504
 * @options:  a combination of xmlParserOption
15505
 *
15506
 * parse an XML from a file descriptor and build a tree.
15507
 * This reuses the existing @ctxt parser context
15508
 * NOTE that the file descriptor will not be closed when the
15509
 *      reader is closed or reset.
15510
 *
15511
 * Returns the resulting document tree
15512
 */
15513
xmlDocPtr
15514
xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
15515
              const char *URL, const char *encoding, int options)
15516
0
{
15517
0
    xmlParserInputBufferPtr input;
15518
0
    xmlParserInputPtr stream;
15519
15520
0
    if (fd < 0)
15521
0
        return (NULL);
15522
0
    if (ctxt == NULL)
15523
0
        return (NULL);
15524
0
    xmlInitParser();
15525
15526
0
    xmlCtxtReset(ctxt);
15527
15528
15529
0
    input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15530
0
    if (input == NULL)
15531
0
        return (NULL);
15532
0
    input->closecallback = NULL;
15533
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15534
0
    if (stream == NULL) {
15535
0
        xmlFreeParserInputBuffer(input);
15536
0
        return (NULL);
15537
0
    }
15538
0
    inputPush(ctxt, stream);
15539
0
    return (xmlDoRead(ctxt, URL, encoding, options, 1));
15540
0
}
15541
15542
/**
15543
 * xmlCtxtReadIO:
15544
 * @ctxt:  an XML parser context
15545
 * @ioread:  an I/O read function
15546
 * @ioclose:  an I/O close function
15547
 * @ioctx:  an I/O handler
15548
 * @URL:  the base URL to use for the document
15549
 * @encoding:  the document encoding, or NULL
15550
 * @options:  a combination of xmlParserOption
15551
 *
15552
 * parse an XML document from I/O functions and source and build a tree.
15553
 * This reuses the existing @ctxt parser context
15554
 *
15555
 * Returns the resulting document tree
15556
 */
15557
xmlDocPtr
15558
xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
15559
              xmlInputCloseCallback ioclose, void *ioctx,
15560
        const char *URL,
15561
              const char *encoding, int options)
15562
0
{
15563
0
    xmlParserInputBufferPtr input;
15564
0
    xmlParserInputPtr stream;
15565
15566
0
    if (ioread == NULL)
15567
0
        return (NULL);
15568
0
    if (ctxt == NULL)
15569
0
        return (NULL);
15570
0
    xmlInitParser();
15571
15572
0
    xmlCtxtReset(ctxt);
15573
15574
0
    input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15575
0
                                         XML_CHAR_ENCODING_NONE);
15576
0
    if (input == NULL) {
15577
0
        if (ioclose != NULL)
15578
0
            ioclose(ioctx);
15579
0
        return (NULL);
15580
0
    }
15581
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15582
0
    if (stream == NULL) {
15583
0
        xmlFreeParserInputBuffer(input);
15584
0
        return (NULL);
15585
0
    }
15586
0
    inputPush(ctxt, stream);
15587
0
    return (xmlDoRead(ctxt, URL, encoding, options, 1));
15588
0
}
15589