Coverage Report

Created: 2023-12-13 20:03

/src/libxml2/parser.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3
 *            implemented on top of the SAX interfaces
4
 *
5
 * References:
6
 *   The XML specification:
7
 *     http://www.w3.org/TR/REC-xml
8
 *   Original 1.0 version:
9
 *     http://www.w3.org/TR/1998/REC-xml-19980210
10
 *   XML second edition working draft
11
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
12
 *
13
 * Okay this is a big file, the parser core is around 7000 lines, then it
14
 * is followed by the progressive parser top routines, then the various
15
 * high level APIs to call the parser and a few miscellaneous functions.
16
 * A number of helper functions and deprecated ones have been moved to
17
 * parserInternals.c to reduce this file size.
18
 * As much as possible the functions are associated with their relative
19
 * production in the XML specification. A few productions defining the
20
 * different ranges of character are actually implemented either in
21
 * parserInternals.h or parserInternals.c
22
 * The DOM tree build is realized from the default SAX callbacks in
23
 * the module SAX.c.
24
 * The routines doing the validation checks are in valid.c and called either
25
 * from the SAX callbacks or as standalone functions using a preparsed
26
 * document.
27
 *
28
 * See Copyright for the status of this software.
29
 *
30
 * daniel@veillard.com
31
 */
32
33
/* To avoid EBCDIC trouble when parsing on zOS */
34
#if defined(__MVS__)
35
#pragma convert("ISO8859-1")
36
#endif
37
38
#define IN_LIBXML
39
#include "libxml.h"
40
41
#if defined(_WIN32)
42
#define XML_DIR_SEP '\\'
43
#else
44
#define XML_DIR_SEP '/'
45
#endif
46
47
#include <stdlib.h>
48
#include <limits.h>
49
#include <string.h>
50
#include <stdarg.h>
51
#include <stddef.h>
52
#include <ctype.h>
53
#include <stdlib.h>
54
#include <libxml/xmlmemory.h>
55
#include <libxml/threads.h>
56
#include <libxml/globals.h>
57
#include <libxml/tree.h>
58
#include <libxml/parser.h>
59
#include <libxml/parserInternals.h>
60
#include <libxml/HTMLparser.h>
61
#include <libxml/valid.h>
62
#include <libxml/entities.h>
63
#include <libxml/xmlerror.h>
64
#include <libxml/encoding.h>
65
#include <libxml/xmlIO.h>
66
#include <libxml/uri.h>
67
#ifdef LIBXML_CATALOG_ENABLED
68
#include <libxml/catalog.h>
69
#endif
70
#ifdef LIBXML_SCHEMAS_ENABLED
71
#include <libxml/xmlschemastypes.h>
72
#include <libxml/relaxng.h>
73
#endif
74
#if defined(LIBXML_XPATH_ENABLED) || defined(LIBXML_SCHEMAS_ENABLED)
75
#include <libxml/xpath.h>
76
#endif
77
78
#include "private/buf.h"
79
#include "private/enc.h"
80
#include "private/error.h"
81
#include "private/html.h"
82
#include "private/io.h"
83
#include "private/parser.h"
84
#include "private/threads.h"
85
86
struct _xmlStartTag {
87
    const xmlChar *prefix;
88
    const xmlChar *URI;
89
    int line;
90
    int nsNr;
91
};
92
93
static void
94
xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info);
95
96
static xmlParserCtxtPtr
97
xmlCreateEntityParserCtxtInternal(xmlSAXHandlerPtr sax, void *userData,
98
        const xmlChar *URL, const xmlChar *ID, const xmlChar *base,
99
        xmlParserCtxtPtr pctx);
100
101
static void xmlHaltParser(xmlParserCtxtPtr ctxt);
102
103
static int
104
xmlParseElementStart(xmlParserCtxtPtr ctxt);
105
106
static void
107
xmlParseElementEnd(xmlParserCtxtPtr ctxt);
108
109
/************************************************************************
110
 *                  *
111
 *  Arbitrary limits set in the parser. See XML_PARSE_HUGE    *
112
 *                  *
113
 ************************************************************************/
114
115
83.1M
#define XML_MAX_HUGE_LENGTH 1000000000
116
117
96.3k
#define XML_PARSER_BIG_ENTITY 1000
118
#define XML_PARSER_LOT_ENTITY 5000
119
120
/*
121
 * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity
122
 *    replacement over the size in byte of the input indicates that you have
123
 *    and exponential behaviour. A value of 10 correspond to at least 3 entity
124
 *    replacement per byte of input.
125
 */
126
2.65M
#define XML_PARSER_NON_LINEAR 10
127
128
/*
129
 * xmlParserEntityCheck
130
 *
131
 * Function to check non-linear entity expansion behaviour
132
 * This is here to detect and stop exponential linear entity expansion
133
 * This is not a limitation of the parser but a safety
134
 * boundary feature. It can be disabled with the XML_PARSE_HUGE
135
 * parser option.
136
 */
137
static int
138
xmlParserEntityCheck(xmlParserCtxtPtr ctxt, size_t size,
139
                     xmlEntityPtr ent, size_t replacement)
140
15.9M
{
141
15.9M
    size_t consumed = 0;
142
15.9M
    int i;
143
144
15.9M
    if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE))
145
11.8M
        return (0);
146
4.10M
    if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
147
2.39k
        return (1);
148
149
    /*
150
     * This may look absurd but is needed to detect
151
     * entities problems
152
     */
153
4.10M
    if ((ent != NULL) && (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
154
4.10M
  (ent->content != NULL) && (ent->checked == 0) &&
155
4.10M
  (ctxt->errNo != XML_ERR_ENTITY_LOOP)) {
156
481k
  unsigned long oldnbent = ctxt->nbentities, diff;
157
481k
  xmlChar *rep;
158
159
481k
  ent->checked = 1;
160
161
481k
        ++ctxt->depth;
162
481k
  rep = xmlStringDecodeEntities(ctxt, ent->content,
163
481k
          XML_SUBSTITUTE_REF, 0, 0, 0);
164
481k
        --ctxt->depth;
165
481k
  if ((rep == NULL) || (ctxt->errNo == XML_ERR_ENTITY_LOOP)) {
166
1.23k
      ent->content[0] = 0;
167
1.23k
  }
168
169
481k
        diff = ctxt->nbentities - oldnbent + 1;
170
481k
        if (diff > INT_MAX / 2)
171
0
            diff = INT_MAX / 2;
172
481k
  ent->checked = diff * 2;
173
481k
  if (rep != NULL) {
174
480k
      if (xmlStrchr(rep, '<'))
175
8.94k
    ent->checked |= 1;
176
480k
      xmlFree(rep);
177
480k
      rep = NULL;
178
480k
  }
179
481k
    }
180
181
    /*
182
     * Prevent entity exponential check, not just replacement while
183
     * parsing the DTD
184
     * The check is potentially costly so do that only once in a thousand
185
     */
186
4.10M
    if ((ctxt->instate == XML_PARSER_DTD) && (ctxt->nbentities > 10000) &&
187
4.10M
        (ctxt->nbentities % 1024 == 0)) {
188
0
  for (i = 0;i < ctxt->inputNr;i++) {
189
0
      consumed += ctxt->inputTab[i]->consumed +
190
0
                 (ctxt->inputTab[i]->cur - ctxt->inputTab[i]->base);
191
0
  }
192
0
  if (ctxt->nbentities > consumed * XML_PARSER_NON_LINEAR) {
193
0
      xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
194
0
      ctxt->instate = XML_PARSER_EOF;
195
0
      return (1);
196
0
  }
197
0
  consumed = 0;
198
0
    }
199
200
201
202
4.10M
    if (replacement != 0) {
203
36.7k
  if (replacement < XML_MAX_TEXT_LENGTH)
204
36.7k
      return(0);
205
206
        /*
207
   * If the volume of entity copy reaches 10 times the
208
   * amount of parsed data and over the large text threshold
209
   * then that's very likely to be an abuse.
210
   */
211
0
        if (ctxt->input != NULL) {
212
0
      consumed = ctxt->input->consumed +
213
0
                 (ctxt->input->cur - ctxt->input->base);
214
0
  }
215
0
        consumed += ctxt->sizeentities;
216
217
0
        if (replacement < XML_PARSER_NON_LINEAR * consumed)
218
0
      return(0);
219
4.07M
    } else if (size != 0) {
220
        /*
221
         * Do the check based on the replacement size of the entity
222
         */
223
96.3k
        if (size < XML_PARSER_BIG_ENTITY)
224
91.7k
      return(0);
225
226
        /*
227
         * A limit on the amount of text data reasonably used
228
         */
229
4.59k
        if (ctxt->input != NULL) {
230
4.59k
            consumed = ctxt->input->consumed +
231
4.59k
                (ctxt->input->cur - ctxt->input->base);
232
4.59k
        }
233
4.59k
        consumed += ctxt->sizeentities;
234
235
4.59k
        if ((size < XML_PARSER_NON_LINEAR * consumed) &&
236
4.59k
      (ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed))
237
3.50k
            return (0);
238
3.97M
    } else if (ent != NULL) {
239
        /*
240
         * use the number of parsed entities in the replacement
241
         */
242
2.65M
        size = ent->checked / 2;
243
244
        /*
245
         * The amount of data parsed counting entities size only once
246
         */
247
2.65M
        if (ctxt->input != NULL) {
248
2.65M
            consumed = ctxt->input->consumed +
249
2.65M
                (ctxt->input->cur - ctxt->input->base);
250
2.65M
        }
251
2.65M
        consumed += ctxt->sizeentities;
252
253
        /*
254
         * Check the density of entities for the amount of data
255
   * knowing an entity reference will take at least 3 bytes
256
         */
257
2.65M
        if (size * 3 < consumed * XML_PARSER_NON_LINEAR)
258
2.64M
            return (0);
259
2.65M
    } else {
260
        /*
261
         * strange we got no data for checking
262
         */
263
1.32M
  if (((ctxt->lastError.code != XML_ERR_UNDECLARED_ENTITY) &&
264
1.32M
       (ctxt->lastError.code != XML_WAR_UNDECLARED_ENTITY)) ||
265
1.32M
      (ctxt->nbentities <= 10000))
266
1.28M
      return (0);
267
1.32M
    }
268
41.2k
    xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
269
41.2k
    return (1);
270
4.10M
}
271
272
/**
273
 * xmlParserMaxDepth:
274
 *
275
 * arbitrary depth limit for the XML documents that we allow to
276
 * process. This is not a limitation of the parser but a safety
277
 * boundary feature. It can be disabled with the XML_PARSE_HUGE
278
 * parser option.
279
 */
280
unsigned int xmlParserMaxDepth = 256;
281
282
283
284
#define SAX2 1
285
1.56G
#define XML_PARSER_BIG_BUFFER_SIZE 300
286
1.10G
#define XML_PARSER_BUFFER_SIZE 100
287
1.27M
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
288
289
/**
290
 * XML_PARSER_CHUNK_SIZE
291
 *
292
 * When calling GROW that's the minimal amount of data
293
 * the parser expected to have received. It is not a hard
294
 * limit but an optimization when reading strings like Names
295
 * It is not strictly needed as long as inputs available characters
296
 * are followed by 0, which should be provided by the I/O level
297
 */
298
177M
#define XML_PARSER_CHUNK_SIZE 100
299
300
/*
301
 * List of XML prefixed PI allowed by W3C specs
302
 */
303
304
static const char* const xmlW3CPIs[] = {
305
    "xml-stylesheet",
306
    "xml-model",
307
    NULL
308
};
309
310
311
/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
312
static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
313
                                              const xmlChar **str);
314
315
static xmlParserErrors
316
xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
317
                xmlSAXHandlerPtr sax,
318
          void *user_data, int depth, const xmlChar *URL,
319
          const xmlChar *ID, xmlNodePtr *list);
320
321
static int
322
xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
323
                          const char *encoding);
324
#ifdef LIBXML_LEGACY_ENABLED
325
static void
326
xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
327
                      xmlNodePtr lastNode);
328
#endif /* LIBXML_LEGACY_ENABLED */
329
330
static xmlParserErrors
331
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
332
          const xmlChar *string, void *user_data, xmlNodePtr *lst);
333
334
static int
335
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
336
337
/************************************************************************
338
 *                  *
339
 *    Some factorized error routines        *
340
 *                  *
341
 ************************************************************************/
342
343
/**
344
 * xmlErrAttributeDup:
345
 * @ctxt:  an XML parser context
346
 * @prefix:  the attribute prefix
347
 * @localname:  the attribute localname
348
 *
349
 * Handle a redefinition of attribute error
350
 */
351
static void
352
xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
353
                   const xmlChar * localname)
354
119k
{
355
119k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
356
119k
        (ctxt->instate == XML_PARSER_EOF))
357
0
  return;
358
119k
    if (ctxt != NULL)
359
119k
  ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
360
361
119k
    if (prefix == NULL)
362
111k
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
363
111k
                        XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
364
111k
                        (const char *) localname, NULL, NULL, 0, 0,
365
111k
                        "Attribute %s redefined\n", localname);
366
8.49k
    else
367
8.49k
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
368
8.49k
                        XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
369
8.49k
                        (const char *) prefix, (const char *) localname,
370
8.49k
                        NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
371
8.49k
                        localname);
372
119k
    if (ctxt != NULL) {
373
119k
  ctxt->wellFormed = 0;
374
119k
  if (ctxt->recovery == 0)
375
74.8k
      ctxt->disableSAX = 1;
376
119k
    }
377
119k
}
378
379
/**
380
 * xmlFatalErr:
381
 * @ctxt:  an XML parser context
382
 * @error:  the error number
383
 * @extra:  extra information string
384
 *
385
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
386
 */
387
static void
388
xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
389
16.6M
{
390
16.6M
    const char *errmsg;
391
392
16.6M
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
393
16.6M
        (ctxt->instate == XML_PARSER_EOF))
394
721k
  return;
395
15.9M
    switch (error) {
396
340k
        case XML_ERR_INVALID_HEX_CHARREF:
397
340k
            errmsg = "CharRef: invalid hexadecimal value";
398
340k
            break;
399
728k
        case XML_ERR_INVALID_DEC_CHARREF:
400
728k
            errmsg = "CharRef: invalid decimal value";
401
728k
            break;
402
0
        case XML_ERR_INVALID_CHARREF:
403
0
            errmsg = "CharRef: invalid value";
404
0
            break;
405
347k
        case XML_ERR_INTERNAL_ERROR:
406
347k
            errmsg = "internal error";
407
347k
            break;
408
0
        case XML_ERR_PEREF_AT_EOF:
409
0
            errmsg = "PEReference at end of document";
410
0
            break;
411
0
        case XML_ERR_PEREF_IN_PROLOG:
412
0
            errmsg = "PEReference in prolog";
413
0
            break;
414
0
        case XML_ERR_PEREF_IN_EPILOG:
415
0
            errmsg = "PEReference in epilog";
416
0
            break;
417
0
        case XML_ERR_PEREF_NO_NAME:
418
0
            errmsg = "PEReference: no name";
419
0
            break;
420
19.5k
        case XML_ERR_PEREF_SEMICOL_MISSING:
421
19.5k
            errmsg = "PEReference: expecting ';'";
422
19.5k
            break;
423
837k
        case XML_ERR_ENTITY_LOOP:
424
837k
            errmsg = "Detected an entity reference loop";
425
837k
            break;
426
0
        case XML_ERR_ENTITY_NOT_STARTED:
427
0
            errmsg = "EntityValue: \" or ' expected";
428
0
            break;
429
1.60k
        case XML_ERR_ENTITY_PE_INTERNAL:
430
1.60k
            errmsg = "PEReferences forbidden in internal subset";
431
1.60k
            break;
432
3.44k
        case XML_ERR_ENTITY_NOT_FINISHED:
433
3.44k
            errmsg = "EntityValue: \" or ' expected";
434
3.44k
            break;
435
404k
        case XML_ERR_ATTRIBUTE_NOT_STARTED:
436
404k
            errmsg = "AttValue: \" or ' expected";
437
404k
            break;
438
1.39M
        case XML_ERR_LT_IN_ATTRIBUTE:
439
1.39M
            errmsg = "Unescaped '<' not allowed in attributes values";
440
1.39M
            break;
441
8.59k
        case XML_ERR_LITERAL_NOT_STARTED:
442
8.59k
            errmsg = "SystemLiteral \" or ' expected";
443
8.59k
            break;
444
11.5k
        case XML_ERR_LITERAL_NOT_FINISHED:
445
11.5k
            errmsg = "Unfinished System or Public ID \" or ' expected";
446
11.5k
            break;
447
1.05M
        case XML_ERR_MISPLACED_CDATA_END:
448
1.05M
            errmsg = "Sequence ']]>' not allowed in content";
449
1.05M
            break;
450
7.58k
        case XML_ERR_URI_REQUIRED:
451
7.58k
            errmsg = "SYSTEM or PUBLIC, the URI is missing";
452
7.58k
            break;
453
1.05k
        case XML_ERR_PUBID_REQUIRED:
454
1.05k
            errmsg = "PUBLIC, the Public Identifier is missing";
455
1.05k
            break;
456
691k
        case XML_ERR_HYPHEN_IN_COMMENT:
457
691k
            errmsg = "Comment must not contain '--' (double-hyphen)";
458
691k
            break;
459
266k
        case XML_ERR_PI_NOT_STARTED:
460
266k
            errmsg = "xmlParsePI : no target name";
461
266k
            break;
462
7.69k
        case XML_ERR_RESERVED_XML_NAME:
463
7.69k
            errmsg = "Invalid PI name";
464
7.69k
            break;
465
522
        case XML_ERR_NOTATION_NOT_STARTED:
466
522
            errmsg = "NOTATION: Name expected here";
467
522
            break;
468
2.36k
        case XML_ERR_NOTATION_NOT_FINISHED:
469
2.36k
            errmsg = "'>' required to close NOTATION declaration";
470
2.36k
            break;
471
11.2k
        case XML_ERR_VALUE_REQUIRED:
472
11.2k
            errmsg = "Entity value required";
473
11.2k
            break;
474
1.21k
        case XML_ERR_URI_FRAGMENT:
475
1.21k
            errmsg = "Fragment not allowed";
476
1.21k
            break;
477
13.7k
        case XML_ERR_ATTLIST_NOT_STARTED:
478
13.7k
            errmsg = "'(' required to start ATTLIST enumeration";
479
13.7k
            break;
480
577
        case XML_ERR_NMTOKEN_REQUIRED:
481
577
            errmsg = "NmToken expected in ATTLIST enumeration";
482
577
            break;
483
2.40k
        case XML_ERR_ATTLIST_NOT_FINISHED:
484
2.40k
            errmsg = "')' required to finish ATTLIST enumeration";
485
2.40k
            break;
486
3.21k
        case XML_ERR_MIXED_NOT_STARTED:
487
3.21k
            errmsg = "MixedContentDecl : '|' or ')*' expected";
488
3.21k
            break;
489
0
        case XML_ERR_PCDATA_REQUIRED:
490
0
            errmsg = "MixedContentDecl : '#PCDATA' expected";
491
0
            break;
492
73.7k
        case XML_ERR_ELEMCONTENT_NOT_STARTED:
493
73.7k
            errmsg = "ContentDecl : Name or '(' expected";
494
73.7k
            break;
495
15.2k
        case XML_ERR_ELEMCONTENT_NOT_FINISHED:
496
15.2k
            errmsg = "ContentDecl : ',' '|' or ')' expected";
497
15.2k
            break;
498
0
        case XML_ERR_PEREF_IN_INT_SUBSET:
499
0
            errmsg =
500
0
                "PEReference: forbidden within markup decl in internal subset";
501
0
            break;
502
3.27M
        case XML_ERR_GT_REQUIRED:
503
3.27M
            errmsg = "expected '>'";
504
3.27M
            break;
505
484
        case XML_ERR_CONDSEC_INVALID:
506
484
            errmsg = "XML conditional section '[' expected";
507
484
            break;
508
36.9k
        case XML_ERR_EXT_SUBSET_NOT_FINISHED:
509
36.9k
            errmsg = "Content error in the external subset";
510
36.9k
            break;
511
2.38k
        case XML_ERR_CONDSEC_INVALID_KEYWORD:
512
2.38k
            errmsg =
513
2.38k
                "conditional section INCLUDE or IGNORE keyword expected";
514
2.38k
            break;
515
2.88k
        case XML_ERR_CONDSEC_NOT_FINISHED:
516
2.88k
            errmsg = "XML conditional section not closed";
517
2.88k
            break;
518
222
        case XML_ERR_XMLDECL_NOT_STARTED:
519
222
            errmsg = "Text declaration '<?xml' required";
520
222
            break;
521
164k
        case XML_ERR_XMLDECL_NOT_FINISHED:
522
164k
            errmsg = "parsing XML declaration: '?>' expected";
523
164k
            break;
524
0
        case XML_ERR_EXT_ENTITY_STANDALONE:
525
0
            errmsg = "external parsed entities cannot be standalone";
526
0
            break;
527
2.32M
        case XML_ERR_ENTITYREF_SEMICOL_MISSING:
528
2.32M
            errmsg = "EntityRef: expecting ';'";
529
2.32M
            break;
530
112k
        case XML_ERR_DOCTYPE_NOT_FINISHED:
531
112k
            errmsg = "DOCTYPE improperly terminated";
532
112k
            break;
533
0
        case XML_ERR_LTSLASH_REQUIRED:
534
0
            errmsg = "EndTag: '</' not found";
535
0
            break;
536
8.98k
        case XML_ERR_EQUAL_REQUIRED:
537
8.98k
            errmsg = "expected '='";
538
8.98k
            break;
539
34.3k
        case XML_ERR_STRING_NOT_CLOSED:
540
34.3k
            errmsg = "String not closed expecting \" or '";
541
34.3k
            break;
542
8.08k
        case XML_ERR_STRING_NOT_STARTED:
543
8.08k
            errmsg = "String not started expecting ' or \"";
544
8.08k
            break;
545
1.07k
        case XML_ERR_ENCODING_NAME:
546
1.07k
            errmsg = "Invalid XML encoding name";
547
1.07k
            break;
548
997
        case XML_ERR_STANDALONE_VALUE:
549
997
            errmsg = "standalone accepts only 'yes' or 'no'";
550
997
            break;
551
50.4k
        case XML_ERR_DOCUMENT_EMPTY:
552
50.4k
            errmsg = "Document is empty";
553
50.4k
            break;
554
243k
        case XML_ERR_DOCUMENT_END:
555
243k
            errmsg = "Extra content at the end of the document";
556
243k
            break;
557
3.24M
        case XML_ERR_NOT_WELL_BALANCED:
558
3.24M
            errmsg = "chunk is not well balanced";
559
3.24M
            break;
560
0
        case XML_ERR_EXTRA_CONTENT:
561
0
            errmsg = "extra content at the end of well balanced chunk";
562
0
            break;
563
72.4k
        case XML_ERR_VERSION_MISSING:
564
72.4k
            errmsg = "Malformed declaration expecting version";
565
72.4k
            break;
566
153
        case XML_ERR_NAME_TOO_LONG:
567
153
            errmsg = "Name too long";
568
153
            break;
569
#if 0
570
        case:
571
            errmsg = "";
572
            break;
573
#endif
574
83.1k
        default:
575
83.1k
            errmsg = "Unregistered error message";
576
15.9M
    }
577
15.9M
    if (ctxt != NULL)
578
15.9M
  ctxt->errNo = error;
579
15.9M
    if (info == NULL) {
580
15.5M
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
581
15.5M
                        XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s\n",
582
15.5M
                        errmsg);
583
15.5M
    } else {
584
347k
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
585
347k
                        XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s: %s\n",
586
347k
                        errmsg, info);
587
347k
    }
588
15.9M
    if (ctxt != NULL) {
589
15.9M
  ctxt->wellFormed = 0;
590
15.9M
  if (ctxt->recovery == 0)
591
13.7M
      ctxt->disableSAX = 1;
592
15.9M
    }
593
15.9M
}
594
595
/**
596
 * xmlFatalErrMsg:
597
 * @ctxt:  an XML parser context
598
 * @error:  the error number
599
 * @msg:  the error message
600
 *
601
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
602
 */
603
static void LIBXML_ATTR_FORMAT(3,0)
604
xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
605
               const char *msg)
606
134M
{
607
134M
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
608
134M
        (ctxt->instate == XML_PARSER_EOF))
609
0
  return;
610
134M
    if (ctxt != NULL)
611
134M
  ctxt->errNo = error;
612
134M
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
613
134M
                    XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
614
134M
    if (ctxt != NULL) {
615
134M
  ctxt->wellFormed = 0;
616
134M
  if (ctxt->recovery == 0)
617
109M
      ctxt->disableSAX = 1;
618
134M
    }
619
134M
}
620
621
/**
622
 * xmlWarningMsg:
623
 * @ctxt:  an XML parser context
624
 * @error:  the error number
625
 * @msg:  the error message
626
 * @str1:  extra data
627
 * @str2:  extra data
628
 *
629
 * Handle a warning.
630
 */
631
static void LIBXML_ATTR_FORMAT(3,0)
632
xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
633
              const char *msg, const xmlChar *str1, const xmlChar *str2)
634
395k
{
635
395k
    xmlStructuredErrorFunc schannel = NULL;
636
637
395k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
638
395k
        (ctxt->instate == XML_PARSER_EOF))
639
0
  return;
640
395k
    if ((ctxt != NULL) && (ctxt->sax != NULL) &&
641
395k
        (ctxt->sax->initialized == XML_SAX2_MAGIC))
642
258k
        schannel = ctxt->sax->serror;
643
395k
    if (ctxt != NULL) {
644
395k
        __xmlRaiseError(schannel,
645
395k
                    (ctxt->sax) ? ctxt->sax->warning : NULL,
646
395k
                    ctxt->userData,
647
395k
                    ctxt, NULL, XML_FROM_PARSER, error,
648
395k
                    XML_ERR_WARNING, NULL, 0,
649
395k
        (const char *) str1, (const char *) str2, NULL, 0, 0,
650
395k
        msg, (const char *) str1, (const char *) str2);
651
395k
    } else {
652
0
        __xmlRaiseError(schannel, NULL, NULL,
653
0
                    ctxt, NULL, XML_FROM_PARSER, error,
654
0
                    XML_ERR_WARNING, NULL, 0,
655
0
        (const char *) str1, (const char *) str2, NULL, 0, 0,
656
0
        msg, (const char *) str1, (const char *) str2);
657
0
    }
658
395k
}
659
660
/**
661
 * xmlValidityError:
662
 * @ctxt:  an XML parser context
663
 * @error:  the error number
664
 * @msg:  the error message
665
 * @str1:  extra data
666
 *
667
 * Handle a validity error.
668
 */
669
static void LIBXML_ATTR_FORMAT(3,0)
670
xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
671
              const char *msg, const xmlChar *str1, const xmlChar *str2)
672
20.7k
{
673
20.7k
    xmlStructuredErrorFunc schannel = NULL;
674
675
20.7k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
676
20.7k
        (ctxt->instate == XML_PARSER_EOF))
677
0
  return;
678
20.7k
    if (ctxt != NULL) {
679
20.7k
  ctxt->errNo = error;
680
20.7k
  if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
681
13.8k
      schannel = ctxt->sax->serror;
682
20.7k
    }
683
20.7k
    if (ctxt != NULL) {
684
20.7k
        __xmlRaiseError(schannel,
685
20.7k
                    ctxt->vctxt.error, ctxt->vctxt.userData,
686
20.7k
                    ctxt, NULL, XML_FROM_DTD, error,
687
20.7k
                    XML_ERR_ERROR, NULL, 0, (const char *) str1,
688
20.7k
        (const char *) str2, NULL, 0, 0,
689
20.7k
        msg, (const char *) str1, (const char *) str2);
690
20.7k
  ctxt->valid = 0;
691
20.7k
    } else {
692
0
        __xmlRaiseError(schannel, NULL, NULL,
693
0
                    ctxt, NULL, XML_FROM_DTD, error,
694
0
                    XML_ERR_ERROR, NULL, 0, (const char *) str1,
695
0
        (const char *) str2, NULL, 0, 0,
696
0
        msg, (const char *) str1, (const char *) str2);
697
0
    }
698
20.7k
}
699
700
/**
701
 * xmlFatalErrMsgInt:
702
 * @ctxt:  an XML parser context
703
 * @error:  the error number
704
 * @msg:  the error message
705
 * @val:  an integer value
706
 *
707
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
708
 */
709
static void LIBXML_ATTR_FORMAT(3,0)
710
xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
711
                  const char *msg, int val)
712
122M
{
713
122M
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
714
122M
        (ctxt->instate == XML_PARSER_EOF))
715
0
  return;
716
122M
    if (ctxt != NULL)
717
122M
  ctxt->errNo = error;
718
122M
    __xmlRaiseError(NULL, NULL, NULL,
719
122M
                    ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
720
122M
                    NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
721
122M
    if (ctxt != NULL) {
722
122M
  ctxt->wellFormed = 0;
723
122M
  if (ctxt->recovery == 0)
724
112M
      ctxt->disableSAX = 1;
725
122M
    }
726
122M
}
727
728
/**
729
 * xmlFatalErrMsgStrIntStr:
730
 * @ctxt:  an XML parser context
731
 * @error:  the error number
732
 * @msg:  the error message
733
 * @str1:  an string info
734
 * @val:  an integer value
735
 * @str2:  an string info
736
 *
737
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
738
 */
739
static void LIBXML_ATTR_FORMAT(3,0)
740
xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
741
                  const char *msg, const xmlChar *str1, int val,
742
      const xmlChar *str2)
743
21.6M
{
744
21.6M
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
745
21.6M
        (ctxt->instate == XML_PARSER_EOF))
746
0
  return;
747
21.6M
    if (ctxt != NULL)
748
21.6M
  ctxt->errNo = error;
749
21.6M
    __xmlRaiseError(NULL, NULL, NULL,
750
21.6M
                    ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
751
21.6M
                    NULL, 0, (const char *) str1, (const char *) str2,
752
21.6M
        NULL, val, 0, msg, str1, val, str2);
753
21.6M
    if (ctxt != NULL) {
754
21.6M
  ctxt->wellFormed = 0;
755
21.6M
  if (ctxt->recovery == 0)
756
19.1M
      ctxt->disableSAX = 1;
757
21.6M
    }
758
21.6M
}
759
760
/**
761
 * xmlFatalErrMsgStr:
762
 * @ctxt:  an XML parser context
763
 * @error:  the error number
764
 * @msg:  the error message
765
 * @val:  a string value
766
 *
767
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
768
 */
769
static void LIBXML_ATTR_FORMAT(3,0)
770
xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
771
                  const char *msg, const xmlChar * val)
772
18.8M
{
773
18.8M
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
774
18.8M
        (ctxt->instate == XML_PARSER_EOF))
775
0
  return;
776
18.8M
    if (ctxt != NULL)
777
18.8M
  ctxt->errNo = error;
778
18.8M
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
779
18.8M
                    XML_FROM_PARSER, error, XML_ERR_FATAL,
780
18.8M
                    NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
781
18.8M
                    val);
782
18.8M
    if (ctxt != NULL) {
783
18.8M
  ctxt->wellFormed = 0;
784
18.8M
  if (ctxt->recovery == 0)
785
15.0M
      ctxt->disableSAX = 1;
786
18.8M
    }
787
18.8M
}
788
789
/**
790
 * xmlErrMsgStr:
791
 * @ctxt:  an XML parser context
792
 * @error:  the error number
793
 * @msg:  the error message
794
 * @val:  a string value
795
 *
796
 * Handle a non fatal parser error
797
 */
798
static void LIBXML_ATTR_FORMAT(3,0)
799
xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
800
                  const char *msg, const xmlChar * val)
801
95.7k
{
802
95.7k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
803
95.7k
        (ctxt->instate == XML_PARSER_EOF))
804
0
  return;
805
95.7k
    if (ctxt != NULL)
806
95.7k
  ctxt->errNo = error;
807
95.7k
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
808
95.7k
                    XML_FROM_PARSER, error, XML_ERR_ERROR,
809
95.7k
                    NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
810
95.7k
                    val);
811
95.7k
}
812
813
/**
814
 * xmlNsErr:
815
 * @ctxt:  an XML parser context
816
 * @error:  the error number
817
 * @msg:  the message
818
 * @info1:  extra information string
819
 * @info2:  extra information string
820
 *
821
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
822
 */
823
static void LIBXML_ATTR_FORMAT(3,0)
824
xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
825
         const char *msg,
826
         const xmlChar * info1, const xmlChar * info2,
827
         const xmlChar * info3)
828
1.39M
{
829
1.39M
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
830
1.39M
        (ctxt->instate == XML_PARSER_EOF))
831
0
  return;
832
1.39M
    if (ctxt != NULL)
833
1.39M
  ctxt->errNo = error;
834
1.39M
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
835
1.39M
                    XML_ERR_ERROR, NULL, 0, (const char *) info1,
836
1.39M
                    (const char *) info2, (const char *) info3, 0, 0, msg,
837
1.39M
                    info1, info2, info3);
838
1.39M
    if (ctxt != NULL)
839
1.39M
  ctxt->nsWellFormed = 0;
840
1.39M
}
841
842
/**
843
 * xmlNsWarn
844
 * @ctxt:  an XML parser context
845
 * @error:  the error number
846
 * @msg:  the message
847
 * @info1:  extra information string
848
 * @info2:  extra information string
849
 *
850
 * Handle a namespace warning error
851
 */
852
static void LIBXML_ATTR_FORMAT(3,0)
853
xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
854
         const char *msg,
855
         const xmlChar * info1, const xmlChar * info2,
856
         const xmlChar * info3)
857
52.0k
{
858
52.0k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
859
52.0k
        (ctxt->instate == XML_PARSER_EOF))
860
0
  return;
861
52.0k
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
862
52.0k
                    XML_ERR_WARNING, NULL, 0, (const char *) info1,
863
52.0k
                    (const char *) info2, (const char *) info3, 0, 0, msg,
864
52.0k
                    info1, info2, info3);
865
52.0k
}
866
867
/************************************************************************
868
 *                  *
869
 *    Library wide options          *
870
 *                  *
871
 ************************************************************************/
872
873
/**
874
  * xmlHasFeature:
875
  * @feature: the feature to be examined
876
  *
877
  * Examines if the library has been compiled with a given feature.
878
  *
879
  * Returns a non-zero value if the feature exist, otherwise zero.
880
  * Returns zero (0) if the feature does not exist or an unknown
881
  * unknown feature is requested, non-zero otherwise.
882
  */
883
int
884
xmlHasFeature(xmlFeature feature)
885
0
{
886
0
    switch (feature) {
887
0
  case XML_WITH_THREAD:
888
0
#ifdef LIBXML_THREAD_ENABLED
889
0
      return(1);
890
#else
891
      return(0);
892
#endif
893
0
        case XML_WITH_TREE:
894
0
#ifdef LIBXML_TREE_ENABLED
895
0
            return(1);
896
#else
897
            return(0);
898
#endif
899
0
        case XML_WITH_OUTPUT:
900
0
#ifdef LIBXML_OUTPUT_ENABLED
901
0
            return(1);
902
#else
903
            return(0);
904
#endif
905
0
        case XML_WITH_PUSH:
906
0
#ifdef LIBXML_PUSH_ENABLED
907
0
            return(1);
908
#else
909
            return(0);
910
#endif
911
0
        case XML_WITH_READER:
912
0
#ifdef LIBXML_READER_ENABLED
913
0
            return(1);
914
#else
915
            return(0);
916
#endif
917
0
        case XML_WITH_PATTERN:
918
0
#ifdef LIBXML_PATTERN_ENABLED
919
0
            return(1);
920
#else
921
            return(0);
922
#endif
923
0
        case XML_WITH_WRITER:
924
0
#ifdef LIBXML_WRITER_ENABLED
925
0
            return(1);
926
#else
927
            return(0);
928
#endif
929
0
        case XML_WITH_SAX1:
930
0
#ifdef LIBXML_SAX1_ENABLED
931
0
            return(1);
932
#else
933
            return(0);
934
#endif
935
0
        case XML_WITH_FTP:
936
#ifdef LIBXML_FTP_ENABLED
937
            return(1);
938
#else
939
0
            return(0);
940
0
#endif
941
0
        case XML_WITH_HTTP:
942
#ifdef LIBXML_HTTP_ENABLED
943
            return(1);
944
#else
945
0
            return(0);
946
0
#endif
947
0
        case XML_WITH_VALID:
948
0
#ifdef LIBXML_VALID_ENABLED
949
0
            return(1);
950
#else
951
            return(0);
952
#endif
953
0
        case XML_WITH_HTML:
954
0
#ifdef LIBXML_HTML_ENABLED
955
0
            return(1);
956
#else
957
            return(0);
958
#endif
959
0
        case XML_WITH_LEGACY:
960
#ifdef LIBXML_LEGACY_ENABLED
961
            return(1);
962
#else
963
0
            return(0);
964
0
#endif
965
0
        case XML_WITH_C14N:
966
0
#ifdef LIBXML_C14N_ENABLED
967
0
            return(1);
968
#else
969
            return(0);
970
#endif
971
0
        case XML_WITH_CATALOG:
972
0
#ifdef LIBXML_CATALOG_ENABLED
973
0
            return(1);
974
#else
975
            return(0);
976
#endif
977
0
        case XML_WITH_XPATH:
978
0
#ifdef LIBXML_XPATH_ENABLED
979
0
            return(1);
980
#else
981
            return(0);
982
#endif
983
0
        case XML_WITH_XPTR:
984
0
#ifdef LIBXML_XPTR_ENABLED
985
0
            return(1);
986
#else
987
            return(0);
988
#endif
989
0
        case XML_WITH_XINCLUDE:
990
0
#ifdef LIBXML_XINCLUDE_ENABLED
991
0
            return(1);
992
#else
993
            return(0);
994
#endif
995
0
        case XML_WITH_ICONV:
996
0
#ifdef LIBXML_ICONV_ENABLED
997
0
            return(1);
998
#else
999
            return(0);
1000
#endif
1001
0
        case XML_WITH_ISO8859X:
1002
0
#ifdef LIBXML_ISO8859X_ENABLED
1003
0
            return(1);
1004
#else
1005
            return(0);
1006
#endif
1007
0
        case XML_WITH_UNICODE:
1008
0
#ifdef LIBXML_UNICODE_ENABLED
1009
0
            return(1);
1010
#else
1011
            return(0);
1012
#endif
1013
0
        case XML_WITH_REGEXP:
1014
0
#ifdef LIBXML_REGEXP_ENABLED
1015
0
            return(1);
1016
#else
1017
            return(0);
1018
#endif
1019
0
        case XML_WITH_AUTOMATA:
1020
0
#ifdef LIBXML_AUTOMATA_ENABLED
1021
0
            return(1);
1022
#else
1023
            return(0);
1024
#endif
1025
0
        case XML_WITH_EXPR:
1026
#ifdef LIBXML_EXPR_ENABLED
1027
            return(1);
1028
#else
1029
0
            return(0);
1030
0
#endif
1031
0
        case XML_WITH_SCHEMAS:
1032
0
#ifdef LIBXML_SCHEMAS_ENABLED
1033
0
            return(1);
1034
#else
1035
            return(0);
1036
#endif
1037
0
        case XML_WITH_SCHEMATRON:
1038
0
#ifdef LIBXML_SCHEMATRON_ENABLED
1039
0
            return(1);
1040
#else
1041
            return(0);
1042
#endif
1043
0
        case XML_WITH_MODULES:
1044
0
#ifdef LIBXML_MODULES_ENABLED
1045
0
            return(1);
1046
#else
1047
            return(0);
1048
#endif
1049
0
        case XML_WITH_DEBUG:
1050
#ifdef LIBXML_DEBUG_ENABLED
1051
            return(1);
1052
#else
1053
0
            return(0);
1054
0
#endif
1055
0
        case XML_WITH_DEBUG_MEM:
1056
#ifdef DEBUG_MEMORY_LOCATION
1057
            return(1);
1058
#else
1059
0
            return(0);
1060
0
#endif
1061
0
        case XML_WITH_DEBUG_RUN:
1062
0
            return(0);
1063
0
        case XML_WITH_ZLIB:
1064
0
#ifdef LIBXML_ZLIB_ENABLED
1065
0
            return(1);
1066
#else
1067
            return(0);
1068
#endif
1069
0
        case XML_WITH_LZMA:
1070
0
#ifdef LIBXML_LZMA_ENABLED
1071
0
            return(1);
1072
#else
1073
            return(0);
1074
#endif
1075
0
        case XML_WITH_ICU:
1076
#ifdef LIBXML_ICU_ENABLED
1077
            return(1);
1078
#else
1079
0
            return(0);
1080
0
#endif
1081
0
        default:
1082
0
      break;
1083
0
     }
1084
0
     return(0);
1085
0
}
1086
1087
/************************************************************************
1088
 *                  *
1089
 *    SAX2 defaulted attributes handling      *
1090
 *                  *
1091
 ************************************************************************/
1092
1093
/**
1094
 * xmlDetectSAX2:
1095
 * @ctxt:  an XML parser context
1096
 *
1097
 * Do the SAX2 detection and specific initialization
1098
 */
1099
static void
1100
6.39M
xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
1101
6.39M
    xmlSAXHandlerPtr sax;
1102
1103
    /* Avoid unused variable warning if features are disabled. */
1104
6.39M
    (void) sax;
1105
1106
6.39M
    if (ctxt == NULL) return;
1107
6.39M
    sax = ctxt->sax;
1108
6.39M
#ifdef LIBXML_SAX1_ENABLED
1109
6.39M
    if ((sax) &&  (sax->initialized == XML_SAX2_MAGIC) &&
1110
6.39M
        ((sax->startElementNs != NULL) ||
1111
1.64M
         (sax->endElementNs != NULL) ||
1112
1.64M
         ((sax->startElement == NULL) && (sax->endElement == NULL))))
1113
1.64M
        ctxt->sax2 = 1;
1114
#else
1115
    ctxt->sax2 = 1;
1116
#endif /* LIBXML_SAX1_ENABLED */
1117
1118
6.39M
    ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
1119
6.39M
    ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
1120
6.39M
    ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
1121
6.39M
    if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
1122
6.39M
    (ctxt->str_xml_ns == NULL)) {
1123
0
        xmlErrMemory(ctxt, NULL);
1124
0
    }
1125
6.39M
}
1126
1127
typedef struct _xmlDefAttrs xmlDefAttrs;
1128
typedef xmlDefAttrs *xmlDefAttrsPtr;
1129
struct _xmlDefAttrs {
1130
    int nbAttrs;  /* number of defaulted attributes on that element */
1131
    int maxAttrs;       /* the size of the array */
1132
#if __STDC_VERSION__ >= 199901L
1133
    /* Using a C99 flexible array member avoids UBSan errors. */
1134
    const xmlChar *values[]; /* array of localname/prefix/values/external */
1135
#else
1136
    const xmlChar *values[5];
1137
#endif
1138
};
1139
1140
/**
1141
 * xmlAttrNormalizeSpace:
1142
 * @src: the source string
1143
 * @dst: the target string
1144
 *
1145
 * Normalize the space in non CDATA attribute values:
1146
 * If the attribute type is not CDATA, then the XML processor MUST further
1147
 * process the normalized attribute value by discarding any leading and
1148
 * trailing space (#x20) characters, and by replacing sequences of space
1149
 * (#x20) characters by a single space (#x20) character.
1150
 * Note that the size of dst need to be at least src, and if one doesn't need
1151
 * to preserve dst (and it doesn't come from a dictionary or read-only) then
1152
 * passing src as dst is just fine.
1153
 *
1154
 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1155
 *         is needed.
1156
 */
1157
static xmlChar *
1158
xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
1159
137k
{
1160
137k
    if ((src == NULL) || (dst == NULL))
1161
0
        return(NULL);
1162
1163
205k
    while (*src == 0x20) src++;
1164
4.00M
    while (*src != 0) {
1165
3.86M
  if (*src == 0x20) {
1166
394k
      while (*src == 0x20) src++;
1167
92.4k
      if (*src != 0)
1168
79.1k
    *dst++ = 0x20;
1169
3.77M
  } else {
1170
3.77M
      *dst++ = *src++;
1171
3.77M
  }
1172
3.86M
    }
1173
137k
    *dst = 0;
1174
137k
    if (dst == src)
1175
111k
       return(NULL);
1176
26.4k
    return(dst);
1177
137k
}
1178
1179
/**
1180
 * xmlAttrNormalizeSpace2:
1181
 * @src: the source string
1182
 *
1183
 * Normalize the space in non CDATA attribute values, a slightly more complex
1184
 * front end to avoid allocation problems when running on attribute values
1185
 * coming from the input.
1186
 *
1187
 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1188
 *         is needed.
1189
 */
1190
static const xmlChar *
1191
xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
1192
70.5k
{
1193
70.5k
    int i;
1194
70.5k
    int remove_head = 0;
1195
70.5k
    int need_realloc = 0;
1196
70.5k
    const xmlChar *cur;
1197
1198
70.5k
    if ((ctxt == NULL) || (src == NULL) || (len == NULL))
1199
0
        return(NULL);
1200
70.5k
    i = *len;
1201
70.5k
    if (i <= 0)
1202
2.64k
        return(NULL);
1203
1204
67.8k
    cur = src;
1205
85.8k
    while (*cur == 0x20) {
1206
17.9k
        cur++;
1207
17.9k
  remove_head++;
1208
17.9k
    }
1209
1.52M
    while (*cur != 0) {
1210
1.47M
  if (*cur == 0x20) {
1211
59.4k
      cur++;
1212
59.4k
      if ((*cur == 0x20) || (*cur == 0)) {
1213
10.4k
          need_realloc = 1;
1214
10.4k
    break;
1215
10.4k
      }
1216
59.4k
  } else
1217
1.41M
      cur++;
1218
1.47M
    }
1219
67.8k
    if (need_realloc) {
1220
10.4k
        xmlChar *ret;
1221
1222
10.4k
  ret = xmlStrndup(src + remove_head, i - remove_head + 1);
1223
10.4k
  if (ret == NULL) {
1224
0
      xmlErrMemory(ctxt, NULL);
1225
0
      return(NULL);
1226
0
  }
1227
10.4k
  xmlAttrNormalizeSpace(ret, ret);
1228
10.4k
  *len = strlen((const char *)ret);
1229
10.4k
        return(ret);
1230
57.3k
    } else if (remove_head) {
1231
1.39k
        *len -= remove_head;
1232
1.39k
        memmove(src, src + remove_head, 1 + *len);
1233
1.39k
  return(src);
1234
1.39k
    }
1235
55.9k
    return(NULL);
1236
67.8k
}
1237
1238
/**
1239
 * xmlAddDefAttrs:
1240
 * @ctxt:  an XML parser context
1241
 * @fullname:  the element fullname
1242
 * @fullattr:  the attribute fullname
1243
 * @value:  the attribute value
1244
 *
1245
 * Add a defaulted attribute for an element
1246
 */
1247
static void
1248
xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1249
               const xmlChar *fullname,
1250
               const xmlChar *fullattr,
1251
401k
               const xmlChar *value) {
1252
401k
    xmlDefAttrsPtr defaults;
1253
401k
    int len;
1254
401k
    const xmlChar *name;
1255
401k
    const xmlChar *prefix;
1256
1257
    /*
1258
     * Allows to detect attribute redefinitions
1259
     */
1260
401k
    if (ctxt->attsSpecial != NULL) {
1261
361k
        if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1262
227k
      return;
1263
361k
    }
1264
1265
173k
    if (ctxt->attsDefault == NULL) {
1266
48.2k
        ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
1267
48.2k
  if (ctxt->attsDefault == NULL)
1268
0
      goto mem_error;
1269
48.2k
    }
1270
1271
    /*
1272
     * split the element name into prefix:localname , the string found
1273
     * are within the DTD and then not associated to namespace names.
1274
     */
1275
173k
    name = xmlSplitQName3(fullname, &len);
1276
173k
    if (name == NULL) {
1277
148k
        name = xmlDictLookup(ctxt->dict, fullname, -1);
1278
148k
  prefix = NULL;
1279
148k
    } else {
1280
25.3k
        name = xmlDictLookup(ctxt->dict, name, -1);
1281
25.3k
  prefix = xmlDictLookup(ctxt->dict, fullname, len);
1282
25.3k
    }
1283
1284
    /*
1285
     * make sure there is some storage
1286
     */
1287
173k
    defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1288
173k
    if (defaults == NULL) {
1289
93.0k
        defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
1290
93.0k
                     (4 * 5) * sizeof(const xmlChar *));
1291
93.0k
  if (defaults == NULL)
1292
0
      goto mem_error;
1293
93.0k
  defaults->nbAttrs = 0;
1294
93.0k
  defaults->maxAttrs = 4;
1295
93.0k
  if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1296
93.0k
                          defaults, NULL) < 0) {
1297
0
      xmlFree(defaults);
1298
0
      goto mem_error;
1299
0
  }
1300
93.0k
    } else if (defaults->nbAttrs >= defaults->maxAttrs) {
1301
2.98k
        xmlDefAttrsPtr temp;
1302
1303
2.98k
        temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
1304
2.98k
           (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
1305
2.98k
  if (temp == NULL)
1306
0
      goto mem_error;
1307
2.98k
  defaults = temp;
1308
2.98k
  defaults->maxAttrs *= 2;
1309
2.98k
  if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1310
2.98k
                          defaults, NULL) < 0) {
1311
0
      xmlFree(defaults);
1312
0
      goto mem_error;
1313
0
  }
1314
2.98k
    }
1315
1316
    /*
1317
     * Split the element name into prefix:localname , the string found
1318
     * are within the DTD and hen not associated to namespace names.
1319
     */
1320
173k
    name = xmlSplitQName3(fullattr, &len);
1321
173k
    if (name == NULL) {
1322
114k
        name = xmlDictLookup(ctxt->dict, fullattr, -1);
1323
114k
  prefix = NULL;
1324
114k
    } else {
1325
59.2k
        name = xmlDictLookup(ctxt->dict, name, -1);
1326
59.2k
  prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1327
59.2k
    }
1328
1329
173k
    defaults->values[5 * defaults->nbAttrs] = name;
1330
173k
    defaults->values[5 * defaults->nbAttrs + 1] = prefix;
1331
    /* intern the string and precompute the end */
1332
173k
    len = xmlStrlen(value);
1333
173k
    value = xmlDictLookup(ctxt->dict, value, len);
1334
173k
    defaults->values[5 * defaults->nbAttrs + 2] = value;
1335
173k
    defaults->values[5 * defaults->nbAttrs + 3] = value + len;
1336
173k
    if (ctxt->external)
1337
99.4k
        defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
1338
74.0k
    else
1339
74.0k
        defaults->values[5 * defaults->nbAttrs + 4] = NULL;
1340
173k
    defaults->nbAttrs++;
1341
1342
173k
    return;
1343
1344
0
mem_error:
1345
0
    xmlErrMemory(ctxt, NULL);
1346
0
    return;
1347
173k
}
1348
1349
/**
1350
 * xmlAddSpecialAttr:
1351
 * @ctxt:  an XML parser context
1352
 * @fullname:  the element fullname
1353
 * @fullattr:  the attribute fullname
1354
 * @type:  the attribute type
1355
 *
1356
 * Register this attribute type
1357
 */
1358
static void
1359
xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1360
      const xmlChar *fullname,
1361
      const xmlChar *fullattr,
1362
      int type)
1363
2.79M
{
1364
2.79M
    if (ctxt->attsSpecial == NULL) {
1365
103k
        ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
1366
103k
  if (ctxt->attsSpecial == NULL)
1367
0
      goto mem_error;
1368
103k
    }
1369
1370
2.79M
    if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1371
1.08M
        return;
1372
1373
1.70M
    xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1374
1.70M
                     (void *) (ptrdiff_t) type);
1375
1.70M
    return;
1376
1377
0
mem_error:
1378
0
    xmlErrMemory(ctxt, NULL);
1379
0
    return;
1380
2.79M
}
1381
1382
/**
1383
 * xmlCleanSpecialAttrCallback:
1384
 *
1385
 * Removes CDATA attributes from the special attribute table
1386
 */
1387
static void
1388
xmlCleanSpecialAttrCallback(void *payload, void *data,
1389
                            const xmlChar *fullname, const xmlChar *fullattr,
1390
1.70M
                            const xmlChar *unused ATTRIBUTE_UNUSED) {
1391
1.70M
    xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1392
1393
1.70M
    if (((ptrdiff_t) payload) == XML_ATTRIBUTE_CDATA) {
1394
507k
        xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1395
507k
    }
1396
1.70M
}
1397
1398
/**
1399
 * xmlCleanSpecialAttr:
1400
 * @ctxt:  an XML parser context
1401
 *
1402
 * Trim the list of attributes defined to remove all those of type
1403
 * CDATA as they are not special. This call should be done when finishing
1404
 * to parse the DTD and before starting to parse the document root.
1405
 */
1406
static void
1407
xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1408
438k
{
1409
438k
    if (ctxt->attsSpecial == NULL)
1410
337k
        return;
1411
1412
101k
    xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1413
1414
101k
    if (xmlHashSize(ctxt->attsSpecial) == 0) {
1415
20.3k
        xmlHashFree(ctxt->attsSpecial, NULL);
1416
20.3k
        ctxt->attsSpecial = NULL;
1417
20.3k
    }
1418
101k
    return;
1419
438k
}
1420
1421
/**
1422
 * xmlCheckLanguageID:
1423
 * @lang:  pointer to the string value
1424
 *
1425
 * Checks that the value conforms to the LanguageID production:
1426
 *
1427
 * NOTE: this is somewhat deprecated, those productions were removed from
1428
 *       the XML Second edition.
1429
 *
1430
 * [33] LanguageID ::= Langcode ('-' Subcode)*
1431
 * [34] Langcode ::= ISO639Code |  IanaCode |  UserCode
1432
 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1433
 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1434
 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1435
 * [38] Subcode ::= ([a-z] | [A-Z])+
1436
 *
1437
 * The current REC reference the successors of RFC 1766, currently 5646
1438
 *
1439
 * http://www.rfc-editor.org/rfc/rfc5646.txt
1440
 * langtag       = language
1441
 *                 ["-" script]
1442
 *                 ["-" region]
1443
 *                 *("-" variant)
1444
 *                 *("-" extension)
1445
 *                 ["-" privateuse]
1446
 * language      = 2*3ALPHA            ; shortest ISO 639 code
1447
 *                 ["-" extlang]       ; sometimes followed by
1448
 *                                     ; extended language subtags
1449
 *               / 4ALPHA              ; or reserved for future use
1450
 *               / 5*8ALPHA            ; or registered language subtag
1451
 *
1452
 * extlang       = 3ALPHA              ; selected ISO 639 codes
1453
 *                 *2("-" 3ALPHA)      ; permanently reserved
1454
 *
1455
 * script        = 4ALPHA              ; ISO 15924 code
1456
 *
1457
 * region        = 2ALPHA              ; ISO 3166-1 code
1458
 *               / 3DIGIT              ; UN M.49 code
1459
 *
1460
 * variant       = 5*8alphanum         ; registered variants
1461
 *               / (DIGIT 3alphanum)
1462
 *
1463
 * extension     = singleton 1*("-" (2*8alphanum))
1464
 *
1465
 *                                     ; Single alphanumerics
1466
 *                                     ; "x" reserved for private use
1467
 * singleton     = DIGIT               ; 0 - 9
1468
 *               / %x41-57             ; A - W
1469
 *               / %x59-5A             ; Y - Z
1470
 *               / %x61-77             ; a - w
1471
 *               / %x79-7A             ; y - z
1472
 *
1473
 * it sounds right to still allow Irregular i-xxx IANA and user codes too
1474
 * The parser below doesn't try to cope with extension or privateuse
1475
 * that could be added but that's not interoperable anyway
1476
 *
1477
 * Returns 1 if correct 0 otherwise
1478
 **/
1479
int
1480
xmlCheckLanguageID(const xmlChar * lang)
1481
38.5k
{
1482
38.5k
    const xmlChar *cur = lang, *nxt;
1483
1484
38.5k
    if (cur == NULL)
1485
1.21k
        return (0);
1486
37.2k
    if (((cur[0] == 'i') && (cur[1] == '-')) ||
1487
37.2k
        ((cur[0] == 'I') && (cur[1] == '-')) ||
1488
37.2k
        ((cur[0] == 'x') && (cur[1] == '-')) ||
1489
37.2k
        ((cur[0] == 'X') && (cur[1] == '-'))) {
1490
        /*
1491
         * Still allow IANA code and user code which were coming
1492
         * from the previous version of the XML-1.0 specification
1493
         * it's deprecated but we should not fail
1494
         */
1495
1.35k
        cur += 2;
1496
15.1k
        while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1497
15.1k
               ((cur[0] >= 'a') && (cur[0] <= 'z')))
1498
13.8k
            cur++;
1499
1.35k
        return(cur[0] == 0);
1500
1.35k
    }
1501
35.9k
    nxt = cur;
1502
135k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1503
135k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1504
99.3k
           nxt++;
1505
35.9k
    if (nxt - cur >= 4) {
1506
        /*
1507
         * Reserved
1508
         */
1509
2.09k
        if ((nxt - cur > 8) || (nxt[0] != 0))
1510
1.78k
            return(0);
1511
303
        return(1);
1512
2.09k
    }
1513
33.8k
    if (nxt - cur < 2)
1514
2.12k
        return(0);
1515
    /* we got an ISO 639 code */
1516
31.7k
    if (nxt[0] == 0)
1517
20.2k
        return(1);
1518
11.5k
    if (nxt[0] != '-')
1519
1.26k
        return(0);
1520
1521
10.2k
    nxt++;
1522
10.2k
    cur = nxt;
1523
    /* now we can have extlang or script or region or variant */
1524
10.2k
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1525
1.38k
        goto region_m49;
1526
1527
46.4k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1528
46.4k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1529
37.5k
           nxt++;
1530
8.86k
    if (nxt - cur == 4)
1531
2.92k
        goto script;
1532
5.94k
    if (nxt - cur == 2)
1533
1.49k
        goto region;
1534
4.45k
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1535
726
        goto variant;
1536
3.72k
    if (nxt - cur != 3)
1537
863
        return(0);
1538
    /* we parsed an extlang */
1539
2.86k
    if (nxt[0] == 0)
1540
11
        return(1);
1541
2.85k
    if (nxt[0] != '-')
1542
576
        return(0);
1543
1544
2.27k
    nxt++;
1545
2.27k
    cur = nxt;
1546
    /* now we can have script or region or variant */
1547
2.27k
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1548
170
        goto region_m49;
1549
1550
16.9k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1551
16.9k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1552
14.8k
           nxt++;
1553
2.10k
    if (nxt - cur == 2)
1554
297
        goto region;
1555
1.80k
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1556
582
        goto variant;
1557
1.22k
    if (nxt - cur != 4)
1558
563
        return(0);
1559
    /* we parsed a script */
1560
3.58k
script:
1561
3.58k
    if (nxt[0] == 0)
1562
74
        return(1);
1563
3.51k
    if (nxt[0] != '-')
1564
693
        return(0);
1565
1566
2.82k
    nxt++;
1567
2.82k
    cur = nxt;
1568
    /* now we can have region or variant */
1569
2.82k
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1570
355
        goto region_m49;
1571
1572
18.1k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1573
18.1k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1574
15.6k
           nxt++;
1575
1576
2.46k
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1577
407
        goto variant;
1578
2.05k
    if (nxt - cur != 2)
1579
785
        return(0);
1580
    /* we parsed a region */
1581
4.07k
region:
1582
4.07k
    if (nxt[0] == 0)
1583
584
        return(1);
1584
3.49k
    if (nxt[0] != '-')
1585
2.00k
        return(0);
1586
1587
1.48k
    nxt++;
1588
1.48k
    cur = nxt;
1589
    /* now we can just have a variant */
1590
89.3k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1591
89.3k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1592
87.8k
           nxt++;
1593
1594
1.48k
    if ((nxt - cur < 5) || (nxt - cur > 8))
1595
1.07k
        return(0);
1596
1597
    /* we parsed a variant */
1598
2.12k
variant:
1599
2.12k
    if (nxt[0] == 0)
1600
145
        return(1);
1601
1.97k
    if (nxt[0] != '-')
1602
1.55k
        return(0);
1603
    /* extensions and private use subtags not checked */
1604
428
    return (1);
1605
1606
1.91k
region_m49:
1607
1.91k
    if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
1608
1.91k
        ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
1609
1.01k
        nxt += 3;
1610
1.01k
        goto region;
1611
1.01k
    }
1612
902
    return(0);
1613
1.91k
}
1614
1615
/************************************************************************
1616
 *                  *
1617
 *    Parser stacks related functions and macros    *
1618
 *                  *
1619
 ************************************************************************/
1620
1621
static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1622
                                            const xmlChar ** str);
1623
1624
#ifdef SAX2
1625
/**
1626
 * nsPush:
1627
 * @ctxt:  an XML parser context
1628
 * @prefix:  the namespace prefix or NULL
1629
 * @URL:  the namespace name
1630
 *
1631
 * Pushes a new parser namespace on top of the ns stack
1632
 *
1633
 * Returns -1 in case of error, -2 if the namespace should be discarded
1634
 *     and the index in the stack otherwise.
1635
 */
1636
static int
1637
nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1638
1.29M
{
1639
1.29M
    if (ctxt->options & XML_PARSE_NSCLEAN) {
1640
266k
        int i;
1641
385k
  for (i = ctxt->nsNr - 2;i >= 0;i -= 2) {
1642
208k
      if (ctxt->nsTab[i] == prefix) {
1643
    /* in scope */
1644
89.4k
          if (ctxt->nsTab[i + 1] == URL)
1645
29.7k
        return(-2);
1646
    /* out of scope keep it */
1647
59.6k
    break;
1648
89.4k
      }
1649
208k
  }
1650
266k
    }
1651
1.26M
    if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1652
251k
  ctxt->nsMax = 10;
1653
251k
  ctxt->nsNr = 0;
1654
251k
  ctxt->nsTab = (const xmlChar **)
1655
251k
                xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1656
251k
  if (ctxt->nsTab == NULL) {
1657
0
      xmlErrMemory(ctxt, NULL);
1658
0
      ctxt->nsMax = 0;
1659
0
            return (-1);
1660
0
  }
1661
1.01M
    } else if (ctxt->nsNr >= ctxt->nsMax) {
1662
22.8k
        const xmlChar ** tmp;
1663
22.8k
        ctxt->nsMax *= 2;
1664
22.8k
        tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1665
22.8k
            ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1666
22.8k
        if (tmp == NULL) {
1667
0
            xmlErrMemory(ctxt, NULL);
1668
0
      ctxt->nsMax /= 2;
1669
0
            return (-1);
1670
0
        }
1671
22.8k
  ctxt->nsTab = tmp;
1672
22.8k
    }
1673
1.26M
    ctxt->nsTab[ctxt->nsNr++] = prefix;
1674
1.26M
    ctxt->nsTab[ctxt->nsNr++] = URL;
1675
1.26M
    return (ctxt->nsNr);
1676
1.26M
}
1677
/**
1678
 * nsPop:
1679
 * @ctxt: an XML parser context
1680
 * @nr:  the number to pop
1681
 *
1682
 * Pops the top @nr parser prefix/namespace from the ns stack
1683
 *
1684
 * Returns the number of namespaces removed
1685
 */
1686
static int
1687
nsPop(xmlParserCtxtPtr ctxt, int nr)
1688
229k
{
1689
229k
    int i;
1690
1691
229k
    if (ctxt->nsTab == NULL) return(0);
1692
229k
    if (ctxt->nsNr < nr) {
1693
0
        xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1694
0
        nr = ctxt->nsNr;
1695
0
    }
1696
229k
    if (ctxt->nsNr <= 0)
1697
0
        return (0);
1698
1699
731k
    for (i = 0;i < nr;i++) {
1700
501k
         ctxt->nsNr--;
1701
501k
   ctxt->nsTab[ctxt->nsNr] = NULL;
1702
501k
    }
1703
229k
    return(nr);
1704
229k
}
1705
#endif
1706
1707
static int
1708
344k
xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1709
344k
    const xmlChar **atts;
1710
344k
    int *attallocs;
1711
344k
    int maxatts;
1712
1713
344k
    if (ctxt->atts == NULL) {
1714
342k
  maxatts = 55; /* allow for 10 attrs by default */
1715
342k
  atts = (const xmlChar **)
1716
342k
         xmlMalloc(maxatts * sizeof(xmlChar *));
1717
342k
  if (atts == NULL) goto mem_error;
1718
342k
  ctxt->atts = atts;
1719
342k
  attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1720
342k
  if (attallocs == NULL) goto mem_error;
1721
342k
  ctxt->attallocs = attallocs;
1722
342k
  ctxt->maxatts = maxatts;
1723
342k
    } else if (nr + 5 > ctxt->maxatts) {
1724
1.50k
  maxatts = (nr + 5) * 2;
1725
1.50k
  atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1726
1.50k
             maxatts * sizeof(const xmlChar *));
1727
1.50k
  if (atts == NULL) goto mem_error;
1728
1.50k
  ctxt->atts = atts;
1729
1.50k
  attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1730
1.50k
                               (maxatts / 5) * sizeof(int));
1731
1.50k
  if (attallocs == NULL) goto mem_error;
1732
1.50k
  ctxt->attallocs = attallocs;
1733
1.50k
  ctxt->maxatts = maxatts;
1734
1.50k
    }
1735
344k
    return(ctxt->maxatts);
1736
0
mem_error:
1737
0
    xmlErrMemory(ctxt, NULL);
1738
0
    return(-1);
1739
344k
}
1740
1741
/**
1742
 * inputPush:
1743
 * @ctxt:  an XML parser context
1744
 * @value:  the parser input
1745
 *
1746
 * Pushes a new parser input on top of the input stack
1747
 *
1748
 * Returns -1 in case of error, the index in the stack otherwise
1749
 */
1750
int
1751
inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1752
7.66M
{
1753
7.66M
    if ((ctxt == NULL) || (value == NULL))
1754
0
        return(-1);
1755
7.66M
    if (ctxt->inputNr >= ctxt->inputMax) {
1756
7.30k
        ctxt->inputMax *= 2;
1757
7.30k
        ctxt->inputTab =
1758
7.30k
            (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1759
7.30k
                                             ctxt->inputMax *
1760
7.30k
                                             sizeof(ctxt->inputTab[0]));
1761
7.30k
        if (ctxt->inputTab == NULL) {
1762
0
            xmlErrMemory(ctxt, NULL);
1763
0
      ctxt->inputMax /= 2;
1764
0
            return (-1);
1765
0
        }
1766
7.30k
    }
1767
7.66M
    ctxt->inputTab[ctxt->inputNr] = value;
1768
7.66M
    ctxt->input = value;
1769
7.66M
    return (ctxt->inputNr++);
1770
7.66M
}
1771
/**
1772
 * inputPop:
1773
 * @ctxt: an XML parser context
1774
 *
1775
 * Pops the top parser input from the input stack
1776
 *
1777
 * Returns the input just removed
1778
 */
1779
xmlParserInputPtr
1780
inputPop(xmlParserCtxtPtr ctxt)
1781
19.8M
{
1782
19.8M
    xmlParserInputPtr ret;
1783
1784
19.8M
    if (ctxt == NULL)
1785
0
        return(NULL);
1786
19.8M
    if (ctxt->inputNr <= 0)
1787
12.2M
        return (NULL);
1788
7.58M
    ctxt->inputNr--;
1789
7.58M
    if (ctxt->inputNr > 0)
1790
1.76M
        ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1791
5.82M
    else
1792
5.82M
        ctxt->input = NULL;
1793
7.58M
    ret = ctxt->inputTab[ctxt->inputNr];
1794
7.58M
    ctxt->inputTab[ctxt->inputNr] = NULL;
1795
7.58M
    return (ret);
1796
19.8M
}
1797
/**
1798
 * nodePush:
1799
 * @ctxt:  an XML parser context
1800
 * @value:  the element node
1801
 *
1802
 * Pushes a new element node on top of the node stack
1803
 *
1804
 * Returns -1 in case of error, the index in the stack otherwise
1805
 */
1806
int
1807
nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1808
31.4M
{
1809
31.4M
    if (ctxt == NULL) return(0);
1810
31.4M
    if (ctxt->nodeNr >= ctxt->nodeMax) {
1811
93.0k
        xmlNodePtr *tmp;
1812
1813
93.0k
  tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1814
93.0k
                                      ctxt->nodeMax * 2 *
1815
93.0k
                                      sizeof(ctxt->nodeTab[0]));
1816
93.0k
        if (tmp == NULL) {
1817
0
            xmlErrMemory(ctxt, NULL);
1818
0
            return (-1);
1819
0
        }
1820
93.0k
        ctxt->nodeTab = tmp;
1821
93.0k
  ctxt->nodeMax *= 2;
1822
93.0k
    }
1823
31.4M
    if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1824
31.4M
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
1825
3
  xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
1826
3
     "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
1827
3
        xmlParserMaxDepth);
1828
3
  xmlHaltParser(ctxt);
1829
3
  return(-1);
1830
3
    }
1831
31.4M
    ctxt->nodeTab[ctxt->nodeNr] = value;
1832
31.4M
    ctxt->node = value;
1833
31.4M
    return (ctxt->nodeNr++);
1834
31.4M
}
1835
1836
/**
1837
 * nodePop:
1838
 * @ctxt: an XML parser context
1839
 *
1840
 * Pops the top element node from the node stack
1841
 *
1842
 * Returns the node just removed
1843
 */
1844
xmlNodePtr
1845
nodePop(xmlParserCtxtPtr ctxt)
1846
30.0M
{
1847
30.0M
    xmlNodePtr ret;
1848
1849
30.0M
    if (ctxt == NULL) return(NULL);
1850
30.0M
    if (ctxt->nodeNr <= 0)
1851
5.61M
        return (NULL);
1852
24.3M
    ctxt->nodeNr--;
1853
24.3M
    if (ctxt->nodeNr > 0)
1854
22.0M
        ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1855
2.36M
    else
1856
2.36M
        ctxt->node = NULL;
1857
24.3M
    ret = ctxt->nodeTab[ctxt->nodeNr];
1858
24.3M
    ctxt->nodeTab[ctxt->nodeNr] = NULL;
1859
24.3M
    return (ret);
1860
30.0M
}
1861
1862
/**
1863
 * nameNsPush:
1864
 * @ctxt:  an XML parser context
1865
 * @value:  the element name
1866
 * @prefix:  the element prefix
1867
 * @URI:  the element namespace name
1868
 * @line:  the current line number for error messages
1869
 * @nsNr:  the number of namespaces pushed on the namespace table
1870
 *
1871
 * Pushes a new element name/prefix/URL on top of the name stack
1872
 *
1873
 * Returns -1 in case of error, the index in the stack otherwise
1874
 */
1875
static int
1876
nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1877
           const xmlChar *prefix, const xmlChar *URI, int line, int nsNr)
1878
102M
{
1879
102M
    xmlStartTag *tag;
1880
1881
102M
    if (ctxt->nameNr >= ctxt->nameMax) {
1882
395k
        const xmlChar * *tmp;
1883
395k
        xmlStartTag *tmp2;
1884
395k
        ctxt->nameMax *= 2;
1885
395k
        tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1886
395k
                                    ctxt->nameMax *
1887
395k
                                    sizeof(ctxt->nameTab[0]));
1888
395k
        if (tmp == NULL) {
1889
0
      ctxt->nameMax /= 2;
1890
0
      goto mem_error;
1891
0
        }
1892
395k
  ctxt->nameTab = tmp;
1893
395k
        tmp2 = (xmlStartTag *) xmlRealloc((void * *)ctxt->pushTab,
1894
395k
                                    ctxt->nameMax *
1895
395k
                                    sizeof(ctxt->pushTab[0]));
1896
395k
        if (tmp2 == NULL) {
1897
0
      ctxt->nameMax /= 2;
1898
0
      goto mem_error;
1899
0
        }
1900
395k
  ctxt->pushTab = tmp2;
1901
101M
    } else if (ctxt->pushTab == NULL) {
1902
5.26M
        ctxt->pushTab = (xmlStartTag *) xmlMalloc(ctxt->nameMax *
1903
5.26M
                                            sizeof(ctxt->pushTab[0]));
1904
5.26M
        if (ctxt->pushTab == NULL)
1905
0
            goto mem_error;
1906
5.26M
    }
1907
102M
    ctxt->nameTab[ctxt->nameNr] = value;
1908
102M
    ctxt->name = value;
1909
102M
    tag = &ctxt->pushTab[ctxt->nameNr];
1910
102M
    tag->prefix = prefix;
1911
102M
    tag->URI = URI;
1912
102M
    tag->line = line;
1913
102M
    tag->nsNr = nsNr;
1914
102M
    return (ctxt->nameNr++);
1915
0
mem_error:
1916
0
    xmlErrMemory(ctxt, NULL);
1917
0
    return (-1);
1918
102M
}
1919
#ifdef LIBXML_PUSH_ENABLED
1920
/**
1921
 * nameNsPop:
1922
 * @ctxt: an XML parser context
1923
 *
1924
 * Pops the top element/prefix/URI name from the name stack
1925
 *
1926
 * Returns the name just removed
1927
 */
1928
static const xmlChar *
1929
nameNsPop(xmlParserCtxtPtr ctxt)
1930
1.07M
{
1931
1.07M
    const xmlChar *ret;
1932
1933
1.07M
    if (ctxt->nameNr <= 0)
1934
0
        return (NULL);
1935
1.07M
    ctxt->nameNr--;
1936
1.07M
    if (ctxt->nameNr > 0)
1937
1.03M
        ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1938
42.6k
    else
1939
42.6k
        ctxt->name = NULL;
1940
1.07M
    ret = ctxt->nameTab[ctxt->nameNr];
1941
1.07M
    ctxt->nameTab[ctxt->nameNr] = NULL;
1942
1.07M
    return (ret);
1943
1.07M
}
1944
#endif /* LIBXML_PUSH_ENABLED */
1945
1946
/**
1947
 * namePush:
1948
 * @ctxt:  an XML parser context
1949
 * @value:  the element name
1950
 *
1951
 * Pushes a new element name on top of the name stack
1952
 *
1953
 * Returns -1 in case of error, the index in the stack otherwise
1954
 */
1955
int
1956
namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
1957
0
{
1958
0
    if (ctxt == NULL) return (-1);
1959
1960
0
    if (ctxt->nameNr >= ctxt->nameMax) {
1961
0
        const xmlChar * *tmp;
1962
0
        tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1963
0
                                    ctxt->nameMax * 2 *
1964
0
                                    sizeof(ctxt->nameTab[0]));
1965
0
        if (tmp == NULL) {
1966
0
      goto mem_error;
1967
0
        }
1968
0
  ctxt->nameTab = tmp;
1969
0
        ctxt->nameMax *= 2;
1970
0
    }
1971
0
    ctxt->nameTab[ctxt->nameNr] = value;
1972
0
    ctxt->name = value;
1973
0
    return (ctxt->nameNr++);
1974
0
mem_error:
1975
0
    xmlErrMemory(ctxt, NULL);
1976
0
    return (-1);
1977
0
}
1978
/**
1979
 * namePop:
1980
 * @ctxt: an XML parser context
1981
 *
1982
 * Pops the top element name from the name stack
1983
 *
1984
 * Returns the name just removed
1985
 */
1986
const xmlChar *
1987
namePop(xmlParserCtxtPtr ctxt)
1988
89.7M
{
1989
89.7M
    const xmlChar *ret;
1990
1991
89.7M
    if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1992
0
        return (NULL);
1993
89.7M
    ctxt->nameNr--;
1994
89.7M
    if (ctxt->nameNr > 0)
1995
80.1M
        ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1996
9.57M
    else
1997
9.57M
        ctxt->name = NULL;
1998
89.7M
    ret = ctxt->nameTab[ctxt->nameNr];
1999
89.7M
    ctxt->nameTab[ctxt->nameNr] = NULL;
2000
89.7M
    return (ret);
2001
89.7M
}
2002
2003
174M
/*
 * spacePush:
 * @ctxt:  an XML parser context
 * @val:  the value to store
 *
 * Pushes @val on the ctxt->spaceTab stack and points ctxt->space at the
 * new top entry. The stack is doubled when full.
 *
 * Returns -1 after reporting a memory error, the index in the stack
 *         otherwise
 */
static int spacePush(xmlParserCtxtPtr ctxt, int val) {
    int slot;

    if (ctxt->spaceNr >= ctxt->spaceMax) {
        int *grown;
        int doubled = ctxt->spaceMax * 2;

        grown = (int *) xmlRealloc(ctxt->spaceTab,
                                   doubled * sizeof(ctxt->spaceTab[0]));
        if (grown == NULL) {
            xmlErrMemory(ctxt, NULL);
            return(-1);
        }
        ctxt->spaceTab = grown;
        ctxt->spaceMax = doubled;
    }

    slot = ctxt->spaceNr;
    ctxt->spaceTab[slot] = val;
    ctxt->space = &ctxt->spaceTab[slot];
    ctxt->spaceNr = slot + 1;
    return(slot);
}
2021
2022
163M
/*
 * spacePop:
 * @ctxt:  an XML parser context
 *
 * Pops the top value of the ctxt->spaceTab stack. ctxt->space is moved to
 * the entry that is now on top, or to slot 0 when the stack becomes empty.
 * The vacated slot is overwritten with -1.
 *
 * Returns the value just removed, 0 if the stack is empty
 */
static int spacePop(xmlParserCtxtPtr ctxt) {
    int popped;
    int top;

    if (ctxt->spaceNr <= 0)
        return(0);

    top = --ctxt->spaceNr;
    ctxt->space = &ctxt->spaceTab[(top > 0) ? (top - 1) : 0];

    popped = ctxt->spaceTab[top];
    ctxt->spaceTab[top] = -1;
    return(popped);
}
2034
2035
/*
2036
 * Macros for accessing the content. Those should be used only by the parser,
2037
 * and not exported.
2038
 *
2039
 * Dirty macros, i.e. one often needs to make assumptions on the context to
2040
 * use them
2041
 *
2042
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
2043
 *           To be used with extreme caution since operations consuming
2044
 *           characters may move the input buffer to a different location !
2045
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
2046
 *           This should be used internally by the parser
2047
 *           only to compare to ASCII values otherwise it would break when
2048
 *           running with UTF-8 encoding.
2049
 *   RAW     same as CUR but in the input buffer, bypass any token
2050
 *           extraction that may have been done
2051
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used only
2052
 *           to compare on ASCII based substring.
2053
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
2054
 *           strings without newlines within the parser.
2055
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII
2056
 *           defined char within the parser.
2057
 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
2058
 *
2059
 *   NEXT    Skip to the next character, this does the proper decoding
2060
 *           in UTF-8 mode. It also pop-up unfinished entities on the fly.
2061
 *   NEXTL(l) Skip the current unicode character of l xmlChars long.
2062
 *   CUR_CHAR(l) returns the current unicode character (int), set l
2063
 *           to the number of xmlChars used for the encoding [0-5].
2064
 *   CUR_SCHAR  same but operate on a string instead of the context
2065
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
2066
 *            the index
2067
 *   GROW, SHRINK  handling of input buffers
2068
 */
2069
2070
1.98G
/* Byte at the current input cursor. Expects a variable named ctxt in scope. */
#define RAW (*ctxt->input->cur)
2071
264M
/* Same expansion as RAW. */
#define CUR (*ctxt->input->cur)
2072
1.18G
/* Byte at offset val from the current input cursor; no bounds check is done here. */
#define NXT(val) ctxt->input->cur[(val)]
2073
199M
/* The current input cursor itself (a pointer, not the byte it points to). */
#define CUR_PTR ctxt->input->cur
2074
6.42M
/* Start of the current input's buffer. */
#define BASE_PTR ctxt->input->base
2075
2076
/*
 * CMPn(s, c1, ..., cn): true when the first n bytes at s, read as
 * unsigned char, are equal to c1 ... cn in that order. Comparison stops
 * at the first mismatch, so bytes past it are not read.
 * Every macro parameter is parenthesized so that arbitrary expressions
 * can be passed safely; s may be evaluated several times.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((unsigned char *) (s))[ 0 ] == (c1) && \
    ((unsigned char *) (s))[ 1 ] == (c2) && \
    ((unsigned char *) (s))[ 2 ] == (c3) && \
    ((unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((unsigned char *) (s))[ 9 ] == (c10) )
2093
2094
144M
/*
 * SKIP(val): advance the input cursor and the column counter by val
 * bytes, then ask for more input if the cursor landed on a 0 byte.
 * Line numbers are not updated, so it must not be used across newlines.
 * val is evaluated twice.
 */
#define SKIP(val) do {                                                  \
    ctxt->input->cur += (val),ctxt->input->col+=(val);                  \
    if (*ctxt->input->cur == 0)                                         \
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);                   \
  } while (0)

/*
 * SKIPL(val): advance the input cursor by val bytes one at a time,
 * keeping line and column counters in sync with any newline crossed,
 * then ask for more input if the cursor landed on a 0 byte.
 * val is parenthesized in the loop bound and re-evaluated on every
 * iteration.
 */
#define SKIPL(val) do {                                                 \
    int skipl;                                                          \
    for(skipl=0; skipl<(val); skipl++) {                                \
        if (*(ctxt->input->cur) == '\n') {                              \
        ctxt->input->line++; ctxt->input->col = 1;                      \
        } else ctxt->input->col++;                                      \
        ctxt->input->cur++;                                             \
    }                                                                   \
    if (*ctxt->input->cur == 0)                                         \
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);                   \
  } while (0)
2111
2112
1.26G
#define SHRINK if ((ctxt->progressive == 0) &&       \
2113
1.26G
       (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
2114
1.26G
       (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
2115
1.26G
  xmlSHRINK (ctxt);
2116
2117
1.74M
static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
2118
1.74M
    xmlParserInputShrink(ctxt->input);
2119
1.74M
    if (*ctxt->input->cur == 0)
2120
115k
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2121
1.74M
}
2122
2123
2.86G
#define GROW if ((ctxt->progressive == 0) &&       \
2124
2.86G
     (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
2125
2.86G
  xmlGROW (ctxt);
2126
2127
168M
static void xmlGROW (xmlParserCtxtPtr ctxt) {
2128
168M
    ptrdiff_t curEnd = ctxt->input->end - ctxt->input->cur;
2129
168M
    ptrdiff_t curBase = ctxt->input->cur - ctxt->input->base;
2130
2131
168M
    if (((curEnd > XML_MAX_LOOKUP_LIMIT) ||
2132
168M
         (curBase > XML_MAX_LOOKUP_LIMIT)) &&
2133
168M
         ((ctxt->input->buf) &&
2134
0
          (ctxt->input->buf->readcallback != xmlInputReadCallbackNop)) &&
2135
168M
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
2136
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
2137
0
        xmlHaltParser(ctxt);
2138
0
  return;
2139
0
    }
2140
168M
    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2141
168M
    if ((ctxt->input->cur > ctxt->input->end) ||
2142
168M
        (ctxt->input->cur < ctxt->input->base)) {
2143
0
        xmlHaltParser(ctxt);
2144
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "cur index out of bound");
2145
0
  return;
2146
0
    }
2147
168M
    if ((ctxt->input->cur != NULL) && (*ctxt->input->cur == 0))
2148
5.79M
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2149
168M
}
2150
2151
470M
/* Skip blanks at the cursor; expands to a call returning the count skipped. */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

/* Move to the next character via xmlNextChar(). */
#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: advance cursor and column by exactly one byte and request more
 * input when the cursor lands on a 0 byte. Line numbers are not updated.
 * The expansion is a brace block, not a do/while(0) statement.
 */
#define NEXT1 {                                                         \
        ctxt->input->col++;                                             \
        ctxt->input->cur++;                                             \
        if (*ctxt->input->cur == 0)                                     \
            xmlParserInputGrow(ctxt->input, INPUT_CHUNK);               \
    }

/*
 * NEXTL(l): account for the character at the cursor in the line/column
 * counters (a '\n' starts a new line), then advance the cursor by l
 * bytes. l is parenthesized so an expression can be passed.
 */
#define NEXTL(l) do {                                                   \
    if (*(ctxt->input->cur) == '\n') {                                  \
        ctxt->input->line++; ctxt->input->col = 1;                      \
    } else ctxt->input->col++;                                          \
    ctxt->input->cur += (l);                                            \
  } while (0)

/* Current character from the parser input; l must be an lvalue receiving its length. */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l)
/* Same as CUR_CHAR but reading from the string s. */
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)

/*
 * COPY_BUF(l,b,i,v): append character v to buffer b at index i, which is
 * advanced. A one-byte character (l == 1) is stored directly, anything
 * else goes through xmlCopyCharMultiByte().
 * The expansion is a bare if/else; callers supply the trailing ';'.
 */
#define COPY_BUF(l,b,i,v)                                               \
    if (l == 1) b[i++] = v;                                             \
    else i += xmlCopyCharMultiByte(&b[i],v)

/* Bytes consumed so far: the input's consumed counter plus the cursor offset. */
#define CUR_CONSUMED \
    (ctxt->input->consumed + (ctxt->input->cur - ctxt->input->base))
2178
2179
/**
2180
 * xmlSkipBlankChars:
2181
 * @ctxt:  the XML parser context
2182
 *
2183
 * skip all blanks character found at that point in the input streams.
2184
 * It pops up finished entities in the process if allowable at that point.
2185
 *
2186
 * Returns the number of space chars skipped
2187
 */
2188
2189
int
2190
470M
xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
2191
470M
    int res = 0;
2192
2193
    /*
2194
     * It's Okay to use CUR/NEXT here since all the blanks are on
2195
     * the ASCII range.
2196
     */
2197
470M
    if (((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) ||
2198
470M
        (ctxt->instate == XML_PARSER_START)) {
2199
415M
  const xmlChar *cur;
2200
  /*
2201
   * if we are in the document content, go really fast
2202
   */
2203
415M
  cur = ctxt->input->cur;
2204
415M
  while (IS_BLANK_CH(*cur)) {
2205
137M
      if (*cur == '\n') {
2206
5.13M
    ctxt->input->line++; ctxt->input->col = 1;
2207
131M
      } else {
2208
131M
    ctxt->input->col++;
2209
131M
      }
2210
137M
      cur++;
2211
137M
      if (res < INT_MAX)
2212
137M
    res++;
2213
137M
      if (*cur == 0) {
2214
280k
    ctxt->input->cur = cur;
2215
280k
    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2216
280k
    cur = ctxt->input->cur;
2217
280k
      }
2218
137M
  }
2219
415M
  ctxt->input->cur = cur;
2220
415M
    } else {
2221
55.1M
        int expandPE = ((ctxt->external != 0) || (ctxt->inputNr != 1));
2222
2223
229M
  while (1) {
2224
229M
            if (IS_BLANK_CH(CUR)) { /* CHECKED tstblanks.xml */
2225
168M
    NEXT;
2226
168M
      } else if (CUR == '%') {
2227
                /*
2228
                 * Need to handle support of entities branching here
2229
                 */
2230
6.00M
          if ((expandPE == 0) || (IS_BLANK_CH(NXT(1))) || (NXT(1) == 0))
2231
1.48M
                    break;
2232
4.51M
          xmlParsePEReference(ctxt);
2233
55.1M
            } else if (CUR == 0) {
2234
1.49M
                if (ctxt->inputNr <= 1)
2235
49.9k
                    break;
2236
1.44M
                xmlPopInput(ctxt);
2237
53.6M
            } else {
2238
53.6M
                break;
2239
53.6M
            }
2240
2241
            /*
2242
             * Also increase the counter when entering or exiting a PERef.
2243
             * The spec says: "When a parameter-entity reference is recognized
2244
             * in the DTD and included, its replacement text MUST be enlarged
2245
             * by the attachment of one leading and one following space (#x20)
2246
             * character."
2247
             */
2248
174M
      if (res < INT_MAX)
2249
174M
    res++;
2250
174M
        }
2251
55.1M
    }
2252
470M
    return(res);
2253
470M
}
2254
2255
/************************************************************************
2256
 *                  *
2257
 *    Commodity functions to handle entities      *
2258
 *                  *
2259
 ************************************************************************/
2260
2261
/**
2262
 * xmlPopInput:
2263
 * @ctxt:  an XML parser context
2264
 *
2265
 * xmlPopInput: the current input pointed by ctxt->input came to an end
2266
 *          pop it and return the next char.
2267
 *
2268
 * Returns the current xmlChar in the parser context
2269
 */
2270
xmlChar
2271
1.46M
xmlPopInput(xmlParserCtxtPtr ctxt) {
2272
1.46M
    if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
2273
1.46M
    if (xmlParserDebugEntities)
2274
0
  xmlGenericError(xmlGenericErrorContext,
2275
0
    "Popping input %d\n", ctxt->inputNr);
2276
1.46M
    if ((ctxt->inputNr > 1) && (ctxt->inSubset == 0) &&
2277
1.46M
        (ctxt->instate != XML_PARSER_EOF))
2278
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2279
0
                    "Unfinished entity outside the DTD");
2280
1.46M
    xmlFreeInputStream(inputPop(ctxt));
2281
1.46M
    if (*ctxt->input->cur == 0)
2282
360
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2283
1.46M
    return(CUR);
2284
1.46M
}
2285
2286
/**
2287
 * xmlPushInput:
2288
 * @ctxt:  an XML parser context
2289
 * @input:  an XML parser input fragment (entity, XML fragment ...).
2290
 *
2291
 * xmlPushInput: switch to a new input stream which is stacked on top
2292
 *               of the previous one(s).
2293
 * Returns -1 in case of error or the index in the input stack
2294
 */
2295
int
2296
1.83M
xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
2297
1.83M
    int ret;
2298
1.83M
    if (input == NULL) return(-1);
2299
2300
1.83M
    if (xmlParserDebugEntities) {
2301
0
  if ((ctxt->input != NULL) && (ctxt->input->filename))
2302
0
      xmlGenericError(xmlGenericErrorContext,
2303
0
        "%s(%d): ", ctxt->input->filename,
2304
0
        ctxt->input->line);
2305
0
  xmlGenericError(xmlGenericErrorContext,
2306
0
    "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
2307
0
    }
2308
1.83M
    if (((ctxt->inputNr > 40) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2309
1.83M
        (ctxt->inputNr > 1024)) {
2310
1.63k
        xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2311
298k
        while (ctxt->inputNr > 1)
2312
296k
            xmlFreeInputStream(inputPop(ctxt));
2313
1.63k
  return(-1);
2314
1.63k
    }
2315
1.83M
    ret = inputPush(ctxt, input);
2316
1.83M
    if (ctxt->instate == XML_PARSER_EOF)
2317
0
        return(-1);
2318
1.83M
    GROW;
2319
1.83M
    return(ret);
2320
1.83M
}
2321
2322
/**
2323
 * xmlParseCharRef:
2324
 * @ctxt:  an XML parser context
2325
 *
2326
 * DEPRECATED: Internal function, don't use.
2327
 *
2328
 * parse Reference declarations
2329
 *
2330
 * [66] CharRef ::= '&#' [0-9]+ ';' |
2331
 *                  '&#x' [0-9a-fA-F]+ ';'
2332
 *
2333
 * [ WFC: Legal Character ]
2334
 * Characters referred to using character references must match the
2335
 * production for Char.
2336
 *
2337
 * Returns the value parsed (as an int), 0 in case of error
2338
 */
2339
int
2340
11.8M
xmlParseCharRef(xmlParserCtxtPtr ctxt) {
2341
11.8M
    int val = 0;
2342
11.8M
    int count = 0;
2343
2344
    /*
2345
     * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2346
     */
2347
11.8M
    if ((RAW == '&') && (NXT(1) == '#') &&
2348
11.8M
        (NXT(2) == 'x')) {
2349
2.64M
  SKIP(3);
2350
2.64M
  GROW;
2351
6.61M
  while (RAW != ';') { /* loop blocked by count */
2352
4.30M
      if (count++ > 20) {
2353
41.8k
    count = 0;
2354
41.8k
    GROW;
2355
41.8k
                if (ctxt->instate == XML_PARSER_EOF)
2356
0
                    return(0);
2357
41.8k
      }
2358
4.30M
      if ((RAW >= '0') && (RAW <= '9'))
2359
1.80M
          val = val * 16 + (CUR - '0');
2360
2.50M
      else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2361
1.94M
          val = val * 16 + (CUR - 'a') + 10;
2362
558k
      else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2363
220k
          val = val * 16 + (CUR - 'A') + 10;
2364
337k
      else {
2365
337k
    xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2366
337k
    val = 0;
2367
337k
    break;
2368
337k
      }
2369
3.96M
      if (val > 0x110000)
2370
469k
          val = 0x110000;
2371
2372
3.96M
      NEXT;
2373
3.96M
      count++;
2374
3.96M
  }
2375
2.64M
  if (RAW == ';') {
2376
      /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2377
2.30M
      ctxt->input->col++;
2378
2.30M
      ctxt->input->cur++;
2379
2.30M
  }
2380
9.24M
    } else if  ((RAW == '&') && (NXT(1) == '#')) {
2381
9.24M
  SKIP(2);
2382
9.24M
  GROW;
2383
34.3M
  while (RAW != ';') { /* loop blocked by count */
2384
25.8M
      if (count++ > 20) {
2385
79.7k
    count = 0;
2386
79.7k
    GROW;
2387
79.7k
                if (ctxt->instate == XML_PARSER_EOF)
2388
0
                    return(0);
2389
79.7k
      }
2390
25.8M
      if ((RAW >= '0') && (RAW <= '9'))
2391
25.1M
          val = val * 10 + (CUR - '0');
2392
724k
      else {
2393
724k
    xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2394
724k
    val = 0;
2395
724k
    break;
2396
724k
      }
2397
25.1M
      if (val > 0x110000)
2398
895k
          val = 0x110000;
2399
2400
25.1M
      NEXT;
2401
25.1M
      count++;
2402
25.1M
  }
2403
9.24M
  if (RAW == ';') {
2404
      /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2405
8.51M
      ctxt->input->col++;
2406
8.51M
      ctxt->input->cur++;
2407
8.51M
  }
2408
9.24M
    } else {
2409
0
        xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2410
0
    }
2411
2412
    /*
2413
     * [ WFC: Legal Character ]
2414
     * Characters referred to using character references must match the
2415
     * production for Char.
2416
     */
2417
11.8M
    if (val >= 0x110000) {
2418
8.23k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2419
8.23k
                "xmlParseCharRef: character reference out of bounds\n",
2420
8.23k
          val);
2421
11.8M
    } else if (IS_CHAR(val)) {
2422
10.8M
        return(val);
2423
10.8M
    } else {
2424
1.07M
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2425
1.07M
                          "xmlParseCharRef: invalid xmlChar value %d\n",
2426
1.07M
                    val);
2427
1.07M
    }
2428
1.08M
    return(0);
2429
11.8M
}
2430
2431
/**
2432
 * xmlParseStringCharRef:
2433
 * @ctxt:  an XML parser context
2434
 * @str:  a pointer to an index in the string
2435
 *
2436
 * parse Reference declarations, variant parsing from a string rather
2437
 * than an an input flow.
2438
 *
2439
 * [66] CharRef ::= '&#' [0-9]+ ';' |
2440
 *                  '&#x' [0-9a-fA-F]+ ';'
2441
 *
2442
 * [ WFC: Legal Character ]
2443
 * Characters referred to using character references must match the
2444
 * production for Char.
2445
 *
2446
 * Returns the value parsed (as an int), 0 in case of error, str will be
2447
 *         updated to the current value of the index
2448
 */
2449
static int
2450
1.80M
xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2451
1.80M
    const xmlChar *ptr;
2452
1.80M
    xmlChar cur;
2453
1.80M
    int val = 0;
2454
2455
1.80M
    if ((str == NULL) || (*str == NULL)) return(0);
2456
1.80M
    ptr = *str;
2457
1.80M
    cur = *ptr;
2458
1.80M
    if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2459
171k
  ptr += 3;
2460
171k
  cur = *ptr;
2461
469k
  while (cur != ';') { /* Non input consuming loop */
2462
300k
      if ((cur >= '0') && (cur <= '9'))
2463
124k
          val = val * 16 + (cur - '0');
2464
175k
      else if ((cur >= 'a') && (cur <= 'f'))
2465
88.1k
          val = val * 16 + (cur - 'a') + 10;
2466
87.1k
      else if ((cur >= 'A') && (cur <= 'F'))
2467
84.6k
          val = val * 16 + (cur - 'A') + 10;
2468
2.48k
      else {
2469
2.48k
    xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2470
2.48k
    val = 0;
2471
2.48k
    break;
2472
2.48k
      }
2473
297k
      if (val > 0x110000)
2474
61.1k
          val = 0x110000;
2475
2476
297k
      ptr++;
2477
297k
      cur = *ptr;
2478
297k
  }
2479
171k
  if (cur == ';')
2480
168k
      ptr++;
2481
1.63M
    } else if  ((cur == '&') && (ptr[1] == '#')){
2482
1.63M
  ptr += 2;
2483
1.63M
  cur = *ptr;
2484
6.11M
  while (cur != ';') { /* Non input consuming loops */
2485
4.47M
      if ((cur >= '0') && (cur <= '9'))
2486
4.47M
          val = val * 10 + (cur - '0');
2487
3.71k
      else {
2488
3.71k
    xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2489
3.71k
    val = 0;
2490
3.71k
    break;
2491
3.71k
      }
2492
4.47M
      if (val > 0x110000)
2493
1.17k
          val = 0x110000;
2494
2495
4.47M
      ptr++;
2496
4.47M
      cur = *ptr;
2497
4.47M
  }
2498
1.63M
  if (cur == ';')
2499
1.63M
      ptr++;
2500
1.63M
    } else {
2501
0
  xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2502
0
  return(0);
2503
0
    }
2504
1.80M
    *str = ptr;
2505
2506
    /*
2507
     * [ WFC: Legal Character ]
2508
     * Characters referred to using character references must match the
2509
     * production for Char.
2510
     */
2511
1.80M
    if (val >= 0x110000) {
2512
295
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2513
295
                "xmlParseStringCharRef: character reference out of bounds\n",
2514
295
                val);
2515
1.80M
    } else if (IS_CHAR(val)) {
2516
1.80M
        return(val);
2517
1.80M
    } else {
2518
6.80k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2519
6.80k
        "xmlParseStringCharRef: invalid xmlChar value %d\n",
2520
6.80k
        val);
2521
6.80k
    }
2522
7.10k
    return(0);
2523
1.80M
}
2524
2525
/**
2526
 * xmlParserHandlePEReference:
2527
 * @ctxt:  the parser context
2528
 *
2529
 * [69] PEReference ::= '%' Name ';'
2530
 *
2531
 * [ WFC: No Recursion ]
2532
 * A parsed entity must not contain a recursive
2533
 * reference to itself, either directly or indirectly.
2534
 *
2535
 * [ WFC: Entity Declared ]
2536
 * In a document without any DTD, a document with only an internal DTD
2537
 * subset which contains no parameter entity references, or a document
2538
 * with "standalone='yes'", ...  ... The declaration of a parameter
2539
 * entity must precede any reference to it...
2540
 *
2541
 * [ VC: Entity Declared ]
2542
 * In a document with an external subset or external parameter entities
2543
 * with "standalone='no'", ...  ... The declaration of a parameter entity
2544
 * must precede any reference to it...
2545
 *
2546
 * [ WFC: In DTD ]
2547
 * Parameter-entity references may only appear in the DTD.
2548
 * NOTE: misleading but this is handled.
2549
 *
2550
 * A PEReference may have been detected in the current input stream
2551
 * the handling is done accordingly to
2552
 *      http://www.w3.org/TR/REC-xml#entproc
2553
 * i.e.
2554
 *   - Included in literal in entity values
2555
 *   - Included as Parameter Entity reference within DTDs
2556
 */
2557
void
2558
0
xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
2559
0
    switch(ctxt->instate) {
2560
0
  case XML_PARSER_CDATA_SECTION:
2561
0
      return;
2562
0
        case XML_PARSER_COMMENT:
2563
0
      return;
2564
0
  case XML_PARSER_START_TAG:
2565
0
      return;
2566
0
  case XML_PARSER_END_TAG:
2567
0
      return;
2568
0
        case XML_PARSER_EOF:
2569
0
      xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
2570
0
      return;
2571
0
        case XML_PARSER_PROLOG:
2572
0
  case XML_PARSER_START:
2573
0
  case XML_PARSER_MISC:
2574
0
      xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
2575
0
      return;
2576
0
  case XML_PARSER_ENTITY_DECL:
2577
0
        case XML_PARSER_CONTENT:
2578
0
        case XML_PARSER_ATTRIBUTE_VALUE:
2579
0
        case XML_PARSER_PI:
2580
0
  case XML_PARSER_SYSTEM_LITERAL:
2581
0
  case XML_PARSER_PUBLIC_LITERAL:
2582
      /* we just ignore it there */
2583
0
      return;
2584
0
        case XML_PARSER_EPILOG:
2585
0
      xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
2586
0
      return;
2587
0
  case XML_PARSER_ENTITY_VALUE:
2588
      /*
2589
       * NOTE: in the case of entity values, we don't do the
2590
       *       substitution here since we need the literal
2591
       *       entity value to be able to save the internal
2592
       *       subset of the document.
2593
       *       This will be handled by xmlStringDecodeEntities
2594
       */
2595
0
      return;
2596
0
        case XML_PARSER_DTD:
2597
      /*
2598
       * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2599
       * In the internal DTD subset, parameter-entity references
2600
       * can occur only where markup declarations can occur, not
2601
       * within markup declarations.
2602
       * In that case this is handled in xmlParseMarkupDecl
2603
       */
2604
0
      if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2605
0
    return;
2606
0
      if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
2607
0
    return;
2608
0
            break;
2609
0
        case XML_PARSER_IGNORE:
2610
0
            return;
2611
0
    }
2612
2613
0
    xmlParsePEReference(ctxt);
2614
0
}
2615
2616
/*
 * Macro used to grow the current buffer.
 * buffer##_size is expected to be a size_t
 * mem_error: is expected to handle memory allocation failures
 *
 * The new size is buffer##_size * 2 + (n); n is parenthesized so that an
 * expression can be passed. If the computation wraps around to a smaller
 * size, or the reallocation fails, control jumps to mem_error and
 * neither buffer nor buffer##_size has been modified.
 * The expansion is a brace block that declares its own locals.
 */
#define growBuffer(buffer, n) {                                         \
    xmlChar *tmp;                                                       \
    size_t new_size = buffer##_size * 2 + (n);                          \
    if (new_size < buffer##_size) goto mem_error;                       \
    tmp = (xmlChar *) xmlRealloc(buffer, new_size);                     \
    if (tmp == NULL) goto mem_error;                                    \
    buffer = tmp;                                                       \
    buffer##_size = new_size;                                           \
}
2630
2631
/**
2632
 * xmlStringLenDecodeEntities:
2633
 * @ctxt:  the parser context
2634
 * @str:  the input string
2635
 * @len: the string length
2636
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2637
 * @end:  an end marker xmlChar, 0 if none
2638
 * @end2:  an end marker xmlChar, 0 if none
2639
 * @end3:  an end marker xmlChar, 0 if none
2640
 *
2641
 * Takes a entity string content and process to do the adequate substitutions.
2642
 *
2643
 * [67] Reference ::= EntityRef | CharRef
2644
 *
2645
 * [69] PEReference ::= '%' Name ';'
2646
 *
2647
 * Returns A newly allocated string with the substitution done. The caller
2648
 *      must deallocate it !
2649
 */
2650
xmlChar *
2651
xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2652
11.1M
          int what, xmlChar end, xmlChar  end2, xmlChar end3) {
2653
11.1M
    xmlChar *buffer = NULL;
2654
11.1M
    size_t buffer_size = 0;
2655
11.1M
    size_t nbchars = 0;
2656
2657
11.1M
    xmlChar *current = NULL;
2658
11.1M
    xmlChar *rep = NULL;
2659
11.1M
    const xmlChar *last;
2660
11.1M
    xmlEntityPtr ent;
2661
11.1M
    int c,l;
2662
2663
11.1M
    if ((ctxt == NULL) || (str == NULL) || (len < 0))
2664
0
  return(NULL);
2665
11.1M
    last = str + len;
2666
2667
11.1M
    if (((ctxt->depth > 40) &&
2668
11.1M
         ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2669
11.1M
  (ctxt->depth > 1024)) {
2670
13.0k
  xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2671
13.0k
  return(NULL);
2672
13.0k
    }
2673
2674
    /*
2675
     * allocate a translation buffer.
2676
     */
2677
11.0M
    buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
2678
11.0M
    buffer = (xmlChar *) xmlMallocAtomic(buffer_size);
2679
11.0M
    if (buffer == NULL) goto mem_error;
2680
2681
    /*
2682
     * OK loop until we reach one of the ending char or a size limit.
2683
     * we are operating on already parsed values.
2684
     */
2685
11.0M
    if (str < last)
2686
10.3M
  c = CUR_SCHAR(str, l);
2687
723k
    else
2688
723k
        c = 0;
2689
1.02G
    while ((c != 0) && (c != end) && /* non input consuming loop */
2690
1.02G
           (c != end2) && (c != end3) &&
2691
1.02G
           (ctxt->instate != XML_PARSER_EOF)) {
2692
2693
1.01G
  if (c == 0) break;
2694
1.01G
        if ((c == '&') && (str[1] == '#')) {
2695
1.80M
      int val = xmlParseStringCharRef(ctxt, &str);
2696
1.80M
      if (val == 0)
2697
7.10k
                goto int_error;
2698
1.80M
      COPY_BUF(0,buffer,nbchars,val);
2699
1.80M
      if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2700
24.9k
          growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2701
24.9k
      }
2702
1.01G
  } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2703
921k
      if (xmlParserDebugEntities)
2704
0
    xmlGenericError(xmlGenericErrorContext,
2705
0
      "String decoding Entity Reference: %.30s\n",
2706
0
      str);
2707
921k
      ent = xmlParseStringEntityRef(ctxt, &str);
2708
921k
      xmlParserEntityCheck(ctxt, 0, ent, 0);
2709
921k
      if (ent != NULL)
2710
786k
          ctxt->nbentities += ent->checked / 2;
2711
921k
      if ((ent != NULL) &&
2712
921k
    (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2713
119k
    if (ent->content != NULL) {
2714
119k
        COPY_BUF(0,buffer,nbchars,ent->content[0]);
2715
119k
        if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2716
15.5k
      growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2717
15.5k
        }
2718
119k
    } else {
2719
0
        xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2720
0
          "predefined entity has no content\n");
2721
0
                    goto int_error;
2722
0
    }
2723
801k
      } else if ((ent != NULL) && (ent->content != NULL)) {
2724
649k
    ctxt->depth++;
2725
649k
    rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2726
649k
                            0, 0, 0);
2727
649k
    ctxt->depth--;
2728
649k
    if (rep == NULL) {
2729
467k
                    ent->content[0] = 0;
2730
467k
                    goto int_error;
2731
467k
                }
2732
2733
182k
                current = rep;
2734
11.5M
                while (*current != 0) { /* non input consuming loop */
2735
11.3M
                    buffer[nbchars++] = *current++;
2736
11.3M
                    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2737
21.0k
                        if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
2738
20
                            goto int_error;
2739
63.1k
                        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2740
63.1k
                    }
2741
11.3M
                }
2742
182k
                xmlFree(rep);
2743
182k
                rep = NULL;
2744
182k
      } else if (ent != NULL) {
2745
17.3k
    int i = xmlStrlen(ent->name);
2746
17.3k
    const xmlChar *cur = ent->name;
2747
2748
17.3k
    buffer[nbchars++] = '&';
2749
17.3k
    if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) {
2750
124
        growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE);
2751
124
    }
2752
93.3k
    for (;i > 0;i--)
2753
76.0k
        buffer[nbchars++] = *cur++;
2754
17.3k
    buffer[nbchars++] = ';';
2755
17.3k
      }
2756
1.01G
  } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2757
1.91M
      if (xmlParserDebugEntities)
2758
0
    xmlGenericError(xmlGenericErrorContext,
2759
0
      "String decoding PE Reference: %.30s\n", str);
2760
1.91M
      ent = xmlParseStringPEReference(ctxt, &str);
2761
1.91M
      xmlParserEntityCheck(ctxt, 0, ent, 0);
2762
1.91M
      if (ent != NULL)
2763
1.28M
          ctxt->nbentities += ent->checked / 2;
2764
1.91M
      if (ent != NULL) {
2765
1.28M
                if (ent->content == NULL) {
2766
        /*
2767
         * Note: external parsed entities will not be loaded,
2768
         * it is not required for a non-validating parser to
2769
         * complete external PEReferences coming from the
2770
         * internal subset
2771
         */
2772
38.3k
        if (((ctxt->options & XML_PARSE_NOENT) != 0) ||
2773
38.3k
      ((ctxt->options & XML_PARSE_DTDVALID) != 0) ||
2774
38.3k
      (ctxt->validate != 0)) {
2775
10.5k
      xmlLoadEntityContent(ctxt, ent);
2776
27.8k
        } else {
2777
27.8k
      xmlWarningMsg(ctxt, XML_ERR_ENTITY_PROCESSING,
2778
27.8k
      "not validating will not read content for PE entity %s\n",
2779
27.8k
                          ent->name, NULL);
2780
27.8k
        }
2781
38.3k
    }
2782
1.28M
    ctxt->depth++;
2783
1.28M
    rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2784
1.28M
                            0, 0, 0);
2785
1.28M
    ctxt->depth--;
2786
1.28M
    if (rep == NULL) {
2787
250k
                    if (ent->content != NULL)
2788
217k
                        ent->content[0] = 0;
2789
250k
                    goto int_error;
2790
250k
                }
2791
1.03M
                current = rep;
2792
51.7M
                while (*current != 0) { /* non input consuming loop */
2793
50.7M
                    buffer[nbchars++] = *current++;
2794
50.7M
                    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2795
112k
                        if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
2796
1.08k
                            goto int_error;
2797
335k
                        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2798
335k
                    }
2799
50.7M
                }
2800
1.03M
                xmlFree(rep);
2801
1.03M
                rep = NULL;
2802
1.03M
      }
2803
1.01G
  } else {
2804
1.01G
      COPY_BUF(l,buffer,nbchars,c);
2805
1.01G
      str += l;
2806
1.01G
      if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2807
1.59M
          growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2808
1.59M
      }
2809
1.01G
  }
2810
1.01G
  if (str < last)
2811
1.00G
      c = CUR_SCHAR(str, l);
2812
9.64M
  else
2813
9.64M
      c = 0;
2814
1.01G
    }
2815
10.3M
    buffer[nbchars] = 0;
2816
10.3M
    return(buffer);
2817
2818
0
mem_error:
2819
0
    xmlErrMemory(ctxt, NULL);
2820
726k
int_error:
2821
726k
    if (rep != NULL)
2822
1.10k
        xmlFree(rep);
2823
726k
    if (buffer != NULL)
2824
726k
        xmlFree(buffer);
2825
726k
    return(NULL);
2826
0
}
2827
2828
/**
2829
 * xmlStringDecodeEntities:
2830
 * @ctxt:  the parser context
2831
 * @str:  the input string
2832
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2833
 * @end:  an end marker xmlChar, 0 if none
2834
 * @end2:  an end marker xmlChar, 0 if none
2835
 * @end3:  an end marker xmlChar, 0 if none
2836
 *
2837
 * Takes a entity string content and process to do the adequate substitutions.
2838
 *
2839
 * [67] Reference ::= EntityRef | CharRef
2840
 *
2841
 * [69] PEReference ::= '%' Name ';'
2842
 *
2843
 * Returns A newly allocated string with the substitution done. The caller
2844
 *      must deallocate it !
2845
 */
2846
xmlChar *
2847
xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2848
11.0M
            xmlChar end, xmlChar  end2, xmlChar end3) {
2849
11.0M
    if ((ctxt == NULL) || (str == NULL)) return(NULL);
2850
11.0M
    return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2851
11.0M
           end, end2, end3));
2852
11.0M
}
2853
2854
/************************************************************************
2855
 *                  *
2856
 *    Commodity functions, cleanup needed ?     *
2857
 *                  *
2858
 ************************************************************************/
2859
2860
/**
2861
 * areBlanks:
2862
 * @ctxt:  an XML parser context
2863
 * @str:  a xmlChar *
2864
 * @len:  the size of @str
2865
 * @blank_chars: we know the chars are blanks
2866
 *
2867
 * Is this a sequence of blank chars that one can ignore ?
2868
 *
2869
 * Returns 1 if ignorable 0 otherwise.
2870
 */
2871
2872
static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2873
35.8M
                     int blank_chars) {
2874
35.8M
    int i, ret;
2875
35.8M
    xmlNodePtr lastChild;
2876
2877
    /*
2878
     * Don't spend time trying to differentiate them, the same callback is
2879
     * used !
2880
     */
2881
35.8M
    if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
2882
679k
  return(0);
2883
2884
    /*
2885
     * Check for xml:space value.
2886
     */
2887
35.1M
    if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2888
35.1M
        (*(ctxt->space) == -2))
2889
19.5M
  return(0);
2890
2891
    /*
2892
     * Check that the string is made of blanks
2893
     */
2894
15.6M
    if (blank_chars == 0) {
2895
32.7M
  for (i = 0;i < len;i++)
2896
28.4M
      if (!(IS_BLANK_CH(str[i]))) return(0);
2897
7.21M
    }
2898
2899
    /*
2900
     * Look if the element is mixed content in the DTD if available
2901
     */
2902
12.7M
    if (ctxt->node == NULL) return(0);
2903
10.6M
    if (ctxt->myDoc != NULL) {
2904
10.6M
  ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2905
10.6M
        if (ret == 0) return(1);
2906
9.60M
        if (ret == 1) return(0);
2907
9.60M
    }
2908
2909
    /*
2910
     * Otherwise, heuristic :-\
2911
     */
2912
9.58M
    if ((RAW != '<') && (RAW != 0xD)) return(0);
2913
8.41M
    if ((ctxt->node->children == NULL) &&
2914
8.41M
  (RAW == '<') && (NXT(1) == '/')) return(0);
2915
2916
8.40M
    lastChild = xmlGetLastChild(ctxt->node);
2917
8.40M
    if (lastChild == NULL) {
2918
1.97M
        if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2919
1.97M
            (ctxt->node->content != NULL)) return(0);
2920
6.43M
    } else if (xmlNodeIsText(lastChild))
2921
2.30M
        return(0);
2922
4.12M
    else if ((ctxt->node->children != NULL) &&
2923
4.12M
             (xmlNodeIsText(ctxt->node->children)))
2924
109k
        return(0);
2925
5.99M
    return(1);
2926
8.40M
}
2927
2928
/************************************************************************
2929
 *                  *
2930
 *    Extra stuff for namespace support     *
2931
 *  Relates to http://www.w3.org/TR/WD-xml-names      *
2932
 *                  *
2933
 ************************************************************************/
2934
2935
/**
2936
 * xmlSplitQName:
2937
 * @ctxt:  an XML parser context
2938
 * @name:  an XML parser context
2939
 * @prefix:  a xmlChar **
2940
 *
2941
 * parse an UTF8 encoded XML qualified name string
2942
 *
2943
 * [NS 5] QName ::= (Prefix ':')? LocalPart
2944
 *
2945
 * [NS 6] Prefix ::= NCName
2946
 *
2947
 * [NS 7] LocalPart ::= NCName
2948
 *
2949
 * Returns the local part, and prefix is updated
2950
 *   to get the Prefix if any.
2951
 */
2952
2953
xmlChar *
2954
37.2M
xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2955
37.2M
    xmlChar buf[XML_MAX_NAMELEN + 5];
2956
37.2M
    xmlChar *buffer = NULL;
2957
37.2M
    int len = 0;
2958
37.2M
    int max = XML_MAX_NAMELEN;
2959
37.2M
    xmlChar *ret = NULL;
2960
37.2M
    const xmlChar *cur = name;
2961
37.2M
    int c;
2962
2963
37.2M
    if (prefix == NULL) return(NULL);
2964
37.2M
    *prefix = NULL;
2965
2966
37.2M
    if (cur == NULL) return(NULL);
2967
2968
#ifndef XML_XML_NAMESPACE
2969
    /* xml: prefix is not really a namespace */
2970
    if ((cur[0] == 'x') && (cur[1] == 'm') &&
2971
        (cur[2] == 'l') && (cur[3] == ':'))
2972
  return(xmlStrdup(name));
2973
#endif
2974
2975
    /* nasty but well=formed */
2976
37.2M
    if (cur[0] == ':')
2977
17.1k
  return(xmlStrdup(name));
2978
2979
37.2M
    c = *cur++;
2980
160M
    while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2981
123M
  buf[len++] = c;
2982
123M
  c = *cur++;
2983
123M
    }
2984
37.2M
    if (len >= max) {
2985
  /*
2986
   * Okay someone managed to make a huge name, so he's ready to pay
2987
   * for the processing speed.
2988
   */
2989
18.0k
  max = len * 2;
2990
2991
18.0k
  buffer = (xmlChar *) xmlMallocAtomic(max);
2992
18.0k
  if (buffer == NULL) {
2993
0
      xmlErrMemory(ctxt, NULL);
2994
0
      return(NULL);
2995
0
  }
2996
18.0k
  memcpy(buffer, buf, len);
2997
18.4M
  while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2998
18.4M
      if (len + 10 > max) {
2999
31.9k
          xmlChar *tmp;
3000
3001
31.9k
    max *= 2;
3002
31.9k
    tmp = (xmlChar *) xmlRealloc(buffer, max);
3003
31.9k
    if (tmp == NULL) {
3004
0
        xmlFree(buffer);
3005
0
        xmlErrMemory(ctxt, NULL);
3006
0
        return(NULL);
3007
0
    }
3008
31.9k
    buffer = tmp;
3009
31.9k
      }
3010
18.4M
      buffer[len++] = c;
3011
18.4M
      c = *cur++;
3012
18.4M
  }
3013
18.0k
  buffer[len] = 0;
3014
18.0k
    }
3015
3016
37.2M
    if ((c == ':') && (*cur == 0)) {
3017
57.0k
        if (buffer != NULL)
3018
89
      xmlFree(buffer);
3019
57.0k
  *prefix = NULL;
3020
57.0k
  return(xmlStrdup(name));
3021
57.0k
    }
3022
3023
37.1M
    if (buffer == NULL)
3024
37.1M
  ret = xmlStrndup(buf, len);
3025
17.9k
    else {
3026
17.9k
  ret = buffer;
3027
17.9k
  buffer = NULL;
3028
17.9k
  max = XML_MAX_NAMELEN;
3029
17.9k
    }
3030
3031
3032
37.1M
    if (c == ':') {
3033
11.2M
  c = *cur;
3034
11.2M
        *prefix = ret;
3035
11.2M
  if (c == 0) {
3036
0
      return(xmlStrndup(BAD_CAST "", 0));
3037
0
  }
3038
11.2M
  len = 0;
3039
3040
  /*
3041
   * Check that the first character is proper to start
3042
   * a new name
3043
   */
3044
11.2M
  if (!(((c >= 0x61) && (c <= 0x7A)) ||
3045
11.2M
        ((c >= 0x41) && (c <= 0x5A)) ||
3046
11.2M
        (c == '_') || (c == ':'))) {
3047
9.13k
      int l;
3048
9.13k
      int first = CUR_SCHAR(cur, l);
3049
3050
9.13k
      if (!IS_LETTER(first) && (first != '_')) {
3051
5.77k
    xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
3052
5.77k
          "Name %s is not XML Namespace compliant\n",
3053
5.77k
          name);
3054
5.77k
      }
3055
9.13k
  }
3056
11.2M
  cur++;
3057
3058
46.2M
  while ((c != 0) && (len < max)) { /* tested bigname2.xml */
3059
34.9M
      buf[len++] = c;
3060
34.9M
      c = *cur++;
3061
34.9M
  }
3062
11.2M
  if (len >= max) {
3063
      /*
3064
       * Okay someone managed to make a huge name, so he's ready to pay
3065
       * for the processing speed.
3066
       */
3067
14.7k
      max = len * 2;
3068
3069
14.7k
      buffer = (xmlChar *) xmlMallocAtomic(max);
3070
14.7k
      if (buffer == NULL) {
3071
0
          xmlErrMemory(ctxt, NULL);
3072
0
    return(NULL);
3073
0
      }
3074
14.7k
      memcpy(buffer, buf, len);
3075
39.6M
      while (c != 0) { /* tested bigname2.xml */
3076
39.6M
    if (len + 10 > max) {
3077
28.3k
        xmlChar *tmp;
3078
3079
28.3k
        max *= 2;
3080
28.3k
        tmp = (xmlChar *) xmlRealloc(buffer, max);
3081
28.3k
        if (tmp == NULL) {
3082
0
      xmlErrMemory(ctxt, NULL);
3083
0
      xmlFree(buffer);
3084
0
      return(NULL);
3085
0
        }
3086
28.3k
        buffer = tmp;
3087
28.3k
    }
3088
39.6M
    buffer[len++] = c;
3089
39.6M
    c = *cur++;
3090
39.6M
      }
3091
14.7k
      buffer[len] = 0;
3092
14.7k
  }
3093
3094
11.2M
  if (buffer == NULL)
3095
11.2M
      ret = xmlStrndup(buf, len);
3096
14.7k
  else {
3097
14.7k
      ret = buffer;
3098
14.7k
  }
3099
11.2M
    }
3100
3101
37.1M
    return(ret);
3102
37.1M
}
3103
3104
/************************************************************************
3105
 *                  *
3106
 *      The parser itself       *
3107
 *  Relates to http://www.w3.org/TR/REC-xml       *
3108
 *                  *
3109
 ************************************************************************/
3110
3111
/************************************************************************
3112
 *                  *
3113
 *  Routines to parse Name, NCName and NmToken      *
3114
 *                  *
3115
 ************************************************************************/
3116
/*
 * Call counters, compiled in only when DEBUG is defined.
 * nbParseName, nbParseNCName, nbParseNameComplex and nbParseNCNameComplex
 * are incremented by the parse routine of the same name; nbParseNmToken
 * and nbParseStringName are presumably bumped by xmlParseNmtoken and
 * xmlParseStringName -- TODO confirm against those functions.
 */
#ifdef DEBUG
3117
static unsigned long nbParseName = 0;
3118
static unsigned long nbParseNmToken = 0;
3119
static unsigned long nbParseNCName = 0;
3120
static unsigned long nbParseNCNameComplex = 0;
3121
static unsigned long nbParseNameComplex = 0;
3122
static unsigned long nbParseStringName = 0;
3123
#endif
3124
3125
/*
3126
 * The two following functions are related to the change of accepted
3127
 * characters for Name and NmToken in the Revision 5 of XML-1.0
3128
 * They correspond to the modified production [4] and the new production [4a]
3129
 * changes in that revision. Also note that the macros used for the
3130
 * productions Letter, Digit, CombiningChar and Extender are not needed
3131
 * anymore.
3132
 * We still keep compatibility to pre-revision5 parsing semantic if the
3133
 * new XML_PARSE_OLD10 option is given to the parser.
3134
 */
3135
static int
3136
10.1M
xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
3137
10.1M
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3138
        /*
3139
   * Use the new checks of production [4] [4a] amd [5] of the
3140
   * Update 5 of XML-1.0
3141
   */
3142
6.18M
  if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3143
6.18M
      (((c >= 'a') && (c <= 'z')) ||
3144
6.08M
       ((c >= 'A') && (c <= 'Z')) ||
3145
6.08M
       (c == '_') || (c == ':') ||
3146
6.08M
       ((c >= 0xC0) && (c <= 0xD6)) ||
3147
6.08M
       ((c >= 0xD8) && (c <= 0xF6)) ||
3148
6.08M
       ((c >= 0xF8) && (c <= 0x2FF)) ||
3149
6.08M
       ((c >= 0x370) && (c <= 0x37D)) ||
3150
6.08M
       ((c >= 0x37F) && (c <= 0x1FFF)) ||
3151
6.08M
       ((c >= 0x200C) && (c <= 0x200D)) ||
3152
6.08M
       ((c >= 0x2070) && (c <= 0x218F)) ||
3153
6.08M
       ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3154
6.08M
       ((c >= 0x3001) && (c <= 0xD7FF)) ||
3155
6.08M
       ((c >= 0xF900) && (c <= 0xFDCF)) ||
3156
6.08M
       ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3157
6.08M
       ((c >= 0x10000) && (c <= 0xEFFFF))))
3158
2.45M
      return(1);
3159
6.18M
    } else {
3160
3.97M
        if (IS_LETTER(c) || (c == '_') || (c == ':'))
3161
1.37M
      return(1);
3162
3.97M
    }
3163
6.32M
    return(0);
3164
10.1M
}
3165
3166
static int
3167
71.0M
xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
3168
71.0M
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3169
        /*
3170
   * Use the new checks of production [4] [4a] amd [5] of the
3171
   * Update 5 of XML-1.0
3172
   */
3173
57.8M
  if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3174
57.8M
      (((c >= 'a') && (c <= 'z')) ||
3175
57.7M
       ((c >= 'A') && (c <= 'Z')) ||
3176
57.7M
       ((c >= '0') && (c <= '9')) || /* !start */
3177
57.7M
       (c == '_') || (c == ':') ||
3178
57.7M
       (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3179
57.7M
       ((c >= 0xC0) && (c <= 0xD6)) ||
3180
57.7M
       ((c >= 0xD8) && (c <= 0xF6)) ||
3181
57.7M
       ((c >= 0xF8) && (c <= 0x2FF)) ||
3182
57.7M
       ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3183
57.7M
       ((c >= 0x370) && (c <= 0x37D)) ||
3184
57.7M
       ((c >= 0x37F) && (c <= 0x1FFF)) ||
3185
57.7M
       ((c >= 0x200C) && (c <= 0x200D)) ||
3186
57.7M
       ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3187
57.7M
       ((c >= 0x2070) && (c <= 0x218F)) ||
3188
57.7M
       ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3189
57.7M
       ((c >= 0x3001) && (c <= 0xD7FF)) ||
3190
57.7M
       ((c >= 0xF900) && (c <= 0xFDCF)) ||
3191
57.7M
       ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3192
57.7M
       ((c >= 0x10000) && (c <= 0xEFFFF))))
3193
54.8M
       return(1);
3194
57.8M
    } else {
3195
13.1M
        if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3196
13.1M
            (c == '.') || (c == '-') ||
3197
13.1M
      (c == '_') || (c == ':') ||
3198
13.1M
      (IS_COMBINING(c)) ||
3199
13.1M
      (IS_EXTENDER(c)))
3200
11.6M
      return(1);
3201
13.1M
    }
3202
4.48M
    return(0);
3203
71.0M
}
3204
3205
static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
3206
                                          int *len, int *alloc, int normalize);
3207
3208
static const xmlChar *
3209
105M
xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3210
105M
    int len = 0, l;
3211
105M
    int c;
3212
105M
    int count = 0;
3213
105M
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3214
75.5M
                    XML_MAX_TEXT_LENGTH :
3215
105M
                    XML_MAX_NAME_LENGTH;
3216
3217
#ifdef DEBUG
3218
    nbParseNameComplex++;
3219
#endif
3220
3221
    /*
3222
     * Handler for more complex cases
3223
     */
3224
105M
    GROW;
3225
105M
    if (ctxt->instate == XML_PARSER_EOF)
3226
0
        return(NULL);
3227
105M
    c = CUR_CHAR(l);
3228
105M
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3229
        /*
3230
   * Use the new checks of production [4] [4a] amd [5] of the
3231
   * Update 5 of XML-1.0
3232
   */
3233
58.2M
  if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3234
58.2M
      (!(((c >= 'a') && (c <= 'z')) ||
3235
57.7M
         ((c >= 'A') && (c <= 'Z')) ||
3236
57.7M
         (c == '_') || (c == ':') ||
3237
57.7M
         ((c >= 0xC0) && (c <= 0xD6)) ||
3238
57.7M
         ((c >= 0xD8) && (c <= 0xF6)) ||
3239
57.7M
         ((c >= 0xF8) && (c <= 0x2FF)) ||
3240
57.7M
         ((c >= 0x370) && (c <= 0x37D)) ||
3241
57.7M
         ((c >= 0x37F) && (c <= 0x1FFF)) ||
3242
57.7M
         ((c >= 0x200C) && (c <= 0x200D)) ||
3243
57.7M
         ((c >= 0x2070) && (c <= 0x218F)) ||
3244
57.7M
         ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3245
57.7M
         ((c >= 0x3001) && (c <= 0xD7FF)) ||
3246
57.7M
         ((c >= 0xF900) && (c <= 0xFDCF)) ||
3247
57.7M
         ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3248
57.7M
         ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3249
55.3M
      return(NULL);
3250
55.3M
  }
3251
2.88M
  len += l;
3252
2.88M
  NEXTL(l);
3253
2.88M
  c = CUR_CHAR(l);
3254
64.7M
  while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3255
64.7M
         (((c >= 'a') && (c <= 'z')) ||
3256
63.6M
          ((c >= 'A') && (c <= 'Z')) ||
3257
63.6M
          ((c >= '0') && (c <= '9')) || /* !start */
3258
63.6M
          (c == '_') || (c == ':') ||
3259
63.6M
          (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3260
63.6M
          ((c >= 0xC0) && (c <= 0xD6)) ||
3261
63.6M
          ((c >= 0xD8) && (c <= 0xF6)) ||
3262
63.6M
          ((c >= 0xF8) && (c <= 0x2FF)) ||
3263
63.6M
          ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3264
63.6M
          ((c >= 0x370) && (c <= 0x37D)) ||
3265
63.6M
          ((c >= 0x37F) && (c <= 0x1FFF)) ||
3266
63.6M
          ((c >= 0x200C) && (c <= 0x200D)) ||
3267
63.6M
          ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3268
63.6M
          ((c >= 0x2070) && (c <= 0x218F)) ||
3269
63.6M
          ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3270
63.6M
          ((c >= 0x3001) && (c <= 0xD7FF)) ||
3271
63.6M
          ((c >= 0xF900) && (c <= 0xFDCF)) ||
3272
63.6M
          ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3273
63.6M
          ((c >= 0x10000) && (c <= 0xEFFFF))
3274
63.6M
    )) {
3275
61.8M
      if (count++ > XML_PARSER_CHUNK_SIZE) {
3276
467k
    count = 0;
3277
467k
    GROW;
3278
467k
                if (ctxt->instate == XML_PARSER_EOF)
3279
0
                    return(NULL);
3280
467k
      }
3281
61.8M
            if (len <= INT_MAX - l)
3282
61.8M
          len += l;
3283
61.8M
      NEXTL(l);
3284
61.8M
      c = CUR_CHAR(l);
3285
61.8M
  }
3286
47.5M
    } else {
3287
47.5M
  if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3288
47.5M
      (!IS_LETTER(c) && (c != '_') &&
3289
46.7M
       (c != ':'))) {
3290
45.4M
      return(NULL);
3291
45.4M
  }
3292
2.08M
  len += l;
3293
2.08M
  NEXTL(l);
3294
2.08M
  c = CUR_CHAR(l);
3295
3296
58.8M
  while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3297
58.8M
         ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3298
58.0M
    (c == '.') || (c == '-') ||
3299
58.0M
    (c == '_') || (c == ':') ||
3300
58.0M
    (IS_COMBINING(c)) ||
3301
58.0M
    (IS_EXTENDER(c)))) {
3302
56.7M
      if (count++ > XML_PARSER_CHUNK_SIZE) {
3303
412k
    count = 0;
3304
412k
    GROW;
3305
412k
                if (ctxt->instate == XML_PARSER_EOF)
3306
0
                    return(NULL);
3307
412k
      }
3308
56.7M
            if (len <= INT_MAX - l)
3309
56.7M
          len += l;
3310
56.7M
      NEXTL(l);
3311
56.7M
      c = CUR_CHAR(l);
3312
56.7M
  }
3313
2.08M
    }
3314
4.96M
    if (len > maxLength) {
3315
26
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3316
26
        return(NULL);
3317
26
    }
3318
4.96M
    if (ctxt->input->cur - ctxt->input->base < len) {
3319
        /*
3320
         * There were a couple of bugs where PERefs lead to to a change
3321
         * of the buffer. Check the buffer size to avoid passing an invalid
3322
         * pointer to xmlDictLookup.
3323
         */
3324
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
3325
0
                    "unexpected change of input buffer");
3326
0
        return (NULL);
3327
0
    }
3328
4.96M
    if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3329
14.7k
        return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
3330
4.94M
    return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3331
4.96M
}
3332
3333
/**
3334
 * xmlParseName:
3335
 * @ctxt:  an XML parser context
3336
 *
3337
 * DEPRECATED: Internal function, don't use.
3338
 *
3339
 * parse an XML name.
3340
 *
3341
 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3342
 *                  CombiningChar | Extender
3343
 *
3344
 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3345
 *
3346
 * [6] Names ::= Name (#x20 Name)*
3347
 *
3348
 * Returns the Name parsed or NULL
3349
 */
3350
3351
const xmlChar *
3352
311M
xmlParseName(xmlParserCtxtPtr ctxt) {
3353
311M
    const xmlChar *in;
3354
311M
    const xmlChar *ret;
3355
311M
    size_t count = 0;
3356
311M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3357
240M
                       XML_MAX_TEXT_LENGTH :
3358
311M
                       XML_MAX_NAME_LENGTH;
3359
3360
311M
    GROW;
3361
3362
#ifdef DEBUG
3363
    nbParseName++;
3364
#endif
3365
3366
    /*
3367
     * Accelerator for simple ASCII names
3368
     */
3369
311M
    in = ctxt->input->cur;
3370
311M
    if (((*in >= 0x61) && (*in <= 0x7A)) ||
3371
311M
  ((*in >= 0x41) && (*in <= 0x5A)) ||
3372
311M
  (*in == '_') || (*in == ':')) {
3373
210M
  in++;
3374
1.12G
  while (((*in >= 0x61) && (*in <= 0x7A)) ||
3375
1.12G
         ((*in >= 0x41) && (*in <= 0x5A)) ||
3376
1.12G
         ((*in >= 0x30) && (*in <= 0x39)) ||
3377
1.12G
         (*in == '_') || (*in == '-') ||
3378
1.12G
         (*in == ':') || (*in == '.'))
3379
910M
      in++;
3380
210M
  if ((*in > 0) && (*in < 0x80)) {
3381
206M
      count = in - ctxt->input->cur;
3382
206M
            if (count > maxLength) {
3383
14
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3384
14
                return(NULL);
3385
14
            }
3386
206M
      ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3387
206M
      ctxt->input->cur = in;
3388
206M
      ctxt->input->col += count;
3389
206M
      if (ret == NULL)
3390
0
          xmlErrMemory(ctxt, NULL);
3391
206M
      return(ret);
3392
206M
  }
3393
210M
    }
3394
    /* accelerator for special cases */
3395
105M
    return(xmlParseNameComplex(ctxt));
3396
311M
}
3397
3398
static const xmlChar *
3399
6.18M
xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3400
6.18M
    int len = 0, l;
3401
6.18M
    int c;
3402
6.18M
    int count = 0;
3403
6.18M
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3404
2.31M
                    XML_MAX_TEXT_LENGTH :
3405
6.18M
                    XML_MAX_NAME_LENGTH;
3406
6.18M
    size_t startPosition = 0;
3407
3408
#ifdef DEBUG
3409
    nbParseNCNameComplex++;
3410
#endif
3411
3412
    /*
3413
     * Handler for more complex cases
3414
     */
3415
6.18M
    GROW;
3416
6.18M
    startPosition = CUR_PTR - BASE_PTR;
3417
6.18M
    c = CUR_CHAR(l);
3418
6.18M
    if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3419
6.18M
  (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3420
5.94M
  return(NULL);
3421
5.94M
    }
3422
3423
10.9M
    while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3424
10.9M
     (xmlIsNameChar(ctxt, c) && (c != ':'))) {
3425
10.6M
  if (count++ > XML_PARSER_CHUNK_SIZE) {
3426
89.0k
      count = 0;
3427
89.0k
      GROW;
3428
89.0k
            if (ctxt->instate == XML_PARSER_EOF)
3429
0
                return(NULL);
3430
89.0k
  }
3431
10.6M
        if (len <= INT_MAX - l)
3432
10.6M
      len += l;
3433
10.6M
  NEXTL(l);
3434
10.6M
  c = CUR_CHAR(l);
3435
10.6M
  if (c == 0) {
3436
15.2k
      count = 0;
3437
      /*
3438
       * when shrinking to extend the buffer we really need to preserve
3439
       * the part of the name we already parsed. Hence rolling back
3440
       * by current length.
3441
       */
3442
15.2k
      ctxt->input->cur -= l;
3443
15.2k
      GROW;
3444
15.2k
            if (ctxt->instate == XML_PARSER_EOF)
3445
0
                return(NULL);
3446
15.2k
      ctxt->input->cur += l;
3447
15.2k
      c = CUR_CHAR(l);
3448
15.2k
  }
3449
10.6M
    }
3450
241k
    if (len > maxLength) {
3451
27
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3452
27
        return(NULL);
3453
27
    }
3454
241k
    return(xmlDictLookup(ctxt->dict, (BASE_PTR + startPosition), len));
3455
241k
}
3456
3457
/**
3458
 * xmlParseNCName:
3459
 * @ctxt:  an XML parser context
3460
 * @len:  length of the string parsed
3461
 *
3462
 * parse an XML name.
3463
 *
3464
 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3465
 *                      CombiningChar | Extender
3466
 *
3467
 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3468
 *
3469
 * Returns the Name parsed or NULL
3470
 */
3471
3472
static const xmlChar *
3473
58.4M
xmlParseNCName(xmlParserCtxtPtr ctxt) {
3474
58.4M
    const xmlChar *in, *e;
3475
58.4M
    const xmlChar *ret;
3476
58.4M
    size_t count = 0;
3477
58.4M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3478
5.84M
                       XML_MAX_TEXT_LENGTH :
3479
58.4M
                       XML_MAX_NAME_LENGTH;
3480
3481
#ifdef DEBUG
3482
    nbParseNCName++;
3483
#endif
3484
3485
    /*
3486
     * Accelerator for simple ASCII names
3487
     */
3488
58.4M
    in = ctxt->input->cur;
3489
58.4M
    e = ctxt->input->end;
3490
58.4M
    if ((((*in >= 0x61) && (*in <= 0x7A)) ||
3491
58.4M
   ((*in >= 0x41) && (*in <= 0x5A)) ||
3492
58.4M
   (*in == '_')) && (in < e)) {
3493
52.4M
  in++;
3494
160M
  while ((((*in >= 0x61) && (*in <= 0x7A)) ||
3495
160M
          ((*in >= 0x41) && (*in <= 0x5A)) ||
3496
160M
          ((*in >= 0x30) && (*in <= 0x39)) ||
3497
160M
          (*in == '_') || (*in == '-') ||
3498
160M
          (*in == '.')) && (in < e))
3499
107M
      in++;
3500
52.4M
  if (in >= e)
3501
2.77k
      goto complex;
3502
52.4M
  if ((*in > 0) && (*in < 0x80)) {
3503
52.2M
      count = in - ctxt->input->cur;
3504
52.2M
            if (count > maxLength) {
3505
20
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3506
20
                return(NULL);
3507
20
            }
3508
52.2M
      ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3509
52.2M
      ctxt->input->cur = in;
3510
52.2M
      ctxt->input->col += count;
3511
52.2M
      if (ret == NULL) {
3512
0
          xmlErrMemory(ctxt, NULL);
3513
0
      }
3514
52.2M
      return(ret);
3515
52.2M
  }
3516
52.4M
    }
3517
6.18M
complex:
3518
6.18M
    return(xmlParseNCNameComplex(ctxt));
3519
58.4M
}
3520
3521
/**
3522
 * xmlParseNameAndCompare:
3523
 * @ctxt:  an XML parser context
3524
 *
3525
 * parse an XML name and compares for match
3526
 * (specialized for endtag parsing)
3527
 *
3528
 * Returns NULL for an illegal name, (xmlChar*) 1 for success
3529
 * and the name for mismatch
3530
 */
3531
3532
static const xmlChar *
3533
37.6M
xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
3534
37.6M
    register const xmlChar *cmp = other;
3535
37.6M
    register const xmlChar *in;
3536
37.6M
    const xmlChar *ret;
3537
3538
37.6M
    GROW;
3539
37.6M
    if (ctxt->instate == XML_PARSER_EOF)
3540
0
        return(NULL);
3541
3542
37.6M
    in = ctxt->input->cur;
3543
198M
    while (*in != 0 && *in == *cmp) {
3544
160M
  ++in;
3545
160M
  ++cmp;
3546
160M
    }
3547
37.6M
    if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
3548
  /* success */
3549
28.6M
  ctxt->input->col += in - ctxt->input->cur;
3550
28.6M
  ctxt->input->cur = in;
3551
28.6M
  return (const xmlChar*) 1;
3552
28.6M
    }
3553
    /* failure (or end of input buffer), check with full function */
3554
8.96M
    ret = xmlParseName (ctxt);
3555
    /* strings coming from the dictionary direct compare possible */
3556
8.96M
    if (ret == other) {
3557
154k
  return (const xmlChar*) 1;
3558
154k
    }
3559
8.81M
    return ret;
3560
8.96M
}
3561
3562
/**
3563
 * xmlParseStringName:
3564
 * @ctxt:  an XML parser context
3565
 * @str:  a pointer to the string pointer (IN/OUT)
3566
 *
3567
 * parse an XML name.
3568
 *
3569
 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3570
 *                  CombiningChar | Extender
3571
 *
3572
 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3573
 *
3574
 * [6] Names ::= Name (#x20 Name)*
3575
 *
3576
 * Returns the Name parsed or NULL. The @str pointer
3577
 * is updated to the current location in the string.
3578
 */
3579
3580
static xmlChar *
3581
4.04M
xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3582
4.04M
    xmlChar buf[XML_MAX_NAMELEN + 5];
3583
4.04M
    const xmlChar *cur = *str;
3584
4.04M
    int len = 0, l;
3585
4.04M
    int c;
3586
4.04M
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3587
1.81M
                    XML_MAX_TEXT_LENGTH :
3588
4.04M
                    XML_MAX_NAME_LENGTH;
3589
3590
#ifdef DEBUG
3591
    nbParseStringName++;
3592
#endif
3593
3594
4.04M
    c = CUR_SCHAR(cur, l);
3595
4.04M
    if (!xmlIsNameStartChar(ctxt, c)) {
3596
480k
  return(NULL);
3597
480k
    }
3598
3599
3.56M
    COPY_BUF(l,buf,len,c);
3600
3.56M
    cur += l;
3601
3.56M
    c = CUR_SCHAR(cur, l);
3602
32.0M
    while (xmlIsNameChar(ctxt, c)) {
3603
28.5M
  COPY_BUF(l,buf,len,c);
3604
28.5M
  cur += l;
3605
28.5M
  c = CUR_SCHAR(cur, l);
3606
28.5M
  if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3607
      /*
3608
       * Okay someone managed to make a huge name, so he's ready to pay
3609
       * for the processing speed.
3610
       */
3611
5.50k
      xmlChar *buffer;
3612
5.50k
      int max = len * 2;
3613
3614
5.50k
      buffer = (xmlChar *) xmlMallocAtomic(max);
3615
5.50k
      if (buffer == NULL) {
3616
0
          xmlErrMemory(ctxt, NULL);
3617
0
    return(NULL);
3618
0
      }
3619
5.50k
      memcpy(buffer, buf, len);
3620
5.95M
      while (xmlIsNameChar(ctxt, c)) {
3621
5.94M
    if (len + 10 > max) {
3622
9.75k
        xmlChar *tmp;
3623
3624
9.75k
        max *= 2;
3625
9.75k
        tmp = (xmlChar *) xmlRealloc(buffer, max);
3626
9.75k
        if (tmp == NULL) {
3627
0
      xmlErrMemory(ctxt, NULL);
3628
0
      xmlFree(buffer);
3629
0
      return(NULL);
3630
0
        }
3631
9.75k
        buffer = tmp;
3632
9.75k
    }
3633
5.94M
    COPY_BUF(l,buffer,len,c);
3634
5.94M
    cur += l;
3635
5.94M
    c = CUR_SCHAR(cur, l);
3636
5.94M
                if (len > maxLength) {
3637
4
                    xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3638
4
                    xmlFree(buffer);
3639
4
                    return(NULL);
3640
4
                }
3641
5.94M
      }
3642
5.50k
      buffer[len] = 0;
3643
5.50k
      *str = cur;
3644
5.50k
      return(buffer);
3645
5.50k
  }
3646
28.5M
    }
3647
3.55M
    if (len > maxLength) {
3648
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3649
0
        return(NULL);
3650
0
    }
3651
3.55M
    *str = cur;
3652
3.55M
    return(xmlStrndup(buf, len));
3653
3.55M
}
3654
3655
/**
3656
 * xmlParseNmtoken:
3657
 * @ctxt:  an XML parser context
3658
 *
3659
 * DEPRECATED: Internal function, don't use.
3660
 *
3661
 * parse an XML Nmtoken.
3662
 *
3663
 * [7] Nmtoken ::= (NameChar)+
3664
 *
3665
 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
3666
 *
3667
 * Returns the Nmtoken parsed or NULL
3668
 */
3669
3670
xmlChar *
3671
800k
xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3672
800k
    xmlChar buf[XML_MAX_NAMELEN + 5];
3673
800k
    int len = 0, l;
3674
800k
    int c;
3675
800k
    int count = 0;
3676
800k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3677
322k
                    XML_MAX_TEXT_LENGTH :
3678
800k
                    XML_MAX_NAME_LENGTH;
3679
3680
#ifdef DEBUG
3681
    nbParseNmToken++;
3682
#endif
3683
3684
800k
    GROW;
3685
800k
    if (ctxt->instate == XML_PARSER_EOF)
3686
0
        return(NULL);
3687
800k
    c = CUR_CHAR(l);
3688
3689
5.41M
    while (xmlIsNameChar(ctxt, c)) {
3690
4.61M
  if (count++ > XML_PARSER_CHUNK_SIZE) {
3691
0
      count = 0;
3692
0
      GROW;
3693
0
  }
3694
4.61M
  COPY_BUF(l,buf,len,c);
3695
4.61M
  NEXTL(l);
3696
4.61M
  c = CUR_CHAR(l);
3697
4.61M
  if (c == 0) {
3698
856
      count = 0;
3699
856
      GROW;
3700
856
      if (ctxt->instate == XML_PARSER_EOF)
3701
0
    return(NULL);
3702
856
            c = CUR_CHAR(l);
3703
856
  }
3704
4.61M
  if (len >= XML_MAX_NAMELEN) {
3705
      /*
3706
       * Okay someone managed to make a huge token, so he's ready to pay
3707
       * for the processing speed.
3708
       */
3709
4.19k
      xmlChar *buffer;
3710
4.19k
      int max = len * 2;
3711
3712
4.19k
      buffer = (xmlChar *) xmlMallocAtomic(max);
3713
4.19k
      if (buffer == NULL) {
3714
0
          xmlErrMemory(ctxt, NULL);
3715
0
    return(NULL);
3716
0
      }
3717
4.19k
      memcpy(buffer, buf, len);
3718
16.7M
      while (xmlIsNameChar(ctxt, c)) {
3719
16.7M
    if (count++ > XML_PARSER_CHUNK_SIZE) {
3720
166k
        count = 0;
3721
166k
        GROW;
3722
166k
                    if (ctxt->instate == XML_PARSER_EOF) {
3723
0
                        xmlFree(buffer);
3724
0
                        return(NULL);
3725
0
                    }
3726
166k
    }
3727
16.7M
    if (len + 10 > max) {
3728
10.9k
        xmlChar *tmp;
3729
3730
10.9k
        max *= 2;
3731
10.9k
        tmp = (xmlChar *) xmlRealloc(buffer, max);
3732
10.9k
        if (tmp == NULL) {
3733
0
      xmlErrMemory(ctxt, NULL);
3734
0
      xmlFree(buffer);
3735
0
      return(NULL);
3736
0
        }
3737
10.9k
        buffer = tmp;
3738
10.9k
    }
3739
16.7M
    COPY_BUF(l,buffer,len,c);
3740
16.7M
    NEXTL(l);
3741
16.7M
    c = CUR_CHAR(l);
3742
16.7M
                if (len > maxLength) {
3743
17
                    xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3744
17
                    xmlFree(buffer);
3745
17
                    return(NULL);
3746
17
                }
3747
16.7M
      }
3748
4.18k
      buffer[len] = 0;
3749
4.18k
      return(buffer);
3750
4.19k
  }
3751
4.61M
    }
3752
795k
    if (len == 0)
3753
39.5k
        return(NULL);
3754
756k
    if (len > maxLength) {
3755
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3756
0
        return(NULL);
3757
0
    }
3758
756k
    return(xmlStrndup(buf, len));
3759
756k
}
3760
3761
/**
3762
 * xmlParseEntityValue:
3763
 * @ctxt:  an XML parser context
3764
 * @orig:  if non-NULL store a copy of the original entity value
3765
 *
3766
 * DEPRECATED: Internal function, don't use.
3767
 *
3768
 * parse a value for ENTITY declarations
3769
 *
3770
 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3771
 *                 "'" ([^%&'] | PEReference | Reference)* "'"
3772
 *
3773
 * Returns the EntityValue parsed with reference substituted or NULL
3774
 */
3775
3776
xmlChar *
3777
2.10M
xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3778
2.10M
    xmlChar *buf = NULL;
3779
2.10M
    int len = 0;
3780
2.10M
    int size = XML_PARSER_BUFFER_SIZE;
3781
2.10M
    int c, l;
3782
2.10M
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3783
643k
                    XML_MAX_HUGE_LENGTH :
3784
2.10M
                    XML_MAX_TEXT_LENGTH;
3785
2.10M
    xmlChar stop;
3786
2.10M
    xmlChar *ret = NULL;
3787
2.10M
    const xmlChar *cur = NULL;
3788
2.10M
    xmlParserInputPtr input;
3789
3790
2.10M
    if (RAW == '"') stop = '"';
3791
535k
    else if (RAW == '\'') stop = '\'';
3792
0
    else {
3793
0
  xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
3794
0
  return(NULL);
3795
0
    }
3796
2.10M
    buf = (xmlChar *) xmlMallocAtomic(size);
3797
2.10M
    if (buf == NULL) {
3798
0
  xmlErrMemory(ctxt, NULL);
3799
0
  return(NULL);
3800
0
    }
3801
3802
    /*
3803
     * The content of the entity definition is copied in a buffer.
3804
     */
3805
3806
2.10M
    ctxt->instate = XML_PARSER_ENTITY_VALUE;
3807
2.10M
    input = ctxt->input;
3808
2.10M
    GROW;
3809
2.10M
    if (ctxt->instate == XML_PARSER_EOF)
3810
0
        goto error;
3811
2.10M
    NEXT;
3812
2.10M
    c = CUR_CHAR(l);
3813
    /*
3814
     * NOTE: 4.4.5 Included in Literal
3815
     * When a parameter entity reference appears in a literal entity
3816
     * value, ... a single or double quote character in the replacement
3817
     * text is always treated as a normal data character and will not
3818
     * terminate the literal.
3819
     * In practice it means we stop the loop only when back at parsing
3820
     * the initial entity and the quote is found
3821
     */
3822
120M
    while (((IS_CHAR(c)) && ((c != stop) || /* checked */
3823
120M
      (ctxt->input != input))) && (ctxt->instate != XML_PARSER_EOF)) {
3824
117M
  if (len + 5 >= size) {
3825
434k
      xmlChar *tmp;
3826
3827
434k
      size *= 2;
3828
434k
      tmp = (xmlChar *) xmlRealloc(buf, size);
3829
434k
      if (tmp == NULL) {
3830
0
    xmlErrMemory(ctxt, NULL);
3831
0
                goto error;
3832
0
      }
3833
434k
      buf = tmp;
3834
434k
  }
3835
117M
  COPY_BUF(l,buf,len,c);
3836
117M
  NEXTL(l);
3837
3838
117M
  GROW;
3839
117M
  c = CUR_CHAR(l);
3840
117M
  if (c == 0) {
3841
2.00k
      GROW;
3842
2.00k
      c = CUR_CHAR(l);
3843
2.00k
  }
3844
3845
117M
        if (len > maxLength) {
3846
0
            xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
3847
0
                           "entity value too long\n");
3848
0
            goto error;
3849
0
        }
3850
117M
    }
3851
2.10M
    buf[len] = 0;
3852
2.10M
    if (ctxt->instate == XML_PARSER_EOF)
3853
0
        goto error;
3854
2.10M
    if (c != stop) {
3855
3.44k
        xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
3856
3.44k
        goto error;
3857
3.44k
    }
3858
2.10M
    NEXT;
3859
3860
    /*
3861
     * Raise problem w.r.t. '&' and '%' being used in non-entities
3862
     * reference constructs. Note Charref will be handled in
3863
     * xmlStringDecodeEntities()
3864
     */
3865
2.10M
    cur = buf;
3866
95.2M
    while (*cur != 0) { /* non input consuming */
3867
93.1M
  if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3868
1.20M
      xmlChar *name;
3869
1.20M
      xmlChar tmp = *cur;
3870
1.20M
            int nameOk = 0;
3871
3872
1.20M
      cur++;
3873
1.20M
      name = xmlParseStringName(ctxt, &cur);
3874
1.20M
            if (name != NULL) {
3875
1.19M
                nameOk = 1;
3876
1.19M
                xmlFree(name);
3877
1.19M
            }
3878
1.20M
            if ((nameOk == 0) || (*cur != ';')) {
3879
18.7k
    xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
3880
18.7k
      "EntityValue: '%c' forbidden except for entities references\n",
3881
18.7k
                            tmp);
3882
18.7k
                goto error;
3883
18.7k
      }
3884
1.18M
      if ((tmp == '%') && (ctxt->inSubset == 1) &&
3885
1.18M
    (ctxt->inputNr == 1)) {
3886
1.60k
    xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
3887
1.60k
                goto error;
3888
1.60k
      }
3889
1.18M
      if (*cur == 0)
3890
0
          break;
3891
1.18M
  }
3892
93.1M
  cur++;
3893
93.1M
    }
3894
3895
    /*
3896
     * Then PEReference entities are substituted.
3897
     *
3898
     * NOTE: 4.4.7 Bypassed
3899
     * When a general entity reference appears in the EntityValue in
3900
     * an entity declaration, it is bypassed and left as is.
3901
     * so XML_SUBSTITUTE_REF is not set here.
3902
     */
3903
2.08M
    ++ctxt->depth;
3904
2.08M
    ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3905
2.08M
                                  0, 0, 0);
3906
2.08M
    --ctxt->depth;
3907
2.08M
    if (orig != NULL) {
3908
2.08M
        *orig = buf;
3909
2.08M
        buf = NULL;
3910
2.08M
    }
3911
3912
2.10M
error:
3913
2.10M
    if (buf != NULL)
3914
23.8k
        xmlFree(buf);
3915
2.10M
    return(ret);
3916
2.08M
}
3917
3918
/**
3919
 * xmlParseAttValueComplex:
3920
 * @ctxt:  an XML parser context
3921
 * @len:   the resulting attribute len
3922
 * @normalize:  whether to apply the inner normalization
3923
 *
3924
 * parse a value for an attribute, this is the fallback function
3925
 * of xmlParseAttValue() when the attribute parsing requires handling
3926
 * of non-ASCII characters, or normalization compaction.
3927
 *
3928
 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3929
 */
3930
static xmlChar *
3931
6.92M
xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
3932
6.92M
    xmlChar limit = 0;
3933
6.92M
    xmlChar *buf = NULL;
3934
6.92M
    xmlChar *rep = NULL;
3935
6.92M
    size_t len = 0;
3936
6.92M
    size_t buf_size = 0;
3937
6.92M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3938
5.12M
                       XML_MAX_HUGE_LENGTH :
3939
6.92M
                       XML_MAX_TEXT_LENGTH;
3940
6.92M
    int c, l, in_space = 0;
3941
6.92M
    xmlChar *current = NULL;
3942
6.92M
    xmlEntityPtr ent;
3943
3944
6.92M
    if (NXT(0) == '"') {
3945
3.85M
  ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3946
3.85M
  limit = '"';
3947
3.85M
        NEXT;
3948
3.85M
    } else if (NXT(0) == '\'') {
3949
3.06M
  limit = '\'';
3950
3.06M
  ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3951
3.06M
        NEXT;
3952
3.06M
    } else {
3953
0
  xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
3954
0
  return(NULL);
3955
0
    }
3956
3957
    /*
3958
     * allocate a translation buffer.
3959
     */
3960
6.92M
    buf_size = XML_PARSER_BUFFER_SIZE;
3961
6.92M
    buf = (xmlChar *) xmlMallocAtomic(buf_size);
3962
6.92M
    if (buf == NULL) goto mem_error;
3963
3964
    /*
3965
     * OK loop until we reach one of the ending char or a size limit.
3966
     */
3967
6.92M
    c = CUR_CHAR(l);
3968
362M
    while (((NXT(0) != limit) && /* checked */
3969
362M
            (IS_CHAR(c)) && (c != '<')) &&
3970
362M
            (ctxt->instate != XML_PARSER_EOF)) {
3971
355M
  if (c == '&') {
3972
8.50M
      in_space = 0;
3973
8.50M
      if (NXT(1) == '#') {
3974
2.13M
    int val = xmlParseCharRef(ctxt);
3975
3976
2.13M
    if (val == '&') {
3977
170k
        if (ctxt->replaceEntities) {
3978
18.0k
      if (len + 10 > buf_size) {
3979
360
          growBuffer(buf, 10);
3980
360
      }
3981
18.0k
      buf[len++] = '&';
3982
152k
        } else {
3983
      /*
3984
       * The reparsing will be done in xmlStringGetNodeList()
3985
       * called by the attribute() function in SAX.c
3986
       */
3987
152k
      if (len + 10 > buf_size) {
3988
612
          growBuffer(buf, 10);
3989
612
      }
3990
152k
      buf[len++] = '&';
3991
152k
      buf[len++] = '#';
3992
152k
      buf[len++] = '3';
3993
152k
      buf[len++] = '8';
3994
152k
      buf[len++] = ';';
3995
152k
        }
3996
1.96M
    } else if (val != 0) {
3997
1.68M
        if (len + 10 > buf_size) {
3998
7.96k
      growBuffer(buf, 10);
3999
7.96k
        }
4000
1.68M
        len += xmlCopyChar(0, &buf[len], val);
4001
1.68M
    }
4002
6.37M
      } else {
4003
6.37M
    ent = xmlParseEntityRef(ctxt);
4004
6.37M
    ctxt->nbentities++;
4005
6.37M
    if (ent != NULL)
4006
2.18M
        ctxt->nbentities += ent->owner;
4007
6.37M
    if ((ent != NULL) &&
4008
6.37M
        (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
4009
1.91M
        if (len + 10 > buf_size) {
4010
312
      growBuffer(buf, 10);
4011
312
        }
4012
1.91M
        if ((ctxt->replaceEntities == 0) &&
4013
1.91M
            (ent->content[0] == '&')) {
4014
937k
      buf[len++] = '&';
4015
937k
      buf[len++] = '#';
4016
937k
      buf[len++] = '3';
4017
937k
      buf[len++] = '8';
4018
937k
      buf[len++] = ';';
4019
982k
        } else {
4020
982k
      buf[len++] = ent->content[0];
4021
982k
        }
4022
4.45M
    } else if ((ent != NULL) &&
4023
4.45M
               (ctxt->replaceEntities != 0)) {
4024
72.9k
        if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
4025
72.9k
      ++ctxt->depth;
4026
72.9k
      rep = xmlStringDecodeEntities(ctxt, ent->content,
4027
72.9k
                  XML_SUBSTITUTE_REF,
4028
72.9k
                  0, 0, 0);
4029
72.9k
      --ctxt->depth;
4030
72.9k
      if (rep != NULL) {
4031
69.3k
          current = rep;
4032
3.00M
          while (*current != 0) { /* non input consuming */
4033
2.93M
                                if ((*current == 0xD) || (*current == 0xA) ||
4034
2.93M
                                    (*current == 0x9)) {
4035
67.5k
                                    buf[len++] = 0x20;
4036
67.5k
                                    current++;
4037
67.5k
                                } else
4038
2.86M
                                    buf[len++] = *current++;
4039
2.93M
        if (len + 10 > buf_size) {
4040
6.84k
            growBuffer(buf, 10);
4041
6.84k
        }
4042
2.93M
          }
4043
69.3k
          xmlFree(rep);
4044
69.3k
          rep = NULL;
4045
69.3k
      }
4046
72.9k
        } else {
4047
0
      if (len + 10 > buf_size) {
4048
0
          growBuffer(buf, 10);
4049
0
      }
4050
0
      if (ent->content != NULL)
4051
0
          buf[len++] = ent->content[0];
4052
0
        }
4053
4.38M
    } else if (ent != NULL) {
4054
191k
        int i = xmlStrlen(ent->name);
4055
191k
        const xmlChar *cur = ent->name;
4056
4057
        /*
4058
         * This may look absurd but is needed to detect
4059
         * entities problems
4060
         */
4061
191k
        if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
4062
191k
      (ent->content != NULL) && (ent->checked == 0)) {
4063
23.1k
      unsigned long oldnbent = ctxt->nbentities, diff;
4064
4065
23.1k
      ++ctxt->depth;
4066
23.1k
      rep = xmlStringDecodeEntities(ctxt, ent->content,
4067
23.1k
              XML_SUBSTITUTE_REF, 0, 0, 0);
4068
23.1k
      --ctxt->depth;
4069
4070
23.1k
                        diff = ctxt->nbentities - oldnbent + 1;
4071
23.1k
                        if (diff > INT_MAX / 2)
4072
0
                            diff = INT_MAX / 2;
4073
23.1k
                        ent->checked = diff * 2;
4074
23.1k
      if (rep != NULL) {
4075
22.7k
          if (xmlStrchr(rep, '<'))
4076
1.30k
              ent->checked |= 1;
4077
22.7k
          xmlFree(rep);
4078
22.7k
          rep = NULL;
4079
22.7k
      } else {
4080
316
                            ent->content[0] = 0;
4081
316
                        }
4082
23.1k
        }
4083
4084
        /*
4085
         * Just output the reference
4086
         */
4087
191k
        buf[len++] = '&';
4088
193k
        while (len + i + 10 > buf_size) {
4089
3.80k
      growBuffer(buf, i + 10);
4090
3.80k
        }
4091
704k
        for (;i > 0;i--)
4092
513k
      buf[len++] = *cur++;
4093
191k
        buf[len++] = ';';
4094
191k
    }
4095
6.37M
      }
4096
347M
  } else {
4097
347M
      if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
4098
35.0M
          if ((len != 0) || (!normalize)) {
4099
34.8M
        if ((!normalize) || (!in_space)) {
4100
34.2M
      COPY_BUF(l,buf,len,0x20);
4101
34.3M
      while (len + 10 > buf_size) {
4102
176k
          growBuffer(buf, 10);
4103
176k
      }
4104
34.2M
        }
4105
34.8M
        in_space = 1;
4106
34.8M
    }
4107
312M
      } else {
4108
312M
          in_space = 0;
4109
312M
    COPY_BUF(l,buf,len,c);
4110
312M
    if (len + 10 > buf_size) {
4111
1.04M
        growBuffer(buf, 10);
4112
1.04M
    }
4113
312M
      }
4114
347M
      NEXTL(l);
4115
347M
  }
4116
355M
  GROW;
4117
355M
  c = CUR_CHAR(l);
4118
355M
        if (len > maxLength) {
4119
0
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4120
0
                           "AttValue length too long\n");
4121
0
            goto mem_error;
4122
0
        }
4123
355M
    }
4124
6.92M
    if (ctxt->instate == XML_PARSER_EOF)
4125
0
        goto error;
4126
4127
6.92M
    if ((in_space) && (normalize)) {
4128
67.0k
        while ((len > 0) && (buf[len - 1] == 0x20)) len--;
4129
32.9k
    }
4130
6.92M
    buf[len] = 0;
4131
6.92M
    if (RAW == '<') {
4132
1.39M
  xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
4133
5.52M
    } else if (RAW != limit) {
4134
744k
  if ((c != 0) && (!IS_CHAR(c))) {
4135
513k
      xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
4136
513k
         "invalid character in attribute value\n");
4137
513k
  } else {
4138
231k
      xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4139
231k
         "AttValue: ' expected\n");
4140
231k
        }
4141
744k
    } else
4142
4.78M
  NEXT;
4143
4144
6.92M
    if (attlen != NULL) *attlen = len;
4145
6.92M
    return(buf);
4146
4147
0
mem_error:
4148
0
    xmlErrMemory(ctxt, NULL);
4149
0
error:
4150
0
    if (buf != NULL)
4151
0
        xmlFree(buf);
4152
0
    if (rep != NULL)
4153
0
        xmlFree(rep);
4154
0
    return(NULL);
4155
0
}
4156
4157
/**
4158
 * xmlParseAttValue:
4159
 * @ctxt:  an XML parser context
4160
 *
4161
 * DEPRECATED: Internal function, don't use.
4162
 *
4163
 * parse a value for an attribute
4164
 * Note: the parser won't do substitution of entities here, this
4165
 * will be handled later in xmlStringGetNodeList
4166
 *
4167
 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
4168
 *                   "'" ([^<&'] | Reference)* "'"
4169
 *
4170
 * 3.3.3 Attribute-Value Normalization:
4171
 * Before the value of an attribute is passed to the application or
4172
 * checked for validity, the XML processor must normalize it as follows:
4173
 * - a character reference is processed by appending the referenced
4174
 *   character to the attribute value
4175
 * - an entity reference is processed by recursively processing the
4176
 *   replacement text of the entity
4177
 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4178
 *   appending #x20 to the normalized value, except that only a single
4179
 *   #x20 is appended for a "#xD#xA" sequence that is part of an external
4180
 *   parsed entity or the literal entity value of an internal parsed entity
4181
 * - other characters are processed by appending them to the normalized value
4182
 * If the declared value is not CDATA, then the XML processor must further
4183
 * process the normalized attribute value by discarding any leading and
4184
 * trailing space (#x20) characters, and by replacing sequences of space
4185
 * (#x20) characters by a single space (#x20) character.
4186
 * All attributes for which no declaration has been read should be treated
4187
 * by a non-validating parser as if declared CDATA.
4188
 *
4189
 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
4190
 */
4191
4192
4193
xmlChar *
4194
72.0M
xmlParseAttValue(xmlParserCtxtPtr ctxt) {
4195
72.0M
    if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
4196
72.0M
    return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
4197
72.0M
}
4198
4199
/**
4200
 * xmlParseSystemLiteral:
4201
 * @ctxt:  an XML parser context
4202
 *
4203
 * DEPRECATED: Internal function, don't use.
4204
 *
4205
 * parse an XML Literal
4206
 *
4207
 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4208
 *
4209
 * Returns the SystemLiteral parsed or NULL
4210
 */
4211
4212
xmlChar *
4213
387k
xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
4214
387k
    xmlChar *buf = NULL;
4215
387k
    int len = 0;
4216
387k
    int size = XML_PARSER_BUFFER_SIZE;
4217
387k
    int cur, l;
4218
387k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4219
130k
                    XML_MAX_TEXT_LENGTH :
4220
387k
                    XML_MAX_NAME_LENGTH;
4221
387k
    xmlChar stop;
4222
387k
    int state = ctxt->instate;
4223
387k
    int count = 0;
4224
4225
387k
    SHRINK;
4226
387k
    if (RAW == '"') {
4227
316k
        NEXT;
4228
316k
  stop = '"';
4229
316k
    } else if (RAW == '\'') {
4230
63.3k
        NEXT;
4231
63.3k
  stop = '\'';
4232
63.3k
    } else {
4233
7.53k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4234
7.53k
  return(NULL);
4235
7.53k
    }
4236
4237
380k
    buf = (xmlChar *) xmlMallocAtomic(size);
4238
380k
    if (buf == NULL) {
4239
0
        xmlErrMemory(ctxt, NULL);
4240
0
  return(NULL);
4241
0
    }
4242
380k
    ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
4243
380k
    cur = CUR_CHAR(l);
4244
19.3M
    while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
4245
18.9M
  if (len + 5 >= size) {
4246
43.1k
      xmlChar *tmp;
4247
4248
43.1k
      size *= 2;
4249
43.1k
      tmp = (xmlChar *) xmlRealloc(buf, size);
4250
43.1k
      if (tmp == NULL) {
4251
0
          xmlFree(buf);
4252
0
    xmlErrMemory(ctxt, NULL);
4253
0
    ctxt->instate = (xmlParserInputState) state;
4254
0
    return(NULL);
4255
0
      }
4256
43.1k
      buf = tmp;
4257
43.1k
  }
4258
18.9M
  count++;
4259
18.9M
  if (count > 50) {
4260
274k
      SHRINK;
4261
274k
      GROW;
4262
274k
      count = 0;
4263
274k
            if (ctxt->instate == XML_PARSER_EOF) {
4264
0
          xmlFree(buf);
4265
0
    return(NULL);
4266
0
            }
4267
274k
  }
4268
18.9M
  COPY_BUF(l,buf,len,cur);
4269
18.9M
  NEXTL(l);
4270
18.9M
  cur = CUR_CHAR(l);
4271
18.9M
  if (cur == 0) {
4272
5.57k
      GROW;
4273
5.57k
      SHRINK;
4274
5.57k
      cur = CUR_CHAR(l);
4275
5.57k
  }
4276
18.9M
        if (len > maxLength) {
4277
45
            xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
4278
45
            xmlFree(buf);
4279
45
            ctxt->instate = (xmlParserInputState) state;
4280
45
            return(NULL);
4281
45
        }
4282
18.9M
    }
4283
380k
    buf[len] = 0;
4284
380k
    ctxt->instate = (xmlParserInputState) state;
4285
380k
    if (!IS_CHAR(cur)) {
4286
7.38k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4287
372k
    } else {
4288
372k
  NEXT;
4289
372k
    }
4290
380k
    return(buf);
4291
380k
}
4292
4293
/**
4294
 * xmlParsePubidLiteral:
4295
 * @ctxt:  an XML parser context
4296
 *
4297
 * DEPRECATED: Internal function, don't use.
4298
 *
4299
 * parse an XML public literal
4300
 *
4301
 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4302
 *
4303
 * Returns the PubidLiteral parsed or NULL.
4304
 */
4305
4306
xmlChar *
4307
134k
xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
4308
134k
    xmlChar *buf = NULL;
4309
134k
    int len = 0;
4310
134k
    int size = XML_PARSER_BUFFER_SIZE;
4311
134k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4312
48.0k
                    XML_MAX_TEXT_LENGTH :
4313
134k
                    XML_MAX_NAME_LENGTH;
4314
134k
    xmlChar cur;
4315
134k
    xmlChar stop;
4316
134k
    int count = 0;
4317
134k
    xmlParserInputState oldstate = ctxt->instate;
4318
4319
134k
    SHRINK;
4320
134k
    if (RAW == '"') {
4321
82.0k
        NEXT;
4322
82.0k
  stop = '"';
4323
82.0k
    } else if (RAW == '\'') {
4324
51.8k
        NEXT;
4325
51.8k
  stop = '\'';
4326
51.8k
    } else {
4327
1.05k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4328
1.05k
  return(NULL);
4329
1.05k
    }
4330
133k
    buf = (xmlChar *) xmlMallocAtomic(size);
4331
133k
    if (buf == NULL) {
4332
0
  xmlErrMemory(ctxt, NULL);
4333
0
  return(NULL);
4334
0
    }
4335
133k
    ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
4336
133k
    cur = CUR;
4337
4.97M
    while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
4338
4.84M
  if (len + 1 >= size) {
4339
5.48k
      xmlChar *tmp;
4340
4341
5.48k
      size *= 2;
4342
5.48k
      tmp = (xmlChar *) xmlRealloc(buf, size);
4343
5.48k
      if (tmp == NULL) {
4344
0
    xmlErrMemory(ctxt, NULL);
4345
0
    xmlFree(buf);
4346
0
    return(NULL);
4347
0
      }
4348
5.48k
      buf = tmp;
4349
5.48k
  }
4350
4.84M
  buf[len++] = cur;
4351
4.84M
  count++;
4352
4.84M
  if (count > 50) {
4353
45.9k
      SHRINK;
4354
45.9k
      GROW;
4355
45.9k
      count = 0;
4356
45.9k
            if (ctxt->instate == XML_PARSER_EOF) {
4357
0
    xmlFree(buf);
4358
0
    return(NULL);
4359
0
            }
4360
45.9k
  }
4361
4.84M
  NEXT;
4362
4.84M
  cur = CUR;
4363
4.84M
  if (cur == 0) {
4364
863
      GROW;
4365
863
      SHRINK;
4366
863
      cur = CUR;
4367
863
  }
4368
4.84M
        if (len > maxLength) {
4369
0
            xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
4370
0
            xmlFree(buf);
4371
0
            return(NULL);
4372
0
        }
4373
4.84M
    }
4374
133k
    buf[len] = 0;
4375
133k
    if (cur != stop) {
4376
4.15k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4377
129k
    } else {
4378
129k
  NEXT;
4379
129k
    }
4380
133k
    ctxt->instate = oldstate;
4381
133k
    return(buf);
4382
133k
}
4383
4384
static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
4385
4386
/*
 * Lookup table used for the test in the inner loop of the char data
 * testing. An entry holds the byte value itself for the bytes that are
 * kept, and 0 for every other byte: the control characters apart from
 * 0x9 (so CR and LF are 0 as well), '&', '<', ']' and all non-ASCII
 * bytes.
 */
static const unsigned char test_char_data[256] = {
    /* 0x00 - 0x0F: only 0x9 is kept */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x10 - 0x1F */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x20 - 0x2F: '&' (0x26) is 0 */
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27,
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    /* 0x30 - 0x3F: '<' (0x3C) is 0 */
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F,
    /* 0x40 - 0x4F */
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    /* 0x50 - 0x5F: ']' (0x5D) is 0 */
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F,
    /* 0x60 - 0x6F */
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    /* 0x70 - 0x7F */
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F
    /*
     * 0x80 - 0xFF (non-ascii): no initializer is given, so the language
     * zero-initializes these 128 entries.
     */
};
4423
4424
/**
4425
 * xmlParseCharData:
4426
 * @ctxt:  an XML parser context
4427
 * @cdata:  int indicating whether we are within a CDATA section
4428
 *
4429
 * DEPRECATED: Internal function, don't use.
4430
 *
4431
 * parse a CharData section.
4432
 * if we are within a CDATA section ']]>' marks an end of section.
4433
 *
4434
 * The right angle bracket (>) may be represented using the string "&gt;",
4435
 * and must, for compatibility, be escaped using "&gt;" or a character
4436
 * reference when it appears in the string "]]>" in content, when that
4437
 * string is not marking the end of a CDATA section.
4438
 *
4439
 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4440
 */
4441
4442
void
4443
302M
xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
4444
302M
    const xmlChar *in;
4445
302M
    int nbchar = 0;
4446
302M
    int line = ctxt->input->line;
4447
302M
    int col = ctxt->input->col;
4448
302M
    int ccol;
4449
4450
302M
    SHRINK;
4451
302M
    GROW;
4452
    /*
4453
     * Accelerated common case where input don't need to be
4454
     * modified before passing it to the handler.
4455
     */
4456
302M
    if (!cdata) {
4457
302M
  in = ctxt->input->cur;
4458
338M
  do {
4459
394M
get_more_space:
4460
654M
      while (*in == 0x20) { in++; ctxt->input->col++; }
4461
394M
      if (*in == 0xA) {
4462
78.0M
    do {
4463
78.0M
        ctxt->input->line++; ctxt->input->col = 1;
4464
78.0M
        in++;
4465
78.0M
    } while (*in == 0xA);
4466
56.7M
    goto get_more_space;
4467
56.7M
      }
4468
338M
      if (*in == '<') {
4469
46.5M
    nbchar = in - ctxt->input->cur;
4470
46.5M
    if (nbchar > 0) {
4471
46.4M
        const xmlChar *tmp = ctxt->input->cur;
4472
46.4M
        ctxt->input->cur = in;
4473
4474
46.4M
        if ((ctxt->sax != NULL) &&
4475
46.4M
            (ctxt->sax->ignorableWhitespace !=
4476
46.4M
             ctxt->sax->characters)) {
4477
18.9M
      if (areBlanks(ctxt, tmp, nbchar, 1)) {
4478
4.49M
          if (ctxt->sax->ignorableWhitespace != NULL)
4479
4.49M
        ctxt->sax->ignorableWhitespace(ctxt->userData,
4480
4.49M
                   tmp, nbchar);
4481
14.5M
      } else {
4482
14.5M
          if (ctxt->sax->characters != NULL)
4483
14.5M
        ctxt->sax->characters(ctxt->userData,
4484
14.5M
                  tmp, nbchar);
4485
14.5M
          if (*ctxt->space == -1)
4486
3.94M
              *ctxt->space = -2;
4487
14.5M
      }
4488
27.4M
        } else if ((ctxt->sax != NULL) &&
4489
27.4M
                   (ctxt->sax->characters != NULL)) {
4490
27.4M
      ctxt->sax->characters(ctxt->userData,
4491
27.4M
                tmp, nbchar);
4492
27.4M
        }
4493
46.4M
    }
4494
46.5M
    return;
4495
46.5M
      }
4496
4497
351M
get_more:
4498
351M
            ccol = ctxt->input->col;
4499
3.71G
      while (test_char_data[*in]) {
4500
3.36G
    in++;
4501
3.36G
    ccol++;
4502
3.36G
      }
4503
351M
      ctxt->input->col = ccol;
4504
351M
      if (*in == 0xA) {
4505
105M
    do {
4506
105M
        ctxt->input->line++; ctxt->input->col = 1;
4507
105M
        in++;
4508
105M
    } while (*in == 0xA);
4509
54.7M
    goto get_more;
4510
54.7M
      }
4511
297M
      if (*in == ']') {
4512
6.39M
    if ((in[1] == ']') && (in[2] == '>')) {
4513
767k
        xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4514
767k
        ctxt->input->cur = in + 1;
4515
767k
        return;
4516
767k
    }
4517
5.62M
    in++;
4518
5.62M
    ctxt->input->col++;
4519
5.62M
    goto get_more;
4520
6.39M
      }
4521
290M
      nbchar = in - ctxt->input->cur;
4522
290M
      if (nbchar > 0) {
4523
137M
    if ((ctxt->sax != NULL) &&
4524
137M
        (ctxt->sax->ignorableWhitespace !=
4525
137M
         ctxt->sax->characters) &&
4526
137M
        (IS_BLANK_CH(*ctxt->input->cur))) {
4527
15.2M
        const xmlChar *tmp = ctxt->input->cur;
4528
15.2M
        ctxt->input->cur = in;
4529
4530
15.2M
        if (areBlanks(ctxt, tmp, nbchar, 0)) {
4531
2.47M
            if (ctxt->sax->ignorableWhitespace != NULL)
4532
2.47M
          ctxt->sax->ignorableWhitespace(ctxt->userData,
4533
2.47M
                 tmp, nbchar);
4534
12.7M
        } else {
4535
12.7M
            if (ctxt->sax->characters != NULL)
4536
12.7M
          ctxt->sax->characters(ctxt->userData,
4537
12.7M
              tmp, nbchar);
4538
12.7M
      if (*ctxt->space == -1)
4539
4.40M
          *ctxt->space = -2;
4540
12.7M
        }
4541
15.2M
                    line = ctxt->input->line;
4542
15.2M
                    col = ctxt->input->col;
4543
121M
    } else if (ctxt->sax != NULL) {
4544
121M
        if (ctxt->sax->characters != NULL)
4545
121M
      ctxt->sax->characters(ctxt->userData,
4546
121M
                ctxt->input->cur, nbchar);
4547
121M
                    line = ctxt->input->line;
4548
121M
                    col = ctxt->input->col;
4549
121M
    }
4550
                /* something really bad happened in the SAX callback */
4551
137M
                if (ctxt->instate != XML_PARSER_CONTENT)
4552
0
                    return;
4553
137M
      }
4554
290M
      ctxt->input->cur = in;
4555
290M
      if (*in == 0xD) {
4556
35.4M
    in++;
4557
35.4M
    if (*in == 0xA) {
4558
35.2M
        ctxt->input->cur = in;
4559
35.2M
        in++;
4560
35.2M
        ctxt->input->line++; ctxt->input->col = 1;
4561
35.2M
        continue; /* while */
4562
35.2M
    }
4563
212k
    in--;
4564
212k
      }
4565
255M
      if (*in == '<') {
4566
105M
    return;
4567
105M
      }
4568
149M
      if (*in == '&') {
4569
18.9M
    return;
4570
18.9M
      }
4571
130M
      SHRINK;
4572
130M
      GROW;
4573
130M
            if (ctxt->instate == XML_PARSER_EOF)
4574
0
    return;
4575
130M
      in = ctxt->input->cur;
4576
166M
  } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09) || (*in == 0x0a));
4577
131M
  nbchar = 0;
4578
131M
    }
4579
131M
    ctxt->input->line = line;
4580
131M
    ctxt->input->col = col;
4581
131M
    xmlParseCharDataComplex(ctxt, cdata);
4582
131M
}
4583
4584
/**
4585
 * xmlParseCharDataComplex:
4586
 * @ctxt:  an XML parser context
4587
 * @cdata:  int indicating whether we are within a CDATA section
4588
 *
4589
 * parse a CharData section.this is the fallback function
4590
 * of xmlParseCharData() when the parsing requires handling
4591
 * of non-ASCII characters.
4592
 */
4593
static void
4594
131M
xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
4595
131M
    xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4596
131M
    int nbchar = 0;
4597
131M
    int cur, l;
4598
131M
    int count = 0;
4599
4600
131M
    SHRINK;
4601
131M
    GROW;
4602
131M
    cur = CUR_CHAR(l);
4603
1.66G
    while ((cur != '<') && /* checked */
4604
1.66G
           (cur != '&') &&
4605
1.66G
     (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
4606
1.53G
  if ((cur == ']') && (NXT(1) == ']') &&
4607
1.53G
      (NXT(2) == '>')) {
4608
282k
      if (cdata) break;
4609
282k
      else {
4610
282k
    xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4611
282k
      }
4612
282k
  }
4613
1.53G
  COPY_BUF(l,buf,nbchar,cur);
4614
  /* move current position before possible calling of ctxt->sax->characters */
4615
1.53G
  NEXTL(l);
4616
1.53G
  cur = CUR_CHAR(l);
4617
1.53G
  if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
4618
3.75M
      buf[nbchar] = 0;
4619
4620
      /*
4621
       * OK the segment is to be consumed as chars.
4622
       */
4623
3.75M
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4624
190k
    if (areBlanks(ctxt, buf, nbchar, 0)) {
4625
3.51k
        if (ctxt->sax->ignorableWhitespace != NULL)
4626
3.51k
      ctxt->sax->ignorableWhitespace(ctxt->userData,
4627
3.51k
                                     buf, nbchar);
4628
186k
    } else {
4629
186k
        if (ctxt->sax->characters != NULL)
4630
186k
      ctxt->sax->characters(ctxt->userData, buf, nbchar);
4631
186k
        if ((ctxt->sax->characters !=
4632
186k
             ctxt->sax->ignorableWhitespace) &&
4633
186k
      (*ctxt->space == -1))
4634
6.58k
      *ctxt->space = -2;
4635
186k
    }
4636
190k
      }
4637
3.75M
      nbchar = 0;
4638
            /* something really bad happened in the SAX callback */
4639
3.75M
            if (ctxt->instate != XML_PARSER_CONTENT)
4640
0
                return;
4641
3.75M
  }
4642
1.53G
  count++;
4643
1.53G
  if (count > 50) {
4644
26.8M
      SHRINK;
4645
26.8M
      GROW;
4646
26.8M
      count = 0;
4647
26.8M
            if (ctxt->instate == XML_PARSER_EOF)
4648
0
    return;
4649
26.8M
  }
4650
1.53G
    }
4651
131M
    if (nbchar != 0) {
4652
9.99M
        buf[nbchar] = 0;
4653
  /*
4654
   * OK the segment is to be consumed as chars.
4655
   */
4656
9.99M
  if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4657
1.46M
      if (areBlanks(ctxt, buf, nbchar, 0)) {
4658
20.7k
    if (ctxt->sax->ignorableWhitespace != NULL)
4659
20.7k
        ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4660
1.44M
      } else {
4661
1.44M
    if (ctxt->sax->characters != NULL)
4662
1.44M
        ctxt->sax->characters(ctxt->userData, buf, nbchar);
4663
1.44M
    if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4664
1.44M
        (*ctxt->space == -1))
4665
306k
        *ctxt->space = -2;
4666
1.44M
      }
4667
1.46M
  }
4668
9.99M
    }
4669
131M
    if ((cur != 0) && (!IS_CHAR(cur))) {
4670
  /* Generate the error and skip the offending character */
4671
120M
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4672
120M
                          "PCDATA invalid Char value %d\n",
4673
120M
                    cur);
4674
120M
  NEXTL(l);
4675
120M
    }
4676
131M
}
4677
4678
/**
4679
 * xmlParseExternalID:
4680
 * @ctxt:  an XML parser context
4681
 * @publicID:  a xmlChar** receiving PubidLiteral
4682
 * @strict: indicate whether we should restrict parsing to only
4683
 *          production [75], see NOTE below
4684
 *
4685
 * DEPRECATED: Internal function, don't use.
4686
 *
4687
 * Parse an External ID or a Public ID
4688
 *
4689
 * NOTE: Productions [75] and [83] interact badly since [75] can generate
4690
 *       'PUBLIC' S PubidLiteral S SystemLiteral
4691
 *
4692
 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4693
 *                   | 'PUBLIC' S PubidLiteral S SystemLiteral
4694
 *
4695
 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4696
 *
4697
 * Returns the function returns SystemLiteral and in the second
4698
 *                case publicID receives PubidLiteral, is strict is off
4699
 *                it is possible to return NULL and have publicID set.
4700
 */
4701
4702
xmlChar *
4703
739k
xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4704
739k
    xmlChar *URI = NULL;
4705
4706
739k
    SHRINK;
4707
4708
739k
    *publicID = NULL;
4709
739k
    if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
4710
257k
        SKIP(6);
4711
257k
  if (SKIP_BLANKS == 0) {
4712
709
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4713
709
                     "Space required after 'SYSTEM'\n");
4714
709
  }
4715
257k
  URI = xmlParseSystemLiteral(ctxt);
4716
257k
  if (URI == NULL) {
4717
1.00k
      xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4718
1.00k
        }
4719
482k
    } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
4720
134k
        SKIP(6);
4721
134k
  if (SKIP_BLANKS == 0) {
4722
1.07k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4723
1.07k
        "Space required after 'PUBLIC'\n");
4724
1.07k
  }
4725
134k
  *publicID = xmlParsePubidLiteral(ctxt);
4726
134k
  if (*publicID == NULL) {
4727
1.05k
      xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
4728
1.05k
  }
4729
134k
  if (strict) {
4730
      /*
4731
       * We don't handle [83] so "S SystemLiteral" is required.
4732
       */
4733
129k
      if (SKIP_BLANKS == 0) {
4734
5.94k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4735
5.94k
      "Space required after the Public Identifier\n");
4736
5.94k
      }
4737
129k
  } else {
4738
      /*
4739
       * We handle [83] so we return immediately, if
4740
       * "S SystemLiteral" is not detected. We skip blanks if no
4741
             * system literal was found, but this is harmless since we must
4742
             * be at the end of a NotationDecl.
4743
       */
4744
5.14k
      if (SKIP_BLANKS == 0) return(NULL);
4745
572
      if ((CUR != '\'') && (CUR != '"')) return(NULL);
4746
572
  }
4747
130k
  URI = xmlParseSystemLiteral(ctxt);
4748
130k
  if (URI == NULL) {
4749
6.57k
      xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4750
6.57k
        }
4751
130k
    }
4752
734k
    return(URI);
4753
739k
}
4754
4755
/**
4756
 * xmlParseCommentComplex:
4757
 * @ctxt:  an XML parser context
4758
 * @buf:  the already parsed part of the buffer
4759
 * @len:  number of bytes in the buffer
4760
 * @size:  allocated size of the buffer
4761
 *
4762
 * Skip an XML (SGML) comment <!-- .... -->
4763
 *  The spec says that "For compatibility, the string "--" (double-hyphen)
4764
 *  must not occur within comments. "
4765
 * This is the slow routine in case the accelerator for ascii didn't work
4766
 *
4767
 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4768
 */
4769
static void
4770
xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf,
4771
1.57M
                       size_t len, size_t size) {
4772
1.57M
    int q, ql;
4773
1.57M
    int r, rl;
4774
1.57M
    int cur, l;
4775
1.57M
    size_t count = 0;
4776
1.57M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4777
1.35M
                       XML_MAX_HUGE_LENGTH :
4778
1.57M
                       XML_MAX_TEXT_LENGTH;
4779
1.57M
    int inputid;
4780
4781
1.57M
    inputid = ctxt->input->id;
4782
4783
1.57M
    if (buf == NULL) {
4784
47.7k
        len = 0;
4785
47.7k
  size = XML_PARSER_BUFFER_SIZE;
4786
47.7k
  buf = (xmlChar *) xmlMallocAtomic(size);
4787
47.7k
  if (buf == NULL) {
4788
0
      xmlErrMemory(ctxt, NULL);
4789
0
      return;
4790
0
  }
4791
47.7k
    }
4792
1.57M
    GROW; /* Assure there's enough input data */
4793
1.57M
    q = CUR_CHAR(ql);
4794
1.57M
    if (q == 0)
4795
146k
        goto not_terminated;
4796
1.43M
    if (!IS_CHAR(q)) {
4797
376k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4798
376k
                          "xmlParseComment: invalid xmlChar value %d\n",
4799
376k
                    q);
4800
376k
  xmlFree (buf);
4801
376k
  return;
4802
376k
    }
4803
1.05M
    NEXTL(ql);
4804
1.05M
    r = CUR_CHAR(rl);
4805
1.05M
    if (r == 0)
4806
62.6k
        goto not_terminated;
4807
993k
    if (!IS_CHAR(r)) {
4808
24.3k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4809
24.3k
                          "xmlParseComment: invalid xmlChar value %d\n",
4810
24.3k
                    q);
4811
24.3k
  xmlFree (buf);
4812
24.3k
  return;
4813
24.3k
    }
4814
968k
    NEXTL(rl);
4815
968k
    cur = CUR_CHAR(l);
4816
968k
    if (cur == 0)
4817
39.7k
        goto not_terminated;
4818
396M
    while (IS_CHAR(cur) && /* checked */
4819
396M
           ((cur != '>') ||
4820
395M
      (r != '-') || (q != '-'))) {
4821
395M
  if ((r == '-') && (q == '-')) {
4822
691k
      xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
4823
691k
  }
4824
395M
  if (len + 5 >= size) {
4825
542k
      xmlChar *new_buf;
4826
542k
            size_t new_size;
4827
4828
542k
      new_size = size * 2;
4829
542k
      new_buf = (xmlChar *) xmlRealloc(buf, new_size);
4830
542k
      if (new_buf == NULL) {
4831
0
    xmlFree (buf);
4832
0
    xmlErrMemory(ctxt, NULL);
4833
0
    return;
4834
0
      }
4835
542k
      buf = new_buf;
4836
542k
            size = new_size;
4837
542k
  }
4838
395M
  COPY_BUF(ql,buf,len,q);
4839
395M
  q = r;
4840
395M
  ql = rl;
4841
395M
  r = cur;
4842
395M
  rl = l;
4843
4844
395M
  count++;
4845
395M
  if (count > 50) {
4846
7.43M
      SHRINK;
4847
7.43M
      GROW;
4848
7.43M
      count = 0;
4849
7.43M
            if (ctxt->instate == XML_PARSER_EOF) {
4850
0
    xmlFree(buf);
4851
0
    return;
4852
0
            }
4853
7.43M
  }
4854
395M
  NEXTL(l);
4855
395M
  cur = CUR_CHAR(l);
4856
395M
  if (cur == 0) {
4857
125k
      SHRINK;
4858
125k
      GROW;
4859
125k
      cur = CUR_CHAR(l);
4860
125k
  }
4861
4862
395M
        if (len > maxLength) {
4863
0
            xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4864
0
                         "Comment too big found", NULL);
4865
0
            xmlFree (buf);
4866
0
            return;
4867
0
        }
4868
395M
    }
4869
928k
    buf[len] = 0;
4870
928k
    if (cur == 0) {
4871
125k
  xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4872
125k
                       "Comment not terminated \n<!--%.50s\n", buf);
4873
803k
    } else if (!IS_CHAR(cur)) {
4874
218k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4875
218k
                          "xmlParseComment: invalid xmlChar value %d\n",
4876
218k
                    cur);
4877
584k
    } else {
4878
584k
  if (inputid != ctxt->input->id) {
4879
0
      xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4880
0
               "Comment doesn't start and stop in the same"
4881
0
                           " entity\n");
4882
0
  }
4883
584k
        NEXT;
4884
584k
  if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4885
584k
      (!ctxt->disableSAX))
4886
51.2k
      ctxt->sax->comment(ctxt->userData, buf);
4887
584k
    }
4888
928k
    xmlFree(buf);
4889
928k
    return;
4890
248k
not_terminated:
4891
248k
    xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4892
248k
       "Comment not terminated\n", NULL);
4893
248k
    xmlFree(buf);
4894
248k
    return;
4895
928k
}
4896
4897
/**
4898
 * xmlParseComment:
4899
 * @ctxt:  an XML parser context
4900
 *
4901
 * DEPRECATED: Internal function, don't use.
4902
 *
4903
 * Skip an XML (SGML) comment <!-- .... -->
4904
 *  The spec says that "For compatibility, the string "--" (double-hyphen)
4905
 *  must not occur within comments. "
4906
 *
4907
 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4908
 */
4909
void
4910
13.0M
xmlParseComment(xmlParserCtxtPtr ctxt) {
4911
13.0M
    xmlChar *buf = NULL;
4912
13.0M
    size_t size = XML_PARSER_BUFFER_SIZE;
4913
13.0M
    size_t len = 0;
4914
13.0M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4915
10.2M
                       XML_MAX_HUGE_LENGTH :
4916
13.0M
                       XML_MAX_TEXT_LENGTH;
4917
13.0M
    xmlParserInputState state;
4918
13.0M
    const xmlChar *in;
4919
13.0M
    size_t nbchar = 0;
4920
13.0M
    int ccol;
4921
13.0M
    int inputid;
4922
4923
    /*
4924
     * Check that there is a comment right here.
4925
     */
4926
13.0M
    if ((RAW != '<') || (NXT(1) != '!') ||
4927
13.0M
        (NXT(2) != '-') || (NXT(3) != '-')) return;
4928
12.9M
    state = ctxt->instate;
4929
12.9M
    ctxt->instate = XML_PARSER_COMMENT;
4930
12.9M
    inputid = ctxt->input->id;
4931
12.9M
    SKIP(4);
4932
12.9M
    SHRINK;
4933
12.9M
    GROW;
4934
4935
    /*
4936
     * Accelerated common case where input don't need to be
4937
     * modified before passing it to the handler.
4938
     */
4939
12.9M
    in = ctxt->input->cur;
4940
12.9M
    do {
4941
12.9M
  if (*in == 0xA) {
4942
787k
      do {
4943
787k
    ctxt->input->line++; ctxt->input->col = 1;
4944
787k
    in++;
4945
787k
      } while (*in == 0xA);
4946
747k
  }
4947
45.1M
get_more:
4948
45.1M
        ccol = ctxt->input->col;
4949
1.12G
  while (((*in > '-') && (*in <= 0x7F)) ||
4950
1.12G
         ((*in >= 0x20) && (*in < '-')) ||
4951
1.12G
         (*in == 0x09)) {
4952
1.08G
        in++;
4953
1.08G
        ccol++;
4954
1.08G
  }
4955
45.1M
  ctxt->input->col = ccol;
4956
45.1M
  if (*in == 0xA) {
4957
11.8M
      do {
4958
11.8M
    ctxt->input->line++; ctxt->input->col = 1;
4959
11.8M
    in++;
4960
11.8M
      } while (*in == 0xA);
4961
11.0M
      goto get_more;
4962
11.0M
  }
4963
34.0M
  nbchar = in - ctxt->input->cur;
4964
  /*
4965
   * save current set of data
4966
   */
4967
34.0M
  if (nbchar > 0) {
4968
33.9M
      if ((ctxt->sax != NULL) &&
4969
33.9M
    (ctxt->sax->comment != NULL)) {
4970
33.9M
    if (buf == NULL) {
4971
12.9M
        if ((*in == '-') && (in[1] == '-'))
4972
9.09M
            size = nbchar + 1;
4973
3.85M
        else
4974
3.85M
            size = XML_PARSER_BUFFER_SIZE + nbchar;
4975
12.9M
        buf = (xmlChar *) xmlMallocAtomic(size);
4976
12.9M
        if (buf == NULL) {
4977
0
            xmlErrMemory(ctxt, NULL);
4978
0
      ctxt->instate = state;
4979
0
      return;
4980
0
        }
4981
12.9M
        len = 0;
4982
20.9M
    } else if (len + nbchar + 1 >= size) {
4983
1.40M
        xmlChar *new_buf;
4984
1.40M
        size  += len + nbchar + XML_PARSER_BUFFER_SIZE;
4985
1.40M
        new_buf = (xmlChar *) xmlRealloc(buf, size);
4986
1.40M
        if (new_buf == NULL) {
4987
0
            xmlFree (buf);
4988
0
      xmlErrMemory(ctxt, NULL);
4989
0
      ctxt->instate = state;
4990
0
      return;
4991
0
        }
4992
1.40M
        buf = new_buf;
4993
1.40M
    }
4994
33.9M
    memcpy(&buf[len], ctxt->input->cur, nbchar);
4995
33.9M
    len += nbchar;
4996
33.9M
    buf[len] = 0;
4997
33.9M
      }
4998
33.9M
  }
4999
34.0M
        if (len > maxLength) {
5000
0
            xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
5001
0
                         "Comment too big found", NULL);
5002
0
            xmlFree (buf);
5003
0
            return;
5004
0
        }
5005
34.0M
  ctxt->input->cur = in;
5006
34.0M
  if (*in == 0xA) {
5007
0
      in++;
5008
0
      ctxt->input->line++; ctxt->input->col = 1;
5009
0
  }
5010
34.0M
  if (*in == 0xD) {
5011
9.71M
      in++;
5012
9.71M
      if (*in == 0xA) {
5013
9.66M
    ctxt->input->cur = in;
5014
9.66M
    in++;
5015
9.66M
    ctxt->input->line++; ctxt->input->col = 1;
5016
9.66M
    goto get_more;
5017
9.66M
      }
5018
45.9k
      in--;
5019
45.9k
  }
5020
24.3M
  SHRINK;
5021
24.3M
  GROW;
5022
24.3M
        if (ctxt->instate == XML_PARSER_EOF) {
5023
0
            xmlFree(buf);
5024
0
            return;
5025
0
        }
5026
24.3M
  in = ctxt->input->cur;
5027
24.3M
  if (*in == '-') {
5028
22.8M
      if (in[1] == '-') {
5029
15.1M
          if (in[2] == '>') {
5030
11.4M
        if (ctxt->input->id != inputid) {
5031
0
      xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5032
0
                     "comment doesn't start and stop in the"
5033
0
                                       " same entity\n");
5034
0
        }
5035
11.4M
        SKIP(3);
5036
11.4M
        if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
5037
11.4M
            (!ctxt->disableSAX)) {
5038
2.36M
      if (buf != NULL)
5039
2.35M
          ctxt->sax->comment(ctxt->userData, buf);
5040
6.48k
      else
5041
6.48k
          ctxt->sax->comment(ctxt->userData, BAD_CAST "");
5042
2.36M
        }
5043
11.4M
        if (buf != NULL)
5044
11.4M
            xmlFree(buf);
5045
11.4M
        if (ctxt->instate != XML_PARSER_EOF)
5046
11.4M
      ctxt->instate = state;
5047
11.4M
        return;
5048
11.4M
    }
5049
3.68M
    if (buf != NULL) {
5050
3.68M
        xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5051
3.68M
                          "Double hyphen within comment: "
5052
3.68M
                                      "<!--%.50s\n",
5053
3.68M
              buf);
5054
3.68M
    } else
5055
6.68k
        xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5056
6.68k
                          "Double hyphen within comment\n", NULL);
5057
3.68M
                if (ctxt->instate == XML_PARSER_EOF) {
5058
0
                    xmlFree(buf);
5059
0
                    return;
5060
0
                }
5061
3.68M
    in++;
5062
3.68M
    ctxt->input->col++;
5063
3.68M
      }
5064
11.3M
      in++;
5065
11.3M
      ctxt->input->col++;
5066
11.3M
      goto get_more;
5067
22.8M
  }
5068
24.3M
    } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09) || (*in == 0x0a));
5069
1.57M
    xmlParseCommentComplex(ctxt, buf, len, size);
5070
1.57M
    ctxt->instate = state;
5071
1.57M
    return;
5072
12.9M
}
5073
5074
5075
/**
5076
 * xmlParsePITarget:
5077
 * @ctxt:  an XML parser context
5078
 *
5079
 * DEPRECATED: Internal function, don't use.
5080
 *
5081
 * parse the name of a PI
5082
 *
5083
 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
5084
 *
5085
 * Returns the PITarget name or NULL
5086
 */
5087
5088
const xmlChar *
5089
3.50M
xmlParsePITarget(xmlParserCtxtPtr ctxt) {
5090
3.50M
    const xmlChar *name;
5091
5092
3.50M
    name = xmlParseName(ctxt);
5093
3.50M
    if ((name != NULL) &&
5094
3.50M
        ((name[0] == 'x') || (name[0] == 'X')) &&
5095
3.50M
        ((name[1] == 'm') || (name[1] == 'M')) &&
5096
3.50M
        ((name[2] == 'l') || (name[2] == 'L'))) {
5097
1.04M
  int i;
5098
1.04M
  if ((name[0] == 'x') && (name[1] == 'm') &&
5099
1.04M
      (name[2] == 'l') && (name[3] == 0)) {
5100
739k
      xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5101
739k
     "XML declaration allowed only at the start of the document\n");
5102
739k
      return(name);
5103
739k
  } else if (name[3] == 0) {
5104
7.69k
      xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
5105
7.69k
      return(name);
5106
7.69k
  }
5107
452k
  for (i = 0;;i++) {
5108
452k
      if (xmlW3CPIs[i] == NULL) break;
5109
375k
      if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
5110
221k
          return(name);
5111
375k
  }
5112
77.2k
  xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5113
77.2k
          "xmlParsePITarget: invalid name prefix 'xml'\n",
5114
77.2k
          NULL, NULL);
5115
77.2k
    }
5116
2.53M
    if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
5117
26.6k
  xmlNsErr(ctxt, XML_NS_ERR_COLON,
5118
26.6k
     "colons are forbidden from PI names '%s'\n", name, NULL, NULL);
5119
26.6k
    }
5120
2.53M
    return(name);
5121
3.50M
}
5122
5123
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype information
 * This will add the given catalog to the parsing context in order
 * to be used if there is a resolution need further down in the document
 *
 * The value must have the form  catalog = "URL"  (single or double
 * quotes, optional blanks around each token, nothing after the closing
 * quote). A syntax error produces an XML_WAR_CATALOG_PI warning, except
 * for a missing '=' which is ignored silently.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    xmlChar *url = NULL;
    const xmlChar *p = catalog;
    const xmlChar *valueStart;
    xmlChar quote;

    for (; IS_BLANK_CH(*p); p++)
        ;
    if (xmlStrncmp(p, BAD_CAST"catalog", 7) != 0)
        goto error;
    p += 7;

    for (; IS_BLANK_CH(*p); p++)
        ;
    if (*p != '=')
        return;
    p++;

    for (; IS_BLANK_CH(*p); p++)
        ;
    quote = *p;
    if ((quote != '\'') && (quote != '"'))
        goto error;
    p++;

    /* Find the matching closing quote. */
    valueStart = p;
    for (; (*p != 0) && (*p != quote); p++)
        ;
    if (*p == 0)
        goto error;
    url = xmlStrndup(valueStart, p - valueStart);
    p++;

    /* Only blanks may follow the quoted value. */
    for (; IS_BLANK_CH(*p); p++)
        ;
    if (*p != 0)
        goto error;

    if (url != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, url);
        xmlFree(url);
    }
    return;

error:
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
                  "Catalog PI syntax error: %s\n",
                  catalog, NULL);
    if (url != NULL)
        xmlFree(url);
}
#endif
5184
5185
/**
5186
 * xmlParsePI:
5187
 * @ctxt:  an XML parser context
5188
 *
5189
 * DEPRECATED: Internal function, don't use.
5190
 *
5191
 * parse an XML Processing Instruction.
5192
 *
5193
 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
5194
 *
5195
 * The processing is transferred to SAX once parsed.
5196
 */
5197
5198
void
5199
3.50M
xmlParsePI(xmlParserCtxtPtr ctxt) {
5200
3.50M
    xmlChar *buf = NULL;
5201
3.50M
    size_t len = 0;
5202
3.50M
    size_t size = XML_PARSER_BUFFER_SIZE;
5203
3.50M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
5204
2.97M
                       XML_MAX_HUGE_LENGTH :
5205
3.50M
                       XML_MAX_TEXT_LENGTH;
5206
3.50M
    int cur, l;
5207
3.50M
    const xmlChar *target;
5208
3.50M
    xmlParserInputState state;
5209
3.50M
    int count = 0;
5210
5211
3.50M
    if ((RAW == '<') && (NXT(1) == '?')) {
5212
3.50M
  int inputid = ctxt->input->id;
5213
3.50M
  state = ctxt->instate;
5214
3.50M
        ctxt->instate = XML_PARSER_PI;
5215
  /*
5216
   * this is a Processing Instruction.
5217
   */
5218
3.50M
  SKIP(2);
5219
3.50M
  SHRINK;
5220
5221
  /*
5222
   * Parse the target name and check for special support like
5223
   * namespace.
5224
   */
5225
3.50M
        target = xmlParsePITarget(ctxt);
5226
3.50M
  if (target != NULL) {
5227
3.23M
      if ((RAW == '?') && (NXT(1) == '>')) {
5228
470k
    if (inputid != ctxt->input->id) {
5229
0
        xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5230
0
                             "PI declaration doesn't start and stop in"
5231
0
                                   " the same entity\n");
5232
0
    }
5233
470k
    SKIP(2);
5234
5235
    /*
5236
     * SAX: PI detected.
5237
     */
5238
470k
    if ((ctxt->sax) && (!ctxt->disableSAX) &&
5239
470k
        (ctxt->sax->processingInstruction != NULL))
5240
123k
        ctxt->sax->processingInstruction(ctxt->userData,
5241
123k
                                         target, NULL);
5242
470k
    if (ctxt->instate != XML_PARSER_EOF)
5243
470k
        ctxt->instate = state;
5244
470k
    return;
5245
470k
      }
5246
2.76M
      buf = (xmlChar *) xmlMallocAtomic(size);
5247
2.76M
      if (buf == NULL) {
5248
0
    xmlErrMemory(ctxt, NULL);
5249
0
    ctxt->instate = state;
5250
0
    return;
5251
0
      }
5252
2.76M
      if (SKIP_BLANKS == 0) {
5253
636k
    xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
5254
636k
        "ParsePI: PI %s space expected\n", target);
5255
636k
      }
5256
2.76M
      cur = CUR_CHAR(l);
5257
374M
      while (IS_CHAR(cur) && /* checked */
5258
374M
       ((cur != '?') || (NXT(1) != '>'))) {
5259
371M
    if (len + 5 >= size) {
5260
1.07M
        xmlChar *tmp;
5261
1.07M
                    size_t new_size = size * 2;
5262
1.07M
        tmp = (xmlChar *) xmlRealloc(buf, new_size);
5263
1.07M
        if (tmp == NULL) {
5264
0
      xmlErrMemory(ctxt, NULL);
5265
0
      xmlFree(buf);
5266
0
      ctxt->instate = state;
5267
0
      return;
5268
0
        }
5269
1.07M
        buf = tmp;
5270
1.07M
                    size = new_size;
5271
1.07M
    }
5272
371M
    count++;
5273
371M
    if (count > 50) {
5274
6.36M
        SHRINK;
5275
6.36M
        GROW;
5276
6.36M
                    if (ctxt->instate == XML_PARSER_EOF) {
5277
0
                        xmlFree(buf);
5278
0
                        return;
5279
0
                    }
5280
6.36M
        count = 0;
5281
6.36M
    }
5282
371M
    COPY_BUF(l,buf,len,cur);
5283
371M
    NEXTL(l);
5284
371M
    cur = CUR_CHAR(l);
5285
371M
    if (cur == 0) {
5286
171k
        SHRINK;
5287
171k
        GROW;
5288
171k
        cur = CUR_CHAR(l);
5289
171k
    }
5290
371M
                if (len > maxLength) {
5291
0
                    xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5292
0
                                      "PI %s too big found", target);
5293
0
                    xmlFree(buf);
5294
0
                    ctxt->instate = state;
5295
0
                    return;
5296
0
                }
5297
371M
      }
5298
2.76M
      buf[len] = 0;
5299
2.76M
      if (cur != '?') {
5300
402k
    xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5301
402k
          "ParsePI: PI %s never end ...\n", target);
5302
2.36M
      } else {
5303
2.36M
    if (inputid != ctxt->input->id) {
5304
63
        xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5305
63
                             "PI declaration doesn't start and stop in"
5306
63
                                   " the same entity\n");
5307
63
    }
5308
2.36M
    SKIP(2);
5309
5310
2.36M
#ifdef LIBXML_CATALOG_ENABLED
5311
2.36M
    if (((state == XML_PARSER_MISC) ||
5312
2.36M
               (state == XML_PARSER_START)) &&
5313
2.36M
        (xmlStrEqual(target, XML_CATALOG_PI))) {
5314
0
        xmlCatalogAllow allow = xmlCatalogGetDefaults();
5315
0
        if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5316
0
      (allow == XML_CATA_ALLOW_ALL))
5317
0
      xmlParseCatalogPI(ctxt, buf);
5318
0
    }
5319
2.36M
#endif
5320
5321
5322
    /*
5323
     * SAX: PI detected.
5324
     */
5325
2.36M
    if ((ctxt->sax) && (!ctxt->disableSAX) &&
5326
2.36M
        (ctxt->sax->processingInstruction != NULL))
5327
229k
        ctxt->sax->processingInstruction(ctxt->userData,
5328
229k
                                         target, buf);
5329
2.36M
      }
5330
2.76M
      xmlFree(buf);
5331
2.76M
  } else {
5332
266k
      xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
5333
266k
  }
5334
3.03M
  if (ctxt->instate != XML_PARSER_EOF)
5335
3.03M
      ctxt->instate = state;
5336
3.03M
    }
5337
3.50M
}
5338
5339
/**
5340
 * xmlParseNotationDecl:
5341
 * @ctxt:  an XML parser context
5342
 *
5343
 * DEPRECATED: Internal function, don't use.
5344
 *
5345
 * parse a notation declaration
5346
 *
5347
 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID |  PublicID) S? '>'
5348
 *
5349
 * Hence there is actually 3 choices:
5350
 *     'PUBLIC' S PubidLiteral
5351
 *     'PUBLIC' S PubidLiteral S SystemLiteral
5352
 * and 'SYSTEM' S SystemLiteral
5353
 *
5354
 * See the NOTE on xmlParseExternalID().
5355
 */
5356
5357
void
5358
14.8k
xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
5359
14.8k
    const xmlChar *name;
5360
14.8k
    xmlChar *Pubid;
5361
14.8k
    xmlChar *Systemid;
5362
5363
14.8k
    if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5364
11.6k
  int inputid = ctxt->input->id;
5365
11.6k
  SHRINK;
5366
11.6k
  SKIP(10);
5367
11.6k
  if (SKIP_BLANKS == 0) {
5368
415
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5369
415
         "Space required after '<!NOTATION'\n");
5370
415
      return;
5371
415
  }
5372
5373
11.2k
        name = xmlParseName(ctxt);
5374
11.2k
  if (name == NULL) {
5375
251
      xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5376
251
      return;
5377
251
  }
5378
11.0k
  if (xmlStrchr(name, ':') != NULL) {
5379
472
      xmlNsErr(ctxt, XML_NS_ERR_COLON,
5380
472
         "colons are forbidden from notation names '%s'\n",
5381
472
         name, NULL, NULL);
5382
472
  }
5383
11.0k
  if (SKIP_BLANKS == 0) {
5384
360
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5385
360
         "Space required after the NOTATION name'\n");
5386
360
      return;
5387
360
  }
5388
5389
  /*
5390
   * Parse the IDs.
5391
   */
5392
10.6k
  Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
5393
10.6k
  SKIP_BLANKS;
5394
5395
10.6k
  if (RAW == '>') {
5396
8.84k
      if (inputid != ctxt->input->id) {
5397
3
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5398
3
                         "Notation declaration doesn't start and stop"
5399
3
                               " in the same entity\n");
5400
3
      }
5401
8.84k
      NEXT;
5402
8.84k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5403
8.84k
    (ctxt->sax->notationDecl != NULL))
5404
7.70k
    ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5405
8.84k
  } else {
5406
1.80k
      xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5407
1.80k
  }
5408
10.6k
  if (Systemid != NULL) xmlFree(Systemid);
5409
10.6k
  if (Pubid != NULL) xmlFree(Pubid);
5410
10.6k
    }
5411
14.8k
}
5412
5413
/**
5414
 * xmlParseEntityDecl:
5415
 * @ctxt:  an XML parser context
5416
 *
5417
 * DEPRECATED: Internal function, don't use.
5418
 *
5419
 * parse <!ENTITY declarations
5420
 *
5421
 * [70] EntityDecl ::= GEDecl | PEDecl
5422
 *
5423
 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5424
 *
5425
 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5426
 *
5427
 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5428
 *
5429
 * [74] PEDef ::= EntityValue | ExternalID
5430
 *
5431
 * [76] NDataDecl ::= S 'NDATA' S Name
5432
 *
5433
 * [ VC: Notation Declared ]
5434
 * The Name must match the declared name of a notation.
5435
 */
5436
5437
void
5438
2.42M
xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
5439
2.42M
    const xmlChar *name = NULL;
5440
2.42M
    xmlChar *value = NULL;
5441
2.42M
    xmlChar *URI = NULL, *literal = NULL;
5442
2.42M
    const xmlChar *ndata = NULL;
5443
2.42M
    int isParameter = 0;
5444
2.42M
    xmlChar *orig = NULL;
5445
5446
    /* GROW; done in the caller */
5447
2.42M
    if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
5448
2.41M
  int inputid = ctxt->input->id;
5449
2.41M
  SHRINK;
5450
2.41M
  SKIP(8);
5451
2.41M
  if (SKIP_BLANKS == 0) {
5452
18.4k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5453
18.4k
         "Space required after '<!ENTITY'\n");
5454
18.4k
  }
5455
5456
2.41M
  if (RAW == '%') {
5457
1.43M
      NEXT;
5458
1.43M
      if (SKIP_BLANKS == 0) {
5459
624
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5460
624
             "Space required after '%%'\n");
5461
624
      }
5462
1.43M
      isParameter = 1;
5463
1.43M
  }
5464
5465
2.41M
        name = xmlParseName(ctxt);
5466
2.41M
  if (name == NULL) {
5467
94.2k
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5468
94.2k
                     "xmlParseEntityDecl: no name\n");
5469
94.2k
            return;
5470
94.2k
  }
5471
2.32M
  if (xmlStrchr(name, ':') != NULL) {
5472
778
      xmlNsErr(ctxt, XML_NS_ERR_COLON,
5473
778
         "colons are forbidden from entities names '%s'\n",
5474
778
         name, NULL, NULL);
5475
778
  }
5476
2.32M
  if (SKIP_BLANKS == 0) {
5477
11.1k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5478
11.1k
         "Space required after the entity name\n");
5479
11.1k
  }
5480
5481
2.32M
  ctxt->instate = XML_PARSER_ENTITY_DECL;
5482
  /*
5483
   * handle the various case of definitions...
5484
   */
5485
2.32M
  if (isParameter) {
5486
1.43M
      if ((RAW == '"') || (RAW == '\'')) {
5487
1.32M
          value = xmlParseEntityValue(ctxt, &orig);
5488
1.32M
    if (value) {
5489
1.28M
        if ((ctxt->sax != NULL) &&
5490
1.28M
      (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5491
1.20M
      ctxt->sax->entityDecl(ctxt->userData, name,
5492
1.20M
                        XML_INTERNAL_PARAMETER_ENTITY,
5493
1.20M
            NULL, NULL, value);
5494
1.28M
    }
5495
1.32M
      } else {
5496
106k
          URI = xmlParseExternalID(ctxt, &literal, 1);
5497
106k
    if ((URI == NULL) && (literal == NULL)) {
5498
2.73k
        xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5499
2.73k
    }
5500
106k
    if (URI) {
5501
103k
        xmlURIPtr uri;
5502
5503
103k
        uri = xmlParseURI((const char *) URI);
5504
103k
        if (uri == NULL) {
5505
4.93k
            xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5506
4.93k
             "Invalid URI: %s\n", URI);
5507
      /*
5508
       * This really ought to be a well formedness error
5509
       * but the XML Core WG decided otherwise c.f. issue
5510
       * E26 of the XML erratas.
5511
       */
5512
98.4k
        } else {
5513
98.4k
      if (uri->fragment != NULL) {
5514
          /*
5515
           * Okay this is foolish to block those but not
5516
           * invalid URIs.
5517
           */
5518
172
          xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5519
98.2k
      } else {
5520
98.2k
          if ((ctxt->sax != NULL) &&
5521
98.2k
        (!ctxt->disableSAX) &&
5522
98.2k
        (ctxt->sax->entityDecl != NULL))
5523
90.5k
        ctxt->sax->entityDecl(ctxt->userData, name,
5524
90.5k
              XML_EXTERNAL_PARAMETER_ENTITY,
5525
90.5k
              literal, URI, NULL);
5526
98.2k
      }
5527
98.4k
      xmlFreeURI(uri);
5528
98.4k
        }
5529
103k
    }
5530
106k
      }
5531
1.43M
  } else {
5532
893k
      if ((RAW == '"') || (RAW == '\'')) {
5533
782k
          value = xmlParseEntityValue(ctxt, &orig);
5534
782k
    if ((ctxt->sax != NULL) &&
5535
782k
        (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5536
656k
        ctxt->sax->entityDecl(ctxt->userData, name,
5537
656k
        XML_INTERNAL_GENERAL_ENTITY,
5538
656k
        NULL, NULL, value);
5539
    /*
5540
     * For expat compatibility in SAX mode.
5541
     */
5542
782k
    if ((ctxt->myDoc == NULL) ||
5543
782k
        (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5544
20.4k
        if (ctxt->myDoc == NULL) {
5545
1.98k
      ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5546
1.98k
      if (ctxt->myDoc == NULL) {
5547
0
          xmlErrMemory(ctxt, "New Doc failed");
5548
0
          return;
5549
0
      }
5550
1.98k
      ctxt->myDoc->properties = XML_DOC_INTERNAL;
5551
1.98k
        }
5552
20.4k
        if (ctxt->myDoc->intSubset == NULL)
5553
1.98k
      ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5554
1.98k
              BAD_CAST "fake", NULL, NULL);
5555
5556
20.4k
        xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5557
20.4k
                    NULL, NULL, value);
5558
20.4k
    }
5559
782k
      } else {
5560
111k
          URI = xmlParseExternalID(ctxt, &literal, 1);
5561
111k
    if ((URI == NULL) && (literal == NULL)) {
5562
8.46k
        xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5563
8.46k
    }
5564
111k
    if (URI) {
5565
100k
        xmlURIPtr uri;
5566
5567
100k
        uri = xmlParseURI((const char *)URI);
5568
100k
        if (uri == NULL) {
5569
5.19k
            xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5570
5.19k
             "Invalid URI: %s\n", URI);
5571
      /*
5572
       * This really ought to be a well formedness error
5573
       * but the XML Core WG decided otherwise c.f. issue
5574
       * E26 of the XML erratas.
5575
       */
5576
95.4k
        } else {
5577
95.4k
      if (uri->fragment != NULL) {
5578
          /*
5579
           * Okay this is foolish to block those but not
5580
           * invalid URIs.
5581
           */
5582
1.04k
          xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5583
1.04k
      }
5584
95.4k
      xmlFreeURI(uri);
5585
95.4k
        }
5586
100k
    }
5587
111k
    if ((RAW != '>') && (SKIP_BLANKS == 0)) {
5588
10.0k
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5589
10.0k
           "Space required before 'NDATA'\n");
5590
10.0k
    }
5591
111k
    if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
5592
32.6k
        SKIP(5);
5593
32.6k
        if (SKIP_BLANKS == 0) {
5594
386
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5595
386
               "Space required after 'NDATA'\n");
5596
386
        }
5597
32.6k
        ndata = xmlParseName(ctxt);
5598
32.6k
        if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5599
32.6k
            (ctxt->sax->unparsedEntityDecl != NULL))
5600
31.4k
      ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5601
31.4k
            literal, URI, ndata);
5602
78.4k
    } else {
5603
78.4k
        if ((ctxt->sax != NULL) &&
5604
78.4k
            (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5605
67.9k
      ctxt->sax->entityDecl(ctxt->userData, name,
5606
67.9k
            XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5607
67.9k
            literal, URI, NULL);
5608
        /*
5609
         * For expat compatibility in SAX mode.
5610
         * assuming the entity replacement was asked for
5611
         */
5612
78.4k
        if ((ctxt->replaceEntities != 0) &&
5613
78.4k
      ((ctxt->myDoc == NULL) ||
5614
39.0k
      (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5615
1.45k
      if (ctxt->myDoc == NULL) {
5616
822
          ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5617
822
          if (ctxt->myDoc == NULL) {
5618
0
              xmlErrMemory(ctxt, "New Doc failed");
5619
0
        return;
5620
0
          }
5621
822
          ctxt->myDoc->properties = XML_DOC_INTERNAL;
5622
822
      }
5623
5624
1.45k
      if (ctxt->myDoc->intSubset == NULL)
5625
822
          ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5626
822
            BAD_CAST "fake", NULL, NULL);
5627
1.45k
      xmlSAX2EntityDecl(ctxt, name,
5628
1.45k
                  XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5629
1.45k
                  literal, URI, NULL);
5630
1.45k
        }
5631
78.4k
    }
5632
111k
      }
5633
893k
  }
5634
2.32M
  if (ctxt->instate == XML_PARSER_EOF)
5635
0
      goto done;
5636
2.32M
  SKIP_BLANKS;
5637
2.32M
  if (RAW != '>') {
5638
23.9k
      xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
5639
23.9k
              "xmlParseEntityDecl: entity %s not terminated\n", name);
5640
23.9k
      xmlHaltParser(ctxt);
5641
2.30M
  } else {
5642
2.30M
      if (inputid != ctxt->input->id) {
5643
4.63k
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5644
4.63k
                         "Entity declaration doesn't start and stop in"
5645
4.63k
                               " the same entity\n");
5646
4.63k
      }
5647
2.30M
      NEXT;
5648
2.30M
  }
5649
2.32M
  if (orig != NULL) {
5650
      /*
5651
       * Ugly mechanism to save the raw entity value.
5652
       */
5653
2.08M
      xmlEntityPtr cur = NULL;
5654
5655
2.08M
      if (isParameter) {
5656
1.31M
          if ((ctxt->sax != NULL) &&
5657
1.31M
        (ctxt->sax->getParameterEntity != NULL))
5658
1.31M
        cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5659
1.31M
      } else {
5660
770k
          if ((ctxt->sax != NULL) &&
5661
770k
        (ctxt->sax->getEntity != NULL))
5662
770k
        cur = ctxt->sax->getEntity(ctxt->userData, name);
5663
770k
    if ((cur == NULL) && (ctxt->userData==ctxt)) {
5664
92.3k
        cur = xmlSAX2GetEntity(ctxt, name);
5665
92.3k
    }
5666
770k
      }
5667
2.08M
            if ((cur != NULL) && (cur->orig == NULL)) {
5668
1.47M
    cur->orig = orig;
5669
1.47M
                orig = NULL;
5670
1.47M
      }
5671
2.08M
  }
5672
5673
2.32M
done:
5674
2.32M
  if (value != NULL) xmlFree(value);
5675
2.32M
  if (URI != NULL) xmlFree(URI);
5676
2.32M
  if (literal != NULL) xmlFree(literal);
5677
2.32M
        if (orig != NULL) xmlFree(orig);
5678
2.32M
    }
5679
2.42M
}
5680
5681
/**
5682
 * xmlParseDefaultDecl:
5683
 * @ctxt:  an XML parser context
5684
 * @value:  Receive a possible fixed default value for the attribute
5685
 *
5686
 * DEPRECATED: Internal function, don't use.
5687
 *
5688
 * Parse an attribute default declaration
5689
 *
5690
 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5691
 *
5692
 * [ VC: Required Attribute ]
5693
 * if the default declaration is the keyword #REQUIRED, then the
5694
 * attribute must be specified for all elements of the type in the
5695
 * attribute-list declaration.
5696
 *
5697
 * [ VC: Attribute Default Legal ]
5698
 * The declared default value must meet the lexical constraints of
5699
 * the declared attribute type c.f. xmlValidateAttributeDecl()
5700
 *
5701
 * [ VC: Fixed Attribute Default ]
5702
 * if an attribute has a default value declared with the #FIXED
5703
 * keyword, instances of that attribute must match the default value.
5704
 *
5705
 * [ WFC: No < in Attribute Values ]
5706
 * handled in xmlParseAttValue()
5707
 *
5708
 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5709
 *          or XML_ATTRIBUTE_FIXED.
5710
 */
5711
5712
int
5713
3.74M
xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5714
3.74M
    int val;
5715
3.74M
    xmlChar *ret;
5716
5717
3.74M
    *value = NULL;
5718
3.74M
    if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
5719
654k
  SKIP(9);
5720
654k
  return(XML_ATTRIBUTE_REQUIRED);
5721
654k
    }
5722
3.09M
    if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
5723
2.58M
  SKIP(8);
5724
2.58M
  return(XML_ATTRIBUTE_IMPLIED);
5725
2.58M
    }
5726
508k
    val = XML_ATTRIBUTE_NONE;
5727
508k
    if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
5728
272k
  SKIP(6);
5729
272k
  val = XML_ATTRIBUTE_FIXED;
5730
272k
  if (SKIP_BLANKS == 0) {
5731
351
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5732
351
         "Space required after '#FIXED'\n");
5733
351
  }
5734
272k
    }
5735
508k
    ret = xmlParseAttValue(ctxt);
5736
508k
    ctxt->instate = XML_PARSER_DTD;
5737
508k
    if (ret == NULL) {
5738
7.59k
  xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
5739
7.59k
           "Attribute default value declaration error\n");
5740
7.59k
    } else
5741
500k
        *value = ret;
5742
508k
    return(val);
5743
3.09M
}
5744
5745
/**
5746
 * xmlParseNotationType:
5747
 * @ctxt:  an XML parser context
5748
 *
5749
 * DEPRECATED: Internal function, don't use.
5750
 *
5751
 * parse an Notation attribute type.
5752
 *
5753
 * Note: the leading 'NOTATION' S part has already being parsed...
5754
 *
5755
 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5756
 *
5757
 * [ VC: Notation Attributes ]
5758
 * Values of this type must match one of the notation names included
5759
 * in the declaration; all notation names in the declaration must be declared.
5760
 *
5761
 * Returns: the notation attribute tree built while parsing
5762
 */
5763
5764
xmlEnumerationPtr
5765
5.27k
xmlParseNotationType(xmlParserCtxtPtr ctxt) {
5766
5.27k
    const xmlChar *name;
5767
5.27k
    xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5768
5769
5.27k
    if (RAW != '(') {
5770
271
  xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5771
271
  return(NULL);
5772
271
    }
5773
4.99k
    SHRINK;
5774
6.28k
    do {
5775
6.28k
        NEXT;
5776
6.28k
  SKIP_BLANKS;
5777
6.28k
        name = xmlParseName(ctxt);
5778
6.28k
  if (name == NULL) {
5779
329
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5780
329
         "Name expected in NOTATION declaration\n");
5781
329
            xmlFreeEnumeration(ret);
5782
329
      return(NULL);
5783
329
  }
5784
5.95k
  tmp = ret;
5785
9.00k
  while (tmp != NULL) {
5786
3.35k
      if (xmlStrEqual(name, tmp->name)) {
5787
308
    xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5788
308
    "standalone: attribute notation value token %s duplicated\n",
5789
308
         name, NULL);
5790
308
    if (!xmlDictOwns(ctxt->dict, name))
5791
0
        xmlFree((xmlChar *) name);
5792
308
    break;
5793
308
      }
5794
3.04k
      tmp = tmp->next;
5795
3.04k
  }
5796
5.95k
  if (tmp == NULL) {
5797
5.65k
      cur = xmlCreateEnumeration(name);
5798
5.65k
      if (cur == NULL) {
5799
0
                xmlFreeEnumeration(ret);
5800
0
                return(NULL);
5801
0
            }
5802
5.65k
      if (last == NULL) ret = last = cur;
5803
916
      else {
5804
916
    last->next = cur;
5805
916
    last = cur;
5806
916
      }
5807
5.65k
  }
5808
5.95k
  SKIP_BLANKS;
5809
5.95k
    } while (RAW == '|');
5810
4.67k
    if (RAW != ')') {
5811
564
  xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5812
564
        xmlFreeEnumeration(ret);
5813
564
  return(NULL);
5814
564
    }
5815
4.10k
    NEXT;
5816
4.10k
    return(ret);
5817
4.67k
}
5818
5819
/**
5820
 * xmlParseEnumerationType:
5821
 * @ctxt:  an XML parser context
5822
 *
5823
 * DEPRECATED: Internal function, don't use.
5824
 *
5825
 * parse an Enumeration attribute type.
5826
 *
5827
 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5828
 *
5829
 * [ VC: Enumeration ]
5830
 * Values of this type must match one of the Nmtoken tokens in
5831
 * the declaration
5832
 *
5833
 * Returns: the enumeration attribute tree built while parsing
5834
 */
5835
5836
xmlEnumerationPtr
5837
229k
xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
5838
229k
    xmlChar *name;
5839
229k
    xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5840
5841
229k
    if (RAW != '(') {
5842
13.7k
  xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
5843
13.7k
  return(NULL);
5844
13.7k
    }
5845
215k
    SHRINK;
5846
749k
    do {
5847
749k
        NEXT;
5848
749k
  SKIP_BLANKS;
5849
749k
        name = xmlParseNmtoken(ctxt);
5850
749k
  if (name == NULL) {
5851
577
      xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
5852
577
      return(ret);
5853
577
  }
5854
748k
  tmp = ret;
5855
2.07M
  while (tmp != NULL) {
5856
1.32M
      if (xmlStrEqual(name, tmp->name)) {
5857
584
    xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5858
584
    "standalone: attribute enumeration value token %s duplicated\n",
5859
584
         name, NULL);
5860
584
    if (!xmlDictOwns(ctxt->dict, name))
5861
584
        xmlFree(name);
5862
584
    break;
5863
584
      }
5864
1.32M
      tmp = tmp->next;
5865
1.32M
  }
5866
748k
  if (tmp == NULL) {
5867
747k
      cur = xmlCreateEnumeration(name);
5868
747k
      if (!xmlDictOwns(ctxt->dict, name))
5869
747k
    xmlFree(name);
5870
747k
      if (cur == NULL) {
5871
0
                xmlFreeEnumeration(ret);
5872
0
                return(NULL);
5873
0
            }
5874
747k
      if (last == NULL) ret = last = cur;
5875
532k
      else {
5876
532k
    last->next = cur;
5877
532k
    last = cur;
5878
532k
      }
5879
747k
  }
5880
748k
  SKIP_BLANKS;
5881
748k
    } while (RAW == '|');
5882
215k
    if (RAW != ')') {
5883
2.40k
  xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
5884
2.40k
  return(ret);
5885
2.40k
    }
5886
212k
    NEXT;
5887
212k
    return(ret);
5888
215k
}
5889
5890
/**
5891
 * xmlParseEnumeratedType:
5892
 * @ctxt:  an XML parser context
5893
 * @tree:  the enumeration tree built while parsing
5894
 *
5895
 * DEPRECATED: Internal function, don't use.
5896
 *
5897
 * parse an Enumerated attribute type.
5898
 *
5899
 * [57] EnumeratedType ::= NotationType | Enumeration
5900
 *
5901
 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5902
 *
5903
 *
5904
 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5905
 */
5906
5907
int
5908
234k
xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5909
234k
    if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5910
5.59k
  SKIP(8);
5911
5.59k
  if (SKIP_BLANKS == 0) {
5912
325
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5913
325
         "Space required after 'NOTATION'\n");
5914
325
      return(0);
5915
325
  }
5916
5.27k
  *tree = xmlParseNotationType(ctxt);
5917
5.27k
  if (*tree == NULL) return(0);
5918
4.10k
  return(XML_ATTRIBUTE_NOTATION);
5919
5.27k
    }
5920
229k
    *tree = xmlParseEnumerationType(ctxt);
5921
229k
    if (*tree == NULL) return(0);
5922
215k
    return(XML_ATTRIBUTE_ENUMERATION);
5923
229k
}
5924
5925
/**
5926
 * xmlParseAttributeType:
5927
 * @ctxt:  an XML parser context
5928
 * @tree:  the enumeration tree built while parsing
5929
 *
5930
 * DEPRECATED: Internal function, don't use.
5931
 *
5932
 * parse the Attribute list def for an element
5933
 *
5934
 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5935
 *
5936
 * [55] StringType ::= 'CDATA'
5937
 *
5938
 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5939
 *                        'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5940
 *
5941
 * Validity constraints for attribute values syntax are checked in
5942
 * xmlValidateAttributeValue()
5943
 *
5944
 * [ VC: ID ]
5945
 * Values of type ID must match the Name production. A name must not
5946
 * appear more than once in an XML document as a value of this type;
5947
 * i.e., ID values must uniquely identify the elements which bear them.
5948
 *
5949
 * [ VC: One ID per Element Type ]
5950
 * No element type may have more than one ID attribute specified.
5951
 *
5952
 * [ VC: ID Attribute Default ]
5953
 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
5954
 *
5955
 * [ VC: IDREF ]
5956
 * Values of type IDREF must match the Name production, and values
5957
 * of type IDREFS must match Names; each IDREF Name must match the value
5958
 * of an ID attribute on some element in the XML document; i.e. IDREF
5959
 * values must match the value of some ID attribute.
5960
 *
5961
 * [ VC: Entity Name ]
5962
 * Values of type ENTITY must match the Name production, values
5963
 * of type ENTITIES must match Names; each Entity Name must match the
5964
 * name of an unparsed entity declared in the DTD.
5965
 *
5966
 * [ VC: Name Token ]
5967
 * Values of type NMTOKEN must match the Nmtoken production; values
5968
 * of type NMTOKENS must match Nmtokens.
5969
 *
5970
 * Returns the attribute type
5971
 */
5972
int
5973
3.76M
xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5974
3.76M
    SHRINK;
5975
3.76M
    if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
5976
1.34M
  SKIP(5);
5977
1.34M
  return(XML_ATTRIBUTE_CDATA);
5978
2.42M
     } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
5979
18.4k
  SKIP(6);
5980
18.4k
  return(XML_ATTRIBUTE_IDREFS);
5981
2.40M
     } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
5982
75.7k
  SKIP(5);
5983
75.7k
  return(XML_ATTRIBUTE_IDREF);
5984
2.32M
     } else if ((RAW == 'I') && (NXT(1) == 'D')) {
5985
994k
        SKIP(2);
5986
994k
  return(XML_ATTRIBUTE_ID);
5987
1.33M
     } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
5988
10.0k
  SKIP(6);
5989
10.0k
  return(XML_ATTRIBUTE_ENTITY);
5990
1.32M
     } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
5991
9.92k
  SKIP(8);
5992
9.92k
  return(XML_ATTRIBUTE_ENTITIES);
5993
1.31M
     } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
5994
102k
  SKIP(8);
5995
102k
  return(XML_ATTRIBUTE_NMTOKENS);
5996
1.20M
     } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
5997
974k
  SKIP(7);
5998
974k
  return(XML_ATTRIBUTE_NMTOKEN);
5999
974k
     }
6000
234k
     return(xmlParseEnumeratedType(ctxt, tree));
6001
3.76M
}
6002
6003
/**
6004
 * xmlParseAttributeListDecl:
6005
 * @ctxt:  an XML parser context
6006
 *
6007
 * DEPRECATED: Internal function, don't use.
6008
 *
6009
 * Parse the attribute list declaration for an element
6010
 *
6011
 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
6012
 *
6013
 * [53] AttDef ::= S Name S AttType S DefaultDecl
6014
 *
6015
 */
6016
void
6017
1.72M
xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
6018
1.72M
    const xmlChar *elemName;
6019
1.72M
    const xmlChar *attrName;
6020
1.72M
    xmlEnumerationPtr tree;
6021
6022
1.72M
    if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
6023
1.71M
  int inputid = ctxt->input->id;
6024
6025
1.71M
  SKIP(9);
6026
1.71M
  if (SKIP_BLANKS == 0) {
6027
6.13k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6028
6.13k
                     "Space required after '<!ATTLIST'\n");
6029
6.13k
  }
6030
1.71M
        elemName = xmlParseName(ctxt);
6031
1.71M
  if (elemName == NULL) {
6032
2.78k
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6033
2.78k
         "ATTLIST: no name for Element\n");
6034
2.78k
      return;
6035
2.78k
  }
6036
1.70M
  SKIP_BLANKS;
6037
1.70M
  GROW;
6038
5.44M
  while ((RAW != '>') && (ctxt->instate != XML_PARSER_EOF)) {
6039
3.79M
      int type;
6040
3.79M
      int def;
6041
3.79M
      xmlChar *defaultValue = NULL;
6042
6043
3.79M
      GROW;
6044
3.79M
            tree = NULL;
6045
3.79M
      attrName = xmlParseName(ctxt);
6046
3.79M
      if (attrName == NULL) {
6047
16.7k
    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6048
16.7k
             "ATTLIST: no name for Attribute\n");
6049
16.7k
    break;
6050
16.7k
      }
6051
3.77M
      GROW;
6052
3.77M
      if (SKIP_BLANKS == 0) {
6053
8.73k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6054
8.73k
            "Space required after the attribute name\n");
6055
8.73k
    break;
6056
8.73k
      }
6057
6058
3.76M
      type = xmlParseAttributeType(ctxt, &tree);
6059
3.76M
      if (type <= 0) {
6060
15.6k
          break;
6061
15.6k
      }
6062
6063
3.75M
      GROW;
6064
3.75M
      if (SKIP_BLANKS == 0) {
6065
5.87k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6066
5.87k
             "Space required after the attribute type\n");
6067
5.87k
          if (tree != NULL)
6068
2.90k
        xmlFreeEnumeration(tree);
6069
5.87k
    break;
6070
5.87k
      }
6071
6072
3.74M
      def = xmlParseDefaultDecl(ctxt, &defaultValue);
6073
3.74M
      if (def <= 0) {
6074
0
                if (defaultValue != NULL)
6075
0
        xmlFree(defaultValue);
6076
0
          if (tree != NULL)
6077
0
        xmlFreeEnumeration(tree);
6078
0
          break;
6079
0
      }
6080
3.74M
      if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
6081
127k
          xmlAttrNormalizeSpace(defaultValue, defaultValue);
6082
6083
3.74M
      GROW;
6084
3.74M
            if (RAW != '>') {
6085
3.19M
    if (SKIP_BLANKS == 0) {
6086
15.3k
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6087
15.3k
      "Space required after the attribute default value\n");
6088
15.3k
        if (defaultValue != NULL)
6089
7.57k
      xmlFree(defaultValue);
6090
15.3k
        if (tree != NULL)
6091
1.61k
      xmlFreeEnumeration(tree);
6092
15.3k
        break;
6093
15.3k
    }
6094
3.19M
      }
6095
3.73M
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6096
3.73M
    (ctxt->sax->attributeDecl != NULL))
6097
3.46M
    ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
6098
3.46M
                          type, def, defaultValue, tree);
6099
271k
      else if (tree != NULL)
6100
16.4k
    xmlFreeEnumeration(tree);
6101
6102
3.73M
      if ((ctxt->sax2) && (defaultValue != NULL) &&
6103
3.73M
          (def != XML_ATTRIBUTE_IMPLIED) &&
6104
3.73M
    (def != XML_ATTRIBUTE_REQUIRED)) {
6105
401k
    xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
6106
401k
      }
6107
3.73M
      if (ctxt->sax2) {
6108
2.79M
    xmlAddSpecialAttr(ctxt, elemName, attrName, type);
6109
2.79M
      }
6110
3.73M
      if (defaultValue != NULL)
6111
492k
          xmlFree(defaultValue);
6112
3.73M
      GROW;
6113
3.73M
  }
6114
1.70M
  if (RAW == '>') {
6115
1.65M
      if (inputid != ctxt->input->id) {
6116
3.33k
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6117
3.33k
                               "Attribute list declaration doesn't start and"
6118
3.33k
                               " stop in the same entity\n");
6119
3.33k
      }
6120
1.65M
      NEXT;
6121
1.65M
  }
6122
1.70M
    }
6123
1.72M
}
6124
6125
/**
6126
 * xmlParseElementMixedContentDecl:
6127
 * @ctxt:  an XML parser context
6128
 * @inputchk:  the input used for the current entity, needed for boundary checks
6129
 *
6130
 * DEPRECATED: Internal function, don't use.
6131
 *
6132
 * parse the declaration for a Mixed Element content
6133
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6134
 *
6135
 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
6136
 *                '(' S? '#PCDATA' S? ')'
6137
 *
6138
 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
6139
 *
6140
 * [ VC: No Duplicate Types ]
6141
 * The same name must not appear more than once in a single
6142
 * mixed-content declaration.
6143
 *
6144
 * returns: the list of the xmlElementContentPtr describing the element choices
6145
 */
6146
xmlElementContentPtr
6147
607k
xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6148
607k
    xmlElementContentPtr ret = NULL, cur = NULL, n;
6149
607k
    const xmlChar *elem = NULL;
6150
6151
607k
    GROW;
6152
607k
    if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6153
607k
  SKIP(7);
6154
607k
  SKIP_BLANKS;
6155
607k
  SHRINK;
6156
607k
  if (RAW == ')') {
6157
464k
      if (ctxt->input->id != inputchk) {
6158
0
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6159
0
                               "Element content declaration doesn't start and"
6160
0
                               " stop in the same entity\n");
6161
0
      }
6162
464k
      NEXT;
6163
464k
      ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6164
464k
      if (ret == NULL)
6165
0
          return(NULL);
6166
464k
      if (RAW == '*') {
6167
168
    ret->ocur = XML_ELEMENT_CONTENT_MULT;
6168
168
    NEXT;
6169
168
      }
6170
464k
      return(ret);
6171
464k
  }
6172
142k
  if ((RAW == '(') || (RAW == '|')) {
6173
140k
      ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6174
140k
      if (ret == NULL) return(NULL);
6175
140k
  }
6176
1.64M
  while ((RAW == '|') && (ctxt->instate != XML_PARSER_EOF)) {
6177
1.50M
      NEXT;
6178
1.50M
      if (elem == NULL) {
6179
140k
          ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6180
140k
    if (ret == NULL) {
6181
0
        xmlFreeDocElementContent(ctxt->myDoc, cur);
6182
0
                    return(NULL);
6183
0
                }
6184
140k
    ret->c1 = cur;
6185
140k
    if (cur != NULL)
6186
140k
        cur->parent = ret;
6187
140k
    cur = ret;
6188
1.36M
      } else {
6189
1.36M
          n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6190
1.36M
    if (n == NULL) {
6191
0
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6192
0
                    return(NULL);
6193
0
                }
6194
1.36M
    n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6195
1.36M
    if (n->c1 != NULL)
6196
1.36M
        n->c1->parent = n;
6197
1.36M
          cur->c2 = n;
6198
1.36M
    if (n != NULL)
6199
1.36M
        n->parent = cur;
6200
1.36M
    cur = n;
6201
1.36M
      }
6202
1.50M
      SKIP_BLANKS;
6203
1.50M
      elem = xmlParseName(ctxt);
6204
1.50M
      if (elem == NULL) {
6205
697
    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6206
697
      "xmlParseElementMixedContentDecl : Name expected\n");
6207
697
    xmlFreeDocElementContent(ctxt->myDoc, ret);
6208
697
    return(NULL);
6209
697
      }
6210
1.50M
      SKIP_BLANKS;
6211
1.50M
      GROW;
6212
1.50M
  }
6213
141k
  if ((RAW == ')') && (NXT(1) == '*')) {
6214
138k
      if (elem != NULL) {
6215
138k
    cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
6216
138k
                                   XML_ELEMENT_CONTENT_ELEMENT);
6217
138k
    if (cur->c2 != NULL)
6218
138k
        cur->c2->parent = cur;
6219
138k
            }
6220
138k
            if (ret != NULL)
6221
138k
                ret->ocur = XML_ELEMENT_CONTENT_MULT;
6222
138k
      if (ctxt->input->id != inputchk) {
6223
10
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6224
10
                               "Element content declaration doesn't start and"
6225
10
                               " stop in the same entity\n");
6226
10
      }
6227
138k
      SKIP(2);
6228
138k
  } else {
6229
3.21k
      xmlFreeDocElementContent(ctxt->myDoc, ret);
6230
3.21k
      xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
6231
3.21k
      return(NULL);
6232
3.21k
  }
6233
6234
141k
    } else {
6235
0
  xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
6236
0
    }
6237
138k
    return(ret);
6238
607k
}
6239
6240
/**
6241
 * xmlParseElementChildrenContentDeclPriv:
6242
 * @ctxt:  an XML parser context
6243
 * @inputchk:  the input used for the current entity, needed for boundary checks
6244
 * @depth: the level of recursion
6245
 *
6246
 * parse the declaration for a Mixed Element content
6247
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6248
 *
6249
 *
6250
 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6251
 *
6252
 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6253
 *
6254
 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6255
 *
6256
 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6257
 *
6258
 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6259
 * TODO Parameter-entity replacement text must be properly nested
6260
 *  with parenthesized groups. That is to say, if either of the
6261
 *  opening or closing parentheses in a choice, seq, or Mixed
6262
 *  construct is contained in the replacement text for a parameter
6263
 *  entity, both must be contained in the same replacement text. For
6264
 *  interoperability, if a parameter-entity reference appears in a
6265
 *  choice, seq, or Mixed construct, its replacement text should not
6266
 *  be empty, and neither the first nor last non-blank character of
6267
 *  the replacement text should be a connector (| or ,).
6268
 *
6269
 * Returns the tree of xmlElementContentPtr describing the element
6270
 *          hierarchy.
6271
 */
6272
static xmlElementContentPtr
6273
xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
6274
1.32M
                                       int depth) {
6275
1.32M
    xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
6276
1.32M
    const xmlChar *elem;
6277
1.32M
    xmlChar type = 0;
6278
6279
1.32M
    if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
6280
1.32M
        (depth >  2048)) {
6281
154
        xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED,
6282
154
"xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n",
6283
154
                          depth);
6284
154
  return(NULL);
6285
154
    }
6286
1.32M
    SKIP_BLANKS;
6287
1.32M
    GROW;
6288
1.32M
    if (RAW == '(') {
6289
161k
  int inputid = ctxt->input->id;
6290
6291
        /* Recurse on first child */
6292
161k
  NEXT;
6293
161k
  SKIP_BLANKS;
6294
161k
        cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6295
161k
                                                           depth + 1);
6296
161k
        if (cur == NULL)
6297
103k
            return(NULL);
6298
57.0k
  SKIP_BLANKS;
6299
57.0k
  GROW;
6300
1.16M
    } else {
6301
1.16M
  elem = xmlParseName(ctxt);
6302
1.16M
  if (elem == NULL) {
6303
41.6k
      xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6304
41.6k
      return(NULL);
6305
41.6k
  }
6306
1.12M
        cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6307
1.12M
  if (cur == NULL) {
6308
0
      xmlErrMemory(ctxt, NULL);
6309
0
      return(NULL);
6310
0
  }
6311
1.12M
  GROW;
6312
1.12M
  if (RAW == '?') {
6313
67.3k
      cur->ocur = XML_ELEMENT_CONTENT_OPT;
6314
67.3k
      NEXT;
6315
1.05M
  } else if (RAW == '*') {
6316
149k
      cur->ocur = XML_ELEMENT_CONTENT_MULT;
6317
149k
      NEXT;
6318
907k
  } else if (RAW == '+') {
6319
151k
      cur->ocur = XML_ELEMENT_CONTENT_PLUS;
6320
151k
      NEXT;
6321
755k
  } else {
6322
755k
      cur->ocur = XML_ELEMENT_CONTENT_ONCE;
6323
755k
  }
6324
1.12M
  GROW;
6325
1.12M
    }
6326
1.18M
    SKIP_BLANKS;
6327
1.18M
    SHRINK;
6328
4.66M
    while ((RAW != ')') && (ctxt->instate != XML_PARSER_EOF)) {
6329
        /*
6330
   * Each loop we parse one separator and one element.
6331
   */
6332
3.53M
        if (RAW == ',') {
6333
908k
      if (type == 0) type = CUR;
6334
6335
      /*
6336
       * Detect "Name | Name , Name" error
6337
       */
6338
500k
      else if (type != CUR) {
6339
258
    xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6340
258
        "xmlParseElementChildrenContentDecl : '%c' expected\n",
6341
258
                      type);
6342
258
    if ((last != NULL) && (last != ret))
6343
258
        xmlFreeDocElementContent(ctxt->myDoc, last);
6344
258
    if (ret != NULL)
6345
258
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6346
258
    return(NULL);
6347
258
      }
6348
908k
      NEXT;
6349
6350
908k
      op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
6351
908k
      if (op == NULL) {
6352
0
    if ((last != NULL) && (last != ret))
6353
0
        xmlFreeDocElementContent(ctxt->myDoc, last);
6354
0
          xmlFreeDocElementContent(ctxt->myDoc, ret);
6355
0
    return(NULL);
6356
0
      }
6357
908k
      if (last == NULL) {
6358
408k
    op->c1 = ret;
6359
408k
    if (ret != NULL)
6360
408k
        ret->parent = op;
6361
408k
    ret = cur = op;
6362
500k
      } else {
6363
500k
          cur->c2 = op;
6364
500k
    if (op != NULL)
6365
500k
        op->parent = cur;
6366
500k
    op->c1 = last;
6367
500k
    if (last != NULL)
6368
500k
        last->parent = op;
6369
500k
    cur =op;
6370
500k
    last = NULL;
6371
500k
      }
6372
2.62M
  } else if (RAW == '|') {
6373
2.61M
      if (type == 0) type = CUR;
6374
6375
      /*
6376
       * Detect "Name , Name | Name" error
6377
       */
6378
2.27M
      else if (type != CUR) {
6379
221
    xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6380
221
        "xmlParseElementChildrenContentDecl : '%c' expected\n",
6381
221
          type);
6382
221
    if ((last != NULL) && (last != ret))
6383
221
        xmlFreeDocElementContent(ctxt->myDoc, last);
6384
221
    if (ret != NULL)
6385
221
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6386
221
    return(NULL);
6387
221
      }
6388
2.61M
      NEXT;
6389
6390
2.61M
      op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6391
2.61M
      if (op == NULL) {
6392
0
    if ((last != NULL) && (last != ret))
6393
0
        xmlFreeDocElementContent(ctxt->myDoc, last);
6394
0
    if (ret != NULL)
6395
0
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6396
0
    return(NULL);
6397
0
      }
6398
2.61M
      if (last == NULL) {
6399
331k
    op->c1 = ret;
6400
331k
    if (ret != NULL)
6401
331k
        ret->parent = op;
6402
331k
    ret = cur = op;
6403
2.27M
      } else {
6404
2.27M
          cur->c2 = op;
6405
2.27M
    if (op != NULL)
6406
2.27M
        op->parent = cur;
6407
2.27M
    op->c1 = last;
6408
2.27M
    if (last != NULL)
6409
2.27M
        last->parent = op;
6410
2.27M
    cur =op;
6411
2.27M
    last = NULL;
6412
2.27M
      }
6413
2.61M
  } else {
6414
15.2k
      xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
6415
15.2k
      if ((last != NULL) && (last != ret))
6416
6.76k
          xmlFreeDocElementContent(ctxt->myDoc, last);
6417
15.2k
      if (ret != NULL)
6418
15.2k
    xmlFreeDocElementContent(ctxt->myDoc, ret);
6419
15.2k
      return(NULL);
6420
15.2k
  }
6421
3.51M
  GROW;
6422
3.51M
  SKIP_BLANKS;
6423
3.51M
  GROW;
6424
3.51M
  if (RAW == '(') {
6425
149k
      int inputid = ctxt->input->id;
6426
      /* Recurse on second child */
6427
149k
      NEXT;
6428
149k
      SKIP_BLANKS;
6429
149k
      last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6430
149k
                                                          depth + 1);
6431
149k
            if (last == NULL) {
6432
1.52k
    if (ret != NULL)
6433
1.52k
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6434
1.52k
    return(NULL);
6435
1.52k
            }
6436
148k
      SKIP_BLANKS;
6437
3.36M
  } else {
6438
3.36M
      elem = xmlParseName(ctxt);
6439
3.36M
      if (elem == NULL) {
6440
32.1k
    xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6441
32.1k
    if (ret != NULL)
6442
32.1k
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6443
32.1k
    return(NULL);
6444
32.1k
      }
6445
3.33M
      last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6446
3.33M
      if (last == NULL) {
6447
0
    if (ret != NULL)
6448
0
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6449
0
    return(NULL);
6450
0
      }
6451
3.33M
      if (RAW == '?') {
6452
367k
    last->ocur = XML_ELEMENT_CONTENT_OPT;
6453
367k
    NEXT;
6454
2.96M
      } else if (RAW == '*') {
6455
181k
    last->ocur = XML_ELEMENT_CONTENT_MULT;
6456
181k
    NEXT;
6457
2.78M
      } else if (RAW == '+') {
6458
38.6k
    last->ocur = XML_ELEMENT_CONTENT_PLUS;
6459
38.6k
    NEXT;
6460
2.74M
      } else {
6461
2.74M
    last->ocur = XML_ELEMENT_CONTENT_ONCE;
6462
2.74M
      }
6463
3.33M
  }
6464
3.48M
  SKIP_BLANKS;
6465
3.48M
  GROW;
6466
3.48M
    }
6467
1.13M
    if ((cur != NULL) && (last != NULL)) {
6468
698k
        cur->c2 = last;
6469
698k
  if (last != NULL)
6470
698k
      last->parent = cur;
6471
698k
    }
6472
1.13M
    if (ctxt->input->id != inputchk) {
6473
237
  xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6474
237
                       "Element content declaration doesn't start and stop in"
6475
237
                       " the same entity\n");
6476
237
    }
6477
1.13M
    NEXT;
6478
1.13M
    if (RAW == '?') {
6479
14.1k
  if (ret != NULL) {
6480
14.1k
      if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6481
14.1k
          (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6482
75
          ret->ocur = XML_ELEMENT_CONTENT_MULT;
6483
14.1k
      else
6484
14.1k
          ret->ocur = XML_ELEMENT_CONTENT_OPT;
6485
14.1k
  }
6486
14.1k
  NEXT;
6487
1.11M
    } else if (RAW == '*') {
6488
342k
  if (ret != NULL) {
6489
342k
      ret->ocur = XML_ELEMENT_CONTENT_MULT;
6490
342k
      cur = ret;
6491
      /*
6492
       * Some normalization:
6493
       * (a | b* | c?)* == (a | b | c)*
6494
       */
6495
1.94M
      while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6496
1.60M
    if ((cur->c1 != NULL) &&
6497
1.60M
              ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6498
1.60M
         (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6499
82.8k
        cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6500
1.60M
    if ((cur->c2 != NULL) &&
6501
1.60M
              ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6502
1.60M
         (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6503
13.7k
        cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6504
1.60M
    cur = cur->c2;
6505
1.60M
      }
6506
342k
  }
6507
342k
  NEXT;
6508
774k
    } else if (RAW == '+') {
6509
118k
  if (ret != NULL) {
6510
118k
      int found = 0;
6511
6512
118k
      if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6513
118k
          (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6514
17
          ret->ocur = XML_ELEMENT_CONTENT_MULT;
6515
117k
      else
6516
117k
          ret->ocur = XML_ELEMENT_CONTENT_PLUS;
6517
      /*
6518
       * Some normalization:
6519
       * (a | b*)+ == (a | b)*
6520
       * (a | b?)+ == (a | b)*
6521
       */
6522
206k
      while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6523
88.3k
    if ((cur->c1 != NULL) &&
6524
88.3k
              ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6525
88.3k
         (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6526
523
        cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6527
523
        found = 1;
6528
523
    }
6529
88.3k
    if ((cur->c2 != NULL) &&
6530
88.3k
              ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6531
88.3k
         (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6532
313
        cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6533
313
        found = 1;
6534
313
    }
6535
88.3k
    cur = cur->c2;
6536
88.3k
      }
6537
118k
      if (found)
6538
586
    ret->ocur = XML_ELEMENT_CONTENT_MULT;
6539
118k
  }
6540
118k
  NEXT;
6541
118k
    }
6542
1.13M
    return(ret);
6543
1.18M
}
6544
6545
/**
6546
 * xmlParseElementChildrenContentDecl:
6547
 * @ctxt:  an XML parser context
6548
 * @inputchk:  the input used for the current entity, needed for boundary checks
6549
 *
6550
 * DEPRECATED: Internal function, don't use.
6551
 *
6552
 * parse the declaration for a Mixed Element content
6553
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6554
 *
6555
 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6556
 *
6557
 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6558
 *
6559
 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6560
 *
6561
 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6562
 *
6563
 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6564
 * TODO Parameter-entity replacement text must be properly nested
6565
 *  with parenthesized groups. That is to say, if either of the
6566
 *  opening or closing parentheses in a choice, seq, or Mixed
6567
 *  construct is contained in the replacement text for a parameter
6568
 *  entity, both must be contained in the same replacement text. For
6569
 *  interoperability, if a parameter-entity reference appears in a
6570
 *  choice, seq, or Mixed construct, its replacement text should not
6571
 *  be empty, and neither the first nor last non-blank character of
6572
 *  the replacement text should be a connector (| or ,).
6573
 *
6574
 * Returns the tree of xmlElementContentPtr describing the element
6575
 *          hierarchy.
6576
 */
6577
xmlElementContentPtr
6578
0
xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6579
    /* stub left for API/ABI compat */
6580
0
    return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6581
0
}
6582
6583
/**
6584
 * xmlParseElementContentDecl:
6585
 * @ctxt:  an XML parser context
6586
 * @name:  the name of the element being defined.
6587
 * @result:  the Element Content pointer will be stored here if any
6588
 *
6589
 * DEPRECATED: Internal function, don't use.
6590
 *
6591
 * parse the declaration for an Element content either Mixed or Children,
6592
 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
6593
 *
6594
 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6595
 *
6596
 * returns: the type of element content XML_ELEMENT_TYPE_xxx
6597
 */
6598
6599
int
6600
xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
6601
1.62M
                           xmlElementContentPtr *result) {
6602
6603
1.62M
    xmlElementContentPtr tree = NULL;
6604
1.62M
    int inputid = ctxt->input->id;
6605
1.62M
    int res;
6606
6607
1.62M
    *result = NULL;
6608
6609
1.62M
    if (RAW != '(') {
6610
0
  xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6611
0
    "xmlParseElementContentDecl : %s '(' expected\n", name);
6612
0
  return(-1);
6613
0
    }
6614
1.62M
    NEXT;
6615
1.62M
    GROW;
6616
1.62M
    if (ctxt->instate == XML_PARSER_EOF)
6617
0
        return(-1);
6618
1.62M
    SKIP_BLANKS;
6619
1.62M
    if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6620
607k
        tree = xmlParseElementMixedContentDecl(ctxt, inputid);
6621
607k
  res = XML_ELEMENT_TYPE_MIXED;
6622
1.01M
    } else {
6623
1.01M
        tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
6624
1.01M
  res = XML_ELEMENT_TYPE_ELEMENT;
6625
1.01M
    }
6626
1.62M
    SKIP_BLANKS;
6627
1.62M
    *result = tree;
6628
1.62M
    return(res);
6629
1.62M
}
6630
6631
/**
6632
 * xmlParseElementDecl:
6633
 * @ctxt:  an XML parser context
6634
 *
6635
 * DEPRECATED: Internal function, don't use.
6636
 *
6637
 * parse an Element declaration.
6638
 *
6639
 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6640
 *
6641
 * [ VC: Unique Element Type Declaration ]
6642
 * No element type may be declared more than once
6643
 *
6644
 * Returns the type of the element, or -1 in case of error
6645
 */
6646
int
6647
2.22M
xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
6648
2.22M
    const xmlChar *name;
6649
2.22M
    int ret = -1;
6650
2.22M
    xmlElementContentPtr content  = NULL;
6651
6652
    /* GROW; done in the caller */
6653
2.22M
    if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
6654
2.21M
  int inputid = ctxt->input->id;
6655
6656
2.21M
  SKIP(9);
6657
2.21M
  if (SKIP_BLANKS == 0) {
6658
3.21k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6659
3.21k
               "Space required after 'ELEMENT'\n");
6660
3.21k
      return(-1);
6661
3.21k
  }
6662
2.21M
        name = xmlParseName(ctxt);
6663
2.21M
  if (name == NULL) {
6664
13.2k
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6665
13.2k
         "xmlParseElementDecl: no name for Element\n");
6666
13.2k
      return(-1);
6667
13.2k
  }
6668
2.19M
  if (SKIP_BLANKS == 0) {
6669
36.9k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6670
36.9k
         "Space required after the element name\n");
6671
36.9k
  }
6672
2.19M
  if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
6673
526k
      SKIP(5);
6674
      /*
6675
       * Element must always be empty.
6676
       */
6677
526k
      ret = XML_ELEMENT_TYPE_EMPTY;
6678
1.67M
  } else if ((RAW == 'A') && (NXT(1) == 'N') &&
6679
1.67M
             (NXT(2) == 'Y')) {
6680
8.58k
      SKIP(3);
6681
      /*
6682
       * Element is a generic container.
6683
       */
6684
8.58k
      ret = XML_ELEMENT_TYPE_ANY;
6685
1.66M
  } else if (RAW == '(') {
6686
1.62M
      ret = xmlParseElementContentDecl(ctxt, name, &content);
6687
1.62M
  } else {
6688
      /*
6689
       * [ WFC: PEs in Internal Subset ] error handling.
6690
       */
6691
42.0k
      if ((RAW == '%') && (ctxt->external == 0) &&
6692
42.0k
          (ctxt->inputNr == 1)) {
6693
367
    xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
6694
367
    "PEReference: forbidden within markup decl in internal subset\n");
6695
41.6k
      } else {
6696
41.6k
    xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6697
41.6k
          "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6698
41.6k
            }
6699
42.0k
      return(-1);
6700
42.0k
  }
6701
6702
2.15M
  SKIP_BLANKS;
6703
6704
2.15M
  if (RAW != '>') {
6705
64.3k
      xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
6706
64.3k
      if (content != NULL) {
6707
2.58k
    xmlFreeDocElementContent(ctxt->myDoc, content);
6708
2.58k
      }
6709
2.09M
  } else {
6710
2.09M
      if (inputid != ctxt->input->id) {
6711
175
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6712
175
                               "Element declaration doesn't start and stop in"
6713
175
                               " the same entity\n");
6714
175
      }
6715
6716
2.09M
      NEXT;
6717
2.09M
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6718
2.09M
    (ctxt->sax->elementDecl != NULL)) {
6719
1.90M
    if (content != NULL)
6720
1.37M
        content->parent = NULL;
6721
1.90M
          ctxt->sax->elementDecl(ctxt->userData, name, ret,
6722
1.90M
                           content);
6723
1.90M
    if ((content != NULL) && (content->parent == NULL)) {
6724
        /*
6725
         * this is a trick: if xmlAddElementDecl is called,
6726
         * instead of copying the full tree it is plugged directly
6727
         * if called from the parser. Avoid duplicating the
6728
         * interfaces or change the API/ABI
6729
         */
6730
162k
        xmlFreeDocElementContent(ctxt->myDoc, content);
6731
162k
    }
6732
1.90M
      } else if (content != NULL) {
6733
147k
    xmlFreeDocElementContent(ctxt->myDoc, content);
6734
147k
      }
6735
2.09M
  }
6736
2.15M
    }
6737
2.16M
    return(ret);
6738
2.22M
}
6739
6740
/**
6741
 * xmlParseConditionalSections
6742
 * @ctxt:  an XML parser context
6743
 *
6744
 * [61] conditionalSect ::= includeSect | ignoreSect
6745
 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6746
 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6747
 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6748
 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6749
 */
6750
6751
static void
6752
16.5k
xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
6753
16.5k
    int *inputIds = NULL;
6754
16.5k
    size_t inputIdsSize = 0;
6755
16.5k
    size_t depth = 0;
6756
6757
84.8k
    while (ctxt->instate != XML_PARSER_EOF) {
6758
84.6k
        if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6759
45.4k
            int id = ctxt->input->id;
6760
6761
45.4k
            SKIP(3);
6762
45.4k
            SKIP_BLANKS;
6763
6764
45.4k
            if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
6765
36.7k
                SKIP(7);
6766
36.7k
                SKIP_BLANKS;
6767
36.7k
                if (RAW != '[') {
6768
331
                    xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6769
331
                    xmlHaltParser(ctxt);
6770
331
                    goto error;
6771
331
                }
6772
36.4k
                if (ctxt->input->id != id) {
6773
20
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6774
20
                                   "All markup of the conditional section is"
6775
20
                                   " not in the same entity\n");
6776
20
                }
6777
36.4k
                NEXT;
6778
6779
36.4k
                if (inputIdsSize <= depth) {
6780
11.2k
                    int *tmp;
6781
6782
11.2k
                    inputIdsSize = (inputIdsSize == 0 ? 4 : inputIdsSize * 2);
6783
11.2k
                    tmp = (int *) xmlRealloc(inputIds,
6784
11.2k
                            inputIdsSize * sizeof(int));
6785
11.2k
                    if (tmp == NULL) {
6786
0
                        xmlErrMemory(ctxt, NULL);
6787
0
                        goto error;
6788
0
                    }
6789
11.2k
                    inputIds = tmp;
6790
11.2k
                }
6791
36.4k
                inputIds[depth] = id;
6792
36.4k
                depth++;
6793
36.4k
            } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
6794
6.30k
                int state;
6795
6.30k
                xmlParserInputState instate;
6796
6.30k
                size_t ignoreDepth = 0;
6797
6798
6.30k
                SKIP(6);
6799
6.30k
                SKIP_BLANKS;
6800
6.30k
                if (RAW != '[') {
6801
153
                    xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6802
153
                    xmlHaltParser(ctxt);
6803
153
                    goto error;
6804
153
                }
6805
6.15k
                if (ctxt->input->id != id) {
6806
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6807
0
                                   "All markup of the conditional section is"
6808
0
                                   " not in the same entity\n");
6809
0
                }
6810
6.15k
                NEXT;
6811
6812
                /*
6813
                 * Parse up to the end of the conditional section but disable
6814
                 * SAX event generating DTD building in the meantime
6815
                 */
6816
6.15k
                state = ctxt->disableSAX;
6817
6.15k
                instate = ctxt->instate;
6818
6.15k
                if (ctxt->recovery == 0) ctxt->disableSAX = 1;
6819
6.15k
                ctxt->instate = XML_PARSER_IGNORE;
6820
6821
8.69M
                while (RAW != 0) {
6822
8.69M
                    if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6823
14.5k
                        SKIP(3);
6824
14.5k
                        ignoreDepth++;
6825
                        /* Check for integer overflow */
6826
14.5k
                        if (ignoreDepth == 0) {
6827
0
                            xmlErrMemory(ctxt, NULL);
6828
0
                            goto error;
6829
0
                        }
6830
8.67M
                    } else if ((RAW == ']') && (NXT(1) == ']') &&
6831
8.67M
                               (NXT(2) == '>')) {
6832
13.3k
                        if (ignoreDepth == 0)
6833
3.26k
                            break;
6834
10.0k
                        SKIP(3);
6835
10.0k
                        ignoreDepth--;
6836
8.66M
                    } else {
6837
8.66M
                        NEXT;
6838
8.66M
                    }
6839
8.69M
                }
6840
6841
6.15k
                ctxt->disableSAX = state;
6842
6.15k
                ctxt->instate = instate;
6843
6844
6.15k
    if (RAW == 0) {
6845
2.88k
        xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
6846
2.88k
                    goto error;
6847
2.88k
    }
6848
3.26k
                if (ctxt->input->id != id) {
6849
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6850
0
                                   "All markup of the conditional section is"
6851
0
                                   " not in the same entity\n");
6852
0
                }
6853
3.26k
                SKIP(3);
6854
3.26k
            } else {
6855
2.38k
                xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
6856
2.38k
                xmlHaltParser(ctxt);
6857
2.38k
                goto error;
6858
2.38k
            }
6859
45.4k
        } else if ((depth > 0) &&
6860
39.2k
                   (RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6861
19.4k
            depth--;
6862
19.4k
            if (ctxt->input->id != inputIds[depth]) {
6863
251
                xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6864
251
                               "All markup of the conditional section is not"
6865
251
                               " in the same entity\n");
6866
251
            }
6867
19.4k
            SKIP(3);
6868
19.7k
        } else {
6869
19.7k
            int id = ctxt->input->id;
6870
19.7k
            unsigned long cons = CUR_CONSUMED;
6871
6872
19.7k
            xmlParseMarkupDecl(ctxt);
6873
6874
19.7k
            if ((id == ctxt->input->id) && (cons == CUR_CONSUMED)) {
6875
3.54k
                xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6876
3.54k
                xmlHaltParser(ctxt);
6877
3.54k
                goto error;
6878
3.54k
            }
6879
19.7k
        }
6880
6881
75.3k
        if (depth == 0)
6882
7.02k
            break;
6883
6884
68.3k
        SKIP_BLANKS;
6885
68.3k
        GROW;
6886
68.3k
    }
6887
6888
16.5k
error:
6889
16.5k
    xmlFree(inputIds);
6890
16.5k
}
6891
6892
/**
6893
 * xmlParseMarkupDecl:
6894
 * @ctxt:  an XML parser context
6895
 *
6896
 * DEPRECATED: Internal function, don't use.
6897
 *
6898
 * parse Markup declarations
6899
 *
6900
 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6901
 *                     NotationDecl | PI | Comment
6902
 *
6903
 * [ VC: Proper Declaration/PE Nesting ]
6904
 * Parameter-entity replacement text must be properly nested with
6905
 * markup declarations. That is to say, if either the first character
6906
 * or the last character of a markup declaration (markupdecl above) is
6907
 * contained in the replacement text for a parameter-entity reference,
6908
 * both must be contained in the same replacement text.
6909
 *
6910
 * [ WFC: PEs in Internal Subset ]
6911
 * In the internal DTD subset, parameter-entity references can occur
6912
 * only where markup declarations can occur, not within markup declarations.
6913
 * (This does not apply to references that occur in external parameter
6914
 * entities or to the external subset.)
6915
 */
6916
void
6917
8.38M
xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
6918
8.38M
    GROW;
6919
8.38M
    if (CUR == '<') {
6920
8.05M
        if (NXT(1) == '!') {
6921
8.01M
      switch (NXT(2)) {
6922
4.65M
          case 'E':
6923
4.65M
        if (NXT(3) == 'L')
6924
2.22M
      xmlParseElementDecl(ctxt);
6925
2.42M
        else if (NXT(3) == 'N')
6926
2.42M
      xmlParseEntityDecl(ctxt);
6927
4.65M
        break;
6928
1.72M
          case 'A':
6929
1.72M
        xmlParseAttributeListDecl(ctxt);
6930
1.72M
        break;
6931
14.8k
          case 'N':
6932
14.8k
        xmlParseNotationDecl(ctxt);
6933
14.8k
        break;
6934
1.61M
          case '-':
6935
1.61M
        xmlParseComment(ctxt);
6936
1.61M
        break;
6937
6.44k
    default:
6938
        /* there is an error but it will be detected later */
6939
6.44k
        break;
6940
8.01M
      }
6941
8.01M
  } else if (NXT(1) == '?') {
6942
16.8k
      xmlParsePI(ctxt);
6943
16.8k
  }
6944
8.05M
    }
6945
6946
    /*
6947
     * detect requirement to exit there and act accordingly
6948
     * and avoid having instate overridden later on
6949
     */
6950
8.38M
    if (ctxt->instate == XML_PARSER_EOF)
6951
23.9k
        return;
6952
6953
8.36M
    ctxt->instate = XML_PARSER_DTD;
6954
8.36M
}
6955
6956
/**
6957
 * xmlParseTextDecl:
6958
 * @ctxt:  an XML parser context
6959
 *
6960
 * DEPRECATED: Internal function, don't use.
6961
 *
6962
 * parse an XML declaration header for external entities
6963
 *
6964
 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
6965
 */
6966
6967
void
6968
67.6k
xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
6969
67.6k
    xmlChar *version;
6970
67.6k
    const xmlChar *encoding;
6971
67.6k
    int oldstate;
6972
6973
    /*
6974
     * We know that '<?xml' is here.
6975
     */
6976
67.6k
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
6977
67.3k
  SKIP(5);
6978
67.3k
    } else {
6979
222
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
6980
222
  return;
6981
222
    }
6982
6983
    /* Avoid expansion of parameter entities when skipping blanks. */
6984
67.3k
    oldstate = ctxt->instate;
6985
67.3k
    ctxt->instate = XML_PARSER_START;
6986
6987
67.3k
    if (SKIP_BLANKS == 0) {
6988
0
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6989
0
           "Space needed after '<?xml'\n");
6990
0
    }
6991
6992
    /*
6993
     * We may have the VersionInfo here.
6994
     */
6995
67.3k
    version = xmlParseVersionInfo(ctxt);
6996
67.3k
    if (version == NULL)
6997
12.2k
  version = xmlCharStrdup(XML_DEFAULT_VERSION);
6998
55.1k
    else {
6999
55.1k
  if (SKIP_BLANKS == 0) {
7000
1.49k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7001
1.49k
               "Space needed here\n");
7002
1.49k
  }
7003
55.1k
    }
7004
67.3k
    ctxt->input->version = version;
7005
7006
    /*
7007
     * We must have the encoding declaration
7008
     */
7009
67.3k
    encoding = xmlParseEncodingDecl(ctxt);
7010
67.3k
    if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7011
  /*
7012
   * The XML REC instructs us to stop parsing right here
7013
   */
7014
254
        ctxt->instate = oldstate;
7015
254
        return;
7016
254
    }
7017
67.1k
    if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
7018
3.12k
  xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
7019
3.12k
           "Missing encoding in text declaration\n");
7020
3.12k
    }
7021
7022
67.1k
    SKIP_BLANKS;
7023
67.1k
    if ((RAW == '?') && (NXT(1) == '>')) {
7024
28.1k
        SKIP(2);
7025
39.0k
    } else if (RAW == '>') {
7026
        /* Deprecated old WD ... */
7027
430
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
7028
430
  NEXT;
7029
38.5k
    } else {
7030
38.5k
        int c;
7031
7032
38.5k
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
7033
30.4M
        while ((c = CUR) != 0) {
7034
30.4M
            NEXT;
7035
30.4M
            if (c == '>')
7036
38.0k
                break;
7037
30.4M
        }
7038
38.5k
    }
7039
7040
67.1k
    ctxt->instate = oldstate;
7041
67.1k
}
7042
7043
/**
7044
 * xmlParseExternalSubset:
7045
 * @ctxt:  an XML parser context
7046
 * @ExternalID: the external identifier
7047
 * @SystemID: the system identifier (or URL)
7048
 *
7049
 * parse Markup declarations from an external subset
7050
 *
7051
 * [30] extSubset ::= textDecl? extSubsetDecl
7052
 *
7053
 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
7054
 */
7055
void
7056
xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
7057
68.5k
                       const xmlChar *SystemID) {
7058
68.5k
    xmlDetectSAX2(ctxt);
7059
68.5k
    GROW;
7060
7061
68.5k
    if ((ctxt->encoding == NULL) &&
7062
68.5k
        (ctxt->input->end - ctxt->input->cur >= 4)) {
7063
68.3k
        xmlChar start[4];
7064
68.3k
  xmlCharEncoding enc;
7065
7066
68.3k
  start[0] = RAW;
7067
68.3k
  start[1] = NXT(1);
7068
68.3k
  start[2] = NXT(2);
7069
68.3k
  start[3] = NXT(3);
7070
68.3k
  enc = xmlDetectCharEncoding(start, 4);
7071
68.3k
  if (enc != XML_CHAR_ENCODING_NONE)
7072
10.4k
      xmlSwitchEncoding(ctxt, enc);
7073
68.3k
    }
7074
7075
68.5k
    if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
7076
9.97k
  xmlParseTextDecl(ctxt);
7077
9.97k
  if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7078
      /*
7079
       * The XML REC instructs us to stop parsing right here
7080
       */
7081
182
      xmlHaltParser(ctxt);
7082
182
      return;
7083
182
  }
7084
9.97k
    }
7085
68.3k
    if (ctxt->myDoc == NULL) {
7086
0
        ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
7087
0
  if (ctxt->myDoc == NULL) {
7088
0
      xmlErrMemory(ctxt, "New Doc failed");
7089
0
      return;
7090
0
  }
7091
0
  ctxt->myDoc->properties = XML_DOC_INTERNAL;
7092
0
    }
7093
68.3k
    if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
7094
0
        xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
7095
7096
68.3k
    ctxt->instate = XML_PARSER_DTD;
7097
68.3k
    ctxt->external = 1;
7098
68.3k
    SKIP_BLANKS;
7099
4.07M
    while (((RAW == '<') && (NXT(1) == '?')) ||
7100
4.07M
           ((RAW == '<') && (NXT(1) == '!')) ||
7101
4.07M
     (RAW == '%')) {
7102
4.01M
  int id = ctxt->input->id;
7103
4.01M
  unsigned long cons = CUR_CONSUMED;
7104
7105
4.01M
  GROW;
7106
4.01M
        if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7107
16.5k
      xmlParseConditionalSections(ctxt);
7108
16.5k
  } else
7109
4.00M
      xmlParseMarkupDecl(ctxt);
7110
4.01M
        SKIP_BLANKS;
7111
7112
4.01M
  if ((id == ctxt->input->id) && (cons == CUR_CONSUMED)) {
7113
9.71k
      xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7114
9.71k
      break;
7115
9.71k
  }
7116
4.01M
    }
7117
7118
68.3k
    if (RAW != 0) {
7119
27.5k
  xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7120
27.5k
    }
7121
7122
68.3k
}
7123
7124
/**
7125
 * xmlParseReference:
7126
 * @ctxt:  an XML parser context
7127
 *
7128
 * DEPRECATED: Internal function, don't use.
7129
 *
7130
 * parse and handle entity references in content, depending on the SAX
7131
 * interface, this may end-up in a call to character() if this is a
7132
 * CharRef, a predefined entity, if there is no reference() callback.
7133
 * or if the parser was asked to switch to that mode.
7134
 *
7135
 * [67] Reference ::= EntityRef | CharRef
7136
 */
7137
void
7138
46.8M
xmlParseReference(xmlParserCtxtPtr ctxt) {
7139
46.8M
    xmlEntityPtr ent;
7140
46.8M
    xmlChar *val;
7141
46.8M
    int was_checked;
7142
46.8M
    xmlNodePtr list = NULL;
7143
46.8M
    xmlParserErrors ret = XML_ERR_OK;
7144
7145
7146
46.8M
    if (RAW != '&')
7147
0
        return;
7148
7149
    /*
7150
     * Simple case of a CharRef
7151
     */
7152
46.8M
    if (NXT(1) == '#') {
7153
9.75M
  int i = 0;
7154
9.75M
  xmlChar out[16];
7155
9.75M
  int hex = NXT(2);
7156
9.75M
  int value = xmlParseCharRef(ctxt);
7157
7158
9.75M
  if (value == 0)
7159
810k
      return;
7160
8.94M
  if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
7161
      /*
7162
       * So we are using non-UTF-8 buffers
7163
       * Check that the char fit on 8bits, if not
7164
       * generate a CharRef.
7165
       */
7166
7.10M
      if (value <= 0xFF) {
7167
6.98M
    out[0] = value;
7168
6.98M
    out[1] = 0;
7169
6.98M
    if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7170
6.98M
        (!ctxt->disableSAX))
7171
494k
        ctxt->sax->characters(ctxt->userData, out, 1);
7172
6.98M
      } else {
7173
122k
    if ((hex == 'x') || (hex == 'X'))
7174
60.5k
        snprintf((char *)out, sizeof(out), "#x%X", value);
7175
61.8k
    else
7176
61.8k
        snprintf((char *)out, sizeof(out), "#%d", value);
7177
122k
    if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7178
122k
        (!ctxt->disableSAX))
7179
16.5k
        ctxt->sax->reference(ctxt->userData, out);
7180
122k
      }
7181
7.10M
  } else {
7182
      /*
7183
       * Just encode the value in UTF-8
7184
       */
7185
1.83M
      COPY_BUF(0 ,out, i, value);
7186
1.83M
      out[i] = 0;
7187
1.83M
      if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7188
1.83M
    (!ctxt->disableSAX))
7189
178k
    ctxt->sax->characters(ctxt->userData, out, i);
7190
1.83M
  }
7191
8.94M
  return;
7192
9.75M
    }
7193
7194
    /*
7195
     * We are seeing an entity reference
7196
     */
7197
37.0M
    ent = xmlParseEntityRef(ctxt);
7198
37.0M
    if (ent == NULL) return;
7199
11.1M
    if (!ctxt->wellFormed)
7200
5.82M
  return;
7201
5.31M
    was_checked = ent->checked;
7202
7203
    /* special case of predefined entities */
7204
5.31M
    if ((ent->name == NULL) ||
7205
5.31M
        (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
7206
501k
  val = ent->content;
7207
501k
  if (val == NULL) return;
7208
  /*
7209
   * inline the entity.
7210
   */
7211
501k
  if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7212
501k
      (!ctxt->disableSAX))
7213
501k
      ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
7214
501k
  return;
7215
501k
    }
7216
7217
    /*
7218
     * The first reference to the entity trigger a parsing phase
7219
     * where the ent->children is filled with the result from
7220
     * the parsing.
7221
     * Note: external parsed entities will not be loaded, it is not
7222
     * required for a non-validating parser, unless the parsing option
7223
     * of validating, or substituting entities were given. Doing so is
7224
     * far more secure as the parser will only process data coming from
7225
     * the document entity by default.
7226
     */
7227
4.81M
    if (((ent->checked == 0) ||
7228
4.81M
         ((ent->children == NULL) && (ctxt->options & XML_PARSE_NOENT))) &&
7229
4.81M
        ((ent->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY) ||
7230
4.60M
         (ctxt->options & (XML_PARSE_NOENT | XML_PARSE_DTDVALID)))) {
7231
4.60M
  unsigned long oldnbent = ctxt->nbentities, diff;
7232
7233
  /*
7234
   * This is a bit hackish but this seems the best
7235
   * way to make sure both SAX and DOM entity support
7236
   * behaves okay.
7237
   */
7238
4.60M
  void *user_data;
7239
4.60M
  if (ctxt->userData == ctxt)
7240
4.60M
      user_data = NULL;
7241
0
  else
7242
0
      user_data = ctxt->userData;
7243
7244
  /*
7245
   * Check that this entity is well formed
7246
   * 4.3.2: An internal general parsed entity is well-formed
7247
   * if its replacement text matches the production labeled
7248
   * content.
7249
   */
7250
4.60M
  if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7251
262k
      ctxt->depth++;
7252
262k
      ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content,
7253
262k
                                                user_data, &list);
7254
262k
      ctxt->depth--;
7255
7256
4.33M
  } else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7257
4.33M
      ctxt->depth++;
7258
4.33M
      ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax,
7259
4.33M
                                     user_data, ctxt->depth, ent->URI,
7260
4.33M
             ent->ExternalID, &list);
7261
4.33M
      ctxt->depth--;
7262
4.33M
  } else {
7263
0
      ret = XML_ERR_ENTITY_PE_INTERNAL;
7264
0
      xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7265
0
       "invalid entity type found\n", NULL);
7266
0
  }
7267
7268
  /*
7269
   * Store the number of entities needing parsing for this entity
7270
   * content and do checkings
7271
   */
7272
4.60M
        diff = ctxt->nbentities - oldnbent + 1;
7273
4.60M
        if (diff > INT_MAX / 2)
7274
0
            diff = INT_MAX / 2;
7275
4.60M
        ent->checked = diff * 2;
7276
4.60M
  if ((ent->content != NULL) && (xmlStrchr(ent->content, '<')))
7277
91.0k
      ent->checked |= 1;
7278
4.60M
  if (ret == XML_ERR_ENTITY_LOOP) {
7279
781k
      xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7280
781k
            xmlHaltParser(ctxt);
7281
781k
      xmlFreeNodeList(list);
7282
781k
      return;
7283
781k
  }
7284
3.82M
  if (xmlParserEntityCheck(ctxt, 0, ent, 0)) {
7285
2.09k
      xmlFreeNodeList(list);
7286
2.09k
      return;
7287
2.09k
  }
7288
7289
3.81M
  if ((ret == XML_ERR_OK) && (list != NULL)) {
7290
41.5k
      if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
7291
41.5k
       (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
7292
41.5k
    (ent->children == NULL)) {
7293
41.1k
    ent->children = list;
7294
                /*
7295
                 * Prune it directly in the generated document
7296
                 * except for single text nodes.
7297
                 */
7298
41.1k
                if ((ctxt->replaceEntities == 0) ||
7299
41.1k
                    (ctxt->parseMode == XML_PARSE_READER) ||
7300
41.1k
                    ((list->type == XML_TEXT_NODE) &&
7301
37.7k
                     (list->next == NULL))) {
7302
37.7k
                    ent->owner = 1;
7303
91.2k
                    while (list != NULL) {
7304
53.4k
                        list->parent = (xmlNodePtr) ent;
7305
53.4k
                        if (list->doc != ent->doc)
7306
0
                            xmlSetTreeDoc(list, ent->doc);
7307
53.4k
                        if (list->next == NULL)
7308
37.7k
                            ent->last = list;
7309
53.4k
                        list = list->next;
7310
53.4k
                    }
7311
37.7k
                    list = NULL;
7312
37.7k
                } else {
7313
3.40k
                    ent->owner = 0;
7314
12.3k
                    while (list != NULL) {
7315
8.96k
                        list->parent = (xmlNodePtr) ctxt->node;
7316
8.96k
                        list->doc = ctxt->myDoc;
7317
8.96k
                        if (list->next == NULL)
7318
3.40k
                            ent->last = list;
7319
8.96k
                        list = list->next;
7320
8.96k
                    }
7321
3.40k
                    list = ent->children;
7322
#ifdef LIBXML_LEGACY_ENABLED
7323
                    if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7324
                        xmlAddEntityReference(ent, list, NULL);
7325
#endif /* LIBXML_LEGACY_ENABLED */
7326
3.40k
                }
7327
41.1k
      } else {
7328
383
    xmlFreeNodeList(list);
7329
383
    list = NULL;
7330
383
      }
7331
3.77M
  } else if ((ret != XML_ERR_OK) &&
7332
3.77M
       (ret != XML_WAR_UNDECLARED_ENTITY)) {
7333
3.70M
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7334
3.70M
         "Entity '%s' failed to parse\n", ent->name);
7335
3.70M
            if (ent->content != NULL)
7336
83.5k
                ent->content[0] = 0;
7337
3.70M
      xmlParserEntityCheck(ctxt, 0, ent, 0);
7338
3.70M
  } else if (list != NULL) {
7339
0
      xmlFreeNodeList(list);
7340
0
      list = NULL;
7341
0
  }
7342
3.81M
  if (ent->checked == 0)
7343
0
      ent->checked = 2;
7344
7345
        /* Prevent entity from being parsed and expanded twice (Bug 760367). */
7346
3.81M
        was_checked = 0;
7347
3.81M
    } else if (ent->checked != 1) {
7348
214k
  ctxt->nbentities += ent->checked / 2;
7349
214k
    }
7350
7351
    /*
7352
     * Now that the entity content has been gathered
7353
     * provide it to the application, this can take different forms based
7354
     * on the parsing modes.
7355
     */
7356
4.03M
    if (ent->children == NULL) {
7357
  /*
7358
   * Probably running in SAX mode and the callbacks don't
7359
   * build the entity content. So unless we already went
7360
   * though parsing for first checking go though the entity
7361
   * content to generate callbacks associated to the entity
7362
   */
7363
3.88M
  if (was_checked != 0) {
7364
112k
      void *user_data;
7365
      /*
7366
       * This is a bit hackish but this seems the best
7367
       * way to make sure both SAX and DOM entity support
7368
       * behaves okay.
7369
       */
7370
112k
      if (ctxt->userData == ctxt)
7371
112k
    user_data = NULL;
7372
0
      else
7373
0
    user_data = ctxt->userData;
7374
7375
112k
      if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7376
30.8k
    ctxt->depth++;
7377
30.8k
    ret = xmlParseBalancedChunkMemoryInternal(ctxt,
7378
30.8k
           ent->content, user_data, NULL);
7379
30.8k
    ctxt->depth--;
7380
81.9k
      } else if (ent->etype ==
7381
81.9k
           XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7382
81.9k
    ctxt->depth++;
7383
81.9k
    ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
7384
81.9k
         ctxt->sax, user_data, ctxt->depth,
7385
81.9k
         ent->URI, ent->ExternalID, NULL);
7386
81.9k
    ctxt->depth--;
7387
81.9k
      } else {
7388
0
    ret = XML_ERR_ENTITY_PE_INTERNAL;
7389
0
    xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7390
0
           "invalid entity type found\n", NULL);
7391
0
      }
7392
112k
      if (ret == XML_ERR_ENTITY_LOOP) {
7393
324
    xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7394
324
    return;
7395
324
      }
7396
112k
  }
7397
3.88M
  if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7398
3.88M
      (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7399
      /*
7400
       * Entity reference callback comes second, it's somewhat
7401
       * superfluous but a compatibility to historical behaviour
7402
       */
7403
137k
      ctxt->sax->reference(ctxt->userData, ent->name);
7404
137k
  }
7405
3.88M
  return;
7406
3.88M
    }
7407
7408
    /*
7409
     * If we didn't get any children for the entity being built
7410
     */
7411
147k
    if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7412
147k
  (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7413
  /*
7414
   * Create a node.
7415
   */
7416
96.7k
  ctxt->sax->reference(ctxt->userData, ent->name);
7417
96.7k
  return;
7418
96.7k
    }
7419
7420
51.1k
    if ((ctxt->replaceEntities) || (ent->children == NULL))  {
7421
  /*
7422
   * There is a problem on the handling of _private for entities
7423
   * (bug 155816): Should we copy the content of the field from
7424
   * the entity (possibly overwriting some value set by the user
7425
   * when a copy is created), should we leave it alone, or should
7426
   * we try to take care of different situations?  The problem
7427
   * is exacerbated by the usage of this field by the xmlReader.
7428
   * To fix this bug, we look at _private on the created node
7429
   * and, if it's NULL, we copy in whatever was in the entity.
7430
   * If it's not NULL we leave it alone.  This is somewhat of a
7431
   * hack - maybe we should have further tests to determine
7432
   * what to do.
7433
   */
7434
41.3k
  if ((ctxt->node != NULL) && (ent->children != NULL)) {
7435
      /*
7436
       * Seems we are generating the DOM content, do
7437
       * a simple tree copy for all references except the first
7438
       * In the first occurrence list contains the replacement.
7439
       */
7440
41.3k
      if (((list == NULL) && (ent->owner == 0)) ||
7441
41.3k
    (ctxt->parseMode == XML_PARSE_READER)) {
7442
13.1k
    xmlNodePtr nw = NULL, cur, firstChild = NULL;
7443
7444
    /*
7445
     * We are copying here, make sure there is no abuse
7446
     */
7447
13.1k
    ctxt->sizeentcopy += ent->length + 5;
7448
13.1k
    if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
7449
0
        return;
7450
7451
    /*
7452
     * when operating on a reader, the entities definitions
7453
     * are always owning the entities subtree.
7454
    if (ctxt->parseMode == XML_PARSE_READER)
7455
        ent->owner = 1;
7456
     */
7457
7458
13.1k
    cur = ent->children;
7459
17.6k
    while (cur != NULL) {
7460
17.6k
        nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7461
17.6k
        if (nw != NULL) {
7462
17.6k
      if (nw->_private == NULL)
7463
17.6k
          nw->_private = cur->_private;
7464
17.6k
      if (firstChild == NULL){
7465
13.1k
          firstChild = nw;
7466
13.1k
      }
7467
17.6k
      nw = xmlAddChild(ctxt->node, nw);
7468
17.6k
        }
7469
17.6k
        if (cur == ent->last) {
7470
      /*
7471
       * needed to detect some strange empty
7472
       * node cases in the reader tests
7473
       */
7474
13.1k
      if ((ctxt->parseMode == XML_PARSE_READER) &&
7475
13.1k
          (nw != NULL) &&
7476
13.1k
          (nw->type == XML_ELEMENT_NODE) &&
7477
13.1k
          (nw->children == NULL))
7478
854
          nw->extra = 1;
7479
7480
13.1k
      break;
7481
13.1k
        }
7482
4.49k
        cur = cur->next;
7483
4.49k
    }
7484
#ifdef LIBXML_LEGACY_ENABLED
7485
    if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7486
      xmlAddEntityReference(ent, firstChild, nw);
7487
#endif /* LIBXML_LEGACY_ENABLED */
7488
28.2k
      } else if ((list == NULL) || (ctxt->inputNr > 0)) {
7489
28.2k
    xmlNodePtr nw = NULL, cur, next, last,
7490
28.2k
         firstChild = NULL;
7491
7492
    /*
7493
     * We are copying here, make sure there is no abuse
7494
     */
7495
28.2k
    ctxt->sizeentcopy += ent->length + 5;
7496
28.2k
    if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
7497
0
        return;
7498
7499
    /*
7500
     * Copy the entity child list and make it the new
7501
     * entity child list. The goal is to make sure any
7502
     * ID or REF referenced will be the one from the
7503
     * document content and not the entity copy.
7504
     */
7505
28.2k
    cur = ent->children;
7506
28.2k
    ent->children = NULL;
7507
28.2k
    last = ent->last;
7508
28.2k
    ent->last = NULL;
7509
36.4k
    while (cur != NULL) {
7510
36.4k
        next = cur->next;
7511
36.4k
        cur->next = NULL;
7512
36.4k
        cur->parent = NULL;
7513
36.4k
        nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7514
36.4k
        if (nw != NULL) {
7515
36.4k
      if (nw->_private == NULL)
7516
36.4k
          nw->_private = cur->_private;
7517
36.4k
      if (firstChild == NULL){
7518
28.2k
          firstChild = cur;
7519
28.2k
      }
7520
36.4k
      xmlAddChild((xmlNodePtr) ent, nw);
7521
36.4k
      xmlAddChild(ctxt->node, cur);
7522
36.4k
        }
7523
36.4k
        if (cur == last)
7524
28.2k
      break;
7525
8.24k
        cur = next;
7526
8.24k
    }
7527
28.2k
    if (ent->owner == 0)
7528
3.40k
        ent->owner = 1;
7529
#ifdef LIBXML_LEGACY_ENABLED
7530
    if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7531
      xmlAddEntityReference(ent, firstChild, nw);
7532
#endif /* LIBXML_LEGACY_ENABLED */
7533
28.2k
      } else {
7534
0
    const xmlChar *nbktext;
7535
7536
    /*
7537
     * the name change is to avoid coalescing of the
7538
     * node with a possible previous text one which
7539
     * would make ent->children a dangling pointer
7540
     */
7541
0
    nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
7542
0
          -1);
7543
0
    if (ent->children->type == XML_TEXT_NODE)
7544
0
        ent->children->name = nbktext;
7545
0
    if ((ent->last != ent->children) &&
7546
0
        (ent->last->type == XML_TEXT_NODE))
7547
0
        ent->last->name = nbktext;
7548
0
    xmlAddChildList(ctxt->node, ent->children);
7549
0
      }
7550
7551
      /*
7552
       * This is to avoid a nasty side effect, see
7553
       * characters() in SAX.c
7554
       */
7555
41.3k
      ctxt->nodemem = 0;
7556
41.3k
      ctxt->nodelen = 0;
7557
41.3k
      return;
7558
41.3k
  }
7559
41.3k
    }
7560
51.1k
}
7561
7562
/**
7563
 * xmlParseEntityRef:
7564
 * @ctxt:  an XML parser context
7565
 *
7566
 * DEPRECATED: Internal function, don't use.
7567
 *
7568
 * parse ENTITY references declarations
7569
 *
7570
 * [68] EntityRef ::= '&' Name ';'
7571
 *
7572
 * [ WFC: Entity Declared ]
7573
 * In a document without any DTD, a document with only an internal DTD
7574
 * subset which contains no parameter entity references, or a document
7575
 * with "standalone='yes'", the Name given in the entity reference
7576
 * must match that in an entity declaration, except that well-formed
7577
 * documents need not declare any of the following entities: amp, lt,
7578
 * gt, apos, quot.  The declaration of a parameter entity must precede
7579
 * any reference to it.  Similarly, the declaration of a general entity
7580
 * must precede any reference to it which appears in a default value in an
7581
 * attribute-list declaration. Note that if entities are declared in the
7582
 * external subset or in external parameter entities, a non-validating
7583
 * processor is not obligated to read and process their declarations;
7584
 * for such documents, the rule that an entity must be declared is a
7585
 * well-formedness constraint only if standalone='yes'.
7586
 *
7587
 * [ WFC: Parsed Entity ]
7588
 * An entity reference must not contain the name of an unparsed entity
7589
 *
7590
 * Returns the xmlEntityPtr if found, or NULL otherwise.
7591
 */
7592
xmlEntityPtr
7593
43.4M
xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
7594
43.4M
    const xmlChar *name;
7595
43.4M
    xmlEntityPtr ent = NULL;
7596
7597
43.4M
    GROW;
7598
43.4M
    if (ctxt->instate == XML_PARSER_EOF)
7599
0
        return(NULL);
7600
7601
43.4M
    if (RAW != '&')
7602
0
        return(NULL);
7603
43.4M
    NEXT;
7604
43.4M
    name = xmlParseName(ctxt);
7605
43.4M
    if (name == NULL) {
7606
24.5M
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7607
24.5M
           "xmlParseEntityRef: no name\n");
7608
24.5M
        return(NULL);
7609
24.5M
    }
7610
18.8M
    if (RAW != ';') {
7611
2.22M
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7612
2.22M
  return(NULL);
7613
2.22M
    }
7614
16.6M
    NEXT;
7615
7616
    /*
7617
     * Predefined entities override any extra definition
7618
     */
7619
16.6M
    if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7620
8.52M
        ent = xmlGetPredefinedEntity(name);
7621
8.52M
        if (ent != NULL)
7622
2.71M
            return(ent);
7623
8.52M
    }
7624
7625
    /*
7626
     * Increase the number of entity references parsed
7627
     */
7628
13.9M
    ctxt->nbentities++;
7629
7630
    /*
7631
     * Ask first SAX for entity resolution, otherwise try the
7632
     * entities which may have stored in the parser context.
7633
     */
7634
13.9M
    if (ctxt->sax != NULL) {
7635
13.9M
  if (ctxt->sax->getEntity != NULL)
7636
13.9M
      ent = ctxt->sax->getEntity(ctxt->userData, name);
7637
13.9M
  if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7638
13.9M
      (ctxt->options & XML_PARSE_OLDSAX))
7639
7.57k
      ent = xmlGetPredefinedEntity(name);
7640
13.9M
  if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7641
13.9M
      (ctxt->userData==ctxt)) {
7642
22.2k
      ent = xmlSAX2GetEntity(ctxt, name);
7643
22.2k
  }
7644
13.9M
    }
7645
13.9M
    if (ctxt->instate == XML_PARSER_EOF)
7646
0
  return(NULL);
7647
    /*
7648
     * [ WFC: Entity Declared ]
7649
     * In a document without any DTD, a document with only an
7650
     * internal DTD subset which contains no parameter entity
7651
     * references, or a document with "standalone='yes'", the
7652
     * Name given in the entity reference must match that in an
7653
     * entity declaration, except that well-formed documents
7654
     * need not declare any of the following entities: amp, lt,
7655
     * gt, apos, quot.
7656
     * The declaration of a parameter entity must precede any
7657
     * reference to it.
7658
     * Similarly, the declaration of a general entity must
7659
     * precede any reference to it which appears in a default
7660
     * value in an attribute-list declaration. Note that if
7661
     * entities are declared in the external subset or in
7662
     * external parameter entities, a non-validating processor
7663
     * is not obligated to read and process their declarations;
7664
     * for such documents, the rule that an entity must be
7665
     * declared is a well-formedness constraint only if
7666
     * standalone='yes'.
7667
     */
7668
13.9M
    if (ent == NULL) {
7669
3.32M
  if ((ctxt->standalone == 1) ||
7670
3.32M
      ((ctxt->hasExternalSubset == 0) &&
7671
3.32M
       (ctxt->hasPErefs == 0))) {
7672
3.24M
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7673
3.24M
         "Entity '%s' not defined\n", name);
7674
3.24M
  } else {
7675
79.8k
      xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7676
79.8k
         "Entity '%s' not defined\n", name);
7677
79.8k
      if ((ctxt->inSubset == 0) &&
7678
79.8k
    (ctxt->sax != NULL) &&
7679
79.8k
    (ctxt->sax->reference != NULL)) {
7680
79.3k
    ctxt->sax->reference(ctxt->userData, name);
7681
79.3k
      }
7682
79.8k
  }
7683
3.32M
  xmlParserEntityCheck(ctxt, 0, ent, 0);
7684
3.32M
  ctxt->valid = 0;
7685
3.32M
    }
7686
7687
    /*
7688
     * [ WFC: Parsed Entity ]
7689
     * An entity reference must not contain the name of an
7690
     * unparsed entity
7691
     */
7692
10.6M
    else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7693
5.39k
  xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7694
5.39k
     "Entity reference to unparsed entity %s\n", name);
7695
5.39k
    }
7696
7697
    /*
7698
     * [ WFC: No External Entity References ]
7699
     * Attribute values cannot contain direct or indirect
7700
     * entity references to external entities.
7701
     */
7702
10.6M
    else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7703
10.6M
       (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7704
51.6k
  xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7705
51.6k
       "Attribute references external entity '%s'\n", name);
7706
51.6k
    }
7707
    /*
7708
     * [ WFC: No < in Attribute Values ]
7709
     * The replacement text of any entity referred to directly or
7710
     * indirectly in an attribute value (other than "&lt;") must
7711
     * not contain a <.
7712
     */
7713
10.5M
    else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7714
10.5M
       (ent != NULL) && 
7715
10.5M
       (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
7716
207k
  if (((ent->checked & 1) || (ent->checked == 0)) &&
7717
207k
       (ent->content != NULL) && (xmlStrchr(ent->content, '<'))) {
7718
5.37k
      xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7719
5.37k
  "'<' in entity '%s' is not allowed in attributes values\n", name);
7720
5.37k
        }
7721
207k
    }
7722
7723
    /*
7724
     * Internal check, no parameter entities here ...
7725
     */
7726
10.3M
    else {
7727
10.3M
  switch (ent->etype) {
7728
0
      case XML_INTERNAL_PARAMETER_ENTITY:
7729
0
      case XML_EXTERNAL_PARAMETER_ENTITY:
7730
0
      xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7731
0
       "Attempt to reference the parameter entity '%s'\n",
7732
0
            name);
7733
0
      break;
7734
10.3M
      default:
7735
10.3M
      break;
7736
10.3M
  }
7737
10.3M
    }
7738
7739
    /*
7740
     * [ WFC: No Recursion ]
7741
     * A parsed entity must not contain a recursive reference
7742
     * to itself, either directly or indirectly.
7743
     * Done somewhere else
7744
     */
7745
13.9M
    return(ent);
7746
13.9M
}
7747
7748
/**
7749
 * xmlParseStringEntityRef:
7750
 * @ctxt:  an XML parser context
7751
 * @str:  a pointer to an index in the string
7752
 *
7753
 * parse ENTITY references declarations, but this version parses it from
7754
 * a string value.
7755
 *
7756
 * [68] EntityRef ::= '&' Name ';'
7757
 *
7758
 * [ WFC: Entity Declared ]
7759
 * In a document without any DTD, a document with only an internal DTD
7760
 * subset which contains no parameter entity references, or a document
7761
 * with "standalone='yes'", the Name given in the entity reference
7762
 * must match that in an entity declaration, except that well-formed
7763
 * documents need not declare any of the following entities: amp, lt,
7764
 * gt, apos, quot.  The declaration of a parameter entity must precede
7765
 * any reference to it.  Similarly, the declaration of a general entity
7766
 * must precede any reference to it which appears in a default value in an
7767
 * attribute-list declaration. Note that if entities are declared in the
7768
 * external subset or in external parameter entities, a non-validating
7769
 * processor is not obligated to read and process their declarations;
7770
 * for such documents, the rule that an entity must be declared is a
7771
 * well-formedness constraint only if standalone='yes'.
7772
 *
7773
 * [ WFC: Parsed Entity ]
7774
 * An entity reference must not contain the name of an unparsed entity
7775
 *
7776
 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7777
 * is updated to the current location in the string.
7778
 */
7779
static xmlEntityPtr
7780
921k
xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7781
921k
    xmlChar *name;
7782
921k
    const xmlChar *ptr;
7783
921k
    xmlChar cur;
7784
921k
    xmlEntityPtr ent = NULL;
7785
7786
921k
    if ((str == NULL) || (*str == NULL))
7787
0
        return(NULL);
7788
921k
    ptr = *str;
7789
921k
    cur = *ptr;
7790
921k
    if (cur != '&')
7791
0
  return(NULL);
7792
7793
921k
    ptr++;
7794
921k
    name = xmlParseStringName(ctxt, &ptr);
7795
921k
    if (name == NULL) {
7796
44.2k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7797
44.2k
           "xmlParseStringEntityRef: no name\n");
7798
44.2k
  *str = ptr;
7799
44.2k
  return(NULL);
7800
44.2k
    }
7801
877k
    if (*ptr != ';') {
7802
30.7k
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7803
30.7k
        xmlFree(name);
7804
30.7k
  *str = ptr;
7805
30.7k
  return(NULL);
7806
30.7k
    }
7807
846k
    ptr++;
7808
7809
7810
    /*
7811
     * Predefined entities override any extra definition
7812
     */
7813
846k
    if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7814
366k
        ent = xmlGetPredefinedEntity(name);
7815
366k
        if (ent != NULL) {
7816
38.9k
            xmlFree(name);
7817
38.9k
            *str = ptr;
7818
38.9k
            return(ent);
7819
38.9k
        }
7820
366k
    }
7821
7822
    /*
7823
     * Increase the number of entity references parsed
7824
     */
7825
807k
    ctxt->nbentities++;
7826
7827
    /*
7828
     * Ask first SAX for entity resolution, otherwise try the
7829
     * entities which may have stored in the parser context.
7830
     */
7831
807k
    if (ctxt->sax != NULL) {
7832
807k
  if (ctxt->sax->getEntity != NULL)
7833
807k
      ent = ctxt->sax->getEntity(ctxt->userData, name);
7834
807k
  if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX))
7835
37.8k
      ent = xmlGetPredefinedEntity(name);
7836
807k
  if ((ent == NULL) && (ctxt->userData==ctxt)) {
7837
59.6k
      ent = xmlSAX2GetEntity(ctxt, name);
7838
59.6k
  }
7839
807k
    }
7840
807k
    if (ctxt->instate == XML_PARSER_EOF) {
7841
0
  xmlFree(name);
7842
0
  return(NULL);
7843
0
    }
7844
7845
    /*
7846
     * [ WFC: Entity Declared ]
7847
     * In a document without any DTD, a document with only an
7848
     * internal DTD subset which contains no parameter entity
7849
     * references, or a document with "standalone='yes'", the
7850
     * Name given in the entity reference must match that in an
7851
     * entity declaration, except that well-formed documents
7852
     * need not declare any of the following entities: amp, lt,
7853
     * gt, apos, quot.
7854
     * The declaration of a parameter entity must precede any
7855
     * reference to it.
7856
     * Similarly, the declaration of a general entity must
7857
     * precede any reference to it which appears in a default
7858
     * value in an attribute-list declaration. Note that if
7859
     * entities are declared in the external subset or in
7860
     * external parameter entities, a non-validating processor
7861
     * is not obligated to read and process their declarations;
7862
     * for such documents, the rule that an entity must be
7863
     * declared is a well-formedness constraint only if
7864
     * standalone='yes'.
7865
     */
7866
807k
    if (ent == NULL) {
7867
59.6k
  if ((ctxt->standalone == 1) ||
7868
59.6k
      ((ctxt->hasExternalSubset == 0) &&
7869
57.6k
       (ctxt->hasPErefs == 0))) {
7870
53.9k
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7871
53.9k
         "Entity '%s' not defined\n", name);
7872
53.9k
  } else {
7873
5.72k
      xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7874
5.72k
        "Entity '%s' not defined\n",
7875
5.72k
        name);
7876
5.72k
  }
7877
59.6k
  xmlParserEntityCheck(ctxt, 0, ent, 0);
7878
  /* TODO ? check regressions ctxt->valid = 0; */
7879
59.6k
    }
7880
7881
    /*
7882
     * [ WFC: Parsed Entity ]
7883
     * An entity reference must not contain the name of an
7884
     * unparsed entity
7885
     */
7886
748k
    else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7887
4.64k
  xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7888
4.64k
     "Entity reference to unparsed entity %s\n", name);
7889
4.64k
    }
7890
7891
    /*
7892
     * [ WFC: No External Entity References ]
7893
     * Attribute values cannot contain direct or indirect
7894
     * entity references to external entities.
7895
     */
7896
743k
    else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7897
743k
       (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7898
14.5k
  xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7899
14.5k
   "Attribute references external entity '%s'\n", name);
7900
14.5k
    }
7901
    /*
7902
     * [ WFC: No < in Attribute Values ]
7903
     * The replacement text of any entity referred to directly or
7904
     * indirectly in an attribute value (other than "&lt;") must
7905
     * not contain a <.
7906
     */
7907
728k
    else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7908
728k
       (ent != NULL) && (ent->content != NULL) &&
7909
728k
       (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
7910
728k
       (xmlStrchr(ent->content, '<'))) {
7911
163k
  xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7912
163k
     "'<' in entity '%s' is not allowed in attributes values\n",
7913
163k
        name);
7914
163k
    }
7915
7916
    /*
7917
     * Internal check, no parameter entities here ...
7918
     */
7919
565k
    else {
7920
565k
  switch (ent->etype) {
7921
0
      case XML_INTERNAL_PARAMETER_ENTITY:
7922
0
      case XML_EXTERNAL_PARAMETER_ENTITY:
7923
0
    xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7924
0
       "Attempt to reference the parameter entity '%s'\n",
7925
0
          name);
7926
0
      break;
7927
565k
      default:
7928
565k
      break;
7929
565k
  }
7930
565k
    }
7931
7932
    /*
7933
     * [ WFC: No Recursion ]
7934
     * A parsed entity must not contain a recursive reference
7935
     * to itself, either directly or indirectly.
7936
     * Done somewhere else
7937
     */
7938
7939
807k
    xmlFree(name);
7940
807k
    *str = ptr;
7941
807k
    return(ent);
7942
807k
}
7943
7944
/**
7945
 * xmlParsePEReference:
7946
 * @ctxt:  an XML parser context
7947
 *
7948
 * DEPRECATED: Internal function, don't use.
7949
 *
7950
 * parse PEReference declarations
7951
 * The entity content is handled directly by pushing it's content as
7952
 * a new input stream.
7953
 *
7954
 * [69] PEReference ::= '%' Name ';'
7955
 *
7956
 * [ WFC: No Recursion ]
7957
 * A parsed entity must not contain a recursive
7958
 * reference to itself, either directly or indirectly.
7959
 *
7960
 * [ WFC: Entity Declared ]
7961
 * In a document without any DTD, a document with only an internal DTD
7962
 * subset which contains no parameter entity references, or a document
7963
 * with "standalone='yes'", ...  ... The declaration of a parameter
7964
 * entity must precede any reference to it...
7965
 *
7966
 * [ VC: Entity Declared ]
7967
 * In a document with an external subset or external parameter entities
7968
 * with "standalone='no'", ...  ... The declaration of a parameter entity
7969
 * must precede any reference to it...
7970
 *
7971
 * [ WFC: In DTD ]
7972
 * Parameter-entity references may only appear in the DTD.
7973
 * NOTE: misleading but this is handled.
7974
 */
7975
void
7976
xmlParsePEReference(xmlParserCtxtPtr ctxt)
7977
8.88M
{
7978
8.88M
    const xmlChar *name;
7979
8.88M
    xmlEntityPtr entity = NULL;
7980
8.88M
    xmlParserInputPtr input;
7981
7982
8.88M
    if (RAW != '%')
7983
4.30M
        return;
7984
4.58M
    NEXT;
7985
4.58M
    name = xmlParseName(ctxt);
7986
4.58M
    if (name == NULL) {
7987
2.64M
  xmlFatalErrMsg(ctxt, XML_ERR_PEREF_NO_NAME, "PEReference: no name\n");
7988
2.64M
  return;
7989
2.64M
    }
7990
1.93M
    if (xmlParserDebugEntities)
7991
0
  xmlGenericError(xmlGenericErrorContext,
7992
0
    "PEReference: %s\n", name);
7993
1.93M
    if (RAW != ';') {
7994
19.5k
  xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
7995
19.5k
        return;
7996
19.5k
    }
7997
7998
1.91M
    NEXT;
7999
8000
    /*
8001
     * Increase the number of entity references parsed
8002
     */
8003
1.91M
    ctxt->nbentities++;
8004
8005
    /*
8006
     * Request the entity from SAX
8007
     */
8008
1.91M
    if ((ctxt->sax != NULL) &&
8009
1.91M
  (ctxt->sax->getParameterEntity != NULL))
8010
1.91M
  entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8011
1.91M
    if (ctxt->instate == XML_PARSER_EOF)
8012
0
  return;
8013
1.91M
    if (entity == NULL) {
8014
  /*
8015
   * [ WFC: Entity Declared ]
8016
   * In a document without any DTD, a document with only an
8017
   * internal DTD subset which contains no parameter entity
8018
   * references, or a document with "standalone='yes'", ...
8019
   * ... The declaration of a parameter entity must precede
8020
   * any reference to it...
8021
   */
8022
160k
  if ((ctxt->standalone == 1) ||
8023
160k
      ((ctxt->hasExternalSubset == 0) &&
8024
155k
       (ctxt->hasPErefs == 0))) {
8025
6.70k
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8026
6.70k
            "PEReference: %%%s; not found\n",
8027
6.70k
            name);
8028
153k
  } else {
8029
      /*
8030
       * [ VC: Entity Declared ]
8031
       * In a document with an external subset or external
8032
       * parameter entities with "standalone='no'", ...
8033
       * ... The declaration of a parameter entity must
8034
       * precede any reference to it...
8035
       */
8036
153k
            if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
8037
19.4k
                xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
8038
19.4k
                                 "PEReference: %%%s; not found\n",
8039
19.4k
                                 name, NULL);
8040
19.4k
            } else
8041
134k
                xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8042
134k
                              "PEReference: %%%s; not found\n",
8043
134k
                              name, NULL);
8044
153k
            ctxt->valid = 0;
8045
153k
  }
8046
160k
  xmlParserEntityCheck(ctxt, 0, NULL, 0);
8047
1.75M
    } else {
8048
  /*
8049
   * Internal checking in case the entity quest barfed
8050
   */
8051
1.75M
  if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8052
1.75M
      (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8053
0
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8054
0
      "Internal: %%%s; is not a parameter entity\n",
8055
0
        name, NULL);
8056
1.75M
  } else {
8057
1.75M
            xmlChar start[4];
8058
1.75M
            xmlCharEncoding enc;
8059
8060
1.75M
      if (xmlParserEntityCheck(ctxt, 0, entity, 0))
8061
596
          return;
8062
8063
1.75M
      if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
8064
1.75M
          ((ctxt->options & XML_PARSE_NOENT) == 0) &&
8065
1.75M
    ((ctxt->options & XML_PARSE_DTDVALID) == 0) &&
8066
1.75M
    ((ctxt->options & XML_PARSE_DTDLOAD) == 0) &&
8067
1.75M
    ((ctxt->options & XML_PARSE_DTDATTR) == 0) &&
8068
1.75M
    (ctxt->replaceEntities == 0) &&
8069
1.75M
    (ctxt->validate == 0))
8070
28
    return;
8071
8072
1.75M
      input = xmlNewEntityInputStream(ctxt, entity);
8073
1.75M
      if (xmlPushInput(ctxt, input) < 0) {
8074
6.45k
                xmlFreeInputStream(input);
8075
6.45k
    return;
8076
6.45k
            }
8077
8078
1.75M
      if (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) {
8079
                /*
8080
                 * Get the 4 first bytes and decode the charset
8081
                 * if enc != XML_CHAR_ENCODING_NONE
8082
                 * plug some encoding conversion routines.
8083
                 * Note that, since we may have some non-UTF8
8084
                 * encoding (like UTF16, bug 135229), the 'length'
8085
                 * is not known, but we can calculate based upon
8086
                 * the amount of data in the buffer.
8087
                 */
8088
141k
                GROW
8089
141k
                if (ctxt->instate == XML_PARSER_EOF)
8090
0
                    return;
8091
141k
                if ((ctxt->input->end - ctxt->input->cur)>=4) {
8092
140k
                    start[0] = RAW;
8093
140k
                    start[1] = NXT(1);
8094
140k
                    start[2] = NXT(2);
8095
140k
                    start[3] = NXT(3);
8096
140k
                    enc = xmlDetectCharEncoding(start, 4);
8097
140k
                    if (enc != XML_CHAR_ENCODING_NONE) {
8098
40.8k
                        xmlSwitchEncoding(ctxt, enc);
8099
40.8k
                    }
8100
140k
                }
8101
8102
141k
                if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
8103
141k
                    (IS_BLANK_CH(NXT(5)))) {
8104
39.4k
                    xmlParseTextDecl(ctxt);
8105
39.4k
                }
8106
141k
            }
8107
1.75M
  }
8108
1.75M
    }
8109
1.91M
    ctxt->hasPErefs = 1;
8110
1.91M
}
8111
8112
/**
8113
 * xmlLoadEntityContent:
8114
 * @ctxt:  an XML parser context
8115
 * @entity: an unloaded system entity
8116
 *
8117
 * Load the original content of the given system entity from the
8118
 * ExternalID/SystemID given. This is to be used for Included in Literal
8119
 * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
8120
 *
8121
 * Returns 0 in case of success and -1 in case of failure
8122
 */
8123
static int
8124
10.5k
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
8125
10.5k
    xmlParserInputPtr input;
8126
10.5k
    xmlBufferPtr buf;
8127
10.5k
    int l, c;
8128
10.5k
    int count = 0;
8129
8130
10.5k
    if ((ctxt == NULL) || (entity == NULL) ||
8131
10.5k
        ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
8132
10.5k
   (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
8133
10.5k
  (entity->content != NULL)) {
8134
0
  xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8135
0
              "xmlLoadEntityContent parameter error");
8136
0
        return(-1);
8137
0
    }
8138
8139
10.5k
    if (xmlParserDebugEntities)
8140
0
  xmlGenericError(xmlGenericErrorContext,
8141
0
    "Reading %s entity content input\n", entity->name);
8142
8143
10.5k
    buf = xmlBufferCreate();
8144
10.5k
    if (buf == NULL) {
8145
0
  xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8146
0
              "xmlLoadEntityContent parameter error");
8147
0
        return(-1);
8148
0
    }
8149
10.5k
    xmlBufferSetAllocationScheme(buf, XML_BUFFER_ALLOC_DOUBLEIT);
8150
8151
10.5k
    input = xmlNewEntityInputStream(ctxt, entity);
8152
10.5k
    if (input == NULL) {
8153
411
  xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8154
411
              "xmlLoadEntityContent input error");
8155
411
  xmlBufferFree(buf);
8156
411
        return(-1);
8157
411
    }
8158
8159
    /*
8160
     * Push the entity as the current input, read char by char
8161
     * saving to the buffer until the end of the entity or an error
8162
     */
8163
10.1k
    if (xmlPushInput(ctxt, input) < 0) {
8164
36
        xmlBufferFree(buf);
8165
36
  xmlFreeInputStream(input);
8166
36
  return(-1);
8167
36
    }
8168
8169
10.1k
    GROW;
8170
10.1k
    c = CUR_CHAR(l);
8171
26.4M
    while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
8172
26.4M
           (IS_CHAR(c))) {
8173
26.4M
        xmlBufferAdd(buf, ctxt->input->cur, l);
8174
26.4M
  if (count++ > XML_PARSER_CHUNK_SIZE) {
8175
254k
      count = 0;
8176
254k
      GROW;
8177
254k
            if (ctxt->instate == XML_PARSER_EOF) {
8178
0
                xmlBufferFree(buf);
8179
0
                return(-1);
8180
0
            }
8181
254k
  }
8182
26.4M
  NEXTL(l);
8183
26.4M
  c = CUR_CHAR(l);
8184
26.4M
  if (c == 0) {
8185
8.37k
      count = 0;
8186
8.37k
      GROW;
8187
8.37k
            if (ctxt->instate == XML_PARSER_EOF) {
8188
0
                xmlBufferFree(buf);
8189
0
                return(-1);
8190
0
            }
8191
8.37k
      c = CUR_CHAR(l);
8192
8.37k
  }
8193
26.4M
    }
8194
8195
10.1k
    if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
8196
4.91k
        xmlPopInput(ctxt);
8197
5.19k
    } else if (!IS_CHAR(c)) {
8198
5.19k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
8199
5.19k
                          "xmlLoadEntityContent: invalid char value %d\n",
8200
5.19k
                    c);
8201
5.19k
  xmlBufferFree(buf);
8202
5.19k
  return(-1);
8203
5.19k
    }
8204
4.91k
    entity->content = buf->content;
8205
4.91k
    buf->content = NULL;
8206
4.91k
    xmlBufferFree(buf);
8207
8208
4.91k
    return(0);
8209
10.1k
}
8210
8211
/**
8212
 * xmlParseStringPEReference:
8213
 * @ctxt:  an XML parser context
8214
 * @str:  a pointer to an index in the string
8215
 *
8216
 * parse PEReference declarations
8217
 *
8218
 * [69] PEReference ::= '%' Name ';'
8219
 *
8220
 * [ WFC: No Recursion ]
8221
 * A parsed entity must not contain a recursive
8222
 * reference to itself, either directly or indirectly.
8223
 *
8224
 * [ WFC: Entity Declared ]
8225
 * In a document without any DTD, a document with only an internal DTD
8226
 * subset which contains no parameter entity references, or a document
8227
 * with "standalone='yes'", ...  ... The declaration of a parameter
8228
 * entity must precede any reference to it...
8229
 *
8230
 * [ VC: Entity Declared ]
8231
 * In a document with an external subset or external parameter entities
8232
 * with "standalone='no'", ...  ... The declaration of a parameter entity
8233
 * must precede any reference to it...
8234
 *
8235
 * [ WFC: In DTD ]
8236
 * Parameter-entity references may only appear in the DTD.
8237
 * NOTE: misleading but this is handled.
8238
 *
8239
 * Returns the string of the entity content.
8240
 *         str is updated to the current value of the index
8241
 */
8242
static xmlEntityPtr
8243
1.91M
xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
8244
1.91M
    const xmlChar *ptr;
8245
1.91M
    xmlChar cur;
8246
1.91M
    xmlChar *name;
8247
1.91M
    xmlEntityPtr entity = NULL;
8248
8249
1.91M
    if ((str == NULL) || (*str == NULL)) return(NULL);
8250
1.91M
    ptr = *str;
8251
1.91M
    cur = *ptr;
8252
1.91M
    if (cur != '%')
8253
0
        return(NULL);
8254
1.91M
    ptr++;
8255
1.91M
    name = xmlParseStringName(ctxt, &ptr);
8256
1.91M
    if (name == NULL) {
8257
429k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8258
429k
           "xmlParseStringPEReference: no name\n");
8259
429k
  *str = ptr;
8260
429k
  return(NULL);
8261
429k
    }
8262
1.48M
    cur = *ptr;
8263
1.48M
    if (cur != ';') {
8264
67.8k
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
8265
67.8k
  xmlFree(name);
8266
67.8k
  *str = ptr;
8267
67.8k
  return(NULL);
8268
67.8k
    }
8269
1.42M
    ptr++;
8270
8271
    /*
8272
     * Increase the number of entity references parsed
8273
     */
8274
1.42M
    ctxt->nbentities++;
8275
8276
    /*
8277
     * Request the entity from SAX
8278
     */
8279
1.42M
    if ((ctxt->sax != NULL) &&
8280
1.42M
  (ctxt->sax->getParameterEntity != NULL))
8281
1.42M
  entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8282
1.42M
    if (ctxt->instate == XML_PARSER_EOF) {
8283
0
  xmlFree(name);
8284
0
  *str = ptr;
8285
0
  return(NULL);
8286
0
    }
8287
1.42M
    if (entity == NULL) {
8288
  /*
8289
   * [ WFC: Entity Declared ]
8290
   * In a document without any DTD, a document with only an
8291
   * internal DTD subset which contains no parameter entity
8292
   * references, or a document with "standalone='yes'", ...
8293
   * ... The declaration of a parameter entity must precede
8294
   * any reference to it...
8295
   */
8296
135k
  if ((ctxt->standalone == 1) ||
8297
135k
      ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) {
8298
2.40k
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8299
2.40k
     "PEReference: %%%s; not found\n", name);
8300
132k
  } else {
8301
      /*
8302
       * [ VC: Entity Declared ]
8303
       * In a document with an external subset or external
8304
       * parameter entities with "standalone='no'", ...
8305
       * ... The declaration of a parameter entity must
8306
       * precede any reference to it...
8307
       */
8308
132k
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8309
132k
        "PEReference: %%%s; not found\n",
8310
132k
        name, NULL);
8311
132k
      ctxt->valid = 0;
8312
132k
  }
8313
135k
  xmlParserEntityCheck(ctxt, 0, NULL, 0);
8314
1.28M
    } else {
8315
  /*
8316
   * Internal checking in case the entity quest barfed
8317
   */
8318
1.28M
  if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8319
1.28M
      (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8320
0
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8321
0
        "%%%s; is not a parameter entity\n",
8322
0
        name, NULL);
8323
0
  }
8324
1.28M
    }
8325
1.42M
    ctxt->hasPErefs = 1;
8326
1.42M
    xmlFree(name);
8327
1.42M
    *str = ptr;
8328
1.42M
    return(entity);
8329
1.42M
}
8330
8331
/**
8332
 * xmlParseDocTypeDecl:
8333
 * @ctxt:  an XML parser context
8334
 *
8335
 * DEPRECATED: Internal function, don't use.
8336
 *
8337
 * parse a DOCTYPE declaration
8338
 *
8339
 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
8340
 *                      ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8341
 *
8342
 * [ VC: Root Element Type ]
8343
 * The Name in the document type declaration must match the element
8344
 * type of the root element.
8345
 */
8346
8347
void
8348
511k
xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
8349
511k
    const xmlChar *name = NULL;
8350
511k
    xmlChar *ExternalID = NULL;
8351
511k
    xmlChar *URI = NULL;
8352
8353
    /*
8354
     * We know that '<!DOCTYPE' has been detected.
8355
     */
8356
511k
    SKIP(9);
8357
8358
511k
    SKIP_BLANKS;
8359
8360
    /*
8361
     * Parse the DOCTYPE name.
8362
     */
8363
511k
    name = xmlParseName(ctxt);
8364
511k
    if (name == NULL) {
8365
2.16k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8366
2.16k
           "xmlParseDocTypeDecl : no DOCTYPE name !\n");
8367
2.16k
    }
8368
511k
    ctxt->intSubName = name;
8369
8370
511k
    SKIP_BLANKS;
8371
8372
    /*
8373
     * Check for SystemID and ExternalID
8374
     */
8375
511k
    URI = xmlParseExternalID(ctxt, &ExternalID, 1);
8376
8377
511k
    if ((URI != NULL) || (ExternalID != NULL)) {
8378
174k
        ctxt->hasExternalSubset = 1;
8379
174k
    }
8380
511k
    ctxt->extSubURI = URI;
8381
511k
    ctxt->extSubSystem = ExternalID;
8382
8383
511k
    SKIP_BLANKS;
8384
8385
    /*
8386
     * Create and update the internal subset.
8387
     */
8388
511k
    if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
8389
511k
  (!ctxt->disableSAX))
8390
495k
  ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
8391
511k
    if (ctxt->instate == XML_PARSER_EOF)
8392
0
  return;
8393
8394
    /*
8395
     * Is there any internal subset declarations ?
8396
     * they are handled separately in xmlParseInternalSubset()
8397
     */
8398
511k
    if (RAW == '[')
8399
392k
  return;
8400
8401
    /*
8402
     * We should be at the end of the DOCTYPE declaration.
8403
     */
8404
118k
    if (RAW != '>') {
8405
19.6k
  xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8406
19.6k
    }
8407
118k
    NEXT;
8408
118k
}
8409
8410
/**
8411
 * xmlParseInternalSubset:
8412
 * @ctxt:  an XML parser context
8413
 *
8414
 * parse the internal subset declaration
8415
 *
8416
 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8417
 */
8418
8419
static void
8420
344k
xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
8421
    /*
8422
     * Is there any DTD definition ?
8423
     */
8424
344k
    if (RAW == '[') {
8425
344k
        int baseInputNr = ctxt->inputNr;
8426
344k
        ctxt->instate = XML_PARSER_DTD;
8427
344k
        NEXT;
8428
  /*
8429
   * Parse the succession of Markup declarations and
8430
   * PEReferences.
8431
   * Subsequence (markupdecl | PEReference | S)*
8432
   */
8433
4.62M
  while (((RAW != ']') || (ctxt->inputNr > baseInputNr)) &&
8434
4.62M
               (ctxt->instate != XML_PARSER_EOF)) {
8435
4.36M
      int id = ctxt->input->id;
8436
4.36M
      unsigned long cons = CUR_CONSUMED;
8437
8438
4.36M
      SKIP_BLANKS;
8439
4.36M
      xmlParseMarkupDecl(ctxt);
8440
4.36M
      xmlParsePEReference(ctxt);
8441
8442
            /*
8443
             * Conditional sections are allowed from external entities included
8444
             * by PE References in the internal subset.
8445
             */
8446
4.36M
            if ((ctxt->inputNr > 1) && (ctxt->input->filename != NULL) &&
8447
4.36M
                (RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
8448
0
                xmlParseConditionalSections(ctxt);
8449
0
            }
8450
8451
4.36M
      if ((id == ctxt->input->id) && (cons == CUR_CONSUMED)) {
8452
97.8k
    xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8453
97.8k
       "xmlParseInternalSubset: error detected in Markup declaration\n");
8454
97.8k
                if (ctxt->inputNr > baseInputNr)
8455
4.70k
                    xmlPopInput(ctxt);
8456
93.1k
                else
8457
93.1k
        break;
8458
97.8k
      }
8459
4.36M
  }
8460
344k
  if (RAW == ']') {
8461
234k
      NEXT;
8462
234k
      SKIP_BLANKS;
8463
234k
  }
8464
344k
    }
8465
8466
    /*
8467
     * We should be at the end of the DOCTYPE declaration.
8468
     */
8469
344k
    if (RAW != '>') {
8470
109k
  xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8471
109k
  return;
8472
109k
    }
8473
234k
    NEXT;
8474
234k
}
8475
8476
#ifdef LIBXML_SAX1_ENABLED
8477
/**
8478
 * xmlParseAttribute:
8479
 * @ctxt:  an XML parser context
8480
 * @value:  a xmlChar ** used to store the value of the attribute
8481
 *
8482
 * DEPRECATED: Internal function, don't use.
8483
 *
8484
 * parse an attribute
8485
 *
8486
 * [41] Attribute ::= Name Eq AttValue
8487
 *
8488
 * [ WFC: No External Entity References ]
8489
 * Attribute values cannot contain direct or indirect entity references
8490
 * to external entities.
8491
 *
8492
 * [ WFC: No < in Attribute Values ]
8493
 * The replacement text of any entity referred to directly or indirectly in
8494
 * an attribute value (other than "&lt;") must not contain a <.
8495
 *
8496
 * [ VC: Attribute Value Type ]
8497
 * The attribute must have been declared; the value must be of the type
8498
 * declared for it.
8499
 *
8500
 * [25] Eq ::= S? '=' S?
8501
 *
8502
 * With namespace:
8503
 *
8504
 * [NS 11] Attribute ::= QName Eq AttValue
8505
 *
8506
 * Also the case QName == xmlns:??? is handled independently as a namespace
8507
 * definition.
8508
 *
8509
 * Returns the attribute name, and the value in *value.
8510
 */
8511
8512
const xmlChar *
8513
85.3M
xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
8514
85.3M
    const xmlChar *name;
8515
85.3M
    xmlChar *val;
8516
8517
85.3M
    *value = NULL;
8518
85.3M
    GROW;
8519
85.3M
    name = xmlParseName(ctxt);
8520
85.3M
    if (name == NULL) {
8521
8.76M
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8522
8.76M
                 "error parsing attribute name\n");
8523
8.76M
        return(NULL);
8524
8.76M
    }
8525
8526
    /*
8527
     * read the value
8528
     */
8529
76.5M
    SKIP_BLANKS;
8530
76.5M
    if (RAW == '=') {
8531
71.4M
        NEXT;
8532
71.4M
  SKIP_BLANKS;
8533
71.4M
  val = xmlParseAttValue(ctxt);
8534
71.4M
  ctxt->instate = XML_PARSER_CONTENT;
8535
71.4M
    } else {
8536
5.05M
  xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8537
5.05M
         "Specification mandates value for attribute %s\n", name);
8538
5.05M
  return(NULL);
8539
5.05M
    }
8540
8541
    /*
8542
     * Check that xml:lang conforms to the specification
8543
     * No more registered as an error, just generate a warning now
8544
     * since this was deprecated in XML second edition
8545
     */
8546
71.4M
    if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8547
18.7k
  if (!xmlCheckLanguageID(val)) {
8548
7.62k
      xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8549
7.62k
              "Malformed value for xml:lang : %s\n",
8550
7.62k
        val, NULL);
8551
7.62k
  }
8552
18.7k
    }
8553
8554
    /*
8555
     * Check that xml:space conforms to the specification
8556
     */
8557
71.4M
    if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8558
1.09k
  if (xmlStrEqual(val, BAD_CAST "default"))
8559
0
      *(ctxt->space) = 0;
8560
1.09k
  else if (xmlStrEqual(val, BAD_CAST "preserve"))
8561
563
      *(ctxt->space) = 1;
8562
527
  else {
8563
527
    xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8564
527
"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8565
527
                                 val, NULL);
8566
527
  }
8567
1.09k
    }
8568
8569
71.4M
    *value = val;
8570
71.4M
    return(name);
8571
76.5M
}
8572
8573
/**
8574
 * xmlParseStartTag:
8575
 * @ctxt:  an XML parser context
8576
 *
8577
 * DEPRECATED: Internal function, don't use.
8578
 *
8579
 * parse a start of tag either for rule element or
8580
 * EmptyElement. In both case we don't parse the tag closing chars.
8581
 *
8582
 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8583
 *
8584
 * [ WFC: Unique Att Spec ]
8585
 * No attribute name may appear more than once in the same start-tag or
8586
 * empty-element tag.
8587
 *
8588
 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8589
 *
8590
 * [ WFC: Unique Att Spec ]
8591
 * No attribute name may appear more than once in the same start-tag or
8592
 * empty-element tag.
8593
 *
8594
 * With namespace:
8595
 *
8596
 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8597
 *
8598
 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8599
 *
8600
 * Returns the element name parsed
8601
 */
8602
8603
const xmlChar *
8604
149M
xmlParseStartTag(xmlParserCtxtPtr ctxt) {
8605
149M
    const xmlChar *name;
8606
149M
    const xmlChar *attname;
8607
149M
    xmlChar *attvalue;
8608
149M
    const xmlChar **atts = ctxt->atts;
8609
149M
    int nbatts = 0;
8610
149M
    int maxatts = ctxt->maxatts;
8611
149M
    int i;
8612
8613
149M
    if (RAW != '<') return(NULL);
8614
149M
    NEXT1;
8615
8616
149M
    name = xmlParseName(ctxt);
8617
149M
    if (name == NULL) {
8618
63.6M
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8619
63.6M
       "xmlParseStartTag: invalid element name\n");
8620
63.6M
        return(NULL);
8621
63.6M
    }
8622
8623
    /*
8624
     * Now parse the attributes, it ends up with the ending
8625
     *
8626
     * (S Attribute)* S?
8627
     */
8628
85.5M
    SKIP_BLANKS;
8629
85.5M
    GROW;
8630
8631
117M
    while (((RAW != '>') &&
8632
117M
     ((RAW != '/') || (NXT(1) != '>')) &&
8633
117M
     (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
8634
85.3M
        int id = ctxt->input->id;
8635
85.3M
  unsigned long cons = CUR_CONSUMED;
8636
8637
85.3M
  attname = xmlParseAttribute(ctxt, &attvalue);
8638
85.3M
        if ((attname != NULL) && (attvalue != NULL)) {
8639
      /*
8640
       * [ WFC: Unique Att Spec ]
8641
       * No attribute name may appear more than once in the same
8642
       * start-tag or empty-element tag.
8643
       */
8644
95.8M
      for (i = 0; i < nbatts;i += 2) {
8645
24.8M
          if (xmlStrEqual(atts[i], attname)) {
8646
87.1k
        xmlErrAttributeDup(ctxt, NULL, attname);
8647
87.1k
        xmlFree(attvalue);
8648
87.1k
        goto failed;
8649
87.1k
    }
8650
24.8M
      }
8651
      /*
8652
       * Add the pair to atts
8653
       */
8654
71.0M
      if (atts == NULL) {
8655
4.03M
          maxatts = 22; /* allow for 10 attrs by default */
8656
4.03M
          atts = (const xmlChar **)
8657
4.03M
           xmlMalloc(maxatts * sizeof(xmlChar *));
8658
4.03M
    if (atts == NULL) {
8659
0
        xmlErrMemory(ctxt, NULL);
8660
0
        if (attvalue != NULL)
8661
0
      xmlFree(attvalue);
8662
0
        goto failed;
8663
0
    }
8664
4.03M
    ctxt->atts = atts;
8665
4.03M
    ctxt->maxatts = maxatts;
8666
67.0M
      } else if (nbatts + 4 > maxatts) {
8667
4.31k
          const xmlChar **n;
8668
8669
4.31k
          maxatts *= 2;
8670
4.31k
          n = (const xmlChar **) xmlRealloc((void *) atts,
8671
4.31k
               maxatts * sizeof(const xmlChar *));
8672
4.31k
    if (n == NULL) {
8673
0
        xmlErrMemory(ctxt, NULL);
8674
0
        if (attvalue != NULL)
8675
0
      xmlFree(attvalue);
8676
0
        goto failed;
8677
0
    }
8678
4.31k
    atts = n;
8679
4.31k
    ctxt->atts = atts;
8680
4.31k
    ctxt->maxatts = maxatts;
8681
4.31k
      }
8682
71.0M
      atts[nbatts++] = attname;
8683
71.0M
      atts[nbatts++] = attvalue;
8684
71.0M
      atts[nbatts] = NULL;
8685
71.0M
      atts[nbatts + 1] = NULL;
8686
71.0M
  } else {
8687
14.1M
      if (attvalue != NULL)
8688
0
    xmlFree(attvalue);
8689
14.1M
  }
8690
8691
85.3M
failed:
8692
8693
85.3M
  GROW
8694
85.3M
  if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8695
44.4M
      break;
8696
40.8M
  if (SKIP_BLANKS == 0) {
8697
17.0M
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8698
17.0M
         "attributes construct error\n");
8699
17.0M
  }
8700
40.8M
        if ((cons == CUR_CONSUMED) && (id == ctxt->input->id) &&
8701
40.8M
            (attname == NULL) && (attvalue == NULL)) {
8702
8.76M
      xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
8703
8.76M
         "xmlParseStartTag: problem parsing attributes\n");
8704
8.76M
      break;
8705
8.76M
  }
8706
32.1M
  SHRINK;
8707
32.1M
        GROW;
8708
32.1M
    }
8709
8710
    /*
8711
     * SAX: Start of Element !
8712
     */
8713
85.5M
    if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
8714
85.5M
  (!ctxt->disableSAX)) {
8715
18.8M
  if (nbatts > 0)
8716
10.8M
      ctxt->sax->startElement(ctxt->userData, name, atts);
8717
7.98M
  else
8718
7.98M
      ctxt->sax->startElement(ctxt->userData, name, NULL);
8719
18.8M
    }
8720
8721
85.5M
    if (atts != NULL) {
8722
        /* Free only the content strings */
8723
154M
        for (i = 1;i < nbatts;i+=2)
8724
71.0M
      if (atts[i] != NULL)
8725
71.0M
         xmlFree((xmlChar *) atts[i]);
8726
83.4M
    }
8727
85.5M
    return(name);
8728
85.5M
}
8729
8730
/**
8731
 * xmlParseEndTag1:
8732
 * @ctxt:  an XML parser context
8733
 * @line:  line of the start tag
8734
 * @nsNr:  number of namespaces on the start tag
8735
 *
8736
 * parse an end of tag
8737
 *
8738
 * [42] ETag ::= '</' Name S? '>'
8739
 *
8740
 * With namespace
8741
 *
8742
 * [NS 9] ETag ::= '</' QName S? '>'
8743
 */
8744
8745
static void
8746
34.6M
xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
8747
34.6M
    const xmlChar *name;
8748
8749
34.6M
    GROW;
8750
34.6M
    if ((RAW != '<') || (NXT(1) != '/')) {
8751
0
  xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
8752
0
           "xmlParseEndTag: '</' not found\n");
8753
0
  return;
8754
0
    }
8755
34.6M
    SKIP(2);
8756
8757
34.6M
    name = xmlParseNameAndCompare(ctxt,ctxt->name);
8758
8759
    /*
8760
     * We should definitely be at the ending "S? '>'" part
8761
     */
8762
34.6M
    GROW;
8763
34.6M
    SKIP_BLANKS;
8764
34.6M
    if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
8765
3.00M
  xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
8766
3.00M
    } else
8767
31.6M
  NEXT1;
8768
8769
    /*
8770
     * [ WFC: Element Type Match ]
8771
     * The Name in an element's end-tag must match the element type in the
8772
     * start-tag.
8773
     *
8774
     */
8775
34.6M
    if (name != (xmlChar*)1) {
8776
8.47M
        if (name == NULL) name = BAD_CAST "unparsable";
8777
8.47M
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
8778
8.47M
         "Opening and ending tag mismatch: %s line %d and %s\n",
8779
8.47M
                    ctxt->name, line, name);
8780
8.47M
    }
8781
8782
    /*
8783
     * SAX: End of Tag
8784
     */
8785
34.6M
    if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8786
34.6M
  (!ctxt->disableSAX))
8787
6.04M
        ctxt->sax->endElement(ctxt->userData, ctxt->name);
8788
8789
34.6M
    namePop(ctxt);
8790
34.6M
    spacePop(ctxt);
8791
34.6M
    return;
8792
34.6M
}
8793
8794
/**
8795
 * xmlParseEndTag:
8796
 * @ctxt:  an XML parser context
8797
 *
8798
 * DEPRECATED: Internal function, don't use.
8799
 *
8800
 * parse an end of tag
8801
 *
8802
 * [42] ETag ::= '</' Name S? '>'
8803
 *
8804
 * With namespace
8805
 *
8806
 * [NS 9] ETag ::= '</' QName S? '>'
8807
 */
8808
8809
void
8810
0
xmlParseEndTag(xmlParserCtxtPtr ctxt) {
8811
0
    xmlParseEndTag1(ctxt, 0);
8812
0
}
8813
#endif /* LIBXML_SAX1_ENABLED */
8814
8815
/************************************************************************
8816
 *                  *
8817
 *          SAX 2 specific operations       *
8818
 *                  *
8819
 ************************************************************************/
8820
8821
/*
8822
 * xmlGetNamespace:
8823
 * @ctxt:  an XML parser context
8824
 * @prefix:  the prefix to lookup
8825
 *
8826
 * Lookup the namespace name for the @prefix (which ca be NULL)
8827
 * The prefix must come from the @ctxt->dict dictionary
8828
 *
8829
 * Returns the namespace name or NULL if not bound
8830
 */
8831
static const xmlChar *
8832
20.9M
xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
8833
20.9M
    int i;
8834
8835
20.9M
    if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
8836
26.3M
    for (i = ctxt->nsNr - 2;i >= 0;i-=2)
8837
7.99M
        if (ctxt->nsTab[i] == prefix) {
8838
2.47M
      if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
8839
11.0k
          return(NULL);
8840
2.46M
      return(ctxt->nsTab[i + 1]);
8841
2.47M
  }
8842
18.3M
    return(NULL);
8843
20.8M
}
8844
8845
/**
8846
 * xmlParseQName:
8847
 * @ctxt:  an XML parser context
8848
 * @prefix:  pointer to store the prefix part
8849
 *
8850
 * parse an XML Namespace QName
8851
 *
8852
 * [6]  QName  ::= (Prefix ':')? LocalPart
8853
 * [7]  Prefix  ::= NCName
8854
 * [8]  LocalPart  ::= NCName
8855
 *
8856
 * Returns the Name parsed or NULL
8857
 */
8858
8859
static const xmlChar *
8860
54.3M
xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8861
54.3M
    const xmlChar *l, *p;
8862
8863
54.3M
    GROW;
8864
8865
54.3M
    l = xmlParseNCName(ctxt);
8866
54.3M
    if (l == NULL) {
8867
5.89M
        if (CUR == ':') {
8868
20.5k
      l = xmlParseName(ctxt);
8869
20.5k
      if (l != NULL) {
8870
20.5k
          xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8871
20.5k
             "Failed to parse QName '%s'\n", l, NULL, NULL);
8872
20.5k
    *prefix = NULL;
8873
20.5k
    return(l);
8874
20.5k
      }
8875
20.5k
  }
8876
5.87M
        return(NULL);
8877
5.89M
    }
8878
48.4M
    if (CUR == ':') {
8879
4.05M
        NEXT;
8880
4.05M
  p = l;
8881
4.05M
  l = xmlParseNCName(ctxt);
8882
4.05M
  if (l == NULL) {
8883
51.0k
      xmlChar *tmp;
8884
8885
51.0k
            if (ctxt->instate == XML_PARSER_EOF)
8886
0
                return(NULL);
8887
51.0k
            xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8888
51.0k
               "Failed to parse QName '%s:'\n", p, NULL, NULL);
8889
51.0k
      l = xmlParseNmtoken(ctxt);
8890
51.0k
      if (l == NULL) {
8891
39.0k
                if (ctxt->instate == XML_PARSER_EOF)
8892
0
                    return(NULL);
8893
39.0k
    tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
8894
39.0k
            } else {
8895
12.0k
    tmp = xmlBuildQName(l, p, NULL, 0);
8896
12.0k
    xmlFree((char *)l);
8897
12.0k
      }
8898
51.0k
      p = xmlDictLookup(ctxt->dict, tmp, -1);
8899
51.0k
      if (tmp != NULL) xmlFree(tmp);
8900
51.0k
      *prefix = NULL;
8901
51.0k
      return(p);
8902
51.0k
  }
8903
4.00M
  if (CUR == ':') {
8904
23.5k
      xmlChar *tmp;
8905
8906
23.5k
            xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8907
23.5k
               "Failed to parse QName '%s:%s:'\n", p, l, NULL);
8908
23.5k
      NEXT;
8909
23.5k
      tmp = (xmlChar *) xmlParseName(ctxt);
8910
23.5k
      if (tmp != NULL) {
8911
16.8k
          tmp = xmlBuildQName(tmp, l, NULL, 0);
8912
16.8k
    l = xmlDictLookup(ctxt->dict, tmp, -1);
8913
16.8k
    if (tmp != NULL) xmlFree(tmp);
8914
16.8k
    *prefix = p;
8915
16.8k
    return(l);
8916
16.8k
      }
8917
6.68k
            if (ctxt->instate == XML_PARSER_EOF)
8918
0
                return(NULL);
8919
6.68k
      tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
8920
6.68k
      l = xmlDictLookup(ctxt->dict, tmp, -1);
8921
6.68k
      if (tmp != NULL) xmlFree(tmp);
8922
6.68k
      *prefix = p;
8923
6.68k
      return(l);
8924
6.68k
  }
8925
3.97M
  *prefix = p;
8926
3.97M
    } else
8927
44.4M
        *prefix = NULL;
8928
48.3M
    return(l);
8929
48.4M
}
8930
8931
/**
8932
 * xmlParseQNameAndCompare:
8933
 * @ctxt:  an XML parser context
8934
 * @name:  the localname
8935
 * @prefix:  the prefix, if any.
8936
 *
8937
 * parse an XML name and compares for match
8938
 * (specialized for endtag parsing)
8939
 *
8940
 * Returns NULL for an illegal name, (xmlChar*) 1 for success
8941
 * and the name for mismatch
8942
 */
8943
8944
static const xmlChar *
8945
xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8946
723k
                        xmlChar const *prefix) {
8947
723k
    const xmlChar *cmp;
8948
723k
    const xmlChar *in;
8949
723k
    const xmlChar *ret;
8950
723k
    const xmlChar *prefix2;
8951
8952
723k
    if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8953
8954
723k
    GROW;
8955
723k
    in = ctxt->input->cur;
8956
8957
723k
    cmp = prefix;
8958
2.47M
    while (*in != 0 && *in == *cmp) {
8959
1.75M
  ++in;
8960
1.75M
  ++cmp;
8961
1.75M
    }
8962
723k
    if ((*cmp == 0) && (*in == ':')) {
8963
656k
        in++;
8964
656k
  cmp = name;
8965
4.98M
  while (*in != 0 && *in == *cmp) {
8966
4.32M
      ++in;
8967
4.32M
      ++cmp;
8968
4.32M
  }
8969
656k
  if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
8970
      /* success */
8971
506k
            ctxt->input->col += in - ctxt->input->cur;
8972
506k
      ctxt->input->cur = in;
8973
506k
      return((const xmlChar*) 1);
8974
506k
  }
8975
656k
    }
8976
    /*
8977
     * all strings coms from the dictionary, equality can be done directly
8978
     */
8979
217k
    ret = xmlParseQName (ctxt, &prefix2);
8980
217k
    if ((ret == name) && (prefix == prefix2))
8981
1.50k
  return((const xmlChar*) 1);
8982
215k
    return ret;
8983
217k
}
8984
8985
/**
8986
 * xmlParseAttValueInternal:
8987
 * @ctxt:  an XML parser context
8988
 * @len:  attribute len result
8989
 * @alloc:  whether the attribute was reallocated as a new string
8990
 * @normalize:  if 1 then further non-CDATA normalization must be done
8991
 *
8992
 * parse a value for an attribute.
8993
 * NOTE: if no normalization is needed, the routine will return pointers
8994
 *       directly from the data buffer.
8995
 *
8996
 * 3.3.3 Attribute-Value Normalization:
8997
 * Before the value of an attribute is passed to the application or
8998
 * checked for validity, the XML processor must normalize it as follows:
8999
 * - a character reference is processed by appending the referenced
9000
 *   character to the attribute value
9001
 * - an entity reference is processed by recursively processing the
9002
 *   replacement text of the entity
9003
 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
9004
 *   appending #x20 to the normalized value, except that only a single
9005
 *   #x20 is appended for a "#xD#xA" sequence that is part of an external
9006
 *   parsed entity or the literal entity value of an internal parsed entity
9007
 * - other characters are processed by appending them to the normalized value
9008
 * If the declared value is not CDATA, then the XML processor must further
9009
 * process the normalized attribute value by discarding any leading and
9010
 * trailing space (#x20) characters, and by replacing sequences of space
9011
 * (#x20) characters by a single space (#x20) character.
9012
 * All attributes for which no declaration has been read should be treated
9013
 * by a non-validating parser as if declared CDATA.
9014
 *
9015
 * Returns the AttValue parsed or NULL. The value has to be freed by the
9016
 *     caller if it was copied, this can be detected by val[*len] == 0.
9017
 */
9018
9019
#define GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end) \
9020
42.9k
    const xmlChar *oldbase = ctxt->input->base;\
9021
42.9k
    GROW;\
9022
42.9k
    if (ctxt->instate == XML_PARSER_EOF)\
9023
42.9k
        return(NULL);\
9024
42.9k
    if (oldbase != ctxt->input->base) {\
9025
0
        ptrdiff_t delta = ctxt->input->base - oldbase;\
9026
0
        start = start + delta;\
9027
0
        in = in + delta;\
9028
0
    }\
9029
42.9k
    end = ctxt->input->end;
9030
9031
static xmlChar *
9032
xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
9033
                         int normalize)
9034
100M
{
9035
100M
    xmlChar limit = 0;
9036
100M
    const xmlChar *in = NULL, *start, *end, *last;
9037
100M
    xmlChar *ret = NULL;
9038
100M
    int line, col;
9039
100M
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
9040
62.1M
                    XML_MAX_HUGE_LENGTH :
9041
100M
                    XML_MAX_TEXT_LENGTH;
9042
9043
100M
    GROW;
9044
100M
    in = (xmlChar *) CUR_PTR;
9045
100M
    line = ctxt->input->line;
9046
100M
    col = ctxt->input->col;
9047
100M
    if (*in != '"' && *in != '\'') {
9048
404k
        xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
9049
404k
        return (NULL);
9050
404k
    }
9051
99.7M
    ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
9052
9053
    /*
9054
     * try to handle in this routine the most common case where no
9055
     * allocation of a new string is required and where content is
9056
     * pure ASCII.
9057
     */
9058
99.7M
    limit = *in++;
9059
99.7M
    col++;
9060
99.7M
    end = ctxt->input->end;
9061
99.7M
    start = in;
9062
99.7M
    if (in >= end) {
9063
9.87k
        GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9064
9.87k
    }
9065
99.7M
    if (normalize) {
9066
        /*
9067
   * Skip any leading spaces
9068
   */
9069
693k
  while ((in < end) && (*in != limit) &&
9070
693k
         ((*in == 0x20) || (*in == 0x9) ||
9071
686k
          (*in == 0xA) || (*in == 0xD))) {
9072
360k
      if (*in == 0xA) {
9073
198k
          line++; col = 1;
9074
198k
      } else {
9075
162k
          col++;
9076
162k
      }
9077
360k
      in++;
9078
360k
      start = in;
9079
360k
      if (in >= end) {
9080
168
                GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9081
168
                if ((in - start) > maxLength) {
9082
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9083
0
                                   "AttValue length too long\n");
9084
0
                    return(NULL);
9085
0
                }
9086
168
      }
9087
360k
  }
9088
2.06M
  while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9089
2.06M
         (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
9090
1.74M
      col++;
9091
1.74M
      if ((*in++ == 0x20) && (*in == 0x20)) break;
9092
1.73M
      if (in >= end) {
9093
227
                GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9094
227
                if ((in - start) > maxLength) {
9095
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9096
0
                                   "AttValue length too long\n");
9097
0
                    return(NULL);
9098
0
                }
9099
227
      }
9100
1.73M
  }
9101
332k
  last = in;
9102
  /*
9103
   * skip the trailing blanks
9104
   */
9105
344k
  while ((last[-1] == 0x20) && (last > start)) last--;
9106
738k
  while ((in < end) && (*in != limit) &&
9107
738k
         ((*in == 0x20) || (*in == 0x9) ||
9108
476k
          (*in == 0xA) || (*in == 0xD))) {
9109
406k
      if (*in == 0xA) {
9110
135k
          line++, col = 1;
9111
271k
      } else {
9112
271k
          col++;
9113
271k
      }
9114
406k
      in++;
9115
406k
      if (in >= end) {
9116
256
    const xmlChar *oldbase = ctxt->input->base;
9117
256
    GROW;
9118
256
                if (ctxt->instate == XML_PARSER_EOF)
9119
0
                    return(NULL);
9120
256
    if (oldbase != ctxt->input->base) {
9121
0
        ptrdiff_t delta = ctxt->input->base - oldbase;
9122
0
        start = start + delta;
9123
0
        in = in + delta;
9124
0
        last = last + delta;
9125
0
    }
9126
256
    end = ctxt->input->end;
9127
256
                if ((in - start) > maxLength) {
9128
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9129
0
                                   "AttValue length too long\n");
9130
0
                    return(NULL);
9131
0
                }
9132
256
      }
9133
406k
  }
9134
332k
        if ((in - start) > maxLength) {
9135
0
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9136
0
                           "AttValue length too long\n");
9137
0
            return(NULL);
9138
0
        }
9139
332k
  if (*in != limit) goto need_complex;
9140
99.4M
    } else {
9141
1.03G
  while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9142
1.03G
         (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
9143
939M
      in++;
9144
939M
      col++;
9145
939M
      if (in >= end) {
9146
32.7k
                GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9147
32.7k
                if ((in - start) > maxLength) {
9148
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9149
0
                                   "AttValue length too long\n");
9150
0
                    return(NULL);
9151
0
                }
9152
32.7k
      }
9153
939M
  }
9154
99.4M
  last = in;
9155
99.4M
        if ((in - start) > maxLength) {
9156
0
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9157
0
                           "AttValue length too long\n");
9158
0
            return(NULL);
9159
0
        }
9160
99.4M
  if (*in != limit) goto need_complex;
9161
99.4M
    }
9162
92.8M
    in++;
9163
92.8M
    col++;
9164
92.8M
    if (len != NULL) {
9165
27.2M
        if (alloc) *alloc = 0;
9166
27.2M
        *len = last - start;
9167
27.2M
        ret = (xmlChar *) start;
9168
65.5M
    } else {
9169
65.5M
        if (alloc) *alloc = 1;
9170
65.5M
        ret = xmlStrndup(start, last - start);
9171
65.5M
    }
9172
92.8M
    CUR_PTR = in;
9173
92.8M
    ctxt->input->line = line;
9174
92.8M
    ctxt->input->col = col;
9175
92.8M
    return ret;
9176
6.92M
need_complex:
9177
6.92M
    if (alloc) *alloc = 1;
9178
6.92M
    return xmlParseAttValueComplex(ctxt, len, normalize);
9179
99.7M
}
9180
9181
/**
9182
 * xmlParseAttribute2:
9183
 * @ctxt:  an XML parser context
9184
 * @pref:  the element prefix
9185
 * @elem:  the element name
9186
 * @prefix:  a xmlChar ** used to store the value of the attribute prefix
9187
 * @value:  a xmlChar ** used to store the value of the attribute
9188
 * @len:  an int * to save the length of the attribute
9189
 * @alloc:  an int * to indicate if the attribute was allocated
9190
 *
9191
 * parse an attribute in the new SAX2 framework.
9192
 *
9193
 * Returns the attribute name, and the value in *value, .
9194
 */
9195
9196
static const xmlChar *
9197
xmlParseAttribute2(xmlParserCtxtPtr ctxt,
9198
                   const xmlChar * pref, const xmlChar * elem,
9199
                   const xmlChar ** prefix, xmlChar ** value,
9200
                   int *len, int *alloc)
9201
28.8M
{
9202
28.8M
    const xmlChar *name;
9203
28.8M
    xmlChar *val, *internal_val = NULL;
9204
28.8M
    int normalize = 0;
9205
9206
28.8M
    *value = NULL;
9207
28.8M
    GROW;
9208
28.8M
    name = xmlParseQName(ctxt, prefix);
9209
28.8M
    if (name == NULL) {
9210
451k
        xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9211
451k
                       "error parsing attribute name\n");
9212
451k
        return (NULL);
9213
451k
    }
9214
9215
    /*
9216
     * get the type if needed
9217
     */
9218
28.4M
    if (ctxt->attsSpecial != NULL) {
9219
2.04M
        int type;
9220
9221
2.04M
        type = (int) (ptrdiff_t) xmlHashQLookup2(ctxt->attsSpecial,
9222
2.04M
                                                 pref, elem, *prefix, name);
9223
2.04M
        if (type != 0)
9224
334k
            normalize = 1;
9225
2.04M
    }
9226
9227
    /*
9228
     * read the value
9229
     */
9230
28.4M
    SKIP_BLANKS;
9231
28.4M
    if (RAW == '=') {
9232
28.1M
        NEXT;
9233
28.1M
        SKIP_BLANKS;
9234
28.1M
        val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
9235
28.1M
  if (normalize) {
9236
      /*
9237
       * Sometimes a second normalisation pass for spaces is needed
9238
       * but that only happens if charrefs or entities references
9239
       * have been used in the attribute value, i.e. the attribute
9240
       * value have been extracted in an allocated string already.
9241
       */
9242
333k
      if (*alloc) {
9243
70.5k
          const xmlChar *val2;
9244
9245
70.5k
          val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
9246
70.5k
    if ((val2 != NULL) && (val2 != val)) {
9247
10.4k
        xmlFree(val);
9248
10.4k
        val = (xmlChar *) val2;
9249
10.4k
    }
9250
70.5k
      }
9251
333k
  }
9252
28.1M
        ctxt->instate = XML_PARSER_CONTENT;
9253
28.1M
    } else {
9254
251k
        xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
9255
251k
                          "Specification mandates value for attribute %s\n",
9256
251k
                          name);
9257
251k
        return (NULL);
9258
251k
    }
9259
9260
28.1M
    if (*prefix == ctxt->str_xml) {
9261
        /*
9262
         * Check that xml:lang conforms to the specification
9263
         * No more registered as an error, just generate a warning now
9264
         * since this was deprecated in XML second edition
9265
         */
9266
76.5k
        if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
9267
19.7k
            internal_val = xmlStrndup(val, *len);
9268
19.7k
            if (!xmlCheckLanguageID(internal_val)) {
9269
9.02k
                xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
9270
9.02k
                              "Malformed value for xml:lang : %s\n",
9271
9.02k
                              internal_val, NULL);
9272
9.02k
            }
9273
19.7k
        }
9274
9275
        /*
9276
         * Check that xml:space conforms to the specification
9277
         */
9278
76.5k
        if (xmlStrEqual(name, BAD_CAST "space")) {
9279
814
            internal_val = xmlStrndup(val, *len);
9280
814
            if (xmlStrEqual(internal_val, BAD_CAST "default"))
9281
0
                *(ctxt->space) = 0;
9282
814
            else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
9283
273
                *(ctxt->space) = 1;
9284
541
            else {
9285
541
                xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
9286
541
                              "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
9287
541
                              internal_val, NULL);
9288
541
            }
9289
814
        }
9290
76.5k
        if (internal_val) {
9291
19.9k
            xmlFree(internal_val);
9292
19.9k
        }
9293
76.5k
    }
9294
9295
28.1M
    *value = val;
9296
28.1M
    return (name);
9297
28.4M
}
9298
/**
9299
 * xmlParseStartTag2:
9300
 * @ctxt:  an XML parser context
9301
 *
9302
 * parse a start of tag either for rule element or
9303
 * EmptyElement. In both case we don't parse the tag closing chars.
9304
 * This routine is called when running SAX2 parsing
9305
 *
9306
 * [40] STag ::= '<' Name (S Attribute)* S? '>'
9307
 *
9308
 * [ WFC: Unique Att Spec ]
9309
 * No attribute name may appear more than once in the same start-tag or
9310
 * empty-element tag.
9311
 *
9312
 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
9313
 *
9314
 * [ WFC: Unique Att Spec ]
9315
 * No attribute name may appear more than once in the same start-tag or
9316
 * empty-element tag.
9317
 *
9318
 * With namespace:
9319
 *
9320
 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
9321
 *
9322
 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
9323
 *
9324
 * Returns the element name parsed
9325
 */
9326
9327
static const xmlChar *
9328
xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
9329
25.2M
                  const xmlChar **URI, int *tlen) {
9330
25.2M
    const xmlChar *localname;
9331
25.2M
    const xmlChar *prefix;
9332
25.2M
    const xmlChar *attname;
9333
25.2M
    const xmlChar *aprefix;
9334
25.2M
    const xmlChar *nsname;
9335
25.2M
    xmlChar *attvalue;
9336
25.2M
    const xmlChar **atts = ctxt->atts;
9337
25.2M
    int maxatts = ctxt->maxatts;
9338
25.2M
    int nratts, nbatts, nbdef, inputid;
9339
25.2M
    int i, j, nbNs, attval;
9340
25.2M
    unsigned long cur;
9341
25.2M
    int nsNr = ctxt->nsNr;
9342
9343
25.2M
    if (RAW != '<') return(NULL);
9344
25.2M
    NEXT1;
9345
9346
    /*
9347
     * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
9348
     *       point since the attribute values may be stored as pointers to
9349
     *       the buffer and calling SHRINK would destroy them !
9350
     *       The Shrinking is only possible once the full set of attribute
9351
     *       callbacks have been done.
9352
     */
9353
25.2M
    SHRINK;
9354
25.2M
    cur = ctxt->input->cur - ctxt->input->base;
9355
25.2M
    inputid = ctxt->input->id;
9356
25.2M
    nbatts = 0;
9357
25.2M
    nratts = 0;
9358
25.2M
    nbdef = 0;
9359
25.2M
    nbNs = 0;
9360
25.2M
    attval = 0;
9361
    /* Forget any namespaces added during an earlier parse of this element. */
9362
25.2M
    ctxt->nsNr = nsNr;
9363
9364
25.2M
    localname = xmlParseQName(ctxt, &prefix);
9365
25.2M
    if (localname == NULL) {
9366
5.41M
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9367
5.41M
           "StartTag: invalid element name\n");
9368
5.41M
        return(NULL);
9369
5.41M
    }
9370
19.8M
    *tlen = ctxt->input->cur - ctxt->input->base - cur;
9371
9372
    /*
9373
     * Now parse the attributes, it ends up with the ending
9374
     *
9375
     * (S Attribute)* S?
9376
     */
9377
19.8M
    SKIP_BLANKS;
9378
19.8M
    GROW;
9379
9380
32.5M
    while (((RAW != '>') &&
9381
32.5M
     ((RAW != '/') || (NXT(1) != '>')) &&
9382
32.5M
     (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
9383
28.8M
  int id = ctxt->input->id;
9384
28.8M
  unsigned long cons = CUR_CONSUMED;
9385
28.8M
  int len = -1, alloc = 0;
9386
9387
28.8M
  attname = xmlParseAttribute2(ctxt, prefix, localname,
9388
28.8M
                               &aprefix, &attvalue, &len, &alloc);
9389
28.8M
        if ((attname == NULL) || (attvalue == NULL))
9390
737k
            goto next_attr;
9391
28.1M
  if (len < 0) len = xmlStrlen(attvalue);
9392
9393
28.1M
        if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9394
115k
            const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9395
115k
            xmlURIPtr uri;
9396
9397
115k
            if (URL == NULL) {
9398
0
                xmlErrMemory(ctxt, "dictionary allocation failure");
9399
0
                if ((attvalue != NULL) && (alloc != 0))
9400
0
                    xmlFree(attvalue);
9401
0
                localname = NULL;
9402
0
                goto done;
9403
0
            }
9404
115k
            if (*URL != 0) {
9405
111k
                uri = xmlParseURI((const char *) URL);
9406
111k
                if (uri == NULL) {
9407
21.4k
                    xmlNsErr(ctxt, XML_WAR_NS_URI,
9408
21.4k
                             "xmlns: '%s' is not a valid URI\n",
9409
21.4k
                                       URL, NULL, NULL);
9410
90.4k
                } else {
9411
90.4k
                    if (uri->scheme == NULL) {
9412
44.0k
                        xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9413
44.0k
                                  "xmlns: URI %s is not absolute\n",
9414
44.0k
                                  URL, NULL, NULL);
9415
44.0k
                    }
9416
90.4k
                    xmlFreeURI(uri);
9417
90.4k
                }
9418
111k
                if (URL == ctxt->str_xml_ns) {
9419
0
                    if (attname != ctxt->str_xml) {
9420
0
                        xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9421
0
                     "xml namespace URI cannot be the default namespace\n",
9422
0
                                 NULL, NULL, NULL);
9423
0
                    }
9424
0
                    goto next_attr;
9425
0
                }
9426
111k
                if ((len == 29) &&
9427
111k
                    (xmlStrEqual(URL,
9428
1.74k
                             BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9429
0
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9430
0
                         "reuse of the xmlns namespace name is forbidden\n",
9431
0
                             NULL, NULL, NULL);
9432
0
                    goto next_attr;
9433
0
                }
9434
111k
            }
9435
            /*
9436
             * check that it's not a defined namespace
9437
             */
9438
134k
            for (j = 1;j <= nbNs;j++)
9439
26.7k
                if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9440
7.51k
                    break;
9441
115k
            if (j <= nbNs)
9442
7.51k
                xmlErrAttributeDup(ctxt, NULL, attname);
9443
108k
            else
9444
108k
                if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
9445
9446
28.0M
        } else if (aprefix == ctxt->str_xmlns) {
9447
408k
            const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9448
408k
            xmlURIPtr uri;
9449
9450
408k
            if (attname == ctxt->str_xml) {
9451
937
                if (URL != ctxt->str_xml_ns) {
9452
937
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9453
937
                             "xml namespace prefix mapped to wrong URI\n",
9454
937
                             NULL, NULL, NULL);
9455
937
                }
9456
                /*
9457
                 * Do not keep a namespace definition node
9458
                 */
9459
937
                goto next_attr;
9460
937
            }
9461
407k
            if (URL == ctxt->str_xml_ns) {
9462
0
                if (attname != ctxt->str_xml) {
9463
0
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9464
0
                             "xml namespace URI mapped to wrong prefix\n",
9465
0
                             NULL, NULL, NULL);
9466
0
                }
9467
0
                goto next_attr;
9468
0
            }
9469
407k
            if (attname == ctxt->str_xmlns) {
9470
1.22k
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9471
1.22k
                         "redefinition of the xmlns prefix is forbidden\n",
9472
1.22k
                         NULL, NULL, NULL);
9473
1.22k
                goto next_attr;
9474
1.22k
            }
9475
405k
            if ((len == 29) &&
9476
405k
                (xmlStrEqual(URL,
9477
5.05k
                             BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9478
0
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9479
0
                         "reuse of the xmlns namespace name is forbidden\n",
9480
0
                         NULL, NULL, NULL);
9481
0
                goto next_attr;
9482
0
            }
9483
405k
            if ((URL == NULL) || (URL[0] == 0)) {
9484
5.05k
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9485
5.05k
                         "xmlns:%s: Empty XML namespace is not allowed\n",
9486
5.05k
                              attname, NULL, NULL);
9487
5.05k
                goto next_attr;
9488
400k
            } else {
9489
400k
                uri = xmlParseURI((const char *) URL);
9490
400k
                if (uri == NULL) {
9491
73.4k
                    xmlNsErr(ctxt, XML_WAR_NS_URI,
9492
73.4k
                         "xmlns:%s: '%s' is not a valid URI\n",
9493
73.4k
                                       attname, URL, NULL);
9494
327k
                } else {
9495
327k
                    if ((ctxt->pedantic) && (uri->scheme == NULL)) {
9496
7.92k
                        xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9497
7.92k
                                  "xmlns:%s: URI %s is not absolute\n",
9498
7.92k
                                  attname, URL, NULL);
9499
7.92k
                    }
9500
327k
                    xmlFreeURI(uri);
9501
327k
                }
9502
400k
            }
9503
9504
            /*
9505
             * check that it's not a defined namespace
9506
             */
9507
466k
            for (j = 1;j <= nbNs;j++)
9508
72.3k
                if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9509
6.80k
                    break;
9510
400k
            if (j <= nbNs)
9511
6.80k
                xmlErrAttributeDup(ctxt, aprefix, attname);
9512
393k
            else
9513
393k
                if (nsPush(ctxt, attname, URL) > 0) nbNs++;
9514
9515
27.6M
        } else {
9516
            /*
9517
             * Add the pair to atts
9518
             */
9519
27.6M
            if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9520
339k
                if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9521
0
                    goto next_attr;
9522
0
                }
9523
339k
                maxatts = ctxt->maxatts;
9524
339k
                atts = ctxt->atts;
9525
339k
            }
9526
27.6M
            ctxt->attallocs[nratts++] = alloc;
9527
27.6M
            atts[nbatts++] = attname;
9528
27.6M
            atts[nbatts++] = aprefix;
9529
            /*
9530
             * The namespace URI field is used temporarily to point at the
9531
             * base of the current input buffer for non-alloced attributes.
9532
             * When the input buffer is reallocated, all the pointers become
9533
             * invalid, but they can be reconstructed later.
9534
             */
9535
27.6M
            if (alloc)
9536
772k
                atts[nbatts++] = NULL;
9537
26.8M
            else
9538
26.8M
                atts[nbatts++] = ctxt->input->base;
9539
27.6M
            atts[nbatts++] = attvalue;
9540
27.6M
            attvalue += len;
9541
27.6M
            atts[nbatts++] = attvalue;
9542
            /*
9543
             * tag if some deallocation is needed
9544
             */
9545
27.6M
            if (alloc != 0) attval = 1;
9546
27.6M
            attvalue = NULL; /* moved into atts */
9547
27.6M
        }
9548
9549
28.8M
next_attr:
9550
28.8M
        if ((attvalue != NULL) && (alloc != 0)) {
9551
95.5k
            xmlFree(attvalue);
9552
95.5k
            attvalue = NULL;
9553
95.5k
        }
9554
9555
28.8M
  GROW
9556
28.8M
        if (ctxt->instate == XML_PARSER_EOF)
9557
0
            break;
9558
28.8M
  if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9559
14.9M
      break;
9560
13.8M
  if (SKIP_BLANKS == 0) {
9561
1.14M
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9562
1.14M
         "attributes construct error\n");
9563
1.14M
      break;
9564
1.14M
  }
9565
12.7M
        if ((cons == CUR_CONSUMED) && (id == ctxt->input->id) &&
9566
12.7M
            (attname == NULL) && (attvalue == NULL)) {
9567
0
      xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9568
0
           "xmlParseStartTag: problem parsing attributes\n");
9569
0
      break;
9570
0
  }
9571
12.7M
        GROW;
9572
12.7M
    }
9573
9574
19.8M
    if (ctxt->input->id != inputid) {
9575
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9576
0
                    "Unexpected change of input\n");
9577
0
        localname = NULL;
9578
0
        goto done;
9579
0
    }
9580
9581
    /* Reconstruct attribute value pointers. */
9582
47.4M
    for (i = 0, j = 0; j < nratts; i += 5, j++) {
9583
27.6M
        if (atts[i+2] != NULL) {
9584
            /*
9585
             * Arithmetic on dangling pointers is technically undefined
9586
             * behavior, but well...
9587
             */
9588
26.8M
            ptrdiff_t offset = ctxt->input->base - atts[i+2];
9589
26.8M
            atts[i+2]  = NULL;    /* Reset repurposed namespace URI */
9590
26.8M
            atts[i+3] += offset;  /* value */
9591
26.8M
            atts[i+4] += offset;  /* valuend */
9592
26.8M
        }
9593
27.6M
    }
9594
9595
    /*
9596
     * The attributes defaulting
9597
     */
9598
19.8M
    if (ctxt->attsDefault != NULL) {
9599
1.52M
        xmlDefAttrsPtr defaults;
9600
9601
1.52M
  defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9602
1.52M
  if (defaults != NULL) {
9603
191k
      for (i = 0;i < defaults->nbAttrs;i++) {
9604
128k
          attname = defaults->values[5 * i];
9605
128k
    aprefix = defaults->values[5 * i + 1];
9606
9607
                /*
9608
     * special work for namespaces defaulted defs
9609
     */
9610
128k
    if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9611
        /*
9612
         * check that it's not a defined namespace
9613
         */
9614
1.64k
        for (j = 1;j <= nbNs;j++)
9615
985
            if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9616
516
          break;
9617
1.17k
              if (j <= nbNs) continue;
9618
9619
662
        nsname = xmlGetNamespace(ctxt, NULL);
9620
662
        if (nsname != defaults->values[5 * i + 2]) {
9621
594
      if (nsPush(ctxt, NULL,
9622
594
                 defaults->values[5 * i + 2]) > 0)
9623
592
          nbNs++;
9624
594
        }
9625
126k
    } else if (aprefix == ctxt->str_xmlns) {
9626
        /*
9627
         * check that it's not a defined namespace
9628
         */
9629
17.2k
        for (j = 1;j <= nbNs;j++)
9630
13.1k
            if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9631
12.2k
          break;
9632
16.3k
              if (j <= nbNs) continue;
9633
9634
4.14k
        nsname = xmlGetNamespace(ctxt, attname);
9635
4.14k
        if (nsname != defaults->values[2]) {
9636
3.42k
      if (nsPush(ctxt, attname,
9637
3.42k
                 defaults->values[5 * i + 2]) > 0)
9638
3.04k
          nbNs++;
9639
3.42k
        }
9640
110k
    } else {
9641
        /*
9642
         * check that it's not a defined attribute
9643
         */
9644
298k
        for (j = 0;j < nbatts;j+=5) {
9645
189k
      if ((attname == atts[j]) && (aprefix == atts[j+1]))
9646
1.69k
          break;
9647
189k
        }
9648
110k
        if (j < nbatts) continue;
9649
9650
108k
        if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9651
4.09k
      if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9652
0
                            localname = NULL;
9653
0
                            goto done;
9654
0
      }
9655
4.09k
      maxatts = ctxt->maxatts;
9656
4.09k
      atts = ctxt->atts;
9657
4.09k
        }
9658
108k
        atts[nbatts++] = attname;
9659
108k
        atts[nbatts++] = aprefix;
9660
108k
        if (aprefix == NULL)
9661
81.4k
      atts[nbatts++] = NULL;
9662
27.3k
        else
9663
27.3k
            atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
9664
108k
        atts[nbatts++] = defaults->values[5 * i + 2];
9665
108k
        atts[nbatts++] = defaults->values[5 * i + 3];
9666
108k
        if ((ctxt->standalone == 1) &&
9667
108k
            (defaults->values[5 * i + 4] != NULL)) {
9668
407
      xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
9669
407
    "standalone: attribute %s on %s defaulted from external subset\n",
9670
407
                                   attname, localname);
9671
407
        }
9672
108k
        nbdef++;
9673
108k
    }
9674
128k
      }
9675
63.0k
  }
9676
1.52M
    }
9677
9678
    /*
9679
     * The attributes checkings
9680
     */
9681
47.5M
    for (i = 0; i < nbatts;i += 5) {
9682
        /*
9683
  * The default namespace does not apply to attribute names.
9684
  */
9685
27.7M
  if (atts[i + 1] != NULL) {
9686
1.06M
      nsname = xmlGetNamespace(ctxt, atts[i + 1]);
9687
1.06M
      if (nsname == NULL) {
9688
305k
    xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9689
305k
        "Namespace prefix %s for %s on %s is not defined\n",
9690
305k
        atts[i + 1], atts[i], localname);
9691
305k
      }
9692
1.06M
      atts[i + 2] = nsname;
9693
1.06M
  } else
9694
26.6M
      nsname = NULL;
9695
  /*
9696
   * [ WFC: Unique Att Spec ]
9697
   * No attribute name may appear more than once in the same
9698
   * start-tag or empty-element tag.
9699
   * As extended by the Namespace in XML REC.
9700
   */
9701
40.7M
        for (j = 0; j < i;j += 5) {
9702
13.0M
      if (atts[i] == atts[j]) {
9703
38.2k
          if (atts[i+1] == atts[j+1]) {
9704
18.1k
        xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
9705
18.1k
        break;
9706
18.1k
    }
9707
20.1k
    if ((nsname != NULL) && (atts[j + 2] == nsname)) {
9708
193
        xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9709
193
           "Namespaced Attribute %s in '%s' redefined\n",
9710
193
           atts[i], nsname, NULL);
9711
193
        break;
9712
193
    }
9713
20.1k
      }
9714
13.0M
  }
9715
27.7M
    }
9716
9717
19.8M
    nsname = xmlGetNamespace(ctxt, prefix);
9718
19.8M
    if ((prefix != NULL) && (nsname == NULL)) {
9719
868k
  xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9720
868k
           "Namespace prefix %s on %s is not defined\n",
9721
868k
     prefix, localname, NULL);
9722
868k
    }
9723
19.8M
    *pref = prefix;
9724
19.8M
    *URI = nsname;
9725
9726
    /*
9727
     * SAX: Start of Element !
9728
     */
9729
19.8M
    if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9730
19.8M
  (!ctxt->disableSAX)) {
9731
8.06M
  if (nbNs > 0)
9732
336k
      ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9733
336k
        nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
9734
336k
        nbatts / 5, nbdef, atts);
9735
7.72M
  else
9736
7.72M
      ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9737
7.72M
                    nsname, 0, NULL, nbatts / 5, nbdef, atts);
9738
8.06M
    }
9739
9740
19.8M
done:
9741
    /*
9742
     * Free up attribute allocated strings if needed
9743
     */
9744
19.8M
    if (attval != 0) {
9745
1.61M
  for (i = 3,j = 0; j < nratts;i += 5,j++)
9746
883k
      if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9747
772k
          xmlFree((xmlChar *) atts[i]);
9748
734k
    }
9749
9750
19.8M
    return(localname);
9751
19.8M
}
9752
9753
/**
9754
 * xmlParseEndTag2:
9755
 * @ctxt:  an XML parser context
9756
 * @line:  line of the start tag
9757
 * @nsNr:  number of namespaces on the start tag
9758
 *
9759
 * parse an end of tag
9760
 *
9761
 * [42] ETag ::= '</' Name S? '>'
9762
 *
9763
 * With namespace
9764
 *
9765
 * [NS 9] ETag ::= '</' QName S? '>'
9766
 */
9767
9768
static void
9769
3.71M
xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlStartTag *tag) {
9770
3.71M
    const xmlChar *name;
9771
9772
3.71M
    GROW;
9773
3.71M
    if ((RAW != '<') || (NXT(1) != '/')) {
9774
0
  xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
9775
0
  return;
9776
0
    }
9777
3.71M
    SKIP(2);
9778
9779
3.71M
    if (tag->prefix == NULL)
9780
2.99M
        name = xmlParseNameAndCompare(ctxt, ctxt->name);
9781
723k
    else
9782
723k
        name = xmlParseQNameAndCompare(ctxt, ctxt->name, tag->prefix);
9783
9784
    /*
9785
     * We should definitely be at the ending "S? '>'" part
9786
     */
9787
3.71M
    GROW;
9788
3.71M
    if (ctxt->instate == XML_PARSER_EOF)
9789
0
        return;
9790
3.71M
    SKIP_BLANKS;
9791
3.71M
    if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
9792
201k
  xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
9793
201k
    } else
9794
3.51M
  NEXT1;
9795
9796
    /*
9797
     * [ WFC: Element Type Match ]
9798
     * The Name in an element's end-tag must match the element type in the
9799
     * start-tag.
9800
     *
9801
     */
9802
3.71M
    if (name != (xmlChar*)1) {
9803
558k
        if (name == NULL) name = BAD_CAST "unparsable";
9804
558k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
9805
558k
         "Opening and ending tag mismatch: %s line %d and %s\n",
9806
558k
                    ctxt->name, tag->line, name);
9807
558k
    }
9808
9809
    /*
9810
     * SAX: End of Tag
9811
     */
9812
3.71M
    if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9813
3.71M
  (!ctxt->disableSAX))
9814
2.29M
  ctxt->sax->endElementNs(ctxt->userData, ctxt->name, tag->prefix,
9815
2.29M
                                tag->URI);
9816
9817
3.71M
    spacePop(ctxt);
9818
3.71M
    if (tag->nsNr != 0)
9819
114k
  nsPop(ctxt, tag->nsNr);
9820
3.71M
}
9821
9822
/**
9823
 * xmlParseCDSect:
9824
 * @ctxt:  an XML parser context
9825
 *
9826
 * DEPRECATED: Internal function, don't use.
9827
 *
9828
 * Parse escaped pure raw content.
9829
 *
9830
 * [18] CDSect ::= CDStart CData CDEnd
9831
 *
9832
 * [19] CDStart ::= '<![CDATA['
9833
 *
9834
 * [20] Data ::= (Char* - (Char* ']]>' Char*))
9835
 *
9836
 * [21] CDEnd ::= ']]>'
9837
 */
9838
void
9839
736k
xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9840
736k
    xmlChar *buf = NULL;
9841
736k
    int len = 0;
9842
736k
    int size = XML_PARSER_BUFFER_SIZE;
9843
736k
    int r, rl;
9844
736k
    int s, sl;
9845
736k
    int cur, l;
9846
736k
    int count = 0;
9847
736k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
9848
583k
                    XML_MAX_HUGE_LENGTH :
9849
736k
                    XML_MAX_TEXT_LENGTH;
9850
9851
    /* Check 2.6.0 was NXT(0) not RAW */
9852
736k
    if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9853
736k
  SKIP(9);
9854
736k
    } else
9855
0
        return;
9856
9857
736k
    ctxt->instate = XML_PARSER_CDATA_SECTION;
9858
736k
    r = CUR_CHAR(rl);
9859
736k
    if (!IS_CHAR(r)) {
9860
66.9k
  xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9861
66.9k
  ctxt->instate = XML_PARSER_CONTENT;
9862
66.9k
        return;
9863
66.9k
    }
9864
669k
    NEXTL(rl);
9865
669k
    s = CUR_CHAR(sl);
9866
669k
    if (!IS_CHAR(s)) {
9867
16.1k
  xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9868
16.1k
  ctxt->instate = XML_PARSER_CONTENT;
9869
16.1k
        return;
9870
16.1k
    }
9871
653k
    NEXTL(sl);
9872
653k
    cur = CUR_CHAR(l);
9873
653k
    buf = (xmlChar *) xmlMallocAtomic(size);
9874
653k
    if (buf == NULL) {
9875
0
  xmlErrMemory(ctxt, NULL);
9876
0
  return;
9877
0
    }
9878
215M
    while (IS_CHAR(cur) &&
9879
215M
           ((r != ']') || (s != ']') || (cur != '>'))) {
9880
215M
  if (len + 5 >= size) {
9881
749k
      xmlChar *tmp;
9882
9883
749k
      tmp = (xmlChar *) xmlRealloc(buf, size * 2);
9884
749k
      if (tmp == NULL) {
9885
0
          xmlFree(buf);
9886
0
    xmlErrMemory(ctxt, NULL);
9887
0
    return;
9888
0
      }
9889
749k
      buf = tmp;
9890
749k
      size *= 2;
9891
749k
  }
9892
215M
  COPY_BUF(rl,buf,len,r);
9893
215M
  r = s;
9894
215M
  rl = sl;
9895
215M
  s = cur;
9896
215M
  sl = l;
9897
215M
  count++;
9898
215M
  if (count > 50) {
9899
4.01M
      SHRINK;
9900
4.01M
      GROW;
9901
4.01M
            if (ctxt->instate == XML_PARSER_EOF) {
9902
0
    xmlFree(buf);
9903
0
    return;
9904
0
            }
9905
4.01M
      count = 0;
9906
4.01M
  }
9907
215M
  NEXTL(l);
9908
215M
  cur = CUR_CHAR(l);
9909
215M
        if (len > maxLength) {
9910
0
            xmlFatalErrMsg(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9911
0
                           "CData section too big found\n");
9912
0
            xmlFree(buf);
9913
0
            return;
9914
0
        }
9915
215M
    }
9916
653k
    buf[len] = 0;
9917
653k
    ctxt->instate = XML_PARSER_CONTENT;
9918
653k
    if (cur != '>') {
9919
180k
  xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9920
180k
                       "CData section not finished\n%.50s\n", buf);
9921
180k
  xmlFree(buf);
9922
180k
        return;
9923
180k
    }
9924
472k
    NEXTL(l);
9925
9926
    /*
9927
     * OK the buffer is to be consumed as cdata.
9928
     */
9929
472k
    if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9930
63.5k
  if (ctxt->sax->cdataBlock != NULL)
9931
33.1k
      ctxt->sax->cdataBlock(ctxt->userData, buf, len);
9932
30.3k
  else if (ctxt->sax->characters != NULL)
9933
30.3k
      ctxt->sax->characters(ctxt->userData, buf, len);
9934
63.5k
    }
9935
472k
    xmlFree(buf);
9936
472k
}
9937
9938
/**
9939
 * xmlParseContentInternal:
9940
 * @ctxt:  an XML parser context
9941
 *
9942
 * Parse a content sequence. Stops at EOF or '</'. Leaves checking of
9943
 * unexpected EOF to the caller.
9944
 */
9945
9946
static void
9947
4.82M
xmlParseContentInternal(xmlParserCtxtPtr ctxt) {
9948
4.82M
    int nameNr = ctxt->nameNr;
9949
9950
4.82M
    GROW;
9951
551M
    while ((RAW != 0) &&
9952
551M
     (ctxt->instate != XML_PARSER_EOF)) {
9953
547M
        int id = ctxt->input->id;
9954
547M
  unsigned long cons = CUR_CONSUMED;
9955
547M
  const xmlChar *cur = ctxt->input->cur;
9956
9957
  /*
9958
   * First case : a Processing Instruction.
9959
   */
9960
547M
  if ((*cur == '<') && (cur[1] == '?')) {
9961
3.23M
      xmlParsePI(ctxt);
9962
3.23M
  }
9963
9964
  /*
9965
   * Second case : a CDSection
9966
   */
9967
  /* 2.6.0 test was *cur not RAW */
9968
544M
  else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9969
736k
      xmlParseCDSect(ctxt);
9970
736k
  }
9971
9972
  /*
9973
   * Third case :  a comment
9974
   */
9975
543M
  else if ((*cur == '<') && (NXT(1) == '!') &&
9976
543M
     (NXT(2) == '-') && (NXT(3) == '-')) {
9977
11.0M
      xmlParseComment(ctxt);
9978
11.0M
      ctxt->instate = XML_PARSER_CONTENT;
9979
11.0M
  }
9980
9981
  /*
9982
   * Fourth case :  a sub-element.
9983
   */
9984
532M
  else if (*cur == '<') {
9985
203M
            if (NXT(1) == '/') {
9986
36.5M
                if (ctxt->nameNr <= nameNr)
9987
686k
                    break;
9988
35.8M
          xmlParseElementEnd(ctxt);
9989
166M
            } else {
9990
166M
          xmlParseElementStart(ctxt);
9991
166M
            }
9992
203M
  }
9993
9994
  /*
9995
   * Fifth case : a reference. If if has not been resolved,
9996
   *    parsing returns it's Name, create the node
9997
   */
9998
9999
329M
  else if (*cur == '&') {
10000
38.3M
      xmlParseReference(ctxt);
10001
38.3M
  }
10002
10003
  /*
10004
   * Last case, text. Note that References are handled directly.
10005
   */
10006
291M
  else {
10007
291M
      xmlParseCharData(ctxt, 0);
10008
291M
  }
10009
10010
547M
  GROW;
10011
547M
  SHRINK;
10012
10013
547M
  if ((cons == CUR_CONSUMED) && (id == ctxt->input->id)) {
10014
82.9k
      xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
10015
82.9k
                  "detected an error in element content\n");
10016
82.9k
      xmlHaltParser(ctxt);
10017
82.9k
            break;
10018
82.9k
  }
10019
547M
    }
10020
4.82M
}
10021
10022
/**
10023
 * xmlParseContent:
10024
 * @ctxt:  an XML parser context
10025
 *
10026
 * Parse a content sequence. Stops at EOF or '</'.
10027
 *
10028
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10029
 */
10030
10031
void
10032
4.54M
xmlParseContent(xmlParserCtxtPtr ctxt) {
10033
4.54M
    int nameNr = ctxt->nameNr;
10034
10035
4.54M
    xmlParseContentInternal(ctxt);
10036
10037
4.54M
    if ((ctxt->instate != XML_PARSER_EOF) && (ctxt->nameNr > nameNr)) {
10038
2.05M
        const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
10039
2.05M
        int line = ctxt->pushTab[ctxt->nameNr - 1].line;
10040
2.05M
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
10041
2.05M
                "Premature end of data in tag %s line %d\n",
10042
2.05M
    name, line, NULL);
10043
2.05M
    }
10044
4.54M
}
10045
10046
/**
10047
 * xmlParseElement:
10048
 * @ctxt:  an XML parser context
10049
 *
10050
 * DEPRECATED: Internal function, don't use.
10051
 *
10052
 * parse an XML element
10053
 *
10054
 * [39] element ::= EmptyElemTag | STag content ETag
10055
 *
10056
 * [ WFC: Element Type Match ]
10057
 * The Name in an element's end-tag must match the element type in the
10058
 * start-tag.
10059
 *
10060
 */
10061
10062
void
10063
379k
xmlParseElement(xmlParserCtxtPtr ctxt) {
10064
379k
    if (xmlParseElementStart(ctxt) != 0)
10065
97.3k
        return;
10066
10067
282k
    xmlParseContentInternal(ctxt);
10068
282k
    if (ctxt->instate == XML_PARSER_EOF)
10069
2.76k
  return;
10070
10071
279k
    if (CUR == 0) {
10072
169k
        const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
10073
169k
        int line = ctxt->pushTab[ctxt->nameNr - 1].line;
10074
169k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
10075
169k
                "Premature end of data in tag %s line %d\n",
10076
169k
    name, line, NULL);
10077
169k
        return;
10078
169k
    }
10079
10080
109k
    xmlParseElementEnd(ctxt);
10081
109k
}
10082
10083
/**
10084
 * xmlParseElementStart:
10085
 * @ctxt:  an XML parser context
10086
 *
10087
 * Parse the start of an XML element. Returns -1 in case of error, 0 if an
10088
 * opening tag was parsed, 1 if an empty element was parsed.
10089
 */
10090
static int
10091
166M
xmlParseElementStart(xmlParserCtxtPtr ctxt) {
10092
166M
    const xmlChar *name;
10093
166M
    const xmlChar *prefix = NULL;
10094
166M
    const xmlChar *URI = NULL;
10095
166M
    xmlParserNodeInfo node_info;
10096
166M
    int line, tlen = 0;
10097
166M
    xmlNodePtr ret;
10098
166M
    int nsNr = ctxt->nsNr;
10099
10100
166M
    if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) &&
10101
166M
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
10102
21
  xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
10103
21
     "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
10104
21
        xmlParserMaxDepth);
10105
21
  xmlHaltParser(ctxt);
10106
21
  return(-1);
10107
21
    }
10108
10109
    /* Capture start position */
10110
166M
    if (ctxt->record_info) {
10111
0
        node_info.begin_pos = ctxt->input->consumed +
10112
0
                          (CUR_PTR - ctxt->input->base);
10113
0
  node_info.begin_line = ctxt->input->line;
10114
0
    }
10115
10116
166M
    if (ctxt->spaceNr == 0)
10117
0
  spacePush(ctxt, -1);
10118
166M
    else if (*ctxt->space == -2)
10119
35.9M
  spacePush(ctxt, -1);
10120
130M
    else
10121
130M
  spacePush(ctxt, *ctxt->space);
10122
10123
166M
    line = ctxt->input->line;
10124
166M
#ifdef LIBXML_SAX1_ENABLED
10125
166M
    if (ctxt->sax2)
10126
21.6M
#endif /* LIBXML_SAX1_ENABLED */
10127
21.6M
        name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
10128
145M
#ifdef LIBXML_SAX1_ENABLED
10129
145M
    else
10130
145M
  name = xmlParseStartTag(ctxt);
10131
166M
#endif /* LIBXML_SAX1_ENABLED */
10132
166M
    if (ctxt->instate == XML_PARSER_EOF)
10133
7.16k
  return(-1);
10134
166M
    if (name == NULL) {
10135
69.0M
  spacePop(ctxt);
10136
69.0M
        return(-1);
10137
69.0M
    }
10138
97.8M
    nameNsPush(ctxt, name, prefix, URI, line, ctxt->nsNr - nsNr);
10139
97.8M
    ret = ctxt->node;
10140
10141
97.8M
#ifdef LIBXML_VALID_ENABLED
10142
    /*
10143
     * [ VC: Root Element Type ]
10144
     * The Name in the document type declaration must match the element
10145
     * type of the root element.
10146
     */
10147
97.8M
    if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
10148
97.8M
        ctxt->node && (ctxt->node == ctxt->myDoc->children))
10149
0
        ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
10150
97.8M
#endif /* LIBXML_VALID_ENABLED */
10151
10152
    /*
10153
     * Check for an Empty Element.
10154
     */
10155
97.8M
    if ((RAW == '/') && (NXT(1) == '>')) {
10156
42.0M
        SKIP(2);
10157
42.0M
  if (ctxt->sax2) {
10158
11.8M
      if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
10159
11.8M
    (!ctxt->disableSAX))
10160
1.94M
    ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
10161
11.8M
#ifdef LIBXML_SAX1_ENABLED
10162
30.2M
  } else {
10163
30.2M
      if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
10164
30.2M
    (!ctxt->disableSAX))
10165
5.35M
    ctxt->sax->endElement(ctxt->userData, name);
10166
30.2M
#endif /* LIBXML_SAX1_ENABLED */
10167
30.2M
  }
10168
42.0M
  namePop(ctxt);
10169
42.0M
  spacePop(ctxt);
10170
42.0M
  if (nsNr != ctxt->nsNr)
10171
37.3k
      nsPop(ctxt, ctxt->nsNr - nsNr);
10172
42.0M
  if ( ret != NULL && ctxt->record_info ) {
10173
0
     node_info.end_pos = ctxt->input->consumed +
10174
0
            (CUR_PTR - ctxt->input->base);
10175
0
     node_info.end_line = ctxt->input->line;
10176
0
     node_info.node = ret;
10177
0
     xmlParserAddNodeInfo(ctxt, &node_info);
10178
0
  }
10179
42.0M
  return(1);
10180
42.0M
    }
10181
55.7M
    if (RAW == '>') {
10182
45.4M
        NEXT1;
10183
45.4M
    } else {
10184
10.3M
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
10185
10.3M
         "Couldn't find end of Start Tag %s line %d\n",
10186
10.3M
                    name, line, NULL);
10187
10188
  /*
10189
   * end of parsing of this node.
10190
   */
10191
10.3M
  nodePop(ctxt);
10192
10.3M
  namePop(ctxt);
10193
10.3M
  spacePop(ctxt);
10194
10.3M
  if (nsNr != ctxt->nsNr)
10195
65.5k
      nsPop(ctxt, ctxt->nsNr - nsNr);
10196
10197
  /*
10198
   * Capture end position and add node
10199
   */
10200
10.3M
  if ( ret != NULL && ctxt->record_info ) {
10201
0
     node_info.end_pos = ctxt->input->consumed +
10202
0
            (CUR_PTR - ctxt->input->base);
10203
0
     node_info.end_line = ctxt->input->line;
10204
0
     node_info.node = ret;
10205
0
     xmlParserAddNodeInfo(ctxt, &node_info);
10206
0
  }
10207
10.3M
  return(-1);
10208
10.3M
    }
10209
10210
45.4M
    return(0);
10211
55.7M
}
10212
10213
/**
10214
 * xmlParseElementEnd:
10215
 * @ctxt:  an XML parser context
10216
 *
10217
 * Parse the end of an XML element.
10218
 */
10219
static void
10220
35.9M
xmlParseElementEnd(xmlParserCtxtPtr ctxt) {
10221
35.9M
    xmlParserNodeInfo node_info;
10222
35.9M
    xmlNodePtr ret = ctxt->node;
10223
10224
35.9M
    if (ctxt->nameNr <= 0)
10225
0
        return;
10226
10227
    /*
10228
     * parse the end of tag: '</' should be here.
10229
     */
10230
35.9M
    if (ctxt->sax2) {
10231
2.63M
  xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
10232
2.63M
  namePop(ctxt);
10233
2.63M
    }
10234
33.2M
#ifdef LIBXML_SAX1_ENABLED
10235
33.2M
    else
10236
33.2M
  xmlParseEndTag1(ctxt, 0);
10237
35.9M
#endif /* LIBXML_SAX1_ENABLED */
10238
10239
    /*
10240
     * Capture end position and add node
10241
     */
10242
35.9M
    if ( ret != NULL && ctxt->record_info ) {
10243
0
       node_info.end_pos = ctxt->input->consumed +
10244
0
                          (CUR_PTR - ctxt->input->base);
10245
0
       node_info.end_line = ctxt->input->line;
10246
0
       node_info.node = ret;
10247
0
       xmlParserAddNodeInfo(ctxt, &node_info);
10248
0
    }
10249
35.9M
}
10250
10251
/**
10252
 * xmlParseVersionNum:
10253
 * @ctxt:  an XML parser context
10254
 *
10255
 * DEPRECATED: Internal function, don't use.
10256
 *
10257
 * parse the XML version value.
10258
 *
10259
 * [26] VersionNum ::= '1.' [0-9]+
10260
 *
10261
 * In practice allow [0-9].[0-9]+ at that level
10262
 *
10263
 * Returns the string giving the XML version number, or NULL
10264
 */
10265
xmlChar *
10266
394k
xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
10267
394k
    xmlChar *buf = NULL;
10268
394k
    int len = 0;
10269
394k
    int size = 10;
10270
394k
    xmlChar cur;
10271
10272
394k
    buf = (xmlChar *) xmlMallocAtomic(size);
10273
394k
    if (buf == NULL) {
10274
0
  xmlErrMemory(ctxt, NULL);
10275
0
  return(NULL);
10276
0
    }
10277
394k
    cur = CUR;
10278
394k
    if (!((cur >= '0') && (cur <= '9'))) {
10279
4.96k
  xmlFree(buf);
10280
4.96k
  return(NULL);
10281
4.96k
    }
10282
389k
    buf[len++] = cur;
10283
389k
    NEXT;
10284
389k
    cur=CUR;
10285
389k
    if (cur != '.') {
10286
8.51k
  xmlFree(buf);
10287
8.51k
  return(NULL);
10288
8.51k
    }
10289
381k
    buf[len++] = cur;
10290
381k
    NEXT;
10291
381k
    cur=CUR;
10292
1.52M
    while ((cur >= '0') && (cur <= '9')) {
10293
1.14M
  if (len + 1 >= size) {
10294
2.22k
      xmlChar *tmp;
10295
10296
2.22k
      size *= 2;
10297
2.22k
      tmp = (xmlChar *) xmlRealloc(buf, size);
10298
2.22k
      if (tmp == NULL) {
10299
0
          xmlFree(buf);
10300
0
    xmlErrMemory(ctxt, NULL);
10301
0
    return(NULL);
10302
0
      }
10303
2.22k
      buf = tmp;
10304
2.22k
  }
10305
1.14M
  buf[len++] = cur;
10306
1.14M
  NEXT;
10307
1.14M
  cur=CUR;
10308
1.14M
    }
10309
381k
    buf[len] = 0;
10310
381k
    return(buf);
10311
381k
}
10312
10313
/**
10314
 * xmlParseVersionInfo:
10315
 * @ctxt:  an XML parser context
10316
 *
10317
 * DEPRECATED: Internal function, don't use.
10318
 *
10319
 * parse the XML version.
10320
 *
10321
 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
10322
 *
10323
 * [25] Eq ::= S? '=' S?
10324
 *
10325
 * Returns the version string, e.g. "1.0"
10326
 */
10327
10328
xmlChar *
10329
466k
xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
10330
466k
    xmlChar *version = NULL;
10331
10332
466k
    if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
10333
407k
  SKIP(7);
10334
407k
  SKIP_BLANKS;
10335
407k
  if (RAW != '=') {
10336
6.36k
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10337
6.36k
      return(NULL);
10338
6.36k
        }
10339
401k
  NEXT;
10340
401k
  SKIP_BLANKS;
10341
401k
  if (RAW == '"') {
10342
320k
      NEXT;
10343
320k
      version = xmlParseVersionNum(ctxt);
10344
320k
      if (RAW != '"') {
10345
16.0k
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10346
16.0k
      } else
10347
304k
          NEXT;
10348
320k
  } else if (RAW == '\''){
10349
73.8k
      NEXT;
10350
73.8k
      version = xmlParseVersionNum(ctxt);
10351
73.8k
      if (RAW != '\'') {
10352
6.85k
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10353
6.85k
      } else
10354
66.9k
          NEXT;
10355
73.8k
  } else {
10356
6.34k
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10357
6.34k
  }
10358
401k
    }
10359
459k
    return(version);
10360
466k
}
10361
10362
/**
10363
 * xmlParseEncName:
10364
 * @ctxt:  an XML parser context
10365
 *
10366
 * DEPRECATED: Internal function, don't use.
10367
 *
10368
 * parse the XML encoding name
10369
 *
10370
 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
10371
 *
10372
 * Returns the encoding name value or NULL
10373
 */
10374
xmlChar *
10375
194k
xmlParseEncName(xmlParserCtxtPtr ctxt) {
10376
194k
    xmlChar *buf = NULL;
10377
194k
    int len = 0;
10378
194k
    int size = 10;
10379
194k
    xmlChar cur;
10380
10381
194k
    cur = CUR;
10382
194k
    if (((cur >= 'a') && (cur <= 'z')) ||
10383
194k
        ((cur >= 'A') && (cur <= 'Z'))) {
10384
193k
  buf = (xmlChar *) xmlMallocAtomic(size);
10385
193k
  if (buf == NULL) {
10386
0
      xmlErrMemory(ctxt, NULL);
10387
0
      return(NULL);
10388
0
  }
10389
10390
193k
  buf[len++] = cur;
10391
193k
  NEXT;
10392
193k
  cur = CUR;
10393
2.95M
  while (((cur >= 'a') && (cur <= 'z')) ||
10394
2.95M
         ((cur >= 'A') && (cur <= 'Z')) ||
10395
2.95M
         ((cur >= '0') && (cur <= '9')) ||
10396
2.95M
         (cur == '.') || (cur == '_') ||
10397
2.95M
         (cur == '-')) {
10398
2.75M
      if (len + 1 >= size) {
10399
69.1k
          xmlChar *tmp;
10400
10401
69.1k
    size *= 2;
10402
69.1k
    tmp = (xmlChar *) xmlRealloc(buf, size);
10403
69.1k
    if (tmp == NULL) {
10404
0
        xmlErrMemory(ctxt, NULL);
10405
0
        xmlFree(buf);
10406
0
        return(NULL);
10407
0
    }
10408
69.1k
    buf = tmp;
10409
69.1k
      }
10410
2.75M
      buf[len++] = cur;
10411
2.75M
      NEXT;
10412
2.75M
      cur = CUR;
10413
2.75M
      if (cur == 0) {
10414
957
          SHRINK;
10415
957
    GROW;
10416
957
    cur = CUR;
10417
957
      }
10418
2.75M
        }
10419
193k
  buf[len] = 0;
10420
193k
    } else {
10421
1.07k
  xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
10422
1.07k
    }
10423
194k
    return(buf);
10424
194k
}
10425
10426
/**
10427
 * xmlParseEncodingDecl:
10428
 * @ctxt:  an XML parser context
10429
 *
10430
 * DEPRECATED: Internal function, don't use.
10431
 *
10432
 * parse the XML encoding declaration
10433
 *
10434
 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' |  "'" EncName "'")
10435
 *
10436
 * this setups the conversion filters.
10437
 *
10438
 * Returns the encoding value or NULL
10439
 */
10440
10441
const xmlChar *
10442
364k
xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
10443
364k
    xmlChar *encoding = NULL;
10444
10445
364k
    SKIP_BLANKS;
10446
364k
    if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
10447
198k
  SKIP(8);
10448
198k
  SKIP_BLANKS;
10449
198k
  if (RAW != '=') {
10450
2.24k
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10451
2.24k
      return(NULL);
10452
2.24k
        }
10453
196k
  NEXT;
10454
196k
  SKIP_BLANKS;
10455
196k
  if (RAW == '"') {
10456
163k
      NEXT;
10457
163k
      encoding = xmlParseEncName(ctxt);
10458
163k
      if (RAW != '"') {
10459
7.79k
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10460
7.79k
    xmlFree((xmlChar *) encoding);
10461
7.79k
    return(NULL);
10462
7.79k
      } else
10463
155k
          NEXT;
10464
163k
  } else if (RAW == '\''){
10465
30.9k
      NEXT;
10466
30.9k
      encoding = xmlParseEncName(ctxt);
10467
30.9k
      if (RAW != '\'') {
10468
2.11k
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10469
2.11k
    xmlFree((xmlChar *) encoding);
10470
2.11k
    return(NULL);
10471
2.11k
      } else
10472
28.8k
          NEXT;
10473
30.9k
  } else {
10474
1.45k
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10475
1.45k
  }
10476
10477
        /*
10478
         * Non standard parsing, allowing the user to ignore encoding
10479
         */
10480
186k
        if (ctxt->options & XML_PARSE_IGNORE_ENC) {
10481
29.4k
      xmlFree((xmlChar *) encoding);
10482
29.4k
            return(NULL);
10483
29.4k
  }
10484
10485
  /*
10486
   * UTF-16 encoding switch has already taken place at this stage,
10487
   * more over the little-endian/big-endian selection is already done
10488
   */
10489
156k
        if ((encoding != NULL) &&
10490
156k
      ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
10491
155k
       (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
10492
      /*
10493
       * If no encoding was passed to the parser, that we are
10494
       * using UTF-16 and no decoder is present i.e. the
10495
       * document is apparently UTF-8 compatible, then raise an
10496
       * encoding mismatch fatal error
10497
       */
10498
3.85k
      if ((ctxt->encoding == NULL) &&
10499
3.85k
          (ctxt->input->buf != NULL) &&
10500
3.85k
          (ctxt->input->buf->encoder == NULL)) {
10501
3.85k
    xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING,
10502
3.85k
      "Document labelled UTF-16 but has UTF-8 content\n");
10503
3.85k
      }
10504
3.85k
      if (ctxt->encoding != NULL)
10505
0
    xmlFree((xmlChar *) ctxt->encoding);
10506
3.85k
      ctxt->encoding = encoding;
10507
3.85k
  }
10508
  /*
10509
   * UTF-8 encoding is handled natively
10510
   */
10511
152k
        else if ((encoding != NULL) &&
10512
152k
      ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
10513
151k
       (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
10514
88.0k
      if (ctxt->encoding != NULL)
10515
0
    xmlFree((xmlChar *) ctxt->encoding);
10516
88.0k
      ctxt->encoding = encoding;
10517
88.0k
  }
10518
64.8k
  else if (encoding != NULL) {
10519
63.6k
      xmlCharEncodingHandlerPtr handler;
10520
10521
63.6k
      if (ctxt->input->encoding != NULL)
10522
0
    xmlFree((xmlChar *) ctxt->input->encoding);
10523
63.6k
      ctxt->input->encoding = encoding;
10524
10525
63.6k
            handler = xmlFindCharEncodingHandler((const char *) encoding);
10526
63.6k
      if (handler != NULL) {
10527
62.0k
    if (xmlSwitchToEncoding(ctxt, handler) < 0) {
10528
        /* failed to convert */
10529
141
        ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
10530
141
        return(NULL);
10531
141
    }
10532
62.0k
      } else {
10533
1.68k
    xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
10534
1.68k
      "Unsupported encoding %s\n", encoding);
10535
1.68k
    return(NULL);
10536
1.68k
      }
10537
63.6k
  }
10538
156k
    }
10539
321k
    return(encoding);
10540
364k
}
10541
10542
/**
10543
 * xmlParseSDDecl:
10544
 * @ctxt:  an XML parser context
10545
 *
10546
 * DEPRECATED: Internal function, don't use.
10547
 *
10548
 * parse the XML standalone declaration
10549
 *
10550
 * [32] SDDecl ::= S 'standalone' Eq
10551
 *                 (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
10552
 *
10553
 * [ VC: Standalone Document Declaration ]
10554
 * TODO The standalone document declaration must have the value "no"
10555
 * if any external markup declarations contain declarations of:
10556
 *  - attributes with default values, if elements to which these
10557
 *    attributes apply appear in the document without specifications
10558
 *    of values for these attributes, or
10559
 *  - entities (other than amp, lt, gt, apos, quot), if references
10560
 *    to those entities appear in the document, or
10561
 *  - attributes with values subject to normalization, where the
10562
 *    attribute appears in the document with a value which will change
10563
 *    as a result of normalization, or
10564
 *  - element types with element content, if white space occurs directly
10565
 *    within any instance of those types.
10566
 *
10567
 * Returns:
10568
 *   1 if standalone="yes"
10569
 *   0 if standalone="no"
10570
 *  -2 if standalone attribute is missing or invalid
10571
 *    (A standalone value of -2 means that the XML declaration was found,
10572
 *     but no value was specified for the standalone attribute).
10573
 */
10574
10575
int
10576
257k
xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
10577
257k
    int standalone = -2;
10578
10579
257k
    SKIP_BLANKS;
10580
257k
    if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
10581
32.4k
  SKIP(10);
10582
32.4k
        SKIP_BLANKS;
10583
32.4k
  if (RAW != '=') {
10584
372
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10585
372
      return(standalone);
10586
372
        }
10587
32.0k
  NEXT;
10588
32.0k
  SKIP_BLANKS;
10589
32.0k
        if (RAW == '\''){
10590
14.4k
      NEXT;
10591
14.4k
      if ((RAW == 'n') && (NXT(1) == 'o')) {
10592
9.33k
          standalone = 0;
10593
9.33k
                SKIP(2);
10594
9.33k
      } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10595
5.13k
                 (NXT(2) == 's')) {
10596
4.70k
          standalone = 1;
10597
4.70k
    SKIP(3);
10598
4.70k
            } else {
10599
438
    xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10600
438
      }
10601
14.4k
      if (RAW != '\'') {
10602
666
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10603
666
      } else
10604
13.8k
          NEXT;
10605
17.5k
  } else if (RAW == '"'){
10606
17.2k
      NEXT;
10607
17.2k
      if ((RAW == 'n') && (NXT(1) == 'o')) {
10608
7.31k
          standalone = 0;
10609
7.31k
    SKIP(2);
10610
9.97k
      } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10611
9.97k
                 (NXT(2) == 's')) {
10612
9.41k
          standalone = 1;
10613
9.41k
                SKIP(3);
10614
9.41k
            } else {
10615
559
    xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10616
559
      }
10617
17.2k
      if (RAW != '"') {
10618
907
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10619
907
      } else
10620
16.3k
          NEXT;
10621
17.2k
  } else {
10622
284
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10623
284
        }
10624
32.0k
    }
10625
257k
    return(standalone);
10626
257k
}
10627
10628
/**
10629
 * xmlParseXMLDecl:
10630
 * @ctxt:  an XML parser context
10631
 *
10632
 * DEPRECATED: Internal function, don't use.
10633
 *
10634
 * parse an XML declaration header
10635
 *
10636
 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10637
 */
10638
10639
void
10640
398k
xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
10641
398k
    xmlChar *version;
10642
10643
    /*
10644
     * This value for standalone indicates that the document has an
10645
     * XML declaration but it does not have a standalone attribute.
10646
     * It will be overwritten later if a standalone attribute is found.
10647
     */
10648
398k
    ctxt->input->standalone = -2;
10649
10650
    /*
10651
     * We know that '<?xml' is here.
10652
     */
10653
398k
    SKIP(5);
10654
10655
398k
    if (!IS_BLANK_CH(RAW)) {
10656
0
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10657
0
                 "Blank needed after '<?xml'\n");
10658
0
    }
10659
398k
    SKIP_BLANKS;
10660
10661
    /*
10662
     * We must have the VersionInfo here.
10663
     */
10664
398k
    version = xmlParseVersionInfo(ctxt);
10665
398k
    if (version == NULL) {
10666
72.4k
  xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
10667
326k
    } else {
10668
326k
  if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10669
      /*
10670
       * Changed here for XML-1.0 5th edition
10671
       */
10672
8.17k
      if (ctxt->options & XML_PARSE_OLD10) {
10673
1.53k
    xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10674
1.53k
                "Unsupported version '%s'\n",
10675
1.53k
                version);
10676
6.64k
      } else {
10677
6.64k
          if ((version[0] == '1') && ((version[1] == '.'))) {
10678
5.56k
        xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10679
5.56k
                      "Unsupported version '%s'\n",
10680
5.56k
          version, NULL);
10681
5.56k
    } else {
10682
1.07k
        xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10683
1.07k
              "Unsupported version '%s'\n",
10684
1.07k
              version);
10685
1.07k
    }
10686
6.64k
      }
10687
8.17k
  }
10688
326k
  if (ctxt->version != NULL)
10689
0
      xmlFree((void *) ctxt->version);
10690
326k
  ctxt->version = version;
10691
326k
    }
10692
10693
    /*
10694
     * We may have the encoding declaration
10695
     */
10696
398k
    if (!IS_BLANK_CH(RAW)) {
10697
185k
        if ((RAW == '?') && (NXT(1) == '>')) {
10698
101k
      SKIP(2);
10699
101k
      return;
10700
101k
  }
10701
83.8k
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10702
83.8k
    }
10703
297k
    xmlParseEncodingDecl(ctxt);
10704
297k
    if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10705
297k
         (ctxt->instate == XML_PARSER_EOF)) {
10706
  /*
10707
   * The XML REC instructs us to stop parsing right here
10708
   */
10709
1.57k
        return;
10710
1.57k
    }
10711
10712
    /*
10713
     * We may have the standalone status.
10714
     */
10715
295k
    if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
10716
40.8k
        if ((RAW == '?') && (NXT(1) == '>')) {
10717
38.1k
      SKIP(2);
10718
38.1k
      return;
10719
38.1k
  }
10720
2.78k
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10721
2.78k
    }
10722
10723
    /*
10724
     * We can grow the input buffer freely at that point
10725
     */
10726
257k
    GROW;
10727
10728
257k
    SKIP_BLANKS;
10729
257k
    ctxt->input->standalone = xmlParseSDDecl(ctxt);
10730
10731
257k
    SKIP_BLANKS;
10732
257k
    if ((RAW == '?') && (NXT(1) == '>')) {
10733
132k
        SKIP(2);
10734
132k
    } else if (RAW == '>') {
10735
        /* Deprecated old WD ... */
10736
1.36k
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10737
1.36k
  NEXT;
10738
123k
    } else {
10739
123k
        int c;
10740
10741
123k
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10742
5.38M
        while ((c = CUR) != 0) {
10743
5.37M
            NEXT;
10744
5.37M
            if (c == '>')
10745
111k
                break;
10746
5.37M
        }
10747
123k
    }
10748
257k
}
10749
10750
/**
10751
 * xmlParseMisc:
10752
 * @ctxt:  an XML parser context
10753
 *
10754
 * DEPRECATED: Internal function, don't use.
10755
 *
10756
 * parse an XML Misc* optional field.
10757
 *
10758
 * [27] Misc ::= Comment | PI |  S
10759
 */
10760
10761
void
10762
1.04M
xmlParseMisc(xmlParserCtxtPtr ctxt) {
10763
1.14M
    while (ctxt->instate != XML_PARSER_EOF) {
10764
1.14M
        SKIP_BLANKS;
10765
1.14M
        GROW;
10766
1.14M
        if ((RAW == '<') && (NXT(1) == '?')) {
10767
57.1k
      xmlParsePI(ctxt);
10768
1.08M
        } else if (CMP4(CUR_PTR, '<', '!', '-', '-')) {
10769
37.2k
      xmlParseComment(ctxt);
10770
1.04M
        } else {
10771
1.04M
            break;
10772
1.04M
        }
10773
1.14M
    }
10774
1.04M
}
10775
10776
/**
10777
 * xmlParseDocument:
10778
 * @ctxt:  an XML parser context
10779
 *
10780
 * parse an XML document (and build a tree if using the standard SAX
10781
 * interface).
10782
 *
10783
 * [1] document ::= prolog element Misc*
10784
 *
10785
 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
10786
 *
10787
 * Returns 0, -1 in case of error. the parser context is augmented
10788
 *                as a result of the parsing.
10789
 */
10790
10791
int
10792
489k
xmlParseDocument(xmlParserCtxtPtr ctxt) {
10793
489k
    xmlChar start[4];
10794
489k
    xmlCharEncoding enc;
10795
10796
489k
    xmlInitParser();
10797
10798
489k
    if ((ctxt == NULL) || (ctxt->input == NULL))
10799
0
        return(-1);
10800
10801
489k
    GROW;
10802
10803
    /*
10804
     * SAX: detecting the level.
10805
     */
10806
489k
    xmlDetectSAX2(ctxt);
10807
10808
    /*
10809
     * SAX: beginning of the document processing.
10810
     */
10811
489k
    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10812
489k
        ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10813
489k
    if (ctxt->instate == XML_PARSER_EOF)
10814
0
  return(-1);
10815
10816
489k
    if ((ctxt->encoding == NULL) &&
10817
489k
        ((ctxt->input->end - ctxt->input->cur) >= 4)) {
10818
  /*
10819
   * Get the 4 first bytes and decode the charset
10820
   * if enc != XML_CHAR_ENCODING_NONE
10821
   * plug some encoding conversion routines.
10822
   */
10823
483k
  start[0] = RAW;
10824
483k
  start[1] = NXT(1);
10825
483k
  start[2] = NXT(2);
10826
483k
  start[3] = NXT(3);
10827
483k
  enc = xmlDetectCharEncoding(&start[0], 4);
10828
483k
  if (enc != XML_CHAR_ENCODING_NONE) {
10829
151k
      xmlSwitchEncoding(ctxt, enc);
10830
151k
  }
10831
483k
    }
10832
10833
10834
489k
    if (CUR == 0) {
10835
3.77k
  xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10836
3.77k
  return(-1);
10837
3.77k
    }
10838
10839
    /*
10840
     * Check for the XMLDecl in the Prolog.
10841
     * do not GROW here to avoid the detected encoder to decode more
10842
     * than just the first line, unless the amount of data is really
10843
     * too small to hold "<?xml version="1.0" encoding="foo"
10844
     */
10845
485k
    if ((ctxt->input->end - ctxt->input->cur) < 35) {
10846
21.2k
       GROW;
10847
21.2k
    }
10848
485k
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10849
10850
  /*
10851
   * Note that we will switch encoding on the fly.
10852
   */
10853
137k
  xmlParseXMLDecl(ctxt);
10854
137k
  if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10855
137k
      (ctxt->instate == XML_PARSER_EOF)) {
10856
      /*
10857
       * The XML REC instructs us to stop parsing right here
10858
       */
10859
599
      return(-1);
10860
599
  }
10861
137k
  ctxt->standalone = ctxt->input->standalone;
10862
137k
  SKIP_BLANKS;
10863
347k
    } else {
10864
347k
  ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10865
347k
    }
10866
485k
    if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10867
463k
        ctxt->sax->startDocument(ctxt->userData);
10868
485k
    if (ctxt->instate == XML_PARSER_EOF)
10869
0
  return(-1);
10870
485k
    if ((ctxt->myDoc != NULL) && (ctxt->input != NULL) &&
10871
485k
        (ctxt->input->buf != NULL) && (ctxt->input->buf->compressed >= 0)) {
10872
0
  ctxt->myDoc->compression = ctxt->input->buf->compressed;
10873
0
    }
10874
10875
    /*
10876
     * The Misc part of the Prolog
10877
     */
10878
485k
    xmlParseMisc(ctxt);
10879
10880
    /*
10881
     * Then possibly doc type declaration(s) and more Misc
10882
     * (doctypedecl Misc*)?
10883
     */
10884
485k
    GROW;
10885
485k
    if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
10886
10887
201k
  ctxt->inSubset = 1;
10888
201k
  xmlParseDocTypeDecl(ctxt);
10889
201k
  if (RAW == '[') {
10890
159k
      ctxt->instate = XML_PARSER_DTD;
10891
159k
      xmlParseInternalSubset(ctxt);
10892
159k
      if (ctxt->instate == XML_PARSER_EOF)
10893
11.1k
    return(-1);
10894
159k
  }
10895
10896
  /*
10897
   * Create and update the external subset.
10898
   */
10899
189k
  ctxt->inSubset = 2;
10900
189k
  if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10901
189k
      (!ctxt->disableSAX))
10902
151k
      ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10903
151k
                                ctxt->extSubSystem, ctxt->extSubURI);
10904
189k
  if (ctxt->instate == XML_PARSER_EOF)
10905
4.40k
      return(-1);
10906
185k
  ctxt->inSubset = 0;
10907
10908
185k
        xmlCleanSpecialAttr(ctxt);
10909
10910
185k
  ctxt->instate = XML_PARSER_PROLOG;
10911
185k
  xmlParseMisc(ctxt);
10912
185k
    }
10913
10914
    /*
10915
     * Time to start parsing the tree itself
10916
     */
10917
469k
    GROW;
10918
469k
    if (RAW != '<') {
10919
90.1k
  xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10920
90.1k
           "Start tag expected, '<' not found\n");
10921
379k
    } else {
10922
379k
  ctxt->instate = XML_PARSER_CONTENT;
10923
379k
  xmlParseElement(ctxt);
10924
379k
  ctxt->instate = XML_PARSER_EPILOG;
10925
10926
10927
  /*
10928
   * The Misc part at the end
10929
   */
10930
379k
  xmlParseMisc(ctxt);
10931
10932
379k
  if (RAW != 0) {
10933
122k
      xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
10934
122k
  }
10935
379k
  ctxt->instate = XML_PARSER_EOF;
10936
379k
    }
10937
10938
    /*
10939
     * SAX: end of the document processing.
10940
     */
10941
469k
    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10942
469k
        ctxt->sax->endDocument(ctxt->userData);
10943
10944
    /*
10945
     * Remove locally kept entity definitions if the tree was not built
10946
     */
10947
469k
    if ((ctxt->myDoc != NULL) &&
10948
469k
  (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
10949
1.79k
  xmlFreeDoc(ctxt->myDoc);
10950
1.79k
  ctxt->myDoc = NULL;
10951
1.79k
    }
10952
10953
469k
    if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) {
10954
33.3k
        ctxt->myDoc->properties |= XML_DOC_WELLFORMED;
10955
33.3k
  if (ctxt->valid)
10956
21.0k
      ctxt->myDoc->properties |= XML_DOC_DTDVALID;
10957
33.3k
  if (ctxt->nsWellFormed)
10958
31.7k
      ctxt->myDoc->properties |= XML_DOC_NSVALID;
10959
33.3k
  if (ctxt->options & XML_PARSE_OLD10)
10960
9.82k
      ctxt->myDoc->properties |= XML_DOC_OLD10;
10961
33.3k
    }
10962
469k
    if (! ctxt->wellFormed) {
10963
436k
  ctxt->valid = 0;
10964
436k
  return(-1);
10965
436k
    }
10966
33.3k
    return(0);
10967
469k
}
10968
10969
/**
10970
 * xmlParseExtParsedEnt:
10971
 * @ctxt:  an XML parser context
10972
 *
10973
 * parse a general parsed entity
10974
 * An external general parsed entity is well-formed if it matches the
10975
 * production labeled extParsedEnt.
10976
 *
10977
 * [78] extParsedEnt ::= TextDecl? content
10978
 *
10979
 * Returns 0, -1 in case of error. the parser context is augmented
10980
 *                as a result of the parsing.
10981
 */
10982
10983
int
10984
0
xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
10985
0
    xmlChar start[4];
10986
0
    xmlCharEncoding enc;
10987
10988
0
    if ((ctxt == NULL) || (ctxt->input == NULL))
10989
0
        return(-1);
10990
10991
0
    xmlDetectSAX2(ctxt);
10992
10993
0
    GROW;
10994
10995
    /*
10996
     * SAX: beginning of the document processing.
10997
     */
10998
0
    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10999
0
        ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
11000
11001
    /*
11002
     * Get the 4 first bytes and decode the charset
11003
     * if enc != XML_CHAR_ENCODING_NONE
11004
     * plug some encoding conversion routines.
11005
     */
11006
0
    if ((ctxt->input->end - ctxt->input->cur) >= 4) {
11007
0
  start[0] = RAW;
11008
0
  start[1] = NXT(1);
11009
0
  start[2] = NXT(2);
11010
0
  start[3] = NXT(3);
11011
0
  enc = xmlDetectCharEncoding(start, 4);
11012
0
  if (enc != XML_CHAR_ENCODING_NONE) {
11013
0
      xmlSwitchEncoding(ctxt, enc);
11014
0
  }
11015
0
    }
11016
11017
11018
0
    if (CUR == 0) {
11019
0
  xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11020
0
    }
11021
11022
    /*
11023
     * Check for the XMLDecl in the Prolog.
11024
     */
11025
0
    GROW;
11026
0
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
11027
11028
  /*
11029
   * Note that we will switch encoding on the fly.
11030
   */
11031
0
  xmlParseXMLDecl(ctxt);
11032
0
  if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
11033
      /*
11034
       * The XML REC instructs us to stop parsing right here
11035
       */
11036
0
      return(-1);
11037
0
  }
11038
0
  SKIP_BLANKS;
11039
0
    } else {
11040
0
  ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11041
0
    }
11042
0
    if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
11043
0
        ctxt->sax->startDocument(ctxt->userData);
11044
0
    if (ctxt->instate == XML_PARSER_EOF)
11045
0
  return(-1);
11046
11047
    /*
11048
     * Doing validity checking on chunk doesn't make sense
11049
     */
11050
0
    ctxt->instate = XML_PARSER_CONTENT;
11051
0
    ctxt->validate = 0;
11052
0
    ctxt->loadsubset = 0;
11053
0
    ctxt->depth = 0;
11054
11055
0
    xmlParseContent(ctxt);
11056
0
    if (ctxt->instate == XML_PARSER_EOF)
11057
0
  return(-1);
11058
11059
0
    if ((RAW == '<') && (NXT(1) == '/')) {
11060
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11061
0
    } else if (RAW != 0) {
11062
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
11063
0
    }
11064
11065
    /*
11066
     * SAX: end of the document processing.
11067
     */
11068
0
    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11069
0
        ctxt->sax->endDocument(ctxt->userData);
11070
11071
0
    if (! ctxt->wellFormed) return(-1);
11072
0
    return(0);
11073
0
}
11074
11075
#ifdef LIBXML_PUSH_ENABLED
11076
/************************************************************************
11077
 *                  *
11078
 *    Progressive parsing interfaces        *
11079
 *                  *
11080
 ************************************************************************/
11081
11082
/**
11083
 * xmlParseLookupSequence:
11084
 * @ctxt:  an XML parser context
11085
 * @first:  the first char to lookup
11086
 * @next:  the next char to lookup or zero
11087
 * @third:  the next char to lookup or zero
11088
 *
11089
 * Try to find if a sequence (first, next, third) or  just (first next) or
11090
 * (first) is available in the input stream.
11091
 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
11092
 * to avoid rescanning sequences of bytes, it DOES change the state of the
11093
 * parser, do not use liberally.
11094
 *
11095
 * Returns the index to the current parsing point if the full sequence
11096
 *      is available, -1 otherwise.
11097
 */
11098
static int
11099
xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
11100
11.3M
                       xmlChar next, xmlChar third) {
11101
11.3M
    int base, len;
11102
11.3M
    xmlParserInputPtr in;
11103
11.3M
    const xmlChar *buf;
11104
11105
11.3M
    in = ctxt->input;
11106
11.3M
    if (in == NULL) return(-1);
11107
11.3M
    base = in->cur - in->base;
11108
11.3M
    if (base < 0) return(-1);
11109
11.3M
    if (ctxt->checkIndex > base)
11110
1.17M
        base = ctxt->checkIndex;
11111
11.3M
    if (in->buf == NULL) {
11112
0
  buf = in->base;
11113
0
  len = in->length;
11114
11.3M
    } else {
11115
11.3M
  buf = xmlBufContent(in->buf->buffer);
11116
11.3M
  len = xmlBufUse(in->buf->buffer);
11117
11.3M
    }
11118
    /* take into account the sequence length */
11119
11.3M
    if (third) len -= 2;
11120
10.1M
    else if (next) len --;
11121
150G
    for (;base < len;base++) {
11122
150G
        if (buf[base] == first) {
11123
13.9M
      if (third != 0) {
11124
4.11M
    if ((buf[base + 1] != next) ||
11125
4.11M
        (buf[base + 2] != third)) continue;
11126
9.80M
      } else if (next != 0) {
11127
1.10M
    if (buf[base + 1] != next) continue;
11128
1.10M
      }
11129
9.71M
      ctxt->checkIndex = 0;
11130
#ifdef DEBUG_PUSH
11131
      if (next == 0)
11132
    xmlGenericError(xmlGenericErrorContext,
11133
      "PP: lookup '%c' found at %d\n",
11134
      first, base);
11135
      else if (third == 0)
11136
    xmlGenericError(xmlGenericErrorContext,
11137
      "PP: lookup '%c%c' found at %d\n",
11138
      first, next, base);
11139
      else
11140
    xmlGenericError(xmlGenericErrorContext,
11141
      "PP: lookup '%c%c%c' found at %d\n",
11142
      first, next, third, base);
11143
#endif
11144
9.71M
      return(base - (in->cur - in->base));
11145
13.9M
  }
11146
150G
    }
11147
1.62M
    ctxt->checkIndex = base;
11148
#ifdef DEBUG_PUSH
11149
    if (next == 0)
11150
  xmlGenericError(xmlGenericErrorContext,
11151
    "PP: lookup '%c' failed\n", first);
11152
    else if (third == 0)
11153
  xmlGenericError(xmlGenericErrorContext,
11154
    "PP: lookup '%c%c' failed\n", first, next);
11155
    else
11156
  xmlGenericError(xmlGenericErrorContext,
11157
    "PP: lookup '%c%c%c' failed\n", first, next, third);
11158
#endif
11159
1.62M
    return(-1);
11160
11.3M
}
11161
11162
/**
11163
 * xmlParseGetLasts:
11164
 * @ctxt:  an XML parser context
11165
 * @lastlt:  pointer to store the last '<' from the input
11166
 * @lastgt:  pointer to store the last '>' from the input
11167
 *
11168
 * Lookup the last < and > in the current chunk
11169
 */
11170
static void
11171
xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
11172
9.78M
                 const xmlChar **lastgt) {
11173
9.78M
    const xmlChar *tmp;
11174
11175
9.78M
    if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
11176
0
  xmlGenericError(xmlGenericErrorContext,
11177
0
        "Internal error: xmlParseGetLasts\n");
11178
0
  return;
11179
0
    }
11180
9.78M
    if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
11181
4.15M
        tmp = ctxt->input->end;
11182
4.15M
  tmp--;
11183
1.29G
  while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
11184
4.15M
  if (tmp < ctxt->input->base) {
11185
113k
      *lastlt = NULL;
11186
113k
      *lastgt = NULL;
11187
4.04M
  } else {
11188
4.04M
      *lastlt = tmp;
11189
4.04M
      tmp++;
11190
215M
      while ((tmp < ctxt->input->end) && (*tmp != '>')) {
11191
211M
          if (*tmp == '\'') {
11192
1.41M
        tmp++;
11193
57.2M
        while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
11194
1.41M
        if (tmp < ctxt->input->end) tmp++;
11195
209M
    } else if (*tmp == '"') {
11196
2.11M
        tmp++;
11197
141M
        while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
11198
2.11M
        if (tmp < ctxt->input->end) tmp++;
11199
2.11M
    } else
11200
207M
        tmp++;
11201
211M
      }
11202
4.04M
      if (tmp < ctxt->input->end)
11203
1.77M
          *lastgt = tmp;
11204
2.27M
      else {
11205
2.27M
          tmp = *lastlt;
11206
2.27M
    tmp--;
11207
59.0M
    while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
11208
2.27M
    if (tmp >= ctxt->input->base)
11209
2.22M
        *lastgt = tmp;
11210
51.0k
    else
11211
51.0k
        *lastgt = NULL;
11212
2.27M
      }
11213
4.04M
  }
11214
5.62M
    } else {
11215
5.62M
        *lastlt = NULL;
11216
5.62M
  *lastgt = NULL;
11217
5.62M
    }
11218
9.78M
}
11219
/**
11220
 * xmlCheckCdataPush:
11221
 * @cur: pointer to the block of characters
11222
 * @len: length of the block in bytes
11223
 * @complete: 1 if complete CDATA block is passed in, 0 if partial block
11224
 *
11225
 * Check that the block of characters is okay as SCdata content [20]
11226
 *
11227
 * Returns the number of bytes to pass if okay, a negative index where an
11228
 *         UTF-8 error occurred otherwise
11229
 */
11230
static int
11231
655k
xmlCheckCdataPush(const xmlChar *utf, int len, int complete) {
11232
655k
    int ix;
11233
655k
    unsigned char c;
11234
655k
    int codepoint;
11235
11236
655k
    if ((utf == NULL) || (len <= 0))
11237
1.18k
        return(0);
11238
11239
14.0M
    for (ix = 0; ix < len;) {      /* string is 0-terminated */
11240
14.0M
        c = utf[ix];
11241
14.0M
        if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
11242
9.84M
      if (c >= 0x20)
11243
9.26M
    ix++;
11244
572k
      else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
11245
433k
          ix++;
11246
139k
      else
11247
139k
          return(-ix);
11248
9.84M
  } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
11249
1.65M
      if (ix + 2 > len) return(complete ? -ix : ix);
11250
1.65M
      if ((utf[ix+1] & 0xc0 ) != 0x80)
11251
54.6k
          return(-ix);
11252
1.59M
      codepoint = (utf[ix] & 0x1f) << 6;
11253
1.59M
      codepoint |= utf[ix+1] & 0x3f;
11254
1.59M
      if (!xmlIsCharQ(codepoint))
11255
6.64k
          return(-ix);
11256
1.58M
      ix += 2;
11257
2.50M
  } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
11258
907k
      if (ix + 3 > len) return(complete ? -ix : ix);
11259
900k
      if (((utf[ix+1] & 0xc0) != 0x80) ||
11260
900k
          ((utf[ix+2] & 0xc0) != 0x80))
11261
30.6k
        return(-ix);
11262
869k
      codepoint = (utf[ix] & 0xf) << 12;
11263
869k
      codepoint |= (utf[ix+1] & 0x3f) << 6;
11264
869k
      codepoint |= utf[ix+2] & 0x3f;
11265
869k
      if (!xmlIsCharQ(codepoint))
11266
5.20k
          return(-ix);
11267
864k
      ix += 3;
11268
1.59M
  } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
11269
1.34M
      if (ix + 4 > len) return(complete ? -ix : ix);
11270
1.34M
      if (((utf[ix+1] & 0xc0) != 0x80) ||
11271
1.34M
          ((utf[ix+2] & 0xc0) != 0x80) ||
11272
1.34M
    ((utf[ix+3] & 0xc0) != 0x80))
11273
76.4k
        return(-ix);
11274
1.26M
      codepoint = (utf[ix] & 0x7) << 18;
11275
1.26M
      codepoint |= (utf[ix+1] & 0x3f) << 12;
11276
1.26M
      codepoint |= (utf[ix+2] & 0x3f) << 6;
11277
1.26M
      codepoint |= utf[ix+3] & 0x3f;
11278
1.26M
      if (!xmlIsCharQ(codepoint))
11279
12.3k
          return(-ix);
11280
1.25M
      ix += 4;
11281
1.25M
  } else       /* unknown encoding */
11282
250k
      return(-ix);
11283
14.0M
      }
11284
59.5k
      return(ix);
11285
654k
}
11286
11287
/**
11288
 * xmlParseTryOrFinish:
11289
 * @ctxt:  an XML parser context
11290
 * @terminate:  last chunk indicator
11291
 *
11292
 * Try to progress on parsing
11293
 *
11294
 * Returns zero if no parsing was possible
11295
 */
11296
static int
11297
9.19M
xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
11298
9.19M
    int ret = 0;
11299
9.19M
    int avail, tlen;
11300
9.19M
    xmlChar cur, next;
11301
9.19M
    const xmlChar *lastlt, *lastgt;
11302
11303
9.19M
    if (ctxt->input == NULL)
11304
0
        return(0);
11305
11306
#ifdef DEBUG_PUSH
11307
    switch (ctxt->instate) {
11308
  case XML_PARSER_EOF:
11309
      xmlGenericError(xmlGenericErrorContext,
11310
        "PP: try EOF\n"); break;
11311
  case XML_PARSER_START:
11312
      xmlGenericError(xmlGenericErrorContext,
11313
        "PP: try START\n"); break;
11314
  case XML_PARSER_MISC:
11315
      xmlGenericError(xmlGenericErrorContext,
11316
        "PP: try MISC\n");break;
11317
  case XML_PARSER_COMMENT:
11318
      xmlGenericError(xmlGenericErrorContext,
11319
        "PP: try COMMENT\n");break;
11320
  case XML_PARSER_PROLOG:
11321
      xmlGenericError(xmlGenericErrorContext,
11322
        "PP: try PROLOG\n");break;
11323
  case XML_PARSER_START_TAG:
11324
      xmlGenericError(xmlGenericErrorContext,
11325
        "PP: try START_TAG\n");break;
11326
  case XML_PARSER_CONTENT:
11327
      xmlGenericError(xmlGenericErrorContext,
11328
        "PP: try CONTENT\n");break;
11329
  case XML_PARSER_CDATA_SECTION:
11330
      xmlGenericError(xmlGenericErrorContext,
11331
        "PP: try CDATA_SECTION\n");break;
11332
  case XML_PARSER_END_TAG:
11333
      xmlGenericError(xmlGenericErrorContext,
11334
        "PP: try END_TAG\n");break;
11335
  case XML_PARSER_ENTITY_DECL:
11336
      xmlGenericError(xmlGenericErrorContext,
11337
        "PP: try ENTITY_DECL\n");break;
11338
  case XML_PARSER_ENTITY_VALUE:
11339
      xmlGenericError(xmlGenericErrorContext,
11340
        "PP: try ENTITY_VALUE\n");break;
11341
  case XML_PARSER_ATTRIBUTE_VALUE:
11342
      xmlGenericError(xmlGenericErrorContext,
11343
        "PP: try ATTRIBUTE_VALUE\n");break;
11344
  case XML_PARSER_DTD:
11345
      xmlGenericError(xmlGenericErrorContext,
11346
        "PP: try DTD\n");break;
11347
  case XML_PARSER_EPILOG:
11348
      xmlGenericError(xmlGenericErrorContext,
11349
        "PP: try EPILOG\n");break;
11350
  case XML_PARSER_PI:
11351
      xmlGenericError(xmlGenericErrorContext,
11352
        "PP: try PI\n");break;
11353
        case XML_PARSER_IGNORE:
11354
            xmlGenericError(xmlGenericErrorContext,
11355
        "PP: try IGNORE\n");break;
11356
    }
11357
#endif
11358
11359
9.19M
    if ((ctxt->input != NULL) &&
11360
9.19M
        (ctxt->input->cur - ctxt->input->base > 4096)) {
11361
51.9k
  xmlSHRINK(ctxt);
11362
51.9k
  ctxt->checkIndex = 0;
11363
51.9k
    }
11364
9.19M
    xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11365
11366
51.3M
    while (ctxt->instate != XML_PARSER_EOF) {
11367
51.1M
  if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
11368
223k
      return(0);
11369
11370
50.9M
  if (ctxt->input == NULL) break;
11371
50.9M
  if (ctxt->input->buf == NULL)
11372
0
      avail = ctxt->input->length -
11373
0
              (ctxt->input->cur - ctxt->input->base);
11374
50.9M
  else {
11375
      /*
11376
       * If we are operating on converted input, try to flush
11377
       * remaining chars to avoid them stalling in the non-converted
11378
       * buffer. But do not do this in document start where
11379
       * encoding="..." may not have been read and we work on a
11380
       * guessed encoding.
11381
       */
11382
50.9M
      if ((ctxt->instate != XML_PARSER_START) &&
11383
50.9M
          (ctxt->input->buf->raw != NULL) &&
11384
50.9M
    (xmlBufIsEmpty(ctxt->input->buf->raw) == 0)) {
11385
180k
                size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
11386
180k
                                                 ctxt->input);
11387
180k
    size_t current = ctxt->input->cur - ctxt->input->base;
11388
11389
180k
    xmlParserInputBufferPush(ctxt->input->buf, 0, "");
11390
180k
                xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
11391
180k
                                      base, current);
11392
180k
      }
11393
50.9M
      avail = xmlBufUse(ctxt->input->buf->buffer) -
11394
50.9M
        (ctxt->input->cur - ctxt->input->base);
11395
50.9M
  }
11396
50.9M
        if (avail < 1)
11397
276k
      goto done;
11398
50.6M
        switch (ctxt->instate) {
11399
0
            case XML_PARSER_EOF:
11400
          /*
11401
     * Document parsing is done !
11402
     */
11403
0
          goto done;
11404
1.72M
            case XML_PARSER_START:
11405
1.72M
    if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
11406
420k
        xmlChar start[4];
11407
420k
        xmlCharEncoding enc;
11408
11409
        /*
11410
         * Very first chars read from the document flow.
11411
         */
11412
420k
        if (avail < 4)
11413
25.3k
      goto done;
11414
11415
        /*
11416
         * Get the 4 first bytes and decode the charset
11417
         * if enc != XML_CHAR_ENCODING_NONE
11418
         * plug some encoding conversion routines,
11419
         * else xmlSwitchEncoding will set to (default)
11420
         * UTF8.
11421
         */
11422
394k
        start[0] = RAW;
11423
394k
        start[1] = NXT(1);
11424
394k
        start[2] = NXT(2);
11425
394k
        start[3] = NXT(3);
11426
394k
        enc = xmlDetectCharEncoding(start, 4);
11427
394k
        xmlSwitchEncoding(ctxt, enc);
11428
394k
        break;
11429
420k
    }
11430
11431
1.30M
    if (avail < 2)
11432
380
        goto done;
11433
1.30M
    cur = ctxt->input->cur[0];
11434
1.30M
    next = ctxt->input->cur[1];
11435
1.30M
    if (cur == 0) {
11436
4.70k
        if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11437
4.70k
      ctxt->sax->setDocumentLocator(ctxt->userData,
11438
4.70k
                  &xmlDefaultSAXLocator);
11439
4.70k
        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11440
4.70k
        xmlHaltParser(ctxt);
11441
#ifdef DEBUG_PUSH
11442
        xmlGenericError(xmlGenericErrorContext,
11443
          "PP: entering EOF\n");
11444
#endif
11445
4.70k
        if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11446
4.70k
      ctxt->sax->endDocument(ctxt->userData);
11447
4.70k
        goto done;
11448
4.70k
    }
11449
1.30M
          if ((cur == '<') && (next == '?')) {
11450
        /* PI or XML decl */
11451
829k
        if (avail < 5) return(ret);
11452
828k
        if ((!terminate) &&
11453
828k
            (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11454
525k
      return(ret);
11455
302k
        if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11456
302k
      ctxt->sax->setDocumentLocator(ctxt->userData,
11457
302k
                  &xmlDefaultSAXLocator);
11458
302k
        if ((ctxt->input->cur[2] == 'x') &&
11459
302k
      (ctxt->input->cur[3] == 'm') &&
11460
302k
      (ctxt->input->cur[4] == 'l') &&
11461
302k
      (IS_BLANK_CH(ctxt->input->cur[5]))) {
11462
260k
      ret += 5;
11463
#ifdef DEBUG_PUSH
11464
      xmlGenericError(xmlGenericErrorContext,
11465
        "PP: Parsing XML Decl\n");
11466
#endif
11467
260k
      xmlParseXMLDecl(ctxt);
11468
260k
      if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
11469
          /*
11470
           * The XML REC instructs us to stop parsing right
11471
           * here
11472
           */
11473
974
          xmlHaltParser(ctxt);
11474
974
          return(0);
11475
974
      }
11476
259k
      ctxt->standalone = ctxt->input->standalone;
11477
259k
      if ((ctxt->encoding == NULL) &&
11478
259k
          (ctxt->input->encoding != NULL))
11479
34.3k
          ctxt->encoding = xmlStrdup(ctxt->input->encoding);
11480
259k
      if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11481
259k
          (!ctxt->disableSAX))
11482
220k
          ctxt->sax->startDocument(ctxt->userData);
11483
259k
      ctxt->instate = XML_PARSER_MISC;
11484
#ifdef DEBUG_PUSH
11485
      xmlGenericError(xmlGenericErrorContext,
11486
        "PP: entering MISC\n");
11487
#endif
11488
259k
        } else {
11489
41.9k
      ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11490
41.9k
      if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11491
41.9k
          (!ctxt->disableSAX))
11492
41.9k
          ctxt->sax->startDocument(ctxt->userData);
11493
41.9k
      ctxt->instate = XML_PARSER_MISC;
11494
#ifdef DEBUG_PUSH
11495
      xmlGenericError(xmlGenericErrorContext,
11496
        "PP: entering MISC\n");
11497
#endif
11498
41.9k
        }
11499
473k
    } else {
11500
473k
        if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11501
473k
      ctxt->sax->setDocumentLocator(ctxt->userData,
11502
473k
                  &xmlDefaultSAXLocator);
11503
473k
        ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11504
473k
        if (ctxt->version == NULL) {
11505
0
            xmlErrMemory(ctxt, NULL);
11506
0
      break;
11507
0
        }
11508
473k
        if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11509
473k
            (!ctxt->disableSAX))
11510
473k
      ctxt->sax->startDocument(ctxt->userData);
11511
473k
        ctxt->instate = XML_PARSER_MISC;
11512
#ifdef DEBUG_PUSH
11513
        xmlGenericError(xmlGenericErrorContext,
11514
          "PP: entering MISC\n");
11515
#endif
11516
473k
    }
11517
775k
    break;
11518
8.62M
            case XML_PARSER_START_TAG: {
11519
8.62M
          const xmlChar *name;
11520
8.62M
    const xmlChar *prefix = NULL;
11521
8.62M
    const xmlChar *URI = NULL;
11522
8.62M
                int line = ctxt->input->line;
11523
8.62M
    int nsNr = ctxt->nsNr;
11524
11525
8.62M
    if ((avail < 2) && (ctxt->inputNr == 1))
11526
0
        goto done;
11527
8.62M
    cur = ctxt->input->cur[0];
11528
8.62M
          if (cur != '<') {
11529
42.0k
        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11530
42.0k
        xmlHaltParser(ctxt);
11531
42.0k
        if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11532
42.0k
      ctxt->sax->endDocument(ctxt->userData);
11533
42.0k
        goto done;
11534
42.0k
    }
11535
8.58M
    if (!terminate) {
11536
8.01M
        if (ctxt->progressive) {
11537
            /* > can be found unescaped in attribute values */
11538
8.01M
            if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
11539
1.01M
          goto done;
11540
8.01M
        } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11541
0
      goto done;
11542
0
        }
11543
8.01M
    }
11544
7.56M
    if (ctxt->spaceNr == 0)
11545
90.0k
        spacePush(ctxt, -1);
11546
7.47M
    else if (*ctxt->space == -2)
11547
1.12M
        spacePush(ctxt, -1);
11548
6.35M
    else
11549
6.35M
        spacePush(ctxt, *ctxt->space);
11550
7.56M
#ifdef LIBXML_SAX1_ENABLED
11551
7.56M
    if (ctxt->sax2)
11552
3.61M
#endif /* LIBXML_SAX1_ENABLED */
11553
3.61M
        name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
11554
3.95M
#ifdef LIBXML_SAX1_ENABLED
11555
3.95M
    else
11556
3.95M
        name = xmlParseStartTag(ctxt);
11557
7.56M
#endif /* LIBXML_SAX1_ENABLED */
11558
7.56M
    if (ctxt->instate == XML_PARSER_EOF)
11559
3
        goto done;
11560
7.56M
    if (name == NULL) {
11561
52.1k
        spacePop(ctxt);
11562
52.1k
        xmlHaltParser(ctxt);
11563
52.1k
        if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11564
52.1k
      ctxt->sax->endDocument(ctxt->userData);
11565
52.1k
        goto done;
11566
52.1k
    }
11567
7.51M
#ifdef LIBXML_VALID_ENABLED
11568
    /*
11569
     * [ VC: Root Element Type ]
11570
     * The Name in the document type declaration must match
11571
     * the element type of the root element.
11572
     */
11573
7.51M
    if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
11574
7.51M
        ctxt->node && (ctxt->node == ctxt->myDoc->children))
11575
0
        ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
11576
7.51M
#endif /* LIBXML_VALID_ENABLED */
11577
11578
    /*
11579
     * Check for an Empty Element.
11580
     */
11581
7.51M
    if ((RAW == '/') && (NXT(1) == '>')) {
11582
3.02M
        SKIP(2);
11583
11584
3.02M
        if (ctxt->sax2) {
11585
1.66M
      if ((ctxt->sax != NULL) &&
11586
1.66M
          (ctxt->sax->endElementNs != NULL) &&
11587
1.66M
          (!ctxt->disableSAX))
11588
1.66M
          ctxt->sax->endElementNs(ctxt->userData, name,
11589
1.66M
                                  prefix, URI);
11590
1.66M
      if (ctxt->nsNr - nsNr > 0)
11591
11.6k
          nsPop(ctxt, ctxt->nsNr - nsNr);
11592
1.66M
#ifdef LIBXML_SAX1_ENABLED
11593
1.66M
        } else {
11594
1.35M
      if ((ctxt->sax != NULL) &&
11595
1.35M
          (ctxt->sax->endElement != NULL) &&
11596
1.35M
          (!ctxt->disableSAX))
11597
1.35M
          ctxt->sax->endElement(ctxt->userData, name);
11598
1.35M
#endif /* LIBXML_SAX1_ENABLED */
11599
1.35M
        }
11600
3.02M
        if (ctxt->instate == XML_PARSER_EOF)
11601
0
      goto done;
11602
3.02M
        spacePop(ctxt);
11603
3.02M
        if (ctxt->nameNr == 0) {
11604
12.1k
      ctxt->instate = XML_PARSER_EPILOG;
11605
3.01M
        } else {
11606
3.01M
      ctxt->instate = XML_PARSER_CONTENT;
11607
3.01M
        }
11608
3.02M
                    ctxt->progressive = 1;
11609
3.02M
        break;
11610
3.02M
    }
11611
4.49M
    if (RAW == '>') {
11612
3.50M
        NEXT;
11613
3.50M
    } else {
11614
988k
        xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
11615
988k
           "Couldn't find end of Start Tag %s\n",
11616
988k
           name);
11617
988k
        nodePop(ctxt);
11618
988k
        spacePop(ctxt);
11619
988k
    }
11620
4.49M
                nameNsPush(ctxt, name, prefix, URI, line, ctxt->nsNr - nsNr);
11621
11622
4.49M
    ctxt->instate = XML_PARSER_CONTENT;
11623
4.49M
                ctxt->progressive = 1;
11624
4.49M
                break;
11625
7.51M
      }
11626
31.3M
            case XML_PARSER_CONTENT: {
11627
31.3M
    int id;
11628
31.3M
    unsigned long cons;
11629
31.3M
    if ((avail < 2) && (ctxt->inputNr == 1))
11630
118k
        goto done;
11631
31.2M
    cur = ctxt->input->cur[0];
11632
31.2M
    next = ctxt->input->cur[1];
11633
11634
31.2M
    id = ctxt->input->id;
11635
31.2M
          cons = CUR_CONSUMED;
11636
31.2M
    if ((cur == '<') && (next == '/')) {
11637
2.44M
        ctxt->instate = XML_PARSER_END_TAG;
11638
2.44M
        break;
11639
28.7M
          } else if ((cur == '<') && (next == '?')) {
11640
223k
        if ((!terminate) &&
11641
223k
            (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11642
113k
                        ctxt->progressive = XML_PARSER_PI;
11643
113k
      goto done;
11644
113k
                    }
11645
109k
        xmlParsePI(ctxt);
11646
109k
        ctxt->instate = XML_PARSER_CONTENT;
11647
109k
                    ctxt->progressive = 1;
11648
28.5M
    } else if ((cur == '<') && (next != '!')) {
11649
7.05M
        ctxt->instate = XML_PARSER_START_TAG;
11650
7.05M
        break;
11651
21.4M
    } else if ((cur == '<') && (next == '!') &&
11652
21.4M
               (ctxt->input->cur[2] == '-') &&
11653
21.4M
         (ctxt->input->cur[3] == '-')) {
11654
413k
        int term;
11655
11656
413k
              if (avail < 4)
11657
0
            goto done;
11658
413k
        ctxt->input->cur += 4;
11659
413k
        term = xmlParseLookupSequence(ctxt, '-', '-', '>');
11660
413k
        ctxt->input->cur -= 4;
11661
413k
        if ((!terminate) && (term < 0)) {
11662
173k
                        ctxt->progressive = XML_PARSER_COMMENT;
11663
173k
      goto done;
11664
173k
                    }
11665
240k
        xmlParseComment(ctxt);
11666
240k
        ctxt->instate = XML_PARSER_CONTENT;
11667
240k
                    ctxt->progressive = 1;
11668
21.0M
    } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
11669
21.0M
        (ctxt->input->cur[2] == '[') &&
11670
21.0M
        (ctxt->input->cur[3] == 'C') &&
11671
21.0M
        (ctxt->input->cur[4] == 'D') &&
11672
21.0M
        (ctxt->input->cur[5] == 'A') &&
11673
21.0M
        (ctxt->input->cur[6] == 'T') &&
11674
21.0M
        (ctxt->input->cur[7] == 'A') &&
11675
21.0M
        (ctxt->input->cur[8] == '[')) {
11676
50.7k
        SKIP(9);
11677
50.7k
        ctxt->instate = XML_PARSER_CDATA_SECTION;
11678
50.7k
        break;
11679
21.0M
    } else if ((cur == '<') && (next == '!') &&
11680
21.0M
               (avail < 9)) {
11681
21.9k
        goto done;
11682
20.9M
    } else if (cur == '&') {
11683
8.76M
        if ((!terminate) &&
11684
8.76M
            (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
11685
253k
      goto done;
11686
8.51M
        xmlParseReference(ctxt);
11687
12.2M
    } else {
11688
        /* TODO Avoid the extra copy, handle directly !!! */
11689
        /*
11690
         * Goal of the following test is:
11691
         *  - minimize calls to the SAX 'character' callback
11692
         *    when they are mergeable
11693
         *  - handle a problem for isBlank when we only parse
11694
         *    a sequence of blank chars and the next one is
11695
         *    not available to check against '<' presence.
11696
         *  - tries to homogenize the differences in SAX
11697
         *    callbacks between the push and pull versions
11698
         *    of the parser.
11699
         */
11700
12.2M
        if ((ctxt->inputNr == 1) &&
11701
12.2M
            (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
11702
9.08M
      if (!terminate) {
11703
8.98M
          if (ctxt->progressive) {
11704
8.98M
        if ((lastlt == NULL) ||
11705
8.98M
            (ctxt->input->cur > lastlt))
11706
849k
            goto done;
11707
8.98M
          } else if (xmlParseLookupSequence(ctxt,
11708
0
                                            '<', 0, 0) < 0) {
11709
0
        goto done;
11710
0
          }
11711
8.98M
      }
11712
9.08M
                    }
11713
11.3M
        ctxt->checkIndex = 0;
11714
11.3M
        xmlParseCharData(ctxt, 0);
11715
11.3M
    }
11716
20.2M
    if ((cons == CUR_CONSUMED) && (id == ctxt->input->id)) {
11717
173k
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
11718
173k
                    "detected an error in element content\n");
11719
173k
        xmlHaltParser(ctxt);
11720
173k
        break;
11721
173k
    }
11722
20.0M
    break;
11723
20.2M
      }
11724
20.0M
            case XML_PARSER_END_TAG:
11725
2.54M
    if (avail < 2)
11726
0
        goto done;
11727
2.54M
    if (!terminate) {
11728
2.47M
        if (ctxt->progressive) {
11729
            /* > can be found unescaped in attribute values */
11730
2.47M
            if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
11731
102k
          goto done;
11732
2.47M
        } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11733
0
      goto done;
11734
0
        }
11735
2.47M
    }
11736
2.44M
    if (ctxt->sax2) {
11737
1.07M
              xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
11738
1.07M
        nameNsPop(ctxt);
11739
1.07M
    }
11740
1.36M
#ifdef LIBXML_SAX1_ENABLED
11741
1.36M
      else
11742
1.36M
        xmlParseEndTag1(ctxt, 0);
11743
2.44M
#endif /* LIBXML_SAX1_ENABLED */
11744
2.44M
    if (ctxt->instate == XML_PARSER_EOF) {
11745
        /* Nothing */
11746
2.44M
    } else if (ctxt->nameNr == 0) {
11747
75.9k
        ctxt->instate = XML_PARSER_EPILOG;
11748
2.36M
    } else {
11749
2.36M
        ctxt->instate = XML_PARSER_CONTENT;
11750
2.36M
    }
11751
2.44M
    break;
11752
697k
            case XML_PARSER_CDATA_SECTION: {
11753
          /*
11754
     * The Push mode need to have the SAX callback for
11755
     * cdataBlock merge back contiguous callbacks.
11756
     */
11757
697k
    int base;
11758
11759
697k
    base = xmlParseLookupSequence(ctxt, ']', ']', '>');
11760
697k
    if (base < 0) {
11761
426k
        if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
11762
383k
            int tmp;
11763
11764
383k
      tmp = xmlCheckCdataPush(ctxt->input->cur,
11765
383k
                              XML_PARSER_BIG_BUFFER_SIZE, 0);
11766
383k
      if (tmp < 0) {
11767
6.19k
          tmp = -tmp;
11768
6.19k
          ctxt->input->cur += tmp;
11769
6.19k
          goto encoding_error;
11770
6.19k
      }
11771
377k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
11772
377k
          if (ctxt->sax->cdataBlock != NULL)
11773
114k
        ctxt->sax->cdataBlock(ctxt->userData,
11774
114k
                              ctxt->input->cur, tmp);
11775
263k
          else if (ctxt->sax->characters != NULL)
11776
263k
        ctxt->sax->characters(ctxt->userData,
11777
263k
                              ctxt->input->cur, tmp);
11778
377k
      }
11779
377k
      if (ctxt->instate == XML_PARSER_EOF)
11780
0
          goto done;
11781
377k
      SKIPL(tmp);
11782
377k
      ctxt->checkIndex = 0;
11783
377k
        }
11784
419k
        goto done;
11785
426k
    } else {
11786
271k
        int tmp;
11787
11788
271k
        tmp = xmlCheckCdataPush(ctxt->input->cur, base, 1);
11789
271k
        if ((tmp < 0) || (tmp != base)) {
11790
240k
      tmp = -tmp;
11791
240k
      ctxt->input->cur += tmp;
11792
240k
      goto encoding_error;
11793
240k
        }
11794
31.0k
        if ((ctxt->sax != NULL) && (base == 0) &&
11795
31.0k
            (ctxt->sax->cdataBlock != NULL) &&
11796
31.0k
            (!ctxt->disableSAX)) {
11797
      /*
11798
       * Special case to provide identical behaviour
11799
       * between pull and push parsers on empty CDATA
11800
       * sections
11801
       */
11802
683
       if ((ctxt->input->cur - ctxt->input->base >= 9) &&
11803
683
           (!strncmp((const char *)&ctxt->input->cur[-9],
11804
683
                     "<![CDATA[", 9)))
11805
683
           ctxt->sax->cdataBlock(ctxt->userData,
11806
683
                                 BAD_CAST "", 0);
11807
30.4k
        } else if ((ctxt->sax != NULL) && (base > 0) &&
11808
30.4k
      (!ctxt->disableSAX)) {
11809
29.9k
      if (ctxt->sax->cdataBlock != NULL)
11810
17.0k
          ctxt->sax->cdataBlock(ctxt->userData,
11811
17.0k
              ctxt->input->cur, base);
11812
12.8k
      else if (ctxt->sax->characters != NULL)
11813
12.8k
          ctxt->sax->characters(ctxt->userData,
11814
12.8k
              ctxt->input->cur, base);
11815
29.9k
        }
11816
31.0k
        if (ctxt->instate == XML_PARSER_EOF)
11817
0
      goto done;
11818
31.0k
        SKIPL(base + 3);
11819
31.0k
        ctxt->checkIndex = 0;
11820
31.0k
        ctxt->instate = XML_PARSER_CONTENT;
11821
#ifdef DEBUG_PUSH
11822
        xmlGenericError(xmlGenericErrorContext,
11823
          "PP: entering CONTENT\n");
11824
#endif
11825
31.0k
    }
11826
31.0k
    break;
11827
697k
      }
11828
889k
            case XML_PARSER_MISC:
11829
889k
    SKIP_BLANKS;
11830
889k
    if (ctxt->input->buf == NULL)
11831
0
        avail = ctxt->input->length -
11832
0
                (ctxt->input->cur - ctxt->input->base);
11833
889k
    else
11834
889k
        avail = xmlBufUse(ctxt->input->buf->buffer) -
11835
889k
                (ctxt->input->cur - ctxt->input->base);
11836
889k
    if (avail < 2)
11837
9.64k
        goto done;
11838
879k
    cur = ctxt->input->cur[0];
11839
879k
    next = ctxt->input->cur[1];
11840
879k
          if ((cur == '<') && (next == '?')) {
11841
56.1k
        if ((!terminate) &&
11842
56.1k
            (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11843
8.78k
                        ctxt->progressive = XML_PARSER_PI;
11844
8.78k
      goto done;
11845
8.78k
                    }
11846
#ifdef DEBUG_PUSH
11847
        xmlGenericError(xmlGenericErrorContext,
11848
          "PP: Parsing PI\n");
11849
#endif
11850
47.4k
        xmlParsePI(ctxt);
11851
47.4k
        if (ctxt->instate == XML_PARSER_EOF)
11852
0
      goto done;
11853
47.4k
        ctxt->instate = XML_PARSER_MISC;
11854
47.4k
                    ctxt->progressive = 1;
11855
47.4k
        ctxt->checkIndex = 0;
11856
823k
    } else if ((cur == '<') && (next == '!') &&
11857
823k
        (ctxt->input->cur[2] == '-') &&
11858
823k
        (ctxt->input->cur[3] == '-')) {
11859
52.8k
        if ((!terminate) &&
11860
52.8k
            (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11861
31.2k
                        ctxt->progressive = XML_PARSER_COMMENT;
11862
31.2k
      goto done;
11863
31.2k
                    }
11864
#ifdef DEBUG_PUSH
11865
        xmlGenericError(xmlGenericErrorContext,
11866
          "PP: Parsing Comment\n");
11867
#endif
11868
21.6k
        xmlParseComment(ctxt);
11869
21.6k
        if (ctxt->instate == XML_PARSER_EOF)
11870
0
      goto done;
11871
21.6k
        ctxt->instate = XML_PARSER_MISC;
11872
21.6k
                    ctxt->progressive = 1;
11873
21.6k
        ctxt->checkIndex = 0;
11874
770k
    } else if ((cur == '<') && (next == '!') &&
11875
770k
        (ctxt->input->cur[2] == 'D') &&
11876
770k
        (ctxt->input->cur[3] == 'O') &&
11877
770k
        (ctxt->input->cur[4] == 'C') &&
11878
770k
        (ctxt->input->cur[5] == 'T') &&
11879
770k
        (ctxt->input->cur[6] == 'Y') &&
11880
770k
        (ctxt->input->cur[7] == 'P') &&
11881
770k
        (ctxt->input->cur[8] == 'E')) {
11882
355k
        if ((!terminate) &&
11883
355k
            (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) {
11884
45.4k
                        ctxt->progressive = XML_PARSER_DTD;
11885
45.4k
      goto done;
11886
45.4k
                    }
11887
#ifdef DEBUG_PUSH
11888
        xmlGenericError(xmlGenericErrorContext,
11889
          "PP: Parsing internal subset\n");
11890
#endif
11891
310k
        ctxt->inSubset = 1;
11892
310k
                    ctxt->progressive = 0;
11893
310k
        ctxt->checkIndex = 0;
11894
310k
        xmlParseDocTypeDecl(ctxt);
11895
310k
        if (ctxt->instate == XML_PARSER_EOF)
11896
0
      goto done;
11897
310k
        if (RAW == '[') {
11898
236k
      ctxt->instate = XML_PARSER_DTD;
11899
#ifdef DEBUG_PUSH
11900
      xmlGenericError(xmlGenericErrorContext,
11901
        "PP: entering DTD\n");
11902
#endif
11903
236k
        } else {
11904
      /*
11905
       * Create and update the external subset.
11906
       */
11907
74.1k
      ctxt->inSubset = 2;
11908
74.1k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11909
74.1k
          (ctxt->sax->externalSubset != NULL))
11910
67.8k
          ctxt->sax->externalSubset(ctxt->userData,
11911
67.8k
            ctxt->intSubName, ctxt->extSubSystem,
11912
67.8k
            ctxt->extSubURI);
11913
74.1k
      ctxt->inSubset = 0;
11914
74.1k
      xmlCleanSpecialAttr(ctxt);
11915
74.1k
      ctxt->instate = XML_PARSER_PROLOG;
11916
#ifdef DEBUG_PUSH
11917
      xmlGenericError(xmlGenericErrorContext,
11918
        "PP: entering PROLOG\n");
11919
#endif
11920
74.1k
        }
11921
414k
    } else if ((cur == '<') && (next == '!') &&
11922
414k
               (avail < 9)) {
11923
9.58k
        goto done;
11924
405k
    } else {
11925
405k
        ctxt->instate = XML_PARSER_START_TAG;
11926
405k
        ctxt->progressive = XML_PARSER_START_TAG;
11927
405k
        xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11928
#ifdef DEBUG_PUSH
11929
        xmlGenericError(xmlGenericErrorContext,
11930
          "PP: entering START_TAG\n");
11931
#endif
11932
405k
    }
11933
784k
    break;
11934
784k
            case XML_PARSER_PROLOG:
11935
287k
    SKIP_BLANKS;
11936
287k
    if (ctxt->input->buf == NULL)
11937
0
        avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11938
287k
    else
11939
287k
        avail = xmlBufUse(ctxt->input->buf->buffer) -
11940
287k
                            (ctxt->input->cur - ctxt->input->base);
11941
287k
    if (avail < 2)
11942
10.7k
        goto done;
11943
277k
    cur = ctxt->input->cur[0];
11944
277k
    next = ctxt->input->cur[1];
11945
277k
          if ((cur == '<') && (next == '?')) {
11946
49.5k
        if ((!terminate) &&
11947
49.5k
            (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11948
16.9k
                        ctxt->progressive = XML_PARSER_PI;
11949
16.9k
      goto done;
11950
16.9k
                    }
11951
#ifdef DEBUG_PUSH
11952
        xmlGenericError(xmlGenericErrorContext,
11953
          "PP: Parsing PI\n");
11954
#endif
11955
32.6k
        xmlParsePI(ctxt);
11956
32.6k
        if (ctxt->instate == XML_PARSER_EOF)
11957
0
      goto done;
11958
32.6k
        ctxt->instate = XML_PARSER_PROLOG;
11959
32.6k
                    ctxt->progressive = 1;
11960
227k
    } else if ((cur == '<') && (next == '!') &&
11961
227k
        (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11962
42.2k
        if ((!terminate) &&
11963
42.2k
            (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11964
9.39k
                        ctxt->progressive = XML_PARSER_COMMENT;
11965
9.39k
      goto done;
11966
9.39k
                    }
11967
#ifdef DEBUG_PUSH
11968
        xmlGenericError(xmlGenericErrorContext,
11969
          "PP: Parsing Comment\n");
11970
#endif
11971
32.8k
        xmlParseComment(ctxt);
11972
32.8k
        if (ctxt->instate == XML_PARSER_EOF)
11973
0
      goto done;
11974
32.8k
        ctxt->instate = XML_PARSER_PROLOG;
11975
32.8k
                    ctxt->progressive = 1;
11976
185k
    } else if ((cur == '<') && (next == '!') &&
11977
185k
               (avail < 4)) {
11978
677
        goto done;
11979
184k
    } else {
11980
184k
        ctxt->instate = XML_PARSER_START_TAG;
11981
184k
        if (ctxt->progressive == 0)
11982
153k
      ctxt->progressive = XML_PARSER_START_TAG;
11983
184k
        xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11984
#ifdef DEBUG_PUSH
11985
        xmlGenericError(xmlGenericErrorContext,
11986
          "PP: entering START_TAG\n");
11987
#endif
11988
184k
    }
11989
250k
    break;
11990
250k
            case XML_PARSER_EPILOG:
11991
101k
    SKIP_BLANKS;
11992
101k
    if (ctxt->input->buf == NULL)
11993
0
        avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11994
101k
    else
11995
101k
        avail = xmlBufUse(ctxt->input->buf->buffer) -
11996
101k
                            (ctxt->input->cur - ctxt->input->base);
11997
101k
    if (avail < 2)
11998
68.8k
        goto done;
11999
32.4k
    cur = ctxt->input->cur[0];
12000
32.4k
    next = ctxt->input->cur[1];
12001
32.4k
          if ((cur == '<') && (next == '?')) {
12002
10.4k
        if ((!terminate) &&
12003
10.4k
            (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
12004
8.35k
                        ctxt->progressive = XML_PARSER_PI;
12005
8.35k
      goto done;
12006
8.35k
                    }
12007
#ifdef DEBUG_PUSH
12008
        xmlGenericError(xmlGenericErrorContext,
12009
          "PP: Parsing PI\n");
12010
#endif
12011
2.14k
        xmlParsePI(ctxt);
12012
2.14k
        if (ctxt->instate == XML_PARSER_EOF)
12013
0
      goto done;
12014
2.14k
        ctxt->instate = XML_PARSER_EPILOG;
12015
2.14k
                    ctxt->progressive = 1;
12016
21.9k
    } else if ((cur == '<') && (next == '!') &&
12017
21.9k
        (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
12018
7.73k
        if ((!terminate) &&
12019
7.73k
            (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
12020
6.49k
                        ctxt->progressive = XML_PARSER_COMMENT;
12021
6.49k
      goto done;
12022
6.49k
                    }
12023
#ifdef DEBUG_PUSH
12024
        xmlGenericError(xmlGenericErrorContext,
12025
          "PP: Parsing Comment\n");
12026
#endif
12027
1.24k
        xmlParseComment(ctxt);
12028
1.24k
        if (ctxt->instate == XML_PARSER_EOF)
12029
0
      goto done;
12030
1.24k
        ctxt->instate = XML_PARSER_EPILOG;
12031
1.24k
                    ctxt->progressive = 1;
12032
14.2k
    } else if ((cur == '<') && (next == '!') &&
12033
14.2k
               (avail < 4)) {
12034
305
        goto done;
12035
13.9k
    } else {
12036
13.9k
        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
12037
13.9k
        xmlHaltParser(ctxt);
12038
#ifdef DEBUG_PUSH
12039
        xmlGenericError(xmlGenericErrorContext,
12040
          "PP: entering EOF\n");
12041
#endif
12042
13.9k
        if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
12043
13.9k
      ctxt->sax->endDocument(ctxt->userData);
12044
13.9k
        goto done;
12045
13.9k
    }
12046
3.38k
    break;
12047
4.48M
            case XML_PARSER_DTD: {
12048
          /*
12049
     * Sorry but progressive parsing of the internal subset
12050
     * is not expected to be supported. We first check that
12051
     * the full content of the internal subset is available and
12052
     * the parsing is launched only at that point.
12053
     * Internal subset ends up with "']' S? '>'" in an unescaped
12054
     * section and not in a ']]>' sequence which are conditional
12055
     * sections (whoever argued to keep that crap in XML deserve
12056
     * a place in hell !).
12057
     */
12058
4.48M
    int base, i;
12059
4.48M
    xmlChar *buf;
12060
4.48M
          xmlChar quote = 0;
12061
4.48M
                size_t use;
12062
12063
4.48M
    base = ctxt->input->cur - ctxt->input->base;
12064
4.48M
    if (base < 0) return(0);
12065
4.48M
    if (ctxt->checkIndex > base)
12066
2.04M
        base = ctxt->checkIndex;
12067
4.48M
    buf = xmlBufContent(ctxt->input->buf->buffer);
12068
4.48M
                use = xmlBufUse(ctxt->input->buf->buffer);
12069
293G
    for (;(unsigned int) base < use; base++) {
12070
293G
        if (quote != 0) {
12071
181G
            if (buf[base] == quote)
12072
13.0G
          quote = 0;
12073
181G
      continue;
12074
181G
        }
12075
111G
        if ((quote == 0) && (buf[base] == '<')) {
12076
3.35G
            int found  = 0;
12077
      /* special handling of comments */
12078
3.35G
            if (((unsigned int) base + 4 < use) &&
12079
3.35G
          (buf[base + 1] == '!') &&
12080
3.35G
          (buf[base + 2] == '-') &&
12081
3.35G
          (buf[base + 3] == '-')) {
12082
8.46G
          for (;(unsigned int) base + 3 < use; base++) {
12083
8.46G
        if ((buf[base] == '-') &&
12084
8.46G
            (buf[base + 1] == '-') &&
12085
8.46G
            (buf[base + 2] == '>')) {
12086
10.3M
            found = 1;
12087
10.3M
            base += 2;
12088
10.3M
            break;
12089
10.3M
        }
12090
8.46G
                }
12091
10.5M
          if (!found) {
12092
#if 0
12093
              fprintf(stderr, "unfinished comment\n");
12094
#endif
12095
209k
              break; /* for */
12096
209k
                }
12097
10.3M
                continue;
12098
10.5M
      }
12099
3.35G
        }
12100
111G
        if (buf[base] == '"') {
12101
13.0G
            quote = '"';
12102
13.0G
      continue;
12103
13.0G
        }
12104
98.4G
        if (buf[base] == '\'') {
12105
30.6M
            quote = '\'';
12106
30.6M
      continue;
12107
30.6M
        }
12108
98.4G
        if (buf[base] == ']') {
12109
#if 0
12110
            fprintf(stderr, "%c%c%c%c: ", buf[base],
12111
              buf[base + 1], buf[base + 2], buf[base + 3]);
12112
#endif
12113
4.89M
            if ((unsigned int) base +1 >= use)
12114
1.02k
          break;
12115
4.89M
      if (buf[base + 1] == ']') {
12116
          /* conditional crap, skip both ']' ! */
12117
3.57M
          base++;
12118
3.57M
          continue;
12119
3.57M
      }
12120
2.29M
            for (i = 1; (unsigned int) base + i < use; i++) {
12121
2.29M
          if (buf[base + i] == '>') {
12122
#if 0
12123
              fprintf(stderr, "found\n");
12124
#endif
12125
184k
              goto found_end_int_subset;
12126
184k
          }
12127
2.10M
          if (!IS_BLANK_CH(buf[base + i])) {
12128
#if 0
12129
              fprintf(stderr, "not found\n");
12130
#endif
12131
1.13M
              goto not_end_of_int_subset;
12132
1.13M
          }
12133
2.10M
      }
12134
#if 0
12135
      fprintf(stderr, "end of stream\n");
12136
#endif
12137
384
            break;
12138
12139
1.32M
        }
12140
98.4G
not_end_of_int_subset:
12141
98.4G
                    continue; /* for */
12142
98.4G
    }
12143
    /*
12144
     * We didn't find the end of the internal subset
12145
     */
12146
4.29M
                if (quote == 0)
12147
2.07M
                    ctxt->checkIndex = base;
12148
2.22M
                else
12149
2.22M
                    ctxt->checkIndex = 0;
12150
#ifdef DEBUG_PUSH
12151
    if (next == 0)
12152
        xmlGenericError(xmlGenericErrorContext,
12153
          "PP: lookup of int subset end filed\n");
12154
#endif
12155
4.29M
          goto done;
12156
12157
184k
found_end_int_subset:
12158
184k
                ctxt->checkIndex = 0;
12159
184k
    xmlParseInternalSubset(ctxt);
12160
184k
    if (ctxt->instate == XML_PARSER_EOF)
12161
5.46k
        goto done;
12162
179k
    ctxt->inSubset = 2;
12163
179k
    if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
12164
179k
        (ctxt->sax->externalSubset != NULL))
12165
150k
        ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
12166
150k
          ctxt->extSubSystem, ctxt->extSubURI);
12167
179k
    ctxt->inSubset = 0;
12168
179k
    xmlCleanSpecialAttr(ctxt);
12169
179k
    if (ctxt->instate == XML_PARSER_EOF)
12170
4.70k
        goto done;
12171
174k
    ctxt->instate = XML_PARSER_PROLOG;
12172
174k
    ctxt->checkIndex = 0;
12173
#ifdef DEBUG_PUSH
12174
    xmlGenericError(xmlGenericErrorContext,
12175
      "PP: entering PROLOG\n");
12176
#endif
12177
174k
                break;
12178
179k
      }
12179
0
            case XML_PARSER_COMMENT:
12180
0
    xmlGenericError(xmlGenericErrorContext,
12181
0
      "PP: internal error, state == COMMENT\n");
12182
0
    ctxt->instate = XML_PARSER_CONTENT;
12183
#ifdef DEBUG_PUSH
12184
    xmlGenericError(xmlGenericErrorContext,
12185
      "PP: entering CONTENT\n");
12186
#endif
12187
0
    break;
12188
0
            case XML_PARSER_IGNORE:
12189
0
    xmlGenericError(xmlGenericErrorContext,
12190
0
      "PP: internal error, state == IGNORE");
12191
0
          ctxt->instate = XML_PARSER_DTD;
12192
#ifdef DEBUG_PUSH
12193
    xmlGenericError(xmlGenericErrorContext,
12194
      "PP: entering DTD\n");
12195
#endif
12196
0
          break;
12197
0
            case XML_PARSER_PI:
12198
0
    xmlGenericError(xmlGenericErrorContext,
12199
0
      "PP: internal error, state == PI\n");
12200
0
    ctxt->instate = XML_PARSER_CONTENT;
12201
#ifdef DEBUG_PUSH
12202
    xmlGenericError(xmlGenericErrorContext,
12203
      "PP: entering CONTENT\n");
12204
#endif
12205
0
    break;
12206
0
            case XML_PARSER_ENTITY_DECL:
12207
0
    xmlGenericError(xmlGenericErrorContext,
12208
0
      "PP: internal error, state == ENTITY_DECL\n");
12209
0
    ctxt->instate = XML_PARSER_DTD;
12210
#ifdef DEBUG_PUSH
12211
    xmlGenericError(xmlGenericErrorContext,
12212
      "PP: entering DTD\n");
12213
#endif
12214
0
    break;
12215
0
            case XML_PARSER_ENTITY_VALUE:
12216
0
    xmlGenericError(xmlGenericErrorContext,
12217
0
      "PP: internal error, state == ENTITY_VALUE\n");
12218
0
    ctxt->instate = XML_PARSER_CONTENT;
12219
#ifdef DEBUG_PUSH
12220
    xmlGenericError(xmlGenericErrorContext,
12221
      "PP: entering DTD\n");
12222
#endif
12223
0
    break;
12224
0
            case XML_PARSER_ATTRIBUTE_VALUE:
12225
0
    xmlGenericError(xmlGenericErrorContext,
12226
0
      "PP: internal error, state == ATTRIBUTE_VALUE\n");
12227
0
    ctxt->instate = XML_PARSER_START_TAG;
12228
#ifdef DEBUG_PUSH
12229
    xmlGenericError(xmlGenericErrorContext,
12230
      "PP: entering START_TAG\n");
12231
#endif
12232
0
    break;
12233
0
            case XML_PARSER_SYSTEM_LITERAL:
12234
0
    xmlGenericError(xmlGenericErrorContext,
12235
0
      "PP: internal error, state == SYSTEM_LITERAL\n");
12236
0
    ctxt->instate = XML_PARSER_START_TAG;
12237
#ifdef DEBUG_PUSH
12238
    xmlGenericError(xmlGenericErrorContext,
12239
      "PP: entering START_TAG\n");
12240
#endif
12241
0
    break;
12242
0
            case XML_PARSER_PUBLIC_LITERAL:
12243
0
    xmlGenericError(xmlGenericErrorContext,
12244
0
      "PP: internal error, state == PUBLIC_LITERAL\n");
12245
0
    ctxt->instate = XML_PARSER_START_TAG;
12246
#ifdef DEBUG_PUSH
12247
    xmlGenericError(xmlGenericErrorContext,
12248
      "PP: entering START_TAG\n");
12249
#endif
12250
0
    break;
12251
50.6M
  }
12252
50.6M
    }
12253
8.19M
done:
12254
#ifdef DEBUG_PUSH
12255
    xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
12256
#endif
12257
8.19M
    return(ret);
12258
246k
encoding_error:
12259
246k
    {
12260
246k
        char buffer[150];
12261
12262
246k
  snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
12263
246k
      ctxt->input->cur[0], ctxt->input->cur[1],
12264
246k
      ctxt->input->cur[2], ctxt->input->cur[3]);
12265
246k
  __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
12266
246k
         "Input is not proper UTF-8, indicate encoding !\n%s",
12267
246k
         BAD_CAST buffer, NULL);
12268
246k
    }
12269
246k
    return(0);
12270
9.19M
}
12271
12272
/**
12273
 * xmlParseCheckTransition:
12274
 * @ctxt:  an XML parser context
12275
 * @chunk:  a char array
12276
 * @size:  the size in byte of the chunk
12277
 *
12278
 * Check depending on the current parser state if the chunk given must be
12279
 * processed immediately or one need more data to advance on parsing.
12280
 *
12281
 * Returns -1 in case of error, 0 if the push is not needed and 1 if needed
12282
 */
12283
static int
12284
10.2M
xmlParseCheckTransition(xmlParserCtxtPtr ctxt, const char *chunk, int size) {
12285
10.2M
    if ((ctxt == NULL) || (chunk == NULL) || (size < 0))
12286
0
        return(-1);
12287
10.2M
    if (ctxt->instate == XML_PARSER_START_TAG) {
12288
1.56M
        if (memchr(chunk, '>', size) != NULL)
12289
948k
            return(1);
12290
617k
        return(0);
12291
1.56M
    }
12292
8.72M
    if (ctxt->progressive == XML_PARSER_COMMENT) {
12293
303k
        if (memchr(chunk, '>', size) != NULL)
12294
208k
            return(1);
12295
94.8k
        return(0);
12296
303k
    }
12297
8.42M
    if (ctxt->instate == XML_PARSER_CDATA_SECTION) {
12298
877k
        if (memchr(chunk, '>', size) != NULL)
12299
639k
            return(1);
12300
238k
        return(0);
12301
877k
    }
12302
7.54M
    if (ctxt->progressive == XML_PARSER_PI) {
12303
177k
        if (memchr(chunk, '>', size) != NULL)
12304
138k
            return(1);
12305
39.2k
        return(0);
12306
177k
    }
12307
7.36M
    if (ctxt->instate == XML_PARSER_END_TAG) {
12308
106k
        if (memchr(chunk, '>', size) != NULL)
12309
90.8k
            return(1);
12310
15.5k
        return(0);
12311
106k
    }
12312
7.26M
    if ((ctxt->progressive == XML_PARSER_DTD) ||
12313
7.26M
        (ctxt->instate == XML_PARSER_DTD)) {
12314
5.10M
        if (memchr(chunk, '>', size) != NULL)
12315
4.22M
            return(1);
12316
876k
        return(0);
12317
5.10M
    }
12318
2.16M
    return(1);
12319
7.26M
}
12320
12321
/**
12322
 * xmlParseChunk:
12323
 * @ctxt:  an XML parser context
12324
 * @chunk:  an char array
12325
 * @size:  the size in byte of the chunk
12326
 * @terminate:  last chunk indicator
12327
 *
12328
 * Parse a Chunk of memory
12329
 *
12330
 * Returns zero if no error, the xmlParserErrors otherwise.
12331
 */
12332
int
12333
xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
12334
18.3M
              int terminate) {
12335
18.3M
    int end_in_lf = 0;
12336
18.3M
    int remain = 0;
12337
18.3M
    size_t old_avail = 0;
12338
18.3M
    size_t avail = 0;
12339
12340
18.3M
    if (ctxt == NULL)
12341
0
        return(XML_ERR_INTERNAL_ERROR);
12342
18.3M
    if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12343
7.33M
        return(ctxt->errNo);
12344
11.0M
    if (ctxt->instate == XML_PARSER_EOF)
12345
1.63k
        return(-1);
12346
11.0M
    if (ctxt->instate == XML_PARSER_START)
12347
1.28M
        xmlDetectSAX2(ctxt);
12348
11.0M
    if ((size > 0) && (chunk != NULL) && (!terminate) &&
12349
11.0M
        (chunk[size - 1] == '\r')) {
12350
151k
  end_in_lf = 1;
12351
151k
  size--;
12352
151k
    }
12353
12354
11.0M
xmldecl_done:
12355
12356
11.0M
    if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
12357
11.0M
        (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF))  {
12358
10.7M
  size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12359
10.7M
  size_t cur = ctxt->input->cur - ctxt->input->base;
12360
10.7M
  int res;
12361
12362
10.7M
        old_avail = xmlBufUse(ctxt->input->buf->buffer);
12363
        /*
12364
         * Specific handling if we autodetected an encoding, we should not
12365
         * push more than the first line ... which depend on the encoding
12366
         * And only push the rest once the final encoding was detected
12367
         */
12368
10.7M
        if ((ctxt->instate == XML_PARSER_START) && (ctxt->input != NULL) &&
12369
10.7M
            (ctxt->input->buf != NULL) && (ctxt->input->buf->encoder != NULL)) {
12370
86.3k
            unsigned int len = 45;
12371
12372
86.3k
            if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12373
86.3k
                               BAD_CAST "UTF-16")) ||
12374
86.3k
                (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12375
30.1k
                               BAD_CAST "UTF16")))
12376
56.2k
                len = 90;
12377
30.1k
            else if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12378
30.1k
                                    BAD_CAST "UCS-4")) ||
12379
30.1k
                     (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12380
29.7k
                                    BAD_CAST "UCS4")))
12381
405
                len = 180;
12382
12383
86.3k
            if (ctxt->input->buf->rawconsumed < len)
12384
11.5k
                len -= ctxt->input->buf->rawconsumed;
12385
12386
            /*
12387
             * Change size for reading the initial declaration only
12388
             * if size is greater than len. Otherwise, memmove in xmlBufferAdd
12389
             * will blindly copy extra bytes from memory.
12390
             */
12391
86.3k
            if ((unsigned int) size > len) {
12392
58.7k
                remain = size - len;
12393
58.7k
                size = len;
12394
58.7k
            } else {
12395
27.6k
                remain = 0;
12396
27.6k
            }
12397
86.3k
        }
12398
10.7M
  res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12399
10.7M
        xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
12400
10.7M
  if (res < 0) {
12401
4.35k
      ctxt->errNo = XML_PARSER_EOF;
12402
4.35k
      xmlHaltParser(ctxt);
12403
4.35k
      return (XML_PARSER_EOF);
12404
4.35k
  }
12405
#ifdef DEBUG_PUSH
12406
  xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12407
#endif
12408
12409
10.7M
    } else if (ctxt->instate != XML_PARSER_EOF) {
12410
315k
  if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
12411
315k
      xmlParserInputBufferPtr in = ctxt->input->buf;
12412
315k
      if ((in->encoder != NULL) && (in->buffer != NULL) &&
12413
315k
        (in->raw != NULL)) {
12414
25.2k
    int nbchars;
12415
25.2k
    size_t base = xmlBufGetInputBase(in->buffer, ctxt->input);
12416
25.2k
    size_t current = ctxt->input->cur - ctxt->input->base;
12417
12418
25.2k
    nbchars = xmlCharEncInput(in, terminate);
12419
25.2k
    xmlBufSetInputBaseCur(in->buffer, ctxt->input, base, current);
12420
25.2k
    if (nbchars < 0) {
12421
        /* TODO 2.6.0 */
12422
4.02k
        xmlGenericError(xmlGenericErrorContext,
12423
4.02k
            "xmlParseChunk: encoder error\n");
12424
4.02k
                    xmlHaltParser(ctxt);
12425
4.02k
        return(XML_ERR_INVALID_ENCODING);
12426
4.02k
    }
12427
25.2k
      }
12428
315k
  }
12429
315k
    }
12430
11.0M
    if (remain != 0) {
12431
57.0k
        xmlParseTryOrFinish(ctxt, 0);
12432
11.0M
    } else {
12433
11.0M
        if ((ctxt->input != NULL) && (ctxt->input->buf != NULL))
12434
11.0M
            avail = xmlBufUse(ctxt->input->buf->buffer);
12435
        /*
12436
         * Depending on the current state it may not be such
12437
         * a good idea to try parsing if there is nothing in the chunk
12438
         * which would be worth doing a parser state transition and we
12439
         * need to wait for more data
12440
         */
12441
11.0M
        if ((terminate) || (avail > XML_MAX_TEXT_LENGTH) ||
12442
11.0M
            (old_avail == 0) || (avail == 0) ||
12443
11.0M
            (xmlParseCheckTransition(ctxt,
12444
10.2M
                       (const char *)&ctxt->input->base[old_avail],
12445
10.2M
                                     avail - old_avail)))
12446
9.13M
            xmlParseTryOrFinish(ctxt, terminate);
12447
11.0M
    }
12448
11.0M
    if (ctxt->instate == XML_PARSER_EOF)
12449
301k
        return(ctxt->errNo);
12450
12451
10.7M
    if ((ctxt->input != NULL) &&
12452
10.7M
         (((ctxt->input->end - ctxt->input->cur) > XML_MAX_LOOKUP_LIMIT) ||
12453
10.7M
         ((ctxt->input->cur - ctxt->input->base) > XML_MAX_LOOKUP_LIMIT)) &&
12454
10.7M
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
12455
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
12456
0
        xmlHaltParser(ctxt);
12457
0
    }
12458
10.7M
    if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12459
230k
        return(ctxt->errNo);
12460
12461
10.5M
    if (remain != 0) {
12462
55.0k
        chunk += size;
12463
55.0k
        size = remain;
12464
55.0k
        remain = 0;
12465
55.0k
        goto xmldecl_done;
12466
55.0k
    }
12467
10.4M
    if ((end_in_lf == 1) && (ctxt->input != NULL) &&
12468
10.4M
        (ctxt->input->buf != NULL)) {
12469
150k
  size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
12470
150k
           ctxt->input);
12471
150k
  size_t current = ctxt->input->cur - ctxt->input->base;
12472
12473
150k
  xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
12474
12475
150k
  xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
12476
150k
            base, current);
12477
150k
    }
12478
10.4M
    if (terminate) {
12479
  /*
12480
   * Check for termination
12481
   */
12482
166k
  int cur_avail = 0;
12483
12484
166k
  if (ctxt->input != NULL) {
12485
166k
      if (ctxt->input->buf == NULL)
12486
0
    cur_avail = ctxt->input->length -
12487
0
          (ctxt->input->cur - ctxt->input->base);
12488
166k
      else
12489
166k
    cur_avail = xmlBufUse(ctxt->input->buf->buffer) -
12490
166k
                    (ctxt->input->cur - ctxt->input->base);
12491
166k
  }
12492
12493
166k
  if ((ctxt->instate != XML_PARSER_EOF) &&
12494
166k
      (ctxt->instate != XML_PARSER_EPILOG)) {
12495
105k
      xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
12496
105k
  }
12497
166k
  if ((ctxt->instate == XML_PARSER_EPILOG) && (cur_avail > 0)) {
12498
828
      xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
12499
828
  }
12500
166k
  if (ctxt->instate != XML_PARSER_EOF) {
12501
166k
      if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
12502
166k
    ctxt->sax->endDocument(ctxt->userData);
12503
166k
  }
12504
166k
  ctxt->instate = XML_PARSER_EOF;
12505
166k
    }
12506
10.4M
    if (ctxt->wellFormed == 0)
12507
3.78M
  return((xmlParserErrors) ctxt->errNo);
12508
6.70M
    else
12509
6.70M
        return(0);
12510
10.4M
}
12511
12512
/************************************************************************
12513
 *                  *
12514
 *    I/O front end functions to the parser     *
12515
 *                  *
12516
 ************************************************************************/
12517
12518
/**
12519
 * xmlCreatePushParserCtxt:
12520
 * @sax:  a SAX handler
12521
 * @user_data:  The user data returned on SAX callbacks
12522
 * @chunk:  a pointer to an array of chars
12523
 * @size:  number of chars in the array
12524
 * @filename:  an optional file name or URI
12525
 *
12526
 * Create a parser context for using the XML parser in push mode.
12527
 * If @buffer and @size are non-NULL, the data is used to detect
12528
 * the encoding.  The remaining characters will be parsed so they
12529
 * don't need to be fed in again through xmlParseChunk.
12530
 * To allow content encoding detection, @size should be >= 4
12531
 * The value of @filename is used for fetching external entities
12532
 * and error/warning reports.
12533
 *
12534
 * Returns the new parser context or NULL
12535
 */
12536
12537
xmlParserCtxtPtr
12538
xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12539
800k
                        const char *chunk, int size, const char *filename) {
12540
800k
    xmlParserCtxtPtr ctxt;
12541
800k
    xmlParserInputPtr inputStream;
12542
800k
    xmlParserInputBufferPtr buf;
12543
800k
    xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
12544
12545
    /*
12546
     * plug some encoding conversion routines
12547
     */
12548
800k
    if ((chunk != NULL) && (size >= 4))
12549
393k
  enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
12550
12551
800k
    buf = xmlAllocParserInputBuffer(enc);
12552
800k
    if (buf == NULL) return(NULL);
12553
12554
800k
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
12555
800k
    if (ctxt == NULL) {
12556
0
        xmlErrMemory(NULL, "creating parser: out of memory\n");
12557
0
  xmlFreeParserInputBuffer(buf);
12558
0
  return(NULL);
12559
0
    }
12560
800k
    ctxt->dictNames = 1;
12561
800k
    if (filename == NULL) {
12562
399k
  ctxt->directory = NULL;
12563
400k
    } else {
12564
400k
        ctxt->directory = xmlParserGetDirectory(filename);
12565
400k
    }
12566
12567
800k
    inputStream = xmlNewInputStream(ctxt);
12568
800k
    if (inputStream == NULL) {
12569
0
  xmlFreeParserCtxt(ctxt);
12570
0
  xmlFreeParserInputBuffer(buf);
12571
0
  return(NULL);
12572
0
    }
12573
12574
800k
    if (filename == NULL)
12575
399k
  inputStream->filename = NULL;
12576
400k
    else {
12577
400k
  inputStream->filename = (char *)
12578
400k
      xmlCanonicPath((const xmlChar *) filename);
12579
400k
  if (inputStream->filename == NULL) {
12580
0
      xmlFreeParserCtxt(ctxt);
12581
0
      xmlFreeParserInputBuffer(buf);
12582
0
      return(NULL);
12583
0
  }
12584
400k
    }
12585
800k
    inputStream->buf = buf;
12586
800k
    xmlBufResetInput(inputStream->buf->buffer, inputStream);
12587
800k
    inputPush(ctxt, inputStream);
12588
12589
    /*
12590
     * If the caller didn't provide an initial 'chunk' for determining
12591
     * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
12592
     * that it can be automatically determined later
12593
     */
12594
800k
    if ((size == 0) || (chunk == NULL)) {
12595
406k
  ctxt->charset = XML_CHAR_ENCODING_NONE;
12596
406k
    } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
12597
393k
  size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12598
393k
  size_t cur = ctxt->input->cur - ctxt->input->base;
12599
12600
393k
  xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12601
12602
393k
        xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
12603
#ifdef DEBUG_PUSH
12604
  xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12605
#endif
12606
393k
    }
12607
12608
800k
    if (enc != XML_CHAR_ENCODING_NONE) {
12609
145k
        xmlSwitchEncoding(ctxt, enc);
12610
145k
    }
12611
12612
800k
    return(ctxt);
12613
800k
}
12614
#endif /* LIBXML_PUSH_ENABLED */
12615
12616
/**
12617
 * xmlHaltParser:
12618
 * @ctxt:  an XML parser context
12619
 *
12620
 * Blocks further parser processing don't override error
12621
 * for internal use
12622
 */
12623
static void
12624
1.59M
xmlHaltParser(xmlParserCtxtPtr ctxt) {
12625
1.59M
    if (ctxt == NULL)
12626
0
        return;
12627
1.59M
    ctxt->instate = XML_PARSER_EOF;
12628
1.59M
    ctxt->disableSAX = 1;
12629
1.59M
    while (ctxt->inputNr > 1)
12630
3.74k
        xmlFreeInputStream(inputPop(ctxt));
12631
1.59M
    if (ctxt->input != NULL) {
12632
        /*
12633
   * in case there was a specific allocation deallocate before
12634
   * overriding base
12635
   */
12636
1.59M
        if (ctxt->input->free != NULL) {
12637
0
      ctxt->input->free((xmlChar *) ctxt->input->base);
12638
0
      ctxt->input->free = NULL;
12639
0
  }
12640
1.59M
        if (ctxt->input->buf != NULL) {
12641
1.45M
            xmlFreeParserInputBuffer(ctxt->input->buf);
12642
1.45M
            ctxt->input->buf = NULL;
12643
1.45M
        }
12644
1.59M
  ctxt->input->cur = BAD_CAST"";
12645
1.59M
        ctxt->input->length = 0;
12646
1.59M
  ctxt->input->base = ctxt->input->cur;
12647
1.59M
        ctxt->input->end = ctxt->input->cur;
12648
1.59M
    }
12649
1.59M
}
12650
12651
/**
12652
 * xmlStopParser:
12653
 * @ctxt:  an XML parser context
12654
 *
12655
 * Blocks further parser processing
12656
 */
12657
void
12658
399k
xmlStopParser(xmlParserCtxtPtr ctxt) {
12659
399k
    if (ctxt == NULL)
12660
0
        return;
12661
399k
    xmlHaltParser(ctxt);
12662
399k
    ctxt->errNo = XML_ERR_USER_STOP;
12663
399k
}
12664
12665
/**
12666
 * xmlCreateIOParserCtxt:
12667
 * @sax:  a SAX handler
12668
 * @user_data:  The user data returned on SAX callbacks
12669
 * @ioread:  an I/O read function
12670
 * @ioclose:  an I/O close function
12671
 * @ioctx:  an I/O handler
12672
 * @enc:  the charset encoding if known
12673
 *
12674
 * Create a parser context for using the XML parser with an existing
12675
 * I/O stream
12676
 *
12677
 * Returns the new parser context or NULL
12678
 */
12679
xmlParserCtxtPtr
12680
xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12681
  xmlInputReadCallback   ioread, xmlInputCloseCallback  ioclose,
12682
0
  void *ioctx, xmlCharEncoding enc) {
12683
0
    xmlParserCtxtPtr ctxt;
12684
0
    xmlParserInputPtr inputStream;
12685
0
    xmlParserInputBufferPtr buf;
12686
12687
0
    if (ioread == NULL) return(NULL);
12688
12689
0
    buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
12690
0
    if (buf == NULL) {
12691
0
        if (ioclose != NULL)
12692
0
            ioclose(ioctx);
12693
0
        return (NULL);
12694
0
    }
12695
12696
0
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
12697
0
    if (ctxt == NULL) {
12698
0
  xmlFreeParserInputBuffer(buf);
12699
0
  return(NULL);
12700
0
    }
12701
12702
0
    inputStream = xmlNewIOInputStream(ctxt, buf, enc);
12703
0
    if (inputStream == NULL) {
12704
0
  xmlFreeParserCtxt(ctxt);
12705
0
  return(NULL);
12706
0
    }
12707
0
    inputPush(ctxt, inputStream);
12708
12709
0
    return(ctxt);
12710
0
}
12711
12712
#ifdef LIBXML_VALID_ENABLED
12713
/************************************************************************
12714
 *                  *
12715
 *    Front ends when parsing a DTD       *
12716
 *                  *
12717
 ************************************************************************/
12718
12719
/**
12720
 * xmlIOParseDTD:
12721
 * @sax:  the SAX handler block or NULL
12722
 * @input:  an Input Buffer
12723
 * @enc:  the charset encoding if known
12724
 *
12725
 * Load and parse a DTD
12726
 *
12727
 * Returns the resulting xmlDtdPtr or NULL in case of error.
12728
 * @input will be freed by the function in any case.
12729
 */
12730
12731
xmlDtdPtr
12732
xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
12733
0
        xmlCharEncoding enc) {
12734
0
    xmlDtdPtr ret = NULL;
12735
0
    xmlParserCtxtPtr ctxt;
12736
0
    xmlParserInputPtr pinput = NULL;
12737
0
    xmlChar start[4];
12738
12739
0
    if (input == NULL)
12740
0
  return(NULL);
12741
12742
0
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
12743
0
    if (ctxt == NULL) {
12744
0
        xmlFreeParserInputBuffer(input);
12745
0
  return(NULL);
12746
0
    }
12747
12748
    /* We are loading a DTD */
12749
0
    ctxt->options |= XML_PARSE_DTDLOAD;
12750
12751
0
    xmlDetectSAX2(ctxt);
12752
12753
    /*
12754
     * generate a parser input from the I/O handler
12755
     */
12756
12757
0
    pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12758
0
    if (pinput == NULL) {
12759
0
        xmlFreeParserInputBuffer(input);
12760
0
  xmlFreeParserCtxt(ctxt);
12761
0
  return(NULL);
12762
0
    }
12763
12764
    /*
12765
     * plug some encoding conversion routines here.
12766
     */
12767
0
    if (xmlPushInput(ctxt, pinput) < 0) {
12768
0
  xmlFreeParserCtxt(ctxt);
12769
0
  return(NULL);
12770
0
    }
12771
0
    if (enc != XML_CHAR_ENCODING_NONE) {
12772
0
        xmlSwitchEncoding(ctxt, enc);
12773
0
    }
12774
12775
0
    pinput->filename = NULL;
12776
0
    pinput->line = 1;
12777
0
    pinput->col = 1;
12778
0
    pinput->base = ctxt->input->cur;
12779
0
    pinput->cur = ctxt->input->cur;
12780
0
    pinput->free = NULL;
12781
12782
    /*
12783
     * let's parse that entity knowing it's an external subset.
12784
     */
12785
0
    ctxt->inSubset = 2;
12786
0
    ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12787
0
    if (ctxt->myDoc == NULL) {
12788
0
  xmlErrMemory(ctxt, "New Doc failed");
12789
0
  return(NULL);
12790
0
    }
12791
0
    ctxt->myDoc->properties = XML_DOC_INTERNAL;
12792
0
    ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12793
0
                                 BAD_CAST "none", BAD_CAST "none");
12794
12795
0
    if ((enc == XML_CHAR_ENCODING_NONE) &&
12796
0
        ((ctxt->input->end - ctxt->input->cur) >= 4)) {
12797
  /*
12798
   * Get the 4 first bytes and decode the charset
12799
   * if enc != XML_CHAR_ENCODING_NONE
12800
   * plug some encoding conversion routines.
12801
   */
12802
0
  start[0] = RAW;
12803
0
  start[1] = NXT(1);
12804
0
  start[2] = NXT(2);
12805
0
  start[3] = NXT(3);
12806
0
  enc = xmlDetectCharEncoding(start, 4);
12807
0
  if (enc != XML_CHAR_ENCODING_NONE) {
12808
0
      xmlSwitchEncoding(ctxt, enc);
12809
0
  }
12810
0
    }
12811
12812
0
    xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
12813
12814
0
    if (ctxt->myDoc != NULL) {
12815
0
  if (ctxt->wellFormed) {
12816
0
      ret = ctxt->myDoc->extSubset;
12817
0
      ctxt->myDoc->extSubset = NULL;
12818
0
      if (ret != NULL) {
12819
0
    xmlNodePtr tmp;
12820
12821
0
    ret->doc = NULL;
12822
0
    tmp = ret->children;
12823
0
    while (tmp != NULL) {
12824
0
        tmp->doc = NULL;
12825
0
        tmp = tmp->next;
12826
0
    }
12827
0
      }
12828
0
  } else {
12829
0
      ret = NULL;
12830
0
  }
12831
0
        xmlFreeDoc(ctxt->myDoc);
12832
0
        ctxt->myDoc = NULL;
12833
0
    }
12834
0
    xmlFreeParserCtxt(ctxt);
12835
12836
0
    return(ret);
12837
0
}
12838
12839
/**
12840
 * xmlSAXParseDTD:
12841
 * @sax:  the SAX handler block
12842
 * @ExternalID:  a NAME* containing the External ID of the DTD
12843
 * @SystemID:  a NAME* containing the URL to the DTD
12844
 *
12845
 * DEPRECATED: Don't use.
12846
 *
12847
 * Load and parse an external subset.
12848
 *
12849
 * Returns the resulting xmlDtdPtr or NULL in case of error.
12850
 */
12851
12852
xmlDtdPtr
12853
xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
12854
0
                          const xmlChar *SystemID) {
12855
0
    xmlDtdPtr ret = NULL;
12856
0
    xmlParserCtxtPtr ctxt;
12857
0
    xmlParserInputPtr input = NULL;
12858
0
    xmlCharEncoding enc;
12859
0
    xmlChar* systemIdCanonic;
12860
12861
0
    if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
12862
12863
0
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
12864
0
    if (ctxt == NULL) {
12865
0
  return(NULL);
12866
0
    }
12867
12868
    /* We are loading a DTD */
12869
0
    ctxt->options |= XML_PARSE_DTDLOAD;
12870
12871
    /*
12872
     * Canonicalise the system ID
12873
     */
12874
0
    systemIdCanonic = xmlCanonicPath(SystemID);
12875
0
    if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
12876
0
  xmlFreeParserCtxt(ctxt);
12877
0
  return(NULL);
12878
0
    }
12879
12880
    /*
12881
     * Ask the Entity resolver to load the damn thing
12882
     */
12883
12884
0
    if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
12885
0
  input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
12886
0
                                   systemIdCanonic);
12887
0
    if (input == NULL) {
12888
0
  xmlFreeParserCtxt(ctxt);
12889
0
  if (systemIdCanonic != NULL)
12890
0
      xmlFree(systemIdCanonic);
12891
0
  return(NULL);
12892
0
    }
12893
12894
    /*
12895
     * plug some encoding conversion routines here.
12896
     */
12897
0
    if (xmlPushInput(ctxt, input) < 0) {
12898
0
  xmlFreeParserCtxt(ctxt);
12899
0
  if (systemIdCanonic != NULL)
12900
0
      xmlFree(systemIdCanonic);
12901
0
  return(NULL);
12902
0
    }
12903
0
    if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12904
0
  enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
12905
0
  xmlSwitchEncoding(ctxt, enc);
12906
0
    }
12907
12908
0
    if (input->filename == NULL)
12909
0
  input->filename = (char *) systemIdCanonic;
12910
0
    else
12911
0
  xmlFree(systemIdCanonic);
12912
0
    input->line = 1;
12913
0
    input->col = 1;
12914
0
    input->base = ctxt->input->cur;
12915
0
    input->cur = ctxt->input->cur;
12916
0
    input->free = NULL;
12917
12918
    /*
12919
     * let's parse that entity knowing it's an external subset.
12920
     */
12921
0
    ctxt->inSubset = 2;
12922
0
    ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12923
0
    if (ctxt->myDoc == NULL) {
12924
0
  xmlErrMemory(ctxt, "New Doc failed");
12925
0
  xmlFreeParserCtxt(ctxt);
12926
0
  return(NULL);
12927
0
    }
12928
0
    ctxt->myDoc->properties = XML_DOC_INTERNAL;
12929
0
    ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12930
0
                                 ExternalID, SystemID);
12931
0
    xmlParseExternalSubset(ctxt, ExternalID, SystemID);
12932
12933
0
    if (ctxt->myDoc != NULL) {
12934
0
  if (ctxt->wellFormed) {
12935
0
      ret = ctxt->myDoc->extSubset;
12936
0
      ctxt->myDoc->extSubset = NULL;
12937
0
      if (ret != NULL) {
12938
0
    xmlNodePtr tmp;
12939
12940
0
    ret->doc = NULL;
12941
0
    tmp = ret->children;
12942
0
    while (tmp != NULL) {
12943
0
        tmp->doc = NULL;
12944
0
        tmp = tmp->next;
12945
0
    }
12946
0
      }
12947
0
  } else {
12948
0
      ret = NULL;
12949
0
  }
12950
0
        xmlFreeDoc(ctxt->myDoc);
12951
0
        ctxt->myDoc = NULL;
12952
0
    }
12953
0
    xmlFreeParserCtxt(ctxt);
12954
12955
0
    return(ret);
12956
0
}
12957
12958
12959
/**
12960
 * xmlParseDTD:
12961
 * @ExternalID:  a NAME* containing the External ID of the DTD
12962
 * @SystemID:  a NAME* containing the URL to the DTD
12963
 *
12964
 * Load and parse an external subset.
12965
 *
12966
 * Returns the resulting xmlDtdPtr or NULL in case of error.
12967
 */
12968
12969
xmlDtdPtr
12970
0
xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
12971
0
    return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
12972
0
}
12973
#endif /* LIBXML_VALID_ENABLED */
12974
12975
/************************************************************************
12976
 *                  *
12977
 *    Front ends when parsing an Entity     *
12978
 *                  *
12979
 ************************************************************************/
12980
12981
/**
12982
 * xmlParseCtxtExternalEntity:
12983
 * @ctx:  the existing parsing context
12984
 * @URL:  the URL for the entity to load
12985
 * @ID:  the System ID for the entity to load
12986
 * @lst:  the return value for the set of parsed nodes
12987
 *
12988
 * Parse an external general entity within an existing parsing context
12989
 * An external general parsed entity is well-formed if it matches the
12990
 * production labeled extParsedEnt.
12991
 *
12992
 * [78] extParsedEnt ::= TextDecl? content
12993
 *
12994
 * Returns 0 if the entity is well formed, -1 in case of args problem and
12995
 *    the parser error code otherwise
12996
 */
12997
12998
int
12999
xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
13000
0
                 const xmlChar *ID, xmlNodePtr *lst) {
13001
0
    void *userData;
13002
13003
0
    if (ctx == NULL) return(-1);
13004
    /*
13005
     * If the user provided their own SAX callbacks, then reuse the
13006
     * userData callback field, otherwise the expected setup in a
13007
     * DOM builder is to have userData == ctxt
13008
     */
13009
0
    if (ctx->userData == ctx)
13010
0
        userData = NULL;
13011
0
    else
13012
0
        userData = ctx->userData;
13013
0
    return xmlParseExternalEntityPrivate(ctx->myDoc, ctx, ctx->sax,
13014
0
                                         userData, ctx->depth + 1,
13015
0
                                         URL, ID, lst);
13016
0
}
13017
13018
/**
13019
 * xmlParseExternalEntityPrivate:
13020
 * @doc:  the document the chunk pertains to
13021
 * @oldctxt:  the previous parser context if available
13022
 * @sax:  the SAX handler block (possibly NULL)
13023
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
13024
 * @depth:  Used for loop detection, use 0
13025
 * @URL:  the URL for the entity to load
13026
 * @ID:  the System ID for the entity to load
13027
 * @list:  the return value for the set of parsed nodes
13028
 *
13029
 * Private version of xmlParseExternalEntity()
13030
 *
13031
 * Returns 0 if the entity is well formed, -1 in case of args problem and
13032
 *    the parser error code otherwise
13033
 */
13034
13035
static xmlParserErrors
13036
xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
13037
                xmlSAXHandlerPtr sax,
13038
          void *user_data, int depth, const xmlChar *URL,
13039
4.42M
          const xmlChar *ID, xmlNodePtr *list) {
13040
4.42M
    xmlParserCtxtPtr ctxt;
13041
4.42M
    xmlDocPtr newDoc;
13042
4.42M
    xmlNodePtr newRoot;
13043
4.42M
    xmlParserErrors ret = XML_ERR_OK;
13044
4.42M
    xmlChar start[4];
13045
4.42M
    xmlCharEncoding enc;
13046
13047
4.42M
    if (((depth > 40) &&
13048
4.42M
  ((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13049
4.42M
  (depth > 1024)) {
13050
4.58k
  return(XML_ERR_ENTITY_LOOP);
13051
4.58k
    }
13052
13053
4.41M
    if (list != NULL)
13054
4.33M
        *list = NULL;
13055
4.41M
    if ((URL == NULL) && (ID == NULL))
13056
320
  return(XML_ERR_INTERNAL_ERROR);
13057
4.41M
    if (doc == NULL)
13058
0
  return(XML_ERR_INTERNAL_ERROR);
13059
13060
4.41M
    ctxt = xmlCreateEntityParserCtxtInternal(sax, user_data, URL, ID, NULL,
13061
4.41M
                                             oldctxt);
13062
4.41M
    if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
13063
4.30M
    xmlDetectSAX2(ctxt);
13064
13065
4.30M
    newDoc = xmlNewDoc(BAD_CAST "1.0");
13066
4.30M
    if (newDoc == NULL) {
13067
0
  xmlFreeParserCtxt(ctxt);
13068
0
  return(XML_ERR_INTERNAL_ERROR);
13069
0
    }
13070
4.30M
    newDoc->properties = XML_DOC_INTERNAL;
13071
4.30M
    if (doc) {
13072
4.30M
        newDoc->intSubset = doc->intSubset;
13073
4.30M
        newDoc->extSubset = doc->extSubset;
13074
4.30M
        if (doc->dict) {
13075
3.01M
            newDoc->dict = doc->dict;
13076
3.01M
            xmlDictReference(newDoc->dict);
13077
3.01M
        }
13078
4.30M
        if (doc->URL != NULL) {
13079
2.20M
            newDoc->URL = xmlStrdup(doc->URL);
13080
2.20M
        }
13081
4.30M
    }
13082
4.30M
    newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13083
4.30M
    if (newRoot == NULL) {
13084
0
  if (sax != NULL)
13085
0
  xmlFreeParserCtxt(ctxt);
13086
0
  newDoc->intSubset = NULL;
13087
0
  newDoc->extSubset = NULL;
13088
0
        xmlFreeDoc(newDoc);
13089
0
  return(XML_ERR_INTERNAL_ERROR);
13090
0
    }
13091
4.30M
    xmlAddChild((xmlNodePtr) newDoc, newRoot);
13092
4.30M
    nodePush(ctxt, newDoc->children);
13093
4.30M
    if (doc == NULL) {
13094
0
        ctxt->myDoc = newDoc;
13095
4.30M
    } else {
13096
4.30M
        ctxt->myDoc = doc;
13097
4.30M
        newRoot->doc = doc;
13098
4.30M
    }
13099
13100
    /*
13101
     * Get the 4 first bytes and decode the charset
13102
     * if enc != XML_CHAR_ENCODING_NONE
13103
     * plug some encoding conversion routines.
13104
     */
13105
4.30M
    GROW;
13106
4.30M
    if ((ctxt->input->end - ctxt->input->cur) >= 4) {
13107
4.29M
  start[0] = RAW;
13108
4.29M
  start[1] = NXT(1);
13109
4.29M
  start[2] = NXT(2);
13110
4.29M
  start[3] = NXT(3);
13111
4.29M
  enc = xmlDetectCharEncoding(start, 4);
13112
4.29M
  if (enc != XML_CHAR_ENCODING_NONE) {
13113
20.2k
      xmlSwitchEncoding(ctxt, enc);
13114
20.2k
  }
13115
4.29M
    }
13116
13117
    /*
13118
     * Parse a possible text declaration first
13119
     */
13120
4.30M
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
13121
18.2k
  xmlParseTextDecl(ctxt);
13122
        /*
13123
         * An XML-1.0 document can't reference an entity not XML-1.0
13124
         */
13125
18.2k
        if ((xmlStrEqual(oldctxt->version, BAD_CAST "1.0")) &&
13126
18.2k
            (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
13127
289
            xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
13128
289
                           "Version mismatch between document and entity\n");
13129
289
        }
13130
18.2k
    }
13131
13132
4.30M
    ctxt->instate = XML_PARSER_CONTENT;
13133
4.30M
    ctxt->depth = depth;
13134
4.30M
    if (oldctxt != NULL) {
13135
4.30M
  ctxt->_private = oldctxt->_private;
13136
4.30M
  ctxt->loadsubset = oldctxt->loadsubset;
13137
4.30M
  ctxt->validate = oldctxt->validate;
13138
4.30M
  ctxt->valid = oldctxt->valid;
13139
4.30M
  ctxt->replaceEntities = oldctxt->replaceEntities;
13140
4.30M
        if (oldctxt->validate) {
13141
3.90M
            ctxt->vctxt.error = oldctxt->vctxt.error;
13142
3.90M
            ctxt->vctxt.warning = oldctxt->vctxt.warning;
13143
3.90M
            ctxt->vctxt.userData = oldctxt->vctxt.userData;
13144
3.90M
        }
13145
4.30M
  ctxt->external = oldctxt->external;
13146
4.30M
        if (ctxt->dict) xmlDictFree(ctxt->dict);
13147
4.30M
        ctxt->dict = oldctxt->dict;
13148
4.30M
        ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13149
4.30M
        ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13150
4.30M
        ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13151
4.30M
        ctxt->dictNames = oldctxt->dictNames;
13152
4.30M
        ctxt->attsDefault = oldctxt->attsDefault;
13153
4.30M
        ctxt->attsSpecial = oldctxt->attsSpecial;
13154
4.30M
        ctxt->linenumbers = oldctxt->linenumbers;
13155
4.30M
  ctxt->record_info = oldctxt->record_info;
13156
4.30M
  ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
13157
4.30M
  ctxt->node_seq.length = oldctxt->node_seq.length;
13158
4.30M
  ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
13159
4.30M
    } else {
13160
  /*
13161
   * Doing validity checking on chunk without context
13162
   * doesn't make sense
13163
   */
13164
0
  ctxt->_private = NULL;
13165
0
  ctxt->validate = 0;
13166
0
  ctxt->external = 2;
13167
0
  ctxt->loadsubset = 0;
13168
0
    }
13169
13170
4.30M
    xmlParseContent(ctxt);
13171
13172
4.30M
    if ((RAW == '<') && (NXT(1) == '/')) {
13173
571k
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13174
3.72M
    } else if (RAW != 0) {
13175
6.92k
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13176
6.92k
    }
13177
4.30M
    if (ctxt->node != newDoc->children) {
13178
3.27M
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13179
3.27M
    }
13180
13181
4.30M
    if (!ctxt->wellFormed) {
13182
4.28M
        if (ctxt->errNo == 0)
13183
0
      ret = XML_ERR_INTERNAL_ERROR;
13184
4.28M
  else
13185
4.28M
      ret = (xmlParserErrors)ctxt->errNo;
13186
4.28M
    } else {
13187
17.9k
  if (list != NULL) {
13188
6.44k
      xmlNodePtr cur;
13189
13190
      /*
13191
       * Return the newly created nodeset after unlinking it from
13192
       * they pseudo parent.
13193
       */
13194
6.44k
      cur = newDoc->children->children;
13195
6.44k
      *list = cur;
13196
12.7k
      while (cur != NULL) {
13197
6.32k
    cur->parent = NULL;
13198
6.32k
    cur = cur->next;
13199
6.32k
      }
13200
6.44k
            newDoc->children->children = NULL;
13201
6.44k
  }
13202
17.9k
  ret = XML_ERR_OK;
13203
17.9k
    }
13204
13205
    /*
13206
     * Record in the parent context the number of entities replacement
13207
     * done when parsing that reference.
13208
     */
13209
4.30M
    if (oldctxt != NULL)
13210
4.29M
        oldctxt->nbentities += ctxt->nbentities;
13211
13212
    /*
13213
     * Also record the size of the entity parsed
13214
     */
13215
4.30M
    if (ctxt->input != NULL && oldctxt != NULL) {
13216
4.29M
  oldctxt->sizeentities += ctxt->input->consumed;
13217
4.29M
  oldctxt->sizeentities += (ctxt->input->cur - ctxt->input->base);
13218
4.29M
    }
13219
    /*
13220
     * And record the last error if any
13221
     */
13222
4.30M
    if ((oldctxt != NULL) && (ctxt->lastError.code != XML_ERR_OK))
13223
4.28M
        xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13224
13225
4.30M
    if (oldctxt != NULL) {
13226
4.29M
        ctxt->dict = NULL;
13227
4.29M
        ctxt->attsDefault = NULL;
13228
4.29M
        ctxt->attsSpecial = NULL;
13229
4.29M
        oldctxt->validate = ctxt->validate;
13230
4.29M
        oldctxt->valid = ctxt->valid;
13231
4.29M
        oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
13232
4.29M
        oldctxt->node_seq.length = ctxt->node_seq.length;
13233
4.29M
        oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
13234
4.29M
    }
13235
4.30M
    ctxt->node_seq.maximum = 0;
13236
4.30M
    ctxt->node_seq.length = 0;
13237
4.30M
    ctxt->node_seq.buffer = NULL;
13238
4.30M
    xmlFreeParserCtxt(ctxt);
13239
4.30M
    newDoc->intSubset = NULL;
13240
4.30M
    newDoc->extSubset = NULL;
13241
4.30M
    xmlFreeDoc(newDoc);
13242
13243
4.30M
    return(ret);
13244
4.30M
}
13245
13246
#ifdef LIBXML_SAX1_ENABLED
13247
/**
13248
 * xmlParseExternalEntity:
13249
 * @doc:  the document the chunk pertains to
13250
 * @sax:  the SAX handler block (possibly NULL)
13251
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
13252
 * @depth:  Used for loop detection, use 0
13253
 * @URL:  the URL for the entity to load
13254
 * @ID:  the System ID for the entity to load
13255
 * @lst:  the return value for the set of parsed nodes
13256
 *
13257
 * Parse an external general entity
13258
 * An external general parsed entity is well-formed if it matches the
13259
 * production labeled extParsedEnt.
13260
 *
13261
 * [78] extParsedEnt ::= TextDecl? content
13262
 *
13263
 * Returns 0 if the entity is well formed, -1 in case of args problem and
13264
 *    the parser error code otherwise
13265
 */
13266
13267
int
13268
xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
13269
0
    int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
13270
0
    return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
13271
0
                           ID, lst));
13272
0
}
13273
13274
/**
13275
 * xmlParseBalancedChunkMemory:
13276
 * @doc:  the document the chunk pertains to (must not be NULL)
13277
 * @sax:  the SAX handler block (possibly NULL)
13278
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
13279
 * @depth:  Used for loop detection, use 0
13280
 * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
13281
 * @lst:  the return value for the set of parsed nodes
13282
 *
13283
 * Parse a well-balanced chunk of an XML document
13284
 * called by the parser
13285
 * The allowed sequence for the Well Balanced Chunk is the one defined by
13286
 * the content production in the XML grammar:
13287
 *
13288
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13289
 *
13290
 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13291
 *    the parser error code otherwise
13292
 */
13293
13294
int
13295
xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13296
0
     void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
13297
0
    return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
13298
0
                                                depth, string, lst, 0 );
13299
0
}
13300
#endif /* LIBXML_SAX1_ENABLED */
13301
13302
/**
13303
 * xmlParseBalancedChunkMemoryInternal:
13304
 * @oldctxt:  the existing parsing context
13305
 * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
13306
 * @user_data:  the user data field for the parser context
13307
 * @lst:  the return value for the set of parsed nodes
13308
 *
13309
 *
13310
 * Parse a well-balanced chunk of an XML document
13311
 * called by the parser
13312
 * The allowed sequence for the Well Balanced Chunk is the one defined by
13313
 * the content production in the XML grammar:
13314
 *
13315
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13316
 *
13317
 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13318
 * error code otherwise
13319
 *
13320
 * In case recover is set to 1, the nodelist will not be empty even if
13321
 * the parsed chunk is not well balanced.
13322
 */
13323
static xmlParserErrors
13324
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
13325
293k
  const xmlChar *string, void *user_data, xmlNodePtr *lst) {
13326
293k
    xmlParserCtxtPtr ctxt;
13327
293k
    xmlDocPtr newDoc = NULL;
13328
293k
    xmlNodePtr newRoot;
13329
293k
    xmlSAXHandlerPtr oldsax = NULL;
13330
293k
    xmlNodePtr content = NULL;
13331
293k
    xmlNodePtr last = NULL;
13332
293k
    int size;
13333
293k
    xmlParserErrors ret = XML_ERR_OK;
13334
293k
#ifdef SAX2
13335
293k
    int i;
13336
293k
#endif
13337
13338
293k
    if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13339
293k
        (oldctxt->depth >  1024)) {
13340
920
  return(XML_ERR_ENTITY_LOOP);
13341
920
    }
13342
13343
13344
292k
    if (lst != NULL)
13345
261k
        *lst = NULL;
13346
292k
    if (string == NULL)
13347
115
        return(XML_ERR_INTERNAL_ERROR);
13348
13349
292k
    size = xmlStrlen(string);
13350
13351
292k
    ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13352
292k
    if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
13353
245k
    if (user_data != NULL)
13354
0
  ctxt->userData = user_data;
13355
245k
    else
13356
245k
  ctxt->userData = ctxt;
13357
245k
    if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
13358
245k
    ctxt->dict = oldctxt->dict;
13359
245k
    ctxt->input_id = oldctxt->input_id + 1;
13360
245k
    ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13361
245k
    ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13362
245k
    ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13363
13364
245k
#ifdef SAX2
13365
    /* propagate namespaces down the entity */
13366
1.03M
    for (i = 0;i < oldctxt->nsNr;i += 2) {
13367
790k
        nsPush(ctxt, oldctxt->nsTab[i], oldctxt->nsTab[i+1]);
13368
790k
    }
13369
245k
#endif
13370
13371
245k
    oldsax = ctxt->sax;
13372
245k
    ctxt->sax = oldctxt->sax;
13373
245k
    xmlDetectSAX2(ctxt);
13374
245k
    ctxt->replaceEntities = oldctxt->replaceEntities;
13375
245k
    ctxt->options = oldctxt->options;
13376
13377
245k
    ctxt->_private = oldctxt->_private;
13378
245k
    if (oldctxt->myDoc == NULL) {
13379
0
  newDoc = xmlNewDoc(BAD_CAST "1.0");
13380
0
  if (newDoc == NULL) {
13381
0
      ctxt->sax = oldsax;
13382
0
      ctxt->dict = NULL;
13383
0
      xmlFreeParserCtxt(ctxt);
13384
0
      return(XML_ERR_INTERNAL_ERROR);
13385
0
  }
13386
0
  newDoc->properties = XML_DOC_INTERNAL;
13387
0
  newDoc->dict = ctxt->dict;
13388
0
  xmlDictReference(newDoc->dict);
13389
0
  ctxt->myDoc = newDoc;
13390
245k
    } else {
13391
245k
  ctxt->myDoc = oldctxt->myDoc;
13392
245k
        content = ctxt->myDoc->children;
13393
245k
  last = ctxt->myDoc->last;
13394
245k
    }
13395
245k
    newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
13396
245k
    if (newRoot == NULL) {
13397
0
  ctxt->sax = oldsax;
13398
0
  ctxt->dict = NULL;
13399
0
  xmlFreeParserCtxt(ctxt);
13400
0
  if (newDoc != NULL) {
13401
0
      xmlFreeDoc(newDoc);
13402
0
  }
13403
0
  return(XML_ERR_INTERNAL_ERROR);
13404
0
    }
13405
245k
    ctxt->myDoc->children = NULL;
13406
245k
    ctxt->myDoc->last = NULL;
13407
245k
    xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
13408
245k
    nodePush(ctxt, ctxt->myDoc->children);
13409
245k
    ctxt->instate = XML_PARSER_CONTENT;
13410
245k
    ctxt->depth = oldctxt->depth + 1;
13411
13412
245k
    ctxt->validate = 0;
13413
245k
    ctxt->loadsubset = oldctxt->loadsubset;
13414
245k
    if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
13415
  /*
13416
   * ID/IDREF registration will be done in xmlValidateElement below
13417
   */
13418
104k
  ctxt->loadsubset |= XML_SKIP_IDS;
13419
104k
    }
13420
245k
    ctxt->dictNames = oldctxt->dictNames;
13421
245k
    ctxt->attsDefault = oldctxt->attsDefault;
13422
245k
    ctxt->attsSpecial = oldctxt->attsSpecial;
13423
13424
245k
    xmlParseContent(ctxt);
13425
245k
    if ((RAW == '<') && (NXT(1) == '/')) {
13426
5.74k
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13427
239k
    } else if (RAW != 0) {
13428
237
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13429
237
    }
13430
245k
    if (ctxt->node != ctxt->myDoc->children) {
13431
85.9k
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13432
85.9k
    }
13433
13434
245k
    if (!ctxt->wellFormed) {
13435
199k
        if (ctxt->errNo == 0)
13436
0
      ret = XML_ERR_INTERNAL_ERROR;
13437
199k
  else
13438
199k
      ret = (xmlParserErrors)ctxt->errNo;
13439
199k
    } else {
13440
46.1k
      ret = XML_ERR_OK;
13441
46.1k
    }
13442
13443
245k
    if ((lst != NULL) && (ret == XML_ERR_OK)) {
13444
39.1k
  xmlNodePtr cur;
13445
13446
  /*
13447
   * Return the newly created nodeset after unlinking it from
13448
   * they pseudo parent.
13449
   */
13450
39.1k
  cur = ctxt->myDoc->children->children;
13451
39.1k
  *lst = cur;
13452
95.6k
  while (cur != NULL) {
13453
56.4k
#ifdef LIBXML_VALID_ENABLED
13454
56.4k
      if ((oldctxt->validate) && (oldctxt->wellFormed) &&
13455
56.4k
    (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
13456
56.4k
    (cur->type == XML_ELEMENT_NODE)) {
13457
5.84k
    oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
13458
5.84k
      oldctxt->myDoc, cur);
13459
5.84k
      }
13460
56.4k
#endif /* LIBXML_VALID_ENABLED */
13461
56.4k
      cur->parent = NULL;
13462
56.4k
      cur = cur->next;
13463
56.4k
  }
13464
39.1k
  ctxt->myDoc->children->children = NULL;
13465
39.1k
    }
13466
245k
    if (ctxt->myDoc != NULL) {
13467
245k
  xmlFreeNode(ctxt->myDoc->children);
13468
245k
        ctxt->myDoc->children = content;
13469
245k
        ctxt->myDoc->last = last;
13470
245k
    }
13471
13472
    /*
13473
     * Record in the parent context the number of entities replacement
13474
     * done when parsing that reference.
13475
     */
13476
245k
    if (oldctxt != NULL)
13477
245k
        oldctxt->nbentities += ctxt->nbentities;
13478
13479
    /*
13480
     * Also record the last error if any
13481
     */
13482
245k
    if (ctxt->lastError.code != XML_ERR_OK)
13483
200k
        xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13484
13485
245k
    ctxt->sax = oldsax;
13486
245k
    ctxt->dict = NULL;
13487
245k
    ctxt->attsDefault = NULL;
13488
245k
    ctxt->attsSpecial = NULL;
13489
245k
    xmlFreeParserCtxt(ctxt);
13490
245k
    if (newDoc != NULL) {
13491
0
  xmlFreeDoc(newDoc);
13492
0
    }
13493
13494
245k
    return(ret);
13495
245k
}
13496
13497
/**
13498
 * xmlParseInNodeContext:
13499
 * @node:  the context node
13500
 * @data:  the input string
13501
 * @datalen:  the input string length in bytes
13502
 * @options:  a combination of xmlParserOption
13503
 * @lst:  the return value for the set of parsed nodes
13504
 *
13505
 * Parse a well-balanced chunk of an XML document
13506
 * within the context (DTD, namespaces, etc ...) of the given node.
13507
 *
13508
 * The allowed sequence for the data is a Well Balanced Chunk defined by
13509
 * the content production in the XML grammar:
13510
 *
13511
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13512
 *
13513
 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13514
 * error code otherwise
13515
 */
13516
xmlParserErrors
13517
xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
13518
0
                      int options, xmlNodePtr *lst) {
13519
0
#ifdef SAX2
13520
0
    xmlParserCtxtPtr ctxt;
13521
0
    xmlDocPtr doc = NULL;
13522
0
    xmlNodePtr fake, cur;
13523
0
    int nsnr = 0;
13524
13525
0
    xmlParserErrors ret = XML_ERR_OK;
13526
13527
    /*
13528
     * check all input parameters, grab the document
13529
     */
13530
0
    if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
13531
0
        return(XML_ERR_INTERNAL_ERROR);
13532
0
    switch (node->type) {
13533
0
        case XML_ELEMENT_NODE:
13534
0
        case XML_ATTRIBUTE_NODE:
13535
0
        case XML_TEXT_NODE:
13536
0
        case XML_CDATA_SECTION_NODE:
13537
0
        case XML_ENTITY_REF_NODE:
13538
0
        case XML_PI_NODE:
13539
0
        case XML_COMMENT_NODE:
13540
0
        case XML_DOCUMENT_NODE:
13541
0
        case XML_HTML_DOCUMENT_NODE:
13542
0
      break;
13543
0
  default:
13544
0
      return(XML_ERR_INTERNAL_ERROR);
13545
13546
0
    }
13547
0
    while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
13548
0
           (node->type != XML_DOCUMENT_NODE) &&
13549
0
     (node->type != XML_HTML_DOCUMENT_NODE))
13550
0
  node = node->parent;
13551
0
    if (node == NULL)
13552
0
  return(XML_ERR_INTERNAL_ERROR);
13553
0
    if (node->type == XML_ELEMENT_NODE)
13554
0
  doc = node->doc;
13555
0
    else
13556
0
        doc = (xmlDocPtr) node;
13557
0
    if (doc == NULL)
13558
0
  return(XML_ERR_INTERNAL_ERROR);
13559
13560
    /*
13561
     * allocate a context and set-up everything not related to the
13562
     * node position in the tree
13563
     */
13564
0
    if (doc->type == XML_DOCUMENT_NODE)
13565
0
  ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
13566
0
#ifdef LIBXML_HTML_ENABLED
13567
0
    else if (doc->type == XML_HTML_DOCUMENT_NODE) {
13568
0
  ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
13569
        /*
13570
         * When parsing in context, it makes no sense to add implied
13571
         * elements like html/body/etc...
13572
         */
13573
0
        options |= HTML_PARSE_NOIMPLIED;
13574
0
    }
13575
0
#endif
13576
0
    else
13577
0
        return(XML_ERR_INTERNAL_ERROR);
13578
13579
0
    if (ctxt == NULL)
13580
0
        return(XML_ERR_NO_MEMORY);
13581
13582
    /*
13583
     * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
13584
     * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
13585
     * we must wait until the last moment to free the original one.
13586
     */
13587
0
    if (doc->dict != NULL) {
13588
0
        if (ctxt->dict != NULL)
13589
0
      xmlDictFree(ctxt->dict);
13590
0
  ctxt->dict = doc->dict;
13591
0
    } else
13592
0
        options |= XML_PARSE_NODICT;
13593
13594
0
    if (doc->encoding != NULL) {
13595
0
        xmlCharEncodingHandlerPtr hdlr;
13596
13597
0
        if (ctxt->encoding != NULL)
13598
0
      xmlFree((xmlChar *) ctxt->encoding);
13599
0
        ctxt->encoding = xmlStrdup((const xmlChar *) doc->encoding);
13600
13601
0
        hdlr = xmlFindCharEncodingHandler((const char *) doc->encoding);
13602
0
        if (hdlr != NULL) {
13603
0
            xmlSwitchToEncoding(ctxt, hdlr);
13604
0
  } else {
13605
0
            return(XML_ERR_UNSUPPORTED_ENCODING);
13606
0
        }
13607
0
    }
13608
13609
0
    xmlCtxtUseOptionsInternal(ctxt, options, NULL);
13610
0
    xmlDetectSAX2(ctxt);
13611
0
    ctxt->myDoc = doc;
13612
    /* parsing in context, i.e. as within existing content */
13613
0
    ctxt->input_id = 2;
13614
0
    ctxt->instate = XML_PARSER_CONTENT;
13615
13616
0
    fake = xmlNewDocComment(node->doc, NULL);
13617
0
    if (fake == NULL) {
13618
0
        xmlFreeParserCtxt(ctxt);
13619
0
  return(XML_ERR_NO_MEMORY);
13620
0
    }
13621
0
    xmlAddChild(node, fake);
13622
13623
0
    if (node->type == XML_ELEMENT_NODE) {
13624
0
  nodePush(ctxt, node);
13625
  /*
13626
   * initialize the SAX2 namespaces stack
13627
   */
13628
0
  cur = node;
13629
0
  while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
13630
0
      xmlNsPtr ns = cur->nsDef;
13631
0
      const xmlChar *iprefix, *ihref;
13632
13633
0
      while (ns != NULL) {
13634
0
    if (ctxt->dict) {
13635
0
        iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
13636
0
        ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
13637
0
    } else {
13638
0
        iprefix = ns->prefix;
13639
0
        ihref = ns->href;
13640
0
    }
13641
13642
0
          if (xmlGetNamespace(ctxt, iprefix) == NULL) {
13643
0
        nsPush(ctxt, iprefix, ihref);
13644
0
        nsnr++;
13645
0
    }
13646
0
    ns = ns->next;
13647
0
      }
13648
0
      cur = cur->parent;
13649
0
  }
13650
0
    }
13651
13652
0
    if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
13653
  /*
13654
   * ID/IDREF registration will be done in xmlValidateElement below
13655
   */
13656
0
  ctxt->loadsubset |= XML_SKIP_IDS;
13657
0
    }
13658
13659
0
#ifdef LIBXML_HTML_ENABLED
13660
0
    if (doc->type == XML_HTML_DOCUMENT_NODE)
13661
0
        __htmlParseContent(ctxt);
13662
0
    else
13663
0
#endif
13664
0
  xmlParseContent(ctxt);
13665
13666
0
    nsPop(ctxt, nsnr);
13667
0
    if ((RAW == '<') && (NXT(1) == '/')) {
13668
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13669
0
    } else if (RAW != 0) {
13670
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13671
0
    }
13672
0
    if ((ctxt->node != NULL) && (ctxt->node != node)) {
13673
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13674
0
  ctxt->wellFormed = 0;
13675
0
    }
13676
13677
0
    if (!ctxt->wellFormed) {
13678
0
        if (ctxt->errNo == 0)
13679
0
      ret = XML_ERR_INTERNAL_ERROR;
13680
0
  else
13681
0
      ret = (xmlParserErrors)ctxt->errNo;
13682
0
    } else {
13683
0
        ret = XML_ERR_OK;
13684
0
    }
13685
13686
    /*
13687
     * Return the newly created nodeset after unlinking it from
13688
     * the pseudo sibling.
13689
     */
13690
13691
0
    cur = fake->next;
13692
0
    fake->next = NULL;
13693
0
    node->last = fake;
13694
13695
0
    if (cur != NULL) {
13696
0
  cur->prev = NULL;
13697
0
    }
13698
13699
0
    *lst = cur;
13700
13701
0
    while (cur != NULL) {
13702
0
  cur->parent = NULL;
13703
0
  cur = cur->next;
13704
0
    }
13705
13706
0
    xmlUnlinkNode(fake);
13707
0
    xmlFreeNode(fake);
13708
13709
13710
0
    if (ret != XML_ERR_OK) {
13711
0
        xmlFreeNodeList(*lst);
13712
0
  *lst = NULL;
13713
0
    }
13714
13715
0
    if (doc->dict != NULL)
13716
0
        ctxt->dict = NULL;
13717
0
    xmlFreeParserCtxt(ctxt);
13718
13719
0
    return(ret);
13720
#else /* !SAX2 */
13721
    return(XML_ERR_INTERNAL_ERROR);
13722
#endif
13723
0
}
13724
13725
#ifdef LIBXML_SAX1_ENABLED
13726
/**
13727
 * xmlParseBalancedChunkMemoryRecover:
13728
 * @doc:  the document the chunk pertains to (must not be NULL)
13729
 * @sax:  the SAX handler block (possibly NULL)
13730
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
13731
 * @depth:  Used for loop detection, use 0
13732
 * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
13733
 * @lst:  the return value for the set of parsed nodes
13734
 * @recover: return nodes even if the data is broken (use 0)
13735
 *
13736
 *
13737
 * Parse a well-balanced chunk of an XML document
13738
 * called by the parser
13739
 * The allowed sequence for the Well Balanced Chunk is the one defined by
13740
 * the content production in the XML grammar:
13741
 *
13742
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13743
 *
13744
 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13745
 *    the parser error code otherwise
13746
 *
13747
 * In case recover is set to 1, the nodelist will not be empty even if
13748
 * the parsed chunk is not well balanced, assuming the parsing succeeded to
13749
 * some extent.
13750
 */
13751
int
13752
xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13753
     void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
13754
0
     int recover) {
13755
0
    xmlParserCtxtPtr ctxt;
13756
0
    xmlDocPtr newDoc;
13757
0
    xmlSAXHandlerPtr oldsax = NULL;
13758
0
    xmlNodePtr content, newRoot;
13759
0
    int size;
13760
0
    int ret = 0;
13761
13762
0
    if (depth > 40) {
13763
0
  return(XML_ERR_ENTITY_LOOP);
13764
0
    }
13765
13766
13767
0
    if (lst != NULL)
13768
0
        *lst = NULL;
13769
0
    if (string == NULL)
13770
0
        return(-1);
13771
13772
0
    size = xmlStrlen(string);
13773
13774
0
    ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13775
0
    if (ctxt == NULL) return(-1);
13776
0
    ctxt->userData = ctxt;
13777
0
    if (sax != NULL) {
13778
0
  oldsax = ctxt->sax;
13779
0
        ctxt->sax = sax;
13780
0
  if (user_data != NULL)
13781
0
      ctxt->userData = user_data;
13782
0
    }
13783
0
    newDoc = xmlNewDoc(BAD_CAST "1.0");
13784
0
    if (newDoc == NULL) {
13785
0
  xmlFreeParserCtxt(ctxt);
13786
0
  return(-1);
13787
0
    }
13788
0
    newDoc->properties = XML_DOC_INTERNAL;
13789
0
    if ((doc != NULL) && (doc->dict != NULL)) {
13790
0
        xmlDictFree(ctxt->dict);
13791
0
  ctxt->dict = doc->dict;
13792
0
  xmlDictReference(ctxt->dict);
13793
0
  ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13794
0
  ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13795
0
  ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13796
0
  ctxt->dictNames = 1;
13797
0
    } else {
13798
0
  xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL);
13799
0
    }
13800
    /* doc == NULL is only supported for historic reasons */
13801
0
    if (doc != NULL) {
13802
0
  newDoc->intSubset = doc->intSubset;
13803
0
  newDoc->extSubset = doc->extSubset;
13804
0
    }
13805
0
    newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13806
0
    if (newRoot == NULL) {
13807
0
  if (sax != NULL)
13808
0
      ctxt->sax = oldsax;
13809
0
  xmlFreeParserCtxt(ctxt);
13810
0
  newDoc->intSubset = NULL;
13811
0
  newDoc->extSubset = NULL;
13812
0
        xmlFreeDoc(newDoc);
13813
0
  return(-1);
13814
0
    }
13815
0
    xmlAddChild((xmlNodePtr) newDoc, newRoot);
13816
0
    nodePush(ctxt, newRoot);
13817
    /* doc == NULL is only supported for historic reasons */
13818
0
    if (doc == NULL) {
13819
0
  ctxt->myDoc = newDoc;
13820
0
    } else {
13821
0
  ctxt->myDoc = newDoc;
13822
0
  newDoc->children->doc = doc;
13823
  /* Ensure that doc has XML spec namespace */
13824
0
  xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
13825
0
  newDoc->oldNs = doc->oldNs;
13826
0
    }
13827
0
    ctxt->instate = XML_PARSER_CONTENT;
13828
0
    ctxt->input_id = 2;
13829
0
    ctxt->depth = depth;
13830
13831
    /*
13832
     * Doing validity checking on chunk doesn't make sense
13833
     */
13834
0
    ctxt->validate = 0;
13835
0
    ctxt->loadsubset = 0;
13836
0
    xmlDetectSAX2(ctxt);
13837
13838
0
    if ( doc != NULL ){
13839
0
        content = doc->children;
13840
0
        doc->children = NULL;
13841
0
        xmlParseContent(ctxt);
13842
0
        doc->children = content;
13843
0
    }
13844
0
    else {
13845
0
        xmlParseContent(ctxt);
13846
0
    }
13847
0
    if ((RAW == '<') && (NXT(1) == '/')) {
13848
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13849
0
    } else if (RAW != 0) {
13850
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13851
0
    }
13852
0
    if (ctxt->node != newDoc->children) {
13853
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13854
0
    }
13855
13856
0
    if (!ctxt->wellFormed) {
13857
0
        if (ctxt->errNo == 0)
13858
0
      ret = 1;
13859
0
  else
13860
0
      ret = ctxt->errNo;
13861
0
    } else {
13862
0
      ret = 0;
13863
0
    }
13864
13865
0
    if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
13866
0
  xmlNodePtr cur;
13867
13868
  /*
13869
   * Return the newly created nodeset after unlinking it from
13870
   * they pseudo parent.
13871
   */
13872
0
  cur = newDoc->children->children;
13873
0
  *lst = cur;
13874
0
  while (cur != NULL) {
13875
0
      xmlSetTreeDoc(cur, doc);
13876
0
      cur->parent = NULL;
13877
0
      cur = cur->next;
13878
0
  }
13879
0
  newDoc->children->children = NULL;
13880
0
    }
13881
13882
0
    if (sax != NULL)
13883
0
  ctxt->sax = oldsax;
13884
0
    xmlFreeParserCtxt(ctxt);
13885
0
    newDoc->intSubset = NULL;
13886
0
    newDoc->extSubset = NULL;
13887
    /* This leaks the namespace list if doc == NULL */
13888
0
    newDoc->oldNs = NULL;
13889
0
    xmlFreeDoc(newDoc);
13890
13891
0
    return(ret);
13892
0
}
13893
13894
/**
13895
 * xmlSAXParseEntity:
13896
 * @sax:  the SAX handler block
13897
 * @filename:  the filename
13898
 *
13899
 * DEPRECATED: Don't use.
13900
 *
13901
 * parse an XML external entity out of context and build a tree.
13902
 * It use the given SAX function block to handle the parsing callback.
13903
 * If sax is NULL, fallback to the default DOM tree building routines.
13904
 *
13905
 * [78] extParsedEnt ::= TextDecl? content
13906
 *
13907
 * This correspond to a "Well Balanced" chunk
13908
 *
13909
 * Returns the resulting document tree
13910
 */
13911
13912
xmlDocPtr
13913
0
xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
13914
0
    xmlDocPtr ret;
13915
0
    xmlParserCtxtPtr ctxt;
13916
13917
0
    ctxt = xmlCreateFileParserCtxt(filename);
13918
0
    if (ctxt == NULL) {
13919
0
  return(NULL);
13920
0
    }
13921
0
    if (sax != NULL) {
13922
0
  if (ctxt->sax != NULL)
13923
0
      xmlFree(ctxt->sax);
13924
0
        ctxt->sax = sax;
13925
0
        ctxt->userData = NULL;
13926
0
    }
13927
13928
0
    xmlParseExtParsedEnt(ctxt);
13929
13930
0
    if (ctxt->wellFormed)
13931
0
  ret = ctxt->myDoc;
13932
0
    else {
13933
0
        ret = NULL;
13934
0
        xmlFreeDoc(ctxt->myDoc);
13935
0
        ctxt->myDoc = NULL;
13936
0
    }
13937
0
    if (sax != NULL)
13938
0
        ctxt->sax = NULL;
13939
0
    xmlFreeParserCtxt(ctxt);
13940
13941
0
    return(ret);
13942
0
}
13943
13944
/**
13945
 * xmlParseEntity:
13946
 * @filename:  the filename
13947
 *
13948
 * parse an XML external entity out of context and build a tree.
13949
 *
13950
 * [78] extParsedEnt ::= TextDecl? content
13951
 *
13952
 * This correspond to a "Well Balanced" chunk
13953
 *
13954
 * Returns the resulting document tree
13955
 */
13956
13957
xmlDocPtr
13958
0
xmlParseEntity(const char *filename) {
13959
0
    return(xmlSAXParseEntity(NULL, filename));
13960
0
}
13961
#endif /* LIBXML_SAX1_ENABLED */
13962
13963
/**
13964
 * xmlCreateEntityParserCtxtInternal:
13965
 * @URL:  the entity URL
13966
 * @ID:  the entity PUBLIC ID
13967
 * @base:  a possible base for the target URI
13968
 * @pctx:  parser context used to set options on new context
13969
 *
13970
 * Create a parser context for an external entity
13971
 * Automatic support for ZLIB/Compress compressed document is provided
13972
 * by default if found at compile-time.
13973
 *
13974
 * Returns the new parser context or NULL
13975
 */
13976
static xmlParserCtxtPtr
13977
xmlCreateEntityParserCtxtInternal(xmlSAXHandlerPtr sax, void *userData,
13978
        const xmlChar *URL, const xmlChar *ID, const xmlChar *base,
13979
4.41M
        xmlParserCtxtPtr pctx) {
13980
4.41M
    xmlParserCtxtPtr ctxt;
13981
4.41M
    xmlParserInputPtr inputStream;
13982
4.41M
    char *directory = NULL;
13983
4.41M
    xmlChar *uri;
13984
13985
4.41M
    ctxt = xmlNewSAXParserCtxt(sax, userData);
13986
4.41M
    if (ctxt == NULL) {
13987
0
  return(NULL);
13988
0
    }
13989
13990
4.41M
    if (pctx != NULL) {
13991
4.41M
        ctxt->options = pctx->options;
13992
4.41M
        ctxt->_private = pctx->_private;
13993
  /*
13994
   * this is a subparser of pctx, so the input_id should be
13995
   * incremented to distinguish from main entity
13996
   */
13997
4.41M
  ctxt->input_id = pctx->input_id + 1;
13998
4.41M
    }
13999
14000
    /* Don't read from stdin. */
14001
4.41M
    if (xmlStrcmp(URL, BAD_CAST "-") == 0)
14002
0
        URL = BAD_CAST "./-";
14003
14004
4.41M
    uri = xmlBuildURI(URL, base);
14005
14006
4.41M
    if (uri == NULL) {
14007
77.2k
  inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
14008
77.2k
  if (inputStream == NULL) {
14009
77.0k
      xmlFreeParserCtxt(ctxt);
14010
77.0k
      return(NULL);
14011
77.0k
  }
14012
14013
211
  inputPush(ctxt, inputStream);
14014
14015
211
  if ((ctxt->directory == NULL) && (directory == NULL))
14016
211
      directory = xmlParserGetDirectory((char *)URL);
14017
211
  if ((ctxt->directory == NULL) && (directory != NULL))
14018
211
      ctxt->directory = directory;
14019
4.33M
    } else {
14020
4.33M
  inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
14021
4.33M
  if (inputStream == NULL) {
14022
39.1k
      xmlFree(uri);
14023
39.1k
      xmlFreeParserCtxt(ctxt);
14024
39.1k
      return(NULL);
14025
39.1k
  }
14026
14027
4.29M
  inputPush(ctxt, inputStream);
14028
14029
4.29M
  if ((ctxt->directory == NULL) && (directory == NULL))
14030
4.29M
      directory = xmlParserGetDirectory((char *)uri);
14031
4.29M
  if ((ctxt->directory == NULL) && (directory != NULL))
14032
4.29M
      ctxt->directory = directory;
14033
4.29M
  xmlFree(uri);
14034
4.29M
    }
14035
4.30M
    return(ctxt);
14036
4.41M
}
14037
14038
/**
14039
 * xmlCreateEntityParserCtxt:
14040
 * @URL:  the entity URL
14041
 * @ID:  the entity PUBLIC ID
14042
 * @base:  a possible base for the target URI
14043
 *
14044
 * Create a parser context for an external entity
14045
 * Automatic support for ZLIB/Compress compressed document is provided
14046
 * by default if found at compile-time.
14047
 *
14048
 * Returns the new parser context or NULL
14049
 */
14050
xmlParserCtxtPtr
14051
xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
14052
0
                    const xmlChar *base) {
14053
0
    return xmlCreateEntityParserCtxtInternal(NULL, NULL, URL, ID, base, NULL);
14054
14055
0
}
14056
14057
/************************************************************************
14058
 *                  *
14059
 *    Front ends when parsing from a file     *
14060
 *                  *
14061
 ************************************************************************/
14062
14063
/**
14064
 * xmlCreateURLParserCtxt:
14065
 * @filename:  the filename or URL
14066
 * @options:  a combination of xmlParserOption
14067
 *
14068
 * Create a parser context for a file or URL content.
14069
 * Automatic support for ZLIB/Compress compressed document is provided
14070
 * by default if found at compile-time and for file accesses
14071
 *
14072
 * Returns the new parser context or NULL
14073
 */
14074
xmlParserCtxtPtr
14075
xmlCreateURLParserCtxt(const char *filename, int options)
14076
0
{
14077
0
    xmlParserCtxtPtr ctxt;
14078
0
    xmlParserInputPtr inputStream;
14079
0
    char *directory = NULL;
14080
14081
0
    ctxt = xmlNewParserCtxt();
14082
0
    if (ctxt == NULL) {
14083
0
  xmlErrMemory(NULL, "cannot allocate parser context");
14084
0
  return(NULL);
14085
0
    }
14086
14087
0
    if (options)
14088
0
  xmlCtxtUseOptionsInternal(ctxt, options, NULL);
14089
0
    ctxt->linenumbers = 1;
14090
14091
0
    inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
14092
0
    if (inputStream == NULL) {
14093
0
  xmlFreeParserCtxt(ctxt);
14094
0
  return(NULL);
14095
0
    }
14096
14097
0
    inputPush(ctxt, inputStream);
14098
0
    if ((ctxt->directory == NULL) && (directory == NULL))
14099
0
        directory = xmlParserGetDirectory(filename);
14100
0
    if ((ctxt->directory == NULL) && (directory != NULL))
14101
0
        ctxt->directory = directory;
14102
14103
0
    return(ctxt);
14104
0
}
14105
14106
/**
14107
 * xmlCreateFileParserCtxt:
14108
 * @filename:  the filename
14109
 *
14110
 * Create a parser context for a file content.
14111
 * Automatic support for ZLIB/Compress compressed document is provided
14112
 * by default if found at compile-time.
14113
 *
14114
 * Returns the new parser context or NULL
14115
 */
14116
xmlParserCtxtPtr
14117
xmlCreateFileParserCtxt(const char *filename)
14118
0
{
14119
0
    return(xmlCreateURLParserCtxt(filename, 0));
14120
0
}
14121
14122
#ifdef LIBXML_SAX1_ENABLED
14123
/**
14124
 * xmlSAXParseFileWithData:
14125
 * @sax:  the SAX handler block
14126
 * @filename:  the filename
14127
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
14128
 *             documents
14129
 * @data:  the userdata
14130
 *
14131
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
14132
 *
14133
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14134
 * compressed document is provided by default if found at compile-time.
14135
 * It use the given SAX function block to handle the parsing callback.
14136
 * If sax is NULL, fallback to the default DOM tree building routines.
14137
 *
14138
 * User data (void *) is stored within the parser context in the
14139
 * context's _private member, so it is available nearly everywhere in libxml
14140
 *
14141
 * Returns the resulting document tree
14142
 */
14143
14144
xmlDocPtr
14145
xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
14146
0
                        int recovery, void *data) {
14147
0
    xmlDocPtr ret;
14148
0
    xmlParserCtxtPtr ctxt;
14149
14150
0
    xmlInitParser();
14151
14152
0
    ctxt = xmlCreateFileParserCtxt(filename);
14153
0
    if (ctxt == NULL) {
14154
0
  return(NULL);
14155
0
    }
14156
0
    if (sax != NULL) {
14157
0
  if (ctxt->sax != NULL)
14158
0
      xmlFree(ctxt->sax);
14159
0
        ctxt->sax = sax;
14160
0
    }
14161
0
    xmlDetectSAX2(ctxt);
14162
0
    if (data!=NULL) {
14163
0
  ctxt->_private = data;
14164
0
    }
14165
14166
0
    if (ctxt->directory == NULL)
14167
0
        ctxt->directory = xmlParserGetDirectory(filename);
14168
14169
0
    ctxt->recovery = recovery;
14170
14171
0
    xmlParseDocument(ctxt);
14172
14173
0
    if ((ctxt->wellFormed) || recovery) {
14174
0
        ret = ctxt->myDoc;
14175
0
  if ((ret != NULL) && (ctxt->input->buf != NULL)) {
14176
0
      if (ctxt->input->buf->compressed > 0)
14177
0
    ret->compression = 9;
14178
0
      else
14179
0
    ret->compression = ctxt->input->buf->compressed;
14180
0
  }
14181
0
    }
14182
0
    else {
14183
0
       ret = NULL;
14184
0
       xmlFreeDoc(ctxt->myDoc);
14185
0
       ctxt->myDoc = NULL;
14186
0
    }
14187
0
    if (sax != NULL)
14188
0
        ctxt->sax = NULL;
14189
0
    xmlFreeParserCtxt(ctxt);
14190
14191
0
    return(ret);
14192
0
}
14193
14194
/**
14195
 * xmlSAXParseFile:
14196
 * @sax:  the SAX handler block
14197
 * @filename:  the filename
14198
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
14199
 *             documents
14200
 *
14201
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
14202
 *
14203
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14204
 * compressed document is provided by default if found at compile-time.
14205
 * It use the given SAX function block to handle the parsing callback.
14206
 * If sax is NULL, fallback to the default DOM tree building routines.
14207
 *
14208
 * Returns the resulting document tree
14209
 */
14210
14211
xmlDocPtr
14212
xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
14213
0
                          int recovery) {
14214
0
    return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
14215
0
}
14216
14217
/**
14218
 * xmlRecoverDoc:
14219
 * @cur:  a pointer to an array of xmlChar
14220
 *
14221
 * DEPRECATED: Use xmlReadDoc with XML_PARSE_RECOVER.
14222
 *
14223
 * parse an XML in-memory document and build a tree.
14224
 * In the case the document is not Well Formed, a attempt to build a
14225
 * tree is tried anyway
14226
 *
14227
 * Returns the resulting document tree or NULL in case of failure
14228
 */
14229
14230
xmlDocPtr
14231
0
xmlRecoverDoc(const xmlChar *cur) {
14232
0
    return(xmlSAXParseDoc(NULL, cur, 1));
14233
0
}
14234
14235
/**
14236
 * xmlParseFile:
14237
 * @filename:  the filename
14238
 *
14239
 * DEPRECATED: Use xmlReadFile.
14240
 *
14241
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14242
 * compressed document is provided by default if found at compile-time.
14243
 *
14244
 * Returns the resulting document tree if the file was wellformed,
14245
 * NULL otherwise.
14246
 */
14247
14248
xmlDocPtr
14249
0
xmlParseFile(const char *filename) {
14250
0
    return(xmlSAXParseFile(NULL, filename, 0));
14251
0
}
14252
14253
/**
14254
 * xmlRecoverFile:
14255
 * @filename:  the filename
14256
 *
14257
 * DEPRECATED: Use xmlReadFile with XML_PARSE_RECOVER.
14258
 *
14259
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14260
 * compressed document is provided by default if found at compile-time.
14261
 * In the case the document is not Well Formed, it attempts to build
14262
 * a tree anyway
14263
 *
14264
 * Returns the resulting document tree or NULL in case of failure
14265
 */
14266
14267
xmlDocPtr
14268
0
xmlRecoverFile(const char *filename) {
14269
0
    return(xmlSAXParseFile(NULL, filename, 1));
14270
0
}
14271
14272
14273
/**
14274
 * xmlSetupParserForBuffer:
14275
 * @ctxt:  an XML parser context
14276
 * @buffer:  a xmlChar * buffer
14277
 * @filename:  a file name
14278
 *
14279
 * DEPRECATED: Don't use.
14280
 *
14281
 * Setup the parser context to parse a new buffer; Clears any prior
14282
 * contents from the parser context. The buffer parameter must not be
14283
 * NULL, but the filename parameter can be
14284
 */
14285
void
14286
xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
14287
                             const char* filename)
14288
0
{
14289
0
    xmlParserInputPtr input;
14290
14291
0
    if ((ctxt == NULL) || (buffer == NULL))
14292
0
        return;
14293
14294
0
    input = xmlNewInputStream(ctxt);
14295
0
    if (input == NULL) {
14296
0
        xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
14297
0
        xmlClearParserCtxt(ctxt);
14298
0
        return;
14299
0
    }
14300
14301
0
    xmlClearParserCtxt(ctxt);
14302
0
    if (filename != NULL)
14303
0
        input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
14304
0
    input->base = buffer;
14305
0
    input->cur = buffer;
14306
0
    input->end = &buffer[xmlStrlen(buffer)];
14307
0
    inputPush(ctxt, input);
14308
0
}
14309
14310
/**
14311
 * xmlSAXUserParseFile:
14312
 * @sax:  a SAX handler
14313
 * @user_data:  The user data returned on SAX callbacks
14314
 * @filename:  a file name
14315
 *
14316
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
14317
 *
14318
 * parse an XML file and call the given SAX handler routines.
14319
 * Automatic support for ZLIB/Compress compressed document is provided
14320
 *
14321
 * Returns 0 in case of success or a error number otherwise
14322
 */
14323
int
14324
xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
14325
0
                    const char *filename) {
14326
0
    int ret = 0;
14327
0
    xmlParserCtxtPtr ctxt;
14328
14329
0
    ctxt = xmlCreateFileParserCtxt(filename);
14330
0
    if (ctxt == NULL) return -1;
14331
0
    if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14332
0
  xmlFree(ctxt->sax);
14333
0
    ctxt->sax = sax;
14334
0
    xmlDetectSAX2(ctxt);
14335
14336
0
    if (user_data != NULL)
14337
0
  ctxt->userData = user_data;
14338
14339
0
    xmlParseDocument(ctxt);
14340
14341
0
    if (ctxt->wellFormed)
14342
0
  ret = 0;
14343
0
    else {
14344
0
        if (ctxt->errNo != 0)
14345
0
      ret = ctxt->errNo;
14346
0
  else
14347
0
      ret = -1;
14348
0
    }
14349
0
    if (sax != NULL)
14350
0
  ctxt->sax = NULL;
14351
0
    if (ctxt->myDoc != NULL) {
14352
0
        xmlFreeDoc(ctxt->myDoc);
14353
0
  ctxt->myDoc = NULL;
14354
0
    }
14355
0
    xmlFreeParserCtxt(ctxt);
14356
14357
0
    return ret;
14358
0
}
14359
#endif /* LIBXML_SAX1_ENABLED */
14360
14361
/************************************************************************
14362
 *                  *
14363
 *    Front ends when parsing from memory     *
14364
 *                  *
14365
 ************************************************************************/
14366
14367
/**
14368
 * xmlCreateMemoryParserCtxt:
14369
 * @buffer:  a pointer to a char array
14370
 * @size:  the size of the array
14371
 *
14372
 * Create a parser context for an XML in-memory document.
14373
 *
14374
 * Returns the new parser context or NULL
14375
 */
14376
xmlParserCtxtPtr
14377
699k
xmlCreateMemoryParserCtxt(const char *buffer, int size) {
14378
699k
    xmlParserCtxtPtr ctxt;
14379
699k
    xmlParserInputPtr input;
14380
699k
    xmlParserInputBufferPtr buf;
14381
14382
699k
    if (buffer == NULL)
14383
0
  return(NULL);
14384
699k
    if (size <= 0)
14385
47.4k
  return(NULL);
14386
14387
651k
    ctxt = xmlNewParserCtxt();
14388
651k
    if (ctxt == NULL)
14389
0
  return(NULL);
14390
14391
    /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
14392
651k
    buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
14393
651k
    if (buf == NULL) {
14394
0
  xmlFreeParserCtxt(ctxt);
14395
0
  return(NULL);
14396
0
    }
14397
14398
651k
    input = xmlNewInputStream(ctxt);
14399
651k
    if (input == NULL) {
14400
0
  xmlFreeParserInputBuffer(buf);
14401
0
  xmlFreeParserCtxt(ctxt);
14402
0
  return(NULL);
14403
0
    }
14404
14405
651k
    input->filename = NULL;
14406
651k
    input->buf = buf;
14407
651k
    xmlBufResetInput(input->buf->buffer, input);
14408
14409
651k
    inputPush(ctxt, input);
14410
651k
    return(ctxt);
14411
651k
}
14412
14413
#ifdef LIBXML_SAX1_ENABLED
14414
/**
14415
 * xmlSAXParseMemoryWithData:
14416
 * @sax:  the SAX handler block
14417
 * @buffer:  an pointer to a char array
14418
 * @size:  the size of the array
14419
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
14420
 *             documents
14421
 * @data:  the userdata
14422
 *
14423
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
14424
 *
14425
 * parse an XML in-memory block and use the given SAX function block
14426
 * to handle the parsing callback. If sax is NULL, fallback to the default
14427
 * DOM tree building routines.
14428
 *
14429
 * User data (void *) is stored within the parser context in the
14430
 * context's _private member, so it is available nearly everywhere in libxml
14431
 *
14432
 * Returns the resulting document tree
14433
 */
14434
14435
xmlDocPtr
14436
xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
14437
0
            int size, int recovery, void *data) {
14438
0
    xmlDocPtr ret;
14439
0
    xmlParserCtxtPtr ctxt;
14440
14441
0
    xmlInitParser();
14442
14443
0
    ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14444
0
    if (ctxt == NULL) return(NULL);
14445
0
    if (sax != NULL) {
14446
0
  if (ctxt->sax != NULL)
14447
0
      xmlFree(ctxt->sax);
14448
0
        ctxt->sax = sax;
14449
0
    }
14450
0
    xmlDetectSAX2(ctxt);
14451
0
    if (data!=NULL) {
14452
0
  ctxt->_private=data;
14453
0
    }
14454
14455
0
    ctxt->recovery = recovery;
14456
14457
0
    xmlParseDocument(ctxt);
14458
14459
0
    if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14460
0
    else {
14461
0
       ret = NULL;
14462
0
       xmlFreeDoc(ctxt->myDoc);
14463
0
       ctxt->myDoc = NULL;
14464
0
    }
14465
0
    if (sax != NULL)
14466
0
  ctxt->sax = NULL;
14467
0
    xmlFreeParserCtxt(ctxt);
14468
14469
0
    return(ret);
14470
0
}
14471
14472
/**
14473
 * xmlSAXParseMemory:
14474
 * @sax:  the SAX handler block
14475
 * @buffer:  an pointer to a char array
14476
 * @size:  the size of the array
14477
 * @recovery:  work in recovery mode, i.e. tries to read not Well Formed
14478
 *             documents
14479
 *
14480
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
14481
 *
14482
 * parse an XML in-memory block and use the given SAX function block
14483
 * to handle the parsing callback. If sax is NULL, fallback to the default
14484
 * DOM tree building routines.
14485
 *
14486
 * Returns the resulting document tree
14487
 */
14488
xmlDocPtr
14489
xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
14490
0
            int size, int recovery) {
14491
0
    return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
14492
0
}
14493
14494
/**
14495
 * xmlParseMemory:
14496
 * @buffer:  an pointer to a char array
14497
 * @size:  the size of the array
14498
 *
14499
 * DEPRECATED: Use xmlReadMemory.
14500
 *
14501
 * parse an XML in-memory block and build a tree.
14502
 *
14503
 * Returns the resulting document tree
14504
 */
14505
14506
0
xmlDocPtr xmlParseMemory(const char *buffer, int size) {
14507
0
   return(xmlSAXParseMemory(NULL, buffer, size, 0));
14508
0
}
14509
14510
/**
14511
 * xmlRecoverMemory:
14512
 * @buffer:  an pointer to a char array
14513
 * @size:  the size of the array
14514
 *
14515
 * DEPRECATED: Use xmlReadMemory with XML_PARSE_RECOVER.
14516
 *
14517
 * parse an XML in-memory block and build a tree.
14518
 * In the case the document is not Well Formed, an attempt to
14519
 * build a tree is tried anyway
14520
 *
14521
 * Returns the resulting document tree or NULL in case of error
14522
 */
14523
14524
0
xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
14525
0
   return(xmlSAXParseMemory(NULL, buffer, size, 1));
14526
0
}
14527
14528
/**
14529
 * xmlSAXUserParseMemory:
14530
 * @sax:  a SAX handler
14531
 * @user_data:  The user data returned on SAX callbacks
14532
 * @buffer:  an in-memory XML document input
14533
 * @size:  the length of the XML document in bytes
14534
 *
14535
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
14536
 *
14537
 * parse an XML in-memory buffer and call the given SAX handler routines.
14538
 *
14539
 * Returns 0 in case of success or a error number otherwise
14540
 */
14541
int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
14542
0
        const char *buffer, int size) {
14543
0
    int ret = 0;
14544
0
    xmlParserCtxtPtr ctxt;
14545
14546
0
    xmlInitParser();
14547
14548
0
    ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14549
0
    if (ctxt == NULL) return -1;
14550
0
    if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14551
0
        xmlFree(ctxt->sax);
14552
0
    ctxt->sax = sax;
14553
0
    xmlDetectSAX2(ctxt);
14554
14555
0
    if (user_data != NULL)
14556
0
  ctxt->userData = user_data;
14557
14558
0
    xmlParseDocument(ctxt);
14559
14560
0
    if (ctxt->wellFormed)
14561
0
  ret = 0;
14562
0
    else {
14563
0
        if (ctxt->errNo != 0)
14564
0
      ret = ctxt->errNo;
14565
0
  else
14566
0
      ret = -1;
14567
0
    }
14568
0
    if (sax != NULL)
14569
0
        ctxt->sax = NULL;
14570
0
    if (ctxt->myDoc != NULL) {
14571
0
        xmlFreeDoc(ctxt->myDoc);
14572
0
  ctxt->myDoc = NULL;
14573
0
    }
14574
0
    xmlFreeParserCtxt(ctxt);
14575
14576
0
    return ret;
14577
0
}
14578
#endif /* LIBXML_SAX1_ENABLED */
14579
14580
/**
14581
 * xmlCreateDocParserCtxt:
14582
 * @cur:  a pointer to an array of xmlChar
14583
 *
14584
 * Creates a parser context for an XML in-memory document.
14585
 *
14586
 * Returns the new parser context or NULL
14587
 */
14588
xmlParserCtxtPtr
14589
0
xmlCreateDocParserCtxt(const xmlChar *cur) {
14590
0
    int len;
14591
14592
0
    if (cur == NULL)
14593
0
  return(NULL);
14594
0
    len = xmlStrlen(cur);
14595
0
    return(xmlCreateMemoryParserCtxt((const char *)cur, len));
14596
0
}
14597
14598
#ifdef LIBXML_SAX1_ENABLED
14599
/**
14600
 * xmlSAXParseDoc:
14601
 * @sax:  the SAX handler block
14602
 * @cur:  a pointer to an array of xmlChar
14603
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
14604
 *             documents
14605
 *
14606
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadDoc.
14607
 *
14608
 * parse an XML in-memory document and build a tree.
14609
 * It use the given SAX function block to handle the parsing callback.
14610
 * If sax is NULL, fallback to the default DOM tree building routines.
14611
 *
14612
 * Returns the resulting document tree
14613
 */
14614
14615
xmlDocPtr
14616
0
xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
14617
0
    xmlDocPtr ret;
14618
0
    xmlParserCtxtPtr ctxt;
14619
0
    xmlSAXHandlerPtr oldsax = NULL;
14620
14621
0
    if (cur == NULL) return(NULL);
14622
14623
14624
0
    ctxt = xmlCreateDocParserCtxt(cur);
14625
0
    if (ctxt == NULL) return(NULL);
14626
0
    if (sax != NULL) {
14627
0
        oldsax = ctxt->sax;
14628
0
        ctxt->sax = sax;
14629
0
        ctxt->userData = NULL;
14630
0
    }
14631
0
    xmlDetectSAX2(ctxt);
14632
14633
0
    xmlParseDocument(ctxt);
14634
0
    if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14635
0
    else {
14636
0
       ret = NULL;
14637
0
       xmlFreeDoc(ctxt->myDoc);
14638
0
       ctxt->myDoc = NULL;
14639
0
    }
14640
0
    if (sax != NULL)
14641
0
  ctxt->sax = oldsax;
14642
0
    xmlFreeParserCtxt(ctxt);
14643
14644
0
    return(ret);
14645
0
}
14646
14647
/**
14648
 * xmlParseDoc:
14649
 * @cur:  a pointer to an array of xmlChar
14650
 *
14651
 * DEPRECATED: Use xmlReadDoc.
14652
 *
14653
 * parse an XML in-memory document and build a tree.
14654
 *
14655
 * Returns the resulting document tree
14656
 */
14657
14658
xmlDocPtr
14659
0
xmlParseDoc(const xmlChar *cur) {
14660
0
    return(xmlSAXParseDoc(NULL, cur, 0));
14661
0
}
14662
#endif /* LIBXML_SAX1_ENABLED */
14663
14664
#ifdef LIBXML_LEGACY_ENABLED
14665
/************************************************************************
14666
 *                  *
14667
 *  Specific function to keep track of entities references    *
14668
 *  and used by the XSLT debugger         *
14669
 *                  *
14670
 ************************************************************************/
14671
14672
static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
14673
14674
/**
14675
 * xmlAddEntityReference:
14676
 * @ent : A valid entity
14677
 * @firstNode : A valid first node for children of entity
14678
 * @lastNode : A valid last node of children entity
14679
 *
14680
 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
14681
 */
14682
static void
14683
xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
14684
                      xmlNodePtr lastNode)
14685
{
14686
    if (xmlEntityRefFunc != NULL) {
14687
        (*xmlEntityRefFunc) (ent, firstNode, lastNode);
14688
    }
14689
}
14690
14691
14692
/**
14693
 * xmlSetEntityReferenceFunc:
14694
 * @func: A valid function
14695
 *
14696
 * Set the function to call call back when a xml reference has been made
14697
 */
14698
void
14699
xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
14700
{
14701
    xmlEntityRefFunc = func;
14702
}
14703
#endif /* LIBXML_LEGACY_ENABLED */
14704
14705
/************************************************************************
14706
 *                  *
14707
 *        Miscellaneous       *
14708
 *                  *
14709
 ************************************************************************/
14710
14711
/* Non-zero once xmlInitParser() has completed; reset by xmlCleanupParser(). */
static int xmlParserInitialized = 0;

/**
 * xmlInitParser:
 *
 * Initialization function for the XML parser.
 * This is not reentrant. Call once before processing in case of
 * use in multithreaded programs.
 *
 * Runs the library's global initializers and then sets
 * xmlParserInitialized, so that later calls return immediately.
 */

void
xmlInitParser(void) {
    /* Fast path: already initialized. This check is made without the lock. */
    if (xmlParserInitialized != 0)
  return;

#if defined(_WIN32) && (!defined(LIBXML_STATIC) || defined(LIBXML_STATIC_FOR_DLL))
    /* Only register the exit-time cleanup when the default free() is in use. */
    if (xmlFree == free)
        atexit(xmlCleanupParser);
#endif

#ifdef LIBXML_THREAD_ENABLED
    /*
     * Re-check the flag under the global init mutex. The opening brace
     * below is closed inside the matching LIBXML_THREAD_ENABLED block
     * at the end of the function.
     */
    __xmlGlobalInitMutexLock();
    if (xmlParserInitialized == 0) {
#endif
  xmlInitThreads();
  xmlInitGlobals();
  xmlInitMemory();
        xmlInitializeDict();
  xmlInitCharEncodingHandlers();
  xmlDefaultSAXHandlerInit();
  xmlRegisterDefaultInputCallbacks();
#ifdef LIBXML_OUTPUT_ENABLED
  xmlRegisterDefaultOutputCallbacks();
#endif /* LIBXML_OUTPUT_ENABLED */
#ifdef LIBXML_HTML_ENABLED
  htmlInitAutoClose();
  htmlDefaultSAXHandlerInit();
#endif
#if defined(LIBXML_XPATH_ENABLED) || defined(LIBXML_SCHEMAS_ENABLED)
  xmlXPathInit();
#endif
  /* Set last, after every initializer above has run. */
  xmlParserInitialized = 1;
#ifdef LIBXML_THREAD_ENABLED
    }
    __xmlGlobalInitMutexUnlock();
#endif
}
14758
14759
/**
 * xmlCleanupParser:
 *
 * This function name is somewhat misleading. It does not clean up
 * parser state, it cleans up memory allocated by the library itself.
 * It is a cleanup function for the XML library. It tries to reclaim all
 * related global memory allocated for the library processing.
 * It doesn't deallocate any document related memory. One should
 * call xmlCleanupParser() only when the process has finished using
 * the library and all XML/HTML documents built with it.
 * See also xmlInitParser() which has the opposite function of preparing
 * the library for operations.
 *
 * WARNING: if your application is multithreaded or has plugin support
 *          calling this may crash the application if another thread or
 *          a plugin is still using libxml2. It's sometimes very hard to
 *          guess if libxml2 is in use in the application, some libraries
 *          or plugins may use it without notice. In case of doubt abstain
 *          from calling this function or do it just before calling exit()
 *          to avoid leak reports from valgrind !
 */

void
xmlCleanupParser(void) {
    /* Nothing to do if the library is not currently marked initialized. */
    if (!xmlParserInitialized)
  return;

    xmlCleanupCharEncodingHandlers();
#ifdef LIBXML_CATALOG_ENABLED
    xmlCatalogCleanup();
#endif
    xmlDictCleanup();
    xmlCleanupInputCallbacks();
#ifdef LIBXML_OUTPUT_ENABLED
    xmlCleanupOutputCallbacks();
#endif
#ifdef LIBXML_SCHEMAS_ENABLED
    xmlSchemaCleanupTypes();
    xmlRelaxNGCleanupTypes();
#endif
    xmlCleanupGlobals();
    xmlCleanupThreads(); /* must be last if called not from the main thread */
    xmlCleanupMemory();
    /* Clear the flag so a later xmlInitParser() runs the initializers again. */
    xmlParserInitialized = 0;
}
14804
14805
#if defined(HAVE_ATTRIBUTE_DESTRUCTOR) && !defined(LIBXML_STATIC) && \
    !defined(_WIN32)
/*
 * Destructor hook declared with ATTRIBUTE_DESTRUCTOR: runs the library
 * cleanup, but only while the default free() is still the deallocator.
 */
static void
ATTRIBUTE_DESTRUCTOR
xmlDestructor(void) {
    /*
     * Calling custom deallocation functions in a destructor can cause
     * problems, for example with Nokogiri.
     */
    if (xmlFree != free)
        return;

    xmlCleanupParser();
}
#endif
14818
14819
/************************************************************************
14820
 *                  *
14821
 *  New set (2.6.0) of simpler and more flexible APIs   *
14822
 *                  *
14823
 ************************************************************************/
14824
14825
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A variable named "dict" must be visible where the
 * macro is used; when it is NULL every non-NULL string is freed.
 *
 * The expansion is wrapped in do { } while (0) so it behaves as a
 * single statement (safe as the body of an if/else) and must be
 * followed by a semicolon. @str is evaluated more than once, so it
 * must not have side effects.
 */
#define DICT_FREE(str)						\
    do {							\
	if ((str) && ((!dict) ||				\
	    (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))	\
	    xmlFree((char *)(str));				\
    } while (0)
14836
14837
/**
14838
 * xmlCtxtReset:
14839
 * @ctxt: an XML parser context
14840
 *
14841
 * Reset a parser context
14842
 */
14843
void
14844
xmlCtxtReset(xmlParserCtxtPtr ctxt)
14845
0
{
14846
0
    xmlParserInputPtr input;
14847
0
    xmlDictPtr dict;
14848
14849
0
    if (ctxt == NULL)
14850
0
        return;
14851
14852
0
    dict = ctxt->dict;
14853
14854
0
    while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
14855
0
        xmlFreeInputStream(input);
14856
0
    }
14857
0
    ctxt->inputNr = 0;
14858
0
    ctxt->input = NULL;
14859
14860
0
    ctxt->spaceNr = 0;
14861
0
    if (ctxt->spaceTab != NULL) {
14862
0
  ctxt->spaceTab[0] = -1;
14863
0
  ctxt->space = &ctxt->spaceTab[0];
14864
0
    } else {
14865
0
        ctxt->space = NULL;
14866
0
    }
14867
14868
14869
0
    ctxt->nodeNr = 0;
14870
0
    ctxt->node = NULL;
14871
14872
0
    ctxt->nameNr = 0;
14873
0
    ctxt->name = NULL;
14874
14875
0
    ctxt->nsNr = 0;
14876
14877
0
    DICT_FREE(ctxt->version);
14878
0
    ctxt->version = NULL;
14879
0
    DICT_FREE(ctxt->encoding);
14880
0
    ctxt->encoding = NULL;
14881
0
    DICT_FREE(ctxt->directory);
14882
0
    ctxt->directory = NULL;
14883
0
    DICT_FREE(ctxt->extSubURI);
14884
0
    ctxt->extSubURI = NULL;
14885
0
    DICT_FREE(ctxt->extSubSystem);
14886
0
    ctxt->extSubSystem = NULL;
14887
0
    if (ctxt->myDoc != NULL)
14888
0
        xmlFreeDoc(ctxt->myDoc);
14889
0
    ctxt->myDoc = NULL;
14890
14891
0
    ctxt->standalone = -1;
14892
0
    ctxt->hasExternalSubset = 0;
14893
0
    ctxt->hasPErefs = 0;
14894
0
    ctxt->html = 0;
14895
0
    ctxt->external = 0;
14896
0
    ctxt->instate = XML_PARSER_START;
14897
0
    ctxt->token = 0;
14898
14899
0
    ctxt->wellFormed = 1;
14900
0
    ctxt->nsWellFormed = 1;
14901
0
    ctxt->disableSAX = 0;
14902
0
    ctxt->valid = 1;
14903
#if 0
14904
    ctxt->vctxt.userData = ctxt;
14905
    ctxt->vctxt.error = xmlParserValidityError;
14906
    ctxt->vctxt.warning = xmlParserValidityWarning;
14907
#endif
14908
0
    ctxt->record_info = 0;
14909
0
    ctxt->checkIndex = 0;
14910
0
    ctxt->inSubset = 0;
14911
0
    ctxt->errNo = XML_ERR_OK;
14912
0
    ctxt->depth = 0;
14913
0
    ctxt->charset = XML_CHAR_ENCODING_UTF8;
14914
0
    ctxt->catalogs = NULL;
14915
0
    ctxt->nbentities = 0;
14916
0
    ctxt->sizeentities = 0;
14917
0
    ctxt->sizeentcopy = 0;
14918
0
    xmlInitNodeInfoSeq(&ctxt->node_seq);
14919
14920
0
    if (ctxt->attsDefault != NULL) {
14921
0
        xmlHashFree(ctxt->attsDefault, xmlHashDefaultDeallocator);
14922
0
        ctxt->attsDefault = NULL;
14923
0
    }
14924
0
    if (ctxt->attsSpecial != NULL) {
14925
0
        xmlHashFree(ctxt->attsSpecial, NULL);
14926
0
        ctxt->attsSpecial = NULL;
14927
0
    }
14928
14929
0
#ifdef LIBXML_CATALOG_ENABLED
14930
0
    if (ctxt->catalogs != NULL)
14931
0
  xmlCatalogFreeLocal(ctxt->catalogs);
14932
0
#endif
14933
0
    if (ctxt->lastError.code != XML_ERR_OK)
14934
0
        xmlResetError(&ctxt->lastError);
14935
0
}
14936
14937
/**
14938
 * xmlCtxtResetPush:
14939
 * @ctxt: an XML parser context
14940
 * @chunk:  a pointer to an array of chars
14941
 * @size:  number of chars in the array
14942
 * @filename:  an optional file name or URI
14943
 * @encoding:  the document encoding, or NULL
14944
 *
14945
 * Reset a push parser context
14946
 *
14947
 * Returns 0 in case of success and 1 in case of error
14948
 */
14949
int
14950
xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
14951
                 int size, const char *filename, const char *encoding)
14952
0
{
14953
0
    xmlParserInputPtr inputStream;
14954
0
    xmlParserInputBufferPtr buf;
14955
0
    xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
14956
14957
0
    if (ctxt == NULL)
14958
0
        return(1);
14959
14960
0
    if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
14961
0
        enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
14962
14963
0
    buf = xmlAllocParserInputBuffer(enc);
14964
0
    if (buf == NULL)
14965
0
        return(1);
14966
14967
0
    if (ctxt == NULL) {
14968
0
        xmlFreeParserInputBuffer(buf);
14969
0
        return(1);
14970
0
    }
14971
14972
0
    xmlCtxtReset(ctxt);
14973
14974
0
    if (filename == NULL) {
14975
0
        ctxt->directory = NULL;
14976
0
    } else {
14977
0
        ctxt->directory = xmlParserGetDirectory(filename);
14978
0
    }
14979
14980
0
    inputStream = xmlNewInputStream(ctxt);
14981
0
    if (inputStream == NULL) {
14982
0
        xmlFreeParserInputBuffer(buf);
14983
0
        return(1);
14984
0
    }
14985
14986
0
    if (filename == NULL)
14987
0
        inputStream->filename = NULL;
14988
0
    else
14989
0
        inputStream->filename = (char *)
14990
0
            xmlCanonicPath((const xmlChar *) filename);
14991
0
    inputStream->buf = buf;
14992
0
    xmlBufResetInput(buf->buffer, inputStream);
14993
14994
0
    inputPush(ctxt, inputStream);
14995
14996
0
    if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
14997
0
        (ctxt->input->buf != NULL)) {
14998
0
  size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
14999
0
        size_t cur = ctxt->input->cur - ctxt->input->base;
15000
15001
0
        xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
15002
15003
0
        xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
15004
#ifdef DEBUG_PUSH
15005
        xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
15006
#endif
15007
0
    }
15008
15009
0
    if (encoding != NULL) {
15010
0
        xmlCharEncodingHandlerPtr hdlr;
15011
15012
0
        if (ctxt->encoding != NULL)
15013
0
      xmlFree((xmlChar *) ctxt->encoding);
15014
0
        ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
15015
15016
0
        hdlr = xmlFindCharEncodingHandler(encoding);
15017
0
        if (hdlr != NULL) {
15018
0
            xmlSwitchToEncoding(ctxt, hdlr);
15019
0
  } else {
15020
0
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
15021
0
            "Unsupported encoding %s\n", BAD_CAST encoding);
15022
0
        }
15023
0
    } else if (enc != XML_CHAR_ENCODING_NONE) {
15024
0
        xmlSwitchEncoding(ctxt, enc);
15025
0
    }
15026
15027
0
    return(0);
15028
0
}
15029
15030
15031
/**
15032
 * xmlCtxtUseOptionsInternal:
15033
 * @ctxt: an XML parser context
15034
 * @options:  a combination of xmlParserOption
15035
 * @encoding:  the user provided encoding to use
15036
 *
15037
 * Applies the options to the parser context
15038
 *
15039
 * Returns 0 in case of success, the set of unknown or unimplemented options
15040
 *         in case of error.
15041
 */
15042
static int
15043
xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding)
15044
1.47M
{
15045
1.47M
    if (ctxt == NULL)
15046
0
        return(-1);
15047
1.47M
    if (encoding != NULL) {
15048
0
        if (ctxt->encoding != NULL)
15049
0
      xmlFree((xmlChar *) ctxt->encoding);
15050
0
        ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
15051
0
    }
15052
1.47M
    if (options & XML_PARSE_RECOVER) {
15053
887k
        ctxt->recovery = 1;
15054
887k
        options -= XML_PARSE_RECOVER;
15055
887k
  ctxt->options |= XML_PARSE_RECOVER;
15056
887k
    } else
15057
592k
        ctxt->recovery = 0;
15058
1.47M
    if (options & XML_PARSE_DTDLOAD) {
15059
1.26M
        ctxt->loadsubset = XML_DETECT_IDS;
15060
1.26M
        options -= XML_PARSE_DTDLOAD;
15061
1.26M
  ctxt->options |= XML_PARSE_DTDLOAD;
15062
1.26M
    } else
15063
212k
        ctxt->loadsubset = 0;
15064
1.47M
    if (options & XML_PARSE_DTDATTR) {
15065
537k
        ctxt->loadsubset |= XML_COMPLETE_ATTRS;
15066
537k
        options -= XML_PARSE_DTDATTR;
15067
537k
  ctxt->options |= XML_PARSE_DTDATTR;
15068
537k
    }
15069
1.47M
    if (options & XML_PARSE_NOENT) {
15070
953k
        ctxt->replaceEntities = 1;
15071
        /* ctxt->loadsubset |= XML_DETECT_IDS; */
15072
953k
        options -= XML_PARSE_NOENT;
15073
953k
  ctxt->options |= XML_PARSE_NOENT;
15074
953k
    } else
15075
525k
        ctxt->replaceEntities = 0;
15076
1.47M
    if (options & XML_PARSE_PEDANTIC) {
15077
373k
        ctxt->pedantic = 1;
15078
373k
        options -= XML_PARSE_PEDANTIC;
15079
373k
  ctxt->options |= XML_PARSE_PEDANTIC;
15080
373k
    } else
15081
1.10M
        ctxt->pedantic = 0;
15082
1.47M
    if (options & XML_PARSE_NOBLANKS) {
15083
621k
        ctxt->keepBlanks = 0;
15084
621k
        ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
15085
621k
        options -= XML_PARSE_NOBLANKS;
15086
621k
  ctxt->options |= XML_PARSE_NOBLANKS;
15087
621k
    } else
15088
858k
        ctxt->keepBlanks = 1;
15089
1.47M
    if (options & XML_PARSE_DTDVALID) {
15090
558k
        ctxt->validate = 1;
15091
558k
        if (options & XML_PARSE_NOWARNING)
15092
372k
            ctxt->vctxt.warning = NULL;
15093
558k
        if (options & XML_PARSE_NOERROR)
15094
403k
            ctxt->vctxt.error = NULL;
15095
558k
        options -= XML_PARSE_DTDVALID;
15096
558k
  ctxt->options |= XML_PARSE_DTDVALID;
15097
558k
    } else
15098
921k
        ctxt->validate = 0;
15099
1.47M
    if (options & XML_PARSE_NOWARNING) {
15100
493k
        ctxt->sax->warning = NULL;
15101
493k
        options -= XML_PARSE_NOWARNING;
15102
493k
    }
15103
1.47M
    if (options & XML_PARSE_NOERROR) {
15104
598k
        ctxt->sax->error = NULL;
15105
598k
        ctxt->sax->fatalError = NULL;
15106
598k
        options -= XML_PARSE_NOERROR;
15107
598k
    }
15108
1.47M
#ifdef LIBXML_SAX1_ENABLED
15109
1.47M
    if (options & XML_PARSE_SAX1) {
15110
673k
        ctxt->sax->startElement = xmlSAX2StartElement;
15111
673k
        ctxt->sax->endElement = xmlSAX2EndElement;
15112
673k
        ctxt->sax->startElementNs = NULL;
15113
673k
        ctxt->sax->endElementNs = NULL;
15114
673k
        ctxt->sax->initialized = 1;
15115
673k
        options -= XML_PARSE_SAX1;
15116
673k
  ctxt->options |= XML_PARSE_SAX1;
15117
673k
    }
15118
1.47M
#endif /* LIBXML_SAX1_ENABLED */
15119
1.47M
    if (options & XML_PARSE_NODICT) {
15120
655k
        ctxt->dictNames = 0;
15121
655k
        options -= XML_PARSE_NODICT;
15122
655k
  ctxt->options |= XML_PARSE_NODICT;
15123
824k
    } else {
15124
824k
        ctxt->dictNames = 1;
15125
824k
    }
15126
1.47M
    if (options & XML_PARSE_NOCDATA) {
15127
679k
        ctxt->sax->cdataBlock = NULL;
15128
679k
        options -= XML_PARSE_NOCDATA;
15129
679k
  ctxt->options |= XML_PARSE_NOCDATA;
15130
679k
    }
15131
1.47M
    if (options & XML_PARSE_NSCLEAN) {
15132
803k
  ctxt->options |= XML_PARSE_NSCLEAN;
15133
803k
        options -= XML_PARSE_NSCLEAN;
15134
803k
    }
15135
1.47M
    if (options & XML_PARSE_NONET) {
15136
678k
  ctxt->options |= XML_PARSE_NONET;
15137
678k
        options -= XML_PARSE_NONET;
15138
678k
    }
15139
1.47M
    if (options & XML_PARSE_COMPACT) {
15140
822k
  ctxt->options |= XML_PARSE_COMPACT;
15141
822k
        options -= XML_PARSE_COMPACT;
15142
822k
    }
15143
1.47M
    if (options & XML_PARSE_OLD10) {
15144
505k
  ctxt->options |= XML_PARSE_OLD10;
15145
505k
        options -= XML_PARSE_OLD10;
15146
505k
    }
15147
1.47M
    if (options & XML_PARSE_NOBASEFIX) {
15148
550k
  ctxt->options |= XML_PARSE_NOBASEFIX;
15149
550k
        options -= XML_PARSE_NOBASEFIX;
15150
550k
    }
15151
1.47M
    if (options & XML_PARSE_HUGE) {
15152
474k
  ctxt->options |= XML_PARSE_HUGE;
15153
474k
        options -= XML_PARSE_HUGE;
15154
474k
        if (ctxt->dict != NULL)
15155
474k
            xmlDictSetLimit(ctxt->dict, 0);
15156
474k
    }
15157
1.47M
    if (options & XML_PARSE_OLDSAX) {
15158
431k
  ctxt->options |= XML_PARSE_OLDSAX;
15159
431k
        options -= XML_PARSE_OLDSAX;
15160
431k
    }
15161
1.47M
    if (options & XML_PARSE_IGNORE_ENC) {
15162
606k
  ctxt->options |= XML_PARSE_IGNORE_ENC;
15163
606k
        options -= XML_PARSE_IGNORE_ENC;
15164
606k
    }
15165
1.47M
    if (options & XML_PARSE_BIG_LINES) {
15166
535k
  ctxt->options |= XML_PARSE_BIG_LINES;
15167
535k
        options -= XML_PARSE_BIG_LINES;
15168
535k
    }
15169
1.47M
    ctxt->linenumbers = 1;
15170
1.47M
    return (options);
15171
1.47M
}
15172
15173
/**
15174
 * xmlCtxtUseOptions:
15175
 * @ctxt: an XML parser context
15176
 * @options:  a combination of xmlParserOption
15177
 *
15178
 * Applies the options to the parser context
15179
 *
15180
 * Returns 0 in case of success, the set of unknown or unimplemented options
15181
 *         in case of error.
15182
 */
15183
int
15184
xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
15185
1.07M
{
15186
1.07M
   return(xmlCtxtUseOptionsInternal(ctxt, options, NULL));
15187
1.07M
}
15188
15189
/**
15190
 * xmlDoRead:
15191
 * @ctxt:  an XML parser context
15192
 * @URL:  the base URL to use for the document
15193
 * @encoding:  the document encoding, or NULL
15194
 * @options:  a combination of xmlParserOption
15195
 * @reuse:  keep the context for reuse
15196
 *
15197
 * Common front-end for the xmlRead functions
15198
 *
15199
 * Returns the resulting document tree or NULL
15200
 */
15201
static xmlDocPtr
15202
xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
15203
          int options, int reuse)
15204
405k
{
15205
405k
    xmlDocPtr ret;
15206
15207
405k
    xmlCtxtUseOptionsInternal(ctxt, options, encoding);
15208
405k
    if (encoding != NULL) {
15209
0
        xmlCharEncodingHandlerPtr hdlr;
15210
15211
0
  hdlr = xmlFindCharEncodingHandler(encoding);
15212
0
  if (hdlr != NULL)
15213
0
      xmlSwitchToEncoding(ctxt, hdlr);
15214
0
    }
15215
405k
    if ((URL != NULL) && (ctxt->input != NULL) &&
15216
405k
        (ctxt->input->filename == NULL))
15217
405k
        ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
15218
405k
    xmlParseDocument(ctxt);
15219
405k
    if ((ctxt->wellFormed) || ctxt->recovery)
15220
233k
        ret = ctxt->myDoc;
15221
172k
    else {
15222
172k
        ret = NULL;
15223
172k
  if (ctxt->myDoc != NULL) {
15224
149k
      xmlFreeDoc(ctxt->myDoc);
15225
149k
  }
15226
172k
    }
15227
405k
    ctxt->myDoc = NULL;
15228
405k
    if (!reuse) {
15229
405k
  xmlFreeParserCtxt(ctxt);
15230
405k
    }
15231
15232
405k
    return (ret);
15233
405k
}
15234
15235
/**
15236
 * xmlReadDoc:
15237
 * @cur:  a pointer to a zero terminated string
15238
 * @URL:  the base URL to use for the document
15239
 * @encoding:  the document encoding, or NULL
15240
 * @options:  a combination of xmlParserOption
15241
 *
15242
 * parse an XML in-memory document and build a tree.
15243
 *
15244
 * Returns the resulting document tree
15245
 */
15246
xmlDocPtr
15247
xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
15248
0
{
15249
0
    xmlParserCtxtPtr ctxt;
15250
15251
0
    if (cur == NULL)
15252
0
        return (NULL);
15253
0
    xmlInitParser();
15254
15255
0
    ctxt = xmlCreateDocParserCtxt(cur);
15256
0
    if (ctxt == NULL)
15257
0
        return (NULL);
15258
0
    return (xmlDoRead(ctxt, URL, encoding, options, 0));
15259
0
}
15260
15261
/**
15262
 * xmlReadFile:
15263
 * @filename:  a file or URL
15264
 * @encoding:  the document encoding, or NULL
15265
 * @options:  a combination of xmlParserOption
15266
 *
15267
 * parse an XML file from the filesystem or the network.
15268
 *
15269
 * Returns the resulting document tree
15270
 */
15271
xmlDocPtr
15272
xmlReadFile(const char *filename, const char *encoding, int options)
15273
0
{
15274
0
    xmlParserCtxtPtr ctxt;
15275
15276
0
    xmlInitParser();
15277
0
    ctxt = xmlCreateURLParserCtxt(filename, options);
15278
0
    if (ctxt == NULL)
15279
0
        return (NULL);
15280
0
    return (xmlDoRead(ctxt, NULL, encoding, options, 0));
15281
0
}
15282
15283
/**
15284
 * xmlReadMemory:
15285
 * @buffer:  a pointer to a char array
15286
 * @size:  the size of the array
15287
 * @URL:  the base URL to use for the document
15288
 * @encoding:  the document encoding, or NULL
15289
 * @options:  a combination of xmlParserOption
15290
 *
15291
 * parse an XML in-memory document and build a tree.
15292
 *
15293
 * Returns the resulting document tree
15294
 */
15295
xmlDocPtr
15296
xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
15297
406k
{
15298
406k
    xmlParserCtxtPtr ctxt;
15299
15300
406k
    xmlInitParser();
15301
406k
    ctxt = xmlCreateMemoryParserCtxt(buffer, size);
15302
406k
    if (ctxt == NULL)
15303
645
        return (NULL);
15304
405k
    return (xmlDoRead(ctxt, URL, encoding, options, 0));
15305
406k
}
15306
15307
/**
15308
 * xmlReadFd:
15309
 * @fd:  an open file descriptor
15310
 * @URL:  the base URL to use for the document
15311
 * @encoding:  the document encoding, or NULL
15312
 * @options:  a combination of xmlParserOption
15313
 *
15314
 * parse an XML from a file descriptor and build a tree.
15315
 * NOTE that the file descriptor will not be closed when the
15316
 *      reader is closed or reset.
15317
 *
15318
 * Returns the resulting document tree
15319
 */
15320
xmlDocPtr
15321
xmlReadFd(int fd, const char *URL, const char *encoding, int options)
15322
0
{
15323
0
    xmlParserCtxtPtr ctxt;
15324
0
    xmlParserInputBufferPtr input;
15325
0
    xmlParserInputPtr stream;
15326
15327
0
    if (fd < 0)
15328
0
        return (NULL);
15329
0
    xmlInitParser();
15330
15331
0
    input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15332
0
    if (input == NULL)
15333
0
        return (NULL);
15334
0
    input->closecallback = NULL;
15335
0
    ctxt = xmlNewParserCtxt();
15336
0
    if (ctxt == NULL) {
15337
0
        xmlFreeParserInputBuffer(input);
15338
0
        return (NULL);
15339
0
    }
15340
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15341
0
    if (stream == NULL) {
15342
0
        xmlFreeParserInputBuffer(input);
15343
0
  xmlFreeParserCtxt(ctxt);
15344
0
        return (NULL);
15345
0
    }
15346
0
    inputPush(ctxt, stream);
15347
0
    return (xmlDoRead(ctxt, URL, encoding, options, 0));
15348
0
}
15349
15350
/**
15351
 * xmlReadIO:
15352
 * @ioread:  an I/O read function
15353
 * @ioclose:  an I/O close function
15354
 * @ioctx:  an I/O handler
15355
 * @URL:  the base URL to use for the document
15356
 * @encoding:  the document encoding, or NULL
15357
 * @options:  a combination of xmlParserOption
15358
 *
15359
 * parse an XML document from I/O functions and source and build a tree.
15360
 *
15361
 * Returns the resulting document tree
15362
 */
15363
xmlDocPtr
15364
xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
15365
          void *ioctx, const char *URL, const char *encoding, int options)
15366
0
{
15367
0
    xmlParserCtxtPtr ctxt;
15368
0
    xmlParserInputBufferPtr input;
15369
0
    xmlParserInputPtr stream;
15370
15371
0
    if (ioread == NULL)
15372
0
        return (NULL);
15373
0
    xmlInitParser();
15374
15375
0
    input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15376
0
                                         XML_CHAR_ENCODING_NONE);
15377
0
    if (input == NULL) {
15378
0
        if (ioclose != NULL)
15379
0
            ioclose(ioctx);
15380
0
        return (NULL);
15381
0
    }
15382
0
    ctxt = xmlNewParserCtxt();
15383
0
    if (ctxt == NULL) {
15384
0
        xmlFreeParserInputBuffer(input);
15385
0
        return (NULL);
15386
0
    }
15387
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15388
0
    if (stream == NULL) {
15389
0
        xmlFreeParserInputBuffer(input);
15390
0
  xmlFreeParserCtxt(ctxt);
15391
0
        return (NULL);
15392
0
    }
15393
0
    inputPush(ctxt, stream);
15394
0
    return (xmlDoRead(ctxt, URL, encoding, options, 0));
15395
0
}
15396
15397
/**
15398
 * xmlCtxtReadDoc:
15399
 * @ctxt:  an XML parser context
15400
 * @cur:  a pointer to a zero terminated string
15401
 * @URL:  the base URL to use for the document
15402
 * @encoding:  the document encoding, or NULL
15403
 * @options:  a combination of xmlParserOption
15404
 *
15405
 * parse an XML in-memory document and build a tree.
15406
 * This reuses the existing @ctxt parser context
15407
 *
15408
 * Returns the resulting document tree
15409
 */
15410
xmlDocPtr
15411
xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
15412
               const char *URL, const char *encoding, int options)
15413
0
{
15414
0
    if (cur == NULL)
15415
0
        return (NULL);
15416
0
    return (xmlCtxtReadMemory(ctxt, (const char *) cur, xmlStrlen(cur), URL,
15417
0
                              encoding, options));
15418
0
}
15419
15420
/**
15421
 * xmlCtxtReadFile:
15422
 * @ctxt:  an XML parser context
15423
 * @filename:  a file or URL
15424
 * @encoding:  the document encoding, or NULL
15425
 * @options:  a combination of xmlParserOption
15426
 *
15427
 * parse an XML file from the filesystem or the network.
15428
 * This reuses the existing @ctxt parser context
15429
 *
15430
 * Returns the resulting document tree
15431
 */
15432
xmlDocPtr
15433
xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
15434
                const char *encoding, int options)
15435
0
{
15436
0
    xmlParserInputPtr stream;
15437
15438
0
    if (filename == NULL)
15439
0
        return (NULL);
15440
0
    if (ctxt == NULL)
15441
0
        return (NULL);
15442
0
    xmlInitParser();
15443
15444
0
    xmlCtxtReset(ctxt);
15445
15446
0
    stream = xmlLoadExternalEntity(filename, NULL, ctxt);
15447
0
    if (stream == NULL) {
15448
0
        return (NULL);
15449
0
    }
15450
0
    inputPush(ctxt, stream);
15451
0
    return (xmlDoRead(ctxt, NULL, encoding, options, 1));
15452
0
}
15453
15454
/**
15455
 * xmlCtxtReadMemory:
15456
 * @ctxt:  an XML parser context
15457
 * @buffer:  a pointer to a char array
15458
 * @size:  the size of the array
15459
 * @URL:  the base URL to use for the document
15460
 * @encoding:  the document encoding, or NULL
15461
 * @options:  a combination of xmlParserOption
15462
 *
15463
 * parse an XML in-memory document and build a tree.
15464
 * This reuses the existing @ctxt parser context
15465
 *
15466
 * Returns the resulting document tree
15467
 */
15468
xmlDocPtr
15469
xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
15470
                  const char *URL, const char *encoding, int options)
15471
0
{
15472
0
    xmlParserInputBufferPtr input;
15473
0
    xmlParserInputPtr stream;
15474
15475
0
    if (ctxt == NULL)
15476
0
        return (NULL);
15477
0
    if (buffer == NULL)
15478
0
        return (NULL);
15479
0
    xmlInitParser();
15480
15481
0
    xmlCtxtReset(ctxt);
15482
15483
0
    input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
15484
0
    if (input == NULL) {
15485
0
  return(NULL);
15486
0
    }
15487
15488
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15489
0
    if (stream == NULL) {
15490
0
  xmlFreeParserInputBuffer(input);
15491
0
  return(NULL);
15492
0
    }
15493
15494
0
    inputPush(ctxt, stream);
15495
0
    return (xmlDoRead(ctxt, URL, encoding, options, 1));
15496
0
}
15497
15498
/**
15499
 * xmlCtxtReadFd:
15500
 * @ctxt:  an XML parser context
15501
 * @fd:  an open file descriptor
15502
 * @URL:  the base URL to use for the document
15503
 * @encoding:  the document encoding, or NULL
15504
 * @options:  a combination of xmlParserOption
15505
 *
15506
 * parse an XML from a file descriptor and build a tree.
15507
 * This reuses the existing @ctxt parser context
15508
 * NOTE that the file descriptor will not be closed when the
15509
 *      reader is closed or reset.
15510
 *
15511
 * Returns the resulting document tree
15512
 */
15513
xmlDocPtr
15514
xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
15515
              const char *URL, const char *encoding, int options)
15516
0
{
15517
0
    xmlParserInputBufferPtr input;
15518
0
    xmlParserInputPtr stream;
15519
15520
0
    if (fd < 0)
15521
0
        return (NULL);
15522
0
    if (ctxt == NULL)
15523
0
        return (NULL);
15524
0
    xmlInitParser();
15525
15526
0
    xmlCtxtReset(ctxt);
15527
15528
15529
0
    input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15530
0
    if (input == NULL)
15531
0
        return (NULL);
15532
0
    input->closecallback = NULL;
15533
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15534
0
    if (stream == NULL) {
15535
0
        xmlFreeParserInputBuffer(input);
15536
0
        return (NULL);
15537
0
    }
15538
0
    inputPush(ctxt, stream);
15539
0
    return (xmlDoRead(ctxt, URL, encoding, options, 1));
15540
0
}
15541
15542
/**
15543
 * xmlCtxtReadIO:
15544
 * @ctxt:  an XML parser context
15545
 * @ioread:  an I/O read function
15546
 * @ioclose:  an I/O close function
15547
 * @ioctx:  an I/O handler
15548
 * @URL:  the base URL to use for the document
15549
 * @encoding:  the document encoding, or NULL
15550
 * @options:  a combination of xmlParserOption
15551
 *
15552
 * parse an XML document from I/O functions and source and build a tree.
15553
 * This reuses the existing @ctxt parser context
15554
 *
15555
 * Returns the resulting document tree
15556
 */
15557
xmlDocPtr
15558
xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
15559
              xmlInputCloseCallback ioclose, void *ioctx,
15560
        const char *URL,
15561
              const char *encoding, int options)
15562
0
{
15563
0
    xmlParserInputBufferPtr input;
15564
0
    xmlParserInputPtr stream;
15565
15566
0
    if (ioread == NULL)
15567
0
        return (NULL);
15568
0
    if (ctxt == NULL)
15569
0
        return (NULL);
15570
0
    xmlInitParser();
15571
15572
0
    xmlCtxtReset(ctxt);
15573
15574
0
    input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15575
0
                                         XML_CHAR_ENCODING_NONE);
15576
0
    if (input == NULL) {
15577
0
        if (ioclose != NULL)
15578
0
            ioclose(ioctx);
15579
0
        return (NULL);
15580
0
    }
15581
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15582
0
    if (stream == NULL) {
15583
0
        xmlFreeParserInputBuffer(input);
15584
0
        return (NULL);
15585
0
    }
15586
0
    inputPush(ctxt, stream);
15587
0
    return (xmlDoRead(ctxt, URL, encoding, options, 1));
15588
0
}
15589