Coverage Report

Created: 2024-08-16 12:09

/src/libxml2/parser.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3
 *            implemented on top of the SAX interfaces
4
 *
5
 * References:
6
 *   The XML specification:
7
 *     http://www.w3.org/TR/REC-xml
8
 *   Original 1.0 version:
9
 *     http://www.w3.org/TR/1998/REC-xml-19980210
10
 *   XML second edition working draft
11
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
12
 *
13
 * Okay this is a big file, the parser core is around 7000 lines, then it
14
 * is followed by the progressive parser top routines, then the various
15
 * high level APIs to call the parser and a few miscellaneous functions.
16
 * A number of helper functions and deprecated ones have been moved to
17
 * parserInternals.c to reduce this file size.
18
 * As much as possible the functions are associated with their relative
19
 * production in the XML specification. A few productions defining the
20
 * different ranges of character are actually implemented either in
21
 * parserInternals.h or parserInternals.c
22
 * The DOM tree build is realized from the default SAX callbacks in
23
 * the module SAX.c.
24
 * The routines doing the validation checks are in valid.c and called either
25
 * from the SAX callbacks or as standalone functions using a preparsed
26
 * document.
27
 *
28
 * See Copyright for the status of this software.
29
 *
30
 * daniel@veillard.com
31
 */
32
33
/* To avoid EBCDIC trouble when parsing on zOS */
34
#if defined(__MVS__)
35
#pragma convert("ISO8859-1")
36
#endif
37
38
#define IN_LIBXML
39
#include "libxml.h"
40
41
#if defined(_WIN32)
42
#define XML_DIR_SEP '\\'
43
#else
44
#define XML_DIR_SEP '/'
45
#endif
46
47
#include <stdlib.h>
48
#include <limits.h>
49
#include <string.h>
50
#include <stdarg.h>
51
#include <stddef.h>
52
#include <ctype.h>
53
#include <stdlib.h>
54
#include <libxml/xmlmemory.h>
55
#include <libxml/threads.h>
56
#include <libxml/globals.h>
57
#include <libxml/tree.h>
58
#include <libxml/parser.h>
59
#include <libxml/parserInternals.h>
60
#include <libxml/HTMLparser.h>
61
#include <libxml/valid.h>
62
#include <libxml/entities.h>
63
#include <libxml/xmlerror.h>
64
#include <libxml/encoding.h>
65
#include <libxml/xmlIO.h>
66
#include <libxml/uri.h>
67
#ifdef LIBXML_CATALOG_ENABLED
68
#include <libxml/catalog.h>
69
#endif
70
#ifdef LIBXML_SCHEMAS_ENABLED
71
#include <libxml/xmlschemastypes.h>
72
#include <libxml/relaxng.h>
73
#endif
74
#if defined(LIBXML_XPATH_ENABLED) || defined(LIBXML_SCHEMAS_ENABLED)
75
#include <libxml/xpath.h>
76
#endif
77
78
#include "private/buf.h"
79
#include "private/enc.h"
80
#include "private/error.h"
81
#include "private/html.h"
82
#include "private/io.h"
83
#include "private/parser.h"
84
#include "private/threads.h"
85
86
/* Record describing one start tag. NOTE(review): how the parser fills and
 * consumes these fields is outside this chunk; the field notes below are
 * assumptions to confirm. */
struct _xmlStartTag {
87
    const xmlChar *prefix; /* presumably the element's namespace prefix - confirm */
88
    const xmlChar *URI; /* presumably the element's namespace URI - confirm */
89
    int line; /* presumably the input line of the start tag - confirm */
90
    int nsNr; /* presumably a namespace count for this element - confirm */
91
};
92
93
static void
94
xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info);
95
96
static xmlParserCtxtPtr
97
xmlCreateEntityParserCtxtInternal(xmlSAXHandlerPtr sax, void *userData,
98
        const xmlChar *URL, const xmlChar *ID, const xmlChar *base,
99
        xmlParserCtxtPtr pctx);
100
101
static void xmlHaltParser(xmlParserCtxtPtr ctxt);
102
103
static int
104
xmlParseElementStart(xmlParserCtxtPtr ctxt);
105
106
static void
107
xmlParseElementEnd(xmlParserCtxtPtr ctxt);
108
109
/************************************************************************
110
 *                  *
111
 *  Arbitrary limits set in the parser. See XML_PARSE_HUGE    *
112
 *                  *
113
 ************************************************************************/
114
115
2.51M
#define XML_MAX_HUGE_LENGTH 1000000000
116
117
10.1k
#define XML_PARSER_BIG_ENTITY 1000
118
#define XML_PARSER_LOT_ENTITY 5000
119
120
/*
121
 * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity
122
 *    replacement over the size in byte of the input indicates that you have
123
 *    an exponential behaviour. A value of 10 corresponds to at least 3 entity
124
 *    replacements per byte of input.
125
 */
126
433k
#define XML_PARSER_NON_LINEAR 10
127
128
/*
129
 * xmlParserEntityCheck
130
 *
131
 * Function to check non-linear entity expansion behaviour
132
 * This is here to detect and stop exponential linear entity expansion
133
 * This is not a limitation of the parser but a safety
134
 * boundary feature. It can be disabled with the XML_PARSE_HUGE
135
 * parser option.
136
 */
137
static int
138
xmlParserEntityCheck(xmlParserCtxtPtr ctxt, size_t size,
139
                     xmlEntityPtr ent, size_t replacement)
140
1.36M
{
141
1.36M
    size_t consumed = 0;
142
1.36M
    int i;
143
144
1.36M
    if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE))
145
750k
        return (0);
146
611k
    if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
147
1.21k
        return (1);
148
149
    /*
150
     * This may look absurd but is needed to detect
151
     * entities problems
152
     */
153
610k
    if ((ent != NULL) && (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
154
610k
  (ent->content != NULL) && (ent->checked == 0) &&
155
610k
  (ctxt->errNo != XML_ERR_ENTITY_LOOP)) {
156
68.3k
  unsigned long oldnbent = ctxt->nbentities, diff;
157
68.3k
  xmlChar *rep;
158
159
68.3k
  ent->checked = 1;
160
161
68.3k
        ++ctxt->depth;
162
68.3k
  rep = xmlStringDecodeEntities(ctxt, ent->content,
163
68.3k
          XML_SUBSTITUTE_REF, 0, 0, 0);
164
68.3k
        --ctxt->depth;
165
68.3k
  if ((rep == NULL) || (ctxt->errNo == XML_ERR_ENTITY_LOOP)) {
166
763
      ent->content[0] = 0;
167
763
  }
168
169
68.3k
        diff = ctxt->nbentities - oldnbent + 1;
170
68.3k
        if (diff > INT_MAX / 2)
171
0
            diff = INT_MAX / 2;
172
68.3k
  ent->checked = diff * 2;
173
68.3k
  if (rep != NULL) {
174
67.7k
      if (xmlStrchr(rep, '<'))
175
4.12k
    ent->checked |= 1;
176
67.7k
      xmlFree(rep);
177
67.7k
      rep = NULL;
178
67.7k
  }
179
68.3k
    }
180
181
    /*
182
     * Prevent entity exponential check, not just replacement while
183
     * parsing the DTD
184
     * The check is potentially costly so do that only once in a thousand
185
     */
186
610k
    if ((ctxt->instate == XML_PARSER_DTD) && (ctxt->nbentities > 10000) &&
187
610k
        (ctxt->nbentities % 1024 == 0)) {
188
0
  for (i = 0;i < ctxt->inputNr;i++) {
189
0
      consumed += ctxt->inputTab[i]->consumed +
190
0
                 (ctxt->inputTab[i]->cur - ctxt->inputTab[i]->base);
191
0
  }
192
0
  if (ctxt->nbentities > consumed * XML_PARSER_NON_LINEAR) {
193
0
      xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
194
0
      ctxt->instate = XML_PARSER_EOF;
195
0
      return (1);
196
0
  }
197
0
  consumed = 0;
198
0
    }
199
200
201
202
610k
    if (replacement != 0) {
203
33.3k
  if (replacement < XML_MAX_TEXT_LENGTH)
204
33.3k
      return(0);
205
206
        /*
207
   * If the volume of entity copy reaches 10 times the
208
   * amount of parsed data and over the large text threshold
209
   * then that's very likely to be an abuse.
210
   */
211
0
        if (ctxt->input != NULL) {
212
0
      consumed = ctxt->input->consumed +
213
0
                 (ctxt->input->cur - ctxt->input->base);
214
0
  }
215
0
        consumed += ctxt->sizeentities;
216
217
0
        if (replacement < XML_PARSER_NON_LINEAR * consumed)
218
0
      return(0);
219
576k
    } else if (size != 0) {
220
        /*
221
         * Do the check based on the replacement size of the entity
222
         */
223
10.1k
        if (size < XML_PARSER_BIG_ENTITY)
224
9.59k
      return(0);
225
226
        /*
227
         * A limit on the amount of text data reasonably used
228
         */
229
535
        if (ctxt->input != NULL) {
230
535
            consumed = ctxt->input->consumed +
231
535
                (ctxt->input->cur - ctxt->input->base);
232
535
        }
233
535
        consumed += ctxt->sizeentities;
234
235
535
        if ((size < XML_PARSER_NON_LINEAR * consumed) &&
236
535
      (ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed))
237
424
            return (0);
238
566k
    } else if (ent != NULL) {
239
        /*
240
         * use the number of parsed entities in the replacement
241
         */
242
433k
        size = ent->checked / 2;
243
244
        /*
245
         * The amount of data parsed counting entities size only once
246
         */
247
433k
        if (ctxt->input != NULL) {
248
433k
            consumed = ctxt->input->consumed +
249
433k
                (ctxt->input->cur - ctxt->input->base);
250
433k
        }
251
433k
        consumed += ctxt->sizeentities;
252
253
        /*
254
         * Check the density of entities for the amount of data
255
   * knowing an entity reference will take at least 3 bytes
256
         */
257
433k
        if (size * 3 < consumed * XML_PARSER_NON_LINEAR)
258
433k
            return (0);
259
433k
    } else {
260
        /*
261
         * strange we got no data for checking
262
         */
263
133k
  if (((ctxt->lastError.code != XML_ERR_UNDECLARED_ENTITY) &&
264
133k
       (ctxt->lastError.code != XML_WAR_UNDECLARED_ENTITY)) ||
265
133k
      (ctxt->nbentities <= 10000))
266
133k
      return (0);
267
133k
    }
268
112
    xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
269
112
    return (1);
270
610k
}
271
272
/**
273
 * xmlParserMaxDepth:
274
 *
275
 * arbitrary depth limit for the XML documents that we allow to
276
 * process. This is not a limitation of the parser but a safety
277
 * boundary feature. It can be disabled with the XML_PARSE_HUGE
278
 * parser option.
279
 */
280
unsigned int xmlParserMaxDepth = 256;
281
282
283
284
#define SAX2 1
285
36.7M
#define XML_PARSER_BIG_BUFFER_SIZE 300
286
139M
#define XML_PARSER_BUFFER_SIZE 100
287
816k
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
288
289
/**
290
 * XML_PARSER_CHUNK_SIZE
291
 *
292
 * When calling GROW that's the minimal amount of data
293
 * the parser expected to have received. It is not a hard
294
 * limit but an optimization when reading strings like Names
295
 * It is not strictly needed as long as inputs available characters
296
 * are followed by 0, which should be provided by the I/O level
297
 */
298
24.7M
#define XML_PARSER_CHUNK_SIZE 100
299
300
/*
301
 * List of XML prefixed PI allowed by W3C specs
302
 */
303
304
static const char* const xmlW3CPIs[] = {
305
    "xml-stylesheet",
306
    "xml-model",
307
    NULL
308
};
309
310
311
/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
312
static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
313
                                              const xmlChar **str);
314
315
static xmlParserErrors
316
xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
317
                xmlSAXHandlerPtr sax,
318
          void *user_data, int depth, const xmlChar *URL,
319
          const xmlChar *ID, xmlNodePtr *list);
320
321
static int
322
xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
323
                          const char *encoding);
324
#ifdef LIBXML_LEGACY_ENABLED
325
static void
326
xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
327
                      xmlNodePtr lastNode);
328
#endif /* LIBXML_LEGACY_ENABLED */
329
330
static xmlParserErrors
331
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
332
          const xmlChar *string, void *user_data, xmlNodePtr *lst);
333
334
static int
335
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
336
337
/************************************************************************
338
 *                  *
339
 *    Some factorized error routines        *
340
 *                  *
341
 ************************************************************************/
342
343
/**
344
 * xmlErrAttributeDup:
345
 * @ctxt:  an XML parser context
346
 * @prefix:  the attribute prefix
347
 * @localname:  the attribute localname
348
 *
349
 * Handle a redefinition of attribute error
350
 */
351
static void
352
xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
353
                   const xmlChar * localname)
354
8.08k
{
355
8.08k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
356
8.08k
        (ctxt->instate == XML_PARSER_EOF))
357
0
  return;
358
8.08k
    if (ctxt != NULL)
359
8.08k
  ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
360
361
8.08k
    if (prefix == NULL)
362
5.83k
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
363
5.83k
                        XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
364
5.83k
                        (const char *) localname, NULL, NULL, 0, 0,
365
5.83k
                        "Attribute %s redefined\n", localname);
366
2.25k
    else
367
2.25k
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
368
2.25k
                        XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
369
2.25k
                        (const char *) prefix, (const char *) localname,
370
2.25k
                        NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
371
2.25k
                        localname);
372
8.08k
    if (ctxt != NULL) {
373
8.08k
  ctxt->wellFormed = 0;
374
8.08k
  if (ctxt->recovery == 0)
375
3.99k
      ctxt->disableSAX = 1;
376
8.08k
    }
377
8.08k
}
378
379
/**
380
 * xmlFatalErr:
381
 * @ctxt:  an XML parser context
382
 * @error:  the error number
383
 * @extra:  extra information string
384
 *
385
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
386
 */
387
static void
388
xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
389
2.25M
{
390
2.25M
    const char *errmsg;
391
392
2.25M
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
393
2.25M
        (ctxt->instate == XML_PARSER_EOF))
394
651k
  return;
395
1.60M
    switch (error) {
396
10.1k
        case XML_ERR_INVALID_HEX_CHARREF:
397
10.1k
            errmsg = "CharRef: invalid hexadecimal value";
398
10.1k
            break;
399
19.3k
        case XML_ERR_INVALID_DEC_CHARREF:
400
19.3k
            errmsg = "CharRef: invalid decimal value";
401
19.3k
            break;
402
0
        case XML_ERR_INVALID_CHARREF:
403
0
            errmsg = "CharRef: invalid value";
404
0
            break;
405
98.4k
        case XML_ERR_INTERNAL_ERROR:
406
98.4k
            errmsg = "internal error";
407
98.4k
            break;
408
0
        case XML_ERR_PEREF_AT_EOF:
409
0
            errmsg = "PEReference at end of document";
410
0
            break;
411
0
        case XML_ERR_PEREF_IN_PROLOG:
412
0
            errmsg = "PEReference in prolog";
413
0
            break;
414
0
        case XML_ERR_PEREF_IN_EPILOG:
415
0
            errmsg = "PEReference in epilog";
416
0
            break;
417
0
        case XML_ERR_PEREF_NO_NAME:
418
0
            errmsg = "PEReference: no name";
419
0
            break;
420
9.51k
        case XML_ERR_PEREF_SEMICOL_MISSING:
421
9.51k
            errmsg = "PEReference: expecting ';'";
422
9.51k
            break;
423
778k
        case XML_ERR_ENTITY_LOOP:
424
778k
            errmsg = "Detected an entity reference loop";
425
778k
            break;
426
0
        case XML_ERR_ENTITY_NOT_STARTED:
427
0
            errmsg = "EntityValue: \" or ' expected";
428
0
            break;
429
178
        case XML_ERR_ENTITY_PE_INTERNAL:
430
178
            errmsg = "PEReferences forbidden in internal subset";
431
178
            break;
432
2.01k
        case XML_ERR_ENTITY_NOT_FINISHED:
433
2.01k
            errmsg = "EntityValue: \" or ' expected";
434
2.01k
            break;
435
16.0k
        case XML_ERR_ATTRIBUTE_NOT_STARTED:
436
16.0k
            errmsg = "AttValue: \" or ' expected";
437
16.0k
            break;
438
58.2k
        case XML_ERR_LT_IN_ATTRIBUTE:
439
58.2k
            errmsg = "Unescaped '<' not allowed in attributes values";
440
58.2k
            break;
441
4.01k
        case XML_ERR_LITERAL_NOT_STARTED:
442
4.01k
            errmsg = "SystemLiteral \" or ' expected";
443
4.01k
            break;
444
5.18k
        case XML_ERR_LITERAL_NOT_FINISHED:
445
5.18k
            errmsg = "Unfinished System or Public ID \" or ' expected";
446
5.18k
            break;
447
4.97k
        case XML_ERR_MISPLACED_CDATA_END:
448
4.97k
            errmsg = "Sequence ']]>' not allowed in content";
449
4.97k
            break;
450
3.54k
        case XML_ERR_URI_REQUIRED:
451
3.54k
            errmsg = "SYSTEM or PUBLIC, the URI is missing";
452
3.54k
            break;
453
472
        case XML_ERR_PUBID_REQUIRED:
454
472
            errmsg = "PUBLIC, the Public Identifier is missing";
455
472
            break;
456
53.3k
        case XML_ERR_HYPHEN_IN_COMMENT:
457
53.3k
            errmsg = "Comment must not contain '--' (double-hyphen)";
458
53.3k
            break;
459
4.77k
        case XML_ERR_PI_NOT_STARTED:
460
4.77k
            errmsg = "xmlParsePI : no target name";
461
4.77k
            break;
462
442
        case XML_ERR_RESERVED_XML_NAME:
463
442
            errmsg = "Invalid PI name";
464
442
            break;
465
246
        case XML_ERR_NOTATION_NOT_STARTED:
466
246
            errmsg = "NOTATION: Name expected here";
467
246
            break;
468
715
        case XML_ERR_NOTATION_NOT_FINISHED:
469
715
            errmsg = "'>' required to close NOTATION declaration";
470
715
            break;
471
4.71k
        case XML_ERR_VALUE_REQUIRED:
472
4.71k
            errmsg = "Entity value required";
473
4.71k
            break;
474
648
        case XML_ERR_URI_FRAGMENT:
475
648
            errmsg = "Fragment not allowed";
476
648
            break;
477
5.06k
        case XML_ERR_ATTLIST_NOT_STARTED:
478
5.06k
            errmsg = "'(' required to start ATTLIST enumeration";
479
5.06k
            break;
480
234
        case XML_ERR_NMTOKEN_REQUIRED:
481
234
            errmsg = "NmToken expected in ATTLIST enumeration";
482
234
            break;
483
1.04k
        case XML_ERR_ATTLIST_NOT_FINISHED:
484
1.04k
            errmsg = "')' required to finish ATTLIST enumeration";
485
1.04k
            break;
486
1.30k
        case XML_ERR_MIXED_NOT_STARTED:
487
1.30k
            errmsg = "MixedContentDecl : '|' or ')*' expected";
488
1.30k
            break;
489
0
        case XML_ERR_PCDATA_REQUIRED:
490
0
            errmsg = "MixedContentDecl : '#PCDATA' expected";
491
0
            break;
492
3.71k
        case XML_ERR_ELEMCONTENT_NOT_STARTED:
493
3.71k
            errmsg = "ContentDecl : Name or '(' expected";
494
3.71k
            break;
495
7.94k
        case XML_ERR_ELEMCONTENT_NOT_FINISHED:
496
7.94k
            errmsg = "ContentDecl : ',' '|' or ')' expected";
497
7.94k
            break;
498
0
        case XML_ERR_PEREF_IN_INT_SUBSET:
499
0
            errmsg =
500
0
                "PEReference: forbidden within markup decl in internal subset";
501
0
            break;
502
85.6k
        case XML_ERR_GT_REQUIRED:
503
85.6k
            errmsg = "expected '>'";
504
85.6k
            break;
505
342
        case XML_ERR_CONDSEC_INVALID:
506
342
            errmsg = "XML conditional section '[' expected";
507
342
            break;
508
17.8k
        case XML_ERR_EXT_SUBSET_NOT_FINISHED:
509
17.8k
            errmsg = "Content error in the external subset";
510
17.8k
            break;
511
1.21k
        case XML_ERR_CONDSEC_INVALID_KEYWORD:
512
1.21k
            errmsg =
513
1.21k
                "conditional section INCLUDE or IGNORE keyword expected";
514
1.21k
            break;
515
1.40k
        case XML_ERR_CONDSEC_NOT_FINISHED:
516
1.40k
            errmsg = "XML conditional section not closed";
517
1.40k
            break;
518
137
        case XML_ERR_XMLDECL_NOT_STARTED:
519
137
            errmsg = "Text declaration '<?xml' required";
520
137
            break;
521
59.9k
        case XML_ERR_XMLDECL_NOT_FINISHED:
522
59.9k
            errmsg = "parsing XML declaration: '?>' expected";
523
59.9k
            break;
524
0
        case XML_ERR_EXT_ENTITY_STANDALONE:
525
0
            errmsg = "external parsed entities cannot be standalone";
526
0
            break;
527
55.2k
        case XML_ERR_ENTITYREF_SEMICOL_MISSING:
528
55.2k
            errmsg = "EntityRef: expecting ';'";
529
55.2k
            break;
530
52.7k
        case XML_ERR_DOCTYPE_NOT_FINISHED:
531
52.7k
            errmsg = "DOCTYPE improperly terminated";
532
52.7k
            break;
533
0
        case XML_ERR_LTSLASH_REQUIRED:
534
0
            errmsg = "EndTag: '</' not found";
535
0
            break;
536
3.24k
        case XML_ERR_EQUAL_REQUIRED:
537
3.24k
            errmsg = "expected '='";
538
3.24k
            break;
539
14.2k
        case XML_ERR_STRING_NOT_CLOSED:
540
14.2k
            errmsg = "String not closed expecting \" or '";
541
14.2k
            break;
542
3.50k
        case XML_ERR_STRING_NOT_STARTED:
543
3.50k
            errmsg = "String not started expecting ' or \"";
544
3.50k
            break;
545
594
        case XML_ERR_ENCODING_NAME:
546
594
            errmsg = "Invalid XML encoding name";
547
594
            break;
548
1.18k
        case XML_ERR_STANDALONE_VALUE:
549
1.18k
            errmsg = "standalone accepts only 'yes' or 'no'";
550
1.18k
            break;
551
22.1k
        case XML_ERR_DOCUMENT_EMPTY:
552
22.1k
            errmsg = "Document is empty";
553
22.1k
            break;
554
149k
        case XML_ERR_DOCUMENT_END:
555
149k
            errmsg = "Extra content at the end of the document";
556
149k
            break;
557
7.09k
        case XML_ERR_NOT_WELL_BALANCED:
558
7.09k
            errmsg = "chunk is not well balanced";
559
7.09k
            break;
560
0
        case XML_ERR_EXTRA_CONTENT:
561
0
            errmsg = "extra content at the end of well balanced chunk";
562
0
            break;
563
32.0k
        case XML_ERR_VERSION_MISSING:
564
32.0k
            errmsg = "Malformed declaration expecting version";
565
32.0k
            break;
566
53
        case XML_ERR_NAME_TOO_LONG:
567
53
            errmsg = "Name too long";
568
53
            break;
569
#if 0
570
        case:
571
            errmsg = "";
572
            break;
573
#endif
574
458
        default:
575
458
            errmsg = "Unregistered error message";
576
1.60M
    }
577
1.60M
    if (ctxt != NULL)
578
1.60M
  ctxt->errNo = error;
579
1.60M
    if (info == NULL) {
580
1.50M
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
581
1.50M
                        XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s\n",
582
1.50M
                        errmsg);
583
1.50M
    } else {
584
98.4k
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
585
98.4k
                        XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s: %s\n",
586
98.4k
                        errmsg, info);
587
98.4k
    }
588
1.60M
    if (ctxt != NULL) {
589
1.60M
  ctxt->wellFormed = 0;
590
1.60M
  if (ctxt->recovery == 0)
591
1.12M
      ctxt->disableSAX = 1;
592
1.60M
    }
593
1.60M
}
594
595
/**
596
 * xmlFatalErrMsg:
597
 * @ctxt:  an XML parser context
598
 * @error:  the error number
599
 * @msg:  the error message
600
 *
601
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
602
 */
603
static void LIBXML_ATTR_FORMAT(3,0)
604
xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
605
               const char *msg)
606
3.14M
{
607
3.14M
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
608
3.14M
        (ctxt->instate == XML_PARSER_EOF))
609
0
  return;
610
3.14M
    if (ctxt != NULL)
611
3.14M
  ctxt->errNo = error;
612
3.14M
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
613
3.14M
                    XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
614
3.14M
    if (ctxt != NULL) {
615
3.14M
  ctxt->wellFormed = 0;
616
3.14M
  if (ctxt->recovery == 0)
617
1.45M
      ctxt->disableSAX = 1;
618
3.14M
    }
619
3.14M
}
620
621
/**
622
 * xmlWarningMsg:
623
 * @ctxt:  an XML parser context
624
 * @error:  the error number
625
 * @msg:  the error message
626
 * @str1:  extra data
627
 * @str2:  extra data
628
 *
629
 * Handle a warning.
630
 */
631
static void LIBXML_ATTR_FORMAT(3,0)
632
xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
633
              const char *msg, const xmlChar *str1, const xmlChar *str2)
634
92.1k
{
635
92.1k
    xmlStructuredErrorFunc schannel = NULL;
636
637
92.1k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
638
92.1k
        (ctxt->instate == XML_PARSER_EOF))
639
0
  return;
640
92.1k
    if ((ctxt != NULL) && (ctxt->sax != NULL) &&
641
92.1k
        (ctxt->sax->initialized == XML_SAX2_MAGIC))
642
76.7k
        schannel = ctxt->sax->serror;
643
92.1k
    if (ctxt != NULL) {
644
92.1k
        __xmlRaiseError(schannel,
645
92.1k
                    (ctxt->sax) ? ctxt->sax->warning : NULL,
646
92.1k
                    ctxt->userData,
647
92.1k
                    ctxt, NULL, XML_FROM_PARSER, error,
648
92.1k
                    XML_ERR_WARNING, NULL, 0,
649
92.1k
        (const char *) str1, (const char *) str2, NULL, 0, 0,
650
92.1k
        msg, (const char *) str1, (const char *) str2);
651
92.1k
    } else {
652
0
        __xmlRaiseError(schannel, NULL, NULL,
653
0
                    ctxt, NULL, XML_FROM_PARSER, error,
654
0
                    XML_ERR_WARNING, NULL, 0,
655
0
        (const char *) str1, (const char *) str2, NULL, 0, 0,
656
0
        msg, (const char *) str1, (const char *) str2);
657
0
    }
658
92.1k
}
659
660
/**
661
 * xmlValidityError:
662
 * @ctxt:  an XML parser context
663
 * @error:  the error number
664
 * @msg:  the error message
665
 * @str1:  extra data
666
 *
667
 * Handle a validity error.
668
 */
669
static void LIBXML_ATTR_FORMAT(3,0)
670
xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
671
              const char *msg, const xmlChar *str1, const xmlChar *str2)
672
8.01k
{
673
8.01k
    xmlStructuredErrorFunc schannel = NULL;
674
675
8.01k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
676
8.01k
        (ctxt->instate == XML_PARSER_EOF))
677
0
  return;
678
8.01k
    if (ctxt != NULL) {
679
8.01k
  ctxt->errNo = error;
680
8.01k
  if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
681
6.91k
      schannel = ctxt->sax->serror;
682
8.01k
    }
683
8.01k
    if (ctxt != NULL) {
684
8.01k
        __xmlRaiseError(schannel,
685
8.01k
                    ctxt->vctxt.error, ctxt->vctxt.userData,
686
8.01k
                    ctxt, NULL, XML_FROM_DTD, error,
687
8.01k
                    XML_ERR_ERROR, NULL, 0, (const char *) str1,
688
8.01k
        (const char *) str2, NULL, 0, 0,
689
8.01k
        msg, (const char *) str1, (const char *) str2);
690
8.01k
  ctxt->valid = 0;
691
8.01k
    } else {
692
0
        __xmlRaiseError(schannel, NULL, NULL,
693
0
                    ctxt, NULL, XML_FROM_DTD, error,
694
0
                    XML_ERR_ERROR, NULL, 0, (const char *) str1,
695
0
        (const char *) str2, NULL, 0, 0,
696
0
        msg, (const char *) str1, (const char *) str2);
697
0
    }
698
8.01k
}
699
700
/**
701
 * xmlFatalErrMsgInt:
702
 * @ctxt:  an XML parser context
703
 * @error:  the error number
704
 * @msg:  the error message
705
 * @val:  an integer value
706
 *
707
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
708
 */
709
static void LIBXML_ATTR_FORMAT(3,0)
710
xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
711
                  const char *msg, int val)
712
5.36M
{
713
5.36M
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
714
5.36M
        (ctxt->instate == XML_PARSER_EOF))
715
0
  return;
716
5.36M
    if (ctxt != NULL)
717
5.36M
  ctxt->errNo = error;
718
5.36M
    __xmlRaiseError(NULL, NULL, NULL,
719
5.36M
                    ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
720
5.36M
                    NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
721
5.36M
    if (ctxt != NULL) {
722
5.36M
  ctxt->wellFormed = 0;
723
5.36M
  if (ctxt->recovery == 0)
724
2.34M
      ctxt->disableSAX = 1;
725
5.36M
    }
726
5.36M
}
727
728
/**
729
 * xmlFatalErrMsgStrIntStr:
730
 * @ctxt:  an XML parser context
731
 * @error:  the error number
732
 * @msg:  the error message
733
 * @str1:  an string info
734
 * @val:  an integer value
735
 * @str2:  an string info
736
 *
737
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
738
 */
739
static void LIBXML_ATTR_FORMAT(3,0)
740
xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
741
                  const char *msg, const xmlChar *str1, int val,
742
      const xmlChar *str2)
743
461k
{
744
461k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
745
461k
        (ctxt->instate == XML_PARSER_EOF))
746
0
  return;
747
461k
    if (ctxt != NULL)
748
461k
  ctxt->errNo = error;
749
461k
    __xmlRaiseError(NULL, NULL, NULL,
750
461k
                    ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
751
461k
                    NULL, 0, (const char *) str1, (const char *) str2,
752
461k
        NULL, val, 0, msg, str1, val, str2);
753
461k
    if (ctxt != NULL) {
754
461k
  ctxt->wellFormed = 0;
755
461k
  if (ctxt->recovery == 0)
756
135k
      ctxt->disableSAX = 1;
757
461k
    }
758
461k
}
759
760
/**
761
 * xmlFatalErrMsgStr:
762
 * @ctxt:  an XML parser context
763
 * @error:  the error number
764
 * @msg:  the error message
765
 * @val:  a string value
766
 *
767
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
768
 */
769
static void LIBXML_ATTR_FORMAT(3,0)
770
xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
771
                  const char *msg, const xmlChar * val)
772
739k
{
773
739k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
774
739k
        (ctxt->instate == XML_PARSER_EOF))
775
0
  return;
776
739k
    if (ctxt != NULL)
777
739k
  ctxt->errNo = error;
778
739k
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
779
739k
                    XML_FROM_PARSER, error, XML_ERR_FATAL,
780
739k
                    NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
781
739k
                    val);
782
739k
    if (ctxt != NULL) {
783
739k
  ctxt->wellFormed = 0;
784
739k
  if (ctxt->recovery == 0)
785
292k
      ctxt->disableSAX = 1;
786
739k
    }
787
739k
}
788
789
/**
790
 * xmlErrMsgStr:
791
 * @ctxt:  an XML parser context
792
 * @error:  the error number
793
 * @msg:  the error message
794
 * @val:  a string value
795
 *
796
 * Handle a non fatal parser error
797
 */
798
static void LIBXML_ATTR_FORMAT(3,0)
799
xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
800
                  const char *msg, const xmlChar * val)
801
42.9k
{
802
42.9k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
803
42.9k
        (ctxt->instate == XML_PARSER_EOF))
804
0
  return;
805
42.9k
    if (ctxt != NULL)
806
42.9k
  ctxt->errNo = error;
807
42.9k
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
808
42.9k
                    XML_FROM_PARSER, error, XML_ERR_ERROR,
809
42.9k
                    NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
810
42.9k
                    val);
811
42.9k
}
812
813
/**
814
 * xmlNsErr:
815
 * @ctxt:  an XML parser context
816
 * @error:  the error number
817
 * @msg:  the message
818
 * @info1:  extra information string
819
 * @info2:  extra information string
820
 *
821
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
822
 */
823
static void LIBXML_ATTR_FORMAT(3,0)
824
xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
825
         const char *msg,
826
         const xmlChar * info1, const xmlChar * info2,
827
         const xmlChar * info3)
828
227k
{
829
227k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
830
227k
        (ctxt->instate == XML_PARSER_EOF))
831
0
  return;
832
227k
    if (ctxt != NULL)
833
227k
  ctxt->errNo = error;
834
227k
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
835
227k
                    XML_ERR_ERROR, NULL, 0, (const char *) info1,
836
227k
                    (const char *) info2, (const char *) info3, 0, 0, msg,
837
227k
                    info1, info2, info3);
838
227k
    if (ctxt != NULL)
839
227k
  ctxt->nsWellFormed = 0;
840
227k
}
841
842
/**
843
 * xmlNsWarn
844
 * @ctxt:  an XML parser context
845
 * @error:  the error number
846
 * @msg:  the message
847
 * @info1:  extra information string
848
 * @info2:  extra information string
849
 *
850
 * Handle a namespace warning error
851
 */
852
static void LIBXML_ATTR_FORMAT(3,0)
853
xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
854
         const char *msg,
855
         const xmlChar * info1, const xmlChar * info2,
856
         const xmlChar * info3)
857
2.49k
{
858
2.49k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
859
2.49k
        (ctxt->instate == XML_PARSER_EOF))
860
0
  return;
861
2.49k
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
862
2.49k
                    XML_ERR_WARNING, NULL, 0, (const char *) info1,
863
2.49k
                    (const char *) info2, (const char *) info3, 0, 0, msg,
864
2.49k
                    info1, info2, info3);
865
2.49k
}
866
867
/************************************************************************
868
 *                  *
869
 *    Library wide options          *
870
 *                  *
871
 ************************************************************************/
872
873
/**
874
  * xmlHasFeature:
875
  * @feature: the feature to be examined
876
  *
877
  * Examines if the library has been compiled with a given feature.
878
  *
879
  * Returns a non-zero value if the feature exist, otherwise zero.
880
  * Returns zero (0) if the feature does not exist or an unknown
881
  * unknown feature is requested, non-zero otherwise.
882
  */
883
int
884
xmlHasFeature(xmlFeature feature)
885
0
{
886
0
    switch (feature) {
887
0
  case XML_WITH_THREAD:
888
0
#ifdef LIBXML_THREAD_ENABLED
889
0
      return(1);
890
#else
891
      return(0);
892
#endif
893
0
        case XML_WITH_TREE:
894
0
#ifdef LIBXML_TREE_ENABLED
895
0
            return(1);
896
#else
897
            return(0);
898
#endif
899
0
        case XML_WITH_OUTPUT:
900
0
#ifdef LIBXML_OUTPUT_ENABLED
901
0
            return(1);
902
#else
903
            return(0);
904
#endif
905
0
        case XML_WITH_PUSH:
906
0
#ifdef LIBXML_PUSH_ENABLED
907
0
            return(1);
908
#else
909
            return(0);
910
#endif
911
0
        case XML_WITH_READER:
912
0
#ifdef LIBXML_READER_ENABLED
913
0
            return(1);
914
#else
915
            return(0);
916
#endif
917
0
        case XML_WITH_PATTERN:
918
0
#ifdef LIBXML_PATTERN_ENABLED
919
0
            return(1);
920
#else
921
            return(0);
922
#endif
923
0
        case XML_WITH_WRITER:
924
0
#ifdef LIBXML_WRITER_ENABLED
925
0
            return(1);
926
#else
927
            return(0);
928
#endif
929
0
        case XML_WITH_SAX1:
930
0
#ifdef LIBXML_SAX1_ENABLED
931
0
            return(1);
932
#else
933
            return(0);
934
#endif
935
0
        case XML_WITH_FTP:
936
#ifdef LIBXML_FTP_ENABLED
937
            return(1);
938
#else
939
0
            return(0);
940
0
#endif
941
0
        case XML_WITH_HTTP:
942
#ifdef LIBXML_HTTP_ENABLED
943
            return(1);
944
#else
945
0
            return(0);
946
0
#endif
947
0
        case XML_WITH_VALID:
948
0
#ifdef LIBXML_VALID_ENABLED
949
0
            return(1);
950
#else
951
            return(0);
952
#endif
953
0
        case XML_WITH_HTML:
954
0
#ifdef LIBXML_HTML_ENABLED
955
0
            return(1);
956
#else
957
            return(0);
958
#endif
959
0
        case XML_WITH_LEGACY:
960
#ifdef LIBXML_LEGACY_ENABLED
961
            return(1);
962
#else
963
0
            return(0);
964
0
#endif
965
0
        case XML_WITH_C14N:
966
0
#ifdef LIBXML_C14N_ENABLED
967
0
            return(1);
968
#else
969
            return(0);
970
#endif
971
0
        case XML_WITH_CATALOG:
972
0
#ifdef LIBXML_CATALOG_ENABLED
973
0
            return(1);
974
#else
975
            return(0);
976
#endif
977
0
        case XML_WITH_XPATH:
978
0
#ifdef LIBXML_XPATH_ENABLED
979
0
            return(1);
980
#else
981
            return(0);
982
#endif
983
0
        case XML_WITH_XPTR:
984
0
#ifdef LIBXML_XPTR_ENABLED
985
0
            return(1);
986
#else
987
            return(0);
988
#endif
989
0
        case XML_WITH_XINCLUDE:
990
0
#ifdef LIBXML_XINCLUDE_ENABLED
991
0
            return(1);
992
#else
993
            return(0);
994
#endif
995
0
        case XML_WITH_ICONV:
996
0
#ifdef LIBXML_ICONV_ENABLED
997
0
            return(1);
998
#else
999
            return(0);
1000
#endif
1001
0
        case XML_WITH_ISO8859X:
1002
0
#ifdef LIBXML_ISO8859X_ENABLED
1003
0
            return(1);
1004
#else
1005
            return(0);
1006
#endif
1007
0
        case XML_WITH_UNICODE:
1008
0
#ifdef LIBXML_UNICODE_ENABLED
1009
0
            return(1);
1010
#else
1011
            return(0);
1012
#endif
1013
0
        case XML_WITH_REGEXP:
1014
0
#ifdef LIBXML_REGEXP_ENABLED
1015
0
            return(1);
1016
#else
1017
            return(0);
1018
#endif
1019
0
        case XML_WITH_AUTOMATA:
1020
0
#ifdef LIBXML_AUTOMATA_ENABLED
1021
0
            return(1);
1022
#else
1023
            return(0);
1024
#endif
1025
0
        case XML_WITH_EXPR:
1026
#ifdef LIBXML_EXPR_ENABLED
1027
            return(1);
1028
#else
1029
0
            return(0);
1030
0
#endif
1031
0
        case XML_WITH_SCHEMAS:
1032
0
#ifdef LIBXML_SCHEMAS_ENABLED
1033
0
            return(1);
1034
#else
1035
            return(0);
1036
#endif
1037
0
        case XML_WITH_SCHEMATRON:
1038
0
#ifdef LIBXML_SCHEMATRON_ENABLED
1039
0
            return(1);
1040
#else
1041
            return(0);
1042
#endif
1043
0
        case XML_WITH_MODULES:
1044
0
#ifdef LIBXML_MODULES_ENABLED
1045
0
            return(1);
1046
#else
1047
            return(0);
1048
#endif
1049
0
        case XML_WITH_DEBUG:
1050
#ifdef LIBXML_DEBUG_ENABLED
1051
            return(1);
1052
#else
1053
0
            return(0);
1054
0
#endif
1055
0
        case XML_WITH_DEBUG_MEM:
1056
#ifdef DEBUG_MEMORY_LOCATION
1057
            return(1);
1058
#else
1059
0
            return(0);
1060
0
#endif
1061
0
        case XML_WITH_DEBUG_RUN:
1062
0
            return(0);
1063
0
        case XML_WITH_ZLIB:
1064
0
#ifdef LIBXML_ZLIB_ENABLED
1065
0
            return(1);
1066
#else
1067
            return(0);
1068
#endif
1069
0
        case XML_WITH_LZMA:
1070
0
#ifdef LIBXML_LZMA_ENABLED
1071
0
            return(1);
1072
#else
1073
            return(0);
1074
#endif
1075
0
        case XML_WITH_ICU:
1076
#ifdef LIBXML_ICU_ENABLED
1077
            return(1);
1078
#else
1079
0
            return(0);
1080
0
#endif
1081
0
        default:
1082
0
      break;
1083
0
     }
1084
0
     return(0);
1085
0
}
1086
1087
/************************************************************************
1088
 *                  *
1089
 *    SAX2 defaulted attributes handling      *
1090
 *                  *
1091
 ************************************************************************/
1092
1093
/**
1094
 * xmlDetectSAX2:
1095
 * @ctxt:  an XML parser context
1096
 *
1097
 * Do the SAX2 detection and specific initialization
1098
 */
1099
static void
1100
1.63M
xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
1101
1.63M
    xmlSAXHandlerPtr sax;
1102
1103
    /* Avoid unused variable warning if features are disabled. */
1104
1.63M
    (void) sax;
1105
1106
1.63M
    if (ctxt == NULL) return;
1107
1.63M
    sax = ctxt->sax;
1108
1.63M
#ifdef LIBXML_SAX1_ENABLED
1109
1.63M
    if ((sax) &&  (sax->initialized == XML_SAX2_MAGIC) &&
1110
1.63M
        ((sax->startElementNs != NULL) ||
1111
1.09M
         (sax->endElementNs != NULL) ||
1112
1.09M
         ((sax->startElement == NULL) && (sax->endElement == NULL))))
1113
1.09M
        ctxt->sax2 = 1;
1114
#else
1115
    ctxt->sax2 = 1;
1116
#endif /* LIBXML_SAX1_ENABLED */
1117
1118
1.63M
    ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
1119
1.63M
    ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
1120
1.63M
    ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
1121
1.63M
    if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
1122
1.63M
    (ctxt->str_xml_ns == NULL)) {
1123
0
        xmlErrMemory(ctxt, NULL);
1124
0
    }
1125
1.63M
}
1126
1127
typedef struct _xmlDefAttrs xmlDefAttrs;
1128
typedef xmlDefAttrs *xmlDefAttrsPtr;
1129
struct _xmlDefAttrs {
1130
    int nbAttrs;  /* number of defaulted attributes on that element */
1131
    int maxAttrs;       /* the size of the array */
1132
#if __STDC_VERSION__ >= 199901L
1133
    /* Using a C99 flexible array member avoids UBSan errors. */
1134
    const xmlChar *values[]; /* array of localname/prefix/values/external */
1135
#else
1136
    const xmlChar *values[5];
1137
#endif
1138
};
1139
1140
/**
1141
 * xmlAttrNormalizeSpace:
1142
 * @src: the source string
1143
 * @dst: the target string
1144
 *
1145
 * Normalize the space in non CDATA attribute values:
1146
 * If the attribute type is not CDATA, then the XML processor MUST further
1147
 * process the normalized attribute value by discarding any leading and
1148
 * trailing space (#x20) characters, and by replacing sequences of space
1149
 * (#x20) characters by a single space (#x20) character.
1150
 * Note that the size of dst need to be at least src, and if one doesn't need
1151
 * to preserve dst (and it doesn't come from a dictionary or read-only) then
1152
 * passing src as dst is just fine.
1153
 *
1154
 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1155
 *         is needed.
1156
 */
1157
static xmlChar *
1158
xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
1159
72.7k
{
1160
72.7k
    if ((src == NULL) || (dst == NULL))
1161
0
        return(NULL);
1162
1163
82.8k
    while (*src == 0x20) src++;
1164
1.64M
    while (*src != 0) {
1165
1.57M
  if (*src == 0x20) {
1166
259k
      while (*src == 0x20) src++;
1167
77.9k
      if (*src != 0)
1168
65.0k
    *dst++ = 0x20;
1169
1.49M
  } else {
1170
1.49M
      *dst++ = *src++;
1171
1.49M
  }
1172
1.57M
    }
1173
72.7k
    *dst = 0;
1174
72.7k
    if (dst == src)
1175
53.3k
       return(NULL);
1176
19.3k
    return(dst);
1177
72.7k
}
1178
1179
/**
1180
 * xmlAttrNormalizeSpace2:
1181
 * @src: the source string
1182
 *
1183
 * Normalize the space in non CDATA attribute values, a slightly more complex
1184
 * front end to avoid allocation problems when running on attribute values
1185
 * coming from the input.
1186
 *
1187
 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1188
 *         is needed.
1189
 */
1190
static const xmlChar *
1191
xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
1192
50.2k
{
1193
50.2k
    int i;
1194
50.2k
    int remove_head = 0;
1195
50.2k
    int need_realloc = 0;
1196
50.2k
    const xmlChar *cur;
1197
1198
50.2k
    if ((ctxt == NULL) || (src == NULL) || (len == NULL))
1199
0
        return(NULL);
1200
50.2k
    i = *len;
1201
50.2k
    if (i <= 0)
1202
1.98k
        return(NULL);
1203
1204
48.2k
    cur = src;
1205
69.3k
    while (*cur == 0x20) {
1206
21.1k
        cur++;
1207
21.1k
  remove_head++;
1208
21.1k
    }
1209
839k
    while (*cur != 0) {
1210
801k
  if (*cur == 0x20) {
1211
36.3k
      cur++;
1212
36.3k
      if ((*cur == 0x20) || (*cur == 0)) {
1213
10.6k
          need_realloc = 1;
1214
10.6k
    break;
1215
10.6k
      }
1216
36.3k
  } else
1217
765k
      cur++;
1218
801k
    }
1219
48.2k
    if (need_realloc) {
1220
10.6k
        xmlChar *ret;
1221
1222
10.6k
  ret = xmlStrndup(src + remove_head, i - remove_head + 1);
1223
10.6k
  if (ret == NULL) {
1224
0
      xmlErrMemory(ctxt, NULL);
1225
0
      return(NULL);
1226
0
  }
1227
10.6k
  xmlAttrNormalizeSpace(ret, ret);
1228
10.6k
  *len = strlen((const char *)ret);
1229
10.6k
        return(ret);
1230
37.5k
    } else if (remove_head) {
1231
998
        *len -= remove_head;
1232
998
        memmove(src, src + remove_head, 1 + *len);
1233
998
  return(src);
1234
998
    }
1235
36.5k
    return(NULL);
1236
48.2k
}
1237
1238
/**
1239
 * xmlAddDefAttrs:
1240
 * @ctxt:  an XML parser context
1241
 * @fullname:  the element fullname
1242
 * @fullattr:  the attribute fullname
1243
 * @value:  the attribute value
1244
 *
1245
 * Add a defaulted attribute for an element
1246
 */
1247
static void
1248
xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1249
               const xmlChar *fullname,
1250
               const xmlChar *fullattr,
1251
84.5k
               const xmlChar *value) {
1252
84.5k
    xmlDefAttrsPtr defaults;
1253
84.5k
    int len;
1254
84.5k
    const xmlChar *name;
1255
84.5k
    const xmlChar *prefix;
1256
1257
    /*
1258
     * Allows to detect attribute redefinitions
1259
     */
1260
84.5k
    if (ctxt->attsSpecial != NULL) {
1261
55.3k
        if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1262
7.02k
      return;
1263
55.3k
    }
1264
1265
77.5k
    if (ctxt->attsDefault == NULL) {
1266
33.1k
        ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
1267
33.1k
  if (ctxt->attsDefault == NULL)
1268
0
      goto mem_error;
1269
33.1k
    }
1270
1271
    /*
1272
     * split the element name into prefix:localname , the string found
1273
     * are within the DTD and then not associated to namespace names.
1274
     */
1275
77.5k
    name = xmlSplitQName3(fullname, &len);
1276
77.5k
    if (name == NULL) {
1277
59.7k
        name = xmlDictLookup(ctxt->dict, fullname, -1);
1278
59.7k
  prefix = NULL;
1279
59.7k
    } else {
1280
17.8k
        name = xmlDictLookup(ctxt->dict, name, -1);
1281
17.8k
  prefix = xmlDictLookup(ctxt->dict, fullname, len);
1282
17.8k
    }
1283
1284
    /*
1285
     * make sure there is some storage
1286
     */
1287
77.5k
    defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1288
77.5k
    if (defaults == NULL) {
1289
46.2k
        defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
1290
46.2k
                     (4 * 5) * sizeof(const xmlChar *));
1291
46.2k
  if (defaults == NULL)
1292
0
      goto mem_error;
1293
46.2k
  defaults->nbAttrs = 0;
1294
46.2k
  defaults->maxAttrs = 4;
1295
46.2k
  if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1296
46.2k
                          defaults, NULL) < 0) {
1297
0
      xmlFree(defaults);
1298
0
      goto mem_error;
1299
0
  }
1300
46.2k
    } else if (defaults->nbAttrs >= defaults->maxAttrs) {
1301
2.99k
        xmlDefAttrsPtr temp;
1302
1303
2.99k
        temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
1304
2.99k
           (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
1305
2.99k
  if (temp == NULL)
1306
0
      goto mem_error;
1307
2.99k
  defaults = temp;
1308
2.99k
  defaults->maxAttrs *= 2;
1309
2.99k
  if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1310
2.99k
                          defaults, NULL) < 0) {
1311
0
      xmlFree(defaults);
1312
0
      goto mem_error;
1313
0
  }
1314
2.99k
    }
1315
1316
    /*
1317
     * Split the element name into prefix:localname , the string found
1318
     * are within the DTD and hen not associated to namespace names.
1319
     */
1320
77.5k
    name = xmlSplitQName3(fullattr, &len);
1321
77.5k
    if (name == NULL) {
1322
55.6k
        name = xmlDictLookup(ctxt->dict, fullattr, -1);
1323
55.6k
  prefix = NULL;
1324
55.6k
    } else {
1325
21.8k
        name = xmlDictLookup(ctxt->dict, name, -1);
1326
21.8k
  prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1327
21.8k
    }
1328
1329
77.5k
    defaults->values[5 * defaults->nbAttrs] = name;
1330
77.5k
    defaults->values[5 * defaults->nbAttrs + 1] = prefix;
1331
    /* intern the string and precompute the end */
1332
77.5k
    len = xmlStrlen(value);
1333
77.5k
    value = xmlDictLookup(ctxt->dict, value, len);
1334
77.5k
    defaults->values[5 * defaults->nbAttrs + 2] = value;
1335
77.5k
    defaults->values[5 * defaults->nbAttrs + 3] = value + len;
1336
77.5k
    if (ctxt->external)
1337
4.55k
        defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
1338
72.9k
    else
1339
72.9k
        defaults->values[5 * defaults->nbAttrs + 4] = NULL;
1340
77.5k
    defaults->nbAttrs++;
1341
1342
77.5k
    return;
1343
1344
0
mem_error:
1345
0
    xmlErrMemory(ctxt, NULL);
1346
0
    return;
1347
77.5k
}
1348
1349
/**
1350
 * xmlAddSpecialAttr:
1351
 * @ctxt:  an XML parser context
1352
 * @fullname:  the element fullname
1353
 * @fullattr:  the attribute fullname
1354
 * @type:  the attribute type
1355
 *
1356
 * Register this attribute type
1357
 */
1358
static void
1359
xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1360
      const xmlChar *fullname,
1361
      const xmlChar *fullattr,
1362
      int type)
1363
781k
{
1364
781k
    if (ctxt->attsSpecial == NULL) {
1365
63.5k
        ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
1366
63.5k
  if (ctxt->attsSpecial == NULL)
1367
0
      goto mem_error;
1368
63.5k
    }
1369
1370
781k
    if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1371
11.4k
        return;
1372
1373
770k
    xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1374
770k
                     (void *) (ptrdiff_t) type);
1375
770k
    return;
1376
1377
0
mem_error:
1378
0
    xmlErrMemory(ctxt, NULL);
1379
0
    return;
1380
781k
}
1381
1382
/**
1383
 * xmlCleanSpecialAttrCallback:
1384
 *
1385
 * Removes CDATA attributes from the special attribute table
1386
 */
1387
static void
1388
xmlCleanSpecialAttrCallback(void *payload, void *data,
1389
                            const xmlChar *fullname, const xmlChar *fullattr,
1390
765k
                            const xmlChar *unused ATTRIBUTE_UNUSED) {
1391
765k
    xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1392
1393
765k
    if (((ptrdiff_t) payload) == XML_ATTRIBUTE_CDATA) {
1394
379k
        xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1395
379k
    }
1396
765k
}
1397
1398
/**
1399
 * xmlCleanSpecialAttr:
1400
 * @ctxt:  an XML parser context
1401
 *
1402
 * Trim the list of attributes defined to remove all those of type
1403
 * CDATA as they are not special. This call should be done when finishing
1404
 * to parse the DTD and before starting to parse the document root.
1405
 */
1406
static void
1407
xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1408
245k
{
1409
245k
    if (ctxt->attsSpecial == NULL)
1410
183k
        return;
1411
1412
62.1k
    xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1413
1414
62.1k
    if (xmlHashSize(ctxt->attsSpecial) == 0) {
1415
9.31k
        xmlHashFree(ctxt->attsSpecial, NULL);
1416
9.31k
        ctxt->attsSpecial = NULL;
1417
9.31k
    }
1418
62.1k
    return;
1419
245k
}
1420
1421
/**
1422
 * xmlCheckLanguageID:
1423
 * @lang:  pointer to the string value
1424
 *
1425
 * Checks that the value conforms to the LanguageID production:
1426
 *
1427
 * NOTE: this is somewhat deprecated, those productions were removed from
1428
 *       the XML Second edition.
1429
 *
1430
 * [33] LanguageID ::= Langcode ('-' Subcode)*
1431
 * [34] Langcode ::= ISO639Code |  IanaCode |  UserCode
1432
 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1433
 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1434
 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1435
 * [38] Subcode ::= ([a-z] | [A-Z])+
1436
 *
1437
 * The current REC reference the successors of RFC 1766, currently 5646
1438
 *
1439
 * http://www.rfc-editor.org/rfc/rfc5646.txt
1440
 * langtag       = language
1441
 *                 ["-" script]
1442
 *                 ["-" region]
1443
 *                 *("-" variant)
1444
 *                 *("-" extension)
1445
 *                 ["-" privateuse]
1446
 * language      = 2*3ALPHA            ; shortest ISO 639 code
1447
 *                 ["-" extlang]       ; sometimes followed by
1448
 *                                     ; extended language subtags
1449
 *               / 4ALPHA              ; or reserved for future use
1450
 *               / 5*8ALPHA            ; or registered language subtag
1451
 *
1452
 * extlang       = 3ALPHA              ; selected ISO 639 codes
1453
 *                 *2("-" 3ALPHA)      ; permanently reserved
1454
 *
1455
 * script        = 4ALPHA              ; ISO 15924 code
1456
 *
1457
 * region        = 2ALPHA              ; ISO 3166-1 code
1458
 *               / 3DIGIT              ; UN M.49 code
1459
 *
1460
 * variant       = 5*8alphanum         ; registered variants
1461
 *               / (DIGIT 3alphanum)
1462
 *
1463
 * extension     = singleton 1*("-" (2*8alphanum))
1464
 *
1465
 *                                     ; Single alphanumerics
1466
 *                                     ; "x" reserved for private use
1467
 * singleton     = DIGIT               ; 0 - 9
1468
 *               / %x41-57             ; A - W
1469
 *               / %x59-5A             ; Y - Z
1470
 *               / %x61-77             ; a - w
1471
 *               / %x79-7A             ; y - z
1472
 *
1473
 * it sounds right to still allow Irregular i-xxx IANA and user codes too
1474
 * The parser below doesn't try to cope with extension or privateuse
1475
 * that could be added but that's not interoperable anyway
1476
 *
1477
 * Returns 1 if correct 0 otherwise
1478
 **/
1479
int
1480
xmlCheckLanguageID(const xmlChar * lang)
1481
9.47k
{
1482
9.47k
    const xmlChar *cur = lang, *nxt;
1483
1484
9.47k
    if (cur == NULL)
1485
360
        return (0);
1486
9.11k
    if (((cur[0] == 'i') && (cur[1] == '-')) ||
1487
9.11k
        ((cur[0] == 'I') && (cur[1] == '-')) ||
1488
9.11k
        ((cur[0] == 'x') && (cur[1] == '-')) ||
1489
9.11k
        ((cur[0] == 'X') && (cur[1] == '-'))) {
1490
        /*
1491
         * Still allow IANA code and user code which were coming
1492
         * from the previous version of the XML-1.0 specification
1493
         * it's deprecated but we should not fail
1494
         */
1495
439
        cur += 2;
1496
15.0k
        while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1497
15.0k
               ((cur[0] >= 'a') && (cur[0] <= 'z')))
1498
14.5k
            cur++;
1499
439
        return(cur[0] == 0);
1500
439
    }
1501
8.67k
    nxt = cur;
1502
36.8k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1503
36.8k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1504
28.1k
           nxt++;
1505
8.67k
    if (nxt - cur >= 4) {
1506
        /*
1507
         * Reserved
1508
         */
1509
653
        if ((nxt - cur > 8) || (nxt[0] != 0))
1510
433
            return(0);
1511
220
        return(1);
1512
653
    }
1513
8.02k
    if (nxt - cur < 2)
1514
373
        return(0);
1515
    /* we got an ISO 639 code */
1516
7.64k
    if (nxt[0] == 0)
1517
3.57k
        return(1);
1518
4.07k
    if (nxt[0] != '-')
1519
414
        return(0);
1520
1521
3.66k
    nxt++;
1522
3.66k
    cur = nxt;
1523
    /* now we can have extlang or script or region or variant */
1524
3.66k
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1525
564
        goto region_m49;
1526
1527
48.0k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1528
48.0k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1529
44.9k
           nxt++;
1530
3.10k
    if (nxt - cur == 4)
1531
854
        goto script;
1532
2.24k
    if (nxt - cur == 2)
1533
475
        goto region;
1534
1.77k
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1535
183
        goto variant;
1536
1.58k
    if (nxt - cur != 3)
1537
343
        return(0);
1538
    /* we parsed an extlang */
1539
1.24k
    if (nxt[0] == 0)
1540
141
        return(1);
1541
1.10k
    if (nxt[0] != '-')
1542
199
        return(0);
1543
1544
905
    nxt++;
1545
905
    cur = nxt;
1546
    /* now we can have script or region or variant */
1547
905
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1548
159
        goto region_m49;
1549
1550
7.73k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1551
7.73k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1552
6.99k
           nxt++;
1553
746
    if (nxt - cur == 2)
1554
216
        goto region;
1555
530
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1556
143
        goto variant;
1557
387
    if (nxt - cur != 4)
1558
232
        return(0);
1559
    /* we parsed a script */
1560
1.00k
script:
1561
1.00k
    if (nxt[0] == 0)
1562
112
        return(1);
1563
897
    if (nxt[0] != '-')
1564
182
        return(0);
1565
1566
715
    nxt++;
1567
715
    cur = nxt;
1568
    /* now we can have region or variant */
1569
715
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1570
146
        goto region_m49;
1571
1572
5.76k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1573
5.76k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1574
5.19k
           nxt++;
1575
1576
569
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1577
155
        goto variant;
1578
414
    if (nxt - cur != 2)
1579
250
        return(0);
1580
    /* we parsed a region */
1581
1.35k
region:
1582
1.35k
    if (nxt[0] == 0)
1583
214
        return(1);
1584
1.14k
    if (nxt[0] != '-')
1585
622
        return(0);
1586
1587
522
    nxt++;
1588
522
    cur = nxt;
1589
    /* now we can just have a variant */
1590
6.02k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1591
6.02k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1592
5.50k
           nxt++;
1593
1594
522
    if ((nxt - cur < 5) || (nxt - cur > 8))
1595
343
        return(0);
1596
1597
    /* we parsed a variant */
1598
660
variant:
1599
660
    if (nxt[0] == 0)
1600
181
        return(1);
1601
479
    if (nxt[0] != '-')
1602
385
        return(0);
1603
    /* extensions and private use subtags not checked */
1604
94
    return (1);
1605
1606
869
region_m49:
1607
869
    if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
1608
869
        ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
1609
503
        nxt += 3;
1610
503
        goto region;
1611
503
    }
1612
366
    return(0);
1613
869
}
1614
1615
/************************************************************************
1616
 *                  *
1617
 *    Parser stacks related functions and macros    *
1618
 *                  *
1619
 ************************************************************************/
1620
1621
static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1622
                                            const xmlChar ** str);
1623
1624
#ifdef SAX2
1625
/**
1626
 * nsPush:
1627
 * @ctxt:  an XML parser context
1628
 * @prefix:  the namespace prefix or NULL
1629
 * @URL:  the namespace name
1630
 *
1631
 * Pushes a new parser namespace on top of the ns stack
1632
 *
1633
 * Returns -1 in case of error, -2 if the namespace should be discarded
1634
 *     and the index in the stack otherwise.
1635
 */
1636
static int
1637
nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1638
218k
{
1639
218k
    if (ctxt->options & XML_PARSE_NSCLEAN) {
1640
120k
        int i;
1641
160k
  for (i = ctxt->nsNr - 2;i >= 0;i -= 2) {
1642
56.7k
      if (ctxt->nsTab[i] == prefix) {
1643
    /* in scope */
1644
16.2k
          if (ctxt->nsTab[i + 1] == URL)
1645
10.9k
        return(-2);
1646
    /* out of scope keep it */
1647
5.35k
    break;
1648
16.2k
      }
1649
56.7k
  }
1650
120k
    }
1651
207k
    if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1652
147k
  ctxt->nsMax = 10;
1653
147k
  ctxt->nsNr = 0;
1654
147k
  ctxt->nsTab = (const xmlChar **)
1655
147k
                xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1656
147k
  if (ctxt->nsTab == NULL) {
1657
0
      xmlErrMemory(ctxt, NULL);
1658
0
      ctxt->nsMax = 0;
1659
0
            return (-1);
1660
0
  }
1661
147k
    } else if (ctxt->nsNr >= ctxt->nsMax) {
1662
1.40k
        const xmlChar ** tmp;
1663
1.40k
        ctxt->nsMax *= 2;
1664
1.40k
        tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1665
1.40k
            ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1666
1.40k
        if (tmp == NULL) {
1667
0
            xmlErrMemory(ctxt, NULL);
1668
0
      ctxt->nsMax /= 2;
1669
0
            return (-1);
1670
0
        }
1671
1.40k
  ctxt->nsTab = tmp;
1672
1.40k
    }
1673
207k
    ctxt->nsTab[ctxt->nsNr++] = prefix;
1674
207k
    ctxt->nsTab[ctxt->nsNr++] = URL;
1675
207k
    return (ctxt->nsNr);
1676
207k
}
1677
/**
1678
 * nsPop:
1679
 * @ctxt: an XML parser context
1680
 * @nr:  the number to pop
1681
 *
1682
 * Pops the top @nr parser prefix/namespace from the ns stack
1683
 *
1684
 * Returns the number of namespaces removed
1685
 */
1686
static int
1687
nsPop(xmlParserCtxtPtr ctxt, int nr)
1688
67.8k
{
1689
67.8k
    int i;
1690
1691
67.8k
    if (ctxt->nsTab == NULL) return(0);
1692
67.8k
    if (ctxt->nsNr < nr) {
1693
0
        xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1694
0
        nr = ctxt->nsNr;
1695
0
    }
1696
67.8k
    if (ctxt->nsNr <= 0)
1697
0
        return (0);
1698
1699
225k
    for (i = 0;i < nr;i++) {
1700
157k
         ctxt->nsNr--;
1701
157k
   ctxt->nsTab[ctxt->nsNr] = NULL;
1702
157k
    }
1703
67.8k
    return(nr);
1704
67.8k
}
1705
#endif
1706
1707
static int
1708
180k
xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1709
180k
    const xmlChar **atts;
1710
180k
    int *attallocs;
1711
180k
    int maxatts;
1712
1713
180k
    if (ctxt->atts == NULL) {
1714
180k
  maxatts = 55; /* allow for 10 attrs by default */
1715
180k
  atts = (const xmlChar **)
1716
180k
         xmlMalloc(maxatts * sizeof(xmlChar *));
1717
180k
  if (atts == NULL) goto mem_error;
1718
180k
  ctxt->atts = atts;
1719
180k
  attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1720
180k
  if (attallocs == NULL) goto mem_error;
1721
180k
  ctxt->attallocs = attallocs;
1722
180k
  ctxt->maxatts = maxatts;
1723
180k
    } else if (nr + 5 > ctxt->maxatts) {
1724
211
  maxatts = (nr + 5) * 2;
1725
211
  atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1726
211
             maxatts * sizeof(const xmlChar *));
1727
211
  if (atts == NULL) goto mem_error;
1728
211
  ctxt->atts = atts;
1729
211
  attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1730
211
                               (maxatts / 5) * sizeof(int));
1731
211
  if (attallocs == NULL) goto mem_error;
1732
211
  ctxt->attallocs = attallocs;
1733
211
  ctxt->maxatts = maxatts;
1734
211
    }
1735
180k
    return(ctxt->maxatts);
1736
0
mem_error:
1737
0
    xmlErrMemory(ctxt, NULL);
1738
0
    return(-1);
1739
180k
}
1740
1741
/**
1742
 * inputPush:
1743
 * @ctxt:  an XML parser context
1744
 * @value:  the parser input
1745
 *
1746
 * Pushes a new parser input on top of the input stack
1747
 *
1748
 * Returns -1 in case of error, the index in the stack otherwise
1749
 */
1750
int
1751
inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1752
1.91M
{
1753
1.91M
    if ((ctxt == NULL) || (value == NULL))
1754
0
        return(-1);
1755
1.91M
    if (ctxt->inputNr >= ctxt->inputMax) {
1756
4.09k
        ctxt->inputMax *= 2;
1757
4.09k
        ctxt->inputTab =
1758
4.09k
            (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1759
4.09k
                                             ctxt->inputMax *
1760
4.09k
                                             sizeof(ctxt->inputTab[0]));
1761
4.09k
        if (ctxt->inputTab == NULL) {
1762
0
            xmlErrMemory(ctxt, NULL);
1763
0
      ctxt->inputMax /= 2;
1764
0
            return (-1);
1765
0
        }
1766
4.09k
    }
1767
1.91M
    ctxt->inputTab[ctxt->inputNr] = value;
1768
1.91M
    ctxt->input = value;
1769
1.91M
    return (ctxt->inputNr++);
1770
1.91M
}
1771
/**
1772
 * inputPop:
1773
 * @ctxt: an XML parser context
1774
 *
1775
 * Pops the top parser input from the input stack
1776
 *
1777
 * Returns the input just removed
1778
 */
1779
xmlParserInputPtr
1780
inputPop(xmlParserCtxtPtr ctxt)
1781
5.12M
{
1782
5.12M
    xmlParserInputPtr ret;
1783
1784
5.12M
    if (ctxt == NULL)
1785
0
        return(NULL);
1786
5.12M
    if (ctxt->inputNr <= 0)
1787
3.25M
        return (NULL);
1788
1.87M
    ctxt->inputNr--;
1789
1.87M
    if (ctxt->inputNr > 0)
1790
371k
        ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1791
1.50M
    else
1792
1.50M
        ctxt->input = NULL;
1793
1.87M
    ret = ctxt->inputTab[ctxt->inputNr];
1794
1.87M
    ctxt->inputTab[ctxt->inputNr] = NULL;
1795
1.87M
    return (ret);
1796
5.12M
}
1797
/**
1798
 * nodePush:
1799
 * @ctxt:  an XML parser context
1800
 * @value:  the element node
1801
 *
1802
 * Pushes a new element node on top of the node stack
1803
 *
1804
 * Returns -1 in case of error, the index in the stack otherwise
1805
 */
1806
int
1807
nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1808
7.26M
{
1809
7.26M
    if (ctxt == NULL) return(0);
1810
7.26M
    if (ctxt->nodeNr >= ctxt->nodeMax) {
1811
33.0k
        xmlNodePtr *tmp;
1812
1813
33.0k
  tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1814
33.0k
                                      ctxt->nodeMax * 2 *
1815
33.0k
                                      sizeof(ctxt->nodeTab[0]));
1816
33.0k
        if (tmp == NULL) {
1817
0
            xmlErrMemory(ctxt, NULL);
1818
0
            return (-1);
1819
0
        }
1820
33.0k
        ctxt->nodeTab = tmp;
1821
33.0k
  ctxt->nodeMax *= 2;
1822
33.0k
    }
1823
7.26M
    if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1824
7.26M
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
1825
0
  xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
1826
0
     "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
1827
0
        xmlParserMaxDepth);
1828
0
  xmlHaltParser(ctxt);
1829
0
  return(-1);
1830
0
    }
1831
7.26M
    ctxt->nodeTab[ctxt->nodeNr] = value;
1832
7.26M
    ctxt->node = value;
1833
7.26M
    return (ctxt->nodeNr++);
1834
7.26M
}
1835
1836
/**
1837
 * nodePop:
1838
 * @ctxt: an XML parser context
1839
 *
1840
 * Pops the top element node from the node stack
1841
 *
1842
 * Returns the node just removed
1843
 */
1844
xmlNodePtr
1845
nodePop(xmlParserCtxtPtr ctxt)
1846
5.02M
{
1847
5.02M
    xmlNodePtr ret;
1848
1849
5.02M
    if (ctxt == NULL) return(NULL);
1850
5.02M
    if (ctxt->nodeNr <= 0)
1851
48.5k
        return (NULL);
1852
4.97M
    ctxt->nodeNr--;
1853
4.97M
    if (ctxt->nodeNr > 0)
1854
4.72M
        ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1855
248k
    else
1856
248k
        ctxt->node = NULL;
1857
4.97M
    ret = ctxt->nodeTab[ctxt->nodeNr];
1858
4.97M
    ctxt->nodeTab[ctxt->nodeNr] = NULL;
1859
4.97M
    return (ret);
1860
5.02M
}
1861
1862
/**
1863
 * nameNsPush:
1864
 * @ctxt:  an XML parser context
1865
 * @value:  the element name
1866
 * @prefix:  the element prefix
1867
 * @URI:  the element namespace name
1868
 * @line:  the current line number for error messages
1869
 * @nsNr:  the number of namespaces pushed on the namespace table
1870
 *
1871
 * Pushes a new element name/prefix/URL on top of the name stack
1872
 *
1873
 * Returns -1 in case of error, the index in the stack otherwise
1874
 */
1875
static int
1876
nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1877
           const xmlChar *prefix, const xmlChar *URI, int line, int nsNr)
1878
6.05M
{
1879
6.05M
    xmlStartTag *tag;
1880
1881
6.05M
    if (ctxt->nameNr >= ctxt->nameMax) {
1882
37.4k
        const xmlChar * *tmp;
1883
37.4k
        xmlStartTag *tmp2;
1884
37.4k
        ctxt->nameMax *= 2;
1885
37.4k
        tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1886
37.4k
                                    ctxt->nameMax *
1887
37.4k
                                    sizeof(ctxt->nameTab[0]));
1888
37.4k
        if (tmp == NULL) {
1889
0
      ctxt->nameMax /= 2;
1890
0
      goto mem_error;
1891
0
        }
1892
37.4k
  ctxt->nameTab = tmp;
1893
37.4k
        tmp2 = (xmlStartTag *) xmlRealloc((void * *)ctxt->pushTab,
1894
37.4k
                                    ctxt->nameMax *
1895
37.4k
                                    sizeof(ctxt->pushTab[0]));
1896
37.4k
        if (tmp2 == NULL) {
1897
0
      ctxt->nameMax /= 2;
1898
0
      goto mem_error;
1899
0
        }
1900
37.4k
  ctxt->pushTab = tmp2;
1901
6.02M
    } else if (ctxt->pushTab == NULL) {
1902
1.11M
        ctxt->pushTab = (xmlStartTag *) xmlMalloc(ctxt->nameMax *
1903
1.11M
                                            sizeof(ctxt->pushTab[0]));
1904
1.11M
        if (ctxt->pushTab == NULL)
1905
0
            goto mem_error;
1906
1.11M
    }
1907
6.05M
    ctxt->nameTab[ctxt->nameNr] = value;
1908
6.05M
    ctxt->name = value;
1909
6.05M
    tag = &ctxt->pushTab[ctxt->nameNr];
1910
6.05M
    tag->prefix = prefix;
1911
6.05M
    tag->URI = URI;
1912
6.05M
    tag->line = line;
1913
6.05M
    tag->nsNr = nsNr;
1914
6.05M
    return (ctxt->nameNr++);
1915
0
mem_error:
1916
0
    xmlErrMemory(ctxt, NULL);
1917
0
    return (-1);
1918
6.05M
}
1919
#ifdef LIBXML_PUSH_ENABLED
1920
/**
1921
 * nameNsPop:
1922
 * @ctxt: an XML parser context
1923
 *
1924
 * Pops the top element/prefix/URI name from the name stack
1925
 *
1926
 * Returns the name just removed
1927
 */
1928
static const xmlChar *
1929
nameNsPop(xmlParserCtxtPtr ctxt)
1930
866k
{
1931
866k
    const xmlChar *ret;
1932
1933
866k
    if (ctxt->nameNr <= 0)
1934
0
        return (NULL);
1935
866k
    ctxt->nameNr--;
1936
866k
    if (ctxt->nameNr > 0)
1937
824k
        ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1938
41.8k
    else
1939
41.8k
        ctxt->name = NULL;
1940
866k
    ret = ctxt->nameTab[ctxt->nameNr];
1941
866k
    ctxt->nameTab[ctxt->nameNr] = NULL;
1942
866k
    return (ret);
1943
866k
}
1944
#endif /* LIBXML_PUSH_ENABLED */
1945
1946
/**
1947
 * namePush:
1948
 * @ctxt:  an XML parser context
1949
 * @value:  the element name
1950
 *
1951
 * Pushes a new element name on top of the name stack
1952
 *
1953
 * Returns -1 in case of error, the index in the stack otherwise
1954
 */
1955
int
1956
namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
1957
0
{
1958
0
    if (ctxt == NULL) return (-1);
1959
1960
0
    if (ctxt->nameNr >= ctxt->nameMax) {
1961
0
        const xmlChar * *tmp;
1962
0
        tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1963
0
                                    ctxt->nameMax * 2 *
1964
0
                                    sizeof(ctxt->nameTab[0]));
1965
0
        if (tmp == NULL) {
1966
0
      goto mem_error;
1967
0
        }
1968
0
  ctxt->nameTab = tmp;
1969
0
        ctxt->nameMax *= 2;
1970
0
    }
1971
0
    ctxt->nameTab[ctxt->nameNr] = value;
1972
0
    ctxt->name = value;
1973
0
    return (ctxt->nameNr++);
1974
0
mem_error:
1975
0
    xmlErrMemory(ctxt, NULL);
1976
0
    return (-1);
1977
0
}
1978
/**
1979
 * namePop:
1980
 * @ctxt: an XML parser context
1981
 *
1982
 * Pops the top element name from the name stack
1983
 *
1984
 * Returns the name just removed
1985
 */
1986
const xmlChar *
1987
namePop(xmlParserCtxtPtr ctxt)
1988
3.54M
{
1989
3.54M
    const xmlChar *ret;
1990
1991
3.54M
    if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1992
0
        return (NULL);
1993
3.54M
    ctxt->nameNr--;
1994
3.54M
    if (ctxt->nameNr > 0)
1995
3.39M
        ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1996
144k
    else
1997
144k
        ctxt->name = NULL;
1998
3.54M
    ret = ctxt->nameTab[ctxt->nameNr];
1999
3.54M
    ctxt->nameTab[ctxt->nameNr] = NULL;
2000
3.54M
    return (ret);
2001
3.54M
}
2002
2003
8.51M
/*
 * spacePush:
 * Pushes @val on the ctxt->spaceTab stack and points ctxt->space at the
 * new top entry. The stack is doubled when full.
 *
 * Returns the index of the pushed entry, or -1 on allocation failure.
 */
static int spacePush(xmlParserCtxtPtr ctxt, int val) {
    int slot;

    if (ctxt->spaceNr >= ctxt->spaceMax) {
        int *grown;

        ctxt->spaceMax *= 2;
        grown = (int *) xmlRealloc(ctxt->spaceTab,
                                   ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
        if (grown == NULL) {
            xmlErrMemory(ctxt, NULL);
            /* Undo the doubling: the old table is still the one in use. */
            ctxt->spaceMax /=2;
            return(-1);
        }
        ctxt->spaceTab = grown;
    }

    slot = ctxt->spaceNr++;
    ctxt->spaceTab[slot] = val;
    ctxt->space = &ctxt->spaceTab[slot];
    return(slot);
}
2021
2022
7.06M
/*
 * spacePop:
 * Pops the top entry of the ctxt->spaceTab stack. ctxt->space is moved to
 * the entry below, or to slot 0 when the stack becomes empty, and the
 * vacated slot is overwritten with -1.
 *
 * Returns the popped value, or 0 when the stack is already empty.
 */
static int spacePop(xmlParserCtxtPtr ctxt) {
    int top;
    int popped;

    if (ctxt->spaceNr <= 0)
        return(0);

    top = --ctxt->spaceNr;
    ctxt->space = &ctxt->spaceTab[(top > 0) ? top - 1 : 0];

    popped = ctxt->spaceTab[top];
    ctxt->spaceTab[top] = -1;
    return(popped);
}
2034
2035
/*
2036
 * Macros for accessing the content. Those should be used only by the parser,
2037
 * and not exported.
2038
 *
2039
 * Dirty macros, i.e. one often need to make assumption on the context to
2040
 * use them
2041
 *
2042
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
2043
 *           To be used with extreme caution since operations consuming
2044
 *           characters may move the input buffer to a different location !
2045
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
2046
 *           This should be used internally by the parser
2047
 *           only to compare to ASCII values otherwise it would break when
2048
 *           running with UTF-8 encoding.
2049
 *   RAW     same as CUR but in the input buffer, bypass any token
2050
 *           extraction that may have been done
2051
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used only
2052
 *           to compare on ASCII based substring.
2053
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
2054
 *           strings without newlines within the parser.
2055
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII
2056
 *           defined char within the parser.
2057
 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
2058
 *
2059
 *   NEXT    Skip to the next character, this does the proper decoding
2060
 *           in UTF-8 mode. It also pop-up unfinished entities on the fly.
2061
 *   NEXTL(l) Skip the current unicode character of l xmlChars long.
2062
 *   CUR_CHAR(l) returns the current unicode character (int), set l
2063
 *           to the number of xmlChars used for the encoding [0-5].
2064
 *   CUR_SCHAR  same but operate on a string instead of the context
2065
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
2066
 *            the index
2067
 *   GROW, SHRINK  handling of input buffers
2068
 */
2069
2070
130M
/*
 * Raw accessors on the current input. They all expect a variable named
 * `ctxt` in scope. RAW and CUR expand to the same expression.
 */
#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur
#define BASE_PTR ctxt->input->base

/*
 * CMPn(s, c1, ..., cn): true when the first n bytes at s equal c1..cn.
 * Comparison stops at the first mismatch. Every parameter is
 * parenthesized so that expression arguments keep their meaning, and the
 * bytes are read through a const-qualified pointer.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((const unsigned char *) (s))[ 0 ] == (c1) && \
    ((const unsigned char *) (s))[ 1 ] == (c2) && \
    ((const unsigned char *) (s))[ 2 ] == (c3) && \
    ((const unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && \
    ((const unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && \
    ((const unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && \
    ((const unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && \
    ((const unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((const unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((const unsigned char *) (s))[ 9 ] == (c10) )

/*
 * SKIP(val): advance the cursor and the column by val bytes, then ask for
 * more input if the cursor landed on a 0 byte. No newline accounting.
 */
#define SKIP(val) do {							\
    ctxt->input->cur += (val),ctxt->input->col+=(val);			\
    if (*ctxt->input->cur == 0)						\
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);			\
  } while (0)

/*
 * SKIPL(val): advance the cursor by val bytes one at a time, updating
 * line/col for each '\n' met, then ask for more input if the cursor
 * landed on a 0 byte.
 */
#define SKIPL(val) do {							\
    int skipl;								\
    for(skipl=0; skipl<(val); skipl++) {				\
	if (*(ctxt->input->cur) == '\n') {				\
	ctxt->input->line++; ctxt->input->col = 1;			\
	} else ctxt->input->col++;					\
	ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == 0)						\
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);			\
  } while (0)
2111
2112
56.5M
#define SHRINK if ((ctxt->progressive == 0) &&       \
2113
56.5M
       (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
2114
56.5M
       (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
2115
56.5M
  xmlSHRINK (ctxt);
2116
2117
125k
static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
2118
125k
    xmlParserInputShrink(ctxt->input);
2119
125k
    if (*ctxt->input->cur == 0)
2120
4.32k
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2121
125k
}
2122
2123
185M
#define GROW if ((ctxt->progressive == 0) &&       \
2124
185M
     (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
2125
185M
  xmlGROW (ctxt);
2126
2127
23.9M
static void xmlGROW (xmlParserCtxtPtr ctxt) {
2128
23.9M
    ptrdiff_t curEnd = ctxt->input->end - ctxt->input->cur;
2129
23.9M
    ptrdiff_t curBase = ctxt->input->cur - ctxt->input->base;
2130
2131
23.9M
    if (((curEnd > XML_MAX_LOOKUP_LIMIT) ||
2132
23.9M
         (curBase > XML_MAX_LOOKUP_LIMIT)) &&
2133
23.9M
         ((ctxt->input->buf) &&
2134
0
          (ctxt->input->buf->readcallback != xmlInputReadCallbackNop)) &&
2135
23.9M
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
2136
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
2137
0
        xmlHaltParser(ctxt);
2138
0
  return;
2139
0
    }
2140
23.9M
    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2141
23.9M
    if ((ctxt->input->cur > ctxt->input->end) ||
2142
23.9M
        (ctxt->input->cur < ctxt->input->base)) {
2143
0
        xmlHaltParser(ctxt);
2144
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "cur index out of bound");
2145
0
  return;
2146
0
    }
2147
23.9M
    if ((ctxt->input->cur != NULL) && (*ctxt->input->cur == 0))
2148
1.32M
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2149
23.9M
}
2150
2151
46.9M
/* Shorthands for the character-stepping helpers; all expect `ctxt`. */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: step over one byte, bumping the column, and ask for more input
 * if the cursor lands on a 0 byte. No newline accounting.
 */
#define NEXT1 {								\
	ctxt->input->col++;						\
	ctxt->input->cur++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }

/*
 * NEXTL(l): step over the current character, which is l bytes long,
 * updating line/col depending on whether it is a '\n'.
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += (l);				\
  } while (0)

/* Decode the current character; l receives its length in bytes. */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * COPY_BUF(l,b,i,v): append character v to buffer b at index i and
 * advance i; a single byte is stored directly when l == 1, otherwise
 * xmlCopyCharMultiByte writes the encoded form.
 */
#define COPY_BUF(l,b,i,v)						\
    if ((l) == 1) (b)[(i)++] = (v);					\
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v))

/* Total number of bytes consumed so far on the current input. */
#define CUR_CONSUMED \
    (ctxt->input->consumed + (ctxt->input->cur - ctxt->input->base))
2178
2179
/**
2180
 * xmlSkipBlankChars:
2181
 * @ctxt:  the XML parser context
2182
 *
2183
 * skip all blanks character found at that point in the input streams.
2184
 * It pops up finished entities in the process if allowable at that point.
2185
 *
2186
 * Returns the number of space chars skipped
2187
 */
2188
2189
int
2190
46.9M
xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
2191
46.9M
    int res = 0;
2192
2193
    /*
2194
     * It's Okay to use CUR/NEXT here since all the blanks are on
2195
     * the ASCII range.
2196
     */
2197
46.9M
    if (((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) ||
2198
46.9M
        (ctxt->instate == XML_PARSER_START)) {
2199
30.5M
  const xmlChar *cur;
2200
  /*
2201
   * if we are in the document content, go really fast
2202
   */
2203
30.5M
  cur = ctxt->input->cur;
2204
30.5M
  while (IS_BLANK_CH(*cur)) {
2205
16.2M
      if (*cur == '\n') {
2206
2.44M
    ctxt->input->line++; ctxt->input->col = 1;
2207
13.8M
      } else {
2208
13.8M
    ctxt->input->col++;
2209
13.8M
      }
2210
16.2M
      cur++;
2211
16.2M
      if (res < INT_MAX)
2212
16.2M
    res++;
2213
16.2M
      if (*cur == 0) {
2214
117k
    ctxt->input->cur = cur;
2215
117k
    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2216
117k
    cur = ctxt->input->cur;
2217
117k
      }
2218
16.2M
  }
2219
30.5M
  ctxt->input->cur = cur;
2220
30.5M
    } else {
2221
16.3M
        int expandPE = ((ctxt->external != 0) || (ctxt->inputNr != 1));
2222
2223
46.3M
  while (1) {
2224
46.3M
            if (IS_BLANK_CH(CUR)) { /* CHECKED tstblanks.xml */
2225
29.2M
    NEXT;
2226
29.2M
      } else if (CUR == '%') {
2227
                /*
2228
                 * Need to handle support of entities branching here
2229
                 */
2230
756k
          if ((expandPE == 0) || (IS_BLANK_CH(NXT(1))) || (NXT(1) == 0))
2231
270k
                    break;
2232
486k
          xmlParsePEReference(ctxt);
2233
16.3M
            } else if (CUR == 0) {
2234
275k
                if (ctxt->inputNr <= 1)
2235
29.7k
                    break;
2236
245k
                xmlPopInput(ctxt);
2237
16.0M
            } else {
2238
16.0M
                break;
2239
16.0M
            }
2240
2241
            /*
2242
             * Also increase the counter when entering or exiting a PERef.
2243
             * The spec says: "When a parameter-entity reference is recognized
2244
             * in the DTD and included, its replacement text MUST be enlarged
2245
             * by the attachment of one leading and one following space (#x20)
2246
             * character."
2247
             */
2248
30.0M
      if (res < INT_MAX)
2249
30.0M
    res++;
2250
30.0M
        }
2251
16.3M
    }
2252
46.9M
    return(res);
2253
46.9M
}
2254
2255
/************************************************************************
2256
 *                  *
2257
 *    Commodity functions to handle entities      *
2258
 *                  *
2259
 ************************************************************************/
2260
2261
/**
2262
 * xmlPopInput:
2263
 * @ctxt:  an XML parser context
2264
 *
2265
 * xmlPopInput: the current input pointed by ctxt->input came to an end
2266
 *          pop it and return the next char.
2267
 *
2268
 * Returns the current xmlChar in the parser context
2269
 */
2270
xmlChar
2271
255k
xmlPopInput(xmlParserCtxtPtr ctxt) {
2272
255k
    if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
2273
255k
    if (xmlParserDebugEntities)
2274
0
  xmlGenericError(xmlGenericErrorContext,
2275
0
    "Popping input %d\n", ctxt->inputNr);
2276
255k
    if ((ctxt->inputNr > 1) && (ctxt->inSubset == 0) &&
2277
255k
        (ctxt->instate != XML_PARSER_EOF))
2278
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2279
0
                    "Unfinished entity outside the DTD");
2280
255k
    xmlFreeInputStream(inputPop(ctxt));
2281
255k
    if (*ctxt->input->cur == 0)
2282
199
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2283
255k
    return(CUR);
2284
255k
}
2285
2286
/**
2287
 * xmlPushInput:
2288
 * @ctxt:  an XML parser context
2289
 * @input:  an XML parser input fragment (entity, XML fragment ...).
2290
 *
2291
 * xmlPushInput: switch to a new input stream which is stacked on top
2292
 *               of the previous one(s).
2293
 * Returns -1 in case of error or the index in the input stack
2294
 */
2295
int
2296
410k
xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
2297
410k
    int ret;
2298
410k
    if (input == NULL) return(-1);
2299
2300
408k
    if (xmlParserDebugEntities) {
2301
0
  if ((ctxt->input != NULL) && (ctxt->input->filename))
2302
0
      xmlGenericError(xmlGenericErrorContext,
2303
0
        "%s(%d): ", ctxt->input->filename,
2304
0
        ctxt->input->line);
2305
0
  xmlGenericError(xmlGenericErrorContext,
2306
0
    "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
2307
0
    }
2308
408k
    if (((ctxt->inputNr > 40) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2309
408k
        (ctxt->inputNr > 1024)) {
2310
963
        xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2311
116k
        while (ctxt->inputNr > 1)
2312
115k
            xmlFreeInputStream(inputPop(ctxt));
2313
963
  return(-1);
2314
963
    }
2315
407k
    ret = inputPush(ctxt, input);
2316
407k
    if (ctxt->instate == XML_PARSER_EOF)
2317
0
        return(-1);
2318
407k
    GROW;
2319
407k
    return(ret);
2320
407k
}
2321
2322
/**
2323
 * xmlParseCharRef:
2324
 * @ctxt:  an XML parser context
2325
 *
2326
 * DEPRECATED: Internal function, don't use.
2327
 *
2328
 * parse Reference declarations
2329
 *
2330
 * [66] CharRef ::= '&#' [0-9]+ ';' |
2331
 *                  '&#x' [0-9a-fA-F]+ ';'
2332
 *
2333
 * [ WFC: Legal Character ]
2334
 * Characters referred to using character references must match the
2335
 * production for Char.
2336
 *
2337
 * Returns the value parsed (as an int), 0 in case of error
2338
 */
2339
int
2340
437k
xmlParseCharRef(xmlParserCtxtPtr ctxt) {
2341
437k
    int val = 0;
2342
437k
    int count = 0;
2343
2344
    /*
2345
     * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2346
     */
2347
437k
    if ((RAW == '&') && (NXT(1) == '#') &&
2348
437k
        (NXT(2) == 'x')) {
2349
194k
  SKIP(3);
2350
194k
  GROW;
2351
1.05M
  while (RAW != ';') { /* loop blocked by count */
2352
872k
      if (count++ > 20) {
2353
56.4k
    count = 0;
2354
56.4k
    GROW;
2355
56.4k
                if (ctxt->instate == XML_PARSER_EOF)
2356
0
                    return(0);
2357
56.4k
      }
2358
872k
      if ((RAW >= '0') && (RAW <= '9'))
2359
702k
          val = val * 16 + (CUR - '0');
2360
170k
      else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2361
150k
          val = val * 16 + (CUR - 'a') + 10;
2362
19.8k
      else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2363
11.1k
          val = val * 16 + (CUR - 'A') + 10;
2364
8.75k
      else {
2365
8.75k
    xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2366
8.75k
    val = 0;
2367
8.75k
    break;
2368
8.75k
      }
2369
863k
      if (val > 0x110000)
2370
621k
          val = 0x110000;
2371
2372
863k
      NEXT;
2373
863k
      count++;
2374
863k
  }
2375
194k
  if (RAW == ';') {
2376
      /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2377
186k
      ctxt->input->col++;
2378
186k
      ctxt->input->cur++;
2379
186k
  }
2380
242k
    } else if  ((RAW == '&') && (NXT(1) == '#')) {
2381
242k
  SKIP(2);
2382
242k
  GROW;
2383
1.20M
  while (RAW != ';') { /* loop blocked by count */
2384
981k
      if (count++ > 20) {
2385
34.9k
    count = 0;
2386
34.9k
    GROW;
2387
34.9k
                if (ctxt->instate == XML_PARSER_EOF)
2388
0
                    return(0);
2389
34.9k
      }
2390
981k
      if ((RAW >= '0') && (RAW <= '9'))
2391
964k
          val = val * 10 + (CUR - '0');
2392
17.4k
      else {
2393
17.4k
    xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2394
17.4k
    val = 0;
2395
17.4k
    break;
2396
17.4k
      }
2397
964k
      if (val > 0x110000)
2398
380k
          val = 0x110000;
2399
2400
964k
      NEXT;
2401
964k
      count++;
2402
964k
  }
2403
242k
  if (RAW == ';') {
2404
      /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2405
225k
      ctxt->input->col++;
2406
225k
      ctxt->input->cur++;
2407
225k
  }
2408
242k
    } else {
2409
0
        xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2410
0
    }
2411
2412
    /*
2413
     * [ WFC: Legal Character ]
2414
     * Characters referred to using character references must match the
2415
     * production for Char.
2416
     */
2417
437k
    if (val >= 0x110000) {
2418
1.07k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2419
1.07k
                "xmlParseCharRef: character reference out of bounds\n",
2420
1.07k
          val);
2421
436k
    } else if (IS_CHAR(val)) {
2422
409k
        return(val);
2423
409k
    } else {
2424
27.6k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2425
27.6k
                          "xmlParseCharRef: invalid xmlChar value %d\n",
2426
27.6k
                    val);
2427
27.6k
    }
2428
28.7k
    return(0);
2429
437k
}
2430
2431
/**
2432
 * xmlParseStringCharRef:
2433
 * @ctxt:  an XML parser context
2434
 * @str:  a pointer to an index in the string
2435
 *
2436
 * parse Reference declarations, variant parsing from a string rather
2437
 * than an an input flow.
2438
 *
2439
 * [66] CharRef ::= '&#' [0-9]+ ';' |
2440
 *                  '&#x' [0-9a-fA-F]+ ';'
2441
 *
2442
 * [ WFC: Legal Character ]
2443
 * Characters referred to using character references must match the
2444
 * production for Char.
2445
 *
2446
 * Returns the value parsed (as an int), 0 in case of error, str will be
2447
 *         updated to the current value of the index
2448
 */
2449
static int
2450
373k
xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2451
373k
    const xmlChar *ptr;
2452
373k
    xmlChar cur;
2453
373k
    int val = 0;
2454
2455
373k
    if ((str == NULL) || (*str == NULL)) return(0);
2456
373k
    ptr = *str;
2457
373k
    cur = *ptr;
2458
373k
    if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2459
85.3k
  ptr += 3;
2460
85.3k
  cur = *ptr;
2461
224k
  while (cur != ';') { /* Non input consuming loop */
2462
141k
      if ((cur >= '0') && (cur <= '9'))
2463
49.0k
          val = val * 16 + (cur - '0');
2464
92.0k
      else if ((cur >= 'a') && (cur <= 'f'))
2465
23.2k
          val = val * 16 + (cur - 'a') + 10;
2466
68.7k
      else if ((cur >= 'A') && (cur <= 'F'))
2467
67.3k
          val = val * 16 + (cur - 'A') + 10;
2468
1.43k
      else {
2469
1.43k
    xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2470
1.43k
    val = 0;
2471
1.43k
    break;
2472
1.43k
      }
2473
139k
      if (val > 0x110000)
2474
45.3k
          val = 0x110000;
2475
2476
139k
      ptr++;
2477
139k
      cur = *ptr;
2478
139k
  }
2479
85.3k
  if (cur == ';')
2480
83.8k
      ptr++;
2481
287k
    } else if  ((cur == '&') && (ptr[1] == '#')){
2482
287k
  ptr += 2;
2483
287k
  cur = *ptr;
2484
1.12M
  while (cur != ';') { /* Non input consuming loops */
2485
839k
      if ((cur >= '0') && (cur <= '9'))
2486
837k
          val = val * 10 + (cur - '0');
2487
1.93k
      else {
2488
1.93k
    xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2489
1.93k
    val = 0;
2490
1.93k
    break;
2491
1.93k
      }
2492
837k
      if (val > 0x110000)
2493
54.0k
          val = 0x110000;
2494
2495
837k
      ptr++;
2496
837k
      cur = *ptr;
2497
837k
  }
2498
287k
  if (cur == ';')
2499
285k
      ptr++;
2500
287k
    } else {
2501
0
  xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2502
0
  return(0);
2503
0
    }
2504
373k
    *str = ptr;
2505
2506
    /*
2507
     * [ WFC: Legal Character ]
2508
     * Characters referred to using character references must match the
2509
     * production for Char.
2510
     */
2511
373k
    if (val >= 0x110000) {
2512
515
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2513
515
                "xmlParseStringCharRef: character reference out of bounds\n",
2514
515
                val);
2515
372k
    } else if (IS_CHAR(val)) {
2516
368k
        return(val);
2517
368k
    } else {
2518
3.82k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2519
3.82k
        "xmlParseStringCharRef: invalid xmlChar value %d\n",
2520
3.82k
        val);
2521
3.82k
    }
2522
4.33k
    return(0);
2523
373k
}
2524
2525
/**
2526
 * xmlParserHandlePEReference:
2527
 * @ctxt:  the parser context
2528
 *
2529
 * [69] PEReference ::= '%' Name ';'
2530
 *
2531
 * [ WFC: No Recursion ]
2532
 * A parsed entity must not contain a recursive
2533
 * reference to itself, either directly or indirectly.
2534
 *
2535
 * [ WFC: Entity Declared ]
2536
 * In a document without any DTD, a document with only an internal DTD
2537
 * subset which contains no parameter entity references, or a document
2538
 * with "standalone='yes'", ...  ... The declaration of a parameter
2539
 * entity must precede any reference to it...
2540
 *
2541
 * [ VC: Entity Declared ]
2542
 * In a document with an external subset or external parameter entities
2543
 * with "standalone='no'", ...  ... The declaration of a parameter entity
2544
 * must precede any reference to it...
2545
 *
2546
 * [ WFC: In DTD ]
2547
 * Parameter-entity references may only appear in the DTD.
2548
 * NOTE: misleading but this is handled.
2549
 *
2550
 * A PEReference may have been detected in the current input stream
2551
 * the handling is done accordingly to
2552
 *      http://www.w3.org/TR/REC-xml#entproc
2553
 * i.e.
2554
 *   - Included in literal in entity values
2555
 *   - Included as Parameter Entity reference within DTDs
2556
 */
2557
void
2558
0
xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
2559
0
    switch(ctxt->instate) {
2560
0
  case XML_PARSER_CDATA_SECTION:
2561
0
      return;
2562
0
        case XML_PARSER_COMMENT:
2563
0
      return;
2564
0
  case XML_PARSER_START_TAG:
2565
0
      return;
2566
0
  case XML_PARSER_END_TAG:
2567
0
      return;
2568
0
        case XML_PARSER_EOF:
2569
0
      xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
2570
0
      return;
2571
0
        case XML_PARSER_PROLOG:
2572
0
  case XML_PARSER_START:
2573
0
  case XML_PARSER_MISC:
2574
0
      xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
2575
0
      return;
2576
0
  case XML_PARSER_ENTITY_DECL:
2577
0
        case XML_PARSER_CONTENT:
2578
0
        case XML_PARSER_ATTRIBUTE_VALUE:
2579
0
        case XML_PARSER_PI:
2580
0
  case XML_PARSER_SYSTEM_LITERAL:
2581
0
  case XML_PARSER_PUBLIC_LITERAL:
2582
      /* we just ignore it there */
2583
0
      return;
2584
0
        case XML_PARSER_EPILOG:
2585
0
      xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
2586
0
      return;
2587
0
  case XML_PARSER_ENTITY_VALUE:
2588
      /*
2589
       * NOTE: in the case of entity values, we don't do the
2590
       *       substitution here since we need the literal
2591
       *       entity value to be able to save the internal
2592
       *       subset of the document.
2593
       *       This will be handled by xmlStringDecodeEntities
2594
       */
2595
0
      return;
2596
0
        case XML_PARSER_DTD:
2597
      /*
2598
       * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2599
       * In the internal DTD subset, parameter-entity references
2600
       * can occur only where markup declarations can occur, not
2601
       * within markup declarations.
2602
       * In that case this is handled in xmlParseMarkupDecl
2603
       */
2604
0
      if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2605
0
    return;
2606
0
      if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
2607
0
    return;
2608
0
            break;
2609
0
        case XML_PARSER_IGNORE:
2610
0
            return;
2611
0
    }
2612
2613
0
    xmlParsePEReference(ctxt);
2614
0
}
2615
2616
/*
 * Macro used to grow the current buffer to twice its size plus n bytes.
 * buffer##_size is expected to be a size_t
 * mem_error: is expected to handle memory allocation failures; it is
 * also reached when the new size wraps around. On failure the buffer and
 * its recorded size are left unchanged.
 * n is parenthesized so that expression arguments are added as a whole.
 */
#define growBuffer(buffer, n) {						\
    xmlChar *tmp;							\
    size_t new_size = buffer##_size * 2 + (n);                          \
    if (new_size < buffer##_size) goto mem_error;                       \
    tmp = (xmlChar *) xmlRealloc(buffer, new_size);                     \
    if (tmp == NULL) goto mem_error;					\
    buffer = tmp;							\
    buffer##_size = new_size;                                           \
}
2630
2631
/**
2632
 * xmlStringLenDecodeEntities:
2633
 * @ctxt:  the parser context
2634
 * @str:  the input string
2635
 * @len: the string length
2636
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2637
 * @end:  an end marker xmlChar, 0 if none
2638
 * @end2:  an end marker xmlChar, 0 if none
2639
 * @end3:  an end marker xmlChar, 0 if none
2640
 *
2641
 * Takes a entity string content and process to do the adequate substitutions.
2642
 *
2643
 * [67] Reference ::= EntityRef | CharRef
2644
 *
2645
 * [69] PEReference ::= '%' Name ';'
2646
 *
2647
 * Returns A newly allocated string with the substitution done. The caller
2648
 *      must deallocate it !
2649
 */
2650
xmlChar *
2651
xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2652
1.48M
          int what, xmlChar end, xmlChar  end2, xmlChar end3) {
2653
1.48M
    xmlChar *buffer = NULL;
2654
1.48M
    size_t buffer_size = 0;
2655
1.48M
    size_t nbchars = 0;
2656
2657
1.48M
    xmlChar *current = NULL;
2658
1.48M
    xmlChar *rep = NULL;
2659
1.48M
    const xmlChar *last;
2660
1.48M
    xmlEntityPtr ent;
2661
1.48M
    int c,l;
2662
2663
1.48M
    if ((ctxt == NULL) || (str == NULL) || (len < 0))
2664
0
  return(NULL);
2665
1.48M
    last = str + len;
2666
2667
1.48M
    if (((ctxt->depth > 40) &&
2668
1.48M
         ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2669
1.48M
  (ctxt->depth > 1024)) {
2670
1.20k
  xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2671
1.20k
  return(NULL);
2672
1.20k
    }
2673
2674
    /*
2675
     * allocate a translation buffer.
2676
     */
2677
1.47M
    buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
2678
1.47M
    buffer = (xmlChar *) xmlMallocAtomic(buffer_size);
2679
1.47M
    if (buffer == NULL) goto mem_error;
2680
2681
    /*
2682
     * OK loop until we reach one of the ending char or a size limit.
2683
     * we are operating on already parsed values.
2684
     */
2685
1.47M
    if (str < last)
2686
1.38M
  c = CUR_SCHAR(str, l);
2687
91.9k
    else
2688
91.9k
        c = 0;
2689
128M
    while ((c != 0) && (c != end) && /* non input consuming loop */
2690
128M
           (c != end2) && (c != end3) &&
2691
128M
           (ctxt->instate != XML_PARSER_EOF)) {
2692
2693
127M
  if (c == 0) break;
2694
127M
        if ((c == '&') && (str[1] == '#')) {
2695
373k
      int val = xmlParseStringCharRef(ctxt, &str);
2696
373k
      if (val == 0)
2697
4.33k
                goto int_error;
2698
368k
      COPY_BUF(0,buffer,nbchars,val);
2699
368k
      if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2700
670
          growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2701
670
      }
2702
127M
  } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2703
225k
      if (xmlParserDebugEntities)
2704
0
    xmlGenericError(xmlGenericErrorContext,
2705
0
      "String decoding Entity Reference: %.30s\n",
2706
0
      str);
2707
225k
      ent = xmlParseStringEntityRef(ctxt, &str);
2708
225k
      xmlParserEntityCheck(ctxt, 0, ent, 0);
2709
225k
      if (ent != NULL)
2710
157k
          ctxt->nbentities += ent->checked / 2;
2711
225k
      if ((ent != NULL) &&
2712
225k
    (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2713
3.87k
    if (ent->content != NULL) {
2714
3.87k
        COPY_BUF(0,buffer,nbchars,ent->content[0]);
2715
3.87k
        if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2716
28
      growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2717
28
        }
2718
3.87k
    } else {
2719
0
        xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2720
0
          "predefined entity has no content\n");
2721
0
                    goto int_error;
2722
0
    }
2723
222k
      } else if ((ent != NULL) && (ent->content != NULL)) {
2724
153k
    ctxt->depth++;
2725
153k
    rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2726
153k
                            0, 0, 0);
2727
153k
    ctxt->depth--;
2728
153k
    if (rep == NULL) {
2729
109k
                    ent->content[0] = 0;
2730
109k
                    goto int_error;
2731
109k
                }
2732
2733
43.8k
                current = rep;
2734
1.34M
                while (*current != 0) { /* non input consuming loop */
2735
1.29M
                    buffer[nbchars++] = *current++;
2736
1.29M
                    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2737
578
                        if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
2738
10
                            goto int_error;
2739
1.70k
                        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2740
1.70k
                    }
2741
1.29M
                }
2742
43.8k
                xmlFree(rep);
2743
43.8k
                rep = NULL;
2744
69.0k
      } else if (ent != NULL) {
2745
786
    int i = xmlStrlen(ent->name);
2746
786
    const xmlChar *cur = ent->name;
2747
2748
786
    buffer[nbchars++] = '&';
2749
786
    if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) {
2750
0
        growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE);
2751
0
    }
2752
3.34k
    for (;i > 0;i--)
2753
2.56k
        buffer[nbchars++] = *cur++;
2754
786
    buffer[nbchars++] = ';';
2755
786
      }
2756
126M
  } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2757
416k
      if (xmlParserDebugEntities)
2758
0
    xmlGenericError(xmlGenericErrorContext,
2759
0
      "String decoding PE Reference: %.30s\n", str);
2760
416k
      ent = xmlParseStringPEReference(ctxt, &str);
2761
416k
      xmlParserEntityCheck(ctxt, 0, ent, 0);
2762
416k
      if (ent != NULL)
2763
237k
          ctxt->nbentities += ent->checked / 2;
2764
416k
      if (ent != NULL) {
2765
237k
                if (ent->content == NULL) {
2766
        /*
2767
         * Note: external parsed entities will not be loaded,
2768
         * it is not required for a non-validating parser to
2769
         * complete external PEReferences coming from the
2770
         * internal subset
2771
         */
2772
4.81k
        if (((ctxt->options & XML_PARSE_NOENT) != 0) ||
2773
4.81k
      ((ctxt->options & XML_PARSE_DTDVALID) != 0) ||
2774
4.81k
      (ctxt->validate != 0)) {
2775
4.65k
      xmlLoadEntityContent(ctxt, ent);
2776
4.65k
        } else {
2777
159
      xmlWarningMsg(ctxt, XML_ERR_ENTITY_PROCESSING,
2778
159
      "not validating will not read content for PE entity %s\n",
2779
159
                          ent->name, NULL);
2780
159
        }
2781
4.81k
    }
2782
237k
    ctxt->depth++;
2783
237k
    rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2784
237k
                            0, 0, 0);
2785
237k
    ctxt->depth--;
2786
237k
    if (rep == NULL) {
2787
67.9k
                    if (ent->content != NULL)
2788
66.2k
                        ent->content[0] = 0;
2789
67.9k
                    goto int_error;
2790
67.9k
                }
2791
169k
                current = rep;
2792
8.64M
                while (*current != 0) { /* non input consuming loop */
2793
8.47M
                    buffer[nbchars++] = *current++;
2794
8.47M
                    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2795
16.4k
                        if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
2796
111
                            goto int_error;
2797
49.0k
                        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2798
49.0k
                    }
2799
8.47M
                }
2800
169k
                xmlFree(rep);
2801
169k
                rep = NULL;
2802
169k
      }
2803
126M
  } else {
2804
126M
      COPY_BUF(l,buffer,nbchars,c);
2805
126M
      str += l;
2806
126M
      if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2807
156k
          growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2808
156k
      }
2809
126M
  }
2810
127M
  if (str < last)
2811
126M
      c = CUR_SCHAR(str, l);
2812
1.20M
  else
2813
1.20M
      c = 0;
2814
127M
    }
2815
1.29M
    buffer[nbchars] = 0;
2816
1.29M
    return(buffer);
2817
2818
0
mem_error:
2819
0
    xmlErrMemory(ctxt, NULL);
2820
181k
int_error:
2821
181k
    if (rep != NULL)
2822
121
        xmlFree(rep);
2823
181k
    if (buffer != NULL)
2824
181k
        xmlFree(buffer);
2825
181k
    return(NULL);
2826
0
}
2827
2828
/**
2829
 * xmlStringDecodeEntities:
2830
 * @ctxt:  the parser context
2831
 * @str:  the input string
2832
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2833
 * @end:  an end marker xmlChar, 0 if none
2834
 * @end2:  an end marker xmlChar, 0 if none
2835
 * @end3:  an end marker xmlChar, 0 if none
2836
 *
2837
 * Takes a entity string content and process to do the adequate substitutions.
2838
 *
2839
 * [67] Reference ::= EntityRef | CharRef
2840
 *
2841
 * [69] PEReference ::= '%' Name ';'
2842
 *
2843
 * Returns A newly allocated string with the substitution done. The caller
2844
 *      must deallocate it !
2845
 */
2846
xmlChar *
2847
xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2848
1.47M
            xmlChar end, xmlChar  end2, xmlChar end3) {
2849
1.47M
    if ((ctxt == NULL) || (str == NULL)) return(NULL);
2850
1.47M
    return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2851
1.47M
           end, end2, end3));
2852
1.47M
}
2853
2854
/************************************************************************
2855
 *                  *
2856
 *    Commodity functions, cleanup needed ?     *
2857
 *                  *
2858
 ************************************************************************/
2859
2860
/**
2861
 * areBlanks:
2862
 * @ctxt:  an XML parser context
2863
 * @str:  a xmlChar *
2864
 * @len:  the size of @str
2865
 * @blank_chars: we know the chars are blanks
2866
 *
2867
 * Is this a sequence of blank chars that one can ignore ?
2868
 *
2869
 * Returns 1 if ignorable 0 otherwise.
2870
 */
2871
2872
static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2873
2.87M
                     int blank_chars) {
2874
2.87M
    int i, ret;
2875
2.87M
    xmlNodePtr lastChild;
2876
2877
    /*
2878
     * Don't spend time trying to differentiate them, the same callback is
2879
     * used !
2880
     */
2881
2.87M
    if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
2882
160k
  return(0);
2883
2884
    /*
2885
     * Check for xml:space value.
2886
     */
2887
2.71M
    if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2888
2.71M
        (*(ctxt->space) == -2))
2889
425k
  return(0);
2890
2891
    /*
2892
     * Check that the string is made of blanks
2893
     */
2894
2.28M
    if (blank_chars == 0) {
2895
3.87M
  for (i = 0;i < len;i++)
2896
3.46M
      if (!(IS_BLANK_CH(str[i]))) return(0);
2897
640k
    }
2898
2899
    /*
2900
     * Look if the element is mixed content in the DTD if available
2901
     */
2902
2.05M
    if (ctxt->node == NULL) return(0);
2903
2.01M
    if (ctxt->myDoc != NULL) {
2904
2.01M
  ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2905
2.01M
        if (ret == 0) return(1);
2906
1.92M
        if (ret == 1) return(0);
2907
1.92M
    }
2908
2909
    /*
2910
     * Otherwise, heuristic :-\
2911
     */
2912
1.92M
    if ((RAW != '<') && (RAW != 0xD)) return(0);
2913
1.88M
    if ((ctxt->node->children == NULL) &&
2914
1.88M
  (RAW == '<') && (NXT(1) == '/')) return(0);
2915
2916
1.88M
    lastChild = xmlGetLastChild(ctxt->node);
2917
1.88M
    if (lastChild == NULL) {
2918
603k
        if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2919
603k
            (ctxt->node->content != NULL)) return(0);
2920
1.28M
    } else if (xmlNodeIsText(lastChild))
2921
26.4k
        return(0);
2922
1.25M
    else if ((ctxt->node->children != NULL) &&
2923
1.25M
             (xmlNodeIsText(ctxt->node->children)))
2924
12.9k
        return(0);
2925
1.84M
    return(1);
2926
1.88M
}
2927
2928
/************************************************************************
2929
 *                  *
2930
 *    Extra stuff for namespace support     *
2931
 *  Relates to http://www.w3.org/TR/WD-xml-names      *
2932
 *                  *
2933
 ************************************************************************/
2934
2935
/**
2936
 * xmlSplitQName:
2937
 * @ctxt:  an XML parser context
2938
 * @name:  an XML parser context
2939
 * @prefix:  a xmlChar **
2940
 *
2941
 * parse an UTF8 encoded XML qualified name string
2942
 *
2943
 * [NS 5] QName ::= (Prefix ':')? LocalPart
2944
 *
2945
 * [NS 6] Prefix ::= NCName
2946
 *
2947
 * [NS 7] LocalPart ::= NCName
2948
 *
2949
 * Returns the local part, and prefix is updated
2950
 *   to get the Prefix if any.
2951
 */
2952
2953
xmlChar *
2954
4.09M
xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2955
4.09M
    xmlChar buf[XML_MAX_NAMELEN + 5];
2956
4.09M
    xmlChar *buffer = NULL;
2957
4.09M
    int len = 0;
2958
4.09M
    int max = XML_MAX_NAMELEN;
2959
4.09M
    xmlChar *ret = NULL;
2960
4.09M
    const xmlChar *cur = name;
2961
4.09M
    int c;
2962
2963
4.09M
    if (prefix == NULL) return(NULL);
2964
4.09M
    *prefix = NULL;
2965
2966
4.09M
    if (cur == NULL) return(NULL);
2967
2968
#ifndef XML_XML_NAMESPACE
2969
    /* xml: prefix is not really a namespace */
2970
    if ((cur[0] == 'x') && (cur[1] == 'm') &&
2971
        (cur[2] == 'l') && (cur[3] == ':'))
2972
  return(xmlStrdup(name));
2973
#endif
2974
2975
    /* nasty but well=formed */
2976
4.09M
    if (cur[0] == ':')
2977
1.13k
  return(xmlStrdup(name));
2978
2979
4.09M
    c = *cur++;
2980
20.8M
    while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2981
16.7M
  buf[len++] = c;
2982
16.7M
  c = *cur++;
2983
16.7M
    }
2984
4.09M
    if (len >= max) {
2985
  /*
2986
   * Okay someone managed to make a huge name, so he's ready to pay
2987
   * for the processing speed.
2988
   */
2989
3.00k
  max = len * 2;
2990
2991
3.00k
  buffer = (xmlChar *) xmlMallocAtomic(max);
2992
3.00k
  if (buffer == NULL) {
2993
0
      xmlErrMemory(ctxt, NULL);
2994
0
      return(NULL);
2995
0
  }
2996
3.00k
  memcpy(buffer, buf, len);
2997
6.08M
  while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2998
6.08M
      if (len + 10 > max) {
2999
7.81k
          xmlChar *tmp;
3000
3001
7.81k
    max *= 2;
3002
7.81k
    tmp = (xmlChar *) xmlRealloc(buffer, max);
3003
7.81k
    if (tmp == NULL) {
3004
0
        xmlFree(buffer);
3005
0
        xmlErrMemory(ctxt, NULL);
3006
0
        return(NULL);
3007
0
    }
3008
7.81k
    buffer = tmp;
3009
7.81k
      }
3010
6.08M
      buffer[len++] = c;
3011
6.08M
      c = *cur++;
3012
6.08M
  }
3013
3.00k
  buffer[len] = 0;
3014
3.00k
    }
3015
3016
4.09M
    if ((c == ':') && (*cur == 0)) {
3017
2.52k
        if (buffer != NULL)
3018
179
      xmlFree(buffer);
3019
2.52k
  *prefix = NULL;
3020
2.52k
  return(xmlStrdup(name));
3021
2.52k
    }
3022
3023
4.09M
    if (buffer == NULL)
3024
4.09M
  ret = xmlStrndup(buf, len);
3025
2.82k
    else {
3026
2.82k
  ret = buffer;
3027
2.82k
  buffer = NULL;
3028
2.82k
  max = XML_MAX_NAMELEN;
3029
2.82k
    }
3030
3031
3032
4.09M
    if (c == ':') {
3033
1.26M
  c = *cur;
3034
1.26M
        *prefix = ret;
3035
1.26M
  if (c == 0) {
3036
0
      return(xmlStrndup(BAD_CAST "", 0));
3037
0
  }
3038
1.26M
  len = 0;
3039
3040
  /*
3041
   * Check that the first character is proper to start
3042
   * a new name
3043
   */
3044
1.26M
  if (!(((c >= 0x61) && (c <= 0x7A)) ||
3045
1.26M
        ((c >= 0x41) && (c <= 0x5A)) ||
3046
1.26M
        (c == '_') || (c == ':'))) {
3047
2.94k
      int l;
3048
2.94k
      int first = CUR_SCHAR(cur, l);
3049
3050
2.94k
      if (!IS_LETTER(first) && (first != '_')) {
3051
1.45k
    xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
3052
1.45k
          "Name %s is not XML Namespace compliant\n",
3053
1.45k
          name);
3054
1.45k
      }
3055
2.94k
  }
3056
1.26M
  cur++;
3057
3058
8.31M
  while ((c != 0) && (len < max)) { /* tested bigname2.xml */
3059
7.05M
      buf[len++] = c;
3060
7.05M
      c = *cur++;
3061
7.05M
  }
3062
1.26M
  if (len >= max) {
3063
      /*
3064
       * Okay someone managed to make a huge name, so he's ready to pay
3065
       * for the processing speed.
3066
       */
3067
1.40k
      max = len * 2;
3068
3069
1.40k
      buffer = (xmlChar *) xmlMallocAtomic(max);
3070
1.40k
      if (buffer == NULL) {
3071
0
          xmlErrMemory(ctxt, NULL);
3072
0
    return(NULL);
3073
0
      }
3074
1.40k
      memcpy(buffer, buf, len);
3075
3.05M
      while (c != 0) { /* tested bigname2.xml */
3076
3.05M
    if (len + 10 > max) {
3077
3.09k
        xmlChar *tmp;
3078
3079
3.09k
        max *= 2;
3080
3.09k
        tmp = (xmlChar *) xmlRealloc(buffer, max);
3081
3.09k
        if (tmp == NULL) {
3082
0
      xmlErrMemory(ctxt, NULL);
3083
0
      xmlFree(buffer);
3084
0
      return(NULL);
3085
0
        }
3086
3.09k
        buffer = tmp;
3087
3.09k
    }
3088
3.05M
    buffer[len++] = c;
3089
3.05M
    c = *cur++;
3090
3.05M
      }
3091
1.40k
      buffer[len] = 0;
3092
1.40k
  }
3093
3094
1.26M
  if (buffer == NULL)
3095
1.26M
      ret = xmlStrndup(buf, len);
3096
1.40k
  else {
3097
1.40k
      ret = buffer;
3098
1.40k
  }
3099
1.26M
    }
3100
3101
4.09M
    return(ret);
3102
4.09M
}
3103
3104
/************************************************************************
3105
 *                  *
3106
 *      The parser itself       *
3107
 *  Relates to http://www.w3.org/TR/REC-xml       *
3108
 *                  *
3109
 ************************************************************************/
3110
3111
/************************************************************************
3112
 *                  *
3113
 *  Routines to parse Name, NCName and NmToken      *
3114
 *                  *
3115
 ************************************************************************/
3116
#ifdef DEBUG
/*
 * Call counters for the name parsing routines, only compiled in when
 * DEBUG is defined. Objects with static storage duration start at zero.
 */
static unsigned long nbParseName;           /* xmlParseName */
static unsigned long nbParseNmToken;
static unsigned long nbParseNCName;         /* xmlParseNCName */
static unsigned long nbParseNCNameComplex;  /* xmlParseNCNameComplex */
static unsigned long nbParseNameComplex;    /* xmlParseNameComplex */
static unsigned long nbParseStringName;
#endif
3124
3125
/*
3126
 * The two following functions are related to the change of accepted
3127
 * characters for Name and NmToken in the Revision 5 of XML-1.0
3128
 * They correspond to the modified production [4] and the new production [4a]
3129
 * changes in that revision. Also note that the macros used for the
3130
 * productions Letter, Digit, CombiningChar and Extender are not needed
3131
 * anymore.
3132
 * We still keep compatibility to pre-revision5 parsing semantic if the
3133
 * new XML_PARSE_OLD10 option is given to the parser.
3134
 */
3135
static int
3136
1.80M
xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
3137
1.80M
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3138
        /*
3139
   * Use the new checks of production [4] [4a] amd [5] of the
3140
   * Update 5 of XML-1.0
3141
   */
3142
1.27M
  if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3143
1.27M
      (((c >= 'a') && (c <= 'z')) ||
3144
1.25M
       ((c >= 'A') && (c <= 'Z')) ||
3145
1.25M
       (c == '_') || (c == ':') ||
3146
1.25M
       ((c >= 0xC0) && (c <= 0xD6)) ||
3147
1.25M
       ((c >= 0xD8) && (c <= 0xF6)) ||
3148
1.25M
       ((c >= 0xF8) && (c <= 0x2FF)) ||
3149
1.25M
       ((c >= 0x370) && (c <= 0x37D)) ||
3150
1.25M
       ((c >= 0x37F) && (c <= 0x1FFF)) ||
3151
1.25M
       ((c >= 0x200C) && (c <= 0x200D)) ||
3152
1.25M
       ((c >= 0x2070) && (c <= 0x218F)) ||
3153
1.25M
       ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3154
1.25M
       ((c >= 0x3001) && (c <= 0xD7FF)) ||
3155
1.25M
       ((c >= 0xF900) && (c <= 0xFDCF)) ||
3156
1.25M
       ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3157
1.25M
       ((c >= 0x10000) && (c <= 0xEFFFF))))
3158
568k
      return(1);
3159
1.27M
    } else {
3160
527k
        if (IS_LETTER(c) || (c == '_') || (c == ':'))
3161
294k
      return(1);
3162
527k
    }
3163
940k
    return(0);
3164
1.80M
}
3165
3166
static int
3167
21.8M
xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
3168
21.8M
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3169
        /*
3170
   * Use the new checks of production [4] [4a] amd [5] of the
3171
   * Update 5 of XML-1.0
3172
   */
3173
15.7M
  if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3174
15.7M
      (((c >= 'a') && (c <= 'z')) ||
3175
15.6M
       ((c >= 'A') && (c <= 'Z')) ||
3176
15.6M
       ((c >= '0') && (c <= '9')) || /* !start */
3177
15.6M
       (c == '_') || (c == ':') ||
3178
15.6M
       (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3179
15.6M
       ((c >= 0xC0) && (c <= 0xD6)) ||
3180
15.6M
       ((c >= 0xD8) && (c <= 0xF6)) ||
3181
15.6M
       ((c >= 0xF8) && (c <= 0x2FF)) ||
3182
15.6M
       ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3183
15.6M
       ((c >= 0x370) && (c <= 0x37D)) ||
3184
15.6M
       ((c >= 0x37F) && (c <= 0x1FFF)) ||
3185
15.6M
       ((c >= 0x200C) && (c <= 0x200D)) ||
3186
15.6M
       ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3187
15.6M
       ((c >= 0x2070) && (c <= 0x218F)) ||
3188
15.6M
       ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3189
15.6M
       ((c >= 0x3001) && (c <= 0xD7FF)) ||
3190
15.6M
       ((c >= 0xF900) && (c <= 0xFDCF)) ||
3191
15.6M
       ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3192
15.6M
       ((c >= 0x10000) && (c <= 0xEFFFF))))
3193
15.0M
       return(1);
3194
15.7M
    } else {
3195
6.18M
        if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3196
6.18M
            (c == '.') || (c == '-') ||
3197
6.18M
      (c == '_') || (c == ':') ||
3198
6.18M
      (IS_COMBINING(c)) ||
3199
6.18M
      (IS_EXTENDER(c)))
3200
5.82M
      return(1);
3201
6.18M
    }
3202
1.03M
    return(0);
3203
21.8M
}
3204
3205
/* Forward declaration, the definition comes later in this file. */
static xmlChar *
xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
                         int normalize);
3207
3208
static const xmlChar *
3209
1.65M
xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3210
1.65M
    int len = 0, l;
3211
1.65M
    int c;
3212
1.65M
    int count = 0;
3213
1.65M
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3214
464k
                    XML_MAX_TEXT_LENGTH :
3215
1.65M
                    XML_MAX_NAME_LENGTH;
3216
3217
#ifdef DEBUG
3218
    nbParseNameComplex++;
3219
#endif
3220
3221
    /*
3222
     * Handler for more complex cases
3223
     */
3224
1.65M
    GROW;
3225
1.65M
    if (ctxt->instate == XML_PARSER_EOF)
3226
0
        return(NULL);
3227
1.65M
    c = CUR_CHAR(l);
3228
1.65M
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3229
        /*
3230
   * Use the new checks of production [4] [4a] amd [5] of the
3231
   * Update 5 of XML-1.0
3232
   */
3233
1.05M
  if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3234
1.05M
      (!(((c >= 'a') && (c <= 'z')) ||
3235
1.04M
         ((c >= 'A') && (c <= 'Z')) ||
3236
1.04M
         (c == '_') || (c == ':') ||
3237
1.04M
         ((c >= 0xC0) && (c <= 0xD6)) ||
3238
1.04M
         ((c >= 0xD8) && (c <= 0xF6)) ||
3239
1.04M
         ((c >= 0xF8) && (c <= 0x2FF)) ||
3240
1.04M
         ((c >= 0x370) && (c <= 0x37D)) ||
3241
1.04M
         ((c >= 0x37F) && (c <= 0x1FFF)) ||
3242
1.04M
         ((c >= 0x200C) && (c <= 0x200D)) ||
3243
1.04M
         ((c >= 0x2070) && (c <= 0x218F)) ||
3244
1.04M
         ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3245
1.04M
         ((c >= 0x3001) && (c <= 0xD7FF)) ||
3246
1.04M
         ((c >= 0xF900) && (c <= 0xFDCF)) ||
3247
1.04M
         ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3248
1.04M
         ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3249
958k
      return(NULL);
3250
958k
  }
3251
100k
  len += l;
3252
100k
  NEXTL(l);
3253
100k
  c = CUR_CHAR(l);
3254
6.21M
  while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3255
6.21M
         (((c >= 'a') && (c <= 'z')) ||
3256
6.17M
          ((c >= 'A') && (c <= 'Z')) ||
3257
6.17M
          ((c >= '0') && (c <= '9')) || /* !start */
3258
6.17M
          (c == '_') || (c == ':') ||
3259
6.17M
          (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3260
6.17M
          ((c >= 0xC0) && (c <= 0xD6)) ||
3261
6.17M
          ((c >= 0xD8) && (c <= 0xF6)) ||
3262
6.17M
          ((c >= 0xF8) && (c <= 0x2FF)) ||
3263
6.17M
          ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3264
6.17M
          ((c >= 0x370) && (c <= 0x37D)) ||
3265
6.17M
          ((c >= 0x37F) && (c <= 0x1FFF)) ||
3266
6.17M
          ((c >= 0x200C) && (c <= 0x200D)) ||
3267
6.17M
          ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3268
6.17M
          ((c >= 0x2070) && (c <= 0x218F)) ||
3269
6.17M
          ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3270
6.17M
          ((c >= 0x3001) && (c <= 0xD7FF)) ||
3271
6.17M
          ((c >= 0xF900) && (c <= 0xFDCF)) ||
3272
6.17M
          ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3273
6.17M
          ((c >= 0x10000) && (c <= 0xEFFFF))
3274
6.17M
    )) {
3275
6.11M
      if (count++ > XML_PARSER_CHUNK_SIZE) {
3276
52.8k
    count = 0;
3277
52.8k
    GROW;
3278
52.8k
                if (ctxt->instate == XML_PARSER_EOF)
3279
0
                    return(NULL);
3280
52.8k
      }
3281
6.11M
            if (len <= INT_MAX - l)
3282
6.11M
          len += l;
3283
6.11M
      NEXTL(l);
3284
6.11M
      c = CUR_CHAR(l);
3285
6.11M
  }
3286
591k
    } else {
3287
591k
  if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3288
591k
      (!IS_LETTER(c) && (c != '_') &&
3289
579k
       (c != ':'))) {
3290
517k
      return(NULL);
3291
517k
  }
3292
74.4k
  len += l;
3293
74.4k
  NEXTL(l);
3294
74.4k
  c = CUR_CHAR(l);
3295
3296
4.53M
  while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3297
4.53M
         ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3298
4.50M
    (c == '.') || (c == '-') ||
3299
4.50M
    (c == '_') || (c == ':') ||
3300
4.50M
    (IS_COMBINING(c)) ||
3301
4.50M
    (IS_EXTENDER(c)))) {
3302
4.45M
      if (count++ > XML_PARSER_CHUNK_SIZE) {
3303
38.1k
    count = 0;
3304
38.1k
    GROW;
3305
38.1k
                if (ctxt->instate == XML_PARSER_EOF)
3306
0
                    return(NULL);
3307
38.1k
      }
3308
4.45M
            if (len <= INT_MAX - l)
3309
4.45M
          len += l;
3310
4.45M
      NEXTL(l);
3311
4.45M
      c = CUR_CHAR(l);
3312
4.45M
  }
3313
74.4k
    }
3314
174k
    if (len > maxLength) {
3315
17
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3316
17
        return(NULL);
3317
17
    }
3318
174k
    if (ctxt->input->cur - ctxt->input->base < len) {
3319
        /*
3320
         * There were a couple of bugs where PERefs lead to to a change
3321
         * of the buffer. Check the buffer size to avoid passing an invalid
3322
         * pointer to xmlDictLookup.
3323
         */
3324
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
3325
0
                    "unexpected change of input buffer");
3326
0
        return (NULL);
3327
0
    }
3328
174k
    if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3329
1.19k
        return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
3330
173k
    return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3331
174k
}
3332
3333
/**
3334
 * xmlParseName:
3335
 * @ctxt:  an XML parser context
3336
 *
3337
 * DEPRECATED: Internal function, don't use.
3338
 *
3339
 * parse an XML name.
3340
 *
3341
 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3342
 *                  CombiningChar | Extender
3343
 *
3344
 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3345
 *
3346
 * [6] Names ::= Name (#x20 Name)*
3347
 *
3348
 * Returns the Name parsed or NULL
3349
 */
3350
3351
const xmlChar *
3352
12.0M
xmlParseName(xmlParserCtxtPtr ctxt) {
3353
12.0M
    const xmlChar *in;
3354
12.0M
    const xmlChar *ret;
3355
12.0M
    size_t count = 0;
3356
12.0M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3357
4.67M
                       XML_MAX_TEXT_LENGTH :
3358
12.0M
                       XML_MAX_NAME_LENGTH;
3359
3360
12.0M
    GROW;
3361
3362
#ifdef DEBUG
3363
    nbParseName++;
3364
#endif
3365
3366
    /*
3367
     * Accelerator for simple ASCII names
3368
     */
3369
12.0M
    in = ctxt->input->cur;
3370
12.0M
    if (((*in >= 0x61) && (*in <= 0x7A)) ||
3371
12.0M
  ((*in >= 0x41) && (*in <= 0x5A)) ||
3372
12.0M
  (*in == '_') || (*in == ':')) {
3373
10.5M
  in++;
3374
88.4M
  while (((*in >= 0x61) && (*in <= 0x7A)) ||
3375
88.4M
         ((*in >= 0x41) && (*in <= 0x5A)) ||
3376
88.4M
         ((*in >= 0x30) && (*in <= 0x39)) ||
3377
88.4M
         (*in == '_') || (*in == '-') ||
3378
88.4M
         (*in == ':') || (*in == '.'))
3379
77.8M
      in++;
3380
10.5M
  if ((*in > 0) && (*in < 0x80)) {
3381
10.4M
      count = in - ctxt->input->cur;
3382
10.4M
            if (count > maxLength) {
3383
9
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3384
9
                return(NULL);
3385
9
            }
3386
10.4M
      ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3387
10.4M
      ctxt->input->cur = in;
3388
10.4M
      ctxt->input->col += count;
3389
10.4M
      if (ret == NULL)
3390
0
          xmlErrMemory(ctxt, NULL);
3391
10.4M
      return(ret);
3392
10.4M
  }
3393
10.5M
    }
3394
    /* accelerator for special cases */
3395
1.65M
    return(xmlParseNameComplex(ctxt));
3396
12.0M
}
3397
3398
static const xmlChar *
3399
943k
xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3400
943k
    int len = 0, l;
3401
943k
    int c;
3402
943k
    int count = 0;
3403
943k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3404
326k
                    XML_MAX_TEXT_LENGTH :
3405
943k
                    XML_MAX_NAME_LENGTH;
3406
943k
    size_t startPosition = 0;
3407
3408
#ifdef DEBUG
3409
    nbParseNCNameComplex++;
3410
#endif
3411
3412
    /*
3413
     * Handler for more complex cases
3414
     */
3415
943k
    GROW;
3416
943k
    startPosition = CUR_PTR - BASE_PTR;
3417
943k
    c = CUR_CHAR(l);
3418
943k
    if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3419
943k
  (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3420
864k
  return(NULL);
3421
864k
    }
3422
3423
6.51M
    while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3424
6.51M
     (xmlIsNameChar(ctxt, c) && (c != ':'))) {
3425
6.43M
  if (count++ > XML_PARSER_CHUNK_SIZE) {
3426
56.9k
      count = 0;
3427
56.9k
      GROW;
3428
56.9k
            if (ctxt->instate == XML_PARSER_EOF)
3429
0
                return(NULL);
3430
56.9k
  }
3431
6.43M
        if (len <= INT_MAX - l)
3432
6.43M
      len += l;
3433
6.43M
  NEXTL(l);
3434
6.43M
  c = CUR_CHAR(l);
3435
6.43M
  if (c == 0) {
3436
13.7k
      count = 0;
3437
      /*
3438
       * when shrinking to extend the buffer we really need to preserve
3439
       * the part of the name we already parsed. Hence rolling back
3440
       * by current length.
3441
       */
3442
13.7k
      ctxt->input->cur -= l;
3443
13.7k
      GROW;
3444
13.7k
            if (ctxt->instate == XML_PARSER_EOF)
3445
0
                return(NULL);
3446
13.7k
      ctxt->input->cur += l;
3447
13.7k
      c = CUR_CHAR(l);
3448
13.7k
  }
3449
6.43M
    }
3450
78.2k
    if (len > maxLength) {
3451
17
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3452
17
        return(NULL);
3453
17
    }
3454
78.2k
    return(xmlDictLookup(ctxt->dict, (BASE_PTR + startPosition), len));
3455
78.2k
}
3456
3457
/**
3458
 * xmlParseNCName:
3459
 * @ctxt:  an XML parser context
3460
 * @len:  length of the string parsed
3461
 *
3462
 * parse an XML name.
3463
 *
3464
 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3465
 *                      CombiningChar | Extender
3466
 *
3467
 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3468
 *
3469
 * Returns the Name parsed or NULL
3470
 */
3471
3472
static const xmlChar *
3473
13.0M
xmlParseNCName(xmlParserCtxtPtr ctxt) {
3474
13.0M
    const xmlChar *in, *e;
3475
13.0M
    const xmlChar *ret;
3476
13.0M
    size_t count = 0;
3477
13.0M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3478
3.96M
                       XML_MAX_TEXT_LENGTH :
3479
13.0M
                       XML_MAX_NAME_LENGTH;
3480
3481
#ifdef DEBUG
3482
    nbParseNCName++;
3483
#endif
3484
3485
    /*
3486
     * Accelerator for simple ASCII names
3487
     */
3488
13.0M
    in = ctxt->input->cur;
3489
13.0M
    e = ctxt->input->end;
3490
13.0M
    if ((((*in >= 0x61) && (*in <= 0x7A)) ||
3491
13.0M
   ((*in >= 0x41) && (*in <= 0x5A)) ||
3492
13.0M
   (*in == '_')) && (in < e)) {
3493
12.1M
  in++;
3494
60.9M
  while ((((*in >= 0x61) && (*in <= 0x7A)) ||
3495
60.9M
          ((*in >= 0x41) && (*in <= 0x5A)) ||
3496
60.9M
          ((*in >= 0x30) && (*in <= 0x39)) ||
3497
60.9M
          (*in == '_') || (*in == '-') ||
3498
60.9M
          (*in == '.')) && (in < e))
3499
48.7M
      in++;
3500
12.1M
  if (in >= e)
3501
7.74k
      goto complex;
3502
12.1M
  if ((*in > 0) && (*in < 0x80)) {
3503
12.0M
      count = in - ctxt->input->cur;
3504
12.0M
            if (count > maxLength) {
3505
7
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3506
7
                return(NULL);
3507
7
            }
3508
12.0M
      ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3509
12.0M
      ctxt->input->cur = in;
3510
12.0M
      ctxt->input->col += count;
3511
12.0M
      if (ret == NULL) {
3512
0
          xmlErrMemory(ctxt, NULL);
3513
0
      }
3514
12.0M
      return(ret);
3515
12.0M
  }
3516
12.1M
    }
3517
943k
complex:
3518
943k
    return(xmlParseNCNameComplex(ctxt));
3519
13.0M
}
3520
3521
/**
3522
 * xmlParseNameAndCompare:
3523
 * @ctxt:  an XML parser context
3524
 *
3525
 * parse an XML name and compares for match
3526
 * (specialized for endtag parsing)
3527
 *
3528
 * Returns NULL for an illegal name, (xmlChar*) 1 for success
3529
 * and the name for mismatch
3530
 */
3531
3532
static const xmlChar *
3533
2.33M
xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
3534
2.33M
    register const xmlChar *cmp = other;
3535
2.33M
    register const xmlChar *in;
3536
2.33M
    const xmlChar *ret;
3537
3538
2.33M
    GROW;
3539
2.33M
    if (ctxt->instate == XML_PARSER_EOF)
3540
0
        return(NULL);
3541
3542
2.33M
    in = ctxt->input->cur;
3543
14.1M
    while (*in != 0 && *in == *cmp) {
3544
11.8M
  ++in;
3545
11.8M
  ++cmp;
3546
11.8M
    }
3547
2.33M
    if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
3548
  /* success */
3549
2.16M
  ctxt->input->col += in - ctxt->input->cur;
3550
2.16M
  ctxt->input->cur = in;
3551
2.16M
  return (const xmlChar*) 1;
3552
2.16M
    }
3553
    /* failure (or end of input buffer), check with full function */
3554
162k
    ret = xmlParseName (ctxt);
3555
    /* strings coming from the dictionary direct compare possible */
3556
162k
    if (ret == other) {
3557
15.5k
  return (const xmlChar*) 1;
3558
15.5k
    }
3559
146k
    return ret;
3560
162k
}
3561
3562
/**
3563
 * xmlParseStringName:
3564
 * @ctxt:  an XML parser context
3565
 * @str:  a pointer to the string pointer (IN/OUT)
3566
 *
3567
 * parse an XML name.
3568
 *
3569
 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3570
 *                  CombiningChar | Extender
3571
 *
3572
 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3573
 *
3574
 * [6] Names ::= Name (#x20 Name)*
3575
 *
3576
 * Returns the Name parsed or NULL. The @str pointer
3577
 * is updated to the current location in the string.
3578
 */
3579
3580
static xmlChar *
3581
876k
xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3582
876k
    xmlChar buf[XML_MAX_NAMELEN + 5];
3583
876k
    const xmlChar *cur = *str;
3584
876k
    int len = 0, l;
3585
876k
    int c;
3586
876k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3587
524k
                    XML_MAX_TEXT_LENGTH :
3588
876k
                    XML_MAX_NAME_LENGTH;
3589
3590
#ifdef DEBUG
3591
    nbParseStringName++;
3592
#endif
3593
3594
876k
    c = CUR_SCHAR(cur, l);
3595
876k
    if (!xmlIsNameStartChar(ctxt, c)) {
3596
93.5k
  return(NULL);
3597
93.5k
    }
3598
3599
782k
    COPY_BUF(l,buf,len,c);
3600
782k
    cur += l;
3601
782k
    c = CUR_SCHAR(cur, l);
3602
7.06M
    while (xmlIsNameChar(ctxt, c)) {
3603
6.28M
  COPY_BUF(l,buf,len,c);
3604
6.28M
  cur += l;
3605
6.28M
  c = CUR_SCHAR(cur, l);
3606
6.28M
  if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3607
      /*
3608
       * Okay someone managed to make a huge name, so he's ready to pay
3609
       * for the processing speed.
3610
       */
3611
4.94k
      xmlChar *buffer;
3612
4.94k
      int max = len * 2;
3613
3614
4.94k
      buffer = (xmlChar *) xmlMallocAtomic(max);
3615
4.94k
      if (buffer == NULL) {
3616
0
          xmlErrMemory(ctxt, NULL);
3617
0
    return(NULL);
3618
0
      }
3619
4.94k
      memcpy(buffer, buf, len);
3620
2.48M
      while (xmlIsNameChar(ctxt, c)) {
3621
2.47M
    if (len + 10 > max) {
3622
6.45k
        xmlChar *tmp;
3623
3624
6.45k
        max *= 2;
3625
6.45k
        tmp = (xmlChar *) xmlRealloc(buffer, max);
3626
6.45k
        if (tmp == NULL) {
3627
0
      xmlErrMemory(ctxt, NULL);
3628
0
      xmlFree(buffer);
3629
0
      return(NULL);
3630
0
        }
3631
6.45k
        buffer = tmp;
3632
6.45k
    }
3633
2.47M
    COPY_BUF(l,buffer,len,c);
3634
2.47M
    cur += l;
3635
2.47M
    c = CUR_SCHAR(cur, l);
3636
2.47M
                if (len > maxLength) {
3637
0
                    xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3638
0
                    xmlFree(buffer);
3639
0
                    return(NULL);
3640
0
                }
3641
2.47M
      }
3642
4.94k
      buffer[len] = 0;
3643
4.94k
      *str = cur;
3644
4.94k
      return(buffer);
3645
4.94k
  }
3646
6.28M
    }
3647
777k
    if (len > maxLength) {
3648
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3649
0
        return(NULL);
3650
0
    }
3651
777k
    *str = cur;
3652
777k
    return(xmlStrndup(buf, len));
3653
777k
}
3654
3655
/**
3656
 * xmlParseNmtoken:
3657
 * @ctxt:  an XML parser context
3658
 *
3659
 * DEPRECATED: Internal function, don't use.
3660
 *
3661
 * parse an XML Nmtoken.
3662
 *
3663
 * [7] Nmtoken ::= (NameChar)+
3664
 *
3665
 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
3666
 *
3667
 * Returns the Nmtoken parsed or NULL
3668
 */
3669
3670
xmlChar *
3671
213k
xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3672
213k
    xmlChar buf[XML_MAX_NAMELEN + 5];
3673
213k
    int len = 0, l;
3674
213k
    int c;
3675
213k
    int count = 0;
3676
213k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3677
58.5k
                    XML_MAX_TEXT_LENGTH :
3678
213k
                    XML_MAX_NAME_LENGTH;
3679
3680
#ifdef DEBUG
3681
    nbParseNmToken++;
3682
#endif
3683
3684
213k
    GROW;
3685
213k
    if (ctxt->instate == XML_PARSER_EOF)
3686
0
        return(NULL);
3687
213k
    c = CUR_CHAR(l);
3688
3689
1.39M
    while (xmlIsNameChar(ctxt, c)) {
3690
1.18M
  if (count++ > XML_PARSER_CHUNK_SIZE) {
3691
0
      count = 0;
3692
0
      GROW;
3693
0
  }
3694
1.18M
  COPY_BUF(l,buf,len,c);
3695
1.18M
  NEXTL(l);
3696
1.18M
  c = CUR_CHAR(l);
3697
1.18M
  if (c == 0) {
3698
495
      count = 0;
3699
495
      GROW;
3700
495
      if (ctxt->instate == XML_PARSER_EOF)
3701
0
    return(NULL);
3702
495
            c = CUR_CHAR(l);
3703
495
  }
3704
1.18M
  if (len >= XML_MAX_NAMELEN) {
3705
      /*
3706
       * Okay someone managed to make a huge token, so he's ready to pay
3707
       * for the processing speed.
3708
       */
3709
1.58k
      xmlChar *buffer;
3710
1.58k
      int max = len * 2;
3711
3712
1.58k
      buffer = (xmlChar *) xmlMallocAtomic(max);
3713
1.58k
      if (buffer == NULL) {
3714
0
          xmlErrMemory(ctxt, NULL);
3715
0
    return(NULL);
3716
0
      }
3717
1.58k
      memcpy(buffer, buf, len);
3718
4.48M
      while (xmlIsNameChar(ctxt, c)) {
3719
4.47M
    if (count++ > XML_PARSER_CHUNK_SIZE) {
3720
44.6k
        count = 0;
3721
44.6k
        GROW;
3722
44.6k
                    if (ctxt->instate == XML_PARSER_EOF) {
3723
0
                        xmlFree(buffer);
3724
0
                        return(NULL);
3725
0
                    }
3726
44.6k
    }
3727
4.47M
    if (len + 10 > max) {
3728
4.46k
        xmlChar *tmp;
3729
3730
4.46k
        max *= 2;
3731
4.46k
        tmp = (xmlChar *) xmlRealloc(buffer, max);
3732
4.46k
        if (tmp == NULL) {
3733
0
      xmlErrMemory(ctxt, NULL);
3734
0
      xmlFree(buffer);
3735
0
      return(NULL);
3736
0
        }
3737
4.46k
        buffer = tmp;
3738
4.46k
    }
3739
4.47M
    COPY_BUF(l,buffer,len,c);
3740
4.47M
    NEXTL(l);
3741
4.47M
    c = CUR_CHAR(l);
3742
4.47M
                if (len > maxLength) {
3743
3
                    xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3744
3
                    xmlFree(buffer);
3745
3
                    return(NULL);
3746
3
                }
3747
4.47M
      }
3748
1.58k
      buffer[len] = 0;
3749
1.58k
      return(buffer);
3750
1.58k
  }
3751
1.18M
    }
3752
211k
    if (len == 0)
3753
7.32k
        return(NULL);
3754
204k
    if (len > maxLength) {
3755
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3756
0
        return(NULL);
3757
0
    }
3758
204k
    return(xmlStrndup(buf, len));
3759
204k
}
3760
3761
/**
3762
 * xmlParseEntityValue:
3763
 * @ctxt:  an XML parser context
3764
 * @orig:  if non-NULL store a copy of the original entity value
3765
 *
3766
 * DEPRECATED: Internal function, don't use.
3767
 *
3768
 * parse a value for ENTITY declarations
3769
 *
3770
 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3771
 *                 "'" ([^%&'] | PEReference | Reference)* "'"
3772
 *
3773
 * Returns the EntityValue parsed with reference substituted or NULL
3774
 */
3775
3776
xmlChar *
3777
788k
xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3778
788k
    xmlChar *buf = NULL;
3779
788k
    int len = 0;
3780
788k
    int size = XML_PARSER_BUFFER_SIZE;
3781
788k
    int c, l;
3782
788k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3783
221k
                    XML_MAX_HUGE_LENGTH :
3784
788k
                    XML_MAX_TEXT_LENGTH;
3785
788k
    xmlChar stop;
3786
788k
    xmlChar *ret = NULL;
3787
788k
    const xmlChar *cur = NULL;
3788
788k
    xmlParserInputPtr input;
3789
3790
788k
    if (RAW == '"') stop = '"';
3791
129k
    else if (RAW == '\'') stop = '\'';
3792
0
    else {
3793
0
  xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
3794
0
  return(NULL);
3795
0
    }
3796
788k
    buf = (xmlChar *) xmlMallocAtomic(size);
3797
788k
    if (buf == NULL) {
3798
0
  xmlErrMemory(ctxt, NULL);
3799
0
  return(NULL);
3800
0
    }
3801
3802
    /*
3803
     * The content of the entity definition is copied in a buffer.
3804
     */
3805
3806
788k
    ctxt->instate = XML_PARSER_ENTITY_VALUE;
3807
788k
    input = ctxt->input;
3808
788k
    GROW;
3809
788k
    if (ctxt->instate == XML_PARSER_EOF)
3810
0
        goto error;
3811
788k
    NEXT;
3812
788k
    c = CUR_CHAR(l);
3813
    /*
3814
     * NOTE: 4.4.5 Included in Literal
3815
     * When a parameter entity reference appears in a literal entity
3816
     * value, ... a single or double quote character in the replacement
3817
     * text is always treated as a normal data character and will not
3818
     * terminate the literal.
3819
     * In practice it means we stop the loop only when back at parsing
3820
     * the initial entity and the quote is found
3821
     */
3822
26.7M
    while (((IS_CHAR(c)) && ((c != stop) || /* checked */
3823
26.7M
      (ctxt->input != input))) && (ctxt->instate != XML_PARSER_EOF)) {
3824
25.9M
  if (len + 5 >= size) {
3825
58.2k
      xmlChar *tmp;
3826
3827
58.2k
      size *= 2;
3828
58.2k
      tmp = (xmlChar *) xmlRealloc(buf, size);
3829
58.2k
      if (tmp == NULL) {
3830
0
    xmlErrMemory(ctxt, NULL);
3831
0
                goto error;
3832
0
      }
3833
58.2k
      buf = tmp;
3834
58.2k
  }
3835
25.9M
  COPY_BUF(l,buf,len,c);
3836
25.9M
  NEXTL(l);
3837
3838
25.9M
  GROW;
3839
25.9M
  c = CUR_CHAR(l);
3840
25.9M
  if (c == 0) {
3841
1.33k
      GROW;
3842
1.33k
      c = CUR_CHAR(l);
3843
1.33k
  }
3844
3845
25.9M
        if (len > maxLength) {
3846
0
            xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
3847
0
                           "entity value too long\n");
3848
0
            goto error;
3849
0
        }
3850
25.9M
    }
3851
788k
    buf[len] = 0;
3852
788k
    if (ctxt->instate == XML_PARSER_EOF)
3853
0
        goto error;
3854
788k
    if (c != stop) {
3855
2.01k
        xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
3856
2.01k
        goto error;
3857
2.01k
    }
3858
786k
    NEXT;
3859
3860
    /*
3861
     * Raise problem w.r.t. '&' and '%' being used in non-entities
3862
     * reference constructs. Note Charref will be handled in
3863
     * xmlStringDecodeEntities()
3864
     */
3865
786k
    cur = buf;
3866
20.7M
    while (*cur != 0) { /* non input consuming */
3867
20.0M
  if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3868
233k
      xmlChar *name;
3869
233k
      xmlChar tmp = *cur;
3870
233k
            int nameOk = 0;
3871
3872
233k
      cur++;
3873
233k
      name = xmlParseStringName(ctxt, &cur);
3874
233k
            if (name != NULL) {
3875
232k
                nameOk = 1;
3876
232k
                xmlFree(name);
3877
232k
            }
3878
233k
            if ((nameOk == 0) || (*cur != ';')) {
3879
4.10k
    xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
3880
4.10k
      "EntityValue: '%c' forbidden except for entities references\n",
3881
4.10k
                            tmp);
3882
4.10k
                goto error;
3883
4.10k
      }
3884
229k
      if ((tmp == '%') && (ctxt->inSubset == 1) &&
3885
229k
    (ctxt->inputNr == 1)) {
3886
178
    xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
3887
178
                goto error;
3888
178
      }
3889
229k
      if (*cur == 0)
3890
0
          break;
3891
229k
  }
3892
19.9M
  cur++;
3893
19.9M
    }
3894
3895
    /*
3896
     * Then PEReference entities are substituted.
3897
     *
3898
     * NOTE: 4.4.7 Bypassed
3899
     * When a general entity reference appears in the EntityValue in
3900
     * an entity declaration, it is bypassed and left as is.
3901
     * so XML_SUBSTITUTE_REF is not set here.
3902
     */
3903
782k
    ++ctxt->depth;
3904
782k
    ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3905
782k
                                  0, 0, 0);
3906
782k
    --ctxt->depth;
3907
782k
    if (orig != NULL) {
3908
782k
        *orig = buf;
3909
782k
        buf = NULL;
3910
782k
    }
3911
3912
788k
error:
3913
788k
    if (buf != NULL)
3914
6.29k
        xmlFree(buf);
3915
788k
    return(ret);
3916
782k
}
3917
3918
/**
3919
 * xmlParseAttValueComplex:
3920
 * @ctxt:  an XML parser context
3921
 * @len:   the resulting attribute len
3922
 * @normalize:  whether to apply the inner normalization
3923
 *
3924
 * parse a value for an attribute, this is the fallback function
3925
 * of xmlParseAttValue() when the attribute parsing requires handling
3926
 * of non-ASCII characters, or normalization compaction.
3927
 *
3928
 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3929
 */
3930
static xmlChar *
3931
394k
xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
3932
394k
    xmlChar limit = 0;
3933
394k
    xmlChar *buf = NULL;
3934
394k
    xmlChar *rep = NULL;
3935
394k
    size_t len = 0;
3936
394k
    size_t buf_size = 0;
3937
394k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3938
136k
                       XML_MAX_HUGE_LENGTH :
3939
394k
                       XML_MAX_TEXT_LENGTH;
3940
394k
    int c, l, in_space = 0;
3941
394k
    xmlChar *current = NULL;
3942
394k
    xmlEntityPtr ent;
3943
3944
394k
    if (NXT(0) == '"') {
3945
269k
  ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3946
269k
  limit = '"';
3947
269k
        NEXT;
3948
269k
    } else if (NXT(0) == '\'') {
3949
124k
  limit = '\'';
3950
124k
  ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3951
124k
        NEXT;
3952
124k
    } else {
3953
0
  xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
3954
0
  return(NULL);
3955
0
    }
3956
3957
    /*
3958
     * allocate a translation buffer.
3959
     */
3960
394k
    buf_size = XML_PARSER_BUFFER_SIZE;
3961
394k
    buf = (xmlChar *) xmlMallocAtomic(buf_size);
3962
394k
    if (buf == NULL) goto mem_error;
3963
3964
    /*
3965
     * OK loop until we reach one of the ending char or a size limit.
3966
     */
3967
394k
    c = CUR_CHAR(l);
3968
28.2M
    while (((NXT(0) != limit) && /* checked */
3969
28.2M
            (IS_CHAR(c)) && (c != '<')) &&
3970
28.2M
            (ctxt->instate != XML_PARSER_EOF)) {
3971
27.8M
  if (c == '&') {
3972
890k
      in_space = 0;
3973
890k
      if (NXT(1) == '#') {
3974
274k
    int val = xmlParseCharRef(ctxt);
3975
3976
274k
    if (val == '&') {
3977
1.28k
        if (ctxt->replaceEntities) {
3978
537
      if (len + 10 > buf_size) {
3979
26
          growBuffer(buf, 10);
3980
26
      }
3981
537
      buf[len++] = '&';
3982
750
        } else {
3983
      /*
3984
       * The reparsing will be done in xmlStringGetNodeList()
3985
       * called by the attribute() function in SAX.c
3986
       */
3987
750
      if (len + 10 > buf_size) {
3988
30
          growBuffer(buf, 10);
3989
30
      }
3990
750
      buf[len++] = '&';
3991
750
      buf[len++] = '#';
3992
750
      buf[len++] = '3';
3993
750
      buf[len++] = '8';
3994
750
      buf[len++] = ';';
3995
750
        }
3996
272k
    } else if (val != 0) {
3997
255k
        if (len + 10 > buf_size) {
3998
1.96k
      growBuffer(buf, 10);
3999
1.96k
        }
4000
255k
        len += xmlCopyChar(0, &buf[len], val);
4001
255k
    }
4002
616k
      } else {
4003
616k
    ent = xmlParseEntityRef(ctxt);
4004
616k
    ctxt->nbentities++;
4005
616k
    if (ent != NULL)
4006
233k
        ctxt->nbentities += ent->owner;
4007
616k
    if ((ent != NULL) &&
4008
616k
        (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
4009
85.4k
        if (len + 10 > buf_size) {
4010
52
      growBuffer(buf, 10);
4011
52
        }
4012
85.4k
        if ((ctxt->replaceEntities == 0) &&
4013
85.4k
            (ent->content[0] == '&')) {
4014
25.4k
      buf[len++] = '&';
4015
25.4k
      buf[len++] = '#';
4016
25.4k
      buf[len++] = '3';
4017
25.4k
      buf[len++] = '8';
4018
25.4k
      buf[len++] = ';';
4019
59.9k
        } else {
4020
59.9k
      buf[len++] = ent->content[0];
4021
59.9k
        }
4022
530k
    } else if ((ent != NULL) &&
4023
530k
               (ctxt->replaceEntities != 0)) {
4024
71.2k
        if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
4025
71.2k
      ++ctxt->depth;
4026
71.2k
      rep = xmlStringDecodeEntities(ctxt, ent->content,
4027
71.2k
                  XML_SUBSTITUTE_REF,
4028
71.2k
                  0, 0, 0);
4029
71.2k
      --ctxt->depth;
4030
71.2k
      if (rep != NULL) {
4031
69.6k
          current = rep;
4032
1.77M
          while (*current != 0) { /* non input consuming */
4033
1.70M
                                if ((*current == 0xD) || (*current == 0xA) ||
4034
1.70M
                                    (*current == 0x9)) {
4035
76.5k
                                    buf[len++] = 0x20;
4036
76.5k
                                    current++;
4037
76.5k
                                } else
4038
1.63M
                                    buf[len++] = *current++;
4039
1.70M
        if (len + 10 > buf_size) {
4040
2.80k
            growBuffer(buf, 10);
4041
2.80k
        }
4042
1.70M
          }
4043
69.6k
          xmlFree(rep);
4044
69.6k
          rep = NULL;
4045
69.6k
      }
4046
71.2k
        } else {
4047
0
      if (len + 10 > buf_size) {
4048
0
          growBuffer(buf, 10);
4049
0
      }
4050
0
      if (ent->content != NULL)
4051
0
          buf[len++] = ent->content[0];
4052
0
        }
4053
459k
    } else if (ent != NULL) {
4054
76.9k
        int i = xmlStrlen(ent->name);
4055
76.9k
        const xmlChar *cur = ent->name;
4056
4057
        /*
4058
         * This may look absurd but is needed to detect
4059
         * entities problems
4060
         */
4061
76.9k
        if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
4062
76.9k
      (ent->content != NULL) && (ent->checked == 0)) {
4063
17.4k
      unsigned long oldnbent = ctxt->nbentities, diff;
4064
4065
17.4k
      ++ctxt->depth;
4066
17.4k
      rep = xmlStringDecodeEntities(ctxt, ent->content,
4067
17.4k
              XML_SUBSTITUTE_REF, 0, 0, 0);
4068
17.4k
      --ctxt->depth;
4069
4070
17.4k
                        diff = ctxt->nbentities - oldnbent + 1;
4071
17.4k
                        if (diff > INT_MAX / 2)
4072
0
                            diff = INT_MAX / 2;
4073
17.4k
                        ent->checked = diff * 2;
4074
17.4k
      if (rep != NULL) {
4075
17.3k
          if (xmlStrchr(rep, '<'))
4076
520
              ent->checked |= 1;
4077
17.3k
          xmlFree(rep);
4078
17.3k
          rep = NULL;
4079
17.3k
      } else {
4080
96
                            ent->content[0] = 0;
4081
96
                        }
4082
17.4k
        }
4083
4084
        /*
4085
         * Just output the reference
4086
         */
4087
76.9k
        buf[len++] = '&';
4088
77.1k
        while (len + i + 10 > buf_size) {
4089
458
      growBuffer(buf, i + 10);
4090
458
        }
4091
418k
        for (;i > 0;i--)
4092
341k
      buf[len++] = *cur++;
4093
76.9k
        buf[len++] = ';';
4094
76.9k
    }
4095
616k
      }
4096
27.0M
  } else {
4097
27.0M
      if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
4098
4.72M
          if ((len != 0) || (!normalize)) {
4099
4.67M
        if ((!normalize) || (!in_space)) {
4100
4.62M
      COPY_BUF(l,buf,len,0x20);
4101
4.63M
      while (len + 10 > buf_size) {
4102
7.75k
          growBuffer(buf, 10);
4103
7.75k
      }
4104
4.62M
        }
4105
4.67M
        in_space = 1;
4106
4.67M
    }
4107
22.2M
      } else {
4108
22.2M
          in_space = 0;
4109
22.2M
    COPY_BUF(l,buf,len,c);
4110
22.2M
    if (len + 10 > buf_size) {
4111
52.2k
        growBuffer(buf, 10);
4112
52.2k
    }
4113
22.2M
      }
4114
27.0M
      NEXTL(l);
4115
27.0M
  }
4116
27.8M
  GROW;
4117
27.8M
  c = CUR_CHAR(l);
4118
27.8M
        if (len > maxLength) {
4119
0
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4120
0
                           "AttValue length too long\n");
4121
0
            goto mem_error;
4122
0
        }
4123
27.8M
    }
4124
394k
    if (ctxt->instate == XML_PARSER_EOF)
4125
0
        goto error;
4126
4127
394k
    if ((in_space) && (normalize)) {
4128
41.2k
        while ((len > 0) && (buf[len - 1] == 0x20)) len--;
4129
19.9k
    }
4130
394k
    buf[len] = 0;
4131
394k
    if (RAW == '<') {
4132
58.2k
  xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
4133
336k
    } else if (RAW != limit) {
4134
43.7k
  if ((c != 0) && (!IS_CHAR(c))) {
4135
20.2k
      xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
4136
20.2k
         "invalid character in attribute value\n");
4137
23.5k
  } else {
4138
23.5k
      xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4139
23.5k
         "AttValue: ' expected\n");
4140
23.5k
        }
4141
43.7k
    } else
4142
292k
  NEXT;
4143
4144
394k
    if (attlen != NULL) *attlen = len;
4145
394k
    return(buf);
4146
4147
0
mem_error:
4148
0
    xmlErrMemory(ctxt, NULL);
4149
0
error:
4150
0
    if (buf != NULL)
4151
0
        xmlFree(buf);
4152
0
    if (rep != NULL)
4153
0
        xmlFree(rep);
4154
0
    return(NULL);
4155
0
}
4156
4157
/**
4158
 * xmlParseAttValue:
4159
 * @ctxt:  an XML parser context
4160
 *
4161
 * DEPRECATED: Internal function, don't use.
4162
 *
4163
 * parse a value for an attribute
4164
 * Note: the parser won't do substitution of entities here, this
4165
 * will be handled later in xmlStringGetNodeList
4166
 *
4167
 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
4168
 *                   "'" ([^<&'] | Reference)* "'"
4169
 *
4170
 * 3.3.3 Attribute-Value Normalization:
4171
 * Before the value of an attribute is passed to the application or
4172
 * checked for validity, the XML processor must normalize it as follows:
4173
 * - a character reference is processed by appending the referenced
4174
 *   character to the attribute value
4175
 * - an entity reference is processed by recursively processing the
4176
 *   replacement text of the entity
4177
 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4178
 *   appending #x20 to the normalized value, except that only a single
4179
 *   #x20 is appended for a "#xD#xA" sequence that is part of an external
4180
 *   parsed entity or the literal entity value of an internal parsed entity
4181
 * - other characters are processed by appending them to the normalized value
4182
 * If the declared value is not CDATA, then the XML processor must further
4183
 * process the normalized attribute value by discarding any leading and
4184
 * trailing space (#x20) characters, and by replacing sequences of space
4185
 * (#x20) characters by a single space (#x20) character.
4186
 * All attributes for which no declaration has been read should be treated
4187
 * by a non-validating parser as if declared CDATA.
4188
 *
4189
 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
4190
 */
4191
4192
4193
xmlChar *
4194
1.39M
xmlParseAttValue(xmlParserCtxtPtr ctxt) {
4195
1.39M
    if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
4196
1.39M
    return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
4197
1.39M
}
4198
4199
/**
4200
 * xmlParseSystemLiteral:
4201
 * @ctxt:  an XML parser context
4202
 *
4203
 * DEPRECATED: Internal function, don't use.
4204
 *
4205
 * parse an XML Literal
4206
 *
4207
 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4208
 *
4209
 * Returns the SystemLiteral parsed or NULL
4210
 */
4211
4212
xmlChar *
4213
155k
xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
4214
155k
    xmlChar *buf = NULL;
4215
155k
    int len = 0;
4216
155k
    int size = XML_PARSER_BUFFER_SIZE;
4217
155k
    int cur, l;
4218
155k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4219
40.6k
                    XML_MAX_TEXT_LENGTH :
4220
155k
                    XML_MAX_NAME_LENGTH;
4221
155k
    xmlChar stop;
4222
155k
    int state = ctxt->instate;
4223
155k
    int count = 0;
4224
4225
155k
    SHRINK;
4226
155k
    if (RAW == '"') {
4227
145k
        NEXT;
4228
145k
  stop = '"';
4229
145k
    } else if (RAW == '\'') {
4230
6.29k
        NEXT;
4231
6.29k
  stop = '\'';
4232
6.29k
    } else {
4233
3.54k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4234
3.54k
  return(NULL);
4235
3.54k
    }
4236
4237
151k
    buf = (xmlChar *) xmlMallocAtomic(size);
4238
151k
    if (buf == NULL) {
4239
0
        xmlErrMemory(ctxt, NULL);
4240
0
  return(NULL);
4241
0
    }
4242
151k
    ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
4243
151k
    cur = CUR_CHAR(l);
4244
4.49M
    while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
4245
4.34M
  if (len + 5 >= size) {
4246
3.82k
      xmlChar *tmp;
4247
4248
3.82k
      size *= 2;
4249
3.82k
      tmp = (xmlChar *) xmlRealloc(buf, size);
4250
3.82k
      if (tmp == NULL) {
4251
0
          xmlFree(buf);
4252
0
    xmlErrMemory(ctxt, NULL);
4253
0
    ctxt->instate = (xmlParserInputState) state;
4254
0
    return(NULL);
4255
0
      }
4256
3.82k
      buf = tmp;
4257
3.82k
  }
4258
4.34M
  count++;
4259
4.34M
  if (count > 50) {
4260
27.4k
      SHRINK;
4261
27.4k
      GROW;
4262
27.4k
      count = 0;
4263
27.4k
            if (ctxt->instate == XML_PARSER_EOF) {
4264
0
          xmlFree(buf);
4265
0
    return(NULL);
4266
0
            }
4267
27.4k
  }
4268
4.34M
  COPY_BUF(l,buf,len,cur);
4269
4.34M
  NEXTL(l);
4270
4.34M
  cur = CUR_CHAR(l);
4271
4.34M
  if (cur == 0) {
4272
2.61k
      GROW;
4273
2.61k
      SHRINK;
4274
2.61k
      cur = CUR_CHAR(l);
4275
2.61k
  }
4276
4.34M
        if (len > maxLength) {
4277
0
            xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
4278
0
            xmlFree(buf);
4279
0
            ctxt->instate = (xmlParserInputState) state;
4280
0
            return(NULL);
4281
0
        }
4282
4.34M
    }
4283
151k
    buf[len] = 0;
4284
151k
    ctxt->instate = (xmlParserInputState) state;
4285
151k
    if (!IS_CHAR(cur)) {
4286
3.28k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4287
148k
    } else {
4288
148k
  NEXT;
4289
148k
    }
4290
151k
    return(buf);
4291
151k
}
4292
4293
/**
4294
 * xmlParsePubidLiteral:
4295
 * @ctxt:  an XML parser context
4296
 *
4297
 * DEPRECATED: Internal function, don't use.
4298
 *
4299
 * parse an XML public literal
4300
 *
4301
 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4302
 *
4303
 * Returns the PubidLiteral parsed or NULL.
4304
 */
4305
4306
xmlChar *
4307
44.2k
xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
4308
44.2k
    xmlChar *buf = NULL;
4309
44.2k
    int len = 0;
4310
44.2k
    int size = XML_PARSER_BUFFER_SIZE;
4311
44.2k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4312
12.1k
                    XML_MAX_TEXT_LENGTH :
4313
44.2k
                    XML_MAX_NAME_LENGTH;
4314
44.2k
    xmlChar cur;
4315
44.2k
    xmlChar stop;
4316
44.2k
    int count = 0;
4317
44.2k
    xmlParserInputState oldstate = ctxt->instate;
4318
4319
44.2k
    SHRINK;
4320
44.2k
    if (RAW == '"') {
4321
40.8k
        NEXT;
4322
40.8k
  stop = '"';
4323
40.8k
    } else if (RAW == '\'') {
4324
2.94k
        NEXT;
4325
2.94k
  stop = '\'';
4326
2.94k
    } else {
4327
472
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4328
472
  return(NULL);
4329
472
    }
4330
43.7k
    buf = (xmlChar *) xmlMallocAtomic(size);
4331
43.7k
    if (buf == NULL) {
4332
0
  xmlErrMemory(ctxt, NULL);
4333
0
  return(NULL);
4334
0
    }
4335
43.7k
    ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
4336
43.7k
    cur = CUR;
4337
1.95M
    while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
4338
1.90M
  if (len + 1 >= size) {
4339
2.26k
      xmlChar *tmp;
4340
4341
2.26k
      size *= 2;
4342
2.26k
      tmp = (xmlChar *) xmlRealloc(buf, size);
4343
2.26k
      if (tmp == NULL) {
4344
0
    xmlErrMemory(ctxt, NULL);
4345
0
    xmlFree(buf);
4346
0
    return(NULL);
4347
0
      }
4348
2.26k
      buf = tmp;
4349
2.26k
  }
4350
1.90M
  buf[len++] = cur;
4351
1.90M
  count++;
4352
1.90M
  if (count > 50) {
4353
16.3k
      SHRINK;
4354
16.3k
      GROW;
4355
16.3k
      count = 0;
4356
16.3k
            if (ctxt->instate == XML_PARSER_EOF) {
4357
0
    xmlFree(buf);
4358
0
    return(NULL);
4359
0
            }
4360
16.3k
  }
4361
1.90M
  NEXT;
4362
1.90M
  cur = CUR;
4363
1.90M
  if (cur == 0) {
4364
473
      GROW;
4365
473
      SHRINK;
4366
473
      cur = CUR;
4367
473
  }
4368
1.90M
        if (len > maxLength) {
4369
0
            xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
4370
0
            xmlFree(buf);
4371
0
            return(NULL);
4372
0
        }
4373
1.90M
    }
4374
43.7k
    buf[len] = 0;
4375
43.7k
    if (cur != stop) {
4376
1.89k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4377
41.8k
    } else {
4378
41.8k
  NEXT;
4379
41.8k
    }
4380
43.7k
    ctxt->instate = oldstate;
4381
43.7k
    return(buf);
4382
43.7k
}
4383
4384
/* Forward declaration only; the function body is not part of this section. */
static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
4385
4386
/*
 * used for the test in the inner loop of the char data testing
 *
 * Lookup table indexed by byte value. An entry holds the byte itself for
 * 0x09 and for 0x20..0x7F except '&', '<' and ']'; every other entry
 * is zero.
 */
static const unsigned char test_char_data[256] = {
    /* 0x00 - 0x0F: only 0x9 is kept, CR/LF are handled separately */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x10 - 0x1F */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x20 - 0x2F: '&' (0x26) is zero */
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27,
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    /* 0x30 - 0x3F: '<' (0x3C) is zero */
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F,
    /* 0x40 - 0x4F */
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    /* 0x50 - 0x5F: ']' (0x5D) is zero */
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F,
    /* 0x60 - 0x6F */
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    /* 0x70 - 0x7F */
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F
    /* 0x80 - 0xFF (non-ascii): left out, the remaining 128 elements of
     * the array are zero-initialized */
};
4423
4424
/**
 * xmlParseCharData:
 * @ctxt:  an XML parser context
 * @cdata:  int indicating whether we are within a CDATA section
 *
 * DEPRECATED: Internal function, don't use.
 *
 * Parse a CharData section.
 * If we are within a CDATA section ']]>' marks an end of section.
 *
 * The right angle bracket (>) may be represented using the string "&gt;",
 * and must, for compatibility, be escaped using "&gt;" or a character
 * reference when it appears in the string "]]>" in content, when that
 * string is not marking the end of a CDATA section.
 *
 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
 *
 * When @cdata is zero an ASCII-only fast path scans the input buffer
 * directly and hands runs of text to the SAX callbacks without copying.
 * When @cdata is non-zero, or as soon as the fast path meets a byte it
 * does not handle, the work is handed to xmlParseCharDataComplex().
 */

void
xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
    const xmlChar *in;
    int nbchar = 0;
    /*
     * Position of the last data reported through SAX; line/col are
     * restored from these before falling back to the complex parser.
     */
    int line = ctxt->input->line;
    int col = ctxt->input->col;
    int ccol;

    SHRINK;
    GROW;
    /*
     * Accelerated common case where the input doesn't need to be
     * modified before passing it to the handler.
     */
    if (!cdata) {
        in = ctxt->input->cur;
        do {
get_more_space:
            /* Skip leading spaces and line feeds, tracking line/col. */
            while (*in == 0x20) { in++; ctxt->input->col++; }
            if (*in == 0xA) {
                do {
                    ctxt->input->line++; ctxt->input->col = 1;
                    in++;
                } while (*in == 0xA);
                goto get_more_space;
            }
            if (*in == '<') {
                /*
                 * The whole run up to the tag consisted of spaces and
                 * line feeds only: report it and stop.
                 */
                nbchar = in - ctxt->input->cur;
                if (nbchar > 0) {
                    const xmlChar *tmp = ctxt->input->cur;
                    /* Advance the cursor before invoking any callback. */
                    ctxt->input->cur = in;

                    if ((ctxt->sax != NULL) &&
                        (ctxt->sax->ignorableWhitespace !=
                         ctxt->sax->characters)) {
                        /*
                         * Distinct handlers are installed: areBlanks()
                         * selects which one receives the run.
                         */
                        if (areBlanks(ctxt, tmp, nbchar, 1)) {
                            if (ctxt->sax->ignorableWhitespace != NULL)
                                ctxt->sax->ignorableWhitespace(ctxt->userData,
                                                               tmp, nbchar);
                        } else {
                            if (ctxt->sax->characters != NULL)
                                ctxt->sax->characters(ctxt->userData,
                                                      tmp, nbchar);
                            /*
                             * NOTE(review): -1 / -2 are markers on the
                             * space stack; their exact meaning is defined
                             * elsewhere -- confirm against areBlanks().
                             */
                            if (*ctxt->space == -1)
                                *ctxt->space = -2;
                        }
                    } else if ((ctxt->sax != NULL) &&
                               (ctxt->sax->characters != NULL)) {
                        ctxt->sax->characters(ctxt->userData,
                                              tmp, nbchar);
                    }
                }
                return;
            }

get_more:
            /*
             * Skip every byte flagged in test_char_data; the column is
             * kept in a local and written back once after the loop.
             */
            ccol = ctxt->input->col;
            while (test_char_data[*in]) {
                in++;
                ccol++;
            }
            ctxt->input->col = ccol;
            if (*in == 0xA) {
                do {
                    ctxt->input->line++; ctxt->input->col = 1;
                    in++;
                } while (*in == 0xA);
                goto get_more;
            }
            if (*in == ']') {
                /* "]]>" is not allowed in content outside CDATA. */
                if ((in[1] == ']') && (in[2] == '>')) {
                    xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
                    ctxt->input->cur = in + 1;
                    return;
                }
                /* A lone ']' is ordinary text: keep scanning. */
                in++;
                ctxt->input->col++;
                goto get_more;
            }
            /* Report the text accumulated between cur and in. */
            nbchar = in - ctxt->input->cur;
            if (nbchar > 0) {
                if ((ctxt->sax != NULL) &&
                    (ctxt->sax->ignorableWhitespace !=
                     ctxt->sax->characters) &&
                    (IS_BLANK_CH(*ctxt->input->cur))) {
                    /*
                     * The run starts with a blank and distinct handlers
                     * are installed: let areBlanks() choose the handler.
                     */
                    const xmlChar *tmp = ctxt->input->cur;
                    ctxt->input->cur = in;

                    if (areBlanks(ctxt, tmp, nbchar, 0)) {
                        if (ctxt->sax->ignorableWhitespace != NULL)
                            ctxt->sax->ignorableWhitespace(ctxt->userData,
                                                           tmp, nbchar);
                    } else {
                        if (ctxt->sax->characters != NULL)
                            ctxt->sax->characters(ctxt->userData,
                                                  tmp, nbchar);
                        if (*ctxt->space == -1)
                            *ctxt->space = -2;
                    }
                    /* Remember the position reached by reported data. */
                    line = ctxt->input->line;
                    col = ctxt->input->col;
                } else if (ctxt->sax != NULL) {
                    if (ctxt->sax->characters != NULL)
                        ctxt->sax->characters(ctxt->userData,
                                              ctxt->input->cur, nbchar);
                    line = ctxt->input->line;
                    col = ctxt->input->col;
                }
                /* something really bad happened in the SAX callback */
                if (ctxt->instate != XML_PARSER_CONTENT)
                    return;
            }
            ctxt->input->cur = in;
            if (*in == 0xD) {
                in++;
                if (*in == 0xA) {
                    /*
                     * CR LF pair: the cursor is moved onto the LF, so the
                     * CR is dropped and the LF starts the next run.
                     */
                    ctxt->input->cur = in;
                    in++;
                    ctxt->input->line++; ctxt->input->col = 1;
                    continue; /* while */
                }
                /* Lone CR: step back and leave it to the checks below. */
                in--;
            }
            if (*in == '<') {
                return;
            }
            if (*in == '&') {
                return;
            }
            /*
             * Input buffer maintenance; the scan pointer is reloaded from
             * the context cursor afterwards.
             */
            SHRINK;
            GROW;
            if (ctxt->instate == XML_PARSER_EOF)
                return;
            in = ctxt->input->cur;
        } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09) || (*in == 0x0a));
        nbchar = 0;
    }
    /*
     * Fall back to the generic parser, restarting line/col accounting
     * from the last position recorded above.
     */
    ctxt->input->line = line;
    ctxt->input->col = col;
    xmlParseCharDataComplex(ctxt, cdata);
}
4583
4584
/**
4585
 * xmlParseCharDataComplex:
4586
 * @ctxt:  an XML parser context
4587
 * @cdata:  int indicating whether we are within a CDATA section
4588
 *
4589
 * parse a CharData section.this is the fallback function
4590
 * of xmlParseCharData() when the parsing requires handling
4591
 * of non-ASCII characters.
4592
 */
4593
static void
4594
5.79M
xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
4595
5.79M
    xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4596
5.79M
    int nbchar = 0;
4597
5.79M
    int cur, l;
4598
5.79M
    int count = 0;
4599
4600
5.79M
    SHRINK;
4601
5.79M
    GROW;
4602
5.79M
    cur = CUR_CHAR(l);
4603
33.8M
    while ((cur != '<') && /* checked */
4604
33.8M
           (cur != '&') &&
4605
33.8M
     (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
4606
28.0M
  if ((cur == ']') && (NXT(1) == ']') &&
4607
28.0M
      (NXT(2) == '>')) {
4608
2.50k
      if (cdata) break;
4609
2.50k
      else {
4610
2.50k
    xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4611
2.50k
      }
4612
2.50k
  }
4613
28.0M
  COPY_BUF(l,buf,nbchar,cur);
4614
  /* move current position before possible calling of ctxt->sax->characters */
4615
28.0M
  NEXTL(l);
4616
28.0M
  cur = CUR_CHAR(l);
4617
28.0M
  if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
4618
85.7k
      buf[nbchar] = 0;
4619
4620
      /*
4621
       * OK the segment is to be consumed as chars.
4622
       */
4623
85.7k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4624
70.0k
    if (areBlanks(ctxt, buf, nbchar, 0)) {
4625
891
        if (ctxt->sax->ignorableWhitespace != NULL)
4626
891
      ctxt->sax->ignorableWhitespace(ctxt->userData,
4627
891
                                     buf, nbchar);
4628
69.1k
    } else {
4629
69.1k
        if (ctxt->sax->characters != NULL)
4630
69.1k
      ctxt->sax->characters(ctxt->userData, buf, nbchar);
4631
69.1k
        if ((ctxt->sax->characters !=
4632
69.1k
             ctxt->sax->ignorableWhitespace) &&
4633
69.1k
      (*ctxt->space == -1))
4634
2.70k
      *ctxt->space = -2;
4635
69.1k
    }
4636
70.0k
      }
4637
85.7k
      nbchar = 0;
4638
            /* something really bad happened in the SAX callback */
4639
85.7k
            if (ctxt->instate != XML_PARSER_CONTENT)
4640
0
                return;
4641
85.7k
  }
4642
28.0M
  count++;
4643
28.0M
  if (count > 50) {
4644
479k
      SHRINK;
4645
479k
      GROW;
4646
479k
      count = 0;
4647
479k
            if (ctxt->instate == XML_PARSER_EOF)
4648
0
    return;
4649
479k
  }
4650
28.0M
    }
4651
5.79M
    if (nbchar != 0) {
4652
256k
        buf[nbchar] = 0;
4653
  /*
4654
   * OK the segment is to be consumed as chars.
4655
   */
4656
256k
  if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4657
224k
      if (areBlanks(ctxt, buf, nbchar, 0)) {
4658
2.57k
    if (ctxt->sax->ignorableWhitespace != NULL)
4659
2.57k
        ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4660
221k
      } else {
4661
221k
    if (ctxt->sax->characters != NULL)
4662
221k
        ctxt->sax->characters(ctxt->userData, buf, nbchar);
4663
221k
    if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4664
221k
        (*ctxt->space == -1))
4665
55.9k
        *ctxt->space = -2;
4666
221k
      }
4667
224k
  }
4668
256k
    }
4669
5.79M
    if ((cur != 0) && (!IS_CHAR(cur))) {
4670
  /* Generate the error and skip the offending character */
4671
5.31M
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4672
5.31M
                          "PCDATA invalid Char value %d\n",
4673
5.31M
                    cur);
4674
5.31M
  NEXTL(l);
4675
5.31M
    }
4676
5.79M
}
4677
4678
/**
4679
 * xmlParseExternalID:
4680
 * @ctxt:  an XML parser context
4681
 * @publicID:  a xmlChar** receiving PubidLiteral
4682
 * @strict: indicate whether we should restrict parsing to only
4683
 *          production [75], see NOTE below
4684
 *
4685
 * DEPRECATED: Internal function, don't use.
4686
 *
4687
 * Parse an External ID or a Public ID
4688
 *
4689
 * NOTE: Productions [75] and [83] interact badly since [75] can generate
4690
 *       'PUBLIC' S PubidLiteral S SystemLiteral
4691
 *
4692
 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4693
 *                   | 'PUBLIC' S PubidLiteral S SystemLiteral
4694
 *
4695
 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4696
 *
4697
 * Returns the function returns SystemLiteral and in the second
4698
 *                case publicID receives PubidLiteral, is strict is off
4699
 *                it is possible to return NULL and have publicID set.
4700
 */
4701
4702
xmlChar *
4703
336k
xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4704
336k
    xmlChar *URI = NULL;
4705
4706
336k
    SHRINK;
4707
4708
336k
    *publicID = NULL;
4709
336k
    if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
4710
112k
        SKIP(6);
4711
112k
  if (SKIP_BLANKS == 0) {
4712
342
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4713
342
                     "Space required after 'SYSTEM'\n");
4714
342
  }
4715
112k
  URI = xmlParseSystemLiteral(ctxt);
4716
112k
  if (URI == NULL) {
4717
511
      xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4718
511
        }
4719
223k
    } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
4720
44.2k
        SKIP(6);
4721
44.2k
  if (SKIP_BLANKS == 0) {
4722
303
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4723
303
        "Space required after 'PUBLIC'\n");
4724
303
  }
4725
44.2k
  *publicID = xmlParsePubidLiteral(ctxt);
4726
44.2k
  if (*publicID == NULL) {
4727
472
      xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
4728
472
  }
4729
44.2k
  if (strict) {
4730
      /*
4731
       * We don't handle [83] so "S SystemLiteral" is required.
4732
       */
4733
43.0k
      if (SKIP_BLANKS == 0) {
4734
2.83k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4735
2.83k
      "Space required after the Public Identifier\n");
4736
2.83k
      }
4737
43.0k
  } else {
4738
      /*
4739
       * We handle [83] so we return immediately, if
4740
       * "S SystemLiteral" is not detected. We skip blanks if no
4741
             * system literal was found, but this is harmless since we must
4742
             * be at the end of a NotationDecl.
4743
       */
4744
1.19k
      if (SKIP_BLANKS == 0) return(NULL);
4745
145
      if ((CUR != '\'') && (CUR != '"')) return(NULL);
4746
145
  }
4747
43.1k
  URI = xmlParseSystemLiteral(ctxt);
4748
43.1k
  if (URI == NULL) {
4749
3.03k
      xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4750
3.03k
        }
4751
43.1k
    }
4752
335k
    return(URI);
4753
336k
}
4754
4755
/**
4756
 * xmlParseCommentComplex:
4757
 * @ctxt:  an XML parser context
4758
 * @buf:  the already parsed part of the buffer
4759
 * @len:  number of bytes in the buffer
4760
 * @size:  allocated size of the buffer
4761
 *
4762
 * Skip an XML (SGML) comment <!-- .... -->
4763
 *  The spec says that "For compatibility, the string "--" (double-hyphen)
4764
 *  must not occur within comments. "
4765
 * This is the slow routine in case the accelerator for ascii didn't work
4766
 *
4767
 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4768
 */
4769
static void
4770
xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf,
4771
42.7k
                       size_t len, size_t size) {
4772
42.7k
    int q, ql;
4773
42.7k
    int r, rl;
4774
42.7k
    int cur, l;
4775
42.7k
    size_t count = 0;
4776
42.7k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4777
12.5k
                       XML_MAX_HUGE_LENGTH :
4778
42.7k
                       XML_MAX_TEXT_LENGTH;
4779
42.7k
    int inputid;
4780
4781
42.7k
    inputid = ctxt->input->id;
4782
4783
42.7k
    if (buf == NULL) {
4784
2.70k
        len = 0;
4785
2.70k
  size = XML_PARSER_BUFFER_SIZE;
4786
2.70k
  buf = (xmlChar *) xmlMallocAtomic(size);
4787
2.70k
  if (buf == NULL) {
4788
0
      xmlErrMemory(ctxt, NULL);
4789
0
      return;
4790
0
  }
4791
2.70k
    }
4792
42.7k
    GROW; /* Assure there's enough input data */
4793
42.7k
    q = CUR_CHAR(ql);
4794
42.7k
    if (q == 0)
4795
4.97k
        goto not_terminated;
4796
37.7k
    if (!IS_CHAR(q)) {
4797
4.42k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4798
4.42k
                          "xmlParseComment: invalid xmlChar value %d\n",
4799
4.42k
                    q);
4800
4.42k
  xmlFree (buf);
4801
4.42k
  return;
4802
4.42k
    }
4803
33.3k
    NEXTL(ql);
4804
33.3k
    r = CUR_CHAR(rl);
4805
33.3k
    if (r == 0)
4806
491
        goto not_terminated;
4807
32.8k
    if (!IS_CHAR(r)) {
4808
493
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4809
493
                          "xmlParseComment: invalid xmlChar value %d\n",
4810
493
                    q);
4811
493
  xmlFree (buf);
4812
493
  return;
4813
493
    }
4814
32.3k
    NEXTL(rl);
4815
32.3k
    cur = CUR_CHAR(l);
4816
32.3k
    if (cur == 0)
4817
227
        goto not_terminated;
4818
11.6M
    while (IS_CHAR(cur) && /* checked */
4819
11.6M
           ((cur != '>') ||
4820
11.6M
      (r != '-') || (q != '-'))) {
4821
11.5M
  if ((r == '-') && (q == '-')) {
4822
53.3k
      xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
4823
53.3k
  }
4824
11.5M
  if (len + 5 >= size) {
4825
14.4k
      xmlChar *new_buf;
4826
14.4k
            size_t new_size;
4827
4828
14.4k
      new_size = size * 2;
4829
14.4k
      new_buf = (xmlChar *) xmlRealloc(buf, new_size);
4830
14.4k
      if (new_buf == NULL) {
4831
0
    xmlFree (buf);
4832
0
    xmlErrMemory(ctxt, NULL);
4833
0
    return;
4834
0
      }
4835
14.4k
      buf = new_buf;
4836
14.4k
            size = new_size;
4837
14.4k
  }
4838
11.5M
  COPY_BUF(ql,buf,len,q);
4839
11.5M
  q = r;
4840
11.5M
  ql = rl;
4841
11.5M
  r = cur;
4842
11.5M
  rl = l;
4843
4844
11.5M
  count++;
4845
11.5M
  if (count > 50) {
4846
218k
      SHRINK;
4847
218k
      GROW;
4848
218k
      count = 0;
4849
218k
            if (ctxt->instate == XML_PARSER_EOF) {
4850
0
    xmlFree(buf);
4851
0
    return;
4852
0
            }
4853
218k
  }
4854
11.5M
  NEXTL(l);
4855
11.5M
  cur = CUR_CHAR(l);
4856
11.5M
  if (cur == 0) {
4857
3.07k
      SHRINK;
4858
3.07k
      GROW;
4859
3.07k
      cur = CUR_CHAR(l);
4860
3.07k
  }
4861
4862
11.5M
        if (len > maxLength) {
4863
0
            xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4864
0
                         "Comment too big found", NULL);
4865
0
            xmlFree (buf);
4866
0
            return;
4867
0
        }
4868
11.5M
    }
4869
32.1k
    buf[len] = 0;
4870
32.1k
    if (cur == 0) {
4871
3.07k
  xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4872
3.07k
                       "Comment not terminated \n<!--%.50s\n", buf);
4873
29.0k
    } else if (!IS_CHAR(cur)) {
4874
1.51k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4875
1.51k
                          "xmlParseComment: invalid xmlChar value %d\n",
4876
1.51k
                    cur);
4877
27.5k
    } else {
4878
27.5k
  if (inputid != ctxt->input->id) {
4879
0
      xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4880
0
               "Comment doesn't start and stop in the same"
4881
0
                           " entity\n");
4882
0
  }
4883
27.5k
        NEXT;
4884
27.5k
  if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4885
27.5k
      (!ctxt->disableSAX))
4886
22.1k
      ctxt->sax->comment(ctxt->userData, buf);
4887
27.5k
    }
4888
32.1k
    xmlFree(buf);
4889
32.1k
    return;
4890
5.69k
not_terminated:
4891
5.69k
    xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4892
5.69k
       "Comment not terminated\n", NULL);
4893
5.69k
    xmlFree(buf);
4894
5.69k
    return;
4895
32.1k
}
4896
4897
/**
 * xmlParseComment:
 * @ctxt:  an XML parser context
 *
 * DEPRECATED: Internal function, don't use.
 *
 * Skip an XML (SGML) comment <!-- .... -->
 *  The spec says that "For compatibility, the string "--" (double-hyphen)
 *  must not occur within comments. "
 *
 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
 *
 * Returns immediately if the input is not positioned on '<!--'.
 * An ASCII-only fast path accumulates the comment text into a heap
 * buffer; the first byte it does not handle hands the buffer over to
 * xmlParseCommentComplex(), which finishes the job and frees it.
 */
void
xmlParseComment(xmlParserCtxtPtr ctxt) {
    xmlChar *buf = NULL;
    size_t size = XML_PARSER_BUFFER_SIZE;
    size_t len = 0;
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
                       XML_MAX_HUGE_LENGTH :
                       XML_MAX_TEXT_LENGTH;
    xmlParserInputState state;
    const xmlChar *in;
    size_t nbchar = 0;
    int ccol;
    int inputid;

    /*
     * Check that there is a comment right here.
     */
    if ((RAW != '<') || (NXT(1) != '!') ||
        (NXT(2) != '-') || (NXT(3) != '-')) return;
    /* Save the parser state so that it can be restored on exit. */
    state = ctxt->instate;
    ctxt->instate = XML_PARSER_COMMENT;
    inputid = ctxt->input->id;
    SKIP(4);
    SHRINK;
    GROW;

    /*
     * Accelerated common case where the input doesn't need to be
     * modified before passing it to the handler.
     */
    in = ctxt->input->cur;
    do {
        if (*in == 0xA) {
            do {
                ctxt->input->line++; ctxt->input->col = 1;
                in++;
            } while (*in == 0xA);
        }
get_more:
        /*
         * Skip TAB and every byte in 0x20..0x7F other than '-'; the
         * column is kept in a local and written back after the loop.
         */
        ccol = ctxt->input->col;
        while (((*in > '-') && (*in <= 0x7F)) ||
               ((*in >= 0x20) && (*in < '-')) ||
               (*in == 0x09)) {
            in++;
            ccol++;
        }
        ctxt->input->col = ccol;
        if (*in == 0xA) {
            do {
                ctxt->input->line++; ctxt->input->col = 1;
                in++;
            } while (*in == 0xA);
            goto get_more;
        }
        nbchar = in - ctxt->input->cur;
        /*
         * save current set of data
         */
        if (nbchar > 0) {
            /* Text is only collected when a comment handler exists. */
            if ((ctxt->sax != NULL) &&
                (ctxt->sax->comment != NULL)) {
                if (buf == NULL) {
                    /*
                     * First chunk: allocate an exact fit when "--"
                     * follows, otherwise leave room for more data.
                     */
                    if ((*in == '-') && (in[1] == '-'))
                        size = nbchar + 1;
                    else
                        size = XML_PARSER_BUFFER_SIZE + nbchar;
                    buf = (xmlChar *) xmlMallocAtomic(size);
                    if (buf == NULL) {
                        xmlErrMemory(ctxt, NULL);
                        ctxt->instate = state;
                        return;
                    }
                    len = 0;
                } else if (len + nbchar + 1 >= size) {
                    xmlChar *new_buf;
                    size  += len + nbchar + XML_PARSER_BUFFER_SIZE;
                    new_buf = (xmlChar *) xmlRealloc(buf, size);
                    if (new_buf == NULL) {
                        xmlFree (buf);
                        xmlErrMemory(ctxt, NULL);
                        ctxt->instate = state;
                        return;
                    }
                    buf = new_buf;
                }
                memcpy(&buf[len], ctxt->input->cur, nbchar);
                len += nbchar;
                buf[len] = 0;
            }
        }
        /*
         * NOTE(review): unlike the allocation failure paths above, this
         * return does not restore ctxt->instate from 'state' -- confirm
         * whether that is intended.
         */
        if (len > maxLength) {
            xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
                         "Comment too big found", NULL);
            xmlFree (buf);
            return;
        }
        ctxt->input->cur = in;
        /*
         * NOTE(review): this branch looks unreachable, since the scan
         * above jumps back to get_more whenever it stops on a line feed.
         */
        if (*in == 0xA) {
            in++;
            ctxt->input->line++; ctxt->input->col = 1;
        }
        if (*in == 0xD) {
            in++;
            if (*in == 0xA) {
                /*
                 * CR LF pair: the cursor is moved onto the LF, so the CR
                 * is dropped and the LF starts the next copied chunk.
                 */
                ctxt->input->cur = in;
                in++;
                ctxt->input->line++; ctxt->input->col = 1;
                goto get_more;
            }
            /* Lone CR: step back and leave it to the loop condition. */
            in--;
        }
        /*
         * Input buffer maintenance; the scan pointer is reloaded from
         * the context cursor afterwards.
         */
        SHRINK;
        GROW;
        if (ctxt->instate == XML_PARSER_EOF) {
            xmlFree(buf);
            return;
        }
        in = ctxt->input->cur;
        if (*in == '-') {
            if (in[1] == '-') {
                if (in[2] == '>') {
                    /* "-->" found: the comment is complete. */
                    if (ctxt->input->id != inputid) {
                        xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
                                       "comment doesn't start and stop in the"
                                       " same entity\n");
                    }
                    SKIP(3);
                    if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
                        (!ctxt->disableSAX)) {
                        /* An empty comment never allocated a buffer. */
                        if (buf != NULL)
                            ctxt->sax->comment(ctxt->userData, buf);
                        else
                            ctxt->sax->comment(ctxt->userData, BAD_CAST "");
                    }
                    if (buf != NULL)
                        xmlFree(buf);
                    if (ctxt->instate != XML_PARSER_EOF)
                        ctxt->instate = state;
                    return;
                }
                /* "--" not followed by '>': report it and carry on. */
                if (buf != NULL) {
                    xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
                                      "Double hyphen within comment: "
                                      "<!--%.50s\n",
                                      buf);
                } else
                    xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
                                      "Double hyphen within comment\n", NULL);
                if (ctxt->instate == XML_PARSER_EOF) {
                    xmlFree(buf);
                    return;
                }
                in++;
                ctxt->input->col++;
            }
            /*
             * Step over the hyphen; the cursor is left behind, so the
             * hyphen(s) are copied with the next chunk.
             */
            in++;
            ctxt->input->col++;
            goto get_more;
        }
    } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09) || (*in == 0x0a));
    /* Hand the rest, and the buffer, to the generic parser. */
    xmlParseCommentComplex(ctxt, buf, len, size);
    ctxt->instate = state;
    return;
}
5073
5074
5075
/**
5076
 * xmlParsePITarget:
5077
 * @ctxt:  an XML parser context
5078
 *
5079
 * DEPRECATED: Internal function, don't use.
5080
 *
5081
 * parse the name of a PI
5082
 *
5083
 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
5084
 *
5085
 * Returns the PITarget name or NULL
5086
 */
5087
5088
const xmlChar *
5089
126k
xmlParsePITarget(xmlParserCtxtPtr ctxt) {
5090
126k
    const xmlChar *name;
5091
5092
126k
    name = xmlParseName(ctxt);
5093
126k
    if ((name != NULL) &&
5094
126k
        ((name[0] == 'x') || (name[0] == 'X')) &&
5095
126k
        ((name[1] == 'm') || (name[1] == 'M')) &&
5096
126k
        ((name[2] == 'l') || (name[2] == 'L'))) {
5097
13.8k
  int i;
5098
13.8k
  if ((name[0] == 'x') && (name[1] == 'm') &&
5099
13.8k
      (name[2] == 'l') && (name[3] == 0)) {
5100
6.80k
      xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5101
6.80k
     "XML declaration allowed only at the start of the document\n");
5102
6.80k
      return(name);
5103
7.05k
  } else if (name[3] == 0) {
5104
442
      xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
5105
442
      return(name);
5106
442
  }
5107
14.2k
  for (i = 0;;i++) {
5108
14.2k
      if (xmlW3CPIs[i] == NULL) break;
5109
10.4k
      if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
5110
2.77k
          return(name);
5111
10.4k
  }
5112
3.83k
  xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5113
3.83k
          "xmlParsePITarget: invalid name prefix 'xml'\n",
5114
3.83k
          NULL, NULL);
5115
3.83k
    }
5116
116k
    if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
5117
1.63k
  xmlNsErr(ctxt, XML_NS_ERR_COLON,
5118
1.63k
     "colons are forbidden from PI names '%s'\n", name, NULL, NULL);
5119
1.63k
    }
5120
116k
    return(name);
5121
126k
}
5122
5123
#ifdef LIBXML_CATALOG_ENABLED
5124
/**
5125
 * xmlParseCatalogPI:
5126
 * @ctxt:  an XML parser context
5127
 * @catalog:  the PI value string
5128
 *
5129
 * parse an XML Catalog Processing Instruction.
5130
 *
5131
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
5132
 *
5133
 * Occurs only if allowed by the user and if happening in the Misc
5134
 * part of the document before any doctype information
5135
 * This will add the given catalog to the parsing context in order
5136
 * to be used if there is a resolution need further down in the document
5137
 */
5138
5139
static void
5140
147
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
5141
147
    xmlChar *URL = NULL;
5142
147
    const xmlChar *tmp, *base;
5143
147
    xmlChar marker;
5144
5145
147
    tmp = catalog;
5146
147
    while (IS_BLANK_CH(*tmp)) tmp++;
5147
147
    if (xmlStrncmp(tmp, BAD_CAST"catalog", 7))
5148
56
  goto error;
5149
91
    tmp += 7;
5150
11.4k
    while (IS_BLANK_CH(*tmp)) tmp++;
5151
91
    if (*tmp != '=') {
5152
85
  return;
5153
85
    }
5154
6
    tmp++;
5155
6
    while (IS_BLANK_CH(*tmp)) tmp++;
5156
6
    marker = *tmp;
5157
6
    if ((marker != '\'') && (marker != '"'))
5158
6
  goto error;
5159
0
    tmp++;
5160
0
    base = tmp;
5161
0
    while ((*tmp != 0) && (*tmp != marker)) tmp++;
5162
0
    if (*tmp == 0)
5163
0
  goto error;
5164
0
    URL = xmlStrndup(base, tmp - base);
5165
0
    tmp++;
5166
0
    while (IS_BLANK_CH(*tmp)) tmp++;
5167
0
    if (*tmp != 0)
5168
0
  goto error;
5169
5170
0
    if (URL != NULL) {
5171
0
  ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
5172
0
  xmlFree(URL);
5173
0
    }
5174
0
    return;
5175
5176
62
error:
5177
62
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
5178
62
            "Catalog PI syntax error: %s\n",
5179
62
      catalog, NULL);
5180
62
    if (URL != NULL)
5181
0
  xmlFree(URL);
5182
62
}
5183
#endif
5184
5185
/**
5186
 * xmlParsePI:
5187
 * @ctxt:  an XML parser context
5188
 *
5189
 * DEPRECATED: Internal function, don't use.
5190
 *
5191
 * parse an XML Processing Instruction.
5192
 *
5193
 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
5194
 *
5195
 * The processing is transferred to SAX once parsed.
5196
 */
5197
5198
void
5199
126k
xmlParsePI(xmlParserCtxtPtr ctxt) {
5200
126k
    xmlChar *buf = NULL;
5201
126k
    size_t len = 0;
5202
126k
    size_t size = XML_PARSER_BUFFER_SIZE;
5203
126k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
5204
69.0k
                       XML_MAX_HUGE_LENGTH :
5205
126k
                       XML_MAX_TEXT_LENGTH;
5206
126k
    int cur, l;
5207
126k
    const xmlChar *target;
5208
126k
    xmlParserInputState state;
5209
126k
    int count = 0;
5210
5211
126k
    if ((RAW == '<') && (NXT(1) == '?')) {
5212
126k
  int inputid = ctxt->input->id;
5213
126k
  state = ctxt->instate;
5214
126k
        ctxt->instate = XML_PARSER_PI;
5215
  /*
5216
   * this is a Processing Instruction.
5217
   */
5218
126k
  SKIP(2);
5219
126k
  SHRINK;
5220
5221
  /*
5222
   * Parse the target name and check for special support like
5223
   * namespace.
5224
   */
5225
126k
        target = xmlParsePITarget(ctxt);
5226
126k
  if (target != NULL) {
5227
121k
      if ((RAW == '?') && (NXT(1) == '>')) {
5228
47.1k
    if (inputid != ctxt->input->id) {
5229
0
        xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5230
0
                             "PI declaration doesn't start and stop in"
5231
0
                                   " the same entity\n");
5232
0
    }
5233
47.1k
    SKIP(2);
5234
5235
    /*
5236
     * SAX: PI detected.
5237
     */
5238
47.1k
    if ((ctxt->sax) && (!ctxt->disableSAX) &&
5239
47.1k
        (ctxt->sax->processingInstruction != NULL))
5240
46.0k
        ctxt->sax->processingInstruction(ctxt->userData,
5241
46.0k
                                         target, NULL);
5242
47.1k
    if (ctxt->instate != XML_PARSER_EOF)
5243
47.1k
        ctxt->instate = state;
5244
47.1k
    return;
5245
47.1k
      }
5246
74.5k
      buf = (xmlChar *) xmlMallocAtomic(size);
5247
74.5k
      if (buf == NULL) {
5248
0
    xmlErrMemory(ctxt, NULL);
5249
0
    ctxt->instate = state;
5250
0
    return;
5251
0
      }
5252
74.5k
      if (SKIP_BLANKS == 0) {
5253
15.6k
    xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
5254
15.6k
        "ParsePI: PI %s space expected\n", target);
5255
15.6k
      }
5256
74.5k
      cur = CUR_CHAR(l);
5257
9.75M
      while (IS_CHAR(cur) && /* checked */
5258
9.75M
       ((cur != '?') || (NXT(1) != '>'))) {
5259
9.68M
    if (len + 5 >= size) {
5260
14.1k
        xmlChar *tmp;
5261
14.1k
                    size_t new_size = size * 2;
5262
14.1k
        tmp = (xmlChar *) xmlRealloc(buf, new_size);
5263
14.1k
        if (tmp == NULL) {
5264
0
      xmlErrMemory(ctxt, NULL);
5265
0
      xmlFree(buf);
5266
0
      ctxt->instate = state;
5267
0
      return;
5268
0
        }
5269
14.1k
        buf = tmp;
5270
14.1k
                    size = new_size;
5271
14.1k
    }
5272
9.68M
    count++;
5273
9.68M
    if (count > 50) {
5274
167k
        SHRINK;
5275
167k
        GROW;
5276
167k
                    if (ctxt->instate == XML_PARSER_EOF) {
5277
0
                        xmlFree(buf);
5278
0
                        return;
5279
0
                    }
5280
167k
        count = 0;
5281
167k
    }
5282
9.68M
    COPY_BUF(l,buf,len,cur);
5283
9.68M
    NEXTL(l);
5284
9.68M
    cur = CUR_CHAR(l);
5285
9.68M
    if (cur == 0) {
5286
5.97k
        SHRINK;
5287
5.97k
        GROW;
5288
5.97k
        cur = CUR_CHAR(l);
5289
5.97k
    }
5290
9.68M
                if (len > maxLength) {
5291
0
                    xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5292
0
                                      "PI %s too big found", target);
5293
0
                    xmlFree(buf);
5294
0
                    ctxt->instate = state;
5295
0
                    return;
5296
0
                }
5297
9.68M
      }
5298
74.5k
      buf[len] = 0;
5299
74.5k
      if (cur != '?') {
5300
12.2k
    xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5301
12.2k
          "ParsePI: PI %s never end ...\n", target);
5302
62.3k
      } else {
5303
62.3k
    if (inputid != ctxt->input->id) {
5304
0
        xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5305
0
                             "PI declaration doesn't start and stop in"
5306
0
                                   " the same entity\n");
5307
0
    }
5308
62.3k
    SKIP(2);
5309
5310
62.3k
#ifdef LIBXML_CATALOG_ENABLED
5311
62.3k
    if (((state == XML_PARSER_MISC) ||
5312
62.3k
               (state == XML_PARSER_START)) &&
5313
62.3k
        (xmlStrEqual(target, XML_CATALOG_PI))) {
5314
147
        xmlCatalogAllow allow = xmlCatalogGetDefaults();
5315
147
        if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5316
147
      (allow == XML_CATA_ALLOW_ALL))
5317
147
      xmlParseCatalogPI(ctxt, buf);
5318
147
    }
5319
62.3k
#endif
5320
5321
5322
    /*
5323
     * SAX: PI detected.
5324
     */
5325
62.3k
    if ((ctxt->sax) && (!ctxt->disableSAX) &&
5326
62.3k
        (ctxt->sax->processingInstruction != NULL))
5327
53.6k
        ctxt->sax->processingInstruction(ctxt->userData,
5328
53.6k
                                         target, buf);
5329
62.3k
      }
5330
74.5k
      xmlFree(buf);
5331
74.5k
  } else {
5332
4.77k
      xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
5333
4.77k
  }
5334
79.3k
  if (ctxt->instate != XML_PARSER_EOF)
5335
79.3k
      ctxt->instate = state;
5336
79.3k
    }
5337
126k
}
5338
5339
/**
5340
 * xmlParseNotationDecl:
5341
 * @ctxt:  an XML parser context
5342
 *
5343
 * DEPRECATED: Internal function, don't use.
5344
 *
5345
 * parse a notation declaration
5346
 *
5347
 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID |  PublicID) S? '>'
5348
 *
5349
 * Hence there is actually 3 choices:
5350
 *     'PUBLIC' S PubidLiteral
5351
 *     'PUBLIC' S PubidLiteral S SystemLiteral
5352
 * and 'SYSTEM' S SystemLiteral
5353
 *
5354
 * See the NOTE on xmlParseExternalID().
5355
 */
5356
5357
void
5358
4.13k
xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
5359
4.13k
    const xmlChar *name;
5360
4.13k
    xmlChar *Pubid;
5361
4.13k
    xmlChar *Systemid;
5362
5363
4.13k
    if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5364
2.81k
  int inputid = ctxt->input->id;
5365
2.81k
  SHRINK;
5366
2.81k
  SKIP(10);
5367
2.81k
  if (SKIP_BLANKS == 0) {
5368
193
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5369
193
         "Space required after '<!NOTATION'\n");
5370
193
      return;
5371
193
  }
5372
5373
2.62k
        name = xmlParseName(ctxt);
5374
2.62k
  if (name == NULL) {
5375
144
      xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5376
144
      return;
5377
144
  }
5378
2.48k
  if (xmlStrchr(name, ':') != NULL) {
5379
90
      xmlNsErr(ctxt, XML_NS_ERR_COLON,
5380
90
         "colons are forbidden from notation names '%s'\n",
5381
90
         name, NULL, NULL);
5382
90
  }
5383
2.48k
  if (SKIP_BLANKS == 0) {
5384
165
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5385
165
         "Space required after the NOTATION name'\n");
5386
165
      return;
5387
165
  }
5388
5389
  /*
5390
   * Parse the IDs.
5391
   */
5392
2.31k
  Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
5393
2.31k
  SKIP_BLANKS;
5394
5395
2.31k
  if (RAW == '>') {
5396
1.77k
      if (inputid != ctxt->input->id) {
5397
0
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5398
0
                         "Notation declaration doesn't start and stop"
5399
0
                               " in the same entity\n");
5400
0
      }
5401
1.77k
      NEXT;
5402
1.77k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5403
1.77k
    (ctxt->sax->notationDecl != NULL))
5404
1.44k
    ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5405
1.77k
  } else {
5406
541
      xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5407
541
  }
5408
2.31k
  if (Systemid != NULL) xmlFree(Systemid);
5409
2.31k
  if (Pubid != NULL) xmlFree(Pubid);
5410
2.31k
    }
5411
4.13k
}
5412
5413
/**
5414
 * xmlParseEntityDecl:
5415
 * @ctxt:  an XML parser context
5416
 *
5417
 * DEPRECATED: Internal function, don't use.
5418
 *
5419
 * parse <!ENTITY declarations
5420
 *
5421
 * [70] EntityDecl ::= GEDecl | PEDecl
5422
 *
5423
 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5424
 *
5425
 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5426
 *
5427
 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5428
 *
5429
 * [74] PEDef ::= EntityValue | ExternalID
5430
 *
5431
 * [76] NDataDecl ::= S 'NDATA' S Name
5432
 *
5433
 * [ VC: Notation Declared ]
5434
 * The Name must match the declared name of a notation.
5435
 */
5436
5437
void
5438
840k
xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
5439
840k
    const xmlChar *name = NULL;
5440
840k
    xmlChar *value = NULL;
5441
840k
    xmlChar *URI = NULL, *literal = NULL;
5442
840k
    const xmlChar *ndata = NULL;
5443
840k
    int isParameter = 0;
5444
840k
    xmlChar *orig = NULL;
5445
5446
    /* GROW; done in the caller */
5447
840k
    if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
5448
837k
  int inputid = ctxt->input->id;
5449
837k
  SHRINK;
5450
837k
  SKIP(8);
5451
837k
  if (SKIP_BLANKS == 0) {
5452
2.12k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5453
2.12k
         "Space required after '<!ENTITY'\n");
5454
2.12k
  }
5455
5456
837k
  if (RAW == '%') {
5457
224k
      NEXT;
5458
224k
      if (SKIP_BLANKS == 0) {
5459
328
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5460
328
             "Space required after '%%'\n");
5461
328
      }
5462
224k
      isParameter = 1;
5463
224k
  }
5464
5465
837k
        name = xmlParseName(ctxt);
5466
837k
  if (name == NULL) {
5467
2.30k
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5468
2.30k
                     "xmlParseEntityDecl: no name\n");
5469
2.30k
            return;
5470
2.30k
  }
5471
835k
  if (xmlStrchr(name, ':') != NULL) {
5472
561
      xmlNsErr(ctxt, XML_NS_ERR_COLON,
5473
561
         "colons are forbidden from entities names '%s'\n",
5474
561
         name, NULL, NULL);
5475
561
  }
5476
835k
  if (SKIP_BLANKS == 0) {
5477
3.10k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5478
3.10k
         "Space required after the entity name\n");
5479
3.10k
  }
5480
5481
835k
  ctxt->instate = XML_PARSER_ENTITY_DECL;
5482
  /*
5483
   * handle the various case of definitions...
5484
   */
5485
835k
  if (isParameter) {
5486
223k
      if ((RAW == '"') || (RAW == '\'')) {
5487
208k
          value = xmlParseEntityValue(ctxt, &orig);
5488
208k
    if (value) {
5489
206k
        if ((ctxt->sax != NULL) &&
5490
206k
      (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5491
190k
      ctxt->sax->entityDecl(ctxt->userData, name,
5492
190k
                        XML_INTERNAL_PARAMETER_ENTITY,
5493
190k
            NULL, NULL, value);
5494
206k
    }
5495
208k
      } else {
5496
14.8k
          URI = xmlParseExternalID(ctxt, &literal, 1);
5497
14.8k
    if ((URI == NULL) && (literal == NULL)) {
5498
967
        xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5499
967
    }
5500
14.8k
    if (URI) {
5501
13.8k
        xmlURIPtr uri;
5502
5503
13.8k
        uri = xmlParseURI((const char *) URI);
5504
13.8k
        if (uri == NULL) {
5505
449
            xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5506
449
             "Invalid URI: %s\n", URI);
5507
      /*
5508
       * This really ought to be a well formedness error
5509
       * but the XML Core WG decided otherwise c.f. issue
5510
       * E26 of the XML erratas.
5511
       */
5512
13.3k
        } else {
5513
13.3k
      if (uri->fragment != NULL) {
5514
          /*
5515
           * Okay this is foolish to block those but not
5516
           * invalid URIs.
5517
           */
5518
86
          xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5519
13.2k
      } else {
5520
13.2k
          if ((ctxt->sax != NULL) &&
5521
13.2k
        (!ctxt->disableSAX) &&
5522
13.2k
        (ctxt->sax->entityDecl != NULL))
5523
13.0k
        ctxt->sax->entityDecl(ctxt->userData, name,
5524
13.0k
              XML_EXTERNAL_PARAMETER_ENTITY,
5525
13.0k
              literal, URI, NULL);
5526
13.2k
      }
5527
13.3k
      xmlFreeURI(uri);
5528
13.3k
        }
5529
13.8k
    }
5530
14.8k
      }
5531
611k
  } else {
5532
611k
      if ((RAW == '"') || (RAW == '\'')) {
5533
579k
          value = xmlParseEntityValue(ctxt, &orig);
5534
579k
    if ((ctxt->sax != NULL) &&
5535
579k
        (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5536
507k
        ctxt->sax->entityDecl(ctxt->userData, name,
5537
507k
        XML_INTERNAL_GENERAL_ENTITY,
5538
507k
        NULL, NULL, value);
5539
    /*
5540
     * For expat compatibility in SAX mode.
5541
     */
5542
579k
    if ((ctxt->myDoc == NULL) ||
5543
579k
        (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5544
10.4k
        if (ctxt->myDoc == NULL) {
5545
1.02k
      ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5546
1.02k
      if (ctxt->myDoc == NULL) {
5547
0
          xmlErrMemory(ctxt, "New Doc failed");
5548
0
          return;
5549
0
      }
5550
1.02k
      ctxt->myDoc->properties = XML_DOC_INTERNAL;
5551
1.02k
        }
5552
10.4k
        if (ctxt->myDoc->intSubset == NULL)
5553
1.02k
      ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5554
1.02k
              BAD_CAST "fake", NULL, NULL);
5555
5556
10.4k
        xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5557
10.4k
                    NULL, NULL, value);
5558
10.4k
    }
5559
579k
      } else {
5560
31.7k
          URI = xmlParseExternalID(ctxt, &literal, 1);
5561
31.7k
    if ((URI == NULL) && (literal == NULL)) {
5562
3.74k
        xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5563
3.74k
    }
5564
31.7k
    if (URI) {
5565
27.0k
        xmlURIPtr uri;
5566
5567
27.0k
        uri = xmlParseURI((const char *)URI);
5568
27.0k
        if (uri == NULL) {
5569
1.46k
            xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5570
1.46k
             "Invalid URI: %s\n", URI);
5571
      /*
5572
       * This really ought to be a well formedness error
5573
       * but the XML Core WG decided otherwise c.f. issue
5574
       * E26 of the XML erratas.
5575
       */
5576
25.5k
        } else {
5577
25.5k
      if (uri->fragment != NULL) {
5578
          /*
5579
           * Okay this is foolish to block those but not
5580
           * invalid URIs.
5581
           */
5582
562
          xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5583
562
      }
5584
25.5k
      xmlFreeURI(uri);
5585
25.5k
        }
5586
27.0k
    }
5587
31.7k
    if ((RAW != '>') && (SKIP_BLANKS == 0)) {
5588
4.16k
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5589
4.16k
           "Space required before 'NDATA'\n");
5590
4.16k
    }
5591
31.7k
    if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
5592
7.15k
        SKIP(5);
5593
7.15k
        if (SKIP_BLANKS == 0) {
5594
213
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5595
213
               "Space required after 'NDATA'\n");
5596
213
        }
5597
7.15k
        ndata = xmlParseName(ctxt);
5598
7.15k
        if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5599
7.15k
            (ctxt->sax->unparsedEntityDecl != NULL))
5600
6.82k
      ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5601
6.82k
            literal, URI, ndata);
5602
24.5k
    } else {
5603
24.5k
        if ((ctxt->sax != NULL) &&
5604
24.5k
            (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5605
20.4k
      ctxt->sax->entityDecl(ctxt->userData, name,
5606
20.4k
            XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5607
20.4k
            literal, URI, NULL);
5608
        /*
5609
         * For expat compatibility in SAX mode.
5610
         * assuming the entity replacement was asked for
5611
         */
5612
24.5k
        if ((ctxt->replaceEntities != 0) &&
5613
24.5k
      ((ctxt->myDoc == NULL) ||
5614
16.5k
      (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5615
698
      if (ctxt->myDoc == NULL) {
5616
518
          ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5617
518
          if (ctxt->myDoc == NULL) {
5618
0
              xmlErrMemory(ctxt, "New Doc failed");
5619
0
        return;
5620
0
          }
5621
518
          ctxt->myDoc->properties = XML_DOC_INTERNAL;
5622
518
      }
5623
5624
698
      if (ctxt->myDoc->intSubset == NULL)
5625
518
          ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5626
518
            BAD_CAST "fake", NULL, NULL);
5627
698
      xmlSAX2EntityDecl(ctxt, name,
5628
698
                  XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5629
698
                  literal, URI, NULL);
5630
698
        }
5631
24.5k
    }
5632
31.7k
      }
5633
611k
  }
5634
835k
  if (ctxt->instate == XML_PARSER_EOF)
5635
0
      goto done;
5636
835k
  SKIP_BLANKS;
5637
835k
  if (RAW != '>') {
5638
10.9k
      xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
5639
10.9k
              "xmlParseEntityDecl: entity %s not terminated\n", name);
5640
10.9k
      xmlHaltParser(ctxt);
5641
824k
  } else {
5642
824k
      if (inputid != ctxt->input->id) {
5643
63
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5644
63
                         "Entity declaration doesn't start and stop in"
5645
63
                               " the same entity\n");
5646
63
      }
5647
824k
      NEXT;
5648
824k
  }
5649
835k
  if (orig != NULL) {
5650
      /*
5651
       * Ugly mechanism to save the raw entity value.
5652
       */
5653
782k
      xmlEntityPtr cur = NULL;
5654
5655
782k
      if (isParameter) {
5656
206k
          if ((ctxt->sax != NULL) &&
5657
206k
        (ctxt->sax->getParameterEntity != NULL))
5658
206k
        cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5659
575k
      } else {
5660
575k
          if ((ctxt->sax != NULL) &&
5661
575k
        (ctxt->sax->getEntity != NULL))
5662
575k
        cur = ctxt->sax->getEntity(ctxt->userData, name);
5663
575k
    if ((cur == NULL) && (ctxt->userData==ctxt)) {
5664
56.3k
        cur = xmlSAX2GetEntity(ctxt, name);
5665
56.3k
    }
5666
575k
      }
5667
782k
            if ((cur != NULL) && (cur->orig == NULL)) {
5668
692k
    cur->orig = orig;
5669
692k
                orig = NULL;
5670
692k
      }
5671
782k
  }
5672
5673
835k
done:
5674
835k
  if (value != NULL) xmlFree(value);
5675
835k
  if (URI != NULL) xmlFree(URI);
5676
835k
  if (literal != NULL) xmlFree(literal);
5677
835k
        if (orig != NULL) xmlFree(orig);
5678
835k
    }
5679
840k
}
5680
5681
/**
5682
 * xmlParseDefaultDecl:
5683
 * @ctxt:  an XML parser context
5684
 * @value:  Receive a possible fixed default value for the attribute
5685
 *
5686
 * DEPRECATED: Internal function, don't use.
5687
 *
5688
 * Parse an attribute default declaration
5689
 *
5690
 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5691
 *
5692
 * [ VC: Required Attribute ]
5693
 * if the default declaration is the keyword #REQUIRED, then the
5694
 * attribute must be specified for all elements of the type in the
5695
 * attribute-list declaration.
5696
 *
5697
 * [ VC: Attribute Default Legal ]
5698
 * The declared default value must meet the lexical constraints of
5699
 * the declared attribute type c.f. xmlValidateAttributeDecl()
5700
 *
5701
 * [ VC: Fixed Attribute Default ]
5702
 * if an attribute has a default value declared with the #FIXED
5703
 * keyword, instances of that attribute must match the default value.
5704
 *
5705
 * [ WFC: No < in Attribute Values ]
5706
 * handled in xmlParseAttValue()
5707
 *
5708
 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5709
 *          or XML_ATTRIBUTE_FIXED.
5710
 */
5711
5712
int
5713
1.10M
xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5714
1.10M
    int val;
5715
1.10M
    xmlChar *ret;
5716
5717
1.10M
    *value = NULL;
5718
1.10M
    if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
5719
325k
  SKIP(9);
5720
325k
  return(XML_ATTRIBUTE_REQUIRED);
5721
325k
    }
5722
774k
    if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
5723
648k
  SKIP(8);
5724
648k
  return(XML_ATTRIBUTE_IMPLIED);
5725
648k
    }
5726
125k
    val = XML_ATTRIBUTE_NONE;
5727
125k
    if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
5728
48.4k
  SKIP(6);
5729
48.4k
  val = XML_ATTRIBUTE_FIXED;
5730
48.4k
  if (SKIP_BLANKS == 0) {
5731
214
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5732
214
         "Space required after '#FIXED'\n");
5733
214
  }
5734
48.4k
    }
5735
125k
    ret = xmlParseAttValue(ctxt);
5736
125k
    ctxt->instate = XML_PARSER_DTD;
5737
125k
    if (ret == NULL) {
5738
4.15k
  xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
5739
4.15k
           "Attribute default value declaration error\n");
5740
4.15k
    } else
5741
121k
        *value = ret;
5742
125k
    return(val);
5743
774k
}
5744
5745
/**
5746
 * xmlParseNotationType:
5747
 * @ctxt:  an XML parser context
5748
 *
5749
 * DEPRECATED: Internal function, don't use.
5750
 *
5751
 * parse an Notation attribute type.
5752
 *
5753
 * Note: the leading 'NOTATION' S part has already being parsed...
5754
 *
5755
 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5756
 *
5757
 * [ VC: Notation Attributes ]
5758
 * Values of this type must match one of the notation names included
5759
 * in the declaration; all notation names in the declaration must be declared.
5760
 *
5761
 * Returns: the notation attribute tree built while parsing
5762
 */
5763
5764
xmlEnumerationPtr
5765
2.52k
xmlParseNotationType(xmlParserCtxtPtr ctxt) {
5766
2.52k
    const xmlChar *name;
5767
2.52k
    xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5768
5769
2.52k
    if (RAW != '(') {
5770
102
  xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5771
102
  return(NULL);
5772
102
    }
5773
2.42k
    SHRINK;
5774
2.61k
    do {
5775
2.61k
        NEXT;
5776
2.61k
  SKIP_BLANKS;
5777
2.61k
        name = xmlParseName(ctxt);
5778
2.61k
  if (name == NULL) {
5779
224
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5780
224
         "Name expected in NOTATION declaration\n");
5781
224
            xmlFreeEnumeration(ret);
5782
224
      return(NULL);
5783
224
  }
5784
2.38k
  tmp = ret;
5785
2.59k
  while (tmp != NULL) {
5786
234
      if (xmlStrEqual(name, tmp->name)) {
5787
24
    xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5788
24
    "standalone: attribute notation value token %s duplicated\n",
5789
24
         name, NULL);
5790
24
    if (!xmlDictOwns(ctxt->dict, name))
5791
0
        xmlFree((xmlChar *) name);
5792
24
    break;
5793
24
      }
5794
210
      tmp = tmp->next;
5795
210
  }
5796
2.38k
  if (tmp == NULL) {
5797
2.36k
      cur = xmlCreateEnumeration(name);
5798
2.36k
      if (cur == NULL) {
5799
0
                xmlFreeEnumeration(ret);
5800
0
                return(NULL);
5801
0
            }
5802
2.36k
      if (last == NULL) ret = last = cur;
5803
161
      else {
5804
161
    last->next = cur;
5805
161
    last = cur;
5806
161
      }
5807
2.36k
  }
5808
2.38k
  SKIP_BLANKS;
5809
2.38k
    } while (RAW == '|');
5810
2.19k
    if (RAW != ')') {
5811
174
  xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5812
174
        xmlFreeEnumeration(ret);
5813
174
  return(NULL);
5814
174
    }
5815
2.02k
    NEXT;
5816
2.02k
    return(ret);
5817
2.19k
}
5818
5819
/**
5820
 * xmlParseEnumerationType:
5821
 * @ctxt:  an XML parser context
5822
 *
5823
 * DEPRECATED: Internal function, don't use.
5824
 *
5825
 * parse an Enumeration attribute type.
5826
 *
5827
 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5828
 *
5829
 * [ VC: Enumeration ]
5830
 * Values of this type must match one of the Nmtoken tokens in
5831
 * the declaration
5832
 *
5833
 * Returns: the enumeration attribute tree built while parsing
5834
 */
5835
5836
xmlEnumerationPtr
5837
76.0k
xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
5838
76.0k
    xmlChar *name;
5839
76.0k
    xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5840
5841
76.0k
    if (RAW != '(') {
5842
5.06k
  xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
5843
5.06k
  return(NULL);
5844
5.06k
    }
5845
70.9k
    SHRINK;
5846
202k
    do {
5847
202k
        NEXT;
5848
202k
  SKIP_BLANKS;
5849
202k
        name = xmlParseNmtoken(ctxt);
5850
202k
  if (name == NULL) {
5851
234
      xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
5852
234
      return(ret);
5853
234
  }
5854
202k
  tmp = ret;
5855
534k
  while (tmp != NULL) {
5856
333k
      if (xmlStrEqual(name, tmp->name)) {
5857
515
    xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5858
515
    "standalone: attribute enumeration value token %s duplicated\n",
5859
515
         name, NULL);
5860
515
    if (!xmlDictOwns(ctxt->dict, name))
5861
515
        xmlFree(name);
5862
515
    break;
5863
515
      }
5864
332k
      tmp = tmp->next;
5865
332k
  }
5866
202k
  if (tmp == NULL) {
5867
201k
      cur = xmlCreateEnumeration(name);
5868
201k
      if (!xmlDictOwns(ctxt->dict, name))
5869
201k
    xmlFree(name);
5870
201k
      if (cur == NULL) {
5871
0
                xmlFreeEnumeration(ret);
5872
0
                return(NULL);
5873
0
            }
5874
201k
      if (last == NULL) ret = last = cur;
5875
130k
      else {
5876
130k
    last->next = cur;
5877
130k
    last = cur;
5878
130k
      }
5879
201k
  }
5880
202k
  SKIP_BLANKS;
5881
202k
    } while (RAW == '|');
5882
70.7k
    if (RAW != ')') {
5883
1.04k
  xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
5884
1.04k
  return(ret);
5885
1.04k
    }
5886
69.6k
    NEXT;
5887
69.6k
    return(ret);
5888
70.7k
}
5889
5890
/**
5891
 * xmlParseEnumeratedType:
5892
 * @ctxt:  an XML parser context
5893
 * @tree:  the enumeration tree built while parsing
5894
 *
5895
 * DEPRECATED: Internal function, don't use.
5896
 *
5897
 * parse an Enumerated attribute type.
5898
 *
5899
 * [57] EnumeratedType ::= NotationType | Enumeration
5900
 *
5901
 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5902
 *
5903
 *
5904
 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5905
 */
5906
5907
int
5908
78.7k
xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5909
78.7k
    if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5910
2.70k
  SKIP(8);
5911
2.70k
  if (SKIP_BLANKS == 0) {
5912
178
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5913
178
         "Space required after 'NOTATION'\n");
5914
178
      return(0);
5915
178
  }
5916
2.52k
  *tree = xmlParseNotationType(ctxt);
5917
2.52k
  if (*tree == NULL) return(0);
5918
2.02k
  return(XML_ATTRIBUTE_NOTATION);
5919
2.52k
    }
5920
76.0k
    *tree = xmlParseEnumerationType(ctxt);
5921
76.0k
    if (*tree == NULL) return(0);
5922
70.8k
    return(XML_ATTRIBUTE_ENUMERATION);
5923
76.0k
}
5924
5925
/**
5926
 * xmlParseAttributeType:
5927
 * @ctxt:  an XML parser context
5928
 * @tree:  the enumeration tree built while parsing
5929
 *
5930
 * DEPRECATED: Internal function, don't use.
5931
 *
5932
 * parse the Attribute list def for an element
5933
 *
5934
 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5935
 *
5936
 * [55] StringType ::= 'CDATA'
5937
 *
5938
 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5939
 *                        'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5940
 *
5941
 * Validity constraints for attribute values syntax are checked in
5942
 * xmlValidateAttributeValue()
5943
 *
5944
 * [ VC: ID ]
5945
 * Values of type ID must match the Name production. A name must not
5946
 * appear more than once in an XML document as a value of this type;
5947
 * i.e., ID values must uniquely identify the elements which bear them.
5948
 *
5949
 * [ VC: One ID per Element Type ]
5950
 * No element type may have more than one ID attribute specified.
5951
 *
5952
 * [ VC: ID Attribute Default ]
5953
 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
5954
 *
5955
 * [ VC: IDREF ]
5956
 * Values of type IDREF must match the Name production, and values
5957
 * of type IDREFS must match Names; each IDREF Name must match the value
5958
 * of an ID attribute on some element in the XML document; i.e. IDREF
5959
 * values must match the value of some ID attribute.
5960
 *
5961
 * [ VC: Entity Name ]
5962
 * Values of type ENTITY must match the Name production, values
5963
 * of type ENTITIES must match Names; each Entity Name must match the
5964
 * name of an unparsed entity declared in the DTD.
5965
 *
5966
 * [ VC: Name Token ]
5967
 * Values of type NMTOKEN must match the Nmtoken production; values
5968
 * of type NMTOKENS must match Nmtokens.
5969
 *
5970
 * Returns the attribute type
5971
 */
5972
int
5973
1.10M
xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5974
1.10M
    SHRINK;
5975
1.10M
    if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
5976
532k
  SKIP(5);
5977
532k
  return(XML_ATTRIBUTE_CDATA);
5978
575k
     } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
5979
8.69k
  SKIP(6);
5980
8.69k
  return(XML_ATTRIBUTE_IDREFS);
5981
567k
     } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
5982
35.7k
  SKIP(5);
5983
35.7k
  return(XML_ATTRIBUTE_IDREF);
5984
531k
     } else if ((RAW == 'I') && (NXT(1) == 'D')) {
5985
212k
        SKIP(2);
5986
212k
  return(XML_ATTRIBUTE_ID);
5987
319k
     } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
5988
1.25k
  SKIP(6);
5989
1.25k
  return(XML_ATTRIBUTE_ENTITY);
5990
317k
     } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
5991
640
  SKIP(8);
5992
640
  return(XML_ATTRIBUTE_ENTITIES);
5993
317k
     } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
5994
42.0k
  SKIP(8);
5995
42.0k
  return(XML_ATTRIBUTE_NMTOKENS);
5996
275k
     } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
5997
196k
  SKIP(7);
5998
196k
  return(XML_ATTRIBUTE_NMTOKEN);
5999
196k
     }
6000
78.7k
     return(xmlParseEnumeratedType(ctxt, tree));
6001
1.10M
}
6002
6003
/**
6004
 * xmlParseAttributeListDecl:
6005
 * @ctxt:  an XML parser context
6006
 *
6007
 * DEPRECATED: Internal function, don't use.
6008
 *
6009
 * : parse the Attribute list def for an element
6010
 *
6011
 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
6012
 *
6013
 * [53] AttDef ::= S Name S AttType S DefaultDecl
6014
 *
6015
 */
6016
void
6017
595k
xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
6018
595k
    const xmlChar *elemName;
6019
595k
    const xmlChar *attrName;
6020
595k
    xmlEnumerationPtr tree;
6021
6022
595k
    if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
6023
591k
  int inputid = ctxt->input->id;
6024
6025
591k
  SKIP(9);
6026
591k
  if (SKIP_BLANKS == 0) {
6027
1.60k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6028
1.60k
                     "Space required after '<!ATTLIST'\n");
6029
1.60k
  }
6030
591k
        elemName = xmlParseName(ctxt);
6031
591k
  if (elemName == NULL) {
6032
1.00k
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6033
1.00k
         "ATTLIST: no name for Element\n");
6034
1.00k
      return;
6035
1.00k
  }
6036
590k
  SKIP_BLANKS;
6037
590k
  GROW;
6038
1.68M
  while ((RAW != '>') && (ctxt->instate != XML_PARSER_EOF)) {
6039
1.11M
      int type;
6040
1.11M
      int def;
6041
1.11M
      xmlChar *defaultValue = NULL;
6042
6043
1.11M
      GROW;
6044
1.11M
            tree = NULL;
6045
1.11M
      attrName = xmlParseName(ctxt);
6046
1.11M
      if (attrName == NULL) {
6047
7.97k
    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6048
7.97k
             "ATTLIST: no name for Attribute\n");
6049
7.97k
    break;
6050
7.97k
      }
6051
1.11M
      GROW;
6052
1.11M
      if (SKIP_BLANKS == 0) {
6053
2.24k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6054
2.24k
            "Space required after the attribute name\n");
6055
2.24k
    break;
6056
2.24k
      }
6057
6058
1.10M
      type = xmlParseAttributeType(ctxt, &tree);
6059
1.10M
      if (type <= 0) {
6060
5.86k
          break;
6061
5.86k
      }
6062
6063
1.10M
      GROW;
6064
1.10M
      if (SKIP_BLANKS == 0) {
6065
2.75k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6066
2.75k
             "Space required after the attribute type\n");
6067
2.75k
          if (tree != NULL)
6068
1.26k
        xmlFreeEnumeration(tree);
6069
2.75k
    break;
6070
2.75k
      }
6071
6072
1.10M
      def = xmlParseDefaultDecl(ctxt, &defaultValue);
6073
1.10M
      if (def <= 0) {
6074
0
                if (defaultValue != NULL)
6075
0
        xmlFree(defaultValue);
6076
0
          if (tree != NULL)
6077
0
        xmlFreeEnumeration(tree);
6078
0
          break;
6079
0
      }
6080
1.10M
      if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
6081
62.0k
          xmlAttrNormalizeSpace(defaultValue, defaultValue);
6082
6083
1.10M
      GROW;
6084
1.10M
            if (RAW != '>') {
6085
786k
    if (SKIP_BLANKS == 0) {
6086
7.16k
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6087
7.16k
      "Space required after the attribute default value\n");
6088
7.16k
        if (defaultValue != NULL)
6089
2.80k
      xmlFree(defaultValue);
6090
7.16k
        if (tree != NULL)
6091
791
      xmlFreeEnumeration(tree);
6092
7.16k
        break;
6093
7.16k
    }
6094
786k
      }
6095
1.09M
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6096
1.09M
    (ctxt->sax->attributeDecl != NULL))
6097
1.02M
    ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
6098
1.02M
                          type, def, defaultValue, tree);
6099
69.1k
      else if (tree != NULL)
6100
4.68k
    xmlFreeEnumeration(tree);
6101
6102
1.09M
      if ((ctxt->sax2) && (defaultValue != NULL) &&
6103
1.09M
          (def != XML_ATTRIBUTE_IMPLIED) &&
6104
1.09M
    (def != XML_ATTRIBUTE_REQUIRED)) {
6105
84.5k
    xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
6106
84.5k
      }
6107
1.09M
      if (ctxt->sax2) {
6108
781k
    xmlAddSpecialAttr(ctxt, elemName, attrName, type);
6109
781k
      }
6110
1.09M
      if (defaultValue != NULL)
6111
118k
          xmlFree(defaultValue);
6112
1.09M
      GROW;
6113
1.09M
  }
6114
590k
  if (RAW == '>') {
6115
565k
      if (inputid != ctxt->input->id) {
6116
93
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6117
93
                               "Attribute list declaration doesn't start and"
6118
93
                               " stop in the same entity\n");
6119
93
      }
6120
565k
      NEXT;
6121
565k
  }
6122
590k
    }
6123
595k
}
6124
6125
/**
6126
 * xmlParseElementMixedContentDecl:
6127
 * @ctxt:  an XML parser context
6128
 * @inputchk:  the input used for the current entity, needed for boundary checks
6129
 *
6130
 * DEPRECATED: Internal function, don't use.
6131
 *
6132
 * parse the declaration for a Mixed Element content
6133
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6134
 *
6135
 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
6136
 *                '(' S? '#PCDATA' S? ')'
6137
 *
6138
 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
6139
 *
6140
 * [ VC: No Duplicate Types ]
6141
 * The same name must not appear more than once in a single
6142
 * mixed-content declaration.
6143
 *
6144
 * returns: the list of the xmlElementContentPtr describing the element choices
6145
 */
6146
xmlElementContentPtr
6147
195k
xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6148
195k
    xmlElementContentPtr ret = NULL, cur = NULL, n;
6149
195k
    const xmlChar *elem = NULL;
6150
6151
195k
    GROW;
6152
195k
    if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6153
195k
  SKIP(7);
6154
195k
  SKIP_BLANKS;
6155
195k
  SHRINK;
6156
195k
  if (RAW == ')') {
6157
165k
      if (ctxt->input->id != inputchk) {
6158
25
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6159
25
                               "Element content declaration doesn't start and"
6160
25
                               " stop in the same entity\n");
6161
25
      }
6162
165k
      NEXT;
6163
165k
      ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6164
165k
      if (ret == NULL)
6165
0
          return(NULL);
6166
165k
      if (RAW == '*') {
6167
88
    ret->ocur = XML_ELEMENT_CONTENT_MULT;
6168
88
    NEXT;
6169
88
      }
6170
165k
      return(ret);
6171
165k
  }
6172
29.4k
  if ((RAW == '(') || (RAW == '|')) {
6173
28.8k
      ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6174
28.8k
      if (ret == NULL) return(NULL);
6175
28.8k
  }
6176
194k
  while ((RAW == '|') && (ctxt->instate != XML_PARSER_EOF)) {
6177
165k
      NEXT;
6178
165k
      if (elem == NULL) {
6179
28.7k
          ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6180
28.7k
    if (ret == NULL) {
6181
0
        xmlFreeDocElementContent(ctxt->myDoc, cur);
6182
0
                    return(NULL);
6183
0
                }
6184
28.7k
    ret->c1 = cur;
6185
28.7k
    if (cur != NULL)
6186
28.7k
        cur->parent = ret;
6187
28.7k
    cur = ret;
6188
136k
      } else {
6189
136k
          n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6190
136k
    if (n == NULL) {
6191
0
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6192
0
                    return(NULL);
6193
0
                }
6194
136k
    n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6195
136k
    if (n->c1 != NULL)
6196
136k
        n->c1->parent = n;
6197
136k
          cur->c2 = n;
6198
136k
    if (n != NULL)
6199
136k
        n->parent = cur;
6200
136k
    cur = n;
6201
136k
      }
6202
165k
      SKIP_BLANKS;
6203
165k
      elem = xmlParseName(ctxt);
6204
165k
      if (elem == NULL) {
6205
207
    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6206
207
      "xmlParseElementMixedContentDecl : Name expected\n");
6207
207
    xmlFreeDocElementContent(ctxt->myDoc, ret);
6208
207
    return(NULL);
6209
207
      }
6210
164k
      SKIP_BLANKS;
6211
164k
      GROW;
6212
164k
  }
6213
29.2k
  if ((RAW == ')') && (NXT(1) == '*')) {
6214
27.9k
      if (elem != NULL) {
6215
27.9k
    cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
6216
27.9k
                                   XML_ELEMENT_CONTENT_ELEMENT);
6217
27.9k
    if (cur->c2 != NULL)
6218
27.9k
        cur->c2->parent = cur;
6219
27.9k
            }
6220
27.9k
            if (ret != NULL)
6221
27.9k
                ret->ocur = XML_ELEMENT_CONTENT_MULT;
6222
27.9k
      if (ctxt->input->id != inputchk) {
6223
0
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6224
0
                               "Element content declaration doesn't start and"
6225
0
                               " stop in the same entity\n");
6226
0
      }
6227
27.9k
      SKIP(2);
6228
27.9k
  } else {
6229
1.30k
      xmlFreeDocElementContent(ctxt->myDoc, ret);
6230
1.30k
      xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
6231
1.30k
      return(NULL);
6232
1.30k
  }
6233
6234
29.2k
    } else {
6235
0
  xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
6236
0
    }
6237
27.9k
    return(ret);
6238
195k
}
6239
6240
/**
6241
 * xmlParseElementChildrenContentDeclPriv:
6242
 * @ctxt:  an XML parser context
6243
 * @inputchk:  the input used for the current entity, needed for boundary checks
6244
 * @depth: the level of recursion
6245
 *
6246
 * parse the declaration for a Mixed Element content
6247
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6248
 *
6249
 *
6250
 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6251
 *
6252
 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6253
 *
6254
 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6255
 *
6256
 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6257
 *
6258
 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6259
 * TODO Parameter-entity replacement text must be properly nested
6260
 *  with parenthesized groups. That is to say, if either of the
6261
 *  opening or closing parentheses in a choice, seq, or Mixed
6262
 *  construct is contained in the replacement text for a parameter
6263
 *  entity, both must be contained in the same replacement text. For
6264
 *  interoperability, if a parameter-entity reference appears in a
6265
 *  choice, seq, or Mixed construct, its replacement text should not
6266
 *  be empty, and neither the first nor last non-blank character of
6267
 *  the replacement text should be a connector (| or ,).
6268
 *
6269
 * Returns the tree of xmlElementContentPtr describing the element
6270
 *          hierarchy.
6271
 */
6272
static xmlElementContentPtr
6273
xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
6274
441k
                                       int depth) {
6275
441k
    xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
6276
441k
    const xmlChar *elem;
6277
441k
    xmlChar type = 0;
6278
6279
441k
    if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
6280
441k
        (depth >  2048)) {
6281
46
        xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED,
6282
46
"xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n",
6283
46
                          depth);
6284
46
  return(NULL);
6285
46
    }
6286
441k
    SKIP_BLANKS;
6287
441k
    GROW;
6288
441k
    if (RAW == '(') {
6289
71.2k
  int inputid = ctxt->input->id;
6290
6291
        /* Recurse on first child */
6292
71.2k
  NEXT;
6293
71.2k
  SKIP_BLANKS;
6294
71.2k
        cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6295
71.2k
                                                           depth + 1);
6296
71.2k
        if (cur == NULL)
6297
43.4k
            return(NULL);
6298
27.8k
  SKIP_BLANKS;
6299
27.8k
  GROW;
6300
370k
    } else {
6301
370k
  elem = xmlParseName(ctxt);
6302
370k
  if (elem == NULL) {
6303
2.98k
      xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6304
2.98k
      return(NULL);
6305
2.98k
  }
6306
367k
        cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6307
367k
  if (cur == NULL) {
6308
0
      xmlErrMemory(ctxt, NULL);
6309
0
      return(NULL);
6310
0
  }
6311
367k
  GROW;
6312
367k
  if (RAW == '?') {
6313
17.0k
      cur->ocur = XML_ELEMENT_CONTENT_OPT;
6314
17.0k
      NEXT;
6315
350k
  } else if (RAW == '*') {
6316
49.8k
      cur->ocur = XML_ELEMENT_CONTENT_MULT;
6317
49.8k
      NEXT;
6318
300k
  } else if (RAW == '+') {
6319
17.1k
      cur->ocur = XML_ELEMENT_CONTENT_PLUS;
6320
17.1k
      NEXT;
6321
283k
  } else {
6322
283k
      cur->ocur = XML_ELEMENT_CONTENT_ONCE;
6323
283k
  }
6324
367k
  GROW;
6325
367k
    }
6326
394k
    SKIP_BLANKS;
6327
394k
    SHRINK;
6328
1.16M
    while ((RAW != ')') && (ctxt->instate != XML_PARSER_EOF)) {
6329
        /*
6330
   * Each loop we parse one separator and one element.
6331
   */
6332
780k
        if (RAW == ',') {
6333
238k
      if (type == 0) type = CUR;
6334
6335
      /*
6336
       * Detect "Name | Name , Name" error
6337
       */
6338
132k
      else if (type != CUR) {
6339
94
    xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6340
94
        "xmlParseElementChildrenContentDecl : '%c' expected\n",
6341
94
                      type);
6342
94
    if ((last != NULL) && (last != ret))
6343
94
        xmlFreeDocElementContent(ctxt->myDoc, last);
6344
94
    if (ret != NULL)
6345
94
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6346
94
    return(NULL);
6347
94
      }
6348
238k
      NEXT;
6349
6350
238k
      op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
6351
238k
      if (op == NULL) {
6352
0
    if ((last != NULL) && (last != ret))
6353
0
        xmlFreeDocElementContent(ctxt->myDoc, last);
6354
0
          xmlFreeDocElementContent(ctxt->myDoc, ret);
6355
0
    return(NULL);
6356
0
      }
6357
238k
      if (last == NULL) {
6358
106k
    op->c1 = ret;
6359
106k
    if (ret != NULL)
6360
106k
        ret->parent = op;
6361
106k
    ret = cur = op;
6362
132k
      } else {
6363
132k
          cur->c2 = op;
6364
132k
    if (op != NULL)
6365
132k
        op->parent = cur;
6366
132k
    op->c1 = last;
6367
132k
    if (last != NULL)
6368
132k
        last->parent = op;
6369
132k
    cur =op;
6370
132k
    last = NULL;
6371
132k
      }
6372
541k
  } else if (RAW == '|') {
6373
533k
      if (type == 0) type = CUR;
6374
6375
      /*
6376
       * Detect "Name , Name | Name" error
6377
       */
6378
430k
      else if (type != CUR) {
6379
110
    xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6380
110
        "xmlParseElementChildrenContentDecl : '%c' expected\n",
6381
110
          type);
6382
110
    if ((last != NULL) && (last != ret))
6383
110
        xmlFreeDocElementContent(ctxt->myDoc, last);
6384
110
    if (ret != NULL)
6385
110
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6386
110
    return(NULL);
6387
110
      }
6388
533k
      NEXT;
6389
6390
533k
      op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6391
533k
      if (op == NULL) {
6392
0
    if ((last != NULL) && (last != ret))
6393
0
        xmlFreeDocElementContent(ctxt->myDoc, last);
6394
0
    if (ret != NULL)
6395
0
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6396
0
    return(NULL);
6397
0
      }
6398
533k
      if (last == NULL) {
6399
103k
    op->c1 = ret;
6400
103k
    if (ret != NULL)
6401
103k
        ret->parent = op;
6402
103k
    ret = cur = op;
6403
430k
      } else {
6404
430k
          cur->c2 = op;
6405
430k
    if (op != NULL)
6406
430k
        op->parent = cur;
6407
430k
    op->c1 = last;
6408
430k
    if (last != NULL)
6409
430k
        last->parent = op;
6410
430k
    cur =op;
6411
430k
    last = NULL;
6412
430k
      }
6413
533k
  } else {
6414
7.94k
      xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
6415
7.94k
      if ((last != NULL) && (last != ret))
6416
2.57k
          xmlFreeDocElementContent(ctxt->myDoc, last);
6417
7.94k
      if (ret != NULL)
6418
7.94k
    xmlFreeDocElementContent(ctxt->myDoc, ret);
6419
7.94k
      return(NULL);
6420
7.94k
  }
6421
772k
  GROW;
6422
772k
  SKIP_BLANKS;
6423
772k
  GROW;
6424
772k
  if (RAW == '(') {
6425
36.3k
      int inputid = ctxt->input->id;
6426
      /* Recurse on second child */
6427
36.3k
      NEXT;
6428
36.3k
      SKIP_BLANKS;
6429
36.3k
      last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6430
36.3k
                                                          depth + 1);
6431
36.3k
            if (last == NULL) {
6432
727
    if (ret != NULL)
6433
727
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6434
727
    return(NULL);
6435
727
            }
6436
35.6k
      SKIP_BLANKS;
6437
736k
  } else {
6438
736k
      elem = xmlParseName(ctxt);
6439
736k
      if (elem == NULL) {
6440
727
    xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6441
727
    if (ret != NULL)
6442
727
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6443
727
    return(NULL);
6444
727
      }
6445
735k
      last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6446
735k
      if (last == NULL) {
6447
0
    if (ret != NULL)
6448
0
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6449
0
    return(NULL);
6450
0
      }
6451
735k
      if (RAW == '?') {
6452
131k
    last->ocur = XML_ELEMENT_CONTENT_OPT;
6453
131k
    NEXT;
6454
604k
      } else if (RAW == '*') {
6455
52.8k
    last->ocur = XML_ELEMENT_CONTENT_MULT;
6456
52.8k
    NEXT;
6457
551k
      } else if (RAW == '+') {
6458
12.3k
    last->ocur = XML_ELEMENT_CONTENT_PLUS;
6459
12.3k
    NEXT;
6460
539k
      } else {
6461
539k
    last->ocur = XML_ELEMENT_CONTENT_ONCE;
6462
539k
      }
6463
735k
  }
6464
771k
  SKIP_BLANKS;
6465
771k
  GROW;
6466
771k
    }
6467
385k
    if ((cur != NULL) && (last != NULL)) {
6468
205k
        cur->c2 = last;
6469
205k
  if (last != NULL)
6470
205k
      last->parent = cur;
6471
205k
    }
6472
385k
    if (ctxt->input->id != inputchk) {
6473
49
  xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6474
49
                       "Element content declaration doesn't start and stop in"
6475
49
                       " the same entity\n");
6476
49
    }
6477
385k
    NEXT;
6478
385k
    if (RAW == '?') {
6479
6.36k
  if (ret != NULL) {
6480
6.36k
      if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6481
6.36k
          (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6482
194
          ret->ocur = XML_ELEMENT_CONTENT_MULT;
6483
6.17k
      else
6484
6.17k
          ret->ocur = XML_ELEMENT_CONTENT_OPT;
6485
6.36k
  }
6486
6.36k
  NEXT;
6487
378k
    } else if (RAW == '*') {
6488
186k
  if (ret != NULL) {
6489
186k
      ret->ocur = XML_ELEMENT_CONTENT_MULT;
6490
186k
      cur = ret;
6491
      /*
6492
       * Some normalization:
6493
       * (a | b* | c?)* == (a | b | c)*
6494
       */
6495
638k
      while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6496
452k
    if ((cur->c1 != NULL) &&
6497
452k
              ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6498
452k
         (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6499
43.9k
        cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6500
452k
    if ((cur->c2 != NULL) &&
6501
452k
              ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6502
452k
         (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6503
7.23k
        cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6504
452k
    cur = cur->c2;
6505
452k
      }
6506
186k
  }
6507
186k
  NEXT;
6508
192k
    } else if (RAW == '+') {
6509
17.1k
  if (ret != NULL) {
6510
17.1k
      int found = 0;
6511
6512
17.1k
      if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6513
17.1k
          (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6514
21
          ret->ocur = XML_ELEMENT_CONTENT_MULT;
6515
17.1k
      else
6516
17.1k
          ret->ocur = XML_ELEMENT_CONTENT_PLUS;
6517
      /*
6518
       * Some normalization:
6519
       * (a | b*)+ == (a | b)*
6520
       * (a | b?)+ == (a | b)*
6521
       */
6522
25.5k
      while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6523
8.41k
    if ((cur->c1 != NULL) &&
6524
8.41k
              ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6525
8.41k
         (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6526
115
        cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6527
115
        found = 1;
6528
115
    }
6529
8.41k
    if ((cur->c2 != NULL) &&
6530
8.41k
              ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6531
8.41k
         (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6532
93
        cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6533
93
        found = 1;
6534
93
    }
6535
8.41k
    cur = cur->c2;
6536
8.41k
      }
6537
17.1k
      if (found)
6538
149
    ret->ocur = XML_ELEMENT_CONTENT_MULT;
6539
17.1k
  }
6540
17.1k
  NEXT;
6541
17.1k
    }
6542
385k
    return(ret);
6543
394k
}
6544
6545
/**
6546
 * xmlParseElementChildrenContentDecl:
6547
 * @ctxt:  an XML parser context
6548
 * @inputchk:  the input used for the current entity, needed for boundary checks
6549
 *
6550
 * DEPRECATED: Internal function, don't use.
6551
 *
6552
 * parse the declaration for a Mixed Element content
6553
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6554
 *
6555
 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6556
 *
6557
 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6558
 *
6559
 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6560
 *
6561
 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6562
 *
6563
 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6564
 * TODO Parameter-entity replacement text must be properly nested
6565
 *  with parenthesized groups. That is to say, if either of the
6566
 *  opening or closing parentheses in a choice, seq, or Mixed
6567
 *  construct is contained in the replacement text for a parameter
6568
 *  entity, both must be contained in the same replacement text. For
6569
 *  interoperability, if a parameter-entity reference appears in a
6570
 *  choice, seq, or Mixed construct, its replacement text should not
6571
 *  be empty, and neither the first nor last non-blank character of
6572
 *  the replacement text should be a connector (| or ,).
6573
 *
6574
 * Returns the tree of xmlElementContentPtr describing the element
6575
 *          hierarchy.
6576
 */
6577
xmlElementContentPtr
6578
0
xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6579
    /* stub left for API/ABI compat */
6580
0
    return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6581
0
}
6582
6583
/**
6584
 * xmlParseElementContentDecl:
6585
 * @ctxt:  an XML parser context
6586
 * @name:  the name of the element being defined.
6587
 * @result:  the Element Content pointer will be stored here if any
6588
 *
6589
 * DEPRECATED: Internal function, don't use.
6590
 *
6591
 * parse the declaration for an Element content either Mixed or Children,
6592
 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
6593
 *
6594
 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6595
 *
6596
 * returns: the type of element content XML_ELEMENT_TYPE_xxx
6597
 */
6598
6599
int
6600
xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
6601
529k
                           xmlElementContentPtr *result) {
6602
6603
529k
    xmlElementContentPtr tree = NULL;
6604
529k
    int inputid = ctxt->input->id;
6605
529k
    int res;
6606
6607
529k
    *result = NULL;
6608
6609
529k
    if (RAW != '(') {
6610
0
  xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6611
0
    "xmlParseElementContentDecl : %s '(' expected\n", name);
6612
0
  return(-1);
6613
0
    }
6614
529k
    NEXT;
6615
529k
    GROW;
6616
529k
    if (ctxt->instate == XML_PARSER_EOF)
6617
0
        return(-1);
6618
529k
    SKIP_BLANKS;
6619
529k
    if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6620
195k
        tree = xmlParseElementMixedContentDecl(ctxt, inputid);
6621
195k
  res = XML_ELEMENT_TYPE_MIXED;
6622
333k
    } else {
6623
333k
        tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
6624
333k
  res = XML_ELEMENT_TYPE_ELEMENT;
6625
333k
    }
6626
529k
    SKIP_BLANKS;
6627
529k
    *result = tree;
6628
529k
    return(res);
6629
529k
}
6630
6631
/**
6632
 * xmlParseElementDecl:
6633
 * @ctxt:  an XML parser context
6634
 *
6635
 * DEPRECATED: Internal function, don't use.
6636
 *
6637
 * parse an Element declaration.
6638
 *
6639
 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6640
 *
6641
 * [ VC: Unique Element Type Declaration ]
6642
 * No element type may be declared more than once
6643
 *
6644
 * Returns the type of the element, or -1 in case of error
6645
 */
6646
int
6647
823k
xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
6648
823k
    const xmlChar *name;
6649
823k
    int ret = -1;
6650
823k
    xmlElementContentPtr content  = NULL;
6651
6652
    /* GROW; done in the caller */
6653
823k
    if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
6654
818k
  int inputid = ctxt->input->id;
6655
6656
818k
  SKIP(9);
6657
818k
  if (SKIP_BLANKS == 0) {
6658
1.05k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6659
1.05k
               "Space required after 'ELEMENT'\n");
6660
1.05k
      return(-1);
6661
1.05k
  }
6662
817k
        name = xmlParseName(ctxt);
6663
817k
  if (name == NULL) {
6664
983
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6665
983
         "xmlParseElementDecl: no name for Element\n");
6666
983
      return(-1);
6667
983
  }
6668
816k
  if (SKIP_BLANKS == 0) {
6669
3.33k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6670
3.33k
         "Space required after the element name\n");
6671
3.33k
  }
6672
816k
  if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
6673
276k
      SKIP(5);
6674
      /*
6675
       * Element must always be empty.
6676
       */
6677
276k
      ret = XML_ELEMENT_TYPE_EMPTY;
6678
540k
  } else if ((RAW == 'A') && (NXT(1) == 'N') &&
6679
540k
             (NXT(2) == 'Y')) {
6680
5.11k
      SKIP(3);
6681
      /*
6682
       * Element is a generic container.
6683
       */
6684
5.11k
      ret = XML_ELEMENT_TYPE_ANY;
6685
534k
  } else if (RAW == '(') {
6686
529k
      ret = xmlParseElementContentDecl(ctxt, name, &content);
6687
529k
  } else {
6688
      /*
6689
       * [ WFC: PEs in Internal Subset ] error handling.
6690
       */
6691
5.71k
      if ((RAW == '%') && (ctxt->external == 0) &&
6692
5.71k
          (ctxt->inputNr == 1)) {
6693
150
    xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
6694
150
    "PEReference: forbidden within markup decl in internal subset\n");
6695
5.56k
      } else {
6696
5.56k
    xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6697
5.56k
          "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6698
5.56k
            }
6699
5.71k
      return(-1);
6700
5.71k
  }
6701
6702
810k
  SKIP_BLANKS;
6703
6704
810k
  if (RAW != '>') {
6705
12.6k
      xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
6706
12.6k
      if (content != NULL) {
6707
1.23k
    xmlFreeDocElementContent(ctxt->myDoc, content);
6708
1.23k
      }
6709
798k
  } else {
6710
798k
      if (inputid != ctxt->input->id) {
6711
149
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6712
149
                               "Element declaration doesn't start and stop in"
6713
149
                               " the same entity\n");
6714
149
      }
6715
6716
798k
      NEXT;
6717
798k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6718
798k
    (ctxt->sax->elementDecl != NULL)) {
6719
745k
    if (content != NULL)
6720
478k
        content->parent = NULL;
6721
745k
          ctxt->sax->elementDecl(ctxt->userData, name, ret,
6722
745k
                           content);
6723
745k
    if ((content != NULL) && (content->parent == NULL)) {
6724
        /*
6725
         * this is a trick: if xmlAddElementDecl is called,
6726
         * instead of copying the full tree it is plugged directly
6727
         * if called from the parser. Avoid duplicating the
6728
         * interfaces or change the API/ABI
6729
         */
6730
5.08k
        xmlFreeDocElementContent(ctxt->myDoc, content);
6731
5.08k
    }
6732
745k
      } else if (content != NULL) {
6733
36.3k
    xmlFreeDocElementContent(ctxt->myDoc, content);
6734
36.3k
      }
6735
798k
  }
6736
810k
    }
6737
815k
    return(ret);
6738
823k
}
6739
6740
/**
6741
 * xmlParseConditionalSections
6742
 * @ctxt:  an XML parser context
6743
 *
6744
 * [61] conditionalSect ::= includeSect | ignoreSect
6745
 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6746
 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6747
 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6748
 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6749
 */
6750
6751
static void
6752
6.86k
xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
6753
6.86k
    int *inputIds = NULL;
6754
6.86k
    size_t inputIdsSize = 0;
6755
6.86k
    size_t depth = 0;
6756
6757
33.2k
    while (ctxt->instate != XML_PARSER_EOF) {
6758
33.1k
        if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6759
19.5k
            int id = ctxt->input->id;
6760
6761
19.5k
            SKIP(3);
6762
19.5k
            SKIP_BLANKS;
6763
6764
19.5k
            if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
6765
15.6k
                SKIP(7);
6766
15.6k
                SKIP_BLANKS;
6767
15.6k
                if (RAW != '[') {
6768
192
                    xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6769
192
                    xmlHaltParser(ctxt);
6770
192
                    goto error;
6771
192
                }
6772
15.4k
                if (ctxt->input->id != id) {
6773
6
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6774
6
                                   "All markup of the conditional section is"
6775
6
                                   " not in the same entity\n");
6776
6
                }
6777
15.4k
                NEXT;
6778
6779
15.4k
                if (inputIdsSize <= depth) {
6780
4.69k
                    int *tmp;
6781
6782
4.69k
                    inputIdsSize = (inputIdsSize == 0 ? 4 : inputIdsSize * 2);
6783
4.69k
                    tmp = (int *) xmlRealloc(inputIds,
6784
4.69k
                            inputIdsSize * sizeof(int));
6785
4.69k
                    if (tmp == NULL) {
6786
0
                        xmlErrMemory(ctxt, NULL);
6787
0
                        goto error;
6788
0
                    }
6789
4.69k
                    inputIds = tmp;
6790
4.69k
                }
6791
15.4k
                inputIds[depth] = id;
6792
15.4k
                depth++;
6793
15.4k
            } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
6794
2.62k
                int state;
6795
2.62k
                xmlParserInputState instate;
6796
2.62k
                size_t ignoreDepth = 0;
6797
6798
2.62k
                SKIP(6);
6799
2.62k
                SKIP_BLANKS;
6800
2.62k
                if (RAW != '[') {
6801
150
                    xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6802
150
                    xmlHaltParser(ctxt);
6803
150
                    goto error;
6804
150
                }
6805
2.47k
                if (ctxt->input->id != id) {
6806
11
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6807
11
                                   "All markup of the conditional section is"
6808
11
                                   " not in the same entity\n");
6809
11
                }
6810
2.47k
                NEXT;
6811
6812
                /*
6813
                 * Parse up to the end of the conditional section but disable
6814
                 * SAX event generating DTD building in the meantime
6815
                 */
6816
2.47k
                state = ctxt->disableSAX;
6817
2.47k
                instate = ctxt->instate;
6818
2.47k
                if (ctxt->recovery == 0) ctxt->disableSAX = 1;
6819
2.47k
                ctxt->instate = XML_PARSER_IGNORE;
6820
6821
582k
                while (RAW != 0) {
6822
580k
                    if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6823
4.20k
                        SKIP(3);
6824
4.20k
                        ignoreDepth++;
6825
                        /* Check for integer overflow */
6826
4.20k
                        if (ignoreDepth == 0) {
6827
0
                            xmlErrMemory(ctxt, NULL);
6828
0
                            goto error;
6829
0
                        }
6830
576k
                    } else if ((RAW == ']') && (NXT(1) == ']') &&
6831
576k
                               (NXT(2) == '>')) {
6832
3.74k
                        if (ignoreDepth == 0)
6833
1.06k
                            break;
6834
2.67k
                        SKIP(3);
6835
2.67k
                        ignoreDepth--;
6836
572k
                    } else {
6837
572k
                        NEXT;
6838
572k
                    }
6839
580k
                }
6840
6841
2.47k
                ctxt->disableSAX = state;
6842
2.47k
                ctxt->instate = instate;
6843
6844
2.47k
    if (RAW == 0) {
6845
1.40k
        xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
6846
1.40k
                    goto error;
6847
1.40k
    }
6848
1.06k
                if (ctxt->input->id != id) {
6849
6
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6850
6
                                   "All markup of the conditional section is"
6851
6
                                   " not in the same entity\n");
6852
6
                }
6853
1.06k
                SKIP(3);
6854
1.21k
            } else {
6855
1.21k
                xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
6856
1.21k
                xmlHaltParser(ctxt);
6857
1.21k
                goto error;
6858
1.21k
            }
6859
19.5k
        } else if ((depth > 0) &&
6860
13.6k
                   (RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6861
7.52k
            depth--;
6862
7.52k
            if (ctxt->input->id != inputIds[depth]) {
6863
205
                xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6864
205
                               "All markup of the conditional section is not"
6865
205
                               " in the same entity\n");
6866
205
            }
6867
7.52k
            SKIP(3);
6868
7.52k
        } else {
6869
6.10k
            int id = ctxt->input->id;
6870
6.10k
            unsigned long cons = CUR_CONSUMED;
6871
6872
6.10k
            xmlParseMarkupDecl(ctxt);
6873
6874
6.10k
            if ((id == ctxt->input->id) && (cons == CUR_CONSUMED)) {
6875
1.50k
                xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6876
1.50k
                xmlHaltParser(ctxt);
6877
1.50k
                goto error;
6878
1.50k
            }
6879
6.10k
        }
6880
6881
28.6k
        if (depth == 0)
6882
2.24k
            break;
6883
6884
26.4k
        SKIP_BLANKS;
6885
26.4k
        GROW;
6886
26.4k
    }
6887
6888
6.86k
error:
6889
6.86k
    xmlFree(inputIds);
6890
6.86k
}
6891
6892
/**
6893
 * xmlParseMarkupDecl:
6894
 * @ctxt:  an XML parser context
6895
 *
6896
 * DEPRECATED: Internal function, don't use.
6897
 *
6898
 * parse Markup declarations
6899
 *
6900
 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6901
 *                     NotationDecl | PI | Comment
6902
 *
6903
 * [ VC: Proper Declaration/PE Nesting ]
6904
 * Parameter-entity replacement text must be properly nested with
6905
 * markup declarations. That is to say, if either the first character
6906
 * or the last character of a markup declaration (markupdecl above) is
6907
 * contained in the replacement text for a parameter-entity reference,
6908
 * both must be contained in the same replacement text.
6909
 *
6910
 * [ WFC: PEs in Internal Subset ]
6911
 * In the internal DTD subset, parameter-entity references can occur
6912
 * only where markup declarations can occur, not within markup declarations.
6913
 * (This does not apply to references that occur in external parameter
6914
 * entities or to the external subset.)
6915
 */
6916
void
6917
2.96M
xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
6918
2.96M
    GROW;
6919
2.96M
    if (CUR == '<') {
6920
2.76M
        if (NXT(1) == '!') {
6921
2.75M
      switch (NXT(2)) {
6922
1.66M
          case 'E':
6923
1.66M
        if (NXT(3) == 'L')
6924
823k
      xmlParseElementDecl(ctxt);
6925
841k
        else if (NXT(3) == 'N')
6926
840k
      xmlParseEntityDecl(ctxt);
6927
1.66M
        break;
6928
595k
          case 'A':
6929
595k
        xmlParseAttributeListDecl(ctxt);
6930
595k
        break;
6931
4.13k
          case 'N':
6932
4.13k
        xmlParseNotationDecl(ctxt);
6933
4.13k
        break;
6934
483k
          case '-':
6935
483k
        xmlParseComment(ctxt);
6936
483k
        break;
6937
2.58k
    default:
6938
        /* there is an error but it will be detected later */
6939
2.58k
        break;
6940
2.75M
      }
6941
2.75M
  } else if (NXT(1) == '?') {
6942
2.32k
      xmlParsePI(ctxt);
6943
2.32k
  }
6944
2.76M
    }
6945
6946
    /*
6947
     * detect requirement to exit there and act accordingly
6948
     * and avoid having instate overridden later on
6949
     */
6950
2.96M
    if (ctxt->instate == XML_PARSER_EOF)
6951
10.9k
        return;
6952
6953
2.95M
    ctxt->instate = XML_PARSER_DTD;
6954
2.95M
}
6955
6956
/**
6957
 * xmlParseTextDecl:
6958
 * @ctxt:  an XML parser context
6959
 *
6960
 * DEPRECATED: Internal function, don't use.
6961
 *
6962
 * parse an XML declaration header for external entities
6963
 *
6964
 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
6965
 */
6966
6967
void
6968
17.8k
xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
6969
17.8k
    xmlChar *version;
6970
17.8k
    const xmlChar *encoding;
6971
17.8k
    int oldstate;
6972
6973
    /*
6974
     * We know that '<?xml' is here.
6975
     */
6976
17.8k
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
6977
17.6k
  SKIP(5);
6978
17.6k
    } else {
6979
137
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
6980
137
  return;
6981
137
    }
6982
6983
    /* Avoid expansion of parameter entities when skipping blanks. */
6984
17.6k
    oldstate = ctxt->instate;
6985
17.6k
    ctxt->instate = XML_PARSER_START;
6986
6987
17.6k
    if (SKIP_BLANKS == 0) {
6988
0
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6989
0
           "Space needed after '<?xml'\n");
6990
0
    }
6991
6992
    /*
6993
     * We may have the VersionInfo here.
6994
     */
6995
17.6k
    version = xmlParseVersionInfo(ctxt);
6996
17.6k
    if (version == NULL)
6997
1.07k
  version = xmlCharStrdup(XML_DEFAULT_VERSION);
6998
16.6k
    else {
6999
16.6k
  if (SKIP_BLANKS == 0) {
7000
677
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7001
677
               "Space needed here\n");
7002
677
  }
7003
16.6k
    }
7004
17.6k
    ctxt->input->version = version;
7005
7006
    /*
7007
     * We must have the encoding declaration
7008
     */
7009
17.6k
    encoding = xmlParseEncodingDecl(ctxt);
7010
17.6k
    if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7011
  /*
7012
   * The XML REC instructs us to stop parsing right here
7013
   */
7014
148
        ctxt->instate = oldstate;
7015
148
        return;
7016
148
    }
7017
17.5k
    if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
7018
3.44k
  xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
7019
3.44k
           "Missing encoding in text declaration\n");
7020
3.44k
    }
7021
7022
17.5k
    SKIP_BLANKS;
7023
17.5k
    if ((RAW == '?') && (NXT(1) == '>')) {
7024
14.7k
        SKIP(2);
7025
14.7k
    } else if (RAW == '>') {
7026
        /* Deprecated old WD ... */
7027
115
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
7028
115
  NEXT;
7029
2.72k
    } else {
7030
2.72k
        int c;
7031
7032
2.72k
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
7033
254k
        while ((c = CUR) != 0) {
7034
254k
            NEXT;
7035
254k
            if (c == '>')
7036
1.89k
                break;
7037
254k
        }
7038
2.72k
    }
7039
7040
17.5k
    ctxt->instate = oldstate;
7041
17.5k
}
7042
7043
/**
7044
 * xmlParseExternalSubset:
7045
 * @ctxt:  an XML parser context
7046
 * @ExternalID: the external identifier
7047
 * @SystemID: the system identifier (or URL)
7048
 *
7049
 * parse Markup declarations from an external subset
7050
 *
7051
 * [30] extSubset ::= textDecl? extSubsetDecl
7052
 *
7053
 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
7054
 */
7055
void
7056
xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
7057
35.9k
                       const xmlChar *SystemID) {
7058
35.9k
    xmlDetectSAX2(ctxt);
7059
35.9k
    GROW;
7060
7061
35.9k
    if ((ctxt->encoding == NULL) &&
7062
35.9k
        (ctxt->input->end - ctxt->input->cur >= 4)) {
7063
35.8k
        xmlChar start[4];
7064
35.8k
  xmlCharEncoding enc;
7065
7066
35.8k
  start[0] = RAW;
7067
35.8k
  start[1] = NXT(1);
7068
35.8k
  start[2] = NXT(2);
7069
35.8k
  start[3] = NXT(3);
7070
35.8k
  enc = xmlDetectCharEncoding(start, 4);
7071
35.8k
  if (enc != XML_CHAR_ENCODING_NONE)
7072
8.85k
      xmlSwitchEncoding(ctxt, enc);
7073
35.8k
    }
7074
7075
35.9k
    if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
7076
8.30k
  xmlParseTextDecl(ctxt);
7077
8.30k
  if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7078
      /*
7079
       * The XML REC instructs us to stop parsing right here
7080
       */
7081
128
      xmlHaltParser(ctxt);
7082
128
      return;
7083
128
  }
7084
8.30k
    }
7085
35.8k
    if (ctxt->myDoc == NULL) {
7086
0
        ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
7087
0
  if (ctxt->myDoc == NULL) {
7088
0
      xmlErrMemory(ctxt, "New Doc failed");
7089
0
      return;
7090
0
  }
7091
0
  ctxt->myDoc->properties = XML_DOC_INTERNAL;
7092
0
    }
7093
35.8k
    if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
7094
0
        xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
7095
7096
35.8k
    ctxt->instate = XML_PARSER_DTD;
7097
35.8k
    ctxt->external = 1;
7098
35.8k
    SKIP_BLANKS;
7099
670k
    while (((RAW == '<') && (NXT(1) == '?')) ||
7100
670k
           ((RAW == '<') && (NXT(1) == '!')) ||
7101
670k
     (RAW == '%')) {
7102
640k
  int id = ctxt->input->id;
7103
640k
  unsigned long cons = CUR_CONSUMED;
7104
7105
640k
  GROW;
7106
640k
        if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7107
6.86k
      xmlParseConditionalSections(ctxt);
7108
6.86k
  } else
7109
633k
      xmlParseMarkupDecl(ctxt);
7110
640k
        SKIP_BLANKS;
7111
7112
640k
  if ((id == ctxt->input->id) && (cons == CUR_CONSUMED)) {
7113
5.16k
      xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7114
5.16k
      break;
7115
5.16k
  }
7116
640k
    }
7117
7118
35.8k
    if (RAW != 0) {
7119
13.1k
  xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7120
13.1k
    }
7121
7122
35.8k
}
7123
7124
/**
7125
 * xmlParseReference:
7126
 * @ctxt:  an XML parser context
7127
 *
7128
 * DEPRECATED: Internal function, don't use.
7129
 *
7130
 * parse and handle entity references in content, depending on the SAX
7131
 * interface, this may end-up in a call to character() if this is a
7132
 * CharRef, a predefined entity, if there is no reference() callback.
7133
 * or if the parser was asked to switch to that mode.
7134
 *
7135
 * [67] Reference ::= EntityRef | CharRef
7136
 */
7137
void
7138
1.88M
xmlParseReference(xmlParserCtxtPtr ctxt) {
7139
1.88M
    xmlEntityPtr ent;
7140
1.88M
    xmlChar *val;
7141
1.88M
    int was_checked;
7142
1.88M
    xmlNodePtr list = NULL;
7143
1.88M
    xmlParserErrors ret = XML_ERR_OK;
7144
7145
7146
1.88M
    if (RAW != '&')
7147
0
        return;
7148
7149
    /*
7150
     * Simple case of a CharRef
7151
     */
7152
1.88M
    if (NXT(1) == '#') {
7153
163k
  int i = 0;
7154
163k
  xmlChar out[16];
7155
163k
  int hex = NXT(2);
7156
163k
  int value = xmlParseCharRef(ctxt);
7157
7158
163k
  if (value == 0)
7159
11.3k
      return;
7160
152k
  if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
7161
      /*
7162
       * So we are using non-UTF-8 buffers
7163
       * Check that the char fit on 8bits, if not
7164
       * generate a CharRef.
7165
       */
7166
52.1k
      if (value <= 0xFF) {
7167
49.8k
    out[0] = value;
7168
49.8k
    out[1] = 0;
7169
49.8k
    if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7170
49.8k
        (!ctxt->disableSAX))
7171
38.6k
        ctxt->sax->characters(ctxt->userData, out, 1);
7172
49.8k
      } else {
7173
2.25k
    if ((hex == 'x') || (hex == 'X'))
7174
736
        snprintf((char *)out, sizeof(out), "#x%X", value);
7175
1.51k
    else
7176
1.51k
        snprintf((char *)out, sizeof(out), "#%d", value);
7177
2.25k
    if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7178
2.25k
        (!ctxt->disableSAX))
7179
1.82k
        ctxt->sax->reference(ctxt->userData, out);
7180
2.25k
      }
7181
100k
  } else {
7182
      /*
7183
       * Just encode the value in UTF-8
7184
       */
7185
100k
      COPY_BUF(0 ,out, i, value);
7186
100k
      out[i] = 0;
7187
100k
      if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7188
100k
    (!ctxt->disableSAX))
7189
89.0k
    ctxt->sax->characters(ctxt->userData, out, i);
7190
100k
  }
7191
152k
  return;
7192
163k
    }
7193
7194
    /*
7195
     * We are seeing an entity reference
7196
     */
7197
1.72M
    ent = xmlParseEntityRef(ctxt);
7198
1.72M
    if (ent == NULL) return;
7199
1.09M
    if (!ctxt->wellFormed)
7200
131k
  return;
7201
960k
    was_checked = ent->checked;
7202
7203
    /* special case of predefined entities */
7204
960k
    if ((ent->name == NULL) ||
7205
960k
        (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
7206
83.8k
  val = ent->content;
7207
83.8k
  if (val == NULL) return;
7208
  /*
7209
   * inline the entity.
7210
   */
7211
83.8k
  if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7212
83.8k
      (!ctxt->disableSAX))
7213
83.8k
      ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
7214
83.8k
  return;
7215
83.8k
    }
7216
7217
    /*
7218
     * The first reference to the entity trigger a parsing phase
7219
     * where the ent->children is filled with the result from
7220
     * the parsing.
7221
     * Note: external parsed entities will not be loaded, it is not
7222
     * required for a non-validating parser, unless the parsing option
7223
     * of validating, or substituting entities were given. Doing so is
7224
     * far more secure as the parser will only process data coming from
7225
     * the document entity by default.
7226
     */
7227
876k
    if (((ent->checked == 0) ||
7228
876k
         ((ent->children == NULL) && (ctxt->options & XML_PARSE_NOENT))) &&
7229
876k
        ((ent->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY) ||
7230
838k
         (ctxt->options & (XML_PARSE_NOENT | XML_PARSE_DTDVALID)))) {
7231
837k
  unsigned long oldnbent = ctxt->nbentities, diff;
7232
7233
  /*
7234
   * This is a bit hackish but this seems the best
7235
   * way to make sure both SAX and DOM entity support
7236
   * behaves okay.
7237
   */
7238
837k
  void *user_data;
7239
837k
  if (ctxt->userData == ctxt)
7240
837k
      user_data = NULL;
7241
0
  else
7242
0
      user_data = ctxt->userData;
7243
7244
  /*
7245
   * Check that this entity is well formed
7246
   * 4.3.2: An internal general parsed entity is well-formed
7247
   * if its replacement text matches the production labeled
7248
   * content.
7249
   */
7250
837k
  if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7251
91.3k
      ctxt->depth++;
7252
91.3k
      ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content,
7253
91.3k
                                                user_data, &list);
7254
91.3k
      ctxt->depth--;
7255
7256
745k
  } else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7257
745k
      ctxt->depth++;
7258
745k
      ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax,
7259
745k
                                     user_data, ctxt->depth, ent->URI,
7260
745k
             ent->ExternalID, &list);
7261
745k
      ctxt->depth--;
7262
745k
  } else {
7263
0
      ret = XML_ERR_ENTITY_PE_INTERNAL;
7264
0
      xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7265
0
       "invalid entity type found\n", NULL);
7266
0
  }
7267
7268
  /*
7269
   * Store the number of entities needing parsing for this entity
7270
   * content and do checkings
7271
   */
7272
837k
        diff = ctxt->nbentities - oldnbent + 1;
7273
837k
        if (diff > INT_MAX / 2)
7274
0
            diff = INT_MAX / 2;
7275
837k
        ent->checked = diff * 2;
7276
837k
  if ((ent->content != NULL) && (xmlStrchr(ent->content, '<')))
7277
49.2k
      ent->checked |= 1;
7278
837k
  if (ret == XML_ERR_ENTITY_LOOP) {
7279
776k
      xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7280
776k
            xmlHaltParser(ctxt);
7281
776k
      xmlFreeNodeList(list);
7282
776k
      return;
7283
776k
  }
7284
60.8k
  if (xmlParserEntityCheck(ctxt, 0, ent, 0)) {
7285
0
      xmlFreeNodeList(list);
7286
0
      return;
7287
0
  }
7288
7289
60.8k
  if ((ret == XML_ERR_OK) && (list != NULL)) {
7290
33.7k
      if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
7291
33.7k
       (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
7292
33.7k
    (ent->children == NULL)) {
7293
33.5k
    ent->children = list;
7294
                /*
7295
                 * Prune it directly in the generated document
7296
                 * except for single text nodes.
7297
                 */
7298
33.5k
                if ((ctxt->replaceEntities == 0) ||
7299
33.5k
                    (ctxt->parseMode == XML_PARSE_READER) ||
7300
33.5k
                    ((list->type == XML_TEXT_NODE) &&
7301
31.6k
                     (list->next == NULL))) {
7302
31.6k
                    ent->owner = 1;
7303
67.0k
                    while (list != NULL) {
7304
35.3k
                        list->parent = (xmlNodePtr) ent;
7305
35.3k
                        if (list->doc != ent->doc)
7306
0
                            xmlSetTreeDoc(list, ent->doc);
7307
35.3k
                        if (list->next == NULL)
7308
31.6k
                            ent->last = list;
7309
35.3k
                        list = list->next;
7310
35.3k
                    }
7311
31.6k
                    list = NULL;
7312
31.6k
                } else {
7313
1.88k
                    ent->owner = 0;
7314
5.52k
                    while (list != NULL) {
7315
3.64k
                        list->parent = (xmlNodePtr) ctxt->node;
7316
3.64k
                        list->doc = ctxt->myDoc;
7317
3.64k
                        if (list->next == NULL)
7318
1.88k
                            ent->last = list;
7319
3.64k
                        list = list->next;
7320
3.64k
                    }
7321
1.88k
                    list = ent->children;
7322
#ifdef LIBXML_LEGACY_ENABLED
7323
                    if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7324
                        xmlAddEntityReference(ent, list, NULL);
7325
#endif /* LIBXML_LEGACY_ENABLED */
7326
1.88k
                }
7327
33.5k
      } else {
7328
214
    xmlFreeNodeList(list);
7329
214
    list = NULL;
7330
214
      }
7331
33.7k
  } else if ((ret != XML_ERR_OK) &&
7332
27.0k
       (ret != XML_WAR_UNDECLARED_ENTITY)) {
7333
11.4k
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7334
11.4k
         "Entity '%s' failed to parse\n", ent->name);
7335
11.4k
            if (ent->content != NULL)
7336
6.77k
                ent->content[0] = 0;
7337
11.4k
      xmlParserEntityCheck(ctxt, 0, ent, 0);
7338
15.6k
  } else if (list != NULL) {
7339
0
      xmlFreeNodeList(list);
7340
0
      list = NULL;
7341
0
  }
7342
60.8k
  if (ent->checked == 0)
7343
0
      ent->checked = 2;
7344
7345
        /* Prevent entity from being parsed and expanded twice (Bug 760367). */
7346
60.8k
        was_checked = 0;
7347
60.8k
    } else if (ent->checked != 1) {
7348
39.5k
  ctxt->nbentities += ent->checked / 2;
7349
39.5k
    }
7350
7351
    /*
7352
     * Now that the entity content has been gathered
7353
     * provide it to the application, this can take different forms based
7354
     * on the parsing modes.
7355
     */
7356
100k
    if (ent->children == NULL) {
7357
  /*
7358
   * Probably running in SAX mode and the callbacks don't
7359
   * build the entity content. So unless we already went
7360
   * though parsing for first checking go though the entity
7361
   * content to generate callbacks associated to the entity
7362
   */
7363
32.8k
  if (was_checked != 0) {
7364
3.86k
      void *user_data;
7365
      /*
7366
       * This is a bit hackish but this seems the best
7367
       * way to make sure both SAX and DOM entity support
7368
       * behaves okay.
7369
       */
7370
3.86k
      if (ctxt->userData == ctxt)
7371
3.86k
    user_data = NULL;
7372
0
      else
7373
0
    user_data = ctxt->userData;
7374
7375
3.86k
      if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7376
311
    ctxt->depth++;
7377
311
    ret = xmlParseBalancedChunkMemoryInternal(ctxt,
7378
311
           ent->content, user_data, NULL);
7379
311
    ctxt->depth--;
7380
3.55k
      } else if (ent->etype ==
7381
3.55k
           XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7382
3.55k
    ctxt->depth++;
7383
3.55k
    ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
7384
3.55k
         ctxt->sax, user_data, ctxt->depth,
7385
3.55k
         ent->URI, ent->ExternalID, NULL);
7386
3.55k
    ctxt->depth--;
7387
3.55k
      } else {
7388
0
    ret = XML_ERR_ENTITY_PE_INTERNAL;
7389
0
    xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7390
0
           "invalid entity type found\n", NULL);
7391
0
      }
7392
3.86k
      if (ret == XML_ERR_ENTITY_LOOP) {
7393
6
    xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7394
6
    return;
7395
6
      }
7396
3.86k
  }
7397
32.8k
  if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7398
32.8k
      (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7399
      /*
7400
       * Entity reference callback comes second, it's somewhat
7401
       * superfluous but a compatibility to historical behaviour
7402
       */
7403
8.43k
      ctxt->sax->reference(ctxt->userData, ent->name);
7404
8.43k
  }
7405
32.8k
  return;
7406
32.8k
    }
7407
7408
    /*
7409
     * If we didn't get any children for the entity being built
7410
     */
7411
67.5k
    if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7412
67.5k
  (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7413
  /*
7414
   * Create a node.
7415
   */
7416
30.8k
  ctxt->sax->reference(ctxt->userData, ent->name);
7417
30.8k
  return;
7418
30.8k
    }
7419
7420
36.7k
    if ((ctxt->replaceEntities) || (ent->children == NULL))  {
7421
  /*
7422
   * There is a problem on the handling of _private for entities
7423
   * (bug 155816): Should we copy the content of the field from
7424
   * the entity (possibly overwriting some value set by the user
7425
   * when a copy is created), should we leave it alone, or should
7426
   * we try to take care of different situations?  The problem
7427
   * is exacerbated by the usage of this field by the xmlReader.
7428
   * To fix this bug, we look at _private on the created node
7429
   * and, if it's NULL, we copy in whatever was in the entity.
7430
   * If it's not NULL we leave it alone.  This is somewhat of a
7431
   * hack - maybe we should have further tests to determine
7432
   * what to do.
7433
   */
7434
36.7k
  if ((ctxt->node != NULL) && (ent->children != NULL)) {
7435
      /*
7436
       * Seems we are generating the DOM content, do
7437
       * a simple tree copy for all references except the first
7438
       * In the first occurrence list contains the replacement.
7439
       */
7440
36.7k
      if (((list == NULL) && (ent->owner == 0)) ||
7441
36.7k
    (ctxt->parseMode == XML_PARSE_READER)) {
7442
11.9k
    xmlNodePtr nw = NULL, cur, firstChild = NULL;
7443
7444
    /*
7445
     * We are copying here, make sure there is no abuse
7446
     */
7447
11.9k
    ctxt->sizeentcopy += ent->length + 5;
7448
11.9k
    if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
7449
0
        return;
7450
7451
    /*
7452
     * when operating on a reader, the entities definitions
7453
     * are always owning the entities subtree.
7454
    if (ctxt->parseMode == XML_PARSE_READER)
7455
        ent->owner = 1;
7456
     */
7457
7458
11.9k
    cur = ent->children;
7459
13.0k
    while (cur != NULL) {
7460
13.0k
        nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7461
13.0k
        if (nw != NULL) {
7462
13.0k
      if (nw->_private == NULL)
7463
13.0k
          nw->_private = cur->_private;
7464
13.0k
      if (firstChild == NULL){
7465
11.9k
          firstChild = nw;
7466
11.9k
      }
7467
13.0k
      nw = xmlAddChild(ctxt->node, nw);
7468
13.0k
        }
7469
13.0k
        if (cur == ent->last) {
7470
      /*
7471
       * needed to detect some strange empty
7472
       * node cases in the reader tests
7473
       */
7474
11.9k
      if ((ctxt->parseMode == XML_PARSE_READER) &&
7475
11.9k
          (nw != NULL) &&
7476
11.9k
          (nw->type == XML_ELEMENT_NODE) &&
7477
11.9k
          (nw->children == NULL))
7478
397
          nw->extra = 1;
7479
7480
11.9k
      break;
7481
11.9k
        }
7482
1.11k
        cur = cur->next;
7483
1.11k
    }
7484
#ifdef LIBXML_LEGACY_ENABLED
7485
    if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7486
      xmlAddEntityReference(ent, firstChild, nw);
7487
#endif /* LIBXML_LEGACY_ENABLED */
7488
24.7k
      } else if ((list == NULL) || (ctxt->inputNr > 0)) {
7489
24.7k
    xmlNodePtr nw = NULL, cur, next, last,
7490
24.7k
         firstChild = NULL;
7491
7492
    /*
7493
     * We are copying here, make sure there is no abuse
7494
     */
7495
24.7k
    ctxt->sizeentcopy += ent->length + 5;
7496
24.7k
    if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
7497
0
        return;
7498
7499
    /*
7500
     * Copy the entity child list and make it the new
7501
     * entity child list. The goal is to make sure any
7502
     * ID or REF referenced will be the one from the
7503
     * document content and not the entity copy.
7504
     */
7505
24.7k
    cur = ent->children;
7506
24.7k
    ent->children = NULL;
7507
24.7k
    last = ent->last;
7508
24.7k
    ent->last = NULL;
7509
26.8k
    while (cur != NULL) {
7510
26.8k
        next = cur->next;
7511
26.8k
        cur->next = NULL;
7512
26.8k
        cur->parent = NULL;
7513
26.8k
        nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7514
26.8k
        if (nw != NULL) {
7515
26.8k
      if (nw->_private == NULL)
7516
26.8k
          nw->_private = cur->_private;
7517
26.8k
      if (firstChild == NULL){
7518
24.7k
          firstChild = cur;
7519
24.7k
      }
7520
26.8k
      xmlAddChild((xmlNodePtr) ent, nw);
7521
26.8k
      xmlAddChild(ctxt->node, cur);
7522
26.8k
        }
7523
26.8k
        if (cur == last)
7524
24.7k
      break;
7525
2.12k
        cur = next;
7526
2.12k
    }
7527
24.7k
    if (ent->owner == 0)
7528
1.88k
        ent->owner = 1;
7529
#ifdef LIBXML_LEGACY_ENABLED
7530
    if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7531
      xmlAddEntityReference(ent, firstChild, nw);
7532
#endif /* LIBXML_LEGACY_ENABLED */
7533
24.7k
      } else {
7534
0
    const xmlChar *nbktext;
7535
7536
    /*
7537
     * the name change is to avoid coalescing of the
7538
     * node with a possible previous text one which
7539
     * would make ent->children a dangling pointer
7540
     */
7541
0
    nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
7542
0
          -1);
7543
0
    if (ent->children->type == XML_TEXT_NODE)
7544
0
        ent->children->name = nbktext;
7545
0
    if ((ent->last != ent->children) &&
7546
0
        (ent->last->type == XML_TEXT_NODE))
7547
0
        ent->last->name = nbktext;
7548
0
    xmlAddChildList(ctxt->node, ent->children);
7549
0
      }
7550
7551
      /*
7552
       * This is to avoid a nasty side effect, see
7553
       * characters() in SAX.c
7554
       */
7555
36.7k
      ctxt->nodemem = 0;
7556
36.7k
      ctxt->nodelen = 0;
7557
36.7k
      return;
7558
36.7k
  }
7559
36.7k
    }
7560
36.7k
}
7561
7562
/**
7563
 * xmlParseEntityRef:
7564
 * @ctxt:  an XML parser context
7565
 *
7566
 * DEPRECATED: Internal function, don't use.
7567
 *
7568
 * parse ENTITY references declarations
7569
 *
7570
 * [68] EntityRef ::= '&' Name ';'
7571
 *
7572
 * [ WFC: Entity Declared ]
7573
 * In a document without any DTD, a document with only an internal DTD
7574
 * subset which contains no parameter entity references, or a document
7575
 * with "standalone='yes'", the Name given in the entity reference
7576
 * must match that in an entity declaration, except that well-formed
7577
 * documents need not declare any of the following entities: amp, lt,
7578
 * gt, apos, quot.  The declaration of a parameter entity must precede
7579
 * any reference to it.  Similarly, the declaration of a general entity
7580
 * must precede any reference to it which appears in a default value in an
7581
 * attribute-list declaration. Note that if entities are declared in the
7582
 * external subset or in external parameter entities, a non-validating
7583
 * processor is not obligated to read and process their declarations;
7584
 * for such documents, the rule that an entity must be declared is a
7585
 * well-formedness constraint only if standalone='yes'.
7586
 *
7587
 * [ WFC: Parsed Entity ]
7588
 * An entity reference must not contain the name of an unparsed entity
7589
 *
7590
 * Returns the xmlEntityPtr if found, or NULL otherwise.
7591
 */
7592
xmlEntityPtr
7593
2.33M
xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
7594
2.33M
    const xmlChar *name;
7595
2.33M
    xmlEntityPtr ent = NULL;
7596
7597
2.33M
    GROW;
7598
2.33M
    if (ctxt->instate == XML_PARSER_EOF)
7599
0
        return(NULL);
7600
7601
2.33M
    if (RAW != '&')
7602
0
        return(NULL);
7603
2.33M
    NEXT;
7604
2.33M
    name = xmlParseName(ctxt);
7605
2.33M
    if (name == NULL) {
7606
922k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7607
922k
           "xmlParseEntityRef: no name\n");
7608
922k
        return(NULL);
7609
922k
    }
7610
1.41M
    if (RAW != ';') {
7611
20.6k
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7612
20.6k
  return(NULL);
7613
20.6k
    }
7614
1.39M
    NEXT;
7615
7616
    /*
7617
     * Predefined entities override any extra definition
7618
     */
7619
1.39M
    if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7620
757k
        ent = xmlGetPredefinedEntity(name);
7621
757k
        if (ent != NULL)
7622
155k
            return(ent);
7623
757k
    }
7624
7625
    /*
7626
     * Increase the number of entity references parsed
7627
     */
7628
1.23M
    ctxt->nbentities++;
7629
7630
    /*
7631
     * Ask first SAX for entity resolution, otherwise try the
7632
     * entities which may have stored in the parser context.
7633
     */
7634
1.23M
    if (ctxt->sax != NULL) {
7635
1.23M
  if (ctxt->sax->getEntity != NULL)
7636
1.23M
      ent = ctxt->sax->getEntity(ctxt->userData, name);
7637
1.23M
  if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7638
1.23M
      (ctxt->options & XML_PARSE_OLDSAX))
7639
4.16k
      ent = xmlGetPredefinedEntity(name);
7640
1.23M
  if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7641
1.23M
      (ctxt->userData==ctxt)) {
7642
15.3k
      ent = xmlSAX2GetEntity(ctxt, name);
7643
15.3k
  }
7644
1.23M
    }
7645
1.23M
    if (ctxt->instate == XML_PARSER_EOF)
7646
0
  return(NULL);
7647
    /*
7648
     * [ WFC: Entity Declared ]
7649
     * In a document without any DTD, a document with only an
7650
     * internal DTD subset which contains no parameter entity
7651
     * references, or a document with "standalone='yes'", the
7652
     * Name given in the entity reference must match that in an
7653
     * entity declaration, except that well-formed documents
7654
     * need not declare any of the following entities: amp, lt,
7655
     * gt, apos, quot.
7656
     * The declaration of a parameter entity must precede any
7657
     * reference to it.
7658
     * Similarly, the declaration of a general entity must
7659
     * precede any reference to it which appears in a default
7660
     * value in an attribute-list declaration. Note that if
7661
     * entities are declared in the external subset or in
7662
     * external parameter entities, a non-validating processor
7663
     * is not obligated to read and process their declarations;
7664
     * for such documents, the rule that an entity must be
7665
     * declared is a well-formedness constraint only if
7666
     * standalone='yes'.
7667
     */
7668
1.23M
    if (ent == NULL) {
7669
68.9k
  if ((ctxt->standalone == 1) ||
7670
68.9k
      ((ctxt->hasExternalSubset == 0) &&
7671
67.6k
       (ctxt->hasPErefs == 0))) {
7672
29.1k
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7673
29.1k
         "Entity '%s' not defined\n", name);
7674
39.7k
  } else {
7675
39.7k
      xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7676
39.7k
         "Entity '%s' not defined\n", name);
7677
39.7k
      if ((ctxt->inSubset == 0) &&
7678
39.7k
    (ctxt->sax != NULL) &&
7679
39.7k
    (ctxt->sax->reference != NULL)) {
7680
39.6k
    ctxt->sax->reference(ctxt->userData, name);
7681
39.6k
      }
7682
39.7k
  }
7683
68.9k
  xmlParserEntityCheck(ctxt, 0, ent, 0);
7684
68.9k
  ctxt->valid = 0;
7685
68.9k
    }
7686
7687
    /*
7688
     * [ WFC: Parsed Entity ]
7689
     * An entity reference must not contain the name of an
7690
     * unparsed entity
7691
     */
7692
1.17M
    else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7693
665
  xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7694
665
     "Entity reference to unparsed entity %s\n", name);
7695
665
    }
7696
7697
    /*
7698
     * [ WFC: No External Entity References ]
7699
     * Attribute values cannot contain direct or indirect
7700
     * entity references to external entities.
7701
     */
7702
1.16M
    else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7703
1.16M
       (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7704
472
  xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7705
472
       "Attribute references external entity '%s'\n", name);
7706
472
    }
7707
    /*
7708
     * [ WFC: No < in Attribute Values ]
7709
     * The replacement text of any entity referred to directly or
7710
     * indirectly in an attribute value (other than "&lt;") must
7711
     * not contain a <.
7712
     */
7713
1.16M
    else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7714
1.16M
       (ent != NULL) && 
7715
1.16M
       (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
7716
147k
  if (((ent->checked & 1) || (ent->checked == 0)) &&
7717
147k
       (ent->content != NULL) && (xmlStrchr(ent->content, '<'))) {
7718
2.17k
      xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7719
2.17k
  "'<' in entity '%s' is not allowed in attributes values\n", name);
7720
2.17k
        }
7721
147k
    }
7722
7723
    /*
7724
     * Internal check, no parameter entities here ...
7725
     */
7726
1.02M
    else {
7727
1.02M
  switch (ent->etype) {
7728
0
      case XML_INTERNAL_PARAMETER_ENTITY:
7729
0
      case XML_EXTERNAL_PARAMETER_ENTITY:
7730
0
      xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7731
0
       "Attempt to reference the parameter entity '%s'\n",
7732
0
            name);
7733
0
      break;
7734
1.02M
      default:
7735
1.02M
      break;
7736
1.02M
  }
7737
1.02M
    }
7738
7739
    /*
7740
     * [ WFC: No Recursion ]
7741
     * A parsed entity must not contain a recursive reference
7742
     * to itself, either directly or indirectly.
7743
     * Done somewhere else
7744
     */
7745
1.23M
    return(ent);
7746
1.23M
}
7747
7748
/**
7749
 * xmlParseStringEntityRef:
7750
 * @ctxt:  an XML parser context
7751
 * @str:  a pointer to an index in the string
7752
 *
7753
 * parse ENTITY references declarations, but this version parses it from
7754
 * a string value.
7755
 *
7756
 * [68] EntityRef ::= '&' Name ';'
7757
 *
7758
 * [ WFC: Entity Declared ]
7759
 * In a document without any DTD, a document with only an internal DTD
7760
 * subset which contains no parameter entity references, or a document
7761
 * with "standalone='yes'", the Name given in the entity reference
7762
 * must match that in an entity declaration, except that well-formed
7763
 * documents need not declare any of the following entities: amp, lt,
7764
 * gt, apos, quot.  The declaration of a parameter entity must precede
7765
 * any reference to it.  Similarly, the declaration of a general entity
7766
 * must precede any reference to it which appears in a default value in an
7767
 * attribute-list declaration. Note that if entities are declared in the
7768
 * external subset or in external parameter entities, a non-validating
7769
 * processor is not obligated to read and process their declarations;
7770
 * for such documents, the rule that an entity must be declared is a
7771
 * well-formedness constraint only if standalone='yes'.
7772
 *
7773
 * [ WFC: Parsed Entity ]
7774
 * An entity reference must not contain the name of an unparsed entity
7775
 *
7776
 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7777
 * is updated to the current location in the string.
7778
 */
7779
static xmlEntityPtr
7780
225k
xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7781
225k
    xmlChar *name;
7782
225k
    const xmlChar *ptr;
7783
225k
    xmlChar cur;
7784
225k
    xmlEntityPtr ent = NULL;
7785
7786
225k
    if ((str == NULL) || (*str == NULL))
7787
0
        return(NULL);
7788
225k
    ptr = *str;
7789
225k
    cur = *ptr;
7790
225k
    if (cur != '&')
7791
0
  return(NULL);
7792
7793
225k
    ptr++;
7794
225k
    name = xmlParseStringName(ctxt, &ptr);
7795
225k
    if (name == NULL) {
7796
1.61k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7797
1.61k
           "xmlParseStringEntityRef: no name\n");
7798
1.61k
  *str = ptr;
7799
1.61k
  return(NULL);
7800
1.61k
    }
7801
224k
    if (*ptr != ';') {
7802
1.37k
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7803
1.37k
        xmlFree(name);
7804
1.37k
  *str = ptr;
7805
1.37k
  return(NULL);
7806
1.37k
    }
7807
223k
    ptr++;
7808
7809
7810
    /*
7811
     * Predefined entities override any extra definition
7812
     */
7813
223k
    if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7814
164k
        ent = xmlGetPredefinedEntity(name);
7815
164k
        if (ent != NULL) {
7816
3.03k
            xmlFree(name);
7817
3.03k
            *str = ptr;
7818
3.03k
            return(ent);
7819
3.03k
        }
7820
164k
    }
7821
7822
    /*
7823
     * Increase the number of entity references parsed
7824
     */
7825
219k
    ctxt->nbentities++;
7826
7827
    /*
7828
     * Ask first SAX for entity resolution, otherwise try the
7829
     * entities which may have stored in the parser context.
7830
     */
7831
219k
    if (ctxt->sax != NULL) {
7832
219k
  if (ctxt->sax->getEntity != NULL)
7833
219k
      ent = ctxt->sax->getEntity(ctxt->userData, name);
7834
219k
  if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX))
7835
18.1k
      ent = xmlGetPredefinedEntity(name);
7836
219k
  if ((ent == NULL) && (ctxt->userData==ctxt)) {
7837
65.2k
      ent = xmlSAX2GetEntity(ctxt, name);
7838
65.2k
  }
7839
219k
    }
7840
219k
    if (ctxt->instate == XML_PARSER_EOF) {
7841
0
  xmlFree(name);
7842
0
  return(NULL);
7843
0
    }
7844
7845
    /*
7846
     * [ WFC: Entity Declared ]
7847
     * In a document without any DTD, a document with only an
7848
     * internal DTD subset which contains no parameter entity
7849
     * references, or a document with "standalone='yes'", the
7850
     * Name given in the entity reference must match that in an
7851
     * entity declaration, except that well-formed documents
7852
     * need not declare any of the following entities: amp, lt,
7853
     * gt, apos, quot.
7854
     * The declaration of a parameter entity must precede any
7855
     * reference to it.
7856
     * Similarly, the declaration of a general entity must
7857
     * precede any reference to it which appears in a default
7858
     * value in an attribute-list declaration. Note that if
7859
     * entities are declared in the external subset or in
7860
     * external parameter entities, a non-validating processor
7861
     * is not obligated to read and process their declarations;
7862
     * for such documents, the rule that an entity must be
7863
     * declared is a well-formedness constraint only if
7864
     * standalone='yes'.
7865
     */
7866
219k
    if (ent == NULL) {
7867
65.2k
  if ((ctxt->standalone == 1) ||
7868
65.2k
      ((ctxt->hasExternalSubset == 0) &&
7869
64.2k
       (ctxt->hasPErefs == 0))) {
7870
64.0k
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7871
64.0k
         "Entity '%s' not defined\n", name);
7872
64.0k
  } else {
7873
1.27k
      xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7874
1.27k
        "Entity '%s' not defined\n",
7875
1.27k
        name);
7876
1.27k
  }
7877
65.2k
  xmlParserEntityCheck(ctxt, 0, ent, 0);
7878
  /* TODO ? check regressions ctxt->valid = 0; */
7879
65.2k
    }
7880
7881
    /*
7882
     * [ WFC: Parsed Entity ]
7883
     * An entity reference must not contain the name of an
7884
     * unparsed entity
7885
     */
7886
154k
    else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7887
28
  xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7888
28
     "Entity reference to unparsed entity %s\n", name);
7889
28
    }
7890
7891
    /*
7892
     * [ WFC: No External Entity References ]
7893
     * Attribute values cannot contain direct or indirect
7894
     * entity references to external entities.
7895
     */
7896
154k
    else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7897
154k
       (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7898
109
  xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7899
109
   "Attribute references external entity '%s'\n", name);
7900
109
    }
7901
    /*
7902
     * [ WFC: No < in Attribute Values ]
7903
     * The replacement text of any entity referred to directly or
7904
     * indirectly in an attribute value (other than "&lt;") must
7905
     * not contain a <.
7906
     */
7907
154k
    else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7908
154k
       (ent != NULL) && (ent->content != NULL) &&
7909
154k
       (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
7910
154k
       (xmlStrchr(ent->content, '<'))) {
7911
56.4k
  xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7912
56.4k
     "'<' in entity '%s' is not allowed in attributes values\n",
7913
56.4k
        name);
7914
56.4k
    }
7915
7916
    /*
7917
     * Internal check, no parameter entities here ...
7918
     */
7919
98.1k
    else {
7920
98.1k
  switch (ent->etype) {
7921
0
      case XML_INTERNAL_PARAMETER_ENTITY:
7922
0
      case XML_EXTERNAL_PARAMETER_ENTITY:
7923
0
    xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7924
0
       "Attempt to reference the parameter entity '%s'\n",
7925
0
          name);
7926
0
      break;
7927
98.1k
      default:
7928
98.1k
      break;
7929
98.1k
  }
7930
98.1k
    }
7931
7932
    /*
7933
     * [ WFC: No Recursion ]
7934
     * A parsed entity must not contain a recursive reference
7935
     * to itself, either directly or indirectly.
7936
     * Done somewhere else
7937
     */
7938
7939
219k
    xmlFree(name);
7940
219k
    *str = ptr;
7941
219k
    return(ent);
7942
219k
}
7943
7944
/**
7945
 * xmlParsePEReference:
7946
 * @ctxt:  an XML parser context
7947
 *
7948
 * DEPRECATED: Internal function, don't use.
7949
 *
7950
 * parse PEReference declarations
7951
 * The entity content is handled directly by pushing it's content as
7952
 * a new input stream.
7953
 *
7954
 * [69] PEReference ::= '%' Name ';'
7955
 *
7956
 * [ WFC: No Recursion ]
7957
 * A parsed entity must not contain a recursive
7958
 * reference to itself, either directly or indirectly.
7959
 *
7960
 * [ WFC: Entity Declared ]
7961
 * In a document without any DTD, a document with only an internal DTD
7962
 * subset which contains no parameter entity references, or a document
7963
 * with "standalone='yes'", ...  ... The declaration of a parameter
7964
 * entity must precede any reference to it...
7965
 *
7966
 * [ VC: Entity Declared ]
7967
 * In a document with an external subset or external parameter entities
7968
 * with "standalone='no'", ...  ... The declaration of a parameter entity
7969
 * must precede any reference to it...
7970
 *
7971
 * [ WFC: In DTD ]
7972
 * Parameter-entity references may only appear in the DTD.
7973
 * NOTE: misleading but this is handled.
7974
 */
7975
void
7976
xmlParsePEReference(xmlParserCtxtPtr ctxt)
7977
2.81M
{
7978
2.81M
    const xmlChar *name;
7979
2.81M
    xmlEntityPtr entity = NULL;
7980
2.81M
    xmlParserInputPtr input;
7981
7982
2.81M
    if (RAW != '%')
7983
2.28M
        return;
7984
531k
    NEXT;
7985
531k
    name = xmlParseName(ctxt);
7986
531k
    if (name == NULL) {
7987
118k
  xmlFatalErrMsg(ctxt, XML_ERR_PEREF_NO_NAME, "PEReference: no name\n");
7988
118k
  return;
7989
118k
    }
7990
412k
    if (xmlParserDebugEntities)
7991
0
  xmlGenericError(xmlGenericErrorContext,
7992
0
    "PEReference: %s\n", name);
7993
412k
    if (RAW != ';') {
7994
9.51k
  xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
7995
9.51k
        return;
7996
9.51k
    }
7997
7998
402k
    NEXT;
7999
8000
    /*
8001
     * Increase the number of entity references parsed
8002
     */
8003
402k
    ctxt->nbentities++;
8004
8005
    /*
8006
     * Request the entity from SAX
8007
     */
8008
402k
    if ((ctxt->sax != NULL) &&
8009
402k
  (ctxt->sax->getParameterEntity != NULL))
8010
402k
  entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8011
402k
    if (ctxt->instate == XML_PARSER_EOF)
8012
0
  return;
8013
402k
    if (entity == NULL) {
8014
  /*
8015
   * [ WFC: Entity Declared ]
8016
   * In a document without any DTD, a document with only an
8017
   * internal DTD subset which contains no parameter entity
8018
   * references, or a document with "standalone='yes'", ...
8019
   * ... The declaration of a parameter entity must precede
8020
   * any reference to it...
8021
   */
8022
32.9k
  if ((ctxt->standalone == 1) ||
8023
32.9k
      ((ctxt->hasExternalSubset == 0) &&
8024
32.7k
       (ctxt->hasPErefs == 0))) {
8025
1.22k
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8026
1.22k
            "PEReference: %%%s; not found\n",
8027
1.22k
            name);
8028
31.6k
  } else {
8029
      /*
8030
       * [ VC: Entity Declared ]
8031
       * In a document with an external subset or external
8032
       * parameter entities with "standalone='no'", ...
8033
       * ... The declaration of a parameter entity must
8034
       * precede any reference to it...
8035
       */
8036
31.6k
            if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
8037
7.47k
                xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
8038
7.47k
                                 "PEReference: %%%s; not found\n",
8039
7.47k
                                 name, NULL);
8040
7.47k
            } else
8041
24.2k
                xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8042
24.2k
                              "PEReference: %%%s; not found\n",
8043
24.2k
                              name, NULL);
8044
31.6k
            ctxt->valid = 0;
8045
31.6k
  }
8046
32.9k
  xmlParserEntityCheck(ctxt, 0, NULL, 0);
8047
370k
    } else {
8048
  /*
8049
   * Internal checking in case the entity quest barfed
8050
   */
8051
370k
  if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8052
370k
      (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8053
0
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8054
0
      "Internal: %%%s; is not a parameter entity\n",
8055
0
        name, NULL);
8056
370k
  } else {
8057
370k
            xmlChar start[4];
8058
370k
            xmlCharEncoding enc;
8059
8060
370k
      if (xmlParserEntityCheck(ctxt, 0, entity, 0))
8061
20
          return;
8062
8063
370k
      if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
8064
370k
          ((ctxt->options & XML_PARSE_NOENT) == 0) &&
8065
370k
    ((ctxt->options & XML_PARSE_DTDVALID) == 0) &&
8066
370k
    ((ctxt->options & XML_PARSE_DTDLOAD) == 0) &&
8067
370k
    ((ctxt->options & XML_PARSE_DTDATTR) == 0) &&
8068
370k
    (ctxt->replaceEntities == 0) &&
8069
370k
    (ctxt->validate == 0))
8070
73
    return;
8071
8072
369k
      input = xmlNewEntityInputStream(ctxt, entity);
8073
369k
      if (xmlPushInput(ctxt, input) < 0) {
8074
2.41k
                xmlFreeInputStream(input);
8075
2.41k
    return;
8076
2.41k
            }
8077
8078
367k
      if (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) {
8079
                /*
8080
                 * Get the 4 first bytes and decode the charset
8081
                 * if enc != XML_CHAR_ENCODING_NONE
8082
                 * plug some encoding conversion routines.
8083
                 * Note that, since we may have some non-UTF8
8084
                 * encoding (like UTF16, bug 135229), the 'length'
8085
                 * is not known, but we can calculate based upon
8086
                 * the amount of data in the buffer.
8087
                 */
8088
11.7k
                GROW
8089
11.7k
                if (ctxt->instate == XML_PARSER_EOF)
8090
0
                    return;
8091
11.7k
                if ((ctxt->input->end - ctxt->input->cur)>=4) {
8092
11.7k
                    start[0] = RAW;
8093
11.7k
                    start[1] = NXT(1);
8094
11.7k
                    start[2] = NXT(2);
8095
11.7k
                    start[3] = NXT(3);
8096
11.7k
                    enc = xmlDetectCharEncoding(start, 4);
8097
11.7k
                    if (enc != XML_CHAR_ENCODING_NONE) {
8098
35
                        xmlSwitchEncoding(ctxt, enc);
8099
35
                    }
8100
11.7k
                }
8101
8102
11.7k
                if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
8103
11.7k
                    (IS_BLANK_CH(NXT(5)))) {
8104
9
                    xmlParseTextDecl(ctxt);
8105
9
                }
8106
11.7k
            }
8107
367k
  }
8108
370k
    }
8109
400k
    ctxt->hasPErefs = 1;
8110
400k
}
8111
8112
/**
8113
 * xmlLoadEntityContent:
8114
 * @ctxt:  an XML parser context
8115
 * @entity: an unloaded system entity
8116
 *
8117
 * Load the original content of the given system entity from the
8118
 * ExternalID/SystemID given. This is to be used for Included in Literal
8119
 * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
8120
 *
8121
 * Returns 0 in case of success and -1 in case of failure
8122
 */
8123
static int
8124
4.65k
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
8125
4.65k
    xmlParserInputPtr input;
8126
4.65k
    xmlBufferPtr buf;
8127
4.65k
    int l, c;
8128
4.65k
    int count = 0;
8129
8130
4.65k
    if ((ctxt == NULL) || (entity == NULL) ||
8131
4.65k
        ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
8132
4.65k
   (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
8133
4.65k
  (entity->content != NULL)) {
8134
0
  xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8135
0
              "xmlLoadEntityContent parameter error");
8136
0
        return(-1);
8137
0
    }
8138
8139
4.65k
    if (xmlParserDebugEntities)
8140
0
  xmlGenericError(xmlGenericErrorContext,
8141
0
    "Reading %s entity content input\n", entity->name);
8142
8143
4.65k
    buf = xmlBufferCreate();
8144
4.65k
    if (buf == NULL) {
8145
0
  xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8146
0
              "xmlLoadEntityContent parameter error");
8147
0
        return(-1);
8148
0
    }
8149
4.65k
    xmlBufferSetAllocationScheme(buf, XML_BUFFER_ALLOC_DOUBLEIT);
8150
8151
4.65k
    input = xmlNewEntityInputStream(ctxt, entity);
8152
4.65k
    if (input == NULL) {
8153
409
  xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8154
409
              "xmlLoadEntityContent input error");
8155
409
  xmlBufferFree(buf);
8156
409
        return(-1);
8157
409
    }
8158
8159
    /*
8160
     * Push the entity as the current input, read char by char
8161
     * saving to the buffer until the end of the entity or an error
8162
     */
8163
4.24k
    if (xmlPushInput(ctxt, input) < 0) {
8164
0
        xmlBufferFree(buf);
8165
0
  xmlFreeInputStream(input);
8166
0
  return(-1);
8167
0
    }
8168
8169
4.24k
    GROW;
8170
4.24k
    c = CUR_CHAR(l);
8171
2.05M
    while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
8172
2.05M
           (IS_CHAR(c))) {
8173
2.05M
        xmlBufferAdd(buf, ctxt->input->cur, l);
8174
2.05M
  if (count++ > XML_PARSER_CHUNK_SIZE) {
8175
18.3k
      count = 0;
8176
18.3k
      GROW;
8177
18.3k
            if (ctxt->instate == XML_PARSER_EOF) {
8178
0
                xmlBufferFree(buf);
8179
0
                return(-1);
8180
0
            }
8181
18.3k
  }
8182
2.05M
  NEXTL(l);
8183
2.05M
  c = CUR_CHAR(l);
8184
2.05M
  if (c == 0) {
8185
3.75k
      count = 0;
8186
3.75k
      GROW;
8187
3.75k
            if (ctxt->instate == XML_PARSER_EOF) {
8188
0
                xmlBufferFree(buf);
8189
0
                return(-1);
8190
0
            }
8191
3.75k
      c = CUR_CHAR(l);
8192
3.75k
  }
8193
2.05M
    }
8194
8195
4.24k
    if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
8196
3.10k
        xmlPopInput(ctxt);
8197
3.10k
    } else if (!IS_CHAR(c)) {
8198
1.14k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
8199
1.14k
                          "xmlLoadEntityContent: invalid char value %d\n",
8200
1.14k
                    c);
8201
1.14k
  xmlBufferFree(buf);
8202
1.14k
  return(-1);
8203
1.14k
    }
8204
3.10k
    entity->content = buf->content;
8205
3.10k
    buf->content = NULL;
8206
3.10k
    xmlBufferFree(buf);
8207
8208
3.10k
    return(0);
8209
4.24k
}
8210
8211
/**
8212
 * xmlParseStringPEReference:
8213
 * @ctxt:  an XML parser context
8214
 * @str:  a pointer to an index in the string
8215
 *
8216
 * parse PEReference declarations
8217
 *
8218
 * [69] PEReference ::= '%' Name ';'
8219
 *
8220
 * [ WFC: No Recursion ]
8221
 * A parsed entity must not contain a recursive
8222
 * reference to itself, either directly or indirectly.
8223
 *
8224
 * [ WFC: Entity Declared ]
8225
 * In a document without any DTD, a document with only an internal DTD
8226
 * subset which contains no parameter entity references, or a document
8227
 * with "standalone='yes'", ...  ... The declaration of a parameter
8228
 * entity must precede any reference to it...
8229
 *
8230
 * [ VC: Entity Declared ]
8231
 * In a document with an external subset or external parameter entities
8232
 * with "standalone='no'", ...  ... The declaration of a parameter entity
8233
 * must precede any reference to it...
8234
 *
8235
 * [ WFC: In DTD ]
8236
 * Parameter-entity references may only appear in the DTD.
8237
 * NOTE: misleading but this is handled.
8238
 *
8239
 * Returns the string of the entity content.
8240
 *         str is updated to the current value of the index
8241
 */
8242
static xmlEntityPtr
8243
416k
xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
8244
416k
    const xmlChar *ptr;
8245
416k
    xmlChar cur;
8246
416k
    xmlChar *name;
8247
416k
    xmlEntityPtr entity = NULL;
8248
8249
416k
    if ((str == NULL) || (*str == NULL)) return(NULL);
8250
416k
    ptr = *str;
8251
416k
    cur = *ptr;
8252
416k
    if (cur != '%')
8253
0
        return(NULL);
8254
416k
    ptr++;
8255
416k
    name = xmlParseStringName(ctxt, &ptr);
8256
416k
    if (name == NULL) {
8257
90.3k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8258
90.3k
           "xmlParseStringPEReference: no name\n");
8259
90.3k
  *str = ptr;
8260
90.3k
  return(NULL);
8261
90.3k
    }
8262
326k
    cur = *ptr;
8263
326k
    if (cur != ';') {
8264
33.3k
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
8265
33.3k
  xmlFree(name);
8266
33.3k
  *str = ptr;
8267
33.3k
  return(NULL);
8268
33.3k
    }
8269
292k
    ptr++;
8270
8271
    /*
8272
     * Increase the number of entity references parsed
8273
     */
8274
292k
    ctxt->nbentities++;
8275
8276
    /*
8277
     * Request the entity from SAX
8278
     */
8279
292k
    if ((ctxt->sax != NULL) &&
8280
292k
  (ctxt->sax->getParameterEntity != NULL))
8281
292k
  entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8282
292k
    if (ctxt->instate == XML_PARSER_EOF) {
8283
0
  xmlFree(name);
8284
0
  *str = ptr;
8285
0
  return(NULL);
8286
0
    }
8287
292k
    if (entity == NULL) {
8288
  /*
8289
   * [ WFC: Entity Declared ]
8290
   * In a document without any DTD, a document with only an
8291
   * internal DTD subset which contains no parameter entity
8292
   * references, or a document with "standalone='yes'", ...
8293
   * ... The declaration of a parameter entity must precede
8294
   * any reference to it...
8295
   */
8296
55.8k
  if ((ctxt->standalone == 1) ||
8297
55.8k
      ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) {
8298
3
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8299
3
     "PEReference: %%%s; not found\n", name);
8300
55.8k
  } else {
8301
      /*
8302
       * [ VC: Entity Declared ]
8303
       * In a document with an external subset or external
8304
       * parameter entities with "standalone='no'", ...
8305
       * ... The declaration of a parameter entity must
8306
       * precede any reference to it...
8307
       */
8308
55.8k
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8309
55.8k
        "PEReference: %%%s; not found\n",
8310
55.8k
        name, NULL);
8311
55.8k
      ctxt->valid = 0;
8312
55.8k
  }
8313
55.8k
  xmlParserEntityCheck(ctxt, 0, NULL, 0);
8314
237k
    } else {
8315
  /*
8316
   * Internal checking in case the entity quest barfed
8317
   */
8318
237k
  if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8319
237k
      (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8320
0
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8321
0
        "%%%s; is not a parameter entity\n",
8322
0
        name, NULL);
8323
0
  }
8324
237k
    }
8325
292k
    ctxt->hasPErefs = 1;
8326
292k
    xmlFree(name);
8327
292k
    *str = ptr;
8328
292k
    return(entity);
8329
292k
}
8330
8331
/**
8332
 * xmlParseDocTypeDecl:
8333
 * @ctxt:  an XML parser context
8334
 *
8335
 * DEPRECATED: Internal function, don't use.
8336
 *
8337
 * parse a DOCTYPE declaration
8338
 *
8339
 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
8340
 *                      ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8341
 *
8342
 * [ VC: Root Element Type ]
8343
 * The Name in the document type declaration must match the element
8344
 * type of the root element.
8345
 */
8346
8347
void
8348
287k
xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
8349
287k
    const xmlChar *name = NULL;
8350
287k
    xmlChar *ExternalID = NULL;
8351
287k
    xmlChar *URI = NULL;
8352
8353
    /*
8354
     * We know that '<!DOCTYPE' has been detected.
8355
     */
8356
287k
    SKIP(9);
8357
8358
287k
    SKIP_BLANKS;
8359
8360
    /*
8361
     * Parse the DOCTYPE name.
8362
     */
8363
287k
    name = xmlParseName(ctxt);
8364
287k
    if (name == NULL) {
8365
1.10k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8366
1.10k
           "xmlParseDocTypeDecl : no DOCTYPE name !\n");
8367
1.10k
    }
8368
287k
    ctxt->intSubName = name;
8369
8370
287k
    SKIP_BLANKS;
8371
8372
    /*
8373
     * Check for SystemID and ExternalID
8374
     */
8375
287k
    URI = xmlParseExternalID(ctxt, &ExternalID, 1);
8376
8377
287k
    if ((URI != NULL) || (ExternalID != NULL)) {
8378
111k
        ctxt->hasExternalSubset = 1;
8379
111k
    }
8380
287k
    ctxt->extSubURI = URI;
8381
287k
    ctxt->extSubSystem = ExternalID;
8382
8383
287k
    SKIP_BLANKS;
8384
8385
    /*
8386
     * Create and update the internal subset.
8387
     */
8388
287k
    if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
8389
287k
  (!ctxt->disableSAX))
8390
278k
  ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
8391
287k
    if (ctxt->instate == XML_PARSER_EOF)
8392
0
  return;
8393
8394
    /*
8395
     * Is there any internal subset declarations ?
8396
     * they are handled separately in xmlParseInternalSubset()
8397
     */
8398
287k
    if (RAW == '[')
8399
206k
  return;
8400
8401
    /*
8402
     * We should be at the end of the DOCTYPE declaration.
8403
     */
8404
80.6k
    if (RAW != '>') {
8405
9.80k
  xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8406
9.80k
    }
8407
80.6k
    NEXT;
8408
80.6k
}
8409
8410
/**
8411
 * xmlParseInternalSubset:
8412
 * @ctxt:  an XML parser context
8413
 *
8414
 * parse the internal subset declaration
8415
 *
8416
 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8417
 */
8418
8419
static void
8420
175k
xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
8421
    /*
8422
     * Is there any DTD definition ?
8423
     */
8424
175k
    if (RAW == '[') {
8425
175k
        int baseInputNr = ctxt->inputNr;
8426
175k
        ctxt->instate = XML_PARSER_DTD;
8427
175k
        NEXT;
8428
  /*
8429
   * Parse the succession of Markup declarations and
8430
   * PEReferences.
8431
   * Subsequence (markupdecl | PEReference | S)*
8432
   */
8433
2.45M
  while (((RAW != ']') || (ctxt->inputNr > baseInputNr)) &&
8434
2.45M
               (ctxt->instate != XML_PARSER_EOF)) {
8435
2.32M
      int id = ctxt->input->id;
8436
2.32M
      unsigned long cons = CUR_CONSUMED;
8437
8438
2.32M
      SKIP_BLANKS;
8439
2.32M
      xmlParseMarkupDecl(ctxt);
8440
2.32M
      xmlParsePEReference(ctxt);
8441
8442
            /*
8443
             * Conditional sections are allowed from external entities included
8444
             * by PE References in the internal subset.
8445
             */
8446
2.32M
            if ((ctxt->inputNr > 1) && (ctxt->input->filename != NULL) &&
8447
2.32M
                (RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
8448
0
                xmlParseConditionalSections(ctxt);
8449
0
            }
8450
8451
2.32M
      if ((id == ctxt->input->id) && (cons == CUR_CONSUMED)) {
8452
48.4k
    xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8453
48.4k
       "xmlParseInternalSubset: error detected in Markup declaration\n");
8454
48.4k
                if (ctxt->inputNr > baseInputNr)
8455
5.57k
                    xmlPopInput(ctxt);
8456
42.8k
                else
8457
42.8k
        break;
8458
48.4k
      }
8459
2.32M
  }
8460
175k
  if (RAW == ']') {
8461
125k
      NEXT;
8462
125k
      SKIP_BLANKS;
8463
125k
  }
8464
175k
    }
8465
8466
    /*
8467
     * We should be at the end of the DOCTYPE declaration.
8468
     */
8469
175k
    if (RAW != '>') {
8470
50.4k
  xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8471
50.4k
  return;
8472
50.4k
    }
8473
125k
    NEXT;
8474
125k
}
8475
8476
#ifdef LIBXML_SAX1_ENABLED
8477
/**
8478
 * xmlParseAttribute:
8479
 * @ctxt:  an XML parser context
8480
 * @value:  a xmlChar ** used to store the value of the attribute
8481
 *
8482
 * DEPRECATED: Internal function, don't use.
8483
 *
8484
 * parse an attribute
8485
 *
8486
 * [41] Attribute ::= Name Eq AttValue
8487
 *
8488
 * [ WFC: No External Entity References ]
8489
 * Attribute values cannot contain direct or indirect entity references
8490
 * to external entities.
8491
 *
8492
 * [ WFC: No < in Attribute Values ]
8493
 * The replacement text of any entity referred to directly or indirectly in
8494
 * an attribute value (other than "&lt;") must not contain a <.
8495
 *
8496
 * [ VC: Attribute Value Type ]
8497
 * The attribute must have been declared; the value must be of the type
8498
 * declared for it.
8499
 *
8500
 * [25] Eq ::= S? '=' S?
8501
 *
8502
 * With namespace:
8503
 *
8504
 * [NS 11] Attribute ::= QName Eq AttValue
8505
 *
8506
 * Also the case QName == xmlns:??? is handled independently as a namespace
8507
 * definition.
8508
 *
8509
 * Returns the attribute name, and the value in *value.
8510
 */
8511
8512
const xmlChar *
8513
1.46M
xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
8514
1.46M
    const xmlChar *name;
8515
1.46M
    xmlChar *val;
8516
8517
1.46M
    *value = NULL;
8518
1.46M
    GROW;
8519
1.46M
    name = xmlParseName(ctxt);
8520
1.46M
    if (name == NULL) {
8521
120k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8522
120k
                 "error parsing attribute name\n");
8523
120k
        return(NULL);
8524
120k
    }
8525
8526
    /*
8527
     * read the value
8528
     */
8529
1.34M
    SKIP_BLANKS;
8530
1.34M
    if (RAW == '=') {
8531
1.27M
        NEXT;
8532
1.27M
  SKIP_BLANKS;
8533
1.27M
  val = xmlParseAttValue(ctxt);
8534
1.27M
  ctxt->instate = XML_PARSER_CONTENT;
8535
1.27M
    } else {
8536
67.2k
  xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8537
67.2k
         "Specification mandates value for attribute %s\n", name);
8538
67.2k
  return(NULL);
8539
67.2k
    }
8540
8541
    /*
8542
     * Check that xml:lang conforms to the specification
8543
     * No longer registered as an error, just generate a warning now
8544
     * since this was deprecated in XML second edition
8545
     */
8546
1.27M
    if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8547
4.43k
  if (!xmlCheckLanguageID(val)) {
8548
2.29k
      xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8549
2.29k
              "Malformed value for xml:lang : %s\n",
8550
2.29k
        val, NULL);
8551
2.29k
  }
8552
4.43k
    }
8553
8554
    /*
8555
     * Check that xml:space conforms to the specification
8556
     */
8557
1.27M
    if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8558
681
  if (xmlStrEqual(val, BAD_CAST "default"))
8559
68
      *(ctxt->space) = 0;
8560
613
  else if (xmlStrEqual(val, BAD_CAST "preserve"))
8561
249
      *(ctxt->space) = 1;
8562
364
  else {
8563
364
    xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8564
364
"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8565
364
                                 val, NULL);
8566
364
  }
8567
681
    }
8568
8569
1.27M
    *value = val;
8570
1.27M
    return(name);
8571
1.34M
}
8572
8573
/**
8574
 * xmlParseStartTag:
8575
 * @ctxt:  an XML parser context
8576
 *
8577
 * DEPRECATED: Internal function, don't use.
8578
 *
8579
 * parse a start of tag either for rule element or
8580
 * EmptyElement. In both cases we don't parse the tag closing chars.
8581
 *
8582
 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8583
 *
8584
 * [ WFC: Unique Att Spec ]
8585
 * No attribute name may appear more than once in the same start-tag or
8586
 * empty-element tag.
8587
 *
8588
 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8589
 *
8590
 * [ WFC: Unique Att Spec ]
8591
 * No attribute name may appear more than once in the same start-tag or
8592
 * empty-element tag.
8593
 *
8594
 * With namespace:
8595
 *
8596
 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8597
 *
8598
 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8599
 *
8600
 * Returns the element name parsed
8601
 */
8602
8603
const xmlChar *
8604
2.50M
xmlParseStartTag(xmlParserCtxtPtr ctxt) {
8605
2.50M
    const xmlChar *name;
8606
2.50M
    const xmlChar *attname;
8607
2.50M
    xmlChar *attvalue;
8608
2.50M
    const xmlChar **atts = ctxt->atts;
8609
2.50M
    int nbatts = 0;
8610
2.50M
    int maxatts = ctxt->maxatts;
8611
2.50M
    int i;
8612
8613
2.50M
    if (RAW != '<') return(NULL);
8614
2.50M
    NEXT1;
8615
8616
2.50M
    name = xmlParseName(ctxt);
8617
2.50M
    if (name == NULL) {
8618
278k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8619
278k
       "xmlParseStartTag: invalid element name\n");
8620
278k
        return(NULL);
8621
278k
    }
8622
8623
    /*
8624
     * Now parse the attributes, it ends up with the ending
8625
     *
8626
     * (S Attribute)* S?
8627
     */
8628
2.22M
    SKIP_BLANKS;
8629
2.22M
    GROW;
8630
8631
2.63M
    while (((RAW != '>') &&
8632
2.63M
     ((RAW != '/') || (NXT(1) != '>')) &&
8633
2.63M
     (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
8634
1.46M
        int id = ctxt->input->id;
8635
1.46M
  unsigned long cons = CUR_CONSUMED;
8636
8637
1.46M
  attname = xmlParseAttribute(ctxt, &attvalue);
8638
1.46M
        if ((attname != NULL) && (attvalue != NULL)) {
8639
      /*
8640
       * [ WFC: Unique Att Spec ]
8641
       * No attribute name may appear more than once in the same
8642
       * start-tag or empty-element tag.
8643
       */
8644
1.62M
      for (i = 0; i < nbatts;i += 2) {
8645
356k
          if (xmlStrEqual(atts[i], attname)) {
8646
2.21k
        xmlErrAttributeDup(ctxt, NULL, attname);
8647
2.21k
        xmlFree(attvalue);
8648
2.21k
        goto failed;
8649
2.21k
    }
8650
356k
      }
8651
      /*
8652
       * Add the pair to atts
8653
       */
8654
1.26M
      if (atts == NULL) {
8655
159k
          maxatts = 22; /* allow for 10 attrs by default */
8656
159k
          atts = (const xmlChar **)
8657
159k
           xmlMalloc(maxatts * sizeof(xmlChar *));
8658
159k
    if (atts == NULL) {
8659
0
        xmlErrMemory(ctxt, NULL);
8660
0
        if (attvalue != NULL)
8661
0
      xmlFree(attvalue);
8662
0
        goto failed;
8663
0
    }
8664
159k
    ctxt->atts = atts;
8665
159k
    ctxt->maxatts = maxatts;
8666
1.10M
      } else if (nbatts + 4 > maxatts) {
8667
427
          const xmlChar **n;
8668
8669
427
          maxatts *= 2;
8670
427
          n = (const xmlChar **) xmlRealloc((void *) atts,
8671
427
               maxatts * sizeof(const xmlChar *));
8672
427
    if (n == NULL) {
8673
0
        xmlErrMemory(ctxt, NULL);
8674
0
        if (attvalue != NULL)
8675
0
      xmlFree(attvalue);
8676
0
        goto failed;
8677
0
    }
8678
427
    atts = n;
8679
427
    ctxt->atts = atts;
8680
427
    ctxt->maxatts = maxatts;
8681
427
      }
8682
1.26M
      atts[nbatts++] = attname;
8683
1.26M
      atts[nbatts++] = attvalue;
8684
1.26M
      atts[nbatts] = NULL;
8685
1.26M
      atts[nbatts + 1] = NULL;
8686
1.26M
  } else {
8687
193k
      if (attvalue != NULL)
8688
0
    xmlFree(attvalue);
8689
193k
  }
8690
8691
1.46M
failed:
8692
8693
1.46M
  GROW
8694
1.46M
  if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8695
925k
      break;
8696
535k
  if (SKIP_BLANKS == 0) {
8697
249k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8698
249k
         "attributes construct error\n");
8699
249k
  }
8700
535k
        if ((cons == CUR_CONSUMED) && (id == ctxt->input->id) &&
8701
535k
            (attname == NULL) && (attvalue == NULL)) {
8702
120k
      xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
8703
120k
         "xmlParseStartTag: problem parsing attributes\n");
8704
120k
      break;
8705
120k
  }
8706
414k
  SHRINK;
8707
414k
        GROW;
8708
414k
    }
8709
8710
    /*
8711
     * SAX: Start of Element !
8712
     */
8713
2.22M
    if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
8714
2.22M
  (!ctxt->disableSAX)) {
8715
1.95M
  if (nbatts > 0)
8716
872k
      ctxt->sax->startElement(ctxt->userData, name, atts);
8717
1.08M
  else
8718
1.08M
      ctxt->sax->startElement(ctxt->userData, name, NULL);
8719
1.95M
    }
8720
8721
2.22M
    if (atts != NULL) {
8722
        /* Free only the content strings */
8723
3.06M
        for (i = 1;i < nbatts;i+=2)
8724
1.26M
      if (atts[i] != NULL)
8725
1.26M
         xmlFree((xmlChar *) atts[i]);
8726
1.79M
    }
8727
2.22M
    return(name);
8728
2.22M
}
8729
8730
/**
8731
 * xmlParseEndTag1:
8732
 * @ctxt:  an XML parser context
8733
 * @line:  line of the start tag
8734
 * @nsNr:  number of namespaces on the start tag
8735
 *
8736
 * parse an end of tag
8737
 *
8738
 * [42] ETag ::= '</' Name S? '>'
8739
 *
8740
 * With namespace
8741
 *
8742
 * [NS 9] ETag ::= '</' QName S? '>'
8743
 */
8744
8745
static void
8746
1.01M
xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
8747
1.01M
    const xmlChar *name;
8748
8749
1.01M
    GROW;
8750
1.01M
    if ((RAW != '<') || (NXT(1) != '/')) {
8751
0
  xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
8752
0
           "xmlParseEndTag: '</' not found\n");
8753
0
  return;
8754
0
    }
8755
1.01M
    SKIP(2);
8756
8757
1.01M
    name = xmlParseNameAndCompare(ctxt,ctxt->name);
8758
8759
    /*
8760
     * We should definitely be at the ending "S? '>'" part
8761
     */
8762
1.01M
    GROW;
8763
1.01M
    SKIP_BLANKS;
8764
1.01M
    if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
8765
30.3k
  xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
8766
30.3k
    } else
8767
982k
  NEXT1;
8768
8769
    /*
8770
     * [ WFC: Element Type Match ]
8771
     * The Name in an element's end-tag must match the element type in the
8772
     * start-tag.
8773
     *
8774
     */
8775
1.01M
    if (name != (xmlChar*)1) {
8776
90.7k
        if (name == NULL) name = BAD_CAST "unparsable";
8777
90.7k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
8778
90.7k
         "Opening and ending tag mismatch: %s line %d and %s\n",
8779
90.7k
                    ctxt->name, line, name);
8780
90.7k
    }
8781
8782
    /*
8783
     * SAX: End of Tag
8784
     */
8785
1.01M
    if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8786
1.01M
  (!ctxt->disableSAX))
8787
784k
        ctxt->sax->endElement(ctxt->userData, ctxt->name);
8788
8789
1.01M
    namePop(ctxt);
8790
1.01M
    spacePop(ctxt);
8791
1.01M
    return;
8792
1.01M
}
8793
8794
/**
8795
 * xmlParseEndTag:
8796
 * @ctxt:  an XML parser context
8797
 *
8798
 * DEPRECATED: Internal function, don't use.
8799
 *
8800
 * parse an end of tag
8801
 *
8802
 * [42] ETag ::= '</' Name S? '>'
8803
 *
8804
 * With namespace
8805
 *
8806
 * [NS 9] ETag ::= '</' QName S? '>'
8807
 */
8808
8809
void
8810
0
xmlParseEndTag(xmlParserCtxtPtr ctxt) {
8811
0
    xmlParseEndTag1(ctxt, 0);
8812
0
}
8813
#endif /* LIBXML_SAX1_ENABLED */
8814
8815
/************************************************************************
8816
 *                  *
8817
 *          SAX 2 specific operations       *
8818
 *                  *
8819
 ************************************************************************/
8820
8821
/*
8822
 * xmlGetNamespace:
8823
 * @ctxt:  an XML parser context
8824
 * @prefix:  the prefix to lookup
8825
 *
8826
 * Lookup the namespace name for the @prefix (which can be NULL)
8827
 * The prefix must come from the @ctxt->dict dictionary
8828
 *
8829
 * Returns the namespace name or NULL if not bound
8830
 */
8831
static const xmlChar *
8832
5.76M
xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
8833
5.76M
    int i;
8834
8835
5.76M
    if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
8836
6.21M
    for (i = ctxt->nsNr - 2;i >= 0;i-=2)
8837
1.95M
        if (ctxt->nsTab[i] == prefix) {
8838
1.47M
      if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
8839
2.27k
          return(NULL);
8840
1.47M
      return(ctxt->nsTab[i + 1]);
8841
1.47M
  }
8842
4.25M
    return(NULL);
8843
5.73M
}
8844
8845
/**
8846
 * xmlParseQName:
8847
 * @ctxt:  an XML parser context
8848
 * @prefix:  pointer to store the prefix part
8849
 *
8850
 * parse an XML Namespace QName
8851
 *
8852
 * [6]  QName  ::= (Prefix ':')? LocalPart
8853
 * [7]  Prefix  ::= NCName
8854
 * [8]  LocalPart  ::= NCName
8855
 *
8856
 * Returns the Name parsed or NULL
8857
 */
8858
8859
static const xmlChar *
8860
11.1M
xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8861
11.1M
    const xmlChar *l, *p;
8862
8863
11.1M
    GROW;
8864
8865
11.1M
    l = xmlParseNCName(ctxt);
8866
11.1M
    if (l == NULL) {
8867
853k
        if (CUR == ':') {
8868
1.75k
      l = xmlParseName(ctxt);
8869
1.75k
      if (l != NULL) {
8870
1.75k
          xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8871
1.75k
             "Failed to parse QName '%s'\n", l, NULL, NULL);
8872
1.75k
    *prefix = NULL;
8873
1.75k
    return(l);
8874
1.75k
      }
8875
1.75k
  }
8876
851k
        return(NULL);
8877
853k
    }
8878
10.2M
    if (CUR == ':') {
8879
1.90M
        NEXT;
8880
1.90M
  p = l;
8881
1.90M
  l = xmlParseNCName(ctxt);
8882
1.90M
  if (l == NULL) {
8883
11.1k
      xmlChar *tmp;
8884
8885
11.1k
            if (ctxt->instate == XML_PARSER_EOF)
8886
0
                return(NULL);
8887
11.1k
            xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8888
11.1k
               "Failed to parse QName '%s:'\n", p, NULL, NULL);
8889
11.1k
      l = xmlParseNmtoken(ctxt);
8890
11.1k
      if (l == NULL) {
8891
7.09k
                if (ctxt->instate == XML_PARSER_EOF)
8892
0
                    return(NULL);
8893
7.09k
    tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
8894
7.09k
            } else {
8895
4.02k
    tmp = xmlBuildQName(l, p, NULL, 0);
8896
4.02k
    xmlFree((char *)l);
8897
4.02k
      }
8898
11.1k
      p = xmlDictLookup(ctxt->dict, tmp, -1);
8899
11.1k
      if (tmp != NULL) xmlFree(tmp);
8900
11.1k
      *prefix = NULL;
8901
11.1k
      return(p);
8902
11.1k
  }
8903
1.89M
  if (CUR == ':') {
8904
4.99k
      xmlChar *tmp;
8905
8906
4.99k
            xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8907
4.99k
               "Failed to parse QName '%s:%s:'\n", p, l, NULL);
8908
4.99k
      NEXT;
8909
4.99k
      tmp = (xmlChar *) xmlParseName(ctxt);
8910
4.99k
      if (tmp != NULL) {
8911
4.02k
          tmp = xmlBuildQName(tmp, l, NULL, 0);
8912
4.02k
    l = xmlDictLookup(ctxt->dict, tmp, -1);
8913
4.02k
    if (tmp != NULL) xmlFree(tmp);
8914
4.02k
    *prefix = p;
8915
4.02k
    return(l);
8916
4.02k
      }
8917
969
            if (ctxt->instate == XML_PARSER_EOF)
8918
0
                return(NULL);
8919
969
      tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
8920
969
      l = xmlDictLookup(ctxt->dict, tmp, -1);
8921
969
      if (tmp != NULL) xmlFree(tmp);
8922
969
      *prefix = p;
8923
969
      return(l);
8924
969
  }
8925
1.88M
  *prefix = p;
8926
1.88M
    } else
8927
8.36M
        *prefix = NULL;
8928
10.2M
    return(l);
8929
10.2M
}
8930
8931
/**
8932
 * xmlParseQNameAndCompare:
8933
 * @ctxt:  an XML parser context
8934
 * @name:  the localname
8935
 * @prefix:  the prefix, if any.
8936
 *
8937
 * parse an XML name and compares for match
8938
 * (specialized for endtag parsing)
8939
 *
8940
 * Returns NULL for an illegal name, (xmlChar*) 1 for success
8941
 * and the name for mismatch
8942
 */
8943
8944
static const xmlChar *
8945
xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8946
510k
                        xmlChar const *prefix) {
8947
510k
    const xmlChar *cmp;
8948
510k
    const xmlChar *in;
8949
510k
    const xmlChar *ret;
8950
510k
    const xmlChar *prefix2;
8951
8952
510k
    if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8953
8954
510k
    GROW;
8955
510k
    in = ctxt->input->cur;
8956
8957
510k
    cmp = prefix;
8958
1.86M
    while (*in != 0 && *in == *cmp) {
8959
1.34M
  ++in;
8960
1.34M
  ++cmp;
8961
1.34M
    }
8962
510k
    if ((*cmp == 0) && (*in == ':')) {
8963
492k
        in++;
8964
492k
  cmp = name;
8965
4.13M
  while (*in != 0 && *in == *cmp) {
8966
3.64M
      ++in;
8967
3.64M
      ++cmp;
8968
3.64M
  }
8969
492k
  if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
8970
      /* success */
8971
447k
            ctxt->input->col += in - ctxt->input->cur;
8972
447k
      ctxt->input->cur = in;
8973
447k
      return((const xmlChar*) 1);
8974
447k
  }
8975
492k
    }
8976
    /*
8977
     * all strings come from the dictionary, equality can be done directly
8978
     */
8979
63.2k
    ret = xmlParseQName (ctxt, &prefix2);
8980
63.2k
    if ((ret == name) && (prefix == prefix2))
8981
658
  return((const xmlChar*) 1);
8982
62.6k
    return ret;
8983
63.2k
}
8984
8985
/**
8986
 * xmlParseAttValueInternal:
8987
 * @ctxt:  an XML parser context
8988
 * @len:  attribute len result
8989
 * @alloc:  whether the attribute was reallocated as a new string
8990
 * @normalize:  if 1 then further non-CDATA normalization must be done
8991
 *
8992
 * parse a value for an attribute.
8993
 * NOTE: if no normalization is needed, the routine will return pointers
8994
 *       directly from the data buffer.
8995
 *
8996
 * 3.3.3 Attribute-Value Normalization:
8997
 * Before the value of an attribute is passed to the application or
8998
 * checked for validity, the XML processor must normalize it as follows:
8999
 * - a character reference is processed by appending the referenced
9000
 *   character to the attribute value
9001
 * - an entity reference is processed by recursively processing the
9002
 *   replacement text of the entity
9003
 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
9004
 *   appending #x20 to the normalized value, except that only a single
9005
 *   #x20 is appended for a "#xD#xA" sequence that is part of an external
9006
 *   parsed entity or the literal entity value of an internal parsed entity
9007
 * - other characters are processed by appending them to the normalized value
9008
 * If the declared value is not CDATA, then the XML processor must further
9009
 * process the normalized attribute value by discarding any leading and
9010
 * trailing space (#x20) characters, and by replacing sequences of space
9011
 * (#x20) characters by a single space (#x20) character.
9012
 * All attributes for which no declaration has been read should be treated
9013
 * by a non-validating parser as if declared CDATA.
9014
 *
9015
 * Returns the AttValue parsed or NULL. The value has to be freed by the
9016
 *     caller if it was copied, this can be detected by val[*len] == 0.
9017
 */
9018
9019
#define GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end) \
9020
7.43k
    const xmlChar *oldbase = ctxt->input->base;\
9021
7.43k
    GROW;\
9022
7.43k
    if (ctxt->instate == XML_PARSER_EOF)\
9023
7.43k
        return(NULL);\
9024
7.43k
    if (oldbase != ctxt->input->base) {\
9025
0
        ptrdiff_t delta = ctxt->input->base - oldbase;\
9026
0
        start = start + delta;\
9027
0
        in = in + delta;\
9028
0
    }\
9029
7.43k
    end = ctxt->input->end;
9030
9031
static xmlChar *
9032
xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
9033
                         int normalize)
9034
6.32M
{
9035
6.32M
    xmlChar limit = 0;
9036
6.32M
    const xmlChar *in = NULL, *start, *end, *last;
9037
6.32M
    xmlChar *ret = NULL;
9038
6.32M
    int line, col;
9039
6.32M
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
9040
1.86M
                    XML_MAX_HUGE_LENGTH :
9041
6.32M
                    XML_MAX_TEXT_LENGTH;
9042
9043
6.32M
    GROW;
9044
6.32M
    in = (xmlChar *) CUR_PTR;
9045
6.32M
    line = ctxt->input->line;
9046
6.32M
    col = ctxt->input->col;
9047
6.32M
    if (*in != '"' && *in != '\'') {
9048
16.0k
        xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
9049
16.0k
        return (NULL);
9050
16.0k
    }
9051
6.31M
    ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
9052
9053
    /*
9054
     * try to handle in this routine the most common case where no
9055
     * allocation of a new string is required and where content is
9056
     * pure ASCII.
9057
     */
9058
6.31M
    limit = *in++;
9059
6.31M
    col++;
9060
6.31M
    end = ctxt->input->end;
9061
6.31M
    start = in;
9062
6.31M
    if (in >= end) {
9063
745
        GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9064
745
    }
9065
6.31M
    if (normalize) {
9066
        /*
9067
   * Skip any leading spaces
9068
   */
9069
272k
  while ((in < end) && (*in != limit) &&
9070
272k
         ((*in == 0x20) || (*in == 0x9) ||
9071
269k
          (*in == 0xA) || (*in == 0xD))) {
9072
94.4k
      if (*in == 0xA) {
9073
39.7k
          line++; col = 1;
9074
54.6k
      } else {
9075
54.6k
          col++;
9076
54.6k
      }
9077
94.4k
      in++;
9078
94.4k
      start = in;
9079
94.4k
      if (in >= end) {
9080
225
                GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9081
225
                if ((in - start) > maxLength) {
9082
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9083
0
                                   "AttValue length too long\n");
9084
0
                    return(NULL);
9085
0
                }
9086
225
      }
9087
94.4k
  }
9088
597k
  while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9089
597k
         (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
9090
424k
      col++;
9091
424k
      if ((*in++ == 0x20) && (*in == 0x20)) break;
9092
418k
      if (in >= end) {
9093
492
                GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9094
492
                if ((in - start) > maxLength) {
9095
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9096
0
                                   "AttValue length too long\n");
9097
0
                    return(NULL);
9098
0
                }
9099
492
      }
9100
418k
  }
9101
178k
  last = in;
9102
  /*
9103
   * skip the trailing blanks
9104
   */
9105
187k
  while ((last[-1] == 0x20) && (last > start)) last--;
9106
225k
  while ((in < end) && (*in != limit) &&
9107
225k
         ((*in == 0x20) || (*in == 0x9) ||
9108
96.2k
          (*in == 0xA) || (*in == 0xD))) {
9109
47.0k
      if (*in == 0xA) {
9110
21.9k
          line++, col = 1;
9111
25.0k
      } else {
9112
25.0k
          col++;
9113
25.0k
      }
9114
47.0k
      in++;
9115
47.0k
      if (in >= end) {
9116
197
    const xmlChar *oldbase = ctxt->input->base;
9117
197
    GROW;
9118
197
                if (ctxt->instate == XML_PARSER_EOF)
9119
0
                    return(NULL);
9120
197
    if (oldbase != ctxt->input->base) {
9121
0
        ptrdiff_t delta = ctxt->input->base - oldbase;
9122
0
        start = start + delta;
9123
0
        in = in + delta;
9124
0
        last = last + delta;
9125
0
    }
9126
197
    end = ctxt->input->end;
9127
197
                if ((in - start) > maxLength) {
9128
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9129
0
                                   "AttValue length too long\n");
9130
0
                    return(NULL);
9131
0
                }
9132
197
      }
9133
47.0k
  }
9134
178k
        if ((in - start) > maxLength) {
9135
0
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9136
0
                           "AttValue length too long\n");
9137
0
            return(NULL);
9138
0
        }
9139
178k
  if (*in != limit) goto need_complex;
9140
6.13M
    } else {
9141
122M
  while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9142
122M
         (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
9143
116M
      in++;
9144
116M
      col++;
9145
116M
      if (in >= end) {
9146
5.96k
                GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9147
5.96k
                if ((in - start) > maxLength) {
9148
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9149
0
                                   "AttValue length too long\n");
9150
0
                    return(NULL);
9151
0
                }
9152
5.96k
      }
9153
116M
  }
9154
6.13M
  last = in;
9155
6.13M
        if ((in - start) > maxLength) {
9156
0
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9157
0
                           "AttValue length too long\n");
9158
0
            return(NULL);
9159
0
        }
9160
6.13M
  if (*in != limit) goto need_complex;
9161
6.13M
    }
9162
5.91M
    in++;
9163
5.91M
    col++;
9164
5.91M
    if (len != NULL) {
9165
4.67M
        if (alloc) *alloc = 0;
9166
4.67M
        *len = last - start;
9167
4.67M
        ret = (xmlChar *) start;
9168
4.67M
    } else {
9169
1.24M
        if (alloc) *alloc = 1;
9170
1.24M
        ret = xmlStrndup(start, last - start);
9171
1.24M
    }
9172
5.91M
    CUR_PTR = in;
9173
5.91M
    ctxt->input->line = line;
9174
5.91M
    ctxt->input->col = col;
9175
5.91M
    return ret;
9176
394k
need_complex:
9177
394k
    if (alloc) *alloc = 1;
9178
394k
    return xmlParseAttValueComplex(ctxt, len, normalize);
9179
6.31M
}
9180
9181
/**
9182
 * xmlParseAttribute2:
9183
 * @ctxt:  an XML parser context
9184
 * @pref:  the element prefix
9185
 * @elem:  the element name
9186
 * @prefix:  a xmlChar ** used to store the value of the attribute prefix
9187
 * @value:  a xmlChar ** used to store the value of the attribute
9188
 * @len:  an int * to save the length of the attribute
9189
 * @alloc:  an int * to indicate if the attribute was allocated
9190
 *
9191
 * parse an attribute in the new SAX2 framework.
9192
 *
9193
 * Returns the attribute name, and the value in *value.
9194
 */
9195
9196
static const xmlChar *
9197
xmlParseAttribute2(xmlParserCtxtPtr ctxt,
9198
                   const xmlChar * pref, const xmlChar * elem,
9199
                   const xmlChar ** prefix, xmlChar ** value,
9200
                   int *len, int *alloc)
9201
5.04M
{
9202
5.04M
    const xmlChar *name;
9203
5.04M
    xmlChar *val, *internal_val = NULL;
9204
5.04M
    int normalize = 0;
9205
9206
5.04M
    *value = NULL;
9207
5.04M
    GROW;
9208
5.04M
    name = xmlParseQName(ctxt, prefix);
9209
5.04M
    if (name == NULL) {
9210
73.3k
        xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9211
73.3k
                       "error parsing attribute name\n");
9212
73.3k
        return (NULL);
9213
73.3k
    }
9214
9215
    /*
9216
     * get the type if needed
9217
     */
9218
4.97M
    if (ctxt->attsSpecial != NULL) {
9219
731k
        int type;
9220
9221
731k
        type = (int) (ptrdiff_t) xmlHashQLookup2(ctxt->attsSpecial,
9222
731k
                                                 pref, elem, *prefix, name);
9223
731k
        if (type != 0)
9224
179k
            normalize = 1;
9225
731k
    }
9226
9227
    /*
9228
     * read the value
9229
     */
9230
4.97M
    SKIP_BLANKS;
9231
4.97M
    if (RAW == '=') {
9232
4.92M
        NEXT;
9233
4.92M
        SKIP_BLANKS;
9234
4.92M
        val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
9235
4.92M
  if (normalize) {
9236
      /*
9237
       * Sometimes a second normalisation pass for spaces is needed
9238
       * but that only happens if charrefs or entities references
9239
       * have been used in the attribute value, i.e. the attribute
9240
       * value has been extracted in an allocated string already.
9241
       */
9242
178k
      if (*alloc) {
9243
50.2k
          const xmlChar *val2;
9244
9245
50.2k
          val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
9246
50.2k
    if ((val2 != NULL) && (val2 != val)) {
9247
10.6k
        xmlFree(val);
9248
10.6k
        val = (xmlChar *) val2;
9249
10.6k
    }
9250
50.2k
      }
9251
178k
  }
9252
4.92M
        ctxt->instate = XML_PARSER_CONTENT;
9253
4.92M
    } else {
9254
43.7k
        xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
9255
43.7k
                          "Specification mandates value for attribute %s\n",
9256
43.7k
                          name);
9257
43.7k
        return (NULL);
9258
43.7k
    }
9259
9260
4.92M
    if (*prefix == ctxt->str_xml) {
9261
        /*
9262
         * Check that xml:lang conforms to the specification
9263
         * No longer registered as an error, just generate a warning now
9264
         * since this was deprecated in XML second edition
9265
         */
9266
33.2k
        if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
9267
5.04k
            internal_val = xmlStrndup(val, *len);
9268
5.04k
            if (!xmlCheckLanguageID(internal_val)) {
9269
2.58k
                xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
9270
2.58k
                              "Malformed value for xml:lang : %s\n",
9271
2.58k
                              internal_val, NULL);
9272
2.58k
            }
9273
5.04k
        }
9274
9275
        /*
9276
         * Check that xml:space conforms to the specification
9277
         */
9278
33.2k
        if (xmlStrEqual(name, BAD_CAST "space")) {
9279
914
            internal_val = xmlStrndup(val, *len);
9280
914
            if (xmlStrEqual(internal_val, BAD_CAST "default"))
9281
70
                *(ctxt->space) = 0;
9282
844
            else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
9283
333
                *(ctxt->space) = 1;
9284
511
            else {
9285
511
                xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
9286
511
                              "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
9287
511
                              internal_val, NULL);
9288
511
            }
9289
914
        }
9290
33.2k
        if (internal_val) {
9291
5.68k
            xmlFree(internal_val);
9292
5.68k
        }
9293
33.2k
    }
9294
9295
4.92M
    *value = val;
9296
4.92M
    return (name);
9297
4.97M
}
9298
/**
9299
 * xmlParseStartTag2:
9300
 * @ctxt:  an XML parser context
9301
 *
9302
 * parse a start of tag either for rule element or
9303
 * EmptyElement. In both cases we don't parse the tag closing chars.
9304
 * This routine is called when running SAX2 parsing
9305
 *
9306
 * [40] STag ::= '<' Name (S Attribute)* S? '>'
9307
 *
9308
 * [ WFC: Unique Att Spec ]
9309
 * No attribute name may appear more than once in the same start-tag or
9310
 * empty-element tag.
9311
 *
9312
 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
9313
 *
9314
 * [ WFC: Unique Att Spec ]
9315
 * No attribute name may appear more than once in the same start-tag or
9316
 * empty-element tag.
9317
 *
9318
 * With namespace:
9319
 *
9320
 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
9321
 *
9322
 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
9323
 *
9324
 * Returns the element name parsed
9325
 */
9326
9327
static const xmlChar *
9328
xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
9329
6.01M
                  const xmlChar **URI, int *tlen) {
9330
6.01M
    const xmlChar *localname;
9331
6.01M
    const xmlChar *prefix;
9332
6.01M
    const xmlChar *attname;
9333
6.01M
    const xmlChar *aprefix;
9334
6.01M
    const xmlChar *nsname;
9335
6.01M
    xmlChar *attvalue;
9336
6.01M
    const xmlChar **atts = ctxt->atts;
9337
6.01M
    int maxatts = ctxt->maxatts;
9338
6.01M
    int nratts, nbatts, nbdef, inputid;
9339
6.01M
    int i, j, nbNs, attval;
9340
6.01M
    unsigned long cur;
9341
6.01M
    int nsNr = ctxt->nsNr;
9342
9343
6.01M
    if (RAW != '<') return(NULL);
9344
6.01M
    NEXT1;
9345
9346
    /*
9347
     * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
9348
     *       point since the attribute values may be stored as pointers to
9349
     *       the buffer and calling SHRINK would destroy them !
9350
     *       The Shrinking is only possible once the full set of attribute
9351
     *       callbacks have been done.
9352
     */
9353
6.01M
    SHRINK;
9354
6.01M
    cur = ctxt->input->cur - ctxt->input->base;
9355
6.01M
    inputid = ctxt->input->id;
9356
6.01M
    nbatts = 0;
9357
6.01M
    nratts = 0;
9358
6.01M
    nbdef = 0;
9359
6.01M
    nbNs = 0;
9360
6.01M
    attval = 0;
9361
    /* Forget any namespaces added during an earlier parse of this element. */
9362
6.01M
    ctxt->nsNr = nsNr;
9363
9364
6.01M
    localname = xmlParseQName(ctxt, &prefix);
9365
6.01M
    if (localname == NULL) {
9366
776k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9367
776k
           "StartTag: invalid element name\n");
9368
776k
        return(NULL);
9369
776k
    }
9370
5.23M
    *tlen = ctxt->input->cur - ctxt->input->base - cur;
9371
9372
    /*
9373
     * Now parse the attributes, it ends up with the ending
9374
     *
9375
     * (S Attribute)* S?
9376
     */
9377
5.23M
    SKIP_BLANKS;
9378
5.23M
    GROW;
9379
9380
7.08M
    while (((RAW != '>') &&
9381
7.08M
     ((RAW != '/') || (NXT(1) != '>')) &&
9382
7.08M
     (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
9383
5.04M
  int id = ctxt->input->id;
9384
5.04M
  unsigned long cons = CUR_CONSUMED;
9385
5.04M
  int len = -1, alloc = 0;
9386
9387
5.04M
  attname = xmlParseAttribute2(ctxt, prefix, localname,
9388
5.04M
                               &aprefix, &attvalue, &len, &alloc);
9389
5.04M
        if ((attname == NULL) || (attvalue == NULL))
9390
123k
            goto next_attr;
9391
4.92M
  if (len < 0) len = xmlStrlen(attvalue);
9392
9393
4.92M
        if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9394
24.6k
            const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9395
24.6k
            xmlURIPtr uri;
9396
9397
24.6k
            if (URL == NULL) {
9398
0
                xmlErrMemory(ctxt, "dictionary allocation failure");
9399
0
                if ((attvalue != NULL) && (alloc != 0))
9400
0
                    xmlFree(attvalue);
9401
0
                localname = NULL;
9402
0
                goto done;
9403
0
            }
9404
24.6k
            if (*URL != 0) {
9405
23.7k
                uri = xmlParseURI((const char *) URL);
9406
23.7k
                if (uri == NULL) {
9407
5.09k
                    xmlNsErr(ctxt, XML_WAR_NS_URI,
9408
5.09k
                             "xmlns: '%s' is not a valid URI\n",
9409
5.09k
                                       URL, NULL, NULL);
9410
18.6k
                } else {
9411
18.6k
                    if (uri->scheme == NULL) {
9412
2.04k
                        xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9413
2.04k
                                  "xmlns: URI %s is not absolute\n",
9414
2.04k
                                  URL, NULL, NULL);
9415
2.04k
                    }
9416
18.6k
                    xmlFreeURI(uri);
9417
18.6k
                }
9418
23.7k
                if (URL == ctxt->str_xml_ns) {
9419
0
                    if (attname != ctxt->str_xml) {
9420
0
                        xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9421
0
                     "xml namespace URI cannot be the default namespace\n",
9422
0
                                 NULL, NULL, NULL);
9423
0
                    }
9424
0
                    goto next_attr;
9425
0
                }
9426
23.7k
                if ((len == 29) &&
9427
23.7k
                    (xmlStrEqual(URL,
9428
211
                             BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9429
0
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9430
0
                         "reuse of the xmlns namespace name is forbidden\n",
9431
0
                             NULL, NULL, NULL);
9432
0
                    goto next_attr;
9433
0
                }
9434
23.7k
            }
9435
            /*
9436
             * check that it's not a defined namespace
9437
             */
9438
47.8k
            for (j = 1;j <= nbNs;j++)
9439
24.3k
                if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9440
1.16k
                    break;
9441
24.6k
            if (j <= nbNs)
9442
1.16k
                xmlErrAttributeDup(ctxt, NULL, attname);
9443
23.4k
            else
9444
23.4k
                if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
9445
9446
4.89M
        } else if (aprefix == ctxt->str_xmlns) {
9447
191k
            const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9448
191k
            xmlURIPtr uri;
9449
9450
191k
            if (attname == ctxt->str_xml) {
9451
150
                if (URL != ctxt->str_xml_ns) {
9452
150
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9453
150
                             "xml namespace prefix mapped to wrong URI\n",
9454
150
                             NULL, NULL, NULL);
9455
150
                }
9456
                /*
9457
                 * Do not keep a namespace definition node
9458
                 */
9459
150
                goto next_attr;
9460
150
            }
9461
190k
            if (URL == ctxt->str_xml_ns) {
9462
0
                if (attname != ctxt->str_xml) {
9463
0
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9464
0
                             "xml namespace URI mapped to wrong prefix\n",
9465
0
                             NULL, NULL, NULL);
9466
0
                }
9467
0
                goto next_attr;
9468
0
            }
9469
190k
            if (attname == ctxt->str_xmlns) {
9470
104
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9471
104
                         "redefinition of the xmlns prefix is forbidden\n",
9472
104
                         NULL, NULL, NULL);
9473
104
                goto next_attr;
9474
104
            }
9475
190k
            if ((len == 29) &&
9476
190k
                (xmlStrEqual(URL,
9477
975
                             BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9478
19
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9479
19
                         "reuse of the xmlns namespace name is forbidden\n",
9480
19
                         NULL, NULL, NULL);
9481
19
                goto next_attr;
9482
19
            }
9483
190k
            if ((URL == NULL) || (URL[0] == 0)) {
9484
356
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9485
356
                         "xmlns:%s: Empty XML namespace is not allowed\n",
9486
356
                              attname, NULL, NULL);
9487
356
                goto next_attr;
9488
190k
            } else {
9489
190k
                uri = xmlParseURI((const char *) URL);
9490
190k
                if (uri == NULL) {
9491
13.1k
                    xmlNsErr(ctxt, XML_WAR_NS_URI,
9492
13.1k
                         "xmlns:%s: '%s' is not a valid URI\n",
9493
13.1k
                                       attname, URL, NULL);
9494
177k
                } else {
9495
177k
                    if ((ctxt->pedantic) && (uri->scheme == NULL)) {
9496
455
                        xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9497
455
                                  "xmlns:%s: URI %s is not absolute\n",
9498
455
                                  attname, URL, NULL);
9499
455
                    }
9500
177k
                    xmlFreeURI(uri);
9501
177k
                }
9502
190k
            }
9503
9504
            /*
9505
             * check that it's not a defined namespace
9506
             */
9507
234k
            for (j = 1;j <= nbNs;j++)
9508
44.7k
                if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9509
1.10k
                    break;
9510
190k
            if (j <= nbNs)
9511
1.10k
                xmlErrAttributeDup(ctxt, aprefix, attname);
9512
189k
            else
9513
189k
                if (nsPush(ctxt, attname, URL) > 0) nbNs++;
9514
9515
4.70M
        } else {
9516
            /*
9517
             * Add the pair to atts
9518
             */
9519
4.70M
            if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9520
177k
                if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9521
0
                    goto next_attr;
9522
0
                }
9523
177k
                maxatts = ctxt->maxatts;
9524
177k
                atts = ctxt->atts;
9525
177k
            }
9526
4.70M
            ctxt->attallocs[nratts++] = alloc;
9527
4.70M
            atts[nbatts++] = attname;
9528
4.70M
            atts[nbatts++] = aprefix;
9529
            /*
9530
             * The namespace URI field is used temporarily to point at the
9531
             * base of the current input buffer for non-alloced attributes.
9532
             * When the input buffer is reallocated, all the pointers become
9533
             * invalid, but they can be reconstructed later.
9534
             */
9535
4.70M
            if (alloc)
9536
229k
                atts[nbatts++] = NULL;
9537
4.47M
            else
9538
4.47M
                atts[nbatts++] = ctxt->input->base;
9539
4.70M
            atts[nbatts++] = attvalue;
9540
4.70M
            attvalue += len;
9541
4.70M
            atts[nbatts++] = attvalue;
9542
            /*
9543
             * tag if some deallocation is needed
9544
             */
9545
4.70M
            if (alloc != 0) attval = 1;
9546
4.70M
            attvalue = NULL; /* moved into atts */
9547
4.70M
        }
9548
9549
5.04M
next_attr:
9550
5.04M
        if ((attvalue != NULL) && (alloc != 0)) {
9551
16.4k
            xmlFree(attvalue);
9552
16.4k
            attvalue = NULL;
9553
16.4k
        }
9554
9555
5.04M
  GROW
9556
5.04M
        if (ctxt->instate == XML_PARSER_EOF)
9557
0
            break;
9558
5.04M
  if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9559
2.99M
      break;
9560
2.04M
  if (SKIP_BLANKS == 0) {
9561
200k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9562
200k
         "attributes construct error\n");
9563
200k
      break;
9564
200k
  }
9565
1.84M
        if ((cons == CUR_CONSUMED) && (id == ctxt->input->id) &&
9566
1.84M
            (attname == NULL) && (attvalue == NULL)) {
9567
0
      xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9568
0
           "xmlParseStartTag: problem parsing attributes\n");
9569
0
      break;
9570
0
  }
9571
1.84M
        GROW;
9572
1.84M
    }
9573
9574
5.23M
    if (ctxt->input->id != inputid) {
9575
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9576
0
                    "Unexpected change of input\n");
9577
0
        localname = NULL;
9578
0
        goto done;
9579
0
    }
9580
9581
    /* Reconstruct attribute value pointers. */
9582
9.94M
    for (i = 0, j = 0; j < nratts; i += 5, j++) {
9583
4.70M
        if (atts[i+2] != NULL) {
9584
            /*
9585
             * Arithmetic on dangling pointers is technically undefined
9586
             * behavior, but well...
9587
             */
9588
4.47M
            ptrdiff_t offset = ctxt->input->base - atts[i+2];
9589
4.47M
            atts[i+2]  = NULL;    /* Reset repurposed namespace URI */
9590
4.47M
            atts[i+3] += offset;  /* value */
9591
4.47M
            atts[i+4] += offset;  /* valuend */
9592
4.47M
        }
9593
4.70M
    }
9594
9595
    /*
9596
     * The attributes defaulting
9597
     */
9598
5.23M
    if (ctxt->attsDefault != NULL) {
9599
616k
        xmlDefAttrsPtr defaults;
9600
9601
616k
  defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9602
616k
  if (defaults != NULL) {
9603
64.6k
      for (i = 0;i < defaults->nbAttrs;i++) {
9604
41.4k
          attname = defaults->values[5 * i];
9605
41.4k
    aprefix = defaults->values[5 * i + 1];
9606
9607
                /*
9608
     * special work for namespaces defaulted defs
9609
     */
9610
41.4k
    if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9611
        /*
9612
         * check that it's not a defined namespace
9613
         */
9614
1.93k
        for (j = 1;j <= nbNs;j++)
9615
978
            if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9616
440
          break;
9617
1.39k
              if (j <= nbNs) continue;
9618
9619
955
        nsname = xmlGetNamespace(ctxt, NULL);
9620
955
        if (nsname != defaults->values[5 * i + 2]) {
9621
725
      if (nsPush(ctxt, NULL,
9622
725
                 defaults->values[5 * i + 2]) > 0)
9623
725
          nbNs++;
9624
725
        }
9625
40.0k
    } else if (aprefix == ctxt->str_xmlns) {
9626
        /*
9627
         * check that it's not a defined namespace
9628
         */
9629
13.8k
        for (j = 1;j <= nbNs;j++)
9630
9.97k
            if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9631
9.02k
          break;
9632
12.8k
              if (j <= nbNs) continue;
9633
9634
3.83k
        nsname = xmlGetNamespace(ctxt, attname);
9635
3.83k
        if (nsname != defaults->values[2]) {
9636
3.03k
      if (nsPush(ctxt, attname,
9637
3.03k
                 defaults->values[5 * i + 2]) > 0)
9638
2.92k
          nbNs++;
9639
3.03k
        }
9640
27.1k
    } else {
9641
        /*
9642
         * check that it's not a defined attribute
9643
         */
9644
82.9k
        for (j = 0;j < nbatts;j+=5) {
9645
56.5k
      if ((attname == atts[j]) && (aprefix == atts[j+1]))
9646
800
          break;
9647
56.5k
        }
9648
27.1k
        if (j < nbatts) continue;
9649
9650
26.3k
        if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9651
2.98k
      if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9652
0
                            localname = NULL;
9653
0
                            goto done;
9654
0
      }
9655
2.98k
      maxatts = ctxt->maxatts;
9656
2.98k
      atts = ctxt->atts;
9657
2.98k
        }
9658
26.3k
        atts[nbatts++] = attname;
9659
26.3k
        atts[nbatts++] = aprefix;
9660
26.3k
        if (aprefix == NULL)
9661
23.3k
      atts[nbatts++] = NULL;
9662
3.03k
        else
9663
3.03k
            atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
9664
26.3k
        atts[nbatts++] = defaults->values[5 * i + 2];
9665
26.3k
        atts[nbatts++] = defaults->values[5 * i + 3];
9666
26.3k
        if ((ctxt->standalone == 1) &&
9667
26.3k
            (defaults->values[5 * i + 4] != NULL)) {
9668
0
      xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
9669
0
    "standalone: attribute %s on %s defaulted from external subset\n",
9670
0
                                   attname, localname);
9671
0
        }
9672
26.3k
        nbdef++;
9673
26.3k
    }
9674
41.4k
      }
9675
23.2k
  }
9676
616k
    }
9677
9678
    /*
9679
     * The attributes checkings
9680
     */
9681
9.96M
    for (i = 0; i < nbatts;i += 5) {
9682
        /*
9683
  * The default namespace does not apply to attribute names.
9684
  */
9685
4.73M
  if (atts[i + 1] != NULL) {
9686
523k
      nsname = xmlGetNamespace(ctxt, atts[i + 1]);
9687
523k
      if (nsname == NULL) {
9688
52.6k
    xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9689
52.6k
        "Namespace prefix %s for %s on %s is not defined\n",
9690
52.6k
        atts[i + 1], atts[i], localname);
9691
52.6k
      }
9692
523k
      atts[i + 2] = nsname;
9693
523k
  } else
9694
4.20M
      nsname = NULL;
9695
  /*
9696
   * [ WFC: Unique Att Spec ]
9697
   * No attribute name may appear more than once in the same
9698
   * start-tag or empty-element tag.
9699
   * As extended by the Namespace in XML REC.
9700
   */
9701
6.71M
        for (j = 0; j < i;j += 5) {
9702
1.98M
      if (atts[i] == atts[j]) {
9703
12.0k
          if (atts[i+1] == atts[j+1]) {
9704
3.59k
        xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
9705
3.59k
        break;
9706
3.59k
    }
9707
8.42k
    if ((nsname != NULL) && (atts[j + 2] == nsname)) {
9708
59
        xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9709
59
           "Namespaced Attribute %s in '%s' redefined\n",
9710
59
           atts[i], nsname, NULL);
9711
59
        break;
9712
59
    }
9713
8.42k
      }
9714
1.98M
  }
9715
4.73M
    }
9716
9717
5.23M
    nsname = xmlGetNamespace(ctxt, prefix);
9718
5.23M
    if ((prefix != NULL) && (nsname == NULL)) {
9719
135k
  xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9720
135k
           "Namespace prefix %s on %s is not defined\n",
9721
135k
     prefix, localname, NULL);
9722
135k
    }
9723
5.23M
    *pref = prefix;
9724
5.23M
    *URI = nsname;
9725
9726
    /*
9727
     * SAX: Start of Element !
9728
     */
9729
5.23M
    if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9730
5.23M
  (!ctxt->disableSAX)) {
9731
4.49M
  if (nbNs > 0)
9732
154k
      ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9733
154k
        nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
9734
154k
        nbatts / 5, nbdef, atts);
9735
4.33M
  else
9736
4.33M
      ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9737
4.33M
                    nsname, 0, NULL, nbatts / 5, nbdef, atts);
9738
4.49M
    }
9739
9740
5.23M
done:
9741
    /*
9742
     * Free up attribute allocated strings if needed
9743
     */
9744
5.23M
    if (attval != 0) {
9745
485k
  for (i = 3,j = 0; j < nratts;i += 5,j++)
9746
279k
      if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9747
229k
          xmlFree((xmlChar *) atts[i]);
9748
206k
    }
9749
9750
5.23M
    return(localname);
9751
5.23M
}
9752
9753
/**
9754
 * xmlParseEndTag2:
9755
 * @ctxt:  an XML parser context
9756
 * @line:  line of the start tag
9757
 * @nsNr:  number of namespaces on the start tag
9758
 *
9759
 * parse an end of tag
9760
 *
9761
 * [42] ETag ::= '</' Name S? '>'
9762
 *
9763
 * With namespace
9764
 *
9765
 * [NS 9] ETag ::= '</' QName S? '>'
9766
 */
9767
9768
static void
9769
1.82M
xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlStartTag *tag) {
9770
1.82M
    const xmlChar *name;
9771
9772
1.82M
    GROW;
9773
1.82M
    if ((RAW != '<') || (NXT(1) != '/')) {
9774
0
  xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
9775
0
  return;
9776
0
    }
9777
1.82M
    SKIP(2);
9778
9779
1.82M
    if (tag->prefix == NULL)
9780
1.31M
        name = xmlParseNameAndCompare(ctxt, ctxt->name);
9781
510k
    else
9782
510k
        name = xmlParseQNameAndCompare(ctxt, ctxt->name, tag->prefix);
9783
9784
    /*
9785
     * We should definitely be at the ending "S? '>'" part
9786
     */
9787
1.82M
    GROW;
9788
1.82M
    if (ctxt->instate == XML_PARSER_EOF)
9789
0
        return;
9790
1.82M
    SKIP_BLANKS;
9791
1.82M
    if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
9792
42.5k
  xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
9793
42.5k
    } else
9794
1.78M
  NEXT1;
9795
9796
    /*
9797
     * [ WFC: Element Type Match ]
9798
     * The Name in an element's end-tag must match the element type in the
9799
     * start-tag.
9800
     *
9801
     */
9802
1.82M
    if (name != (xmlChar*)1) {
9803
118k
        if (name == NULL) name = BAD_CAST "unparsable";
9804
118k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
9805
118k
         "Opening and ending tag mismatch: %s line %d and %s\n",
9806
118k
                    ctxt->name, tag->line, name);
9807
118k
    }
9808
9809
    /*
9810
     * SAX: End of Tag
9811
     */
9812
1.82M
    if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9813
1.82M
  (!ctxt->disableSAX))
9814
1.45M
  ctxt->sax->endElementNs(ctxt->userData, ctxt->name, tag->prefix,
9815
1.45M
                                tag->URI);
9816
9817
1.82M
    spacePop(ctxt);
9818
1.82M
    if (tag->nsNr != 0)
9819
35.5k
  nsPop(ctxt, tag->nsNr);
9820
1.82M
}
9821
9822
/**
9823
 * xmlParseCDSect:
9824
 * @ctxt:  an XML parser context
9825
 *
9826
 * DEPRECATED: Internal function, don't use.
9827
 *
9828
 * Parse escaped pure raw content.
9829
 *
9830
 * [18] CDSect ::= CDStart CData CDEnd
9831
 *
9832
 * [19] CDStart ::= '<![CDATA['
9833
 *
9834
 * [20] Data ::= (Char* - (Char* ']]>' Char*))
9835
 *
9836
 * [21] CDEnd ::= ']]>'
9837
 */
9838
void
9839
18.8k
xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9840
18.8k
    xmlChar *buf = NULL;
9841
18.8k
    int len = 0;
9842
18.8k
    int size = XML_PARSER_BUFFER_SIZE;
9843
18.8k
    int r, rl;
9844
18.8k
    int s, sl;
9845
18.8k
    int cur, l;
9846
18.8k
    int count = 0;
9847
18.8k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
9848
5.45k
                    XML_MAX_HUGE_LENGTH :
9849
18.8k
                    XML_MAX_TEXT_LENGTH;
9850
9851
    /* Check 2.6.0 was NXT(0) not RAW */
9852
18.8k
    if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9853
18.8k
  SKIP(9);
9854
18.8k
    } else
9855
0
        return;
9856
9857
18.8k
    ctxt->instate = XML_PARSER_CDATA_SECTION;
9858
18.8k
    r = CUR_CHAR(rl);
9859
18.8k
    if (!IS_CHAR(r)) {
9860
242
  xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9861
242
  ctxt->instate = XML_PARSER_CONTENT;
9862
242
        return;
9863
242
    }
9864
18.6k
    NEXTL(rl);
9865
18.6k
    s = CUR_CHAR(sl);
9866
18.6k
    if (!IS_CHAR(s)) {
9867
216
  xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9868
216
  ctxt->instate = XML_PARSER_CONTENT;
9869
216
        return;
9870
216
    }
9871
18.3k
    NEXTL(sl);
9872
18.3k
    cur = CUR_CHAR(l);
9873
18.3k
    buf = (xmlChar *) xmlMallocAtomic(size);
9874
18.3k
    if (buf == NULL) {
9875
0
  xmlErrMemory(ctxt, NULL);
9876
0
  return;
9877
0
    }
9878
12.5M
    while (IS_CHAR(cur) &&
9879
12.5M
           ((r != ']') || (s != ']') || (cur != '>'))) {
9880
12.5M
  if (len + 5 >= size) {
9881
39.5k
      xmlChar *tmp;
9882
9883
39.5k
      tmp = (xmlChar *) xmlRealloc(buf, size * 2);
9884
39.5k
      if (tmp == NULL) {
9885
0
          xmlFree(buf);
9886
0
    xmlErrMemory(ctxt, NULL);
9887
0
    return;
9888
0
      }
9889
39.5k
      buf = tmp;
9890
39.5k
      size *= 2;
9891
39.5k
  }
9892
12.5M
  COPY_BUF(rl,buf,len,r);
9893
12.5M
  r = s;
9894
12.5M
  rl = sl;
9895
12.5M
  s = cur;
9896
12.5M
  sl = l;
9897
12.5M
  count++;
9898
12.5M
  if (count > 50) {
9899
237k
      SHRINK;
9900
237k
      GROW;
9901
237k
            if (ctxt->instate == XML_PARSER_EOF) {
9902
0
    xmlFree(buf);
9903
0
    return;
9904
0
            }
9905
237k
      count = 0;
9906
237k
  }
9907
12.5M
  NEXTL(l);
9908
12.5M
  cur = CUR_CHAR(l);
9909
12.5M
        if (len > maxLength) {
9910
0
            xmlFatalErrMsg(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9911
0
                           "CData section too big found\n");
9912
0
            xmlFree(buf);
9913
0
            return;
9914
0
        }
9915
12.5M
    }
9916
18.3k
    buf[len] = 0;
9917
18.3k
    ctxt->instate = XML_PARSER_CONTENT;
9918
18.3k
    if (cur != '>') {
9919
5.43k
  xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9920
5.43k
                       "CData section not finished\n%.50s\n", buf);
9921
5.43k
  xmlFree(buf);
9922
5.43k
        return;
9923
5.43k
    }
9924
12.9k
    NEXTL(l);
9925
9926
    /*
9927
     * OK the buffer is to be consumed as cdata.
9928
     */
9929
12.9k
    if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9930
8.56k
  if (ctxt->sax->cdataBlock != NULL)
9931
6.06k
      ctxt->sax->cdataBlock(ctxt->userData, buf, len);
9932
2.50k
  else if (ctxt->sax->characters != NULL)
9933
2.50k
      ctxt->sax->characters(ctxt->userData, buf, len);
9934
8.56k
    }
9935
12.9k
    xmlFree(buf);
9936
12.9k
}
9937
9938
/**
9939
 * xmlParseContentInternal:
9940
 * @ctxt:  an XML parser context
9941
 *
9942
 * Parse a content sequence. Stops at EOF or '</'. Leaves checking of
9943
 * unexpected EOF to the caller.
9944
 */
9945
9946
static void
9947
962k
xmlParseContentInternal(xmlParserCtxtPtr ctxt) {
9948
962k
    int nameNr = ctxt->nameNr;
9949
9950
962k
    GROW;
9951
17.6M
    while ((RAW != 0) &&
9952
17.6M
     (ctxt->instate != XML_PARSER_EOF)) {
9953
16.7M
        int id = ctxt->input->id;
9954
16.7M
  unsigned long cons = CUR_CONSUMED;
9955
16.7M
  const xmlChar *cur = ctxt->input->cur;
9956
9957
  /*
9958
   * First case : a Processing Instruction.
9959
   */
9960
16.7M
  if ((*cur == '<') && (cur[1] == '?')) {
9961
61.1k
      xmlParsePI(ctxt);
9962
61.1k
  }
9963
9964
  /*
9965
   * Second case : a CDSection
9966
   */
9967
  /* 2.6.0 test was *cur not RAW */
9968
16.7M
  else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9969
18.8k
      xmlParseCDSect(ctxt);
9970
18.8k
  }
9971
9972
  /*
9973
   * Third case :  a comment
9974
   */
9975
16.6M
  else if ((*cur == '<') && (NXT(1) == '!') &&
9976
16.6M
     (NXT(2) == '-') && (NXT(3) == '-')) {
9977
94.6k
      xmlParseComment(ctxt);
9978
94.6k
      ctxt->instate = XML_PARSER_CONTENT;
9979
94.6k
  }
9980
9981
  /*
9982
   * Fourth case :  a sub-element.
9983
   */
9984
16.6M
  else if (*cur == '<') {
9985
6.44M
            if (NXT(1) == '/') {
9986
1.54M
                if (ctxt->nameNr <= nameNr)
9987
64.4k
                    break;
9988
1.48M
          xmlParseElementEnd(ctxt);
9989
4.89M
            } else {
9990
4.89M
          xmlParseElementStart(ctxt);
9991
4.89M
            }
9992
6.44M
  }
9993
9994
  /*
9995
   * Fifth case : a reference. If if has not been resolved,
9996
   *    parsing returns it's Name, create the node
9997
   */
9998
9999
10.1M
  else if (*cur == '&') {
10000
1.51M
      xmlParseReference(ctxt);
10001
1.51M
  }
10002
10003
  /*
10004
   * Last case, text. Note that References are handled directly.
10005
   */
10006
8.64M
  else {
10007
8.64M
      xmlParseCharData(ctxt, 0);
10008
8.64M
  }
10009
10010
16.7M
  GROW;
10011
16.7M
  SHRINK;
10012
10013
16.7M
  if ((cons == CUR_CONSUMED) && (id == ctxt->input->id)) {
10014
5.22k
      xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
10015
5.22k
                  "detected an error in element content\n");
10016
5.22k
      xmlHaltParser(ctxt);
10017
5.22k
            break;
10018
5.22k
  }
10019
16.7M
    }
10020
962k
}
10021
10022
/**
10023
 * xmlParseContent:
10024
 * @ctxt:  an XML parser context
10025
 *
10026
 * Parse a content sequence. Stops at EOF or '</'.
10027
 *
10028
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10029
 */
10030
10031
void
10032
821k
xmlParseContent(xmlParserCtxtPtr ctxt) {
10033
821k
    int nameNr = ctxt->nameNr;
10034
10035
821k
    xmlParseContentInternal(ctxt);
10036
10037
821k
    if ((ctxt->instate != XML_PARSER_EOF) && (ctxt->nameNr > nameNr)) {
10038
3.62k
        const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
10039
3.62k
        int line = ctxt->pushTab[ctxt->nameNr - 1].line;
10040
3.62k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
10041
3.62k
                "Premature end of data in tag %s line %d\n",
10042
3.62k
    name, line, NULL);
10043
3.62k
    }
10044
821k
}
10045
10046
/**
10047
 * xmlParseElement:
10048
 * @ctxt:  an XML parser context
10049
 *
10050
 * DEPRECATED: Internal function, don't use.
10051
 *
10052
 * parse an XML element
10053
 *
10054
 * [39] element ::= EmptyElemTag | STag content ETag
10055
 *
10056
 * [ WFC: Element Type Match ]
10057
 * The Name in an element's end-tag must match the element type in the
10058
 * start-tag.
10059
 *
10060
 */
10061
10062
void
10063
191k
xmlParseElement(xmlParserCtxtPtr ctxt) {
10064
191k
    if (xmlParseElementStart(ctxt) != 0)
10065
49.5k
        return;
10066
10067
141k
    xmlParseContentInternal(ctxt);
10068
141k
    if (ctxt->instate == XML_PARSER_EOF)
10069
1.15k
  return;
10070
10071
140k
    if (CUR == 0) {
10072
76.5k
        const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
10073
76.5k
        int line = ctxt->pushTab[ctxt->nameNr - 1].line;
10074
76.5k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
10075
76.5k
                "Premature end of data in tag %s line %d\n",
10076
76.5k
    name, line, NULL);
10077
76.5k
        return;
10078
76.5k
    }
10079
10080
64.0k
    xmlParseElementEnd(ctxt);
10081
64.0k
}
10082
10083
/**
10084
 * xmlParseElementStart:
10085
 * @ctxt:  an XML parser context
10086
 *
10087
 * Parse the start of an XML element. Returns -1 in case of error, 0 if an
10088
 * opening tag was parsed, 1 if an empty element was parsed.
10089
 */
10090
static int
10091
5.09M
xmlParseElementStart(xmlParserCtxtPtr ctxt) {
10092
5.09M
    const xmlChar *name;
10093
5.09M
    const xmlChar *prefix = NULL;
10094
5.09M
    const xmlChar *URI = NULL;
10095
5.09M
    xmlParserNodeInfo node_info;
10096
5.09M
    int line, tlen = 0;
10097
5.09M
    xmlNodePtr ret;
10098
5.09M
    int nsNr = ctxt->nsNr;
10099
10100
5.09M
    if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) &&
10101
5.09M
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
10102
0
  xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
10103
0
     "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
10104
0
        xmlParserMaxDepth);
10105
0
  xmlHaltParser(ctxt);
10106
0
  return(-1);
10107
0
    }
10108
10109
    /* Capture start position */
10110
5.09M
    if (ctxt->record_info) {
10111
0
        node_info.begin_pos = ctxt->input->consumed +
10112
0
                          (CUR_PTR - ctxt->input->base);
10113
0
  node_info.begin_line = ctxt->input->line;
10114
0
    }
10115
10116
5.09M
    if (ctxt->spaceNr == 0)
10117
0
  spacePush(ctxt, -1);
10118
5.09M
    else if (*ctxt->space == -2)
10119
440k
  spacePush(ctxt, -1);
10120
4.64M
    else
10121
4.64M
  spacePush(ctxt, *ctxt->space);
10122
10123
5.09M
    line = ctxt->input->line;
10124
5.09M
#ifdef LIBXML_SAX1_ENABLED
10125
5.09M
    if (ctxt->sax2)
10126
3.58M
#endif /* LIBXML_SAX1_ENABLED */
10127
3.58M
        name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
10128
1.50M
#ifdef LIBXML_SAX1_ENABLED
10129
1.50M
    else
10130
1.50M
  name = xmlParseStartTag(ctxt);
10131
5.09M
#endif /* LIBXML_SAX1_ENABLED */
10132
5.09M
    if (ctxt->instate == XML_PARSER_EOF)
10133
27
  return(-1);
10134
5.09M
    if (name == NULL) {
10135
1.02M
  spacePop(ctxt);
10136
1.02M
        return(-1);
10137
1.02M
    }
10138
4.06M
    nameNsPush(ctxt, name, prefix, URI, line, ctxt->nsNr - nsNr);
10139
4.06M
    ret = ctxt->node;
10140
10141
4.06M
#ifdef LIBXML_VALID_ENABLED
10142
    /*
10143
     * [ VC: Root Element Type ]
10144
     * The Name in the document type declaration must match the element
10145
     * type of the root element.
10146
     */
10147
4.06M
    if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
10148
4.06M
        ctxt->node && (ctxt->node == ctxt->myDoc->children))
10149
0
        ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
10150
4.06M
#endif /* LIBXML_VALID_ENABLED */
10151
10152
    /*
10153
     * Check for an Empty Element.
10154
     */
10155
4.06M
    if ((RAW == '/') && (NXT(1) == '>')) {
10156
1.39M
        SKIP(2);
10157
1.39M
  if (ctxt->sax2) {
10158
1.14M
      if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
10159
1.14M
    (!ctxt->disableSAX))
10160
807k
    ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
10161
1.14M
#ifdef LIBXML_SAX1_ENABLED
10162
1.14M
  } else {
10163
251k
      if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
10164
251k
    (!ctxt->disableSAX))
10165
212k
    ctxt->sax->endElement(ctxt->userData, name);
10166
251k
#endif /* LIBXML_SAX1_ENABLED */
10167
251k
  }
10168
1.39M
  namePop(ctxt);
10169
1.39M
  spacePop(ctxt);
10170
1.39M
  if (nsNr != ctxt->nsNr)
10171
21.0k
      nsPop(ctxt, ctxt->nsNr - nsNr);
10172
1.39M
  if ( ret != NULL && ctxt->record_info ) {
10173
0
     node_info.end_pos = ctxt->input->consumed +
10174
0
            (CUR_PTR - ctxt->input->base);
10175
0
     node_info.end_line = ctxt->input->line;
10176
0
     node_info.node = ret;
10177
0
     xmlParserAddNodeInfo(ctxt, &node_info);
10178
0
  }
10179
1.39M
  return(1);
10180
1.39M
    }
10181
2.66M
    if (RAW == '>') {
10182
2.49M
        NEXT1;
10183
2.49M
    } else {
10184
171k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
10185
171k
         "Couldn't find end of Start Tag %s line %d\n",
10186
171k
                    name, line, NULL);
10187
10188
  /*
10189
   * end of parsing of this node.
10190
   */
10191
171k
  nodePop(ctxt);
10192
171k
  namePop(ctxt);
10193
171k
  spacePop(ctxt);
10194
171k
  if (nsNr != ctxt->nsNr)
10195
7.18k
      nsPop(ctxt, ctxt->nsNr - nsNr);
10196
10197
  /*
10198
   * Capture end position and add node
10199
   */
10200
171k
  if ( ret != NULL && ctxt->record_info ) {
10201
0
     node_info.end_pos = ctxt->input->consumed +
10202
0
            (CUR_PTR - ctxt->input->base);
10203
0
     node_info.end_line = ctxt->input->line;
10204
0
     node_info.node = ret;
10205
0
     xmlParserAddNodeInfo(ctxt, &node_info);
10206
0
  }
10207
171k
  return(-1);
10208
171k
    }
10209
10210
2.49M
    return(0);
10211
2.66M
}
10212
10213
/**
10214
 * xmlParseElementEnd:
10215
 * @ctxt:  an XML parser context
10216
 *
10217
 * Parse the end of an XML element.
10218
 */
10219
static void
10220
1.54M
xmlParseElementEnd(xmlParserCtxtPtr ctxt) {
10221
1.54M
    xmlParserNodeInfo node_info;
10222
1.54M
    xmlNodePtr ret = ctxt->node;
10223
10224
1.54M
    if (ctxt->nameNr <= 0)
10225
0
        return;
10226
10227
    /*
10228
     * parse the end of tag: '</' should be here.
10229
     */
10230
1.54M
    if (ctxt->sax2) {
10231
961k
  xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
10232
961k
  namePop(ctxt);
10233
961k
    }
10234
587k
#ifdef LIBXML_SAX1_ENABLED
10235
587k
    else
10236
587k
  xmlParseEndTag1(ctxt, 0);
10237
1.54M
#endif /* LIBXML_SAX1_ENABLED */
10238
10239
    /*
10240
     * Capture end position and add node
10241
     */
10242
1.54M
    if ( ret != NULL && ctxt->record_info ) {
10243
0
       node_info.end_pos = ctxt->input->consumed +
10244
0
                          (CUR_PTR - ctxt->input->base);
10245
0
       node_info.end_line = ctxt->input->line;
10246
0
       node_info.node = ret;
10247
0
       xmlParserAddNodeInfo(ctxt, &node_info);
10248
0
    }
10249
1.54M
}
10250
10251
/**
10252
 * xmlParseVersionNum:
10253
 * @ctxt:  an XML parser context
10254
 *
10255
 * DEPRECATED: Internal function, don't use.
10256
 *
10257
 * parse the XML version value.
10258
 *
10259
 * [26] VersionNum ::= '1.' [0-9]+
10260
 *
10261
 * In practice allow [0-9].[0-9]+ at that level
10262
 *
10263
 * Returns the string giving the XML version number, or NULL
10264
 */
10265
xmlChar *
10266
260k
xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
10267
260k
    xmlChar *buf = NULL;
10268
260k
    int len = 0;
10269
260k
    int size = 10;
10270
260k
    xmlChar cur;
10271
10272
260k
    buf = (xmlChar *) xmlMallocAtomic(size);
10273
260k
    if (buf == NULL) {
10274
0
  xmlErrMemory(ctxt, NULL);
10275
0
  return(NULL);
10276
0
    }
10277
260k
    cur = CUR;
10278
260k
    if (!((cur >= '0') && (cur <= '9'))) {
10279
2.38k
  xmlFree(buf);
10280
2.38k
  return(NULL);
10281
2.38k
    }
10282
258k
    buf[len++] = cur;
10283
258k
    NEXT;
10284
258k
    cur=CUR;
10285
258k
    if (cur != '.') {
10286
2.36k
  xmlFree(buf);
10287
2.36k
  return(NULL);
10288
2.36k
    }
10289
256k
    buf[len++] = cur;
10290
256k
    NEXT;
10291
256k
    cur=CUR;
10292
970k
    while ((cur >= '0') && (cur <= '9')) {
10293
714k
  if (len + 1 >= size) {
10294
933
      xmlChar *tmp;
10295
10296
933
      size *= 2;
10297
933
      tmp = (xmlChar *) xmlRealloc(buf, size);
10298
933
      if (tmp == NULL) {
10299
0
          xmlFree(buf);
10300
0
    xmlErrMemory(ctxt, NULL);
10301
0
    return(NULL);
10302
0
      }
10303
933
      buf = tmp;
10304
933
  }
10305
714k
  buf[len++] = cur;
10306
714k
  NEXT;
10307
714k
  cur=CUR;
10308
714k
    }
10309
256k
    buf[len] = 0;
10310
256k
    return(buf);
10311
256k
}
10312
10313
/**
10314
 * xmlParseVersionInfo:
10315
 * @ctxt:  an XML parser context
10316
 *
10317
 * DEPRECATED: Internal function, don't use.
10318
 *
10319
 * parse the XML version.
10320
 *
10321
 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
10322
 *
10323
 * [25] Eq ::= S? '=' S?
10324
 *
10325
 * Returns the version string, e.g. "1.0"
10326
 */
10327
10328
xmlChar *
10329
289k
xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
10330
289k
    xmlChar *version = NULL;
10331
10332
289k
    if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
10333
265k
  SKIP(7);
10334
265k
  SKIP_BLANKS;
10335
265k
  if (RAW != '=') {
10336
2.35k
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10337
2.35k
      return(NULL);
10338
2.35k
        }
10339
263k
  NEXT;
10340
263k
  SKIP_BLANKS;
10341
263k
  if (RAW == '"') {
10342
233k
      NEXT;
10343
233k
      version = xmlParseVersionNum(ctxt);
10344
233k
      if (RAW != '"') {
10345
8.40k
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10346
8.40k
      } else
10347
225k
          NEXT;
10348
233k
  } else if (RAW == '\''){
10349
26.9k
      NEXT;
10350
26.9k
      version = xmlParseVersionNum(ctxt);
10351
26.9k
      if (RAW != '\'') {
10352
824
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10353
824
      } else
10354
26.1k
          NEXT;
10355
26.9k
  } else {
10356
2.60k
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10357
2.60k
  }
10358
263k
    }
10359
286k
    return(version);
10360
289k
}
10361
10362
/**
10363
 * xmlParseEncName:
10364
 * @ctxt:  an XML parser context
10365
 *
10366
 * DEPRECATED: Internal function, don't use.
10367
 *
10368
 * parse the XML encoding name
10369
 *
10370
 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
10371
 *
10372
 * Returns the encoding name value or NULL
10373
 */
10374
xmlChar *
10375
116k
xmlParseEncName(xmlParserCtxtPtr ctxt) {
10376
116k
    xmlChar *buf = NULL;
10377
116k
    int len = 0;
10378
116k
    int size = 10;
10379
116k
    xmlChar cur;
10380
10381
116k
    cur = CUR;
10382
116k
    if (((cur >= 'a') && (cur <= 'z')) ||
10383
116k
        ((cur >= 'A') && (cur <= 'Z'))) {
10384
116k
  buf = (xmlChar *) xmlMallocAtomic(size);
10385
116k
  if (buf == NULL) {
10386
0
      xmlErrMemory(ctxt, NULL);
10387
0
      return(NULL);
10388
0
  }
10389
10390
116k
  buf[len++] = cur;
10391
116k
  NEXT;
10392
116k
  cur = CUR;
10393
1.31M
  while (((cur >= 'a') && (cur <= 'z')) ||
10394
1.31M
         ((cur >= 'A') && (cur <= 'Z')) ||
10395
1.31M
         ((cur >= '0') && (cur <= '9')) ||
10396
1.31M
         (cur == '.') || (cur == '_') ||
10397
1.31M
         (cur == '-')) {
10398
1.20M
      if (len + 1 >= size) {
10399
39.5k
          xmlChar *tmp;
10400
10401
39.5k
    size *= 2;
10402
39.5k
    tmp = (xmlChar *) xmlRealloc(buf, size);
10403
39.5k
    if (tmp == NULL) {
10404
0
        xmlErrMemory(ctxt, NULL);
10405
0
        xmlFree(buf);
10406
0
        return(NULL);
10407
0
    }
10408
39.5k
    buf = tmp;
10409
39.5k
      }
10410
1.20M
      buf[len++] = cur;
10411
1.20M
      NEXT;
10412
1.20M
      cur = CUR;
10413
1.20M
      if (cur == 0) {
10414
461
          SHRINK;
10415
461
    GROW;
10416
461
    cur = CUR;
10417
461
      }
10418
1.20M
        }
10419
116k
  buf[len] = 0;
10420
116k
    } else {
10421
594
  xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
10422
594
    }
10423
116k
    return(buf);
10424
116k
}
10425
10426
/**
10427
 * xmlParseEncodingDecl:
10428
 * @ctxt:  an XML parser context
10429
 *
10430
 * DEPRECATED: Internal function, don't use.
10431
 *
10432
 * parse the XML encoding declaration
10433
 *
10434
 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' |  "'" EncName "'")
10435
 *
10436
 * this setups the conversion filters.
10437
 *
10438
 * Returns the encoding value or NULL
10439
 */
10440
10441
const xmlChar *
10442
196k
xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
10443
196k
    xmlChar *encoding = NULL;
10444
10445
196k
    SKIP_BLANKS;
10446
196k
    if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
10447
118k
  SKIP(8);
10448
118k
  SKIP_BLANKS;
10449
118k
  if (RAW != '=') {
10450
552
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10451
552
      return(NULL);
10452
552
        }
10453
117k
  NEXT;
10454
117k
  SKIP_BLANKS;
10455
117k
  if (RAW == '"') {
10456
101k
      NEXT;
10457
101k
      encoding = xmlParseEncName(ctxt);
10458
101k
      if (RAW != '"') {
10459
3.06k
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10460
3.06k
    xmlFree((xmlChar *) encoding);
10461
3.06k
    return(NULL);
10462
3.06k
      } else
10463
98.0k
          NEXT;
10464
101k
  } else if (RAW == '\''){
10465
15.8k
      NEXT;
10466
15.8k
      encoding = xmlParseEncName(ctxt);
10467
15.8k
      if (RAW != '\'') {
10468
377
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10469
377
    xmlFree((xmlChar *) encoding);
10470
377
    return(NULL);
10471
377
      } else
10472
15.4k
          NEXT;
10473
15.8k
  } else {
10474
672
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10475
672
  }
10476
10477
        /*
10478
         * Non standard parsing, allowing the user to ignore encoding
10479
         */
10480
114k
        if (ctxt->options & XML_PARSE_IGNORE_ENC) {
10481
39.3k
      xmlFree((xmlChar *) encoding);
10482
39.3k
            return(NULL);
10483
39.3k
  }
10484
10485
  /*
10486
   * UTF-16 encoding switch has already taken place at this stage,
10487
   * more over the little-endian/big-endian selection is already done
10488
   */
10489
74.7k
        if ((encoding != NULL) &&
10490
74.7k
      ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
10491
74.4k
       (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
10492
      /*
10493
       * If no encoding was passed to the parser, that we are
10494
       * using UTF-16 and no decoder is present i.e. the
10495
       * document is apparently UTF-8 compatible, then raise an
10496
       * encoding mismatch fatal error
10497
       */
10498
168
      if ((ctxt->encoding == NULL) &&
10499
168
          (ctxt->input->buf != NULL) &&
10500
168
          (ctxt->input->buf->encoder == NULL)) {
10501
168
    xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING,
10502
168
      "Document labelled UTF-16 but has UTF-8 content\n");
10503
168
      }
10504
168
      if (ctxt->encoding != NULL)
10505
0
    xmlFree((xmlChar *) ctxt->encoding);
10506
168
      ctxt->encoding = encoding;
10507
168
  }
10508
  /*
10509
   * UTF-8 encoding is handled natively
10510
   */
10511
74.6k
        else if ((encoding != NULL) &&
10512
74.6k
      ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
10513
74.2k
       (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
10514
45.2k
      if (ctxt->encoding != NULL)
10515
0
    xmlFree((xmlChar *) ctxt->encoding);
10516
45.2k
      ctxt->encoding = encoding;
10517
45.2k
  }
10518
29.3k
  else if (encoding != NULL) {
10519
28.9k
      xmlCharEncodingHandlerPtr handler;
10520
10521
28.9k
      if (ctxt->input->encoding != NULL)
10522
0
    xmlFree((xmlChar *) ctxt->input->encoding);
10523
28.9k
      ctxt->input->encoding = encoding;
10524
10525
28.9k
            handler = xmlFindCharEncodingHandler((const char *) encoding);
10526
28.9k
      if (handler != NULL) {
10527
28.1k
    if (xmlSwitchToEncoding(ctxt, handler) < 0) {
10528
        /* failed to convert */
10529
111
        ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
10530
111
        return(NULL);
10531
111
    }
10532
28.1k
      } else {
10533
824
    xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
10534
824
      "Unsupported encoding %s\n", encoding);
10535
824
    return(NULL);
10536
824
      }
10537
28.9k
  }
10538
74.7k
    }
10539
151k
    return(encoding);
10540
196k
}
10541
10542
/**
10543
 * xmlParseSDDecl:
10544
 * @ctxt:  an XML parser context
10545
 *
10546
 * DEPRECATED: Internal function, don't use.
10547
 *
10548
 * parse the XML standalone declaration
10549
 *
10550
 * [32] SDDecl ::= S 'standalone' Eq
10551
 *                 (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
10552
 *
10553
 * [ VC: Standalone Document Declaration ]
10554
 * TODO The standalone document declaration must have the value "no"
10555
 * if any external markup declarations contain declarations of:
10556
 *  - attributes with default values, if elements to which these
10557
 *    attributes apply appear in the document without specifications
10558
 *    of values for these attributes, or
10559
 *  - entities (other than amp, lt, gt, apos, quot), if references
10560
 *    to those entities appear in the document, or
10561
 *  - attributes with values subject to normalization, where the
10562
 *    attribute appears in the document with a value which will change
10563
 *    as a result of normalization, or
10564
 *  - element types with element content, if white space occurs directly
10565
 *    within any instance of those types.
10566
 *
10567
 * Returns:
10568
 *   1 if standalone="yes"
10569
 *   0 if standalone="no"
10570
 *  -2 if standalone attribute is missing or invalid
10571
 *    (A standalone value of -2 means that the XML declaration was found,
10572
 *     but no value was specified for the standalone attribute).
10573
 */
10574
10575
int
10576
160k
xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
10577
160k
    int standalone = -2;
10578
10579
160k
    SKIP_BLANKS;
10580
160k
    if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
10581
38.6k
  SKIP(10);
10582
38.6k
        SKIP_BLANKS;
10583
38.6k
  if (RAW != '=') {
10584
334
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10585
334
      return(standalone);
10586
334
        }
10587
38.2k
  NEXT;
10588
38.2k
  SKIP_BLANKS;
10589
38.2k
        if (RAW == '\''){
10590
16.9k
      NEXT;
10591
16.9k
      if ((RAW == 'n') && (NXT(1) == 'o')) {
10592
13.4k
          standalone = 0;
10593
13.4k
                SKIP(2);
10594
13.4k
      } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10595
3.40k
                 (NXT(2) == 's')) {
10596
2.90k
          standalone = 1;
10597
2.90k
    SKIP(3);
10598
2.90k
            } else {
10599
502
    xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10600
502
      }
10601
16.9k
      if (RAW != '\'') {
10602
640
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10603
640
      } else
10604
16.2k
          NEXT;
10605
21.3k
  } else if (RAW == '"'){
10606
21.1k
      NEXT;
10607
21.1k
      if ((RAW == 'n') && (NXT(1) == 'o')) {
10608
8.20k
          standalone = 0;
10609
8.20k
    SKIP(2);
10610
12.9k
      } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10611
12.9k
                 (NXT(2) == 's')) {
10612
12.2k
          standalone = 1;
10613
12.2k
                SKIP(3);
10614
12.2k
            } else {
10615
680
    xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10616
680
      }
10617
21.1k
      if (RAW != '"') {
10618
911
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10619
911
      } else
10620
20.2k
          NEXT;
10621
21.1k
  } else {
10622
231
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10623
231
        }
10624
38.2k
    }
10625
160k
    return(standalone);
10626
160k
}
10627
10628
/**
10629
 * xmlParseXMLDecl:
10630
 * @ctxt:  an XML parser context
10631
 *
10632
 * DEPRECATED: Internal function, don't use.
10633
 *
10634
 * parse an XML declaration header
10635
 *
10636
 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10637
 */
10638
10639
void
10640
271k
xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
10641
271k
    xmlChar *version;
10642
10643
    /*
10644
     * This value for standalone indicates that the document has an
10645
     * XML declaration but it does not have a standalone attribute.
10646
     * It will be overwritten later if a standalone attribute is found.
10647
     */
10648
271k
    ctxt->input->standalone = -2;
10649
10650
    /*
10651
     * We know that '<?xml' is here.
10652
     */
10653
271k
    SKIP(5);
10654
10655
271k
    if (!IS_BLANK_CH(RAW)) {
10656
0
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10657
0
                 "Blank needed after '<?xml'\n");
10658
0
    }
10659
271k
    SKIP_BLANKS;
10660
10661
    /*
10662
     * We must have the VersionInfo here.
10663
     */
10664
271k
    version = xmlParseVersionInfo(ctxt);
10665
271k
    if (version == NULL) {
10666
32.0k
  xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
10667
239k
    } else {
10668
239k
  if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10669
      /*
10670
       * Changed here for XML-1.0 5th edition
10671
       */
10672
3.88k
      if (ctxt->options & XML_PARSE_OLD10) {
10673
1.28k
    xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10674
1.28k
                "Unsupported version '%s'\n",
10675
1.28k
                version);
10676
2.59k
      } else {
10677
2.59k
          if ((version[0] == '1') && ((version[1] == '.'))) {
10678
2.31k
        xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10679
2.31k
                      "Unsupported version '%s'\n",
10680
2.31k
          version, NULL);
10681
2.31k
    } else {
10682
277
        xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10683
277
              "Unsupported version '%s'\n",
10684
277
              version);
10685
277
    }
10686
2.59k
      }
10687
3.88k
  }
10688
239k
  if (ctxt->version != NULL)
10689
0
      xmlFree((void *) ctxt->version);
10690
239k
  ctxt->version = version;
10691
239k
    }
10692
10693
    /*
10694
     * We may have the encoding declaration
10695
     */
10696
271k
    if (!IS_BLANK_CH(RAW)) {
10697
131k
        if ((RAW == '?') && (NXT(1) == '>')) {
10698
92.9k
      SKIP(2);
10699
92.9k
      return;
10700
92.9k
  }
10701
38.1k
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10702
38.1k
    }
10703
178k
    xmlParseEncodingDecl(ctxt);
10704
178k
    if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10705
178k
         (ctxt->instate == XML_PARSER_EOF)) {
10706
  /*
10707
   * The XML REC instructs us to stop parsing right here
10708
   */
10709
787
        return;
10710
787
    }
10711
10712
    /*
10713
     * We may have the standalone status.
10714
     */
10715
177k
    if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
10716
17.6k
        if ((RAW == '?') && (NXT(1) == '>')) {
10717
16.7k
      SKIP(2);
10718
16.7k
      return;
10719
16.7k
  }
10720
926
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10721
926
    }
10722
10723
    /*
10724
     * We can grow the input buffer freely at that point
10725
     */
10726
160k
    GROW;
10727
10728
160k
    SKIP_BLANKS;
10729
160k
    ctxt->input->standalone = xmlParseSDDecl(ctxt);
10730
10731
160k
    SKIP_BLANKS;
10732
160k
    if ((RAW == '?') && (NXT(1) == '>')) {
10733
103k
        SKIP(2);
10734
103k
    } else if (RAW == '>') {
10735
        /* Deprecated old WD ... */
10736
399
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10737
399
  NEXT;
10738
56.7k
    } else {
10739
56.7k
        int c;
10740
10741
56.7k
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10742
2.52M
        while ((c = CUR) != 0) {
10743
2.51M
            NEXT;
10744
2.51M
            if (c == '>')
10745
49.5k
                break;
10746
2.51M
        }
10747
56.7k
    }
10748
160k
}
10749
10750
/**
10751
 * xmlParseMisc:
10752
 * @ctxt:  an XML parser context
10753
 *
10754
 * DEPRECATED: Internal function, don't use.
10755
 *
10756
 * parse an XML Misc* optional field.
10757
 *
10758
 * [27] Misc ::= Comment | PI |  S
10759
 */
10760
10761
void
10762
524k
xmlParseMisc(xmlParserCtxtPtr ctxt) {
10763
561k
    while (ctxt->instate != XML_PARSER_EOF) {
10764
561k
        SKIP_BLANKS;
10765
561k
        GROW;
10766
561k
        if ((RAW == '<') && (NXT(1) == '?')) {
10767
19.8k
      xmlParsePI(ctxt);
10768
541k
        } else if (CMP4(CUR_PTR, '<', '!', '-', '-')) {
10769
17.3k
      xmlParseComment(ctxt);
10770
524k
        } else {
10771
524k
            break;
10772
524k
        }
10773
561k
    }
10774
524k
}
10775
10776
/**
10777
 * xmlParseDocument:
10778
 * @ctxt:  an XML parser context
10779
 *
10780
 * parse an XML document (and build a tree if using the standard SAX
10781
 * interface).
10782
 *
10783
 * [1] document ::= prolog element Misc*
10784
 *
10785
 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
10786
 *
10787
 * Returns 0, -1 in case of error. the parser context is augmented
10788
 *                as a result of the parsing.
10789
 */
10790
10791
int
10792
239k
xmlParseDocument(xmlParserCtxtPtr ctxt) {
10793
239k
    xmlChar start[4];
10794
239k
    xmlCharEncoding enc;
10795
10796
239k
    xmlInitParser();
10797
10798
239k
    if ((ctxt == NULL) || (ctxt->input == NULL))
10799
0
        return(-1);
10800
10801
239k
    GROW;
10802
10803
    /*
10804
     * SAX: detecting the level.
10805
     */
10806
239k
    xmlDetectSAX2(ctxt);
10807
10808
    /*
10809
     * SAX: beginning of the document processing.
10810
     */
10811
239k
    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10812
239k
        ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10813
239k
    if (ctxt->instate == XML_PARSER_EOF)
10814
0
  return(-1);
10815
10816
239k
    if ((ctxt->encoding == NULL) &&
10817
239k
        ((ctxt->input->end - ctxt->input->cur) >= 4)) {
10818
  /*
10819
   * Get the 4 first bytes and decode the charset
10820
   * if enc != XML_CHAR_ENCODING_NONE
10821
   * plug some encoding conversion routines.
10822
   */
10823
234k
  start[0] = RAW;
10824
234k
  start[1] = NXT(1);
10825
234k
  start[2] = NXT(2);
10826
234k
  start[3] = NXT(3);
10827
234k
  enc = xmlDetectCharEncoding(&start[0], 4);
10828
234k
  if (enc != XML_CHAR_ENCODING_NONE) {
10829
102k
      xmlSwitchEncoding(ctxt, enc);
10830
102k
  }
10831
234k
    }
10832
10833
10834
239k
    if (CUR == 0) {
10835
1.84k
  xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10836
1.84k
  return(-1);
10837
1.84k
    }
10838
10839
    /*
10840
     * Check for the XMLDecl in the Prolog.
10841
     * do not GROW here to avoid the detected encoder to decode more
10842
     * than just the first line, unless the amount of data is really
10843
     * too small to hold "<?xml version="1.0" encoding="foo"
10844
     */
10845
237k
    if ((ctxt->input->end - ctxt->input->cur) < 35) {
10846
15.6k
       GROW;
10847
15.6k
    }
10848
237k
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10849
10850
  /*
10851
   * Note that we will switch encoding on the fly.
10852
   */
10853
94.7k
  xmlParseXMLDecl(ctxt);
10854
94.7k
  if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10855
94.7k
      (ctxt->instate == XML_PARSER_EOF)) {
10856
      /*
10857
       * The XML REC instructs us to stop parsing right here
10858
       */
10859
315
      return(-1);
10860
315
  }
10861
94.4k
  ctxt->standalone = ctxt->input->standalone;
10862
94.4k
  SKIP_BLANKS;
10863
142k
    } else {
10864
142k
  ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10865
142k
    }
10866
237k
    if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10867
225k
        ctxt->sax->startDocument(ctxt->userData);
10868
237k
    if (ctxt->instate == XML_PARSER_EOF)
10869
0
  return(-1);
10870
237k
    if ((ctxt->myDoc != NULL) && (ctxt->input != NULL) &&
10871
237k
        (ctxt->input->buf != NULL) && (ctxt->input->buf->compressed >= 0)) {
10872
0
  ctxt->myDoc->compression = ctxt->input->buf->compressed;
10873
0
    }
10874
10875
    /*
10876
     * The Misc part of the Prolog
10877
     */
10878
237k
    xmlParseMisc(ctxt);
10879
10880
    /*
10881
     * Then possibly doc type declaration(s) and more Misc
10882
     * (doctypedecl Misc*)?
10883
     */
10884
237k
    GROW;
10885
237k
    if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
10886
10887
102k
  ctxt->inSubset = 1;
10888
102k
  xmlParseDocTypeDecl(ctxt);
10889
102k
  if (RAW == '[') {
10890
74.5k
      ctxt->instate = XML_PARSER_DTD;
10891
74.5k
      xmlParseInternalSubset(ctxt);
10892
74.5k
      if (ctxt->instate == XML_PARSER_EOF)
10893
4.72k
    return(-1);
10894
74.5k
  }
10895
10896
  /*
10897
   * Create and update the external subset.
10898
   */
10899
98.1k
  ctxt->inSubset = 2;
10900
98.1k
  if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10901
98.1k
      (!ctxt->disableSAX))
10902
74.3k
      ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10903
74.3k
                                ctxt->extSubSystem, ctxt->extSubURI);
10904
98.1k
  if (ctxt->instate == XML_PARSER_EOF)
10905
2.29k
      return(-1);
10906
95.8k
  ctxt->inSubset = 0;
10907
10908
95.8k
        xmlCleanSpecialAttr(ctxt);
10909
10910
95.8k
  ctxt->instate = XML_PARSER_PROLOG;
10911
95.8k
  xmlParseMisc(ctxt);
10912
95.8k
    }
10913
10914
    /*
10915
     * Time to start parsing the tree itself
10916
     */
10917
230k
    GROW;
10918
230k
    if (RAW != '<') {
10919
38.8k
  xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10920
38.8k
           "Start tag expected, '<' not found\n");
10921
191k
    } else {
10922
191k
  ctxt->instate = XML_PARSER_CONTENT;
10923
191k
  xmlParseElement(ctxt);
10924
191k
  ctxt->instate = XML_PARSER_EPILOG;
10925
10926
10927
  /*
10928
   * The Misc part at the end
10929
   */
10930
191k
  xmlParseMisc(ctxt);
10931
10932
191k
  if (RAW != 0) {
10933
51.2k
      xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
10934
51.2k
  }
10935
191k
  ctxt->instate = XML_PARSER_EOF;
10936
191k
    }
10937
10938
    /*
10939
     * SAX: end of the document processing.
10940
     */
10941
230k
    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10942
230k
        ctxt->sax->endDocument(ctxt->userData);
10943
10944
    /*
10945
     * Remove locally kept entity definitions if the tree was not built
10946
     */
10947
230k
    if ((ctxt->myDoc != NULL) &&
10948
230k
  (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
10949
1.14k
  xmlFreeDoc(ctxt->myDoc);
10950
1.14k
  ctxt->myDoc = NULL;
10951
1.14k
    }
10952
10953
230k
    if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) {
10954
28.0k
        ctxt->myDoc->properties |= XML_DOC_WELLFORMED;
10955
28.0k
  if (ctxt->valid)
10956
19.3k
      ctxt->myDoc->properties |= XML_DOC_DTDVALID;
10957
28.0k
  if (ctxt->nsWellFormed)
10958
27.0k
      ctxt->myDoc->properties |= XML_DOC_NSVALID;
10959
28.0k
  if (ctxt->options & XML_PARSE_OLD10)
10960
7.18k
      ctxt->myDoc->properties |= XML_DOC_OLD10;
10961
28.0k
    }
10962
230k
    if (! ctxt->wellFormed) {
10963
202k
  ctxt->valid = 0;
10964
202k
  return(-1);
10965
202k
    }
10966
28.0k
    return(0);
10967
230k
}
10968
10969
/**
10970
 * xmlParseExtParsedEnt:
10971
 * @ctxt:  an XML parser context
10972
 *
10973
 * parse a general parsed entity
10974
 * An external general parsed entity is well-formed if it matches the
10975
 * production labeled extParsedEnt.
10976
 *
10977
 * [78] extParsedEnt ::= TextDecl? content
10978
 *
10979
 * Returns 0, -1 in case of error. the parser context is augmented
10980
 *                as a result of the parsing.
10981
 */
10982
10983
int
10984
0
xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
10985
0
    xmlChar start[4];
10986
0
    xmlCharEncoding enc;
10987
10988
0
    if ((ctxt == NULL) || (ctxt->input == NULL))
10989
0
        return(-1);
10990
10991
0
    xmlDetectSAX2(ctxt);
10992
10993
0
    GROW;
10994
10995
    /*
10996
     * SAX: beginning of the document processing.
10997
     */
10998
0
    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10999
0
        ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
11000
11001
    /*
11002
     * Get the 4 first bytes and decode the charset
11003
     * if enc != XML_CHAR_ENCODING_NONE
11004
     * plug some encoding conversion routines.
11005
     */
11006
0
    if ((ctxt->input->end - ctxt->input->cur) >= 4) {
11007
0
  start[0] = RAW;
11008
0
  start[1] = NXT(1);
11009
0
  start[2] = NXT(2);
11010
0
  start[3] = NXT(3);
11011
0
  enc = xmlDetectCharEncoding(start, 4);
11012
0
  if (enc != XML_CHAR_ENCODING_NONE) {
11013
0
      xmlSwitchEncoding(ctxt, enc);
11014
0
  }
11015
0
    }
11016
11017
11018
0
    if (CUR == 0) {
11019
0
  xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11020
0
    }
11021
11022
    /*
11023
     * Check for the XMLDecl in the Prolog.
11024
     */
11025
0
    GROW;
11026
0
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
11027
11028
  /*
11029
   * Note that we will switch encoding on the fly.
11030
   */
11031
0
  xmlParseXMLDecl(ctxt);
11032
0
  if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
11033
      /*
11034
       * The XML REC instructs us to stop parsing right here
11035
       */
11036
0
      return(-1);
11037
0
  }
11038
0
  SKIP_BLANKS;
11039
0
    } else {
11040
0
  ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11041
0
    }
11042
0
    if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
11043
0
        ctxt->sax->startDocument(ctxt->userData);
11044
0
    if (ctxt->instate == XML_PARSER_EOF)
11045
0
  return(-1);
11046
11047
    /*
11048
     * Doing validity checking on chunk doesn't make sense
11049
     */
11050
0
    ctxt->instate = XML_PARSER_CONTENT;
11051
0
    ctxt->validate = 0;
11052
0
    ctxt->loadsubset = 0;
11053
0
    ctxt->depth = 0;
11054
11055
0
    xmlParseContent(ctxt);
11056
0
    if (ctxt->instate == XML_PARSER_EOF)
11057
0
  return(-1);
11058
11059
0
    if ((RAW == '<') && (NXT(1) == '/')) {
11060
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11061
0
    } else if (RAW != 0) {
11062
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
11063
0
    }
11064
11065
    /*
11066
     * SAX: end of the document processing.
11067
     */
11068
0
    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11069
0
        ctxt->sax->endDocument(ctxt->userData);
11070
11071
0
    if (! ctxt->wellFormed) return(-1);
11072
0
    return(0);
11073
0
}
11074
11075
#ifdef LIBXML_PUSH_ENABLED
11076
/************************************************************************
11077
 *                  *
11078
 *    Progressive parsing interfaces        *
11079
 *                  *
11080
 ************************************************************************/
11081
11082
/**
11083
 * xmlParseLookupSequence:
11084
 * @ctxt:  an XML parser context
11085
 * @first:  the first char to lookup
11086
 * @next:  the next char to lookup or zero
11087
 * @third:  the next char to lookup or zero
11088
 *
11089
 * Try to find if a sequence (first, next, third) or  just (first next) or
11090
 * (first) is available in the input stream.
11091
 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
11092
 * to avoid rescanning sequences of bytes, it DOES change the state of the
11093
 * parser, do not use liberally.
11094
 *
11095
 * Returns the index to the current parsing point if the full sequence
11096
 *      is available, -1 otherwise.
11097
 */
11098
static int
11099
xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
11100
1.25M
                       xmlChar next, xmlChar third) {
11101
1.25M
    int base, len;
11102
1.25M
    xmlParserInputPtr in;
11103
1.25M
    const xmlChar *buf;
11104
11105
1.25M
    in = ctxt->input;
11106
1.25M
    if (in == NULL) return(-1);
11107
1.25M
    base = in->cur - in->base;
11108
1.25M
    if (base < 0) return(-1);
11109
1.25M
    if (ctxt->checkIndex > base)
11110
297k
        base = ctxt->checkIndex;
11111
1.25M
    if (in->buf == NULL) {
11112
0
  buf = in->base;
11113
0
  len = in->length;
11114
1.25M
    } else {
11115
1.25M
  buf = xmlBufContent(in->buf->buffer);
11116
1.25M
  len = xmlBufUse(in->buf->buffer);
11117
1.25M
    }
11118
    /* take into account the sequence length */
11119
1.25M
    if (third) len -= 2;
11120
980k
    else if (next) len --;
11121
2.38G
    for (;base < len;base++) {
11122
2.38G
        if (buf[base] == first) {
11123
1.68M
      if (third != 0) {
11124
689k
    if ((buf[base + 1] != next) ||
11125
689k
        (buf[base + 2] != third)) continue;
11126
999k
      } else if (next != 0) {
11127
470k
    if (buf[base + 1] != next) continue;
11128
470k
      }
11129
879k
      ctxt->checkIndex = 0;
11130
#ifdef DEBUG_PUSH
11131
      if (next == 0)
11132
    xmlGenericError(xmlGenericErrorContext,
11133
      "PP: lookup '%c' found at %d\n",
11134
      first, base);
11135
      else if (third == 0)
11136
    xmlGenericError(xmlGenericErrorContext,
11137
      "PP: lookup '%c%c' found at %d\n",
11138
      first, next, base);
11139
      else
11140
    xmlGenericError(xmlGenericErrorContext,
11141
      "PP: lookup '%c%c%c' found at %d\n",
11142
      first, next, third, base);
11143
#endif
11144
879k
      return(base - (in->cur - in->base));
11145
1.68M
  }
11146
2.38G
    }
11147
379k
    ctxt->checkIndex = base;
11148
#ifdef DEBUG_PUSH
11149
    if (next == 0)
11150
  xmlGenericError(xmlGenericErrorContext,
11151
    "PP: lookup '%c' failed\n", first);
11152
    else if (third == 0)
11153
  xmlGenericError(xmlGenericErrorContext,
11154
    "PP: lookup '%c%c' failed\n", first, next);
11155
    else
11156
  xmlGenericError(xmlGenericErrorContext,
11157
    "PP: lookup '%c%c%c' failed\n", first, next, third);
11158
#endif
11159
379k
    return(-1);
11160
1.25M
}
11161
11162
/**
11163
 * xmlParseGetLasts:
11164
 * @ctxt:  an XML parser context
11165
 * @lastlt:  pointer to store the last '<' from the input
11166
 * @lastgt:  pointer to store the last '>' from the input
11167
 *
11168
 * Lookup the last < and > in the current chunk
11169
 */
11170
static void
11171
xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
11172
3.03M
                 const xmlChar **lastgt) {
11173
3.03M
    const xmlChar *tmp;
11174
11175
3.03M
    if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
11176
0
  xmlGenericError(xmlGenericErrorContext,
11177
0
        "Internal error: xmlParseGetLasts\n");
11178
0
  return;
11179
0
    }
11180
3.03M
    if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
11181
2.01M
        tmp = ctxt->input->end;
11182
2.01M
  tmp--;
11183
1.45G
  while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
11184
2.01M
  if (tmp < ctxt->input->base) {
11185
270k
      *lastlt = NULL;
11186
270k
      *lastgt = NULL;
11187
1.74M
  } else {
11188
1.74M
      *lastlt = tmp;
11189
1.74M
      tmp++;
11190
185M
      while ((tmp < ctxt->input->end) && (*tmp != '>')) {
11191
183M
          if (*tmp == '\'') {
11192
58.8k
        tmp++;
11193
37.7M
        while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
11194
58.8k
        if (tmp < ctxt->input->end) tmp++;
11195
183M
    } else if (*tmp == '"') {
11196
837k
        tmp++;
11197
111M
        while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
11198
837k
        if (tmp < ctxt->input->end) tmp++;
11199
837k
    } else
11200
182M
        tmp++;
11201
183M
      }
11202
1.74M
      if (tmp < ctxt->input->end)
11203
838k
          *lastgt = tmp;
11204
910k
      else {
11205
910k
          tmp = *lastlt;
11206
910k
    tmp--;
11207
109M
    while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
11208
910k
    if (tmp >= ctxt->input->base)
11209
863k
        *lastgt = tmp;
11210
47.6k
    else
11211
47.6k
        *lastgt = NULL;
11212
910k
      }
11213
1.74M
  }
11214
2.01M
    } else {
11215
1.01M
        *lastlt = NULL;
11216
1.01M
  *lastgt = NULL;
11217
1.01M
    }
11218
3.03M
}
11219
/**
11220
 * xmlCheckCdataPush:
11221
 * @cur: pointer to the block of characters
11222
 * @len: length of the block in bytes
11223
 * @complete: 1 if complete CDATA block is passed in, 0 if partial block
11224
 *
11225
 * Check that the block of characters is okay as SCdata content [20]
11226
 *
11227
 * Returns the number of bytes to pass if okay, a negative index where an
11228
 *         UTF-8 error occurred otherwise
11229
 */
11230
static int
11231
84.0k
xmlCheckCdataPush(const xmlChar *utf, int len, int complete) {
11232
84.0k
    int ix;
11233
84.0k
    unsigned char c;
11234
84.0k
    int codepoint;
11235
11236
84.0k
    if ((utf == NULL) || (len <= 0))
11237
309
        return(0);
11238
11239
7.54M
    for (ix = 0; ix < len;) {      /* string is 0-terminated */
11240
7.52M
        c = utf[ix];
11241
7.52M
        if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
11242
5.20M
      if (c >= 0x20)
11243
5.02M
    ix++;
11244
171k
      else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
11245
155k
          ix++;
11246
16.5k
      else
11247
16.5k
          return(-ix);
11248
5.20M
  } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
11249
526k
      if (ix + 2 > len) return(complete ? -ix : ix);
11250
525k
      if ((utf[ix+1] & 0xc0 ) != 0x80)
11251
9.53k
          return(-ix);
11252
515k
      codepoint = (utf[ix] & 0x1f) << 6;
11253
515k
      codepoint |= utf[ix+1] & 0x3f;
11254
515k
      if (!xmlIsCharQ(codepoint))
11255
1.01k
          return(-ix);
11256
514k
      ix += 2;
11257
1.79M
  } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
11258
640k
      if (ix + 3 > len) return(complete ? -ix : ix);
11259
639k
      if (((utf[ix+1] & 0xc0) != 0x80) ||
11260
639k
          ((utf[ix+2] & 0xc0) != 0x80))
11261
5.73k
        return(-ix);
11262
633k
      codepoint = (utf[ix] & 0xf) << 12;
11263
633k
      codepoint |= (utf[ix+1] & 0x3f) << 6;
11264
633k
      codepoint |= utf[ix+2] & 0x3f;
11265
633k
      if (!xmlIsCharQ(codepoint))
11266
549
          return(-ix);
11267
633k
      ix += 3;
11268
1.15M
  } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
11269
1.14M
      if (ix + 4 > len) return(complete ? -ix : ix);
11270
1.14M
      if (((utf[ix+1] & 0xc0) != 0x80) ||
11271
1.14M
          ((utf[ix+2] & 0xc0) != 0x80) ||
11272
1.14M
    ((utf[ix+3] & 0xc0) != 0x80))
11273
9.16k
        return(-ix);
11274
1.13M
      codepoint = (utf[ix] & 0x7) << 18;
11275
1.13M
      codepoint |= (utf[ix+1] & 0x3f) << 12;
11276
1.13M
      codepoint |= (utf[ix+2] & 0x3f) << 6;
11277
1.13M
      codepoint |= utf[ix+3] & 0x3f;
11278
1.13M
      if (!xmlIsCharQ(codepoint))
11279
1.66k
          return(-ix);
11280
1.13M
      ix += 4;
11281
1.13M
  } else       /* unknown encoding */
11282
9.65k
      return(-ix);
11283
7.52M
      }
11284
25.7k
      return(ix);
11285
83.7k
}
11286
11287
/**
11288
 * xmlParseTryOrFinish:
11289
 * @ctxt:  an XML parser context
11290
 * @terminate:  last chunk indicator
11291
 *
11292
 * Try to progress on parsing
11293
 *
11294
 * Returns zero if no parsing was possible
11295
 */
11296
static int
11297
2.70M
xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
11298
2.70M
    int ret = 0;
11299
2.70M
    int avail, tlen;
11300
2.70M
    xmlChar cur, next;
11301
2.70M
    const xmlChar *lastlt, *lastgt;
11302
11303
2.70M
    if (ctxt->input == NULL)
11304
0
        return(0);
11305
11306
#ifdef DEBUG_PUSH
11307
    switch (ctxt->instate) {
11308
  case XML_PARSER_EOF:
11309
      xmlGenericError(xmlGenericErrorContext,
11310
        "PP: try EOF\n"); break;
11311
  case XML_PARSER_START:
11312
      xmlGenericError(xmlGenericErrorContext,
11313
        "PP: try START\n"); break;
11314
  case XML_PARSER_MISC:
11315
      xmlGenericError(xmlGenericErrorContext,
11316
        "PP: try MISC\n");break;
11317
  case XML_PARSER_COMMENT:
11318
      xmlGenericError(xmlGenericErrorContext,
11319
        "PP: try COMMENT\n");break;
11320
  case XML_PARSER_PROLOG:
11321
      xmlGenericError(xmlGenericErrorContext,
11322
        "PP: try PROLOG\n");break;
11323
  case XML_PARSER_START_TAG:
11324
      xmlGenericError(xmlGenericErrorContext,
11325
        "PP: try START_TAG\n");break;
11326
  case XML_PARSER_CONTENT:
11327
      xmlGenericError(xmlGenericErrorContext,
11328
        "PP: try CONTENT\n");break;
11329
  case XML_PARSER_CDATA_SECTION:
11330
      xmlGenericError(xmlGenericErrorContext,
11331
        "PP: try CDATA_SECTION\n");break;
11332
  case XML_PARSER_END_TAG:
11333
      xmlGenericError(xmlGenericErrorContext,
11334
        "PP: try END_TAG\n");break;
11335
  case XML_PARSER_ENTITY_DECL:
11336
      xmlGenericError(xmlGenericErrorContext,
11337
        "PP: try ENTITY_DECL\n");break;
11338
  case XML_PARSER_ENTITY_VALUE:
11339
      xmlGenericError(xmlGenericErrorContext,
11340
        "PP: try ENTITY_VALUE\n");break;
11341
  case XML_PARSER_ATTRIBUTE_VALUE:
11342
      xmlGenericError(xmlGenericErrorContext,
11343
        "PP: try ATTRIBUTE_VALUE\n");break;
11344
  case XML_PARSER_DTD:
11345
      xmlGenericError(xmlGenericErrorContext,
11346
        "PP: try DTD\n");break;
11347
  case XML_PARSER_EPILOG:
11348
      xmlGenericError(xmlGenericErrorContext,
11349
        "PP: try EPILOG\n");break;
11350
  case XML_PARSER_PI:
11351
      xmlGenericError(xmlGenericErrorContext,
11352
        "PP: try PI\n");break;
11353
        case XML_PARSER_IGNORE:
11354
            xmlGenericError(xmlGenericErrorContext,
11355
        "PP: try IGNORE\n");break;
11356
    }
11357
#endif
11358
11359
2.70M
    if ((ctxt->input != NULL) &&
11360
2.70M
        (ctxt->input->cur - ctxt->input->base > 4096)) {
11361
37.8k
  xmlSHRINK(ctxt);
11362
37.8k
  ctxt->checkIndex = 0;
11363
37.8k
    }
11364
2.70M
    xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11365
11366
20.1M
    while (ctxt->instate != XML_PARSER_EOF) {
11367
20.1M
  if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
11368
126k
      return(0);
11369
11370
19.9M
  if (ctxt->input == NULL) break;
11371
19.9M
  if (ctxt->input->buf == NULL)
11372
0
      avail = ctxt->input->length -
11373
0
              (ctxt->input->cur - ctxt->input->base);
11374
19.9M
  else {
11375
      /*
11376
       * If we are operating on converted input, try to flush
11377
       * remaining chars to avoid them stalling in the non-converted
11378
       * buffer. But do not do this in document start where
11379
       * encoding="..." may not have been read and we work on a
11380
       * guessed encoding.
11381
       */
11382
19.9M
      if ((ctxt->instate != XML_PARSER_START) &&
11383
19.9M
          (ctxt->input->buf->raw != NULL) &&
11384
19.9M
    (xmlBufIsEmpty(ctxt->input->buf->raw) == 0)) {
11385
103k
                size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
11386
103k
                                                 ctxt->input);
11387
103k
    size_t current = ctxt->input->cur - ctxt->input->base;
11388
11389
103k
    xmlParserInputBufferPush(ctxt->input->buf, 0, "");
11390
103k
                xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
11391
103k
                                      base, current);
11392
103k
      }
11393
19.9M
      avail = xmlBufUse(ctxt->input->buf->buffer) -
11394
19.9M
        (ctxt->input->cur - ctxt->input->base);
11395
19.9M
  }
11396
19.9M
        if (avail < 1)
11397
311k
      goto done;
11398
19.6M
        switch (ctxt->instate) {
11399
0
            case XML_PARSER_EOF:
11400
          /*
11401
     * Document parsing is done !
11402
     */
11403
0
          goto done;
11404
755k
            case XML_PARSER_START:
11405
755k
    if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
11406
235k
        xmlChar start[4];
11407
235k
        xmlCharEncoding enc;
11408
11409
        /*
11410
         * Very first chars read from the document flow.
11411
         */
11412
235k
        if (avail < 4)
11413
21.3k
      goto done;
11414
11415
        /*
11416
         * Get the 4 first bytes and decode the charset
11417
         * if enc != XML_CHAR_ENCODING_NONE
11418
         * plug some encoding conversion routines,
11419
         * else xmlSwitchEncoding will set to (default)
11420
         * UTF8.
11421
         */
11422
214k
        start[0] = RAW;
11423
214k
        start[1] = NXT(1);
11424
214k
        start[2] = NXT(2);
11425
214k
        start[3] = NXT(3);
11426
214k
        enc = xmlDetectCharEncoding(start, 4);
11427
214k
        xmlSwitchEncoding(ctxt, enc);
11428
214k
        break;
11429
235k
    }
11430
11431
519k
    if (avail < 2)
11432
810
        goto done;
11433
518k
    cur = ctxt->input->cur[0];
11434
518k
    next = ctxt->input->cur[1];
11435
518k
    if (cur == 0) {
11436
2.59k
        if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11437
2.59k
      ctxt->sax->setDocumentLocator(ctxt->userData,
11438
2.59k
                  &xmlDefaultSAXLocator);
11439
2.59k
        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11440
2.59k
        xmlHaltParser(ctxt);
11441
#ifdef DEBUG_PUSH
11442
        xmlGenericError(xmlGenericErrorContext,
11443
          "PP: entering EOF\n");
11444
#endif
11445
2.59k
        if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11446
2.59k
      ctxt->sax->endDocument(ctxt->userData);
11447
2.59k
        goto done;
11448
2.59k
    }
11449
516k
          if ((cur == '<') && (next == '?')) {
11450
        /* PI or XML decl */
11451
283k
        if (avail < 5) return(ret);
11452
283k
        if ((!terminate) &&
11453
283k
            (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11454
92.1k
      return(ret);
11455
191k
        if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11456
191k
      ctxt->sax->setDocumentLocator(ctxt->userData,
11457
191k
                  &xmlDefaultSAXLocator);
11458
191k
        if ((ctxt->input->cur[2] == 'x') &&
11459
191k
      (ctxt->input->cur[3] == 'm') &&
11460
191k
      (ctxt->input->cur[4] == 'l') &&
11461
191k
      (IS_BLANK_CH(ctxt->input->cur[5]))) {
11462
176k
      ret += 5;
11463
#ifdef DEBUG_PUSH
11464
      xmlGenericError(xmlGenericErrorContext,
11465
        "PP: Parsing XML Decl\n");
11466
#endif
11467
176k
      xmlParseXMLDecl(ctxt);
11468
176k
      if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
11469
          /*
11470
           * The XML REC instructs us to stop parsing right
11471
           * here
11472
           */
11473
472
          xmlHaltParser(ctxt);
11474
472
          return(0);
11475
472
      }
11476
176k
      ctxt->standalone = ctxt->input->standalone;
11477
176k
      if ((ctxt->encoding == NULL) &&
11478
176k
          (ctxt->input->encoding != NULL))
11479
18.4k
          ctxt->encoding = xmlStrdup(ctxt->input->encoding);
11480
176k
      if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11481
176k
          (!ctxt->disableSAX))
11482
158k
          ctxt->sax->startDocument(ctxt->userData);
11483
176k
      ctxt->instate = XML_PARSER_MISC;
11484
#ifdef DEBUG_PUSH
11485
      xmlGenericError(xmlGenericErrorContext,
11486
        "PP: entering MISC\n");
11487
#endif
11488
176k
        } else {
11489
14.7k
      ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11490
14.7k
      if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11491
14.7k
          (!ctxt->disableSAX))
11492
14.7k
          ctxt->sax->startDocument(ctxt->userData);
11493
14.7k
      ctxt->instate = XML_PARSER_MISC;
11494
#ifdef DEBUG_PUSH
11495
      xmlGenericError(xmlGenericErrorContext,
11496
        "PP: entering MISC\n");
11497
#endif
11498
14.7k
        }
11499
232k
    } else {
11500
232k
        if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11501
232k
      ctxt->sax->setDocumentLocator(ctxt->userData,
11502
232k
                  &xmlDefaultSAXLocator);
11503
232k
        ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11504
232k
        if (ctxt->version == NULL) {
11505
0
            xmlErrMemory(ctxt, NULL);
11506
0
      break;
11507
0
        }
11508
232k
        if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11509
232k
            (!ctxt->disableSAX))
11510
232k
      ctxt->sax->startDocument(ctxt->userData);
11511
232k
        ctxt->instate = XML_PARSER_MISC;
11512
#ifdef DEBUG_PUSH
11513
        xmlGenericError(xmlGenericErrorContext,
11514
          "PP: entering MISC\n");
11515
#endif
11516
232k
    }
11517
423k
    break;
11518
3.97M
            case XML_PARSER_START_TAG: {
11519
3.97M
          const xmlChar *name;
11520
3.97M
    const xmlChar *prefix = NULL;
11521
3.97M
    const xmlChar *URI = NULL;
11522
3.97M
                int line = ctxt->input->line;
11523
3.97M
    int nsNr = ctxt->nsNr;
11524
11525
3.97M
    if ((avail < 2) && (ctxt->inputNr == 1))
11526
0
        goto done;
11527
3.97M
    cur = ctxt->input->cur[0];
11528
3.97M
          if (cur != '<') {
11529
17.7k
        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11530
17.7k
        xmlHaltParser(ctxt);
11531
17.7k
        if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11532
17.7k
      ctxt->sax->endDocument(ctxt->userData);
11533
17.7k
        goto done;
11534
17.7k
    }
11535
3.95M
    if (!terminate) {
11536
3.84M
        if (ctxt->progressive) {
11537
            /* > can be found unescaped in attribute values */
11538
3.84M
            if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
11539
528k
          goto done;
11540
3.84M
        } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11541
0
      goto done;
11542
0
        }
11543
3.84M
    }
11544
3.42M
    if (ctxt->spaceNr == 0)
11545
12.0k
        spacePush(ctxt, -1);
11546
3.41M
    else if (*ctxt->space == -2)
11547
135k
        spacePush(ctxt, -1);
11548
3.27M
    else
11549
3.27M
        spacePush(ctxt, *ctxt->space);
11550
3.42M
#ifdef LIBXML_SAX1_ENABLED
11551
3.42M
    if (ctxt->sax2)
11552
2.43M
#endif /* LIBXML_SAX1_ENABLED */
11553
2.43M
        name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
11554
995k
#ifdef LIBXML_SAX1_ENABLED
11555
995k
    else
11556
995k
        name = xmlParseStartTag(ctxt);
11557
3.42M
#endif /* LIBXML_SAX1_ENABLED */
11558
3.42M
    if (ctxt->instate == XML_PARSER_EOF)
11559
0
        goto done;
11560
3.42M
    if (name == NULL) {
11561
32.0k
        spacePop(ctxt);
11562
32.0k
        xmlHaltParser(ctxt);
11563
32.0k
        if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11564
32.0k
      ctxt->sax->endDocument(ctxt->userData);
11565
32.0k
        goto done;
11566
32.0k
    }
11567
3.39M
#ifdef LIBXML_VALID_ENABLED
11568
    /*
11569
     * [ VC: Root Element Type ]
11570
     * The Name in the document type declaration must match
11571
     * the element type of the root element.
11572
     */
11573
3.39M
    if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
11574
3.39M
        ctxt->node && (ctxt->node == ctxt->myDoc->children))
11575
0
        ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
11576
3.39M
#endif /* LIBXML_VALID_ENABLED */
11577
11578
    /*
11579
     * Check for an Empty Element.
11580
     */
11581
3.39M
    if ((RAW == '/') && (NXT(1) == '>')) {
11582
1.40M
        SKIP(2);
11583
11584
1.40M
        if (ctxt->sax2) {
11585
1.12M
      if ((ctxt->sax != NULL) &&
11586
1.12M
          (ctxt->sax->endElementNs != NULL) &&
11587
1.12M
          (!ctxt->disableSAX))
11588
1.12M
          ctxt->sax->endElementNs(ctxt->userData, name,
11589
1.12M
                                  prefix, URI);
11590
1.12M
      if (ctxt->nsNr - nsNr > 0)
11591
4.05k
          nsPop(ctxt, ctxt->nsNr - nsNr);
11592
1.12M
#ifdef LIBXML_SAX1_ENABLED
11593
1.12M
        } else {
11594
271k
      if ((ctxt->sax != NULL) &&
11595
271k
          (ctxt->sax->endElement != NULL) &&
11596
271k
          (!ctxt->disableSAX))
11597
271k
          ctxt->sax->endElement(ctxt->userData, name);
11598
271k
#endif /* LIBXML_SAX1_ENABLED */
11599
271k
        }
11600
1.40M
        if (ctxt->instate == XML_PARSER_EOF)
11601
0
      goto done;
11602
1.40M
        spacePop(ctxt);
11603
1.40M
        if (ctxt->nameNr == 0) {
11604
13.3k
      ctxt->instate = XML_PARSER_EPILOG;
11605
1.38M
        } else {
11606
1.38M
      ctxt->instate = XML_PARSER_CONTENT;
11607
1.38M
        }
11608
1.40M
                    ctxt->progressive = 1;
11609
1.40M
        break;
11610
1.40M
    }
11611
1.99M
    if (RAW == '>') {
11612
1.79M
        NEXT;
11613
1.79M
    } else {
11614
198k
        xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
11615
198k
           "Couldn't find end of Start Tag %s\n",
11616
198k
           name);
11617
198k
        nodePop(ctxt);
11618
198k
        spacePop(ctxt);
11619
198k
    }
11620
1.99M
                nameNsPush(ctxt, name, prefix, URI, line, ctxt->nsNr - nsNr);
11621
11622
1.99M
    ctxt->instate = XML_PARSER_CONTENT;
11623
1.99M
                ctxt->progressive = 1;
11624
1.99M
                break;
11625
3.39M
      }
11626
12.1M
            case XML_PARSER_CONTENT: {
11627
12.1M
    int id;
11628
12.1M
    unsigned long cons;
11629
12.1M
    if ((avail < 2) && (ctxt->inputNr == 1))
11630
55.5k
        goto done;
11631
12.1M
    cur = ctxt->input->cur[0];
11632
12.1M
    next = ctxt->input->cur[1];
11633
11634
12.1M
    id = ctxt->input->id;
11635
12.1M
          cons = CUR_CONSUMED;
11636
12.1M
    if ((cur == '<') && (next == '/')) {
11637
1.29M
        ctxt->instate = XML_PARSER_END_TAG;
11638
1.29M
        break;
11639
10.8M
          } else if ((cur == '<') && (next == '?')) {
11640
26.0k
        if ((!terminate) &&
11641
26.0k
            (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11642
13.5k
                        ctxt->progressive = XML_PARSER_PI;
11643
13.5k
      goto done;
11644
13.5k
                    }
11645
12.5k
        xmlParsePI(ctxt);
11646
12.5k
        ctxt->instate = XML_PARSER_CONTENT;
11647
12.5k
                    ctxt->progressive = 1;
11648
10.7M
    } else if ((cur == '<') && (next != '!')) {
11649
3.13M
        ctxt->instate = XML_PARSER_START_TAG;
11650
3.13M
        break;
11651
7.66M
    } else if ((cur == '<') && (next == '!') &&
11652
7.66M
               (ctxt->input->cur[2] == '-') &&
11653
7.66M
         (ctxt->input->cur[3] == '-')) {
11654
122k
        int term;
11655
11656
122k
              if (avail < 4)
11657
0
            goto done;
11658
122k
        ctxt->input->cur += 4;
11659
122k
        term = xmlParseLookupSequence(ctxt, '-', '-', '>');
11660
122k
        ctxt->input->cur -= 4;
11661
122k
        if ((!terminate) && (term < 0)) {
11662
43.9k
                        ctxt->progressive = XML_PARSER_COMMENT;
11663
43.9k
      goto done;
11664
43.9k
                    }
11665
78.8k
        xmlParseComment(ctxt);
11666
78.8k
        ctxt->instate = XML_PARSER_CONTENT;
11667
78.8k
                    ctxt->progressive = 1;
11668
7.54M
    } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
11669
7.54M
        (ctxt->input->cur[2] == '[') &&
11670
7.54M
        (ctxt->input->cur[3] == 'C') &&
11671
7.54M
        (ctxt->input->cur[4] == 'D') &&
11672
7.54M
        (ctxt->input->cur[5] == 'A') &&
11673
7.54M
        (ctxt->input->cur[6] == 'T') &&
11674
7.54M
        (ctxt->input->cur[7] == 'A') &&
11675
7.54M
        (ctxt->input->cur[8] == '[')) {
11676
17.9k
        SKIP(9);
11677
17.9k
        ctxt->instate = XML_PARSER_CDATA_SECTION;
11678
17.9k
        break;
11679
7.52M
    } else if ((cur == '<') && (next == '!') &&
11680
7.52M
               (avail < 9)) {
11681
3.53k
        goto done;
11682
7.52M
    } else if (cur == '&') {
11683
459k
        if ((!terminate) &&
11684
459k
            (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
11685
89.4k
      goto done;
11686
370k
        xmlParseReference(ctxt);
11687
7.06M
    } else {
11688
        /* TODO Avoid the extra copy, handle directly !!! */
11689
        /*
11690
         * Goal of the following test is:
11691
         *  - minimize calls to the SAX 'character' callback
11692
         *    when they are mergeable
11693
         *  - handle a problem for isBlank when we only parse
11694
         *    a sequence of blank chars and the next one is
11695
         *    not available to check against '<' presence.
11696
         *  - tries to homogenize the differences in SAX
11697
         *    callbacks between the push and pull versions
11698
         *    of the parser.
11699
         */
11700
7.06M
        if ((ctxt->inputNr == 1) &&
11701
7.06M
            (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
11702
4.41M
      if (!terminate) {
11703
4.34M
          if (ctxt->progressive) {
11704
4.34M
        if ((lastlt == NULL) ||
11705
4.34M
            (ctxt->input->cur > lastlt))
11706
515k
            goto done;
11707
4.34M
          } else if (xmlParseLookupSequence(ctxt,
11708
0
                                            '<', 0, 0) < 0) {
11709
0
        goto done;
11710
0
          }
11711
4.34M
      }
11712
4.41M
                    }
11713
6.54M
        ctxt->checkIndex = 0;
11714
6.54M
        xmlParseCharData(ctxt, 0);
11715
6.54M
    }
11716
7.00M
    if ((cons == CUR_CONSUMED) && (id == ctxt->input->id)) {
11717
48.6k
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
11718
48.6k
                    "detected an error in element content\n");
11719
48.6k
        xmlHaltParser(ctxt);
11720
48.6k
        break;
11721
48.6k
    }
11722
6.95M
    break;
11723
7.00M
      }
11724
6.95M
            case XML_PARSER_END_TAG:
11725
1.35M
    if (avail < 2)
11726
0
        goto done;
11727
1.35M
    if (!terminate) {
11728
1.32M
        if (ctxt->progressive) {
11729
            /* > can be found unescaped in attribute values */
11730
1.32M
            if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
11731
66.6k
          goto done;
11732
1.32M
        } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11733
0
      goto done;
11734
0
        }
11735
1.32M
    }
11736
1.29M
    if (ctxt->sax2) {
11737
866k
              xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
11738
866k
        nameNsPop(ctxt);
11739
866k
    }
11740
426k
#ifdef LIBXML_SAX1_ENABLED
11741
426k
      else
11742
426k
        xmlParseEndTag1(ctxt, 0);
11743
1.29M
#endif /* LIBXML_SAX1_ENABLED */
11744
1.29M
    if (ctxt->instate == XML_PARSER_EOF) {
11745
        /* Nothing */
11746
1.29M
    } else if (ctxt->nameNr == 0) {
11747
67.6k
        ctxt->instate = XML_PARSER_EPILOG;
11748
1.22M
    } else {
11749
1.22M
        ctxt->instate = XML_PARSER_CONTENT;
11750
1.22M
    }
11751
1.29M
    break;
11752
106k
            case XML_PARSER_CDATA_SECTION: {
11753
          /*
11754
     * The Push mode need to have the SAX callback for
11755
     * cdataBlock merge back contiguous callbacks.
11756
     */
11757
106k
    int base;
11758
11759
106k
    base = xmlParseLookupSequence(ctxt, ']', ']', '>');
11760
106k
    if (base < 0) {
11761
74.2k
        if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
11762
51.9k
            int tmp;
11763
11764
51.9k
      tmp = xmlCheckCdataPush(ctxt->input->cur,
11765
51.9k
                              XML_PARSER_BIG_BUFFER_SIZE, 0);
11766
51.9k
      if (tmp < 0) {
11767
2.22k
          tmp = -tmp;
11768
2.22k
          ctxt->input->cur += tmp;
11769
2.22k
          goto encoding_error;
11770
2.22k
      }
11771
49.7k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
11772
49.7k
          if (ctxt->sax->cdataBlock != NULL)
11773
24.8k
        ctxt->sax->cdataBlock(ctxt->userData,
11774
24.8k
                              ctxt->input->cur, tmp);
11775
24.9k
          else if (ctxt->sax->characters != NULL)
11776
24.9k
        ctxt->sax->characters(ctxt->userData,
11777
24.9k
                              ctxt->input->cur, tmp);
11778
49.7k
      }
11779
49.7k
      if (ctxt->instate == XML_PARSER_EOF)
11780
0
          goto done;
11781
49.7k
      SKIPL(tmp);
11782
49.7k
      ctxt->checkIndex = 0;
11783
49.7k
        }
11784
72.0k
        goto done;
11785
74.2k
    } else {
11786
32.1k
        int tmp;
11787
11788
32.1k
        tmp = xmlCheckCdataPush(ctxt->input->cur, base, 1);
11789
32.1k
        if ((tmp < 0) || (tmp != base)) {
11790
22.6k
      tmp = -tmp;
11791
22.6k
      ctxt->input->cur += tmp;
11792
22.6k
      goto encoding_error;
11793
22.6k
        }
11794
9.41k
        if ((ctxt->sax != NULL) && (base == 0) &&
11795
9.41k
            (ctxt->sax->cdataBlock != NULL) &&
11796
9.41k
            (!ctxt->disableSAX)) {
11797
      /*
11798
       * Special case to provide identical behaviour
11799
       * between pull and push parsers on empty CDATA
11800
       * sections
11801
       */
11802
195
       if ((ctxt->input->cur - ctxt->input->base >= 9) &&
11803
195
           (!strncmp((const char *)&ctxt->input->cur[-9],
11804
195
                     "<![CDATA[", 9)))
11805
194
           ctxt->sax->cdataBlock(ctxt->userData,
11806
194
                                 BAD_CAST "", 0);
11807
9.22k
        } else if ((ctxt->sax != NULL) && (base > 0) &&
11808
9.22k
      (!ctxt->disableSAX)) {
11809
9.10k
      if (ctxt->sax->cdataBlock != NULL)
11810
7.27k
          ctxt->sax->cdataBlock(ctxt->userData,
11811
7.27k
              ctxt->input->cur, base);
11812
1.83k
      else if (ctxt->sax->characters != NULL)
11813
1.83k
          ctxt->sax->characters(ctxt->userData,
11814
1.83k
              ctxt->input->cur, base);
11815
9.10k
        }
11816
9.41k
        if (ctxt->instate == XML_PARSER_EOF)
11817
0
      goto done;
11818
9.41k
        SKIPL(base + 3);
11819
9.41k
        ctxt->checkIndex = 0;
11820
9.41k
        ctxt->instate = XML_PARSER_CONTENT;
11821
#ifdef DEBUG_PUSH
11822
        xmlGenericError(xmlGenericErrorContext,
11823
          "PP: entering CONTENT\n");
11824
#endif
11825
9.41k
    }
11826
9.41k
    break;
11827
106k
      }
11828
482k
            case XML_PARSER_MISC:
11829
482k
    SKIP_BLANKS;
11830
482k
    if (ctxt->input->buf == NULL)
11831
0
        avail = ctxt->input->length -
11832
0
                (ctxt->input->cur - ctxt->input->base);
11833
482k
    else
11834
482k
        avail = xmlBufUse(ctxt->input->buf->buffer) -
11835
482k
                (ctxt->input->cur - ctxt->input->base);
11836
482k
    if (avail < 2)
11837
3.75k
        goto done;
11838
478k
    cur = ctxt->input->cur[0];
11839
478k
    next = ctxt->input->cur[1];
11840
478k
          if ((cur == '<') && (next == '?')) {
11841
21.2k
        if ((!terminate) &&
11842
21.2k
            (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11843
2.51k
                        ctxt->progressive = XML_PARSER_PI;
11844
2.51k
      goto done;
11845
2.51k
                    }
11846
#ifdef DEBUG_PUSH
11847
        xmlGenericError(xmlGenericErrorContext,
11848
          "PP: Parsing PI\n");
11849
#endif
11850
18.7k
        xmlParsePI(ctxt);
11851
18.7k
        if (ctxt->instate == XML_PARSER_EOF)
11852
0
      goto done;
11853
18.7k
        ctxt->instate = XML_PARSER_MISC;
11854
18.7k
                    ctxt->progressive = 1;
11855
18.7k
        ctxt->checkIndex = 0;
11856
457k
    } else if ((cur == '<') && (next == '!') &&
11857
457k
        (ctxt->input->cur[2] == '-') &&
11858
457k
        (ctxt->input->cur[3] == '-')) {
11859
30.9k
        if ((!terminate) &&
11860
30.9k
            (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11861
20.8k
                        ctxt->progressive = XML_PARSER_COMMENT;
11862
20.8k
      goto done;
11863
20.8k
                    }
11864
#ifdef DEBUG_PUSH
11865
        xmlGenericError(xmlGenericErrorContext,
11866
          "PP: Parsing Comment\n");
11867
#endif
11868
10.1k
        xmlParseComment(ctxt);
11869
10.1k
        if (ctxt->instate == XML_PARSER_EOF)
11870
0
      goto done;
11871
10.1k
        ctxt->instate = XML_PARSER_MISC;
11872
10.1k
                    ctxt->progressive = 1;
11873
10.1k
        ctxt->checkIndex = 0;
11874
426k
    } else if ((cur == '<') && (next == '!') &&
11875
426k
        (ctxt->input->cur[2] == 'D') &&
11876
426k
        (ctxt->input->cur[3] == 'O') &&
11877
426k
        (ctxt->input->cur[4] == 'C') &&
11878
426k
        (ctxt->input->cur[5] == 'T') &&
11879
426k
        (ctxt->input->cur[6] == 'Y') &&
11880
426k
        (ctxt->input->cur[7] == 'P') &&
11881
426k
        (ctxt->input->cur[8] == 'E')) {
11882
213k
        if ((!terminate) &&
11883
213k
            (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) {
11884
29.2k
                        ctxt->progressive = XML_PARSER_DTD;
11885
29.2k
      goto done;
11886
29.2k
                    }
11887
#ifdef DEBUG_PUSH
11888
        xmlGenericError(xmlGenericErrorContext,
11889
          "PP: Parsing internal subset\n");
11890
#endif
11891
184k
        ctxt->inSubset = 1;
11892
184k
                    ctxt->progressive = 0;
11893
184k
        ctxt->checkIndex = 0;
11894
184k
        xmlParseDocTypeDecl(ctxt);
11895
184k
        if (ctxt->instate == XML_PARSER_EOF)
11896
0
      goto done;
11897
184k
        if (RAW == '[') {
11898
133k
      ctxt->instate = XML_PARSER_DTD;
11899
#ifdef DEBUG_PUSH
11900
      xmlGenericError(xmlGenericErrorContext,
11901
        "PP: entering DTD\n");
11902
#endif
11903
133k
        } else {
11904
      /*
11905
       * Create and update the external subset.
11906
       */
11907
51.2k
      ctxt->inSubset = 2;
11908
51.2k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11909
51.2k
          (ctxt->sax->externalSubset != NULL))
11910
47.8k
          ctxt->sax->externalSubset(ctxt->userData,
11911
47.8k
            ctxt->intSubName, ctxt->extSubSystem,
11912
47.8k
            ctxt->extSubURI);
11913
51.2k
      ctxt->inSubset = 0;
11914
51.2k
      xmlCleanSpecialAttr(ctxt);
11915
51.2k
      ctxt->instate = XML_PARSER_PROLOG;
11916
#ifdef DEBUG_PUSH
11917
      xmlGenericError(xmlGenericErrorContext,
11918
        "PP: entering PROLOG\n");
11919
#endif
11920
51.2k
        }
11921
212k
    } else if ((cur == '<') && (next == '!') &&
11922
212k
               (avail < 9)) {
11923
1.72k
        goto done;
11924
210k
    } else {
11925
210k
        ctxt->instate = XML_PARSER_START_TAG;
11926
210k
        ctxt->progressive = XML_PARSER_START_TAG;
11927
210k
        xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11928
#ifdef DEBUG_PUSH
11929
        xmlGenericError(xmlGenericErrorContext,
11930
          "PP: entering START_TAG\n");
11931
#endif
11932
210k
    }
11933
424k
    break;
11934
424k
            case XML_PARSER_PROLOG:
11935
153k
    SKIP_BLANKS;
11936
153k
    if (ctxt->input->buf == NULL)
11937
0
        avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11938
153k
    else
11939
153k
        avail = xmlBufUse(ctxt->input->buf->buffer) -
11940
153k
                            (ctxt->input->cur - ctxt->input->base);
11941
153k
    if (avail < 2)
11942
2.78k
        goto done;
11943
150k
    cur = ctxt->input->cur[0];
11944
150k
    next = ctxt->input->cur[1];
11945
150k
          if ((cur == '<') && (next == '?')) {
11946
17.2k
        if ((!terminate) &&
11947
17.2k
            (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11948
6.43k
                        ctxt->progressive = XML_PARSER_PI;
11949
6.43k
      goto done;
11950
6.43k
                    }
11951
#ifdef DEBUG_PUSH
11952
        xmlGenericError(xmlGenericErrorContext,
11953
          "PP: Parsing PI\n");
11954
#endif
11955
10.7k
        xmlParsePI(ctxt);
11956
10.7k
        if (ctxt->instate == XML_PARSER_EOF)
11957
0
      goto done;
11958
10.7k
        ctxt->instate = XML_PARSER_PROLOG;
11959
10.7k
                    ctxt->progressive = 1;
11960
133k
    } else if ((cur == '<') && (next == '!') &&
11961
133k
        (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11962
18.0k
        if ((!terminate) &&
11963
18.0k
            (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11964
2.35k
                        ctxt->progressive = XML_PARSER_COMMENT;
11965
2.35k
      goto done;
11966
2.35k
                    }
11967
#ifdef DEBUG_PUSH
11968
        xmlGenericError(xmlGenericErrorContext,
11969
          "PP: Parsing Comment\n");
11970
#endif
11971
15.7k
        xmlParseComment(ctxt);
11972
15.7k
        if (ctxt->instate == XML_PARSER_EOF)
11973
0
      goto done;
11974
15.7k
        ctxt->instate = XML_PARSER_PROLOG;
11975
15.7k
                    ctxt->progressive = 1;
11976
115k
    } else if ((cur == '<') && (next == '!') &&
11977
115k
               (avail < 4)) {
11978
241
        goto done;
11979
115k
    } else {
11980
115k
        ctxt->instate = XML_PARSER_START_TAG;
11981
115k
        if (ctxt->progressive == 0)
11982
99.9k
      ctxt->progressive = XML_PARSER_START_TAG;
11983
115k
        xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11984
#ifdef DEBUG_PUSH
11985
        xmlGenericError(xmlGenericErrorContext,
11986
          "PP: entering START_TAG\n");
11987
#endif
11988
115k
    }
11989
141k
    break;
11990
141k
            case XML_PARSER_EPILOG:
11991
78.9k
    SKIP_BLANKS;
11992
78.9k
    if (ctxt->input->buf == NULL)
11993
0
        avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11994
78.9k
    else
11995
78.9k
        avail = xmlBufUse(ctxt->input->buf->buffer) -
11996
78.9k
                            (ctxt->input->cur - ctxt->input->base);
11997
78.9k
    if (avail < 2)
11998
66.7k
        goto done;
11999
12.2k
    cur = ctxt->input->cur[0];
12000
12.2k
    next = ctxt->input->cur[1];
12001
12.2k
          if ((cur == '<') && (next == '?')) {
12002
2.55k
        if ((!terminate) &&
12003
2.55k
            (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
12004
1.56k
                        ctxt->progressive = XML_PARSER_PI;
12005
1.56k
      goto done;
12006
1.56k
                    }
12007
#ifdef DEBUG_PUSH
12008
        xmlGenericError(xmlGenericErrorContext,
12009
          "PP: Parsing PI\n");
12010
#endif
12011
998
        xmlParsePI(ctxt);
12012
998
        if (ctxt->instate == XML_PARSER_EOF)
12013
0
      goto done;
12014
998
        ctxt->instate = XML_PARSER_EPILOG;
12015
998
                    ctxt->progressive = 1;
12016
9.68k
    } else if ((cur == '<') && (next == '!') &&
12017
9.68k
        (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
12018
1.99k
        if ((!terminate) &&
12019
1.99k
            (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
12020
1.24k
                        ctxt->progressive = XML_PARSER_COMMENT;
12021
1.24k
      goto done;
12022
1.24k
                    }
12023
#ifdef DEBUG_PUSH
12024
        xmlGenericError(xmlGenericErrorContext,
12025
          "PP: Parsing Comment\n");
12026
#endif
12027
756
        xmlParseComment(ctxt);
12028
756
        if (ctxt->instate == XML_PARSER_EOF)
12029
0
      goto done;
12030
756
        ctxt->instate = XML_PARSER_EPILOG;
12031
756
                    ctxt->progressive = 1;
12032
7.68k
    } else if ((cur == '<') && (next == '!') &&
12033
7.68k
               (avail < 4)) {
12034
233
        goto done;
12035
7.45k
    } else {
12036
7.45k
        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
12037
7.45k
        xmlHaltParser(ctxt);
12038
#ifdef DEBUG_PUSH
12039
        xmlGenericError(xmlGenericErrorContext,
12040
          "PP: entering EOF\n");
12041
#endif
12042
7.45k
        if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
12043
7.45k
      ctxt->sax->endDocument(ctxt->userData);
12044
7.45k
        goto done;
12045
7.45k
    }
12046
1.75k
    break;
12047
588k
            case XML_PARSER_DTD: {
12048
          /*
12049
     * Sorry but progressive parsing of the internal subset
12050
     * is not expected to be supported. We first check that
12051
     * the full content of the internal subset is available and
12052
     * the parsing is launched only at that point.
12053
     * Internal subset ends up with "']' S? '>'" in an unescaped
12054
     * section and not in a ']]>' sequence which are conditional
12055
     * sections (whoever argued to keep that crap in XML deserve
12056
     * a place in hell !).
12057
     */
12058
588k
    int base, i;
12059
588k
    xmlChar *buf;
12060
588k
          xmlChar quote = 0;
12061
588k
                size_t use;
12062
12063
588k
    base = ctxt->input->cur - ctxt->input->base;
12064
588k
    if (base < 0) return(0);
12065
588k
    if (ctxt->checkIndex > base)
12066
329k
        base = ctxt->checkIndex;
12067
588k
    buf = xmlBufContent(ctxt->input->buf->buffer);
12068
588k
                use = xmlBufUse(ctxt->input->buf->buffer);
12069
807M
    for (;(unsigned int) base < use; base++) {
12070
807M
        if (quote != 0) {
12071
520M
            if (buf[base] == quote)
12072
5.32M
          quote = 0;
12073
520M
      continue;
12074
520M
        }
12075
286M
        if ((quote == 0) && (buf[base] == '<')) {
12076
8.79M
            int found  = 0;
12077
      /* special handling of comments */
12078
8.79M
            if (((unsigned int) base + 4 < use) &&
12079
8.79M
          (buf[base + 1] == '!') &&
12080
8.79M
          (buf[base + 2] == '-') &&
12081
8.79M
          (buf[base + 3] == '-')) {
12082
101M
          for (;(unsigned int) base + 3 < use; base++) {
12083
101M
        if ((buf[base] == '-') &&
12084
101M
            (buf[base + 1] == '-') &&
12085
101M
            (buf[base + 2] == '>')) {
12086
1.64M
            found = 1;
12087
1.64M
            base += 2;
12088
1.64M
            break;
12089
1.64M
        }
12090
101M
                }
12091
1.70M
          if (!found) {
12092
#if 0
12093
              fprintf(stderr, "unfinished comment\n");
12094
#endif
12095
59.5k
              break; /* for */
12096
59.5k
                }
12097
1.64M
                continue;
12098
1.70M
      }
12099
8.79M
        }
12100
285M
        if (buf[base] == '"') {
12101
4.62M
            quote = '"';
12102
4.62M
      continue;
12103
4.62M
        }
12104
280M
        if (buf[base] == '\'') {
12105
843k
            quote = '\'';
12106
843k
      continue;
12107
843k
        }
12108
279M
        if (buf[base] == ']') {
12109
#if 0
12110
            fprintf(stderr, "%c%c%c%c: ", buf[base],
12111
              buf[base + 1], buf[base + 2], buf[base + 3]);
12112
#endif
12113
151k
            if ((unsigned int) base +1 >= use)
12114
694
          break;
12115
150k
      if (buf[base + 1] == ']') {
12116
          /* conditional crap, skip both ']' ! */
12117
16.5k
          base++;
12118
16.5k
          continue;
12119
16.5k
      }
12120
196k
            for (i = 1; (unsigned int) base + i < use; i++) {
12121
195k
          if (buf[base + i] == '>') {
12122
#if 0
12123
              fprintf(stderr, "found\n");
12124
#endif
12125
101k
              goto found_end_int_subset;
12126
101k
          }
12127
94.6k
          if (!IS_BLANK_CH(buf[base + i])) {
12128
#if 0
12129
              fprintf(stderr, "not found\n");
12130
#endif
12131
32.5k
              goto not_end_of_int_subset;
12132
32.5k
          }
12133
94.6k
      }
12134
#if 0
12135
      fprintf(stderr, "end of stream\n");
12136
#endif
12137
556
            break;
12138
12139
134k
        }
12140
279M
not_end_of_int_subset:
12141
279M
                    continue; /* for */
12142
279M
    }
12143
    /*
12144
     * We didn't found the end of the Internal subset
12145
     */
12146
487k
                if (quote == 0)
12147
346k
                    ctxt->checkIndex = base;
12148
141k
                else
12149
141k
                    ctxt->checkIndex = 0;
12150
#ifdef DEBUG_PUSH
12151
    if (next == 0)
12152
        xmlGenericError(xmlGenericErrorContext,
12153
          "PP: lookup of int subset end filed\n");
12154
#endif
12155
487k
          goto done;
12156
12157
101k
found_end_int_subset:
12158
101k
                ctxt->checkIndex = 0;
12159
101k
    xmlParseInternalSubset(ctxt);
12160
101k
    if (ctxt->instate == XML_PARSER_EOF)
12161
2.77k
        goto done;
12162
98.3k
    ctxt->inSubset = 2;
12163
98.3k
    if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
12164
98.3k
        (ctxt->sax->externalSubset != NULL))
12165
81.6k
        ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
12166
81.6k
          ctxt->extSubSystem, ctxt->extSubURI);
12167
98.3k
    ctxt->inSubset = 0;
12168
98.3k
    xmlCleanSpecialAttr(ctxt);
12169
98.3k
    if (ctxt->instate == XML_PARSER_EOF)
12170
1.36k
        goto done;
12171
96.9k
    ctxt->instate = XML_PARSER_PROLOG;
12172
96.9k
    ctxt->checkIndex = 0;
12173
#ifdef DEBUG_PUSH
12174
    xmlGenericError(xmlGenericErrorContext,
12175
      "PP: entering PROLOG\n");
12176
#endif
12177
96.9k
                break;
12178
98.3k
      }
12179
0
            case XML_PARSER_COMMENT:
12180
0
    xmlGenericError(xmlGenericErrorContext,
12181
0
      "PP: internal error, state == COMMENT\n");
12182
0
    ctxt->instate = XML_PARSER_CONTENT;
12183
#ifdef DEBUG_PUSH
12184
    xmlGenericError(xmlGenericErrorContext,
12185
      "PP: entering CONTENT\n");
12186
#endif
12187
0
    break;
12188
0
            case XML_PARSER_IGNORE:
12189
0
    xmlGenericError(xmlGenericErrorContext,
12190
0
      "PP: internal error, state == IGNORE");
12191
0
          ctxt->instate = XML_PARSER_DTD;
12192
#ifdef DEBUG_PUSH
12193
    xmlGenericError(xmlGenericErrorContext,
12194
      "PP: entering DTD\n");
12195
#endif
12196
0
          break;
12197
0
            case XML_PARSER_PI:
12198
0
    xmlGenericError(xmlGenericErrorContext,
12199
0
      "PP: internal error, state == PI\n");
12200
0
    ctxt->instate = XML_PARSER_CONTENT;
12201
#ifdef DEBUG_PUSH
12202
    xmlGenericError(xmlGenericErrorContext,
12203
      "PP: entering CONTENT\n");
12204
#endif
12205
0
    break;
12206
0
            case XML_PARSER_ENTITY_DECL:
12207
0
    xmlGenericError(xmlGenericErrorContext,
12208
0
      "PP: internal error, state == ENTITY_DECL\n");
12209
0
    ctxt->instate = XML_PARSER_DTD;
12210
#ifdef DEBUG_PUSH
12211
    xmlGenericError(xmlGenericErrorContext,
12212
      "PP: entering DTD\n");
12213
#endif
12214
0
    break;
12215
0
            case XML_PARSER_ENTITY_VALUE:
12216
0
    xmlGenericError(xmlGenericErrorContext,
12217
0
      "PP: internal error, state == ENTITY_VALUE\n");
12218
0
    ctxt->instate = XML_PARSER_CONTENT;
12219
#ifdef DEBUG_PUSH
12220
    xmlGenericError(xmlGenericErrorContext,
12221
      "PP: entering DTD\n");
12222
#endif
12223
0
    break;
12224
0
            case XML_PARSER_ATTRIBUTE_VALUE:
12225
0
    xmlGenericError(xmlGenericErrorContext,
12226
0
      "PP: internal error, state == ATTRIBUTE_VALUE\n");
12227
0
    ctxt->instate = XML_PARSER_START_TAG;
12228
#ifdef DEBUG_PUSH
12229
    xmlGenericError(xmlGenericErrorContext,
12230
      "PP: entering START_TAG\n");
12231
#endif
12232
0
    break;
12233
0
            case XML_PARSER_SYSTEM_LITERAL:
12234
0
    xmlGenericError(xmlGenericErrorContext,
12235
0
      "PP: internal error, state == SYSTEM_LITERAL\n");
12236
0
    ctxt->instate = XML_PARSER_START_TAG;
12237
#ifdef DEBUG_PUSH
12238
    xmlGenericError(xmlGenericErrorContext,
12239
      "PP: entering START_TAG\n");
12240
#endif
12241
0
    break;
12242
0
            case XML_PARSER_PUBLIC_LITERAL:
12243
0
    xmlGenericError(xmlGenericErrorContext,
12244
0
      "PP: internal error, state == PUBLIC_LITERAL\n");
12245
0
    ctxt->instate = XML_PARSER_START_TAG;
12246
#ifdef DEBUG_PUSH
12247
    xmlGenericError(xmlGenericErrorContext,
12248
      "PP: entering START_TAG\n");
12249
#endif
12250
0
    break;
12251
19.6M
  }
12252
19.6M
    }
12253
2.46M
done:
12254
#ifdef DEBUG_PUSH
12255
    xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
12256
#endif
12257
2.46M
    return(ret);
12258
24.9k
encoding_error:
12259
24.9k
    {
12260
24.9k
        char buffer[150];
12261
12262
24.9k
  snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
12263
24.9k
      ctxt->input->cur[0], ctxt->input->cur[1],
12264
24.9k
      ctxt->input->cur[2], ctxt->input->cur[3]);
12265
24.9k
  __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
12266
24.9k
         "Input is not proper UTF-8, indicate encoding !\n%s",
12267
24.9k
         BAD_CAST buffer, NULL);
12268
24.9k
    }
12269
24.9k
    return(0);
12270
2.70M
}
12271
12272
/**
12273
 * xmlParseCheckTransition:
12274
 * @ctxt:  an XML parser context
12275
 * @chunk:  a char array
12276
 * @size:  the size in byte of the chunk
12277
 *
12278
 * Check depending on the current parser state if the chunk given must be
12279
 * processed immediately or one need more data to advance on parsing.
12280
 *
12281
 * Returns -1 in case of error, 0 if the push is not needed and 1 if needed
12282
 */
12283
static int
12284
2.98M
xmlParseCheckTransition(xmlParserCtxtPtr ctxt, const char *chunk, int size) {
12285
2.98M
    if ((ctxt == NULL) || (chunk == NULL) || (size < 0))
12286
0
        return(-1);
12287
2.98M
    if (ctxt->instate == XML_PARSER_START_TAG) {
12288
900k
        if (memchr(chunk, '>', size) != NULL)
12289
462k
            return(1);
12290
437k
        return(0);
12291
900k
    }
12292
2.08M
    if (ctxt->progressive == XML_PARSER_COMMENT) {
12293
118k
        if (memchr(chunk, '>', size) != NULL)
12294
63.6k
            return(1);
12295
55.2k
        return(0);
12296
118k
    }
12297
1.96M
    if (ctxt->instate == XML_PARSER_CDATA_SECTION) {
12298
173k
        if (memchr(chunk, '>', size) != NULL)
12299
83.5k
            return(1);
12300
90.1k
        return(0);
12301
173k
    }
12302
1.79M
    if (ctxt->progressive == XML_PARSER_PI) {
12303
35.9k
        if (memchr(chunk, '>', size) != NULL)
12304
21.1k
            return(1);
12305
14.8k
        return(0);
12306
35.9k
    }
12307
1.75M
    if (ctxt->instate == XML_PARSER_END_TAG) {
12308
64.0k
        if (memchr(chunk, '>', size) != NULL)
12309
49.0k
            return(1);
12310
14.9k
        return(0);
12311
64.0k
    }
12312
1.69M
    if ((ctxt->progressive == XML_PARSER_DTD) ||
12313
1.69M
        (ctxt->instate == XML_PARSER_DTD)) {
12314
618k
        if (memchr(chunk, '>', size) != NULL)
12315
438k
            return(1);
12316
180k
        return(0);
12317
618k
    }
12318
1.07M
    return(1);
12319
1.69M
}
12320
12321
/**
12322
 * xmlParseChunk:
12323
 * @ctxt:  an XML parser context
12324
 * @chunk:  an char array
12325
 * @size:  the size in byte of the chunk
12326
 * @terminate:  last chunk indicator
12327
 *
12328
 * Parse a Chunk of memory
12329
 *
12330
 * Returns zero if no error, the xmlParserErrors otherwise.
12331
 */
12332
int
12333
xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
12334
5.63M
              int terminate) {
12335
5.63M
    int end_in_lf = 0;
12336
5.63M
    int remain = 0;
12337
5.63M
    size_t old_avail = 0;
12338
5.63M
    size_t avail = 0;
12339
12340
5.63M
    if (ctxt == NULL)
12341
0
        return(XML_ERR_INTERNAL_ERROR);
12342
5.63M
    if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12343
2.14M
        return(ctxt->errNo);
12344
3.49M
    if (ctxt->instate == XML_PARSER_EOF)
12345
850
        return(-1);
12346
3.49M
    if (ctxt->instate == XML_PARSER_START)
12347
539k
        xmlDetectSAX2(ctxt);
12348
3.49M
    if ((size > 0) && (chunk != NULL) && (!terminate) &&
12349
3.49M
        (chunk[size - 1] == '\r')) {
12350
20.9k
  end_in_lf = 1;
12351
20.9k
  size--;
12352
20.9k
    }
12353
12354
3.50M
xmldecl_done:
12355
12356
3.50M
    if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
12357
3.50M
        (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF))  {
12358
3.22M
  size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12359
3.22M
  size_t cur = ctxt->input->cur - ctxt->input->base;
12360
3.22M
  int res;
12361
12362
3.22M
        old_avail = xmlBufUse(ctxt->input->buf->buffer);
12363
        /*
12364
         * Specific handling if we autodetected an encoding, we should not
12365
         * push more than the first line ... which depend on the encoding
12366
         * And only push the rest once the final encoding was detected
12367
         */
12368
3.22M
        if ((ctxt->instate == XML_PARSER_START) && (ctxt->input != NULL) &&
12369
3.22M
            (ctxt->input->buf != NULL) && (ctxt->input->buf->encoder != NULL)) {
12370
22.0k
            unsigned int len = 45;
12371
12372
22.0k
            if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12373
22.0k
                               BAD_CAST "UTF-16")) ||
12374
22.0k
                (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12375
7.77k
                               BAD_CAST "UTF16")))
12376
14.2k
                len = 90;
12377
7.77k
            else if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12378
7.77k
                                    BAD_CAST "UCS-4")) ||
12379
7.77k
                     (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12380
7.51k
                                    BAD_CAST "UCS4")))
12381
265
                len = 180;
12382
12383
22.0k
            if (ctxt->input->buf->rawconsumed < len)
12384
5.24k
                len -= ctxt->input->buf->rawconsumed;
12385
12386
            /*
12387
             * Change size for reading the initial declaration only
12388
             * if size is greater than len. Otherwise, memmove in xmlBufferAdd
12389
             * will blindly copy extra bytes from memory.
12390
             */
12391
22.0k
            if ((unsigned int) size > len) {
12392
14.1k
                remain = size - len;
12393
14.1k
                size = len;
12394
14.1k
            } else {
12395
7.85k
                remain = 0;
12396
7.85k
            }
12397
22.0k
        }
12398
3.22M
  res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12399
3.22M
        xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
12400
3.22M
  if (res < 0) {
12401
1.35k
      ctxt->errNo = XML_PARSER_EOF;
12402
1.35k
      xmlHaltParser(ctxt);
12403
1.35k
      return (XML_PARSER_EOF);
12404
1.35k
  }
12405
#ifdef DEBUG_PUSH
12406
  xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12407
#endif
12408
12409
3.22M
    } else if (ctxt->instate != XML_PARSER_EOF) {
12410
283k
  if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
12411
283k
      xmlParserInputBufferPtr in = ctxt->input->buf;
12412
283k
      if ((in->encoder != NULL) && (in->buffer != NULL) &&
12413
283k
        (in->raw != NULL)) {
12414
17.5k
    int nbchars;
12415
17.5k
    size_t base = xmlBufGetInputBase(in->buffer, ctxt->input);
12416
17.5k
    size_t current = ctxt->input->cur - ctxt->input->base;
12417
12418
17.5k
    nbchars = xmlCharEncInput(in, terminate);
12419
17.5k
    xmlBufSetInputBaseCur(in->buffer, ctxt->input, base, current);
12420
17.5k
    if (nbchars < 0) {
12421
        /* TODO 2.6.0 */
12422
1.84k
        xmlGenericError(xmlGenericErrorContext,
12423
1.84k
            "xmlParseChunk: encoder error\n");
12424
1.84k
                    xmlHaltParser(ctxt);
12425
1.84k
        return(XML_ERR_INVALID_ENCODING);
12426
1.84k
    }
12427
17.5k
      }
12428
283k
  }
12429
283k
    }
12430
3.50M
    if (remain != 0) {
12431
13.6k
        xmlParseTryOrFinish(ctxt, 0);
12432
3.48M
    } else {
12433
3.48M
        if ((ctxt->input != NULL) && (ctxt->input->buf != NULL))
12434
3.48M
            avail = xmlBufUse(ctxt->input->buf->buffer);
12435
        /*
12436
         * Depending on the current state it may not be such
12437
         * a good idea to try parsing if there is nothing in the chunk
12438
         * which would be worth doing a parser state transition and we
12439
         * need to wait for more data
12440
         */
12441
3.48M
        if ((terminate) || (avail > XML_MAX_TEXT_LENGTH) ||
12442
3.48M
            (old_avail == 0) || (avail == 0) ||
12443
3.48M
            (xmlParseCheckTransition(ctxt,
12444
2.98M
                       (const char *)&ctxt->input->base[old_avail],
12445
2.98M
                                     avail - old_avail)))
12446
2.69M
            xmlParseTryOrFinish(ctxt, terminate);
12447
3.48M
    }
12448
3.50M
    if (ctxt->instate == XML_PARSER_EOF)
12449
114k
        return(ctxt->errNo);
12450
12451
3.38M
    if ((ctxt->input != NULL) &&
12452
3.38M
         (((ctxt->input->end - ctxt->input->cur) > XML_MAX_LOOKUP_LIMIT) ||
12453
3.38M
         ((ctxt->input->cur - ctxt->input->base) > XML_MAX_LOOKUP_LIMIT)) &&
12454
3.38M
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
12455
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
12456
0
        xmlHaltParser(ctxt);
12457
0
    }
12458
3.38M
    if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12459
129k
        return(ctxt->errNo);
12460
12461
3.25M
    if (remain != 0) {
12462
12.9k
        chunk += size;
12463
12.9k
        size = remain;
12464
12.9k
        remain = 0;
12465
12.9k
        goto xmldecl_done;
12466
12.9k
    }
12467
3.24M
    if ((end_in_lf == 1) && (ctxt->input != NULL) &&
12468
3.24M
        (ctxt->input->buf != NULL)) {
12469
19.3k
  size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
12470
19.3k
           ctxt->input);
12471
19.3k
  size_t current = ctxt->input->cur - ctxt->input->base;
12472
12473
19.3k
  xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
12474
12475
19.3k
  xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
12476
19.3k
            base, current);
12477
19.3k
    }
12478
3.24M
    if (terminate) {
12479
  /*
12480
   * Check for termination
12481
   */
12482
150k
  int cur_avail = 0;
12483
12484
150k
  if (ctxt->input != NULL) {
12485
150k
      if (ctxt->input->buf == NULL)
12486
0
    cur_avail = ctxt->input->length -
12487
0
          (ctxt->input->cur - ctxt->input->base);
12488
150k
      else
12489
150k
    cur_avail = xmlBufUse(ctxt->input->buf->buffer) -
12490
150k
                    (ctxt->input->cur - ctxt->input->base);
12491
150k
  }
12492
12493
150k
  if ((ctxt->instate != XML_PARSER_EOF) &&
12494
150k
      (ctxt->instate != XML_PARSER_EPILOG)) {
12495
88.9k
      xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
12496
88.9k
  }
12497
150k
  if ((ctxt->instate == XML_PARSER_EPILOG) && (cur_avail > 0)) {
12498
1.72k
      xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
12499
1.72k
  }
12500
150k
  if (ctxt->instate != XML_PARSER_EOF) {
12501
150k
      if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
12502
150k
    ctxt->sax->endDocument(ctxt->userData);
12503
150k
  }
12504
150k
  ctxt->instate = XML_PARSER_EOF;
12505
150k
    }
12506
3.24M
    if (ctxt->wellFormed == 0)
12507
917k
  return((xmlParserErrors) ctxt->errNo);
12508
2.32M
    else
12509
2.32M
        return(0);
12510
3.24M
}
12511
12512
/************************************************************************
12513
 *                  *
12514
 *    I/O front end functions to the parser     *
12515
 *                  *
12516
 ************************************************************************/
12517
12518
/**
12519
 * xmlCreatePushParserCtxt:
12520
 * @sax:  a SAX handler
12521
 * @user_data:  The user data returned on SAX callbacks
12522
 * @chunk:  a pointer to an array of chars
12523
 * @size:  number of chars in the array
12524
 * @filename:  an optional file name or URI
12525
 *
12526
 * Create a parser context for using the XML parser in push mode.
12527
 * If @buffer and @size are non-NULL, the data is used to detect
12528
 * the encoding.  The remaining characters will be parsed so they
12529
 * don't need to be fed in again through xmlParseChunk.
12530
 * To allow content encoding detection, @size should be >= 4
12531
 * The value of @filename is used for fetching external entities
12532
 * and error/warning reports.
12533
 *
12534
 * Returns the new parser context or NULL
12535
 */
12536
12537
xmlParserCtxtPtr
12538
xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12539
443k
                        const char *chunk, int size, const char *filename) {
12540
443k
    xmlParserCtxtPtr ctxt;
12541
443k
    xmlParserInputPtr inputStream;
12542
443k
    xmlParserInputBufferPtr buf;
12543
443k
    xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
12544
12545
    /*
12546
     * plug some encoding conversion routines
12547
     */
12548
443k
    if ((chunk != NULL) && (size >= 4))
12549
214k
  enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
12550
12551
443k
    buf = xmlAllocParserInputBuffer(enc);
12552
443k
    if (buf == NULL) return(NULL);
12553
12554
443k
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
12555
443k
    if (ctxt == NULL) {
12556
0
        xmlErrMemory(NULL, "creating parser: out of memory\n");
12557
0
  xmlFreeParserInputBuffer(buf);
12558
0
  return(NULL);
12559
0
    }
12560
443k
    ctxt->dictNames = 1;
12561
443k
    if (filename == NULL) {
12562
221k
  ctxt->directory = NULL;
12563
221k
    } else {
12564
221k
        ctxt->directory = xmlParserGetDirectory(filename);
12565
221k
    }
12566
12567
443k
    inputStream = xmlNewInputStream(ctxt);
12568
443k
    if (inputStream == NULL) {
12569
0
  xmlFreeParserCtxt(ctxt);
12570
0
  xmlFreeParserInputBuffer(buf);
12571
0
  return(NULL);
12572
0
    }
12573
12574
443k
    if (filename == NULL)
12575
221k
  inputStream->filename = NULL;
12576
221k
    else {
12577
221k
  inputStream->filename = (char *)
12578
221k
      xmlCanonicPath((const xmlChar *) filename);
12579
221k
  if (inputStream->filename == NULL) {
12580
0
      xmlFreeParserCtxt(ctxt);
12581
0
      xmlFreeParserInputBuffer(buf);
12582
0
      return(NULL);
12583
0
  }
12584
221k
    }
12585
443k
    inputStream->buf = buf;
12586
443k
    xmlBufResetInput(inputStream->buf->buffer, inputStream);
12587
443k
    inputPush(ctxt, inputStream);
12588
12589
    /*
12590
     * If the caller didn't provide an initial 'chunk' for determining
12591
     * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
12592
     * that it can be automatically determined later
12593
     */
12594
443k
    if ((size == 0) || (chunk == NULL)) {
12595
229k
  ctxt->charset = XML_CHAR_ENCODING_NONE;
12596
229k
    } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
12597
214k
  size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12598
214k
  size_t cur = ctxt->input->cur - ctxt->input->base;
12599
12600
214k
  xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12601
12602
214k
        xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
12603
#ifdef DEBUG_PUSH
12604
  xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12605
#endif
12606
214k
    }
12607
12608
443k
    if (enc != XML_CHAR_ENCODING_NONE) {
12609
95.9k
        xmlSwitchEncoding(ctxt, enc);
12610
95.9k
    }
12611
12612
443k
    return(ctxt);
12613
443k
}
12614
#endif /* LIBXML_PUSH_ENABLED */
12615
12616
/**
12617
 * xmlHaltParser:
12618
 * @ctxt:  an XML parser context
12619
 *
12620
 * Blocks further parser processing don't override error
12621
 * for internal use
12622
 */
12623
static void
12624
1.12M
xmlHaltParser(xmlParserCtxtPtr ctxt) {
12625
1.12M
    if (ctxt == NULL)
12626
0
        return;
12627
1.12M
    ctxt->instate = XML_PARSER_EOF;
12628
1.12M
    ctxt->disableSAX = 1;
12629
1.13M
    while (ctxt->inputNr > 1)
12630
1.31k
        xmlFreeInputStream(inputPop(ctxt));
12631
1.12M
    if (ctxt->input != NULL) {
12632
        /*
12633
   * in case there was a specific allocation deallocate before
12634
   * overriding base
12635
   */
12636
1.12M
        if (ctxt->input->free != NULL) {
12637
0
      ctxt->input->free((xmlChar *) ctxt->input->base);
12638
0
      ctxt->input->free = NULL;
12639
0
  }
12640
1.12M
        if (ctxt->input->buf != NULL) {
12641
1.07M
            xmlFreeParserInputBuffer(ctxt->input->buf);
12642
1.07M
            ctxt->input->buf = NULL;
12643
1.07M
        }
12644
1.12M
  ctxt->input->cur = BAD_CAST"";
12645
1.12M
        ctxt->input->length = 0;
12646
1.12M
  ctxt->input->base = ctxt->input->cur;
12647
1.12M
        ctxt->input->end = ctxt->input->cur;
12648
1.12M
    }
12649
1.12M
}
12650
12651
/**
12652
 * xmlStopParser:
12653
 * @ctxt:  an XML parser context
12654
 *
12655
 * Blocks further parser processing
12656
 */
12657
void
12658
222k
xmlStopParser(xmlParserCtxtPtr ctxt) {
12659
222k
    if (ctxt == NULL)
12660
0
        return;
12661
222k
    xmlHaltParser(ctxt);
12662
222k
    ctxt->errNo = XML_ERR_USER_STOP;
12663
222k
}
12664
12665
/**
12666
 * xmlCreateIOParserCtxt:
12667
 * @sax:  a SAX handler
12668
 * @user_data:  The user data returned on SAX callbacks
12669
 * @ioread:  an I/O read function
12670
 * @ioclose:  an I/O close function
12671
 * @ioctx:  an I/O handler
12672
 * @enc:  the charset encoding if known
12673
 *
12674
 * Create a parser context for using the XML parser with an existing
12675
 * I/O stream
12676
 *
12677
 * Returns the new parser context or NULL
12678
 */
12679
xmlParserCtxtPtr
12680
xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12681
  xmlInputReadCallback   ioread, xmlInputCloseCallback  ioclose,
12682
0
  void *ioctx, xmlCharEncoding enc) {
12683
0
    xmlParserCtxtPtr ctxt;
12684
0
    xmlParserInputPtr inputStream;
12685
0
    xmlParserInputBufferPtr buf;
12686
12687
0
    if (ioread == NULL) return(NULL);
12688
12689
0
    buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
12690
0
    if (buf == NULL) {
12691
0
        if (ioclose != NULL)
12692
0
            ioclose(ioctx);
12693
0
        return (NULL);
12694
0
    }
12695
12696
0
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
12697
0
    if (ctxt == NULL) {
12698
0
  xmlFreeParserInputBuffer(buf);
12699
0
  return(NULL);
12700
0
    }
12701
12702
0
    inputStream = xmlNewIOInputStream(ctxt, buf, enc);
12703
0
    if (inputStream == NULL) {
12704
0
  xmlFreeParserCtxt(ctxt);
12705
0
  return(NULL);
12706
0
    }
12707
0
    inputPush(ctxt, inputStream);
12708
12709
0
    return(ctxt);
12710
0
}
12711
12712
#ifdef LIBXML_VALID_ENABLED
12713
/************************************************************************
12714
 *                  *
12715
 *    Front ends when parsing a DTD       *
12716
 *                  *
12717
 ************************************************************************/
12718
12719
/**
12720
 * xmlIOParseDTD:
12721
 * @sax:  the SAX handler block or NULL
12722
 * @input:  an Input Buffer
12723
 * @enc:  the charset encoding if known
12724
 *
12725
 * Load and parse a DTD
12726
 *
12727
 * Returns the resulting xmlDtdPtr or NULL in case of error.
12728
 * @input will be freed by the function in any case.
12729
 */
12730
12731
xmlDtdPtr
12732
xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
12733
0
        xmlCharEncoding enc) {
12734
0
    xmlDtdPtr ret = NULL;
12735
0
    xmlParserCtxtPtr ctxt;
12736
0
    xmlParserInputPtr pinput = NULL;
12737
0
    xmlChar start[4];
12738
12739
0
    if (input == NULL)
12740
0
  return(NULL);
12741
12742
0
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
12743
0
    if (ctxt == NULL) {
12744
0
        xmlFreeParserInputBuffer(input);
12745
0
  return(NULL);
12746
0
    }
12747
12748
    /* We are loading a DTD */
12749
0
    ctxt->options |= XML_PARSE_DTDLOAD;
12750
12751
0
    xmlDetectSAX2(ctxt);
12752
12753
    /*
12754
     * generate a parser input from the I/O handler
12755
     */
12756
12757
0
    pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12758
0
    if (pinput == NULL) {
12759
0
        xmlFreeParserInputBuffer(input);
12760
0
  xmlFreeParserCtxt(ctxt);
12761
0
  return(NULL);
12762
0
    }
12763
12764
    /*
12765
     * plug some encoding conversion routines here.
12766
     */
12767
0
    if (xmlPushInput(ctxt, pinput) < 0) {
12768
0
  xmlFreeParserCtxt(ctxt);
12769
0
  return(NULL);
12770
0
    }
12771
0
    if (enc != XML_CHAR_ENCODING_NONE) {
12772
0
        xmlSwitchEncoding(ctxt, enc);
12773
0
    }
12774
12775
0
    pinput->filename = NULL;
12776
0
    pinput->line = 1;
12777
0
    pinput->col = 1;
12778
0
    pinput->base = ctxt->input->cur;
12779
0
    pinput->cur = ctxt->input->cur;
12780
0
    pinput->free = NULL;
12781
12782
    /*
12783
     * let's parse that entity knowing it's an external subset.
12784
     */
12785
0
    ctxt->inSubset = 2;
12786
0
    ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12787
0
    if (ctxt->myDoc == NULL) {
12788
0
  xmlErrMemory(ctxt, "New Doc failed");
12789
0
  return(NULL);
12790
0
    }
12791
0
    ctxt->myDoc->properties = XML_DOC_INTERNAL;
12792
0
    ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12793
0
                                 BAD_CAST "none", BAD_CAST "none");
12794
12795
0
    if ((enc == XML_CHAR_ENCODING_NONE) &&
12796
0
        ((ctxt->input->end - ctxt->input->cur) >= 4)) {
12797
  /*
12798
   * Get the 4 first bytes and decode the charset
12799
   * if enc != XML_CHAR_ENCODING_NONE
12800
   * plug some encoding conversion routines.
12801
   */
12802
0
  start[0] = RAW;
12803
0
  start[1] = NXT(1);
12804
0
  start[2] = NXT(2);
12805
0
  start[3] = NXT(3);
12806
0
  enc = xmlDetectCharEncoding(start, 4);
12807
0
  if (enc != XML_CHAR_ENCODING_NONE) {
12808
0
      xmlSwitchEncoding(ctxt, enc);
12809
0
  }
12810
0
    }
12811
12812
0
    xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
12813
12814
0
    if (ctxt->myDoc != NULL) {
12815
0
  if (ctxt->wellFormed) {
12816
0
      ret = ctxt->myDoc->extSubset;
12817
0
      ctxt->myDoc->extSubset = NULL;
12818
0
      if (ret != NULL) {
12819
0
    xmlNodePtr tmp;
12820
12821
0
    ret->doc = NULL;
12822
0
    tmp = ret->children;
12823
0
    while (tmp != NULL) {
12824
0
        tmp->doc = NULL;
12825
0
        tmp = tmp->next;
12826
0
    }
12827
0
      }
12828
0
  } else {
12829
0
      ret = NULL;
12830
0
  }
12831
0
        xmlFreeDoc(ctxt->myDoc);
12832
0
        ctxt->myDoc = NULL;
12833
0
    }
12834
0
    xmlFreeParserCtxt(ctxt);
12835
12836
0
    return(ret);
12837
0
}
12838
12839
/**
12840
 * xmlSAXParseDTD:
12841
 * @sax:  the SAX handler block
12842
 * @ExternalID:  a NAME* containing the External ID of the DTD
12843
 * @SystemID:  a NAME* containing the URL to the DTD
12844
 *
12845
 * DEPRECATED: Don't use.
12846
 *
12847
 * Load and parse an external subset.
12848
 *
12849
 * Returns the resulting xmlDtdPtr or NULL in case of error.
12850
 */
12851
12852
xmlDtdPtr
12853
xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
12854
0
                          const xmlChar *SystemID) {
12855
0
    xmlDtdPtr ret = NULL;
12856
0
    xmlParserCtxtPtr ctxt;
12857
0
    xmlParserInputPtr input = NULL;
12858
0
    xmlCharEncoding enc;
12859
0
    xmlChar* systemIdCanonic;
12860
12861
0
    if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
12862
12863
0
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
12864
0
    if (ctxt == NULL) {
12865
0
  return(NULL);
12866
0
    }
12867
12868
    /* We are loading a DTD */
12869
0
    ctxt->options |= XML_PARSE_DTDLOAD;
12870
12871
    /*
12872
     * Canonicalise the system ID
12873
     */
12874
0
    systemIdCanonic = xmlCanonicPath(SystemID);
12875
0
    if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
12876
0
  xmlFreeParserCtxt(ctxt);
12877
0
  return(NULL);
12878
0
    }
12879
12880
    /*
12881
     * Ask the Entity resolver to load the damn thing
12882
     */
12883
12884
0
    if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
12885
0
  input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
12886
0
                                   systemIdCanonic);
12887
0
    if (input == NULL) {
12888
0
  xmlFreeParserCtxt(ctxt);
12889
0
  if (systemIdCanonic != NULL)
12890
0
      xmlFree(systemIdCanonic);
12891
0
  return(NULL);
12892
0
    }
12893
12894
    /*
12895
     * plug some encoding conversion routines here.
12896
     */
12897
0
    if (xmlPushInput(ctxt, input) < 0) {
12898
0
  xmlFreeParserCtxt(ctxt);
12899
0
  if (systemIdCanonic != NULL)
12900
0
      xmlFree(systemIdCanonic);
12901
0
  return(NULL);
12902
0
    }
12903
0
    if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12904
0
  enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
12905
0
  xmlSwitchEncoding(ctxt, enc);
12906
0
    }
12907
12908
0
    if (input->filename == NULL)
12909
0
  input->filename = (char *) systemIdCanonic;
12910
0
    else
12911
0
  xmlFree(systemIdCanonic);
12912
0
    input->line = 1;
12913
0
    input->col = 1;
12914
0
    input->base = ctxt->input->cur;
12915
0
    input->cur = ctxt->input->cur;
12916
0
    input->free = NULL;
12917
12918
    /*
12919
     * let's parse that entity knowing it's an external subset.
12920
     */
12921
0
    ctxt->inSubset = 2;
12922
0
    ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12923
0
    if (ctxt->myDoc == NULL) {
12924
0
  xmlErrMemory(ctxt, "New Doc failed");
12925
0
  xmlFreeParserCtxt(ctxt);
12926
0
  return(NULL);
12927
0
    }
12928
0
    ctxt->myDoc->properties = XML_DOC_INTERNAL;
12929
0
    ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12930
0
                                 ExternalID, SystemID);
12931
0
    xmlParseExternalSubset(ctxt, ExternalID, SystemID);
12932
12933
0
    if (ctxt->myDoc != NULL) {
12934
0
  if (ctxt->wellFormed) {
12935
0
      ret = ctxt->myDoc->extSubset;
12936
0
      ctxt->myDoc->extSubset = NULL;
12937
0
      if (ret != NULL) {
12938
0
    xmlNodePtr tmp;
12939
12940
0
    ret->doc = NULL;
12941
0
    tmp = ret->children;
12942
0
    while (tmp != NULL) {
12943
0
        tmp->doc = NULL;
12944
0
        tmp = tmp->next;
12945
0
    }
12946
0
      }
12947
0
  } else {
12948
0
      ret = NULL;
12949
0
  }
12950
0
        xmlFreeDoc(ctxt->myDoc);
12951
0
        ctxt->myDoc = NULL;
12952
0
    }
12953
0
    xmlFreeParserCtxt(ctxt);
12954
12955
0
    return(ret);
12956
0
}
12957
12958
12959
/**
12960
 * xmlParseDTD:
12961
 * @ExternalID:  a NAME* containing the External ID of the DTD
12962
 * @SystemID:  a NAME* containing the URL to the DTD
12963
 *
12964
 * Load and parse an external subset.
12965
 *
12966
 * Returns the resulting xmlDtdPtr or NULL in case of error.
12967
 */
12968
12969
xmlDtdPtr
12970
0
xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
12971
0
    return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
12972
0
}
12973
#endif /* LIBXML_VALID_ENABLED */
12974
12975
/************************************************************************
12976
 *                  *
12977
 *    Front ends when parsing an Entity     *
12978
 *                  *
12979
 ************************************************************************/
12980
12981
/**
12982
 * xmlParseCtxtExternalEntity:
12983
 * @ctx:  the existing parsing context
12984
 * @URL:  the URL for the entity to load
12985
 * @ID:  the System ID for the entity to load
12986
 * @lst:  the return value for the set of parsed nodes
12987
 *
12988
 * Parse an external general entity within an existing parsing context
12989
 * An external general parsed entity is well-formed if it matches the
12990
 * production labeled extParsedEnt.
12991
 *
12992
 * [78] extParsedEnt ::= TextDecl? content
12993
 *
12994
 * Returns 0 if the entity is well formed, -1 in case of args problem and
12995
 *    the parser error code otherwise
12996
 */
12997
12998
int
12999
xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
13000
0
                 const xmlChar *ID, xmlNodePtr *lst) {
13001
0
    void *userData;
13002
13003
0
    if (ctx == NULL) return(-1);
13004
    /*
13005
     * If the user provided their own SAX callbacks, then reuse the
13006
     * userData callback field, otherwise the expected setup in a
13007
     * DOM builder is to have userData == ctxt
13008
     */
13009
0
    if (ctx->userData == ctx)
13010
0
        userData = NULL;
13011
0
    else
13012
0
        userData = ctx->userData;
13013
0
    return xmlParseExternalEntityPrivate(ctx->myDoc, ctx, ctx->sax,
13014
0
                                         userData, ctx->depth + 1,
13015
0
                                         URL, ID, lst);
13016
0
}
13017
13018
/**
13019
 * xmlParseExternalEntityPrivate:
13020
 * @doc:  the document the chunk pertains to
13021
 * @oldctxt:  the previous parser context if available
13022
 * @sax:  the SAX handler block (possibly NULL)
13023
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
13024
 * @depth:  Used for loop detection, use 0
13025
 * @URL:  the URL for the entity to load
13026
 * @ID:  the System ID for the entity to load
13027
 * @list:  the return value for the set of parsed nodes
13028
 *
13029
 * Private version of xmlParseExternalEntity()
13030
 *
13031
 * Returns 0 if the entity is well formed, -1 in case of args problem and
13032
 *    the parser error code otherwise
13033
 */
13034
13035
static xmlParserErrors
13036
xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
13037
                xmlSAXHandlerPtr sax,
13038
          void *user_data, int depth, const xmlChar *URL,
13039
749k
          const xmlChar *ID, xmlNodePtr *list) {
13040
749k
    xmlParserCtxtPtr ctxt;
13041
749k
    xmlDocPtr newDoc;
13042
749k
    xmlNodePtr newRoot;
13043
749k
    xmlParserErrors ret = XML_ERR_OK;
13044
749k
    xmlChar start[4];
13045
749k
    xmlCharEncoding enc;
13046
13047
749k
    if (((depth > 40) &&
13048
749k
  ((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13049
749k
  (depth > 1024)) {
13050
1.43k
  return(XML_ERR_ENTITY_LOOP);
13051
1.43k
    }
13052
13053
747k
    if (list != NULL)
13054
744k
        *list = NULL;
13055
747k
    if ((URL == NULL) && (ID == NULL))
13056
111
  return(XML_ERR_INTERNAL_ERROR);
13057
747k
    if (doc == NULL)
13058
0
  return(XML_ERR_INTERNAL_ERROR);
13059
13060
747k
    ctxt = xmlCreateEntityParserCtxtInternal(sax, user_data, URL, ID, NULL,
13061
747k
                                             oldctxt);
13062
747k
    if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
13063
731k
    xmlDetectSAX2(ctxt);
13064
13065
731k
    newDoc = xmlNewDoc(BAD_CAST "1.0");
13066
731k
    if (newDoc == NULL) {
13067
0
  xmlFreeParserCtxt(ctxt);
13068
0
  return(XML_ERR_INTERNAL_ERROR);
13069
0
    }
13070
731k
    newDoc->properties = XML_DOC_INTERNAL;
13071
731k
    if (doc) {
13072
731k
        newDoc->intSubset = doc->intSubset;
13073
731k
        newDoc->extSubset = doc->extSubset;
13074
731k
        if (doc->dict) {
13075
474k
            newDoc->dict = doc->dict;
13076
474k
            xmlDictReference(newDoc->dict);
13077
474k
        }
13078
731k
        if (doc->URL != NULL) {
13079
464k
            newDoc->URL = xmlStrdup(doc->URL);
13080
464k
        }
13081
731k
    }
13082
731k
    newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13083
731k
    if (newRoot == NULL) {
13084
0
  if (sax != NULL)
13085
0
  xmlFreeParserCtxt(ctxt);
13086
0
  newDoc->intSubset = NULL;
13087
0
  newDoc->extSubset = NULL;
13088
0
        xmlFreeDoc(newDoc);
13089
0
  return(XML_ERR_INTERNAL_ERROR);
13090
0
    }
13091
731k
    xmlAddChild((xmlNodePtr) newDoc, newRoot);
13092
731k
    nodePush(ctxt, newDoc->children);
13093
731k
    if (doc == NULL) {
13094
0
        ctxt->myDoc = newDoc;
13095
731k
    } else {
13096
731k
        ctxt->myDoc = doc;
13097
731k
        newRoot->doc = doc;
13098
731k
    }
13099
13100
    /*
13101
     * Get the 4 first bytes and decode the charset
13102
     * if enc != XML_CHAR_ENCODING_NONE
13103
     * plug some encoding conversion routines.
13104
     */
13105
731k
    GROW;
13106
731k
    if ((ctxt->input->end - ctxt->input->cur) >= 4) {
13107
730k
  start[0] = RAW;
13108
730k
  start[1] = NXT(1);
13109
730k
  start[2] = NXT(2);
13110
730k
  start[3] = NXT(3);
13111
730k
  enc = xmlDetectCharEncoding(start, 4);
13112
730k
  if (enc != XML_CHAR_ENCODING_NONE) {
13113
13.5k
      xmlSwitchEncoding(ctxt, enc);
13114
13.5k
  }
13115
730k
    }
13116
13117
    /*
13118
     * Parse a possible text declaration first
13119
     */
13120
731k
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
13121
9.51k
  xmlParseTextDecl(ctxt);
13122
        /*
13123
         * An XML-1.0 document can't reference an entity not XML-1.0
13124
         */
13125
9.51k
        if ((xmlStrEqual(oldctxt->version, BAD_CAST "1.0")) &&
13126
9.51k
            (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
13127
128
            xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
13128
128
                           "Version mismatch between document and entity\n");
13129
128
        }
13130
9.51k
    }
13131
13132
731k
    ctxt->instate = XML_PARSER_CONTENT;
13133
731k
    ctxt->depth = depth;
13134
731k
    if (oldctxt != NULL) {
13135
731k
  ctxt->_private = oldctxt->_private;
13136
731k
  ctxt->loadsubset = oldctxt->loadsubset;
13137
731k
  ctxt->validate = oldctxt->validate;
13138
731k
  ctxt->valid = oldctxt->valid;
13139
731k
  ctxt->replaceEntities = oldctxt->replaceEntities;
13140
731k
        if (oldctxt->validate) {
13141
260k
            ctxt->vctxt.error = oldctxt->vctxt.error;
13142
260k
            ctxt->vctxt.warning = oldctxt->vctxt.warning;
13143
260k
            ctxt->vctxt.userData = oldctxt->vctxt.userData;
13144
260k
        }
13145
731k
  ctxt->external = oldctxt->external;
13146
731k
        if (ctxt->dict) xmlDictFree(ctxt->dict);
13147
731k
        ctxt->dict = oldctxt->dict;
13148
731k
        ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13149
731k
        ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13150
731k
        ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13151
731k
        ctxt->dictNames = oldctxt->dictNames;
13152
731k
        ctxt->attsDefault = oldctxt->attsDefault;
13153
731k
        ctxt->attsSpecial = oldctxt->attsSpecial;
13154
731k
        ctxt->linenumbers = oldctxt->linenumbers;
13155
731k
  ctxt->record_info = oldctxt->record_info;
13156
731k
  ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
13157
731k
  ctxt->node_seq.length = oldctxt->node_seq.length;
13158
731k
  ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
13159
731k
    } else {
13160
  /*
13161
   * Doing validity checking on chunk without context
13162
   * doesn't make sense
13163
   */
13164
0
  ctxt->_private = NULL;
13165
0
  ctxt->validate = 0;
13166
0
  ctxt->external = 2;
13167
0
  ctxt->loadsubset = 0;
13168
0
    }
13169
13170
731k
    xmlParseContent(ctxt);
13171
13172
731k
    if ((RAW == '<') && (NXT(1) == '/')) {
13173
186
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13174
731k
    } else if (RAW != 0) {
13175
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13176
0
    }
13177
731k
    if (ctxt->node != newDoc->children) {
13178
598k
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13179
598k
    }
13180
13181
731k
    if (!ctxt->wellFormed) {
13182
728k
        if (ctxt->errNo == 0)
13183
0
      ret = XML_ERR_INTERNAL_ERROR;
13184
728k
  else
13185
728k
      ret = (xmlParserErrors)ctxt->errNo;
13186
728k
    } else {
13187
3.30k
  if (list != NULL) {
13188
2.87k
      xmlNodePtr cur;
13189
13190
      /*
13191
       * Return the newly created nodeset after unlinking it from
13192
       * they pseudo parent.
13193
       */
13194
2.87k
      cur = newDoc->children->children;
13195
2.87k
      *list = cur;
13196
5.72k
      while (cur != NULL) {
13197
2.84k
    cur->parent = NULL;
13198
2.84k
    cur = cur->next;
13199
2.84k
      }
13200
2.87k
            newDoc->children->children = NULL;
13201
2.87k
  }
13202
3.30k
  ret = XML_ERR_OK;
13203
3.30k
    }
13204
13205
    /*
13206
     * Record in the parent context the number of entities replacement
13207
     * done when parsing that reference.
13208
     */
13209
731k
    if (oldctxt != NULL)
13210
731k
        oldctxt->nbentities += ctxt->nbentities;
13211
13212
    /*
13213
     * Also record the size of the entity parsed
13214
     */
13215
731k
    if (ctxt->input != NULL && oldctxt != NULL) {
13216
731k
  oldctxt->sizeentities += ctxt->input->consumed;
13217
731k
  oldctxt->sizeentities += (ctxt->input->cur - ctxt->input->base);
13218
731k
    }
13219
    /*
13220
     * And record the last error if any
13221
     */
13222
731k
    if ((oldctxt != NULL) && (ctxt->lastError.code != XML_ERR_OK))
13223
728k
        xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13224
13225
731k
    if (oldctxt != NULL) {
13226
731k
        ctxt->dict = NULL;
13227
731k
        ctxt->attsDefault = NULL;
13228
731k
        ctxt->attsSpecial = NULL;
13229
731k
        oldctxt->validate = ctxt->validate;
13230
731k
        oldctxt->valid = ctxt->valid;
13231
731k
        oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
13232
731k
        oldctxt->node_seq.length = ctxt->node_seq.length;
13233
731k
        oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
13234
731k
    }
13235
731k
    ctxt->node_seq.maximum = 0;
13236
731k
    ctxt->node_seq.length = 0;
13237
731k
    ctxt->node_seq.buffer = NULL;
13238
731k
    xmlFreeParserCtxt(ctxt);
13239
731k
    newDoc->intSubset = NULL;
13240
731k
    newDoc->extSubset = NULL;
13241
731k
    xmlFreeDoc(newDoc);
13242
13243
731k
    return(ret);
13244
731k
}
13245
13246
#ifdef LIBXML_SAX1_ENABLED
13247
/**
13248
 * xmlParseExternalEntity:
13249
 * @doc:  the document the chunk pertains to
13250
 * @sax:  the SAX handler block (possibly NULL)
13251
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
13252
 * @depth:  Used for loop detection, use 0
13253
 * @URL:  the URL for the entity to load
13254
 * @ID:  the System ID for the entity to load
13255
 * @lst:  the return value for the set of parsed nodes
13256
 *
13257
 * Parse an external general entity
13258
 * An external general parsed entity is well-formed if it matches the
13259
 * production labeled extParsedEnt.
13260
 *
13261
 * [78] extParsedEnt ::= TextDecl? content
13262
 *
13263
 * Returns 0 if the entity is well formed, -1 in case of args problem and
13264
 *    the parser error code otherwise
13265
 */
13266
13267
int
13268
xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
13269
0
    int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
13270
0
    return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
13271
0
                           ID, lst));
13272
0
}
13273
13274
/**
13275
 * xmlParseBalancedChunkMemory:
13276
 * @doc:  the document the chunk pertains to (must not be NULL)
13277
 * @sax:  the SAX handler block (possibly NULL)
13278
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
13279
 * @depth:  Used for loop detection, use 0
13280
 * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
13281
 * @lst:  the return value for the set of parsed nodes
13282
 *
13283
 * Parse a well-balanced chunk of an XML document
13284
 * called by the parser
13285
 * The allowed sequence for the Well Balanced Chunk is the one defined by
13286
 * the content production in the XML grammar:
13287
 *
13288
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13289
 *
13290
 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13291
 *    the parser error code otherwise
13292
 */
13293
13294
int
13295
xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13296
0
     void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
13297
0
    return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
13298
0
                                                depth, string, lst, 0 );
13299
0
}
13300
#endif /* LIBXML_SAX1_ENABLED */
13301
13302
/**
13303
 * xmlParseBalancedChunkMemoryInternal:
13304
 * @oldctxt:  the existing parsing context
13305
 * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
13306
 * @user_data:  the user data field for the parser context
13307
 * @lst:  the return value for the set of parsed nodes
13308
 *
13309
 *
13310
 * Parse a well-balanced chunk of an XML document
13311
 * called by the parser
13312
 * The allowed sequence for the Well Balanced Chunk is the one defined by
13313
 * the content production in the XML grammar:
13314
 *
13315
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13316
 *
13317
 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13318
 * error code otherwise
13319
 *
13320
 * In case recover is set to 1, the nodelist will not be empty even if
13321
 * the parsed chunk is not well balanced.
13322
 */
13323
static xmlParserErrors
13324
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
13325
91.6k
  const xmlChar *string, void *user_data, xmlNodePtr *lst) {
13326
91.6k
    xmlParserCtxtPtr ctxt;
13327
91.6k
    xmlDocPtr newDoc = NULL;
13328
91.6k
    xmlNodePtr newRoot;
13329
91.6k
    xmlSAXHandlerPtr oldsax = NULL;
13330
91.6k
    xmlNodePtr content = NULL;
13331
91.6k
    xmlNodePtr last = NULL;
13332
91.6k
    int size;
13333
91.6k
    xmlParserErrors ret = XML_ERR_OK;
13334
91.6k
#ifdef SAX2
13335
91.6k
    int i;
13336
91.6k
#endif
13337
13338
91.6k
    if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13339
91.6k
        (oldctxt->depth >  1024)) {
13340
591
  return(XML_ERR_ENTITY_LOOP);
13341
591
    }
13342
13343
13344
91.0k
    if (lst != NULL)
13345
90.7k
        *lst = NULL;
13346
91.0k
    if (string == NULL)
13347
137
        return(XML_ERR_INTERNAL_ERROR);
13348
13349
90.9k
    size = xmlStrlen(string);
13350
13351
90.9k
    ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13352
90.9k
    if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
13353
89.8k
    if (user_data != NULL)
13354
0
  ctxt->userData = user_data;
13355
89.8k
    else
13356
89.8k
  ctxt->userData = ctxt;
13357
89.8k
    if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
13358
89.8k
    ctxt->dict = oldctxt->dict;
13359
89.8k
    ctxt->input_id = oldctxt->input_id + 1;
13360
89.8k
    ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13361
89.8k
    ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13362
89.8k
    ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13363
13364
89.8k
#ifdef SAX2
13365
    /* propagate namespaces down the entity */
13366
91.5k
    for (i = 0;i < oldctxt->nsNr;i += 2) {
13367
1.74k
        nsPush(ctxt, oldctxt->nsTab[i], oldctxt->nsTab[i+1]);
13368
1.74k
    }
13369
89.8k
#endif
13370
13371
89.8k
    oldsax = ctxt->sax;
13372
89.8k
    ctxt->sax = oldctxt->sax;
13373
89.8k
    xmlDetectSAX2(ctxt);
13374
89.8k
    ctxt->replaceEntities = oldctxt->replaceEntities;
13375
89.8k
    ctxt->options = oldctxt->options;
13376
13377
89.8k
    ctxt->_private = oldctxt->_private;
13378
89.8k
    if (oldctxt->myDoc == NULL) {
13379
0
  newDoc = xmlNewDoc(BAD_CAST "1.0");
13380
0
  if (newDoc == NULL) {
13381
0
      ctxt->sax = oldsax;
13382
0
      ctxt->dict = NULL;
13383
0
      xmlFreeParserCtxt(ctxt);
13384
0
      return(XML_ERR_INTERNAL_ERROR);
13385
0
  }
13386
0
  newDoc->properties = XML_DOC_INTERNAL;
13387
0
  newDoc->dict = ctxt->dict;
13388
0
  xmlDictReference(newDoc->dict);
13389
0
  ctxt->myDoc = newDoc;
13390
89.8k
    } else {
13391
89.8k
  ctxt->myDoc = oldctxt->myDoc;
13392
89.8k
        content = ctxt->myDoc->children;
13393
89.8k
  last = ctxt->myDoc->last;
13394
89.8k
    }
13395
89.8k
    newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
13396
89.8k
    if (newRoot == NULL) {
13397
0
  ctxt->sax = oldsax;
13398
0
  ctxt->dict = NULL;
13399
0
  xmlFreeParserCtxt(ctxt);
13400
0
  if (newDoc != NULL) {
13401
0
      xmlFreeDoc(newDoc);
13402
0
  }
13403
0
  return(XML_ERR_INTERNAL_ERROR);
13404
0
    }
13405
89.8k
    ctxt->myDoc->children = NULL;
13406
89.8k
    ctxt->myDoc->last = NULL;
13407
89.8k
    xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
13408
89.8k
    nodePush(ctxt, ctxt->myDoc->children);
13409
89.8k
    ctxt->instate = XML_PARSER_CONTENT;
13410
89.8k
    ctxt->depth = oldctxt->depth + 1;
13411
13412
89.8k
    ctxt->validate = 0;
13413
89.8k
    ctxt->loadsubset = oldctxt->loadsubset;
13414
89.8k
    if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
13415
  /*
13416
   * ID/IDREF registration will be done in xmlValidateElement below
13417
   */
13418
60.2k
  ctxt->loadsubset |= XML_SKIP_IDS;
13419
60.2k
    }
13420
89.8k
    ctxt->dictNames = oldctxt->dictNames;
13421
89.8k
    ctxt->attsDefault = oldctxt->attsDefault;
13422
89.8k
    ctxt->attsSpecial = oldctxt->attsSpecial;
13423
13424
89.8k
    xmlParseContent(ctxt);
13425
89.8k
    if ((RAW == '<') && (NXT(1) == '/')) {
13426
212
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13427
89.6k
    } else if (RAW != 0) {
13428
27
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13429
27
    }
13430
89.8k
    if (ctxt->node != ctxt->myDoc->children) {
13431
45.3k
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13432
45.3k
    }
13433
13434
89.8k
    if (!ctxt->wellFormed) {
13435
57.3k
        if (ctxt->errNo == 0)
13436
0
      ret = XML_ERR_INTERNAL_ERROR;
13437
57.3k
  else
13438
57.3k
      ret = (xmlParserErrors)ctxt->errNo;
13439
57.3k
    } else {
13440
32.5k
      ret = XML_ERR_OK;
13441
32.5k
    }
13442
13443
89.8k
    if ((lst != NULL) && (ret == XML_ERR_OK)) {
13444
32.5k
  xmlNodePtr cur;
13445
13446
  /*
13447
   * Return the newly created nodeset after unlinking it from
13448
   * they pseudo parent.
13449
   */
13450
32.5k
  cur = ctxt->myDoc->children->children;
13451
32.5k
  *lst = cur;
13452
68.9k
  while (cur != NULL) {
13453
36.4k
#ifdef LIBXML_VALID_ENABLED
13454
36.4k
      if ((oldctxt->validate) && (oldctxt->wellFormed) &&
13455
36.4k
    (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
13456
36.4k
    (cur->type == XML_ELEMENT_NODE)) {
13457
1.94k
    oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
13458
1.94k
      oldctxt->myDoc, cur);
13459
1.94k
      }
13460
36.4k
#endif /* LIBXML_VALID_ENABLED */
13461
36.4k
      cur->parent = NULL;
13462
36.4k
      cur = cur->next;
13463
36.4k
  }
13464
32.5k
  ctxt->myDoc->children->children = NULL;
13465
32.5k
    }
13466
89.8k
    if (ctxt->myDoc != NULL) {
13467
89.8k
  xmlFreeNode(ctxt->myDoc->children);
13468
89.8k
        ctxt->myDoc->children = content;
13469
89.8k
        ctxt->myDoc->last = last;
13470
89.8k
    }
13471
13472
    /*
13473
     * Record in the parent context the number of entities replacement
13474
     * done when parsing that reference.
13475
     */
13476
89.8k
    if (oldctxt != NULL)
13477
89.8k
        oldctxt->nbentities += ctxt->nbentities;
13478
13479
    /*
13480
     * Also record the last error if any
13481
     */
13482
89.8k
    if (ctxt->lastError.code != XML_ERR_OK)
13483
57.4k
        xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13484
13485
89.8k
    ctxt->sax = oldsax;
13486
89.8k
    ctxt->dict = NULL;
13487
89.8k
    ctxt->attsDefault = NULL;
13488
89.8k
    ctxt->attsSpecial = NULL;
13489
89.8k
    xmlFreeParserCtxt(ctxt);
13490
89.8k
    if (newDoc != NULL) {
13491
0
  xmlFreeDoc(newDoc);
13492
0
    }
13493
13494
89.8k
    return(ret);
13495
89.8k
}
13496
13497
/**
13498
 * xmlParseInNodeContext:
13499
 * @node:  the context node
13500
 * @data:  the input string
13501
 * @datalen:  the input string length in bytes
13502
 * @options:  a combination of xmlParserOption
13503
 * @lst:  the return value for the set of parsed nodes
13504
 *
13505
 * Parse a well-balanced chunk of an XML document
13506
 * within the context (DTD, namespaces, etc ...) of the given node.
13507
 *
13508
 * The allowed sequence for the data is a Well Balanced Chunk defined by
13509
 * the content production in the XML grammar:
13510
 *
13511
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13512
 *
13513
 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13514
 * error code otherwise
13515
 */
13516
xmlParserErrors
13517
xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
13518
0
                      int options, xmlNodePtr *lst) {
13519
0
#ifdef SAX2
13520
0
    xmlParserCtxtPtr ctxt;
13521
0
    xmlDocPtr doc = NULL;
13522
0
    xmlNodePtr fake, cur;
13523
0
    int nsnr = 0;
13524
13525
0
    xmlParserErrors ret = XML_ERR_OK;
13526
13527
    /*
13528
     * check all input parameters, grab the document
13529
     */
13530
0
    if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
13531
0
        return(XML_ERR_INTERNAL_ERROR);
13532
0
    switch (node->type) {
13533
0
        case XML_ELEMENT_NODE:
13534
0
        case XML_ATTRIBUTE_NODE:
13535
0
        case XML_TEXT_NODE:
13536
0
        case XML_CDATA_SECTION_NODE:
13537
0
        case XML_ENTITY_REF_NODE:
13538
0
        case XML_PI_NODE:
13539
0
        case XML_COMMENT_NODE:
13540
0
        case XML_DOCUMENT_NODE:
13541
0
        case XML_HTML_DOCUMENT_NODE:
13542
0
      break;
13543
0
  default:
13544
0
      return(XML_ERR_INTERNAL_ERROR);
13545
13546
0
    }
13547
0
    while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
13548
0
           (node->type != XML_DOCUMENT_NODE) &&
13549
0
     (node->type != XML_HTML_DOCUMENT_NODE))
13550
0
  node = node->parent;
13551
0
    if (node == NULL)
13552
0
  return(XML_ERR_INTERNAL_ERROR);
13553
0
    if (node->type == XML_ELEMENT_NODE)
13554
0
  doc = node->doc;
13555
0
    else
13556
0
        doc = (xmlDocPtr) node;
13557
0
    if (doc == NULL)
13558
0
  return(XML_ERR_INTERNAL_ERROR);
13559
13560
    /*
13561
     * allocate a context and set-up everything not related to the
13562
     * node position in the tree
13563
     */
13564
0
    if (doc->type == XML_DOCUMENT_NODE)
13565
0
  ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
13566
0
#ifdef LIBXML_HTML_ENABLED
13567
0
    else if (doc->type == XML_HTML_DOCUMENT_NODE) {
13568
0
  ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
13569
        /*
13570
         * When parsing in context, it makes no sense to add implied
13571
         * elements like html/body/etc...
13572
         */
13573
0
        options |= HTML_PARSE_NOIMPLIED;
13574
0
    }
13575
0
#endif
13576
0
    else
13577
0
        return(XML_ERR_INTERNAL_ERROR);
13578
13579
0
    if (ctxt == NULL)
13580
0
        return(XML_ERR_NO_MEMORY);
13581
13582
    /*
13583
     * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
13584
     * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
13585
     * we must wait until the last moment to free the original one.
13586
     */
13587
0
    if (doc->dict != NULL) {
13588
0
        if (ctxt->dict != NULL)
13589
0
      xmlDictFree(ctxt->dict);
13590
0
  ctxt->dict = doc->dict;
13591
0
    } else
13592
0
        options |= XML_PARSE_NODICT;
13593
13594
0
    if (doc->encoding != NULL) {
13595
0
        xmlCharEncodingHandlerPtr hdlr;
13596
13597
0
        if (ctxt->encoding != NULL)
13598
0
      xmlFree((xmlChar *) ctxt->encoding);
13599
0
        ctxt->encoding = xmlStrdup((const xmlChar *) doc->encoding);
13600
13601
0
        hdlr = xmlFindCharEncodingHandler((const char *) doc->encoding);
13602
0
        if (hdlr != NULL) {
13603
0
            xmlSwitchToEncoding(ctxt, hdlr);
13604
0
  } else {
13605
0
            return(XML_ERR_UNSUPPORTED_ENCODING);
13606
0
        }
13607
0
    }
13608
13609
0
    xmlCtxtUseOptionsInternal(ctxt, options, NULL);
13610
0
    xmlDetectSAX2(ctxt);
13611
0
    ctxt->myDoc = doc;
13612
    /* parsing in context, i.e. as within existing content */
13613
0
    ctxt->input_id = 2;
13614
0
    ctxt->instate = XML_PARSER_CONTENT;
13615
13616
0
    fake = xmlNewDocComment(node->doc, NULL);
13617
0
    if (fake == NULL) {
13618
0
        xmlFreeParserCtxt(ctxt);
13619
0
  return(XML_ERR_NO_MEMORY);
13620
0
    }
13621
0
    xmlAddChild(node, fake);
13622
13623
0
    if (node->type == XML_ELEMENT_NODE) {
13624
0
  nodePush(ctxt, node);
13625
  /*
13626
   * initialize the SAX2 namespaces stack
13627
   */
13628
0
  cur = node;
13629
0
  while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
13630
0
      xmlNsPtr ns = cur->nsDef;
13631
0
      const xmlChar *iprefix, *ihref;
13632
13633
0
      while (ns != NULL) {
13634
0
    if (ctxt->dict) {
13635
0
        iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
13636
0
        ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
13637
0
    } else {
13638
0
        iprefix = ns->prefix;
13639
0
        ihref = ns->href;
13640
0
    }
13641
13642
0
          if (xmlGetNamespace(ctxt, iprefix) == NULL) {
13643
0
        nsPush(ctxt, iprefix, ihref);
13644
0
        nsnr++;
13645
0
    }
13646
0
    ns = ns->next;
13647
0
      }
13648
0
      cur = cur->parent;
13649
0
  }
13650
0
    }
13651
13652
0
    if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
13653
  /*
13654
   * ID/IDREF registration will be done in xmlValidateElement below
13655
   */
13656
0
  ctxt->loadsubset |= XML_SKIP_IDS;
13657
0
    }
13658
13659
0
#ifdef LIBXML_HTML_ENABLED
13660
0
    if (doc->type == XML_HTML_DOCUMENT_NODE)
13661
0
        __htmlParseContent(ctxt);
13662
0
    else
13663
0
#endif
13664
0
  xmlParseContent(ctxt);
13665
13666
0
    nsPop(ctxt, nsnr);
13667
0
    if ((RAW == '<') && (NXT(1) == '/')) {
13668
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13669
0
    } else if (RAW != 0) {
13670
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13671
0
    }
13672
0
    if ((ctxt->node != NULL) && (ctxt->node != node)) {
13673
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13674
0
  ctxt->wellFormed = 0;
13675
0
    }
13676
13677
0
    if (!ctxt->wellFormed) {
13678
0
        if (ctxt->errNo == 0)
13679
0
      ret = XML_ERR_INTERNAL_ERROR;
13680
0
  else
13681
0
      ret = (xmlParserErrors)ctxt->errNo;
13682
0
    } else {
13683
0
        ret = XML_ERR_OK;
13684
0
    }
13685
13686
    /*
13687
     * Return the newly created nodeset after unlinking it from
13688
     * the pseudo sibling.
13689
     */
13690
13691
0
    cur = fake->next;
13692
0
    fake->next = NULL;
13693
0
    node->last = fake;
13694
13695
0
    if (cur != NULL) {
13696
0
  cur->prev = NULL;
13697
0
    }
13698
13699
0
    *lst = cur;
13700
13701
0
    while (cur != NULL) {
13702
0
  cur->parent = NULL;
13703
0
  cur = cur->next;
13704
0
    }
13705
13706
0
    xmlUnlinkNode(fake);
13707
0
    xmlFreeNode(fake);
13708
13709
13710
0
    if (ret != XML_ERR_OK) {
13711
0
        xmlFreeNodeList(*lst);
13712
0
  *lst = NULL;
13713
0
    }
13714
13715
0
    if (doc->dict != NULL)
13716
0
        ctxt->dict = NULL;
13717
0
    xmlFreeParserCtxt(ctxt);
13718
13719
0
    return(ret);
13720
#else /* !SAX2 */
13721
    return(XML_ERR_INTERNAL_ERROR);
13722
#endif
13723
0
}
13724
13725
#ifdef LIBXML_SAX1_ENABLED
13726
/**
13727
 * xmlParseBalancedChunkMemoryRecover:
13728
 * @doc:  the document the chunk pertains to (must not be NULL)
13729
 * @sax:  the SAX handler block (possibly NULL)
13730
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
13731
 * @depth:  Used for loop detection, use 0
13732
 * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
13733
 * @lst:  the return value for the set of parsed nodes
13734
 * @recover: return nodes even if the data is broken (use 0)
13735
 *
13736
 *
13737
 * Parse a well-balanced chunk of an XML document
13738
 * called by the parser
13739
 * The allowed sequence for the Well Balanced Chunk is the one defined by
13740
 * the content production in the XML grammar:
13741
 *
13742
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13743
 *
13744
 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13745
 *    the parser error code otherwise
13746
 *
13747
 * In case recover is set to 1, the nodelist will not be empty even if
13748
 * the parsed chunk is not well balanced, assuming the parsing succeeded to
13749
 * some extent.
13750
 */
13751
int
13752
xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13753
     void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
13754
0
     int recover) {
13755
0
    xmlParserCtxtPtr ctxt;
13756
0
    xmlDocPtr newDoc;
13757
0
    xmlSAXHandlerPtr oldsax = NULL;
13758
0
    xmlNodePtr content, newRoot;
13759
0
    int size;
13760
0
    int ret = 0;
13761
13762
0
    if (depth > 40) {
13763
0
  return(XML_ERR_ENTITY_LOOP);
13764
0
    }
13765
13766
13767
0
    if (lst != NULL)
13768
0
        *lst = NULL;
13769
0
    if (string == NULL)
13770
0
        return(-1);
13771
13772
0
    size = xmlStrlen(string);
13773
13774
0
    ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13775
0
    if (ctxt == NULL) return(-1);
13776
0
    ctxt->userData = ctxt;
13777
0
    if (sax != NULL) {
13778
0
  oldsax = ctxt->sax;
13779
0
        ctxt->sax = sax;
13780
0
  if (user_data != NULL)
13781
0
      ctxt->userData = user_data;
13782
0
    }
13783
0
    newDoc = xmlNewDoc(BAD_CAST "1.0");
13784
0
    if (newDoc == NULL) {
13785
0
  xmlFreeParserCtxt(ctxt);
13786
0
  return(-1);
13787
0
    }
13788
0
    newDoc->properties = XML_DOC_INTERNAL;
13789
0
    if ((doc != NULL) && (doc->dict != NULL)) {
13790
0
        xmlDictFree(ctxt->dict);
13791
0
  ctxt->dict = doc->dict;
13792
0
  xmlDictReference(ctxt->dict);
13793
0
  ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13794
0
  ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13795
0
  ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13796
0
  ctxt->dictNames = 1;
13797
0
    } else {
13798
0
  xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL);
13799
0
    }
13800
    /* doc == NULL is only supported for historic reasons */
13801
0
    if (doc != NULL) {
13802
0
  newDoc->intSubset = doc->intSubset;
13803
0
  newDoc->extSubset = doc->extSubset;
13804
0
    }
13805
0
    newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13806
0
    if (newRoot == NULL) {
13807
0
  if (sax != NULL)
13808
0
      ctxt->sax = oldsax;
13809
0
  xmlFreeParserCtxt(ctxt);
13810
0
  newDoc->intSubset = NULL;
13811
0
  newDoc->extSubset = NULL;
13812
0
        xmlFreeDoc(newDoc);
13813
0
  return(-1);
13814
0
    }
13815
0
    xmlAddChild((xmlNodePtr) newDoc, newRoot);
13816
0
    nodePush(ctxt, newRoot);
13817
    /* doc == NULL is only supported for historic reasons */
13818
0
    if (doc == NULL) {
13819
0
  ctxt->myDoc = newDoc;
13820
0
    } else {
13821
0
  ctxt->myDoc = newDoc;
13822
0
  newDoc->children->doc = doc;
13823
  /* Ensure that doc has XML spec namespace */
13824
0
  xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
13825
0
  newDoc->oldNs = doc->oldNs;
13826
0
    }
13827
0
    ctxt->instate = XML_PARSER_CONTENT;
13828
0
    ctxt->input_id = 2;
13829
0
    ctxt->depth = depth;
13830
13831
    /*
13832
     * Doing validity checking on chunk doesn't make sense
13833
     */
13834
0
    ctxt->validate = 0;
13835
0
    ctxt->loadsubset = 0;
13836
0
    xmlDetectSAX2(ctxt);
13837
13838
0
    if ( doc != NULL ){
13839
0
        content = doc->children;
13840
0
        doc->children = NULL;
13841
0
        xmlParseContent(ctxt);
13842
0
        doc->children = content;
13843
0
    }
13844
0
    else {
13845
0
        xmlParseContent(ctxt);
13846
0
    }
13847
0
    if ((RAW == '<') && (NXT(1) == '/')) {
13848
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13849
0
    } else if (RAW != 0) {
13850
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13851
0
    }
13852
0
    if (ctxt->node != newDoc->children) {
13853
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13854
0
    }
13855
13856
0
    if (!ctxt->wellFormed) {
13857
0
        if (ctxt->errNo == 0)
13858
0
      ret = 1;
13859
0
  else
13860
0
      ret = ctxt->errNo;
13861
0
    } else {
13862
0
      ret = 0;
13863
0
    }
13864
13865
0
    if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
13866
0
  xmlNodePtr cur;
13867
13868
  /*
13869
   * Return the newly created nodeset after unlinking it from
13870
   * they pseudo parent.
13871
   */
13872
0
  cur = newDoc->children->children;
13873
0
  *lst = cur;
13874
0
  while (cur != NULL) {
13875
0
      xmlSetTreeDoc(cur, doc);
13876
0
      cur->parent = NULL;
13877
0
      cur = cur->next;
13878
0
  }
13879
0
  newDoc->children->children = NULL;
13880
0
    }
13881
13882
0
    if (sax != NULL)
13883
0
  ctxt->sax = oldsax;
13884
0
    xmlFreeParserCtxt(ctxt);
13885
0
    newDoc->intSubset = NULL;
13886
0
    newDoc->extSubset = NULL;
13887
    /* This leaks the namespace list if doc == NULL */
13888
0
    newDoc->oldNs = NULL;
13889
0
    xmlFreeDoc(newDoc);
13890
13891
0
    return(ret);
13892
0
}
13893
13894
/**
13895
 * xmlSAXParseEntity:
13896
 * @sax:  the SAX handler block
13897
 * @filename:  the filename
13898
 *
13899
 * DEPRECATED: Don't use.
13900
 *
13901
 * parse an XML external entity out of context and build a tree.
13902
 * It use the given SAX function block to handle the parsing callback.
13903
 * If sax is NULL, fallback to the default DOM tree building routines.
13904
 *
13905
 * [78] extParsedEnt ::= TextDecl? content
13906
 *
13907
 * This correspond to a "Well Balanced" chunk
13908
 *
13909
 * Returns the resulting document tree
13910
 */
13911
13912
xmlDocPtr
13913
0
xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
13914
0
    xmlDocPtr ret;
13915
0
    xmlParserCtxtPtr ctxt;
13916
13917
0
    ctxt = xmlCreateFileParserCtxt(filename);
13918
0
    if (ctxt == NULL) {
13919
0
  return(NULL);
13920
0
    }
13921
0
    if (sax != NULL) {
13922
0
  if (ctxt->sax != NULL)
13923
0
      xmlFree(ctxt->sax);
13924
0
        ctxt->sax = sax;
13925
0
        ctxt->userData = NULL;
13926
0
    }
13927
13928
0
    xmlParseExtParsedEnt(ctxt);
13929
13930
0
    if (ctxt->wellFormed)
13931
0
  ret = ctxt->myDoc;
13932
0
    else {
13933
0
        ret = NULL;
13934
0
        xmlFreeDoc(ctxt->myDoc);
13935
0
        ctxt->myDoc = NULL;
13936
0
    }
13937
0
    if (sax != NULL)
13938
0
        ctxt->sax = NULL;
13939
0
    xmlFreeParserCtxt(ctxt);
13940
13941
0
    return(ret);
13942
0
}
13943
13944
/**
13945
 * xmlParseEntity:
13946
 * @filename:  the filename
13947
 *
13948
 * parse an XML external entity out of context and build a tree.
13949
 *
13950
 * [78] extParsedEnt ::= TextDecl? content
13951
 *
13952
 * This correspond to a "Well Balanced" chunk
13953
 *
13954
 * Returns the resulting document tree
13955
 */
13956
13957
xmlDocPtr
13958
0
xmlParseEntity(const char *filename) {
13959
0
    return(xmlSAXParseEntity(NULL, filename));
13960
0
}
13961
#endif /* LIBXML_SAX1_ENABLED */
13962
13963
/**
13964
 * xmlCreateEntityParserCtxtInternal:
13965
 * @URL:  the entity URL
13966
 * @ID:  the entity PUBLIC ID
13967
 * @base:  a possible base for the target URI
13968
 * @pctx:  parser context used to set options on new context
13969
 *
13970
 * Create a parser context for an external entity
13971
 * Automatic support for ZLIB/Compress compressed document is provided
13972
 * by default if found at compile-time.
13973
 *
13974
 * Returns the new parser context or NULL
13975
 */
13976
static xmlParserCtxtPtr
13977
xmlCreateEntityParserCtxtInternal(xmlSAXHandlerPtr sax, void *userData,
13978
        const xmlChar *URL, const xmlChar *ID, const xmlChar *base,
13979
747k
        xmlParserCtxtPtr pctx) {
13980
747k
    xmlParserCtxtPtr ctxt;
13981
747k
    xmlParserInputPtr inputStream;
13982
747k
    char *directory = NULL;
13983
747k
    xmlChar *uri;
13984
13985
747k
    ctxt = xmlNewSAXParserCtxt(sax, userData);
13986
747k
    if (ctxt == NULL) {
13987
0
  return(NULL);
13988
0
    }
13989
13990
747k
    if (pctx != NULL) {
13991
747k
        ctxt->options = pctx->options;
13992
747k
        ctxt->_private = pctx->_private;
13993
  /*
13994
   * this is a subparser of pctx, so the input_id should be
13995
   * incremented to distinguish from main entity
13996
   */
13997
747k
  ctxt->input_id = pctx->input_id + 1;
13998
747k
    }
13999
14000
    /* Don't read from stdin. */
14001
747k
    if (xmlStrcmp(URL, BAD_CAST "-") == 0)
14002
2
        URL = BAD_CAST "./-";
14003
14004
747k
    uri = xmlBuildURI(URL, base);
14005
14006
747k
    if (uri == NULL) {
14007
1.62k
  inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
14008
1.62k
  if (inputStream == NULL) {
14009
1.60k
      xmlFreeParserCtxt(ctxt);
14010
1.60k
      return(NULL);
14011
1.60k
  }
14012
14013
20
  inputPush(ctxt, inputStream);
14014
14015
20
  if ((ctxt->directory == NULL) && (directory == NULL))
14016
20
      directory = xmlParserGetDirectory((char *)URL);
14017
20
  if ((ctxt->directory == NULL) && (directory != NULL))
14018
20
      ctxt->directory = directory;
14019
746k
    } else {
14020
746k
  inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
14021
746k
  if (inputStream == NULL) {
14022
14.7k
      xmlFree(uri);
14023
14.7k
      xmlFreeParserCtxt(ctxt);
14024
14.7k
      return(NULL);
14025
14.7k
  }
14026
14027
731k
  inputPush(ctxt, inputStream);
14028
14029
731k
  if ((ctxt->directory == NULL) && (directory == NULL))
14030
731k
      directory = xmlParserGetDirectory((char *)uri);
14031
731k
  if ((ctxt->directory == NULL) && (directory != NULL))
14032
731k
      ctxt->directory = directory;
14033
731k
  xmlFree(uri);
14034
731k
    }
14035
731k
    return(ctxt);
14036
747k
}
14037
14038
/**
14039
 * xmlCreateEntityParserCtxt:
14040
 * @URL:  the entity URL
14041
 * @ID:  the entity PUBLIC ID
14042
 * @base:  a possible base for the target URI
14043
 *
14044
 * Create a parser context for an external entity
14045
 * Automatic support for ZLIB/Compress compressed document is provided
14046
 * by default if found at compile-time.
14047
 *
14048
 * Returns the new parser context or NULL
14049
 */
14050
xmlParserCtxtPtr
14051
xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
14052
0
                    const xmlChar *base) {
14053
0
    return xmlCreateEntityParserCtxtInternal(NULL, NULL, URL, ID, base, NULL);
14054
14055
0
}
14056
14057
/************************************************************************
14058
 *                  *
14059
 *    Front ends when parsing from a file     *
14060
 *                  *
14061
 ************************************************************************/
14062
14063
/**
14064
 * xmlCreateURLParserCtxt:
14065
 * @filename:  the filename or URL
14066
 * @options:  a combination of xmlParserOption
14067
 *
14068
 * Create a parser context for a file or URL content.
14069
 * Automatic support for ZLIB/Compress compressed document is provided
14070
 * by default if found at compile-time and for file accesses
14071
 *
14072
 * Returns the new parser context or NULL
14073
 */
14074
xmlParserCtxtPtr
14075
xmlCreateURLParserCtxt(const char *filename, int options)
14076
0
{
14077
0
    xmlParserCtxtPtr ctxt;
14078
0
    xmlParserInputPtr inputStream;
14079
0
    char *directory = NULL;
14080
14081
0
    ctxt = xmlNewParserCtxt();
14082
0
    if (ctxt == NULL) {
14083
0
  xmlErrMemory(NULL, "cannot allocate parser context");
14084
0
  return(NULL);
14085
0
    }
14086
14087
0
    if (options)
14088
0
  xmlCtxtUseOptionsInternal(ctxt, options, NULL);
14089
0
    ctxt->linenumbers = 1;
14090
14091
0
    inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
14092
0
    if (inputStream == NULL) {
14093
0
  xmlFreeParserCtxt(ctxt);
14094
0
  return(NULL);
14095
0
    }
14096
14097
0
    inputPush(ctxt, inputStream);
14098
0
    if ((ctxt->directory == NULL) && (directory == NULL))
14099
0
        directory = xmlParserGetDirectory(filename);
14100
0
    if ((ctxt->directory == NULL) && (directory != NULL))
14101
0
        ctxt->directory = directory;
14102
14103
0
    return(ctxt);
14104
0
}
14105
14106
/**
14107
 * xmlCreateFileParserCtxt:
14108
 * @filename:  the filename
14109
 *
14110
 * Create a parser context for a file content.
14111
 * Automatic support for ZLIB/Compress compressed document is provided
14112
 * by default if found at compile-time.
14113
 *
14114
 * Returns the new parser context or NULL
14115
 */
14116
xmlParserCtxtPtr
14117
xmlCreateFileParserCtxt(const char *filename)
14118
0
{
14119
0
    return(xmlCreateURLParserCtxt(filename, 0));
14120
0
}
14121
14122
#ifdef LIBXML_SAX1_ENABLED
14123
/**
14124
 * xmlSAXParseFileWithData:
14125
 * @sax:  the SAX handler block
14126
 * @filename:  the filename
14127
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
14128
 *             documents
14129
 * @data:  the userdata
14130
 *
14131
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
14132
 *
14133
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14134
 * compressed document is provided by default if found at compile-time.
14135
 * It use the given SAX function block to handle the parsing callback.
14136
 * If sax is NULL, fallback to the default DOM tree building routines.
14137
 *
14138
 * User data (void *) is stored within the parser context in the
14139
 * context's _private member, so it is available nearly everywhere in libxml
14140
 *
14141
 * Returns the resulting document tree
14142
 */
14143
14144
xmlDocPtr
14145
xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
14146
0
                        int recovery, void *data) {
14147
0
    xmlDocPtr ret;
14148
0
    xmlParserCtxtPtr ctxt;
14149
14150
0
    xmlInitParser();
14151
14152
0
    ctxt = xmlCreateFileParserCtxt(filename);
14153
0
    if (ctxt == NULL) {
14154
0
  return(NULL);
14155
0
    }
14156
0
    if (sax != NULL) {
14157
0
  if (ctxt->sax != NULL)
14158
0
      xmlFree(ctxt->sax);
14159
0
        ctxt->sax = sax;
14160
0
    }
14161
0
    xmlDetectSAX2(ctxt);
14162
0
    if (data!=NULL) {
14163
0
  ctxt->_private = data;
14164
0
    }
14165
14166
0
    if (ctxt->directory == NULL)
14167
0
        ctxt->directory = xmlParserGetDirectory(filename);
14168
14169
0
    ctxt->recovery = recovery;
14170
14171
0
    xmlParseDocument(ctxt);
14172
14173
0
    if ((ctxt->wellFormed) || recovery) {
14174
0
        ret = ctxt->myDoc;
14175
0
  if ((ret != NULL) && (ctxt->input->buf != NULL)) {
14176
0
      if (ctxt->input->buf->compressed > 0)
14177
0
    ret->compression = 9;
14178
0
      else
14179
0
    ret->compression = ctxt->input->buf->compressed;
14180
0
  }
14181
0
    }
14182
0
    else {
14183
0
       ret = NULL;
14184
0
       xmlFreeDoc(ctxt->myDoc);
14185
0
       ctxt->myDoc = NULL;
14186
0
    }
14187
0
    if (sax != NULL)
14188
0
        ctxt->sax = NULL;
14189
0
    xmlFreeParserCtxt(ctxt);
14190
14191
0
    return(ret);
14192
0
}
14193
14194
/**
14195
 * xmlSAXParseFile:
14196
 * @sax:  the SAX handler block
14197
 * @filename:  the filename
14198
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
14199
 *             documents
14200
 *
14201
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
14202
 *
14203
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14204
 * compressed document is provided by default if found at compile-time.
14205
 * It use the given SAX function block to handle the parsing callback.
14206
 * If sax is NULL, fallback to the default DOM tree building routines.
14207
 *
14208
 * Returns the resulting document tree
14209
 */
14210
14211
xmlDocPtr
14212
xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
14213
0
                          int recovery) {
14214
0
    return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
14215
0
}
14216
14217
/**
14218
 * xmlRecoverDoc:
14219
 * @cur:  a pointer to an array of xmlChar
14220
 *
14221
 * DEPRECATED: Use xmlReadDoc with XML_PARSE_RECOVER.
14222
 *
14223
 * parse an XML in-memory document and build a tree.
14224
 * In the case the document is not Well Formed, a attempt to build a
14225
 * tree is tried anyway
14226
 *
14227
 * Returns the resulting document tree or NULL in case of failure
14228
 */
14229
14230
xmlDocPtr
14231
0
xmlRecoverDoc(const xmlChar *cur) {
14232
0
    return(xmlSAXParseDoc(NULL, cur, 1));
14233
0
}
14234
14235
/**
14236
 * xmlParseFile:
14237
 * @filename:  the filename
14238
 *
14239
 * DEPRECATED: Use xmlReadFile.
14240
 *
14241
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14242
 * compressed document is provided by default if found at compile-time.
14243
 *
14244
 * Returns the resulting document tree if the file was wellformed,
14245
 * NULL otherwise.
14246
 */
14247
14248
xmlDocPtr
14249
0
xmlParseFile(const char *filename) {
14250
0
    return(xmlSAXParseFile(NULL, filename, 0));
14251
0
}
14252
14253
/**
14254
 * xmlRecoverFile:
14255
 * @filename:  the filename
14256
 *
14257
 * DEPRECATED: Use xmlReadFile with XML_PARSE_RECOVER.
14258
 *
14259
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14260
 * compressed document is provided by default if found at compile-time.
14261
 * In the case the document is not Well Formed, it attempts to build
14262
 * a tree anyway
14263
 *
14264
 * Returns the resulting document tree or NULL in case of failure
14265
 */
14266
14267
xmlDocPtr
14268
0
xmlRecoverFile(const char *filename) {
14269
0
    return(xmlSAXParseFile(NULL, filename, 1));
14270
0
}
14271
14272
14273
/**
14274
 * xmlSetupParserForBuffer:
14275
 * @ctxt:  an XML parser context
14276
 * @buffer:  a xmlChar * buffer
14277
 * @filename:  a file name
14278
 *
14279
 * DEPRECATED: Don't use.
14280
 *
14281
 * Setup the parser context to parse a new buffer; Clears any prior
14282
 * contents from the parser context. The buffer parameter must not be
14283
 * NULL, but the filename parameter can be
14284
 */
14285
void
14286
xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
14287
                             const char* filename)
14288
0
{
14289
0
    xmlParserInputPtr input;
14290
14291
0
    if ((ctxt == NULL) || (buffer == NULL))
14292
0
        return;
14293
14294
0
    input = xmlNewInputStream(ctxt);
14295
0
    if (input == NULL) {
14296
0
        xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
14297
0
        xmlClearParserCtxt(ctxt);
14298
0
        return;
14299
0
    }
14300
14301
0
    xmlClearParserCtxt(ctxt);
14302
0
    if (filename != NULL)
14303
0
        input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
14304
0
    input->base = buffer;
14305
0
    input->cur = buffer;
14306
0
    input->end = &buffer[xmlStrlen(buffer)];
14307
0
    inputPush(ctxt, input);
14308
0
}
14309
14310
/**
14311
 * xmlSAXUserParseFile:
14312
 * @sax:  a SAX handler
14313
 * @user_data:  The user data returned on SAX callbacks
14314
 * @filename:  a file name
14315
 *
14316
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
14317
 *
14318
 * parse an XML file and call the given SAX handler routines.
14319
 * Automatic support for ZLIB/Compress compressed document is provided
14320
 *
14321
 * Returns 0 in case of success or a error number otherwise
14322
 */
14323
int
14324
xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
14325
0
                    const char *filename) {
14326
0
    int ret = 0;
14327
0
    xmlParserCtxtPtr ctxt;
14328
14329
0
    ctxt = xmlCreateFileParserCtxt(filename);
14330
0
    if (ctxt == NULL) return -1;
14331
0
    if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14332
0
  xmlFree(ctxt->sax);
14333
0
    ctxt->sax = sax;
14334
0
    xmlDetectSAX2(ctxt);
14335
14336
0
    if (user_data != NULL)
14337
0
  ctxt->userData = user_data;
14338
14339
0
    xmlParseDocument(ctxt);
14340
14341
0
    if (ctxt->wellFormed)
14342
0
  ret = 0;
14343
0
    else {
14344
0
        if (ctxt->errNo != 0)
14345
0
      ret = ctxt->errNo;
14346
0
  else
14347
0
      ret = -1;
14348
0
    }
14349
0
    if (sax != NULL)
14350
0
  ctxt->sax = NULL;
14351
0
    if (ctxt->myDoc != NULL) {
14352
0
        xmlFreeDoc(ctxt->myDoc);
14353
0
  ctxt->myDoc = NULL;
14354
0
    }
14355
0
    xmlFreeParserCtxt(ctxt);
14356
14357
0
    return ret;
14358
0
}
14359
#endif /* LIBXML_SAX1_ENABLED */
14360
14361
/************************************************************************
14362
 *                  *
14363
 *    Front ends when parsing from memory     *
14364
 *                  *
14365
 ************************************************************************/
14366
14367
/**
14368
 * xmlCreateMemoryParserCtxt:
14369
 * @buffer:  a pointer to a char array
14370
 * @size:  the size of the array
14371
 *
14372
 * Create a parser context for an XML in-memory document.
14373
 *
14374
 * Returns the new parser context or NULL
14375
 */
14376
xmlParserCtxtPtr
14377
313k
xmlCreateMemoryParserCtxt(const char *buffer, int size) {
14378
313k
    xmlParserCtxtPtr ctxt;
14379
313k
    xmlParserInputPtr input;
14380
313k
    xmlParserInputBufferPtr buf;
14381
14382
313k
    if (buffer == NULL)
14383
0
  return(NULL);
14384
313k
    if (size <= 0)
14385
4.02k
  return(NULL);
14386
14387
309k
    ctxt = xmlNewParserCtxt();
14388
309k
    if (ctxt == NULL)
14389
0
  return(NULL);
14390
14391
    /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
14392
309k
    buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
14393
309k
    if (buf == NULL) {
14394
0
  xmlFreeParserCtxt(ctxt);
14395
0
  return(NULL);
14396
0
    }
14397
14398
309k
    input = xmlNewInputStream(ctxt);
14399
309k
    if (input == NULL) {
14400
0
  xmlFreeParserInputBuffer(buf);
14401
0
  xmlFreeParserCtxt(ctxt);
14402
0
  return(NULL);
14403
0
    }
14404
14405
309k
    input->filename = NULL;
14406
309k
    input->buf = buf;
14407
309k
    xmlBufResetInput(input->buf->buffer, input);
14408
14409
309k
    inputPush(ctxt, input);
14410
309k
    return(ctxt);
14411
309k
}
14412
14413
#ifdef LIBXML_SAX1_ENABLED
14414
/**
14415
 * xmlSAXParseMemoryWithData:
14416
 * @sax:  the SAX handler block
14417
 * @buffer:  an pointer to a char array
14418
 * @size:  the size of the array
14419
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
14420
 *             documents
14421
 * @data:  the userdata
14422
 *
14423
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
14424
 *
14425
 * parse an XML in-memory block and use the given SAX function block
14426
 * to handle the parsing callback. If sax is NULL, fallback to the default
14427
 * DOM tree building routines.
14428
 *
14429
 * User data (void *) is stored within the parser context in the
14430
 * context's _private member, so it is available nearly everywhere in libxml
14431
 *
14432
 * Returns the resulting document tree
14433
 */
14434
14435
xmlDocPtr
14436
xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
14437
0
            int size, int recovery, void *data) {
14438
0
    xmlDocPtr ret;
14439
0
    xmlParserCtxtPtr ctxt;
14440
14441
0
    xmlInitParser();
14442
14443
0
    ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14444
0
    if (ctxt == NULL) return(NULL);
14445
0
    if (sax != NULL) {
14446
0
  if (ctxt->sax != NULL)
14447
0
      xmlFree(ctxt->sax);
14448
0
        ctxt->sax = sax;
14449
0
    }
14450
0
    xmlDetectSAX2(ctxt);
14451
0
    if (data!=NULL) {
14452
0
  ctxt->_private=data;
14453
0
    }
14454
14455
0
    ctxt->recovery = recovery;
14456
14457
0
    xmlParseDocument(ctxt);
14458
14459
0
    if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14460
0
    else {
14461
0
       ret = NULL;
14462
0
       xmlFreeDoc(ctxt->myDoc);
14463
0
       ctxt->myDoc = NULL;
14464
0
    }
14465
0
    if (sax != NULL)
14466
0
  ctxt->sax = NULL;
14467
0
    xmlFreeParserCtxt(ctxt);
14468
14469
0
    return(ret);
14470
0
}
14471
14472
/**
14473
 * xmlSAXParseMemory:
14474
 * @sax:  the SAX handler block
14475
 * @buffer:  an pointer to a char array
14476
 * @size:  the size of the array
14477
 * @recovery:  work in recovery mode, i.e. tries to read not Well Formed
14478
 *             documents
14479
 *
14480
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
14481
 *
14482
 * parse an XML in-memory block and use the given SAX function block
14483
 * to handle the parsing callback. If sax is NULL, fallback to the default
14484
 * DOM tree building routines.
14485
 *
14486
 * Returns the resulting document tree
14487
 */
14488
xmlDocPtr
14489
xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
14490
0
            int size, int recovery) {
14491
0
    return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
14492
0
}
14493
14494
/**
14495
 * xmlParseMemory:
14496
 * @buffer:  an pointer to a char array
14497
 * @size:  the size of the array
14498
 *
14499
 * DEPRECATED: Use xmlReadMemory.
14500
 *
14501
 * parse an XML in-memory block and build a tree.
14502
 *
14503
 * Returns the resulting document tree
14504
 */
14505
14506
0
xmlDocPtr xmlParseMemory(const char *buffer, int size) {
14507
0
   return(xmlSAXParseMemory(NULL, buffer, size, 0));
14508
0
}
14509
14510
/**
14511
 * xmlRecoverMemory:
14512
 * @buffer:  an pointer to a char array
14513
 * @size:  the size of the array
14514
 *
14515
 * DEPRECATED: Use xmlReadMemory with XML_PARSE_RECOVER.
14516
 *
14517
 * parse an XML in-memory block and build a tree.
14518
 * In the case the document is not Well Formed, an attempt to
14519
 * build a tree is tried anyway
14520
 *
14521
 * Returns the resulting document tree or NULL in case of error
14522
 */
14523
14524
0
xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
14525
0
   return(xmlSAXParseMemory(NULL, buffer, size, 1));
14526
0
}
14527
14528
/**
14529
 * xmlSAXUserParseMemory:
14530
 * @sax:  a SAX handler
14531
 * @user_data:  The user data returned on SAX callbacks
14532
 * @buffer:  an in-memory XML document input
14533
 * @size:  the length of the XML document in bytes
14534
 *
14535
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
14536
 *
14537
 * parse an XML in-memory buffer and call the given SAX handler routines.
14538
 *
14539
 * Returns 0 in case of success or a error number otherwise
14540
 */
14541
int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
14542
0
        const char *buffer, int size) {
14543
0
    int ret = 0;
14544
0
    xmlParserCtxtPtr ctxt;
14545
14546
0
    xmlInitParser();
14547
14548
0
    ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14549
0
    if (ctxt == NULL) return -1;
14550
0
    if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14551
0
        xmlFree(ctxt->sax);
14552
0
    ctxt->sax = sax;
14553
0
    xmlDetectSAX2(ctxt);
14554
14555
0
    if (user_data != NULL)
14556
0
  ctxt->userData = user_data;
14557
14558
0
    xmlParseDocument(ctxt);
14559
14560
0
    if (ctxt->wellFormed)
14561
0
  ret = 0;
14562
0
    else {
14563
0
        if (ctxt->errNo != 0)
14564
0
      ret = ctxt->errNo;
14565
0
  else
14566
0
      ret = -1;
14567
0
    }
14568
0
    if (sax != NULL)
14569
0
        ctxt->sax = NULL;
14570
0
    if (ctxt->myDoc != NULL) {
14571
0
        xmlFreeDoc(ctxt->myDoc);
14572
0
  ctxt->myDoc = NULL;
14573
0
    }
14574
0
    xmlFreeParserCtxt(ctxt);
14575
14576
0
    return ret;
14577
0
}
14578
#endif /* LIBXML_SAX1_ENABLED */
14579
14580
/**
14581
 * xmlCreateDocParserCtxt:
14582
 * @cur:  a pointer to an array of xmlChar
14583
 *
14584
 * Creates a parser context for an XML in-memory document.
14585
 *
14586
 * Returns the new parser context or NULL
14587
 */
14588
xmlParserCtxtPtr
14589
0
xmlCreateDocParserCtxt(const xmlChar *cur) {
14590
0
    int len;
14591
14592
0
    if (cur == NULL)
14593
0
  return(NULL);
14594
0
    len = xmlStrlen(cur);
14595
0
    return(xmlCreateMemoryParserCtxt((const char *)cur, len));
14596
0
}
14597
14598
#ifdef LIBXML_SAX1_ENABLED
14599
/**
14600
 * xmlSAXParseDoc:
14601
 * @sax:  the SAX handler block
14602
 * @cur:  a pointer to an array of xmlChar
14603
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
14604
 *             documents
14605
 *
14606
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadDoc.
14607
 *
14608
 * parse an XML in-memory document and build a tree.
14609
 * It use the given SAX function block to handle the parsing callback.
14610
 * If sax is NULL, fallback to the default DOM tree building routines.
14611
 *
14612
 * Returns the resulting document tree
14613
 */
14614
14615
xmlDocPtr
14616
0
xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
14617
0
    xmlDocPtr ret;
14618
0
    xmlParserCtxtPtr ctxt;
14619
0
    xmlSAXHandlerPtr oldsax = NULL;
14620
14621
0
    if (cur == NULL) return(NULL);
14622
14623
14624
0
    ctxt = xmlCreateDocParserCtxt(cur);
14625
0
    if (ctxt == NULL) return(NULL);
14626
0
    if (sax != NULL) {
14627
0
        oldsax = ctxt->sax;
14628
0
        ctxt->sax = sax;
14629
0
        ctxt->userData = NULL;
14630
0
    }
14631
0
    xmlDetectSAX2(ctxt);
14632
14633
0
    xmlParseDocument(ctxt);
14634
0
    if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14635
0
    else {
14636
0
       ret = NULL;
14637
0
       xmlFreeDoc(ctxt->myDoc);
14638
0
       ctxt->myDoc = NULL;
14639
0
    }
14640
0
    if (sax != NULL)
14641
0
  ctxt->sax = oldsax;
14642
0
    xmlFreeParserCtxt(ctxt);
14643
14644
0
    return(ret);
14645
0
}
14646
14647
/**
14648
 * xmlParseDoc:
14649
 * @cur:  a pointer to an array of xmlChar
14650
 *
14651
 * DEPRECATED: Use xmlReadDoc.
14652
 *
14653
 * parse an XML in-memory document and build a tree.
14654
 *
14655
 * Returns the resulting document tree
14656
 */
14657
14658
xmlDocPtr
14659
0
xmlParseDoc(const xmlChar *cur) {
14660
0
    return(xmlSAXParseDoc(NULL, cur, 0));
14661
0
}
14662
#endif /* LIBXML_SAX1_ENABLED */
14663
14664
#ifdef LIBXML_LEGACY_ENABLED
14665
/************************************************************************
14666
 *                  *
14667
 *  Specific function to keep track of entities references    *
14668
 *  and used by the XSLT debugger         *
14669
 *                  *
14670
 ************************************************************************/
14671
14672
static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
14673
14674
/**
14675
 * xmlAddEntityReference:
14676
 * @ent : A valid entity
14677
 * @firstNode : A valid first node for children of entity
14678
 * @lastNode : A valid last node of children entity
14679
 *
14680
 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
14681
 */
14682
static void
14683
xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
14684
                      xmlNodePtr lastNode)
14685
{
14686
    if (xmlEntityRefFunc != NULL) {
14687
        (*xmlEntityRefFunc) (ent, firstNode, lastNode);
14688
    }
14689
}
14690
14691
14692
/**
14693
 * xmlSetEntityReferenceFunc:
14694
 * @func: A valid function
14695
 *
14696
 * Set the function to call call back when a xml reference has been made
14697
 */
14698
void
14699
xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
14700
{
14701
    xmlEntityRefFunc = func;
14702
}
14703
#endif /* LIBXML_LEGACY_ENABLED */
14704
14705
/************************************************************************
14706
 *                  *
14707
 *        Miscellaneous       *
14708
 *                  *
14709
 ************************************************************************/
14710
14711
/* Set to 1 at the end of xmlInitParser(), back to 0 by xmlCleanupParser(). */
static int xmlParserInitialized = 0;
14712
14713
/**
14714
 * xmlInitParser:
14715
 *
14716
 * Initialization function for the XML parser.
14717
 * This is not reentrant. Call once before processing in case of
14718
 * use in multithreaded programs.
 *
 * Returns immediately when the library is already initialized.
 * When LIBXML_THREAD_ENABLED is defined, the initialization steps run
 * between __xmlGlobalInitMutexLock() and __xmlGlobalInitMutexUnlock(),
 * and the initialized flag is tested a second time after the lock call.
14719
 */
14720
14721
void
14722
2.40M
xmlInitParser(void) {
14723
2.40M
    /* Fast path: already initialized (tested before taking any lock). */
    if (xmlParserInitialized != 0)
14724
2.40M
  return;
14725
14726
#if defined(_WIN32) && (!defined(LIBXML_STATIC) || defined(LIBXML_STATIC_FOR_DLL))
14727
    /* Register cleanup at exit only while xmlFree is the C library free(). */
    if (xmlFree == free)
14728
        atexit(xmlCleanupParser);
14729
#endif
14730
14731
3.10k
#ifdef LIBXML_THREAD_ENABLED
14732
3.10k
    __xmlGlobalInitMutexLock();
14733
3.10k
    /* Test the flag again now that the init lock call has returned. */
    if (xmlParserInitialized == 0) {
14734
3.10k
#endif
14735
3.10k
  xmlInitThreads();
14736
3.10k
  xmlInitGlobals();
14737
3.10k
  xmlInitMemory();
14738
3.10k
        xmlInitializeDict();
14739
3.10k
  xmlInitCharEncodingHandlers();
14740
3.10k
  xmlDefaultSAXHandlerInit();
14741
3.10k
  xmlRegisterDefaultInputCallbacks();
14742
3.10k
#ifdef LIBXML_OUTPUT_ENABLED
14743
3.10k
  xmlRegisterDefaultOutputCallbacks();
14744
3.10k
#endif /* LIBXML_OUTPUT_ENABLED */
14745
3.10k
#ifdef LIBXML_HTML_ENABLED
14746
3.10k
  htmlInitAutoClose();
14747
3.10k
  htmlDefaultSAXHandlerInit();
14748
3.10k
#endif
14749
3.10k
#if defined(LIBXML_XPATH_ENABLED) || defined(LIBXML_SCHEMAS_ENABLED)
14750
3.10k
  xmlXPathInit();
14751
3.10k
#endif
14752
3.10k
  /* Mark the library initialized only after every step above has run. */
  xmlParserInitialized = 1;
14753
3.10k
#ifdef LIBXML_THREAD_ENABLED
14754
3.10k
    }
14755
3.10k
    __xmlGlobalInitMutexUnlock();
14756
3.10k
#endif
14757
3.10k
}
14758
14759
/**
14760
 * xmlCleanupParser:
14761
 *
14762
 * This function name is somewhat misleading. It does not clean up
14763
 * parser state, it cleans up memory allocated by the library itself.
14764
 * It is a cleanup function for the XML library. It tries to reclaim all
14765
 * related global memory allocated for the library processing.
14766
 * It doesn't deallocate any document related memory. One should
14767
 * call xmlCleanupParser() only when the process has finished using
14768
 * the library and all XML/HTML documents built with it.
14769
 * See also xmlInitParser() which has the opposite function of preparing
14770
 * the library for operations.
14771
 *
14772
 * WARNING: if your application is multithreaded or has plugin support
14773
 *          calling this may crash the application if another thread or
14774
 *          a plugin is still using libxml2. It's sometimes very hard to
14775
 *          guess if libxml2 is in use in the application, some libraries
14776
 *          or plugins may use it without notice. In case of doubt abstain
14777
 *          from calling this function or do it just before calling exit()
14778
 *          to avoid leak reports from valgrind !
 *
 * The call does nothing when the library is not currently marked as
 * initialized, so calling it twice in a row is harmless.
14779
 */
14780
14781
void
14782
0
xmlCleanupParser(void) {
14783
0
    /* Not initialized, or already cleaned up: nothing to release. */
    if (!xmlParserInitialized)
14784
0
  return;
14785
14786
0
    xmlCleanupCharEncodingHandlers();
14787
0
#ifdef LIBXML_CATALOG_ENABLED
14788
0
    xmlCatalogCleanup();
14789
0
#endif
14790
0
    xmlDictCleanup();
14791
0
    xmlCleanupInputCallbacks();
14792
0
#ifdef LIBXML_OUTPUT_ENABLED
14793
0
    xmlCleanupOutputCallbacks();
14794
0
#endif
14795
0
#ifdef LIBXML_SCHEMAS_ENABLED
14796
0
    xmlSchemaCleanupTypes();
14797
0
    xmlRelaxNGCleanupTypes();
14798
0
#endif
14799
0
    xmlCleanupGlobals();
14800
0
    xmlCleanupThreads(); /* must be last if called not from the main thread */
14801
0
    xmlCleanupMemory();
14802
0
    /* Clear the flag so that a later xmlInitParser() initializes again. */
    xmlParserInitialized = 0;
14803
0
}
14804
14805
#if defined(HAVE_ATTRIBUTE_DESTRUCTOR) && !defined(LIBXML_STATIC) && \
    !defined(_WIN32)
/**
 * xmlDestructor:
 *
 * Function declared with ATTRIBUTE_DESTRUCTOR that calls
 * xmlCleanupParser(), but only while xmlFree is the C library free().
 */
static void
ATTRIBUTE_DESTRUCTOR
xmlDestructor(void)
{
    /*
     * Calling custom deallocation functions in a destructor can cause
     * problems, for example with Nokogiri.
     */
    if (xmlFree != free)
        return;

    xmlCleanupParser();
}
#endif
14818
14819
/************************************************************************
14820
 *                  *
14821
 *  New set (2.6.0) of simpler and more flexible APIs   *
14822
 *                  *
14823
 ************************************************************************/
14824
14825
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A variable named "dict" must be visible where the
 * macro is used; when it is NULL every non-NULL string is freed.
 *
 * The expansion is wrapped in do { } while (0) so that the macro acts
 * as a single statement (safe as the body of an if/else) and must be
 * followed by a semicolon. @str is evaluated more than once.
 */
#define DICT_FREE(str)						\
    do {							\
        if ((str) && ((!dict) ||				\
            (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))	\
            xmlFree((char *)(str));				\
    } while (0)
14836
14837
/**
14838
 * xmlCtxtReset:
14839
 * @ctxt: an XML parser context
14840
 *
14841
 * Reset a parser context
14842
 */
14843
void
14844
xmlCtxtReset(xmlParserCtxtPtr ctxt)
14845
0
{
14846
0
    xmlParserInputPtr input;
14847
0
    xmlDictPtr dict;
14848
14849
0
    if (ctxt == NULL)
14850
0
        return;
14851
14852
0
    dict = ctxt->dict;
14853
14854
0
    while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
14855
0
        xmlFreeInputStream(input);
14856
0
    }
14857
0
    ctxt->inputNr = 0;
14858
0
    ctxt->input = NULL;
14859
14860
0
    ctxt->spaceNr = 0;
14861
0
    if (ctxt->spaceTab != NULL) {
14862
0
  ctxt->spaceTab[0] = -1;
14863
0
  ctxt->space = &ctxt->spaceTab[0];
14864
0
    } else {
14865
0
        ctxt->space = NULL;
14866
0
    }
14867
14868
14869
0
    ctxt->nodeNr = 0;
14870
0
    ctxt->node = NULL;
14871
14872
0
    ctxt->nameNr = 0;
14873
0
    ctxt->name = NULL;
14874
14875
0
    ctxt->nsNr = 0;
14876
14877
0
    DICT_FREE(ctxt->version);
14878
0
    ctxt->version = NULL;
14879
0
    DICT_FREE(ctxt->encoding);
14880
0
    ctxt->encoding = NULL;
14881
0
    DICT_FREE(ctxt->directory);
14882
0
    ctxt->directory = NULL;
14883
0
    DICT_FREE(ctxt->extSubURI);
14884
0
    ctxt->extSubURI = NULL;
14885
0
    DICT_FREE(ctxt->extSubSystem);
14886
0
    ctxt->extSubSystem = NULL;
14887
0
    if (ctxt->myDoc != NULL)
14888
0
        xmlFreeDoc(ctxt->myDoc);
14889
0
    ctxt->myDoc = NULL;
14890
14891
0
    ctxt->standalone = -1;
14892
0
    ctxt->hasExternalSubset = 0;
14893
0
    ctxt->hasPErefs = 0;
14894
0
    ctxt->html = 0;
14895
0
    ctxt->external = 0;
14896
0
    ctxt->instate = XML_PARSER_START;
14897
0
    ctxt->token = 0;
14898
14899
0
    ctxt->wellFormed = 1;
14900
0
    ctxt->nsWellFormed = 1;
14901
0
    ctxt->disableSAX = 0;
14902
0
    ctxt->valid = 1;
14903
#if 0
14904
    ctxt->vctxt.userData = ctxt;
14905
    ctxt->vctxt.error = xmlParserValidityError;
14906
    ctxt->vctxt.warning = xmlParserValidityWarning;
14907
#endif
14908
0
    ctxt->record_info = 0;
14909
0
    ctxt->checkIndex = 0;
14910
0
    ctxt->inSubset = 0;
14911
0
    ctxt->errNo = XML_ERR_OK;
14912
0
    ctxt->depth = 0;
14913
0
    ctxt->charset = XML_CHAR_ENCODING_UTF8;
14914
0
    ctxt->catalogs = NULL;
14915
0
    ctxt->nbentities = 0;
14916
0
    ctxt->sizeentities = 0;
14917
0
    ctxt->sizeentcopy = 0;
14918
0
    xmlInitNodeInfoSeq(&ctxt->node_seq);
14919
14920
0
    if (ctxt->attsDefault != NULL) {
14921
0
        xmlHashFree(ctxt->attsDefault, xmlHashDefaultDeallocator);
14922
0
        ctxt->attsDefault = NULL;
14923
0
    }
14924
0
    if (ctxt->attsSpecial != NULL) {
14925
0
        xmlHashFree(ctxt->attsSpecial, NULL);
14926
0
        ctxt->attsSpecial = NULL;
14927
0
    }
14928
14929
0
#ifdef LIBXML_CATALOG_ENABLED
14930
0
    if (ctxt->catalogs != NULL)
14931
0
  xmlCatalogFreeLocal(ctxt->catalogs);
14932
0
#endif
14933
0
    if (ctxt->lastError.code != XML_ERR_OK)
14934
0
        xmlResetError(&ctxt->lastError);
14935
0
}
14936
14937
/**
14938
 * xmlCtxtResetPush:
14939
 * @ctxt: an XML parser context
14940
 * @chunk:  a pointer to an array of chars
14941
 * @size:  number of chars in the array
14942
 * @filename:  an optional file name or URI
14943
 * @encoding:  the document encoding, or NULL
14944
 *
14945
 * Reset a push parser context
14946
 *
14947
 * Returns 0 in case of success and 1 in case of error
14948
 */
14949
int
14950
xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
14951
                 int size, const char *filename, const char *encoding)
14952
0
{
14953
0
    xmlParserInputPtr inputStream;
14954
0
    xmlParserInputBufferPtr buf;
14955
0
    xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
14956
14957
0
    if (ctxt == NULL)
14958
0
        return(1);
14959
14960
0
    if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
14961
0
        enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
14962
14963
0
    buf = xmlAllocParserInputBuffer(enc);
14964
0
    if (buf == NULL)
14965
0
        return(1);
14966
14967
0
    if (ctxt == NULL) {
14968
0
        xmlFreeParserInputBuffer(buf);
14969
0
        return(1);
14970
0
    }
14971
14972
0
    xmlCtxtReset(ctxt);
14973
14974
0
    if (filename == NULL) {
14975
0
        ctxt->directory = NULL;
14976
0
    } else {
14977
0
        ctxt->directory = xmlParserGetDirectory(filename);
14978
0
    }
14979
14980
0
    inputStream = xmlNewInputStream(ctxt);
14981
0
    if (inputStream == NULL) {
14982
0
        xmlFreeParserInputBuffer(buf);
14983
0
        return(1);
14984
0
    }
14985
14986
0
    if (filename == NULL)
14987
0
        inputStream->filename = NULL;
14988
0
    else
14989
0
        inputStream->filename = (char *)
14990
0
            xmlCanonicPath((const xmlChar *) filename);
14991
0
    inputStream->buf = buf;
14992
0
    xmlBufResetInput(buf->buffer, inputStream);
14993
14994
0
    inputPush(ctxt, inputStream);
14995
14996
0
    if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
14997
0
        (ctxt->input->buf != NULL)) {
14998
0
  size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
14999
0
        size_t cur = ctxt->input->cur - ctxt->input->base;
15000
15001
0
        xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
15002
15003
0
        xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
15004
#ifdef DEBUG_PUSH
15005
        xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
15006
#endif
15007
0
    }
15008
15009
0
    if (encoding != NULL) {
15010
0
        xmlCharEncodingHandlerPtr hdlr;
15011
15012
0
        if (ctxt->encoding != NULL)
15013
0
      xmlFree((xmlChar *) ctxt->encoding);
15014
0
        ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
15015
15016
0
        hdlr = xmlFindCharEncodingHandler(encoding);
15017
0
        if (hdlr != NULL) {
15018
0
            xmlSwitchToEncoding(ctxt, hdlr);
15019
0
  } else {
15020
0
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
15021
0
            "Unsupported encoding %s\n", BAD_CAST encoding);
15022
0
        }
15023
0
    } else if (enc != XML_CHAR_ENCODING_NONE) {
15024
0
        xmlSwitchEncoding(ctxt, enc);
15025
0
    }
15026
15027
0
    return(0);
15028
0
}
15029
15030
15031
/**
15032
 * xmlCtxtUseOptionsInternal:
15033
 * @ctxt: an XML parser context
15034
 * @options:  a combination of xmlParserOption
15035
 * @encoding:  the user provided encoding to use
15036
 *
15037
 * Applies the options to the parser context
15038
 *
15039
 * Returns 0 in case of success, the set of unknown or unimplemented options
15040
 *         in case of error.
15041
 */
15042
static int
15043
xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding)
15044
787k
{
15045
787k
    if (ctxt == NULL)
15046
0
        return(-1);
15047
787k
    if (encoding != NULL) {
15048
0
        if (ctxt->encoding != NULL)
15049
0
      xmlFree((xmlChar *) ctxt->encoding);
15050
0
        ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
15051
0
    }
15052
787k
    if (options & XML_PARSE_RECOVER) {
15053
407k
        ctxt->recovery = 1;
15054
407k
        options -= XML_PARSE_RECOVER;
15055
407k
  ctxt->options |= XML_PARSE_RECOVER;
15056
407k
    } else
15057
379k
        ctxt->recovery = 0;
15058
787k
    if (options & XML_PARSE_DTDLOAD) {
15059
594k
        ctxt->loadsubset = XML_DETECT_IDS;
15060
594k
        options -= XML_PARSE_DTDLOAD;
15061
594k
  ctxt->options |= XML_PARSE_DTDLOAD;
15062
594k
    } else
15063
192k
        ctxt->loadsubset = 0;
15064
787k
    if (options & XML_PARSE_DTDATTR) {
15065
251k
        ctxt->loadsubset |= XML_COMPLETE_ATTRS;
15066
251k
        options -= XML_PARSE_DTDATTR;
15067
251k
  ctxt->options |= XML_PARSE_DTDATTR;
15068
251k
    }
15069
787k
    if (options & XML_PARSE_NOENT) {
15070
425k
        ctxt->replaceEntities = 1;
15071
        /* ctxt->loadsubset |= XML_DETECT_IDS; */
15072
425k
        options -= XML_PARSE_NOENT;
15073
425k
  ctxt->options |= XML_PARSE_NOENT;
15074
425k
    } else
15075
361k
        ctxt->replaceEntities = 0;
15076
787k
    if (options & XML_PARSE_PEDANTIC) {
15077
94.2k
        ctxt->pedantic = 1;
15078
94.2k
        options -= XML_PARSE_PEDANTIC;
15079
94.2k
  ctxt->options |= XML_PARSE_PEDANTIC;
15080
94.2k
    } else
15081
692k
        ctxt->pedantic = 0;
15082
787k
    if (options & XML_PARSE_NOBLANKS) {
15083
262k
        ctxt->keepBlanks = 0;
15084
262k
        ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
15085
262k
        options -= XML_PARSE_NOBLANKS;
15086
262k
  ctxt->options |= XML_PARSE_NOBLANKS;
15087
262k
    } else
15088
524k
        ctxt->keepBlanks = 1;
15089
787k
    if (options & XML_PARSE_DTDVALID) {
15090
216k
        ctxt->validate = 1;
15091
216k
        if (options & XML_PARSE_NOWARNING)
15092
112k
            ctxt->vctxt.warning = NULL;
15093
216k
        if (options & XML_PARSE_NOERROR)
15094
128k
            ctxt->vctxt.error = NULL;
15095
216k
        options -= XML_PARSE_DTDVALID;
15096
216k
  ctxt->options |= XML_PARSE_DTDVALID;
15097
216k
    } else
15098
570k
        ctxt->validate = 0;
15099
787k
    if (options & XML_PARSE_NOWARNING) {
15100
270k
        ctxt->sax->warning = NULL;
15101
270k
        options -= XML_PARSE_NOWARNING;
15102
270k
    }
15103
787k
    if (options & XML_PARSE_NOERROR) {
15104
308k
        ctxt->sax->error = NULL;
15105
308k
        ctxt->sax->fatalError = NULL;
15106
308k
        options -= XML_PARSE_NOERROR;
15107
308k
    }
15108
787k
#ifdef LIBXML_SAX1_ENABLED
15109
787k
    if (options & XML_PARSE_SAX1) {
15110
279k
        ctxt->sax->startElement = xmlSAX2StartElement;
15111
279k
        ctxt->sax->endElement = xmlSAX2EndElement;
15112
279k
        ctxt->sax->startElementNs = NULL;
15113
279k
        ctxt->sax->endElementNs = NULL;
15114
279k
        ctxt->sax->initialized = 1;
15115
279k
        options -= XML_PARSE_SAX1;
15116
279k
  ctxt->options |= XML_PARSE_SAX1;
15117
279k
    }
15118
787k
#endif /* LIBXML_SAX1_ENABLED */
15119
787k
    if (options & XML_PARSE_NODICT) {
15120
267k
        ctxt->dictNames = 0;
15121
267k
        options -= XML_PARSE_NODICT;
15122
267k
  ctxt->options |= XML_PARSE_NODICT;
15123
519k
    } else {
15124
519k
        ctxt->dictNames = 1;
15125
519k
    }
15126
787k
    if (options & XML_PARSE_NOCDATA) {
15127
363k
        ctxt->sax->cdataBlock = NULL;
15128
363k
        options -= XML_PARSE_NOCDATA;
15129
363k
  ctxt->options |= XML_PARSE_NOCDATA;
15130
363k
    }
15131
787k
    if (options & XML_PARSE_NSCLEAN) {
15132
379k
  ctxt->options |= XML_PARSE_NSCLEAN;
15133
379k
        options -= XML_PARSE_NSCLEAN;
15134
379k
    }
15135
787k
    if (options & XML_PARSE_NONET) {
15136
305k
  ctxt->options |= XML_PARSE_NONET;
15137
305k
        options -= XML_PARSE_NONET;
15138
305k
    }
15139
787k
    if (options & XML_PARSE_COMPACT) {
15140
418k
  ctxt->options |= XML_PARSE_COMPACT;
15141
418k
        options -= XML_PARSE_COMPACT;
15142
418k
    }
15143
787k
    if (options & XML_PARSE_OLD10) {
15144
282k
  ctxt->options |= XML_PARSE_OLD10;
15145
282k
        options -= XML_PARSE_OLD10;
15146
282k
    }
15147
787k
    if (options & XML_PARSE_NOBASEFIX) {
15148
283k
  ctxt->options |= XML_PARSE_NOBASEFIX;
15149
283k
        options -= XML_PARSE_NOBASEFIX;
15150
283k
    }
15151
787k
    if (options & XML_PARSE_HUGE) {
15152
267k
  ctxt->options |= XML_PARSE_HUGE;
15153
267k
        options -= XML_PARSE_HUGE;
15154
267k
        if (ctxt->dict != NULL)
15155
267k
            xmlDictSetLimit(ctxt->dict, 0);
15156
267k
    }
15157
787k
    if (options & XML_PARSE_OLDSAX) {
15158
242k
  ctxt->options |= XML_PARSE_OLDSAX;
15159
242k
        options -= XML_PARSE_OLDSAX;
15160
242k
    }
15161
787k
    if (options & XML_PARSE_IGNORE_ENC) {
15162
356k
  ctxt->options |= XML_PARSE_IGNORE_ENC;
15163
356k
        options -= XML_PARSE_IGNORE_ENC;
15164
356k
    }
15165
787k
    if (options & XML_PARSE_BIG_LINES) {
15166
262k
  ctxt->options |= XML_PARSE_BIG_LINES;
15167
262k
        options -= XML_PARSE_BIG_LINES;
15168
262k
    }
15169
787k
    ctxt->linenumbers = 1;
15170
787k
    return (options);
15171
787k
}
15172
15173
/**
15174
 * xmlCtxtUseOptions:
15175
 * @ctxt: an XML parser context
15176
 * @options:  a combination of xmlParserOption
15177
 *
15178
 * Applies the options to the parser context
15179
 *
15180
 * Returns 0 in case of success, the set of unknown or unimplemented options
15181
 *         in case of error.
15182
 */
15183
int
15184
xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
15185
567k
{
15186
567k
   return(xmlCtxtUseOptionsInternal(ctxt, options, NULL));
15187
567k
}
15188
15189
/**
15190
 * xmlDoRead:
15191
 * @ctxt:  an XML parser context
15192
 * @URL:  the base URL to use for the document
15193
 * @encoding:  the document encoding, or NULL
15194
 * @options:  a combination of xmlParserOption
15195
 * @reuse:  keep the context for reuse
15196
 *
15197
 * Common front-end for the xmlRead functions
15198
 *
15199
 * Returns the resulting document tree or NULL
15200
 */
15201
static xmlDocPtr
15202
xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
15203
          int options, int reuse)
15204
219k
{
15205
219k
    xmlDocPtr ret;
15206
15207
219k
    xmlCtxtUseOptionsInternal(ctxt, options, encoding);
15208
219k
    if (encoding != NULL) {
15209
0
        xmlCharEncodingHandlerPtr hdlr;
15210
15211
0
  hdlr = xmlFindCharEncodingHandler(encoding);
15212
0
  if (hdlr != NULL)
15213
0
      xmlSwitchToEncoding(ctxt, hdlr);
15214
0
    }
15215
219k
    if ((URL != NULL) && (ctxt->input != NULL) &&
15216
219k
        (ctxt->input->filename == NULL))
15217
219k
        ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
15218
219k
    xmlParseDocument(ctxt);
15219
219k
    if ((ctxt->wellFormed) || ctxt->recovery)
15220
125k
        ret = ctxt->myDoc;
15221
93.9k
    else {
15222
93.9k
        ret = NULL;
15223
93.9k
  if (ctxt->myDoc != NULL) {
15224
83.2k
      xmlFreeDoc(ctxt->myDoc);
15225
83.2k
  }
15226
93.9k
    }
15227
219k
    ctxt->myDoc = NULL;
15228
219k
    if (!reuse) {
15229
219k
  xmlFreeParserCtxt(ctxt);
15230
219k
    }
15231
15232
219k
    return (ret);
15233
219k
}
15234
15235
/**
15236
 * xmlReadDoc:
15237
 * @cur:  a pointer to a zero terminated string
15238
 * @URL:  the base URL to use for the document
15239
 * @encoding:  the document encoding, or NULL
15240
 * @options:  a combination of xmlParserOption
15241
 *
15242
 * parse an XML in-memory document and build a tree.
15243
 *
15244
 * Returns the resulting document tree
15245
 */
15246
xmlDocPtr
15247
xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
15248
0
{
15249
0
    xmlParserCtxtPtr ctxt;
15250
15251
0
    if (cur == NULL)
15252
0
        return (NULL);
15253
0
    xmlInitParser();
15254
15255
0
    ctxt = xmlCreateDocParserCtxt(cur);
15256
0
    if (ctxt == NULL)
15257
0
        return (NULL);
15258
0
    return (xmlDoRead(ctxt, URL, encoding, options, 0));
15259
0
}
15260
15261
/**
15262
 * xmlReadFile:
15263
 * @filename:  a file or URL
15264
 * @encoding:  the document encoding, or NULL
15265
 * @options:  a combination of xmlParserOption
15266
 *
15267
 * parse an XML file from the filesystem or the network.
15268
 *
15269
 * Returns the resulting document tree
15270
 */
15271
xmlDocPtr
15272
xmlReadFile(const char *filename, const char *encoding, int options)
15273
0
{
15274
0
    xmlParserCtxtPtr ctxt;
15275
15276
0
    xmlInitParser();
15277
0
    ctxt = xmlCreateURLParserCtxt(filename, options);
15278
0
    if (ctxt == NULL)
15279
0
        return (NULL);
15280
0
    return (xmlDoRead(ctxt, NULL, encoding, options, 0));
15281
0
}
15282
15283
/**
15284
 * xmlReadMemory:
15285
 * @buffer:  a pointer to a char array
15286
 * @size:  the size of the array
15287
 * @URL:  the base URL to use for the document
15288
 * @encoding:  the document encoding, or NULL
15289
 * @options:  a combination of xmlParserOption
15290
 *
15291
 * parse an XML in-memory document and build a tree.
15292
 *
15293
 * Returns the resulting document tree
15294
 */
15295
xmlDocPtr
15296
xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
15297
222k
{
15298
222k
    xmlParserCtxtPtr ctxt;
15299
15300
222k
    xmlInitParser();
15301
222k
    ctxt = xmlCreateMemoryParserCtxt(buffer, size);
15302
222k
    if (ctxt == NULL)
15303
2.96k
        return (NULL);
15304
219k
    return (xmlDoRead(ctxt, URL, encoding, options, 0));
15305
222k
}
15306
15307
/**
15308
 * xmlReadFd:
15309
 * @fd:  an open file descriptor
15310
 * @URL:  the base URL to use for the document
15311
 * @encoding:  the document encoding, or NULL
15312
 * @options:  a combination of xmlParserOption
15313
 *
15314
 * parse an XML from a file descriptor and build a tree.
15315
 * NOTE that the file descriptor will not be closed when the
15316
 *      reader is closed or reset.
15317
 *
15318
 * Returns the resulting document tree
15319
 */
15320
xmlDocPtr
15321
xmlReadFd(int fd, const char *URL, const char *encoding, int options)
15322
0
{
15323
0
    xmlParserCtxtPtr ctxt;
15324
0
    xmlParserInputBufferPtr input;
15325
0
    xmlParserInputPtr stream;
15326
15327
0
    if (fd < 0)
15328
0
        return (NULL);
15329
0
    xmlInitParser();
15330
15331
0
    input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15332
0
    if (input == NULL)
15333
0
        return (NULL);
15334
0
    input->closecallback = NULL;
15335
0
    ctxt = xmlNewParserCtxt();
15336
0
    if (ctxt == NULL) {
15337
0
        xmlFreeParserInputBuffer(input);
15338
0
        return (NULL);
15339
0
    }
15340
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15341
0
    if (stream == NULL) {
15342
0
        xmlFreeParserInputBuffer(input);
15343
0
  xmlFreeParserCtxt(ctxt);
15344
0
        return (NULL);
15345
0
    }
15346
0
    inputPush(ctxt, stream);
15347
0
    return (xmlDoRead(ctxt, URL, encoding, options, 0));
15348
0
}
15349
15350
/**
15351
 * xmlReadIO:
15352
 * @ioread:  an I/O read function
15353
 * @ioclose:  an I/O close function
15354
 * @ioctx:  an I/O handler
15355
 * @URL:  the base URL to use for the document
15356
 * @encoding:  the document encoding, or NULL
15357
 * @options:  a combination of xmlParserOption
15358
 *
15359
 * parse an XML document from I/O functions and source and build a tree.
15360
 *
15361
 * Returns the resulting document tree
15362
 */
15363
xmlDocPtr
15364
xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
15365
          void *ioctx, const char *URL, const char *encoding, int options)
15366
0
{
15367
0
    xmlParserCtxtPtr ctxt;
15368
0
    xmlParserInputBufferPtr input;
15369
0
    xmlParserInputPtr stream;
15370
15371
0
    if (ioread == NULL)
15372
0
        return (NULL);
15373
0
    xmlInitParser();
15374
15375
0
    input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15376
0
                                         XML_CHAR_ENCODING_NONE);
15377
0
    if (input == NULL) {
15378
0
        if (ioclose != NULL)
15379
0
            ioclose(ioctx);
15380
0
        return (NULL);
15381
0
    }
15382
0
    ctxt = xmlNewParserCtxt();
15383
0
    if (ctxt == NULL) {
15384
0
        xmlFreeParserInputBuffer(input);
15385
0
        return (NULL);
15386
0
    }
15387
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15388
0
    if (stream == NULL) {
15389
0
        xmlFreeParserInputBuffer(input);
15390
0
  xmlFreeParserCtxt(ctxt);
15391
0
        return (NULL);
15392
0
    }
15393
0
    inputPush(ctxt, stream);
15394
0
    return (xmlDoRead(ctxt, URL, encoding, options, 0));
15395
0
}
15396
15397
/**
15398
 * xmlCtxtReadDoc:
15399
 * @ctxt:  an XML parser context
15400
 * @cur:  a pointer to a zero terminated string
15401
 * @URL:  the base URL to use for the document
15402
 * @encoding:  the document encoding, or NULL
15403
 * @options:  a combination of xmlParserOption
15404
 *
15405
 * parse an XML in-memory document and build a tree.
15406
 * This reuses the existing @ctxt parser context
15407
 *
15408
 * Returns the resulting document tree
15409
 */
15410
xmlDocPtr
15411
xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
15412
               const char *URL, const char *encoding, int options)
15413
0
{
15414
0
    if (cur == NULL)
15415
0
        return (NULL);
15416
0
    return (xmlCtxtReadMemory(ctxt, (const char *) cur, xmlStrlen(cur), URL,
15417
0
                              encoding, options));
15418
0
}
15419
15420
/**
15421
 * xmlCtxtReadFile:
15422
 * @ctxt:  an XML parser context
15423
 * @filename:  a file or URL
15424
 * @encoding:  the document encoding, or NULL
15425
 * @options:  a combination of xmlParserOption
15426
 *
15427
 * parse an XML file from the filesystem or the network.
15428
 * This reuses the existing @ctxt parser context
15429
 *
15430
 * Returns the resulting document tree
15431
 */
15432
xmlDocPtr
15433
xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
15434
                const char *encoding, int options)
15435
0
{
15436
0
    xmlParserInputPtr stream;
15437
15438
0
    if (filename == NULL)
15439
0
        return (NULL);
15440
0
    if (ctxt == NULL)
15441
0
        return (NULL);
15442
0
    xmlInitParser();
15443
15444
0
    xmlCtxtReset(ctxt);
15445
15446
0
    stream = xmlLoadExternalEntity(filename, NULL, ctxt);
15447
0
    if (stream == NULL) {
15448
0
        return (NULL);
15449
0
    }
15450
0
    inputPush(ctxt, stream);
15451
0
    return (xmlDoRead(ctxt, NULL, encoding, options, 1));
15452
0
}
15453
15454
/**
15455
 * xmlCtxtReadMemory:
15456
 * @ctxt:  an XML parser context
15457
 * @buffer:  a pointer to a char array
15458
 * @size:  the size of the array
15459
 * @URL:  the base URL to use for the document
15460
 * @encoding:  the document encoding, or NULL
15461
 * @options:  a combination of xmlParserOption
15462
 *
15463
 * parse an XML in-memory document and build a tree.
15464
 * This reuses the existing @ctxt parser context
15465
 *
15466
 * Returns the resulting document tree
15467
 */
15468
xmlDocPtr
15469
xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
15470
                  const char *URL, const char *encoding, int options)
15471
0
{
15472
0
    xmlParserInputBufferPtr input;
15473
0
    xmlParserInputPtr stream;
15474
15475
0
    if (ctxt == NULL)
15476
0
        return (NULL);
15477
0
    if (buffer == NULL)
15478
0
        return (NULL);
15479
0
    xmlInitParser();
15480
15481
0
    xmlCtxtReset(ctxt);
15482
15483
0
    input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
15484
0
    if (input == NULL) {
15485
0
  return(NULL);
15486
0
    }
15487
15488
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15489
0
    if (stream == NULL) {
15490
0
  xmlFreeParserInputBuffer(input);
15491
0
  return(NULL);
15492
0
    }
15493
15494
0
    inputPush(ctxt, stream);
15495
0
    return (xmlDoRead(ctxt, URL, encoding, options, 1));
15496
0
}
15497
15498
/**
15499
 * xmlCtxtReadFd:
15500
 * @ctxt:  an XML parser context
15501
 * @fd:  an open file descriptor
15502
 * @URL:  the base URL to use for the document
15503
 * @encoding:  the document encoding, or NULL
15504
 * @options:  a combination of xmlParserOption
15505
 *
15506
 * parse an XML from a file descriptor and build a tree.
15507
 * This reuses the existing @ctxt parser context
15508
 * NOTE that the file descriptor will not be closed when the
15509
 *      reader is closed or reset.
15510
 *
15511
 * Returns the resulting document tree
15512
 */
15513
xmlDocPtr
15514
xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
15515
              const char *URL, const char *encoding, int options)
15516
0
{
15517
0
    xmlParserInputBufferPtr input;
15518
0
    xmlParserInputPtr stream;
15519
15520
0
    if (fd < 0)
15521
0
        return (NULL);
15522
0
    if (ctxt == NULL)
15523
0
        return (NULL);
15524
0
    xmlInitParser();
15525
15526
0
    xmlCtxtReset(ctxt);
15527
15528
15529
0
    input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15530
0
    if (input == NULL)
15531
0
        return (NULL);
15532
0
    input->closecallback = NULL;
15533
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15534
0
    if (stream == NULL) {
15535
0
        xmlFreeParserInputBuffer(input);
15536
0
        return (NULL);
15537
0
    }
15538
0
    inputPush(ctxt, stream);
15539
0
    return (xmlDoRead(ctxt, URL, encoding, options, 1));
15540
0
}
15541
15542
/**
15543
 * xmlCtxtReadIO:
15544
 * @ctxt:  an XML parser context
15545
 * @ioread:  an I/O read function
15546
 * @ioclose:  an I/O close function
15547
 * @ioctx:  an I/O handler
15548
 * @URL:  the base URL to use for the document
15549
 * @encoding:  the document encoding, or NULL
15550
 * @options:  a combination of xmlParserOption
15551
 *
15552
 * parse an XML document from I/O functions and source and build a tree.
15553
 * This reuses the existing @ctxt parser context
15554
 *
15555
 * Returns the resulting document tree
15556
 */
15557
xmlDocPtr
15558
xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
15559
              xmlInputCloseCallback ioclose, void *ioctx,
15560
        const char *URL,
15561
              const char *encoding, int options)
15562
0
{
15563
0
    xmlParserInputBufferPtr input;
15564
0
    xmlParserInputPtr stream;
15565
15566
0
    if (ioread == NULL)
15567
0
        return (NULL);
15568
0
    if (ctxt == NULL)
15569
0
        return (NULL);
15570
0
    xmlInitParser();
15571
15572
0
    xmlCtxtReset(ctxt);
15573
15574
0
    input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15575
0
                                         XML_CHAR_ENCODING_NONE);
15576
0
    if (input == NULL) {
15577
0
        if (ioclose != NULL)
15578
0
            ioclose(ioctx);
15579
0
        return (NULL);
15580
0
    }
15581
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15582
0
    if (stream == NULL) {
15583
0
        xmlFreeParserInputBuffer(input);
15584
0
        return (NULL);
15585
0
    }
15586
0
    inputPush(ctxt, stream);
15587
0
    return (xmlDoRead(ctxt, URL, encoding, options, 1));
15588
0
}
15589