Coverage Report

Created: 2023-12-13 20:03

/src/libxml2/parser.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3
 *            implemented on top of the SAX interfaces
4
 *
5
 * References:
6
 *   The XML specification:
7
 *     http://www.w3.org/TR/REC-xml
8
 *   Original 1.0 version:
9
 *     http://www.w3.org/TR/1998/REC-xml-19980210
10
 *   XML second edition working draft
11
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
12
 *
13
 * Okay this is a big file, the parser core is around 7000 lines, then it
14
 * is followed by the progressive parser top routines, then the various
15
 * high level APIs to call the parser and a few miscellaneous functions.
16
 * A number of helper functions and deprecated ones have been moved to
17
 * parserInternals.c to reduce this file size.
18
 * As much as possible the functions are associated with their relative
19
 * production in the XML specification. A few productions defining the
20
 * different ranges of character are actually implemented either in
21
 * parserInternals.h or parserInternals.c
22
 * The DOM tree build is realized from the default SAX callbacks in
23
 * the module SAX.c.
24
 * The routines doing the validation checks are in valid.c and called either
25
 * from the SAX callbacks or as standalone functions using a preparsed
26
 * document.
27
 *
28
 * See Copyright for the status of this software.
29
 *
30
 * daniel@veillard.com
31
 */
32
33
/* To avoid EBCDIC trouble when parsing on zOS */
34
#if defined(__MVS__)
35
#pragma convert("ISO8859-1")
36
#endif
37
38
#define IN_LIBXML
39
#include "libxml.h"
40
41
#if defined(_WIN32)
42
#define XML_DIR_SEP '\\'
43
#else
44
#define XML_DIR_SEP '/'
45
#endif
46
47
#include <stdlib.h>
48
#include <limits.h>
49
#include <string.h>
50
#include <stdarg.h>
51
#include <stddef.h>
52
#include <ctype.h>
53
#include <stdlib.h>
54
#include <libxml/xmlmemory.h>
55
#include <libxml/threads.h>
56
#include <libxml/globals.h>
57
#include <libxml/tree.h>
58
#include <libxml/parser.h>
59
#include <libxml/parserInternals.h>
60
#include <libxml/HTMLparser.h>
61
#include <libxml/valid.h>
62
#include <libxml/entities.h>
63
#include <libxml/xmlerror.h>
64
#include <libxml/encoding.h>
65
#include <libxml/xmlIO.h>
66
#include <libxml/uri.h>
67
#ifdef LIBXML_CATALOG_ENABLED
68
#include <libxml/catalog.h>
69
#endif
70
#ifdef LIBXML_SCHEMAS_ENABLED
71
#include <libxml/xmlschemastypes.h>
72
#include <libxml/relaxng.h>
73
#endif
74
#if defined(LIBXML_XPATH_ENABLED) || defined(LIBXML_SCHEMAS_ENABLED)
75
#include <libxml/xpath.h>
76
#endif
77
78
#include "private/buf.h"
79
#include "private/enc.h"
80
#include "private/error.h"
81
#include "private/html.h"
82
#include "private/io.h"
83
#include "private/parser.h"
84
#include "private/threads.h"
85
86
struct _xmlStartTag {
87
    const xmlChar *prefix;
88
    const xmlChar *URI;
89
    int line;
90
    int nsNr;
91
};
92
93
static void
94
xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info);
95
96
static xmlParserCtxtPtr
97
xmlCreateEntityParserCtxtInternal(xmlSAXHandlerPtr sax, void *userData,
98
        const xmlChar *URL, const xmlChar *ID, const xmlChar *base,
99
        xmlParserCtxtPtr pctx);
100
101
static void xmlHaltParser(xmlParserCtxtPtr ctxt);
102
103
static int
104
xmlParseElementStart(xmlParserCtxtPtr ctxt);
105
106
static void
107
xmlParseElementEnd(xmlParserCtxtPtr ctxt);
108
109
/************************************************************************
110
 *                  *
111
 *  Arbitrary limits set in the parser. See XML_PARSE_HUGE    *
112
 *                  *
113
 ************************************************************************/
114
115
8.87M
/*
 * Length limit, in bytes.
 * NOTE(review): its users are not visible in this part of the file;
 * presumably the bound applied when XML_PARSE_HUGE is set - confirm.
 */
#define XML_MAX_HUGE_LENGTH 1000000000

/*
 * Entity size thresholds. xmlParserEntityCheck() skips its size based
 * test for entities smaller than XML_PARSER_BIG_ENTITY.
 */
#define XML_PARSER_BIG_ENTITY 1000
#define XML_PARSER_LOT_ENTITY 5000

/*
 * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity
 *    replacement over the size in bytes of the input indicates that you
 *    have an exponential behaviour. A value of 10 corresponds to at least
 *    3 entity replacements per byte of input.
 */
#define XML_PARSER_NON_LINEAR 10
127
128
/*
129
 * xmlParserEntityCheck
130
 *
131
 * Function to check non-linear entity expansion behaviour
132
 * This is here to detect and stop exponential linear entity expansion
133
 * This is not a limitation of the parser but a safety
134
 * boundary feature. It can be disabled with the XML_PARSE_HUGE
135
 * parser option.
136
 */
137
static int
138
xmlParserEntityCheck(xmlParserCtxtPtr ctxt, size_t size,
139
                     xmlEntityPtr ent, size_t replacement)
140
8.87M
{
141
8.87M
    size_t consumed = 0;
142
8.87M
    int i;
143
144
8.87M
    if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE))
145
8.06M
        return (0);
146
812k
    if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
147
6.67k
        return (1);
148
149
    /*
150
     * This may look absurd but is needed to detect
151
     * entities problems
152
     */
153
805k
    if ((ent != NULL) && (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
154
805k
  (ent->content != NULL) && (ent->checked == 0) &&
155
805k
  (ctxt->errNo != XML_ERR_ENTITY_LOOP)) {
156
20.6k
  unsigned long oldnbent = ctxt->nbentities, diff;
157
20.6k
  xmlChar *rep;
158
159
20.6k
  ent->checked = 1;
160
161
20.6k
        ++ctxt->depth;
162
20.6k
  rep = xmlStringDecodeEntities(ctxt, ent->content,
163
20.6k
          XML_SUBSTITUTE_REF, 0, 0, 0);
164
20.6k
        --ctxt->depth;
165
20.6k
  if ((rep == NULL) || (ctxt->errNo == XML_ERR_ENTITY_LOOP)) {
166
1.79k
      ent->content[0] = 0;
167
1.79k
  }
168
169
20.6k
        diff = ctxt->nbentities - oldnbent + 1;
170
20.6k
        if (diff > INT_MAX / 2)
171
0
            diff = INT_MAX / 2;
172
20.6k
  ent->checked = diff * 2;
173
20.6k
  if (rep != NULL) {
174
19.1k
      if (xmlStrchr(rep, '<'))
175
8.29k
    ent->checked |= 1;
176
19.1k
      xmlFree(rep);
177
19.1k
      rep = NULL;
178
19.1k
  }
179
20.6k
    }
180
181
    /*
182
     * Prevent entity exponential check, not just replacement while
183
     * parsing the DTD
184
     * The check is potentially costly so do that only once in a thousand
185
     */
186
805k
    if ((ctxt->instate == XML_PARSER_DTD) && (ctxt->nbentities > 10000) &&
187
805k
        (ctxt->nbentities % 1024 == 0)) {
188
0
  for (i = 0;i < ctxt->inputNr;i++) {
189
0
      consumed += ctxt->inputTab[i]->consumed +
190
0
                 (ctxt->inputTab[i]->cur - ctxt->inputTab[i]->base);
191
0
  }
192
0
  if (ctxt->nbentities > consumed * XML_PARSER_NON_LINEAR) {
193
0
      xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
194
0
      ctxt->instate = XML_PARSER_EOF;
195
0
      return (1);
196
0
  }
197
0
  consumed = 0;
198
0
    }
199
200
201
202
805k
    if (replacement != 0) {
203
36.8k
  if (replacement < XML_MAX_TEXT_LENGTH)
204
36.8k
      return(0);
205
206
        /*
207
   * If the volume of entity copy reaches 10 times the
208
   * amount of parsed data and over the large text threshold
209
   * then that's very likely to be an abuse.
210
   */
211
0
        if (ctxt->input != NULL) {
212
0
      consumed = ctxt->input->consumed +
213
0
                 (ctxt->input->cur - ctxt->input->base);
214
0
  }
215
0
        consumed += ctxt->sizeentities;
216
217
0
        if (replacement < XML_PARSER_NON_LINEAR * consumed)
218
0
      return(0);
219
768k
    } else if (size != 0) {
220
        /*
221
         * Do the check based on the replacement size of the entity
222
         */
223
4.65k
        if (size < XML_PARSER_BIG_ENTITY)
224
3.71k
      return(0);
225
226
        /*
227
         * A limit on the amount of text data reasonably used
228
         */
229
937
        if (ctxt->input != NULL) {
230
937
            consumed = ctxt->input->consumed +
231
937
                (ctxt->input->cur - ctxt->input->base);
232
937
        }
233
937
        consumed += ctxt->sizeentities;
234
235
937
        if ((size < XML_PARSER_NON_LINEAR * consumed) &&
236
937
      (ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed))
237
573
            return (0);
238
764k
    } else if (ent != NULL) {
239
        /*
240
         * use the number of parsed entities in the replacement
241
         */
242
530k
        size = ent->checked / 2;
243
244
        /*
245
         * The amount of data parsed counting entities size only once
246
         */
247
530k
        if (ctxt->input != NULL) {
248
530k
            consumed = ctxt->input->consumed +
249
530k
                (ctxt->input->cur - ctxt->input->base);
250
530k
        }
251
530k
        consumed += ctxt->sizeentities;
252
253
        /*
254
         * Check the density of entities for the amount of data
255
   * knowing an entity reference will take at least 3 bytes
256
         */
257
530k
        if (size * 3 < consumed * XML_PARSER_NON_LINEAR)
258
530k
            return (0);
259
530k
    } else {
260
        /*
261
         * strange we got no data for checking
262
         */
263
233k
  if (((ctxt->lastError.code != XML_ERR_UNDECLARED_ENTITY) &&
264
233k
       (ctxt->lastError.code != XML_WAR_UNDECLARED_ENTITY)) ||
265
233k
      (ctxt->nbentities <= 10000))
266
233k
      return (0);
267
233k
    }
268
453
    xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
269
453
    return (1);
270
805k
}
271
272
/**
 * xmlParserMaxDepth:
 *
 * Arbitrary limit on the depth of the XML documents we accept to
 * process, 256 by default. This is a safety boundary feature and not
 * a limitation of the parser; it can be disabled with the
 * XML_PARSE_HUGE parser option.
 */
unsigned int xmlParserMaxDepth = 256;
281
282
283
284
#define SAX2 1

/* Buffer sizes used by the parsing routines of this file */
#define XML_PARSER_BIG_BUFFER_SIZE 300
#define XML_PARSER_BUFFER_SIZE 100

#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"

/**
 * XML_PARSER_CHUNK_SIZE
 *
 * When calling GROW this is the minimal amount of data the parser
 * expects to have received. It is not a hard limit but an optimization
 * when reading strings like Names. It is not strictly needed as long
 * as the available input characters are followed by a 0, which should
 * be provided by the I/O level.
 */
#define XML_PARSER_CHUNK_SIZE 100
299
300
/*
 * NULL terminated list of the processing instruction targets starting
 * with "xml" which are allowed by W3C specifications
 */
static const char* const xmlW3CPIs[] = {
    "xml-stylesheet", "xml-model",
    NULL
};
309
310
311
/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
312
static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
313
                                              const xmlChar **str);
314
315
static xmlParserErrors
316
xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
317
                xmlSAXHandlerPtr sax,
318
          void *user_data, int depth, const xmlChar *URL,
319
          const xmlChar *ID, xmlNodePtr *list);
320
321
static int
322
xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
323
                          const char *encoding);
324
#ifdef LIBXML_LEGACY_ENABLED
325
static void
326
xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
327
                      xmlNodePtr lastNode);
328
#endif /* LIBXML_LEGACY_ENABLED */
329
330
static xmlParserErrors
331
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
332
          const xmlChar *string, void *user_data, xmlNodePtr *lst);
333
334
static int
335
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
336
337
/************************************************************************
338
 *                  *
339
 *    Some factorized error routines        *
340
 *                  *
341
 ************************************************************************/
342
343
/**
344
 * xmlErrAttributeDup:
345
 * @ctxt:  an XML parser context
346
 * @prefix:  the attribute prefix
347
 * @localname:  the attribute localname
348
 *
349
 * Handle a redefinition of attribute error
350
 */
351
static void
352
xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
353
                   const xmlChar * localname)
354
32.6k
{
355
32.6k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
356
32.6k
        (ctxt->instate == XML_PARSER_EOF))
357
0
  return;
358
32.6k
    if (ctxt != NULL)
359
32.6k
  ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
360
361
32.6k
    if (prefix == NULL)
362
25.9k
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
363
25.9k
                        XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
364
25.9k
                        (const char *) localname, NULL, NULL, 0, 0,
365
25.9k
                        "Attribute %s redefined\n", localname);
366
6.74k
    else
367
6.74k
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
368
6.74k
                        XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
369
6.74k
                        (const char *) prefix, (const char *) localname,
370
6.74k
                        NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
371
6.74k
                        localname);
372
32.6k
    if (ctxt != NULL) {
373
32.6k
  ctxt->wellFormed = 0;
374
32.6k
  if (ctxt->recovery == 0)
375
22.8k
      ctxt->disableSAX = 1;
376
32.6k
    }
377
32.6k
}
378
379
/**
380
 * xmlFatalErr:
381
 * @ctxt:  an XML parser context
382
 * @error:  the error number
383
 * @extra:  extra information string
384
 *
385
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
386
 */
387
static void
388
xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
389
6.35M
{
390
6.35M
    const char *errmsg;
391
392
6.35M
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
393
6.35M
        (ctxt->instate == XML_PARSER_EOF))
394
588k
  return;
395
5.76M
    switch (error) {
396
72.2k
        case XML_ERR_INVALID_HEX_CHARREF:
397
72.2k
            errmsg = "CharRef: invalid hexadecimal value";
398
72.2k
            break;
399
120k
        case XML_ERR_INVALID_DEC_CHARREF:
400
120k
            errmsg = "CharRef: invalid decimal value";
401
120k
            break;
402
0
        case XML_ERR_INVALID_CHARREF:
403
0
            errmsg = "CharRef: invalid value";
404
0
            break;
405
215k
        case XML_ERR_INTERNAL_ERROR:
406
215k
            errmsg = "internal error";
407
215k
            break;
408
0
        case XML_ERR_PEREF_AT_EOF:
409
0
            errmsg = "PEReference at end of document";
410
0
            break;
411
0
        case XML_ERR_PEREF_IN_PROLOG:
412
0
            errmsg = "PEReference in prolog";
413
0
            break;
414
0
        case XML_ERR_PEREF_IN_EPILOG:
415
0
            errmsg = "PEReference in epilog";
416
0
            break;
417
0
        case XML_ERR_PEREF_NO_NAME:
418
0
            errmsg = "PEReference: no name";
419
0
            break;
420
189k
        case XML_ERR_PEREF_SEMICOL_MISSING:
421
189k
            errmsg = "PEReference: expecting ';'";
422
189k
            break;
423
685k
        case XML_ERR_ENTITY_LOOP:
424
685k
            errmsg = "Detected an entity reference loop";
425
685k
            break;
426
0
        case XML_ERR_ENTITY_NOT_STARTED:
427
0
            errmsg = "EntityValue: \" or ' expected";
428
0
            break;
429
522
        case XML_ERR_ENTITY_PE_INTERNAL:
430
522
            errmsg = "PEReferences forbidden in internal subset";
431
522
            break;
432
2.29k
        case XML_ERR_ENTITY_NOT_FINISHED:
433
2.29k
            errmsg = "EntityValue: \" or ' expected";
434
2.29k
            break;
435
140k
        case XML_ERR_ATTRIBUTE_NOT_STARTED:
436
140k
            errmsg = "AttValue: \" or ' expected";
437
140k
            break;
438
219k
        case XML_ERR_LT_IN_ATTRIBUTE:
439
219k
            errmsg = "Unescaped '<' not allowed in attributes values";
440
219k
            break;
441
8.22k
        case XML_ERR_LITERAL_NOT_STARTED:
442
8.22k
            errmsg = "SystemLiteral \" or ' expected";
443
8.22k
            break;
444
8.16k
        case XML_ERR_LITERAL_NOT_FINISHED:
445
8.16k
            errmsg = "Unfinished System or Public ID \" or ' expected";
446
8.16k
            break;
447
83.3k
        case XML_ERR_MISPLACED_CDATA_END:
448
83.3k
            errmsg = "Sequence ']]>' not allowed in content";
449
83.3k
            break;
450
7.41k
        case XML_ERR_URI_REQUIRED:
451
7.41k
            errmsg = "SYSTEM or PUBLIC, the URI is missing";
452
7.41k
            break;
453
835
        case XML_ERR_PUBID_REQUIRED:
454
835
            errmsg = "PUBLIC, the Public Identifier is missing";
455
835
            break;
456
52.4k
        case XML_ERR_HYPHEN_IN_COMMENT:
457
52.4k
            errmsg = "Comment must not contain '--' (double-hyphen)";
458
52.4k
            break;
459
39.5k
        case XML_ERR_PI_NOT_STARTED:
460
39.5k
            errmsg = "xmlParsePI : no target name";
461
39.5k
            break;
462
7.50k
        case XML_ERR_RESERVED_XML_NAME:
463
7.50k
            errmsg = "Invalid PI name";
464
7.50k
            break;
465
1.05k
        case XML_ERR_NOTATION_NOT_STARTED:
466
1.05k
            errmsg = "NOTATION: Name expected here";
467
1.05k
            break;
468
4.00k
        case XML_ERR_NOTATION_NOT_FINISHED:
469
4.00k
            errmsg = "'>' required to close NOTATION declaration";
470
4.00k
            break;
471
21.4k
        case XML_ERR_VALUE_REQUIRED:
472
21.4k
            errmsg = "Entity value required";
473
21.4k
            break;
474
591
        case XML_ERR_URI_FRAGMENT:
475
591
            errmsg = "Fragment not allowed";
476
591
            break;
477
35.9k
        case XML_ERR_ATTLIST_NOT_STARTED:
478
35.9k
            errmsg = "'(' required to start ATTLIST enumeration";
479
35.9k
            break;
480
3.79k
        case XML_ERR_NMTOKEN_REQUIRED:
481
3.79k
            errmsg = "NmToken expected in ATTLIST enumeration";
482
3.79k
            break;
483
8.80k
        case XML_ERR_ATTLIST_NOT_FINISHED:
484
8.80k
            errmsg = "')' required to finish ATTLIST enumeration";
485
8.80k
            break;
486
3.04k
        case XML_ERR_MIXED_NOT_STARTED:
487
3.04k
            errmsg = "MixedContentDecl : '|' or ')*' expected";
488
3.04k
            break;
489
0
        case XML_ERR_PCDATA_REQUIRED:
490
0
            errmsg = "MixedContentDecl : '#PCDATA' expected";
491
0
            break;
492
20.7k
        case XML_ERR_ELEMCONTENT_NOT_STARTED:
493
20.7k
            errmsg = "ContentDecl : Name or '(' expected";
494
20.7k
            break;
495
23.6k
        case XML_ERR_ELEMCONTENT_NOT_FINISHED:
496
23.6k
            errmsg = "ContentDecl : ',' '|' or ')' expected";
497
23.6k
            break;
498
0
        case XML_ERR_PEREF_IN_INT_SUBSET:
499
0
            errmsg =
500
0
                "PEReference: forbidden within markup decl in internal subset";
501
0
            break;
502
1.09M
        case XML_ERR_GT_REQUIRED:
503
1.09M
            errmsg = "expected '>'";
504
1.09M
            break;
505
169
        case XML_ERR_CONDSEC_INVALID:
506
169
            errmsg = "XML conditional section '[' expected";
507
169
            break;
508
12.8k
        case XML_ERR_EXT_SUBSET_NOT_FINISHED:
509
12.8k
            errmsg = "Content error in the external subset";
510
12.8k
            break;
511
1.06k
        case XML_ERR_CONDSEC_INVALID_KEYWORD:
512
1.06k
            errmsg =
513
1.06k
                "conditional section INCLUDE or IGNORE keyword expected";
514
1.06k
            break;
515
1.53k
        case XML_ERR_CONDSEC_NOT_FINISHED:
516
1.53k
            errmsg = "XML conditional section not closed";
517
1.53k
            break;
518
128
        case XML_ERR_XMLDECL_NOT_STARTED:
519
128
            errmsg = "Text declaration '<?xml' required";
520
128
            break;
521
72.1k
        case XML_ERR_XMLDECL_NOT_FINISHED:
522
72.1k
            errmsg = "parsing XML declaration: '?>' expected";
523
72.1k
            break;
524
0
        case XML_ERR_EXT_ENTITY_STANDALONE:
525
0
            errmsg = "external parsed entities cannot be standalone";
526
0
            break;
527
1.02M
        case XML_ERR_ENTITYREF_SEMICOL_MISSING:
528
1.02M
            errmsg = "EntityRef: expecting ';'";
529
1.02M
            break;
530
79.2k
        case XML_ERR_DOCTYPE_NOT_FINISHED:
531
79.2k
            errmsg = "DOCTYPE improperly terminated";
532
79.2k
            break;
533
0
        case XML_ERR_LTSLASH_REQUIRED:
534
0
            errmsg = "EndTag: '</' not found";
535
0
            break;
536
5.19k
        case XML_ERR_EQUAL_REQUIRED:
537
5.19k
            errmsg = "expected '='";
538
5.19k
            break;
539
22.0k
        case XML_ERR_STRING_NOT_CLOSED:
540
22.0k
            errmsg = "String not closed expecting \" or '";
541
22.0k
            break;
542
5.20k
        case XML_ERR_STRING_NOT_STARTED:
543
5.20k
            errmsg = "String not started expecting ' or \"";
544
5.20k
            break;
545
1.03k
        case XML_ERR_ENCODING_NAME:
546
1.03k
            errmsg = "Invalid XML encoding name";
547
1.03k
            break;
548
2.11k
        case XML_ERR_STANDALONE_VALUE:
549
2.11k
            errmsg = "standalone accepts only 'yes' or 'no'";
550
2.11k
            break;
551
30.5k
        case XML_ERR_DOCUMENT_EMPTY:
552
30.5k
            errmsg = "Document is empty";
553
30.5k
            break;
554
143k
        case XML_ERR_DOCUMENT_END:
555
143k
            errmsg = "Extra content at the end of the document";
556
143k
            break;
557
1.22M
        case XML_ERR_NOT_WELL_BALANCED:
558
1.22M
            errmsg = "chunk is not well balanced";
559
1.22M
            break;
560
0
        case XML_ERR_EXTRA_CONTENT:
561
0
            errmsg = "extra content at the end of well balanced chunk";
562
0
            break;
563
35.3k
        case XML_ERR_VERSION_MISSING:
564
35.3k
            errmsg = "Malformed declaration expecting version";
565
35.3k
            break;
566
22
        case XML_ERR_NAME_TOO_LONG:
567
22
            errmsg = "Name too long";
568
22
            break;
569
#if 0
570
        case:
571
            errmsg = "";
572
            break;
573
#endif
574
23.3k
        default:
575
23.3k
            errmsg = "Unregistered error message";
576
5.76M
    }
577
5.76M
    if (ctxt != NULL)
578
5.76M
  ctxt->errNo = error;
579
5.76M
    if (info == NULL) {
580
5.54M
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
581
5.54M
                        XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s\n",
582
5.54M
                        errmsg);
583
5.54M
    } else {
584
215k
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
585
215k
                        XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s: %s\n",
586
215k
                        errmsg, info);
587
215k
    }
588
5.76M
    if (ctxt != NULL) {
589
5.76M
  ctxt->wellFormed = 0;
590
5.76M
  if (ctxt->recovery == 0)
591
4.57M
      ctxt->disableSAX = 1;
592
5.76M
    }
593
5.76M
}
594
595
/**
596
 * xmlFatalErrMsg:
597
 * @ctxt:  an XML parser context
598
 * @error:  the error number
599
 * @msg:  the error message
600
 *
601
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
602
 */
603
static void LIBXML_ATTR_FORMAT(3,0)
604
xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
605
               const char *msg)
606
11.1M
{
607
11.1M
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
608
11.1M
        (ctxt->instate == XML_PARSER_EOF))
609
0
  return;
610
11.1M
    if (ctxt != NULL)
611
11.1M
  ctxt->errNo = error;
612
11.1M
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
613
11.1M
                    XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
614
11.1M
    if (ctxt != NULL) {
615
11.1M
  ctxt->wellFormed = 0;
616
11.1M
  if (ctxt->recovery == 0)
617
8.59M
      ctxt->disableSAX = 1;
618
11.1M
    }
619
11.1M
}
620
621
/**
622
 * xmlWarningMsg:
623
 * @ctxt:  an XML parser context
624
 * @error:  the error number
625
 * @msg:  the error message
626
 * @str1:  extra data
627
 * @str2:  extra data
628
 *
629
 * Handle a warning.
630
 */
631
static void LIBXML_ATTR_FORMAT(3,0)
632
xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
633
              const char *msg, const xmlChar *str1, const xmlChar *str2)
634
330k
{
635
330k
    xmlStructuredErrorFunc schannel = NULL;
636
637
330k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
638
330k
        (ctxt->instate == XML_PARSER_EOF))
639
0
  return;
640
330k
    if ((ctxt != NULL) && (ctxt->sax != NULL) &&
641
330k
        (ctxt->sax->initialized == XML_SAX2_MAGIC))
642
178k
        schannel = ctxt->sax->serror;
643
330k
    if (ctxt != NULL) {
644
330k
        __xmlRaiseError(schannel,
645
330k
                    (ctxt->sax) ? ctxt->sax->warning : NULL,
646
330k
                    ctxt->userData,
647
330k
                    ctxt, NULL, XML_FROM_PARSER, error,
648
330k
                    XML_ERR_WARNING, NULL, 0,
649
330k
        (const char *) str1, (const char *) str2, NULL, 0, 0,
650
330k
        msg, (const char *) str1, (const char *) str2);
651
330k
    } else {
652
0
        __xmlRaiseError(schannel, NULL, NULL,
653
0
                    ctxt, NULL, XML_FROM_PARSER, error,
654
0
                    XML_ERR_WARNING, NULL, 0,
655
0
        (const char *) str1, (const char *) str2, NULL, 0, 0,
656
0
        msg, (const char *) str1, (const char *) str2);
657
0
    }
658
330k
}
659
660
/**
661
 * xmlValidityError:
662
 * @ctxt:  an XML parser context
663
 * @error:  the error number
664
 * @msg:  the error message
665
 * @str1:  extra data
666
 *
667
 * Handle a validity error.
668
 */
669
static void LIBXML_ATTR_FORMAT(3,0)
670
xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
671
              const char *msg, const xmlChar *str1, const xmlChar *str2)
672
21.0k
{
673
21.0k
    xmlStructuredErrorFunc schannel = NULL;
674
675
21.0k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
676
21.0k
        (ctxt->instate == XML_PARSER_EOF))
677
0
  return;
678
21.0k
    if (ctxt != NULL) {
679
21.0k
  ctxt->errNo = error;
680
21.0k
  if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
681
8.83k
      schannel = ctxt->sax->serror;
682
21.0k
    }
683
21.0k
    if (ctxt != NULL) {
684
21.0k
        __xmlRaiseError(schannel,
685
21.0k
                    ctxt->vctxt.error, ctxt->vctxt.userData,
686
21.0k
                    ctxt, NULL, XML_FROM_DTD, error,
687
21.0k
                    XML_ERR_ERROR, NULL, 0, (const char *) str1,
688
21.0k
        (const char *) str2, NULL, 0, 0,
689
21.0k
        msg, (const char *) str1, (const char *) str2);
690
21.0k
  ctxt->valid = 0;
691
21.0k
    } else {
692
0
        __xmlRaiseError(schannel, NULL, NULL,
693
0
                    ctxt, NULL, XML_FROM_DTD, error,
694
0
                    XML_ERR_ERROR, NULL, 0, (const char *) str1,
695
0
        (const char *) str2, NULL, 0, 0,
696
0
        msg, (const char *) str1, (const char *) str2);
697
0
    }
698
21.0k
}
699
700
/**
701
 * xmlFatalErrMsgInt:
702
 * @ctxt:  an XML parser context
703
 * @error:  the error number
704
 * @msg:  the error message
705
 * @val:  an integer value
706
 *
707
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
708
 */
709
static void LIBXML_ATTR_FORMAT(3,0)
710
xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
711
                  const char *msg, int val)
712
3.93M
{
713
3.93M
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
714
3.93M
        (ctxt->instate == XML_PARSER_EOF))
715
0
  return;
716
3.93M
    if (ctxt != NULL)
717
3.93M
  ctxt->errNo = error;
718
3.93M
    __xmlRaiseError(NULL, NULL, NULL,
719
3.93M
                    ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
720
3.93M
                    NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
721
3.93M
    if (ctxt != NULL) {
722
3.93M
  ctxt->wellFormed = 0;
723
3.93M
  if (ctxt->recovery == 0)
724
3.45M
      ctxt->disableSAX = 1;
725
3.93M
    }
726
3.93M
}
727
728
/**
729
 * xmlFatalErrMsgStrIntStr:
730
 * @ctxt:  an XML parser context
731
 * @error:  the error number
732
 * @msg:  the error message
733
 * @str1:  an string info
734
 * @val:  an integer value
735
 * @str2:  an string info
736
 *
737
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
738
 */
739
static void LIBXML_ATTR_FORMAT(3,0)
740
xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
741
                  const char *msg, const xmlChar *str1, int val,
742
      const xmlChar *str2)
743
4.94M
{
744
4.94M
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
745
4.94M
        (ctxt->instate == XML_PARSER_EOF))
746
0
  return;
747
4.94M
    if (ctxt != NULL)
748
4.94M
  ctxt->errNo = error;
749
4.94M
    __xmlRaiseError(NULL, NULL, NULL,
750
4.94M
                    ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
751
4.94M
                    NULL, 0, (const char *) str1, (const char *) str2,
752
4.94M
        NULL, val, 0, msg, str1, val, str2);
753
4.94M
    if (ctxt != NULL) {
754
4.94M
  ctxt->wellFormed = 0;
755
4.94M
  if (ctxt->recovery == 0)
756
4.37M
      ctxt->disableSAX = 1;
757
4.94M
    }
758
4.94M
}
759
760
/**
761
 * xmlFatalErrMsgStr:
762
 * @ctxt:  an XML parser context
763
 * @error:  the error number
764
 * @msg:  the error message
765
 * @val:  a string value
766
 *
767
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
768
 */
769
static void LIBXML_ATTR_FORMAT(3,0)
770
xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
771
                  const char *msg, const xmlChar * val)
772
4.27M
{
773
4.27M
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
774
4.27M
        (ctxt->instate == XML_PARSER_EOF))
775
0
  return;
776
4.27M
    if (ctxt != NULL)
777
4.27M
  ctxt->errNo = error;
778
4.27M
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
779
4.27M
                    XML_FROM_PARSER, error, XML_ERR_FATAL,
780
4.27M
                    NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
781
4.27M
                    val);
782
4.27M
    if (ctxt != NULL) {
783
4.27M
  ctxt->wellFormed = 0;
784
4.27M
  if (ctxt->recovery == 0)
785
3.38M
      ctxt->disableSAX = 1;
786
4.27M
    }
787
4.27M
}
788
789
/**
790
 * xmlErrMsgStr:
791
 * @ctxt:  an XML parser context
792
 * @error:  the error number
793
 * @msg:  the error message
794
 * @val:  a string value
795
 *
796
 * Handle a non fatal parser error
797
 */
798
static void LIBXML_ATTR_FORMAT(3,0)
799
xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
800
                  const char *msg, const xmlChar * val)
801
54.8k
{
802
54.8k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
803
54.8k
        (ctxt->instate == XML_PARSER_EOF))
804
0
  return;
805
54.8k
    if (ctxt != NULL)
806
54.8k
  ctxt->errNo = error;
807
54.8k
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
808
54.8k
                    XML_FROM_PARSER, error, XML_ERR_ERROR,
809
54.8k
                    NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
810
54.8k
                    val);
811
54.8k
}
812
813
/**
814
 * xmlNsErr:
815
 * @ctxt:  an XML parser context
816
 * @error:  the error number
817
 * @msg:  the message
818
 * @info1:  extra information string
819
 * @info2:  extra information string
820
 *
821
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
822
 */
823
static void LIBXML_ATTR_FORMAT(3,0)
824
xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
825
         const char *msg,
826
         const xmlChar * info1, const xmlChar * info2,
827
         const xmlChar * info3)
828
763k
{
829
763k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
830
763k
        (ctxt->instate == XML_PARSER_EOF))
831
0
  return;
832
763k
    if (ctxt != NULL)
833
763k
  ctxt->errNo = error;
834
763k
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
835
763k
                    XML_ERR_ERROR, NULL, 0, (const char *) info1,
836
763k
                    (const char *) info2, (const char *) info3, 0, 0, msg,
837
763k
                    info1, info2, info3);
838
763k
    if (ctxt != NULL)
839
763k
  ctxt->nsWellFormed = 0;
840
763k
}
841
842
/**
843
 * xmlNsWarn
844
 * @ctxt:  an XML parser context
845
 * @error:  the error number
846
 * @msg:  the message
847
 * @info1:  extra information string
848
 * @info2:  extra information string
849
 *
850
 * Handle a namespace warning error
851
 */
852
static void LIBXML_ATTR_FORMAT(3,0)
853
xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
854
         const char *msg,
855
         const xmlChar * info1, const xmlChar * info2,
856
         const xmlChar * info3)
857
6.25k
{
858
6.25k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
859
6.25k
        (ctxt->instate == XML_PARSER_EOF))
860
0
  return;
861
6.25k
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
862
6.25k
                    XML_ERR_WARNING, NULL, 0, (const char *) info1,
863
6.25k
                    (const char *) info2, (const char *) info3, 0, 0, msg,
864
6.25k
                    info1, info2, info3);
865
6.25k
}
866
867
/************************************************************************
868
 *                  *
869
 *    Library wide options          *
870
 *                  *
871
 ************************************************************************/
872
873
/**
874
  * xmlHasFeature:
875
  * @feature: the feature to be examined
876
  *
877
  * Examines if the library has been compiled with a given feature.
878
  *
879
  * Returns a non-zero value if the feature exist, otherwise zero.
880
  * Returns zero (0) if the feature does not exist or an unknown
881
  * unknown feature is requested, non-zero otherwise.
882
  */
883
int
884
xmlHasFeature(xmlFeature feature)
885
0
{
886
0
    switch (feature) {
887
0
  case XML_WITH_THREAD:
888
0
#ifdef LIBXML_THREAD_ENABLED
889
0
      return(1);
890
#else
891
      return(0);
892
#endif
893
0
        case XML_WITH_TREE:
894
0
#ifdef LIBXML_TREE_ENABLED
895
0
            return(1);
896
#else
897
            return(0);
898
#endif
899
0
        case XML_WITH_OUTPUT:
900
0
#ifdef LIBXML_OUTPUT_ENABLED
901
0
            return(1);
902
#else
903
            return(0);
904
#endif
905
0
        case XML_WITH_PUSH:
906
0
#ifdef LIBXML_PUSH_ENABLED
907
0
            return(1);
908
#else
909
            return(0);
910
#endif
911
0
        case XML_WITH_READER:
912
0
#ifdef LIBXML_READER_ENABLED
913
0
            return(1);
914
#else
915
            return(0);
916
#endif
917
0
        case XML_WITH_PATTERN:
918
0
#ifdef LIBXML_PATTERN_ENABLED
919
0
            return(1);
920
#else
921
            return(0);
922
#endif
923
0
        case XML_WITH_WRITER:
924
0
#ifdef LIBXML_WRITER_ENABLED
925
0
            return(1);
926
#else
927
            return(0);
928
#endif
929
0
        case XML_WITH_SAX1:
930
0
#ifdef LIBXML_SAX1_ENABLED
931
0
            return(1);
932
#else
933
            return(0);
934
#endif
935
0
        case XML_WITH_FTP:
936
#ifdef LIBXML_FTP_ENABLED
937
            return(1);
938
#else
939
0
            return(0);
940
0
#endif
941
0
        case XML_WITH_HTTP:
942
#ifdef LIBXML_HTTP_ENABLED
943
            return(1);
944
#else
945
0
            return(0);
946
0
#endif
947
0
        case XML_WITH_VALID:
948
0
#ifdef LIBXML_VALID_ENABLED
949
0
            return(1);
950
#else
951
            return(0);
952
#endif
953
0
        case XML_WITH_HTML:
954
0
#ifdef LIBXML_HTML_ENABLED
955
0
            return(1);
956
#else
957
            return(0);
958
#endif
959
0
        case XML_WITH_LEGACY:
960
#ifdef LIBXML_LEGACY_ENABLED
961
            return(1);
962
#else
963
0
            return(0);
964
0
#endif
965
0
        case XML_WITH_C14N:
966
0
#ifdef LIBXML_C14N_ENABLED
967
0
            return(1);
968
#else
969
            return(0);
970
#endif
971
0
        case XML_WITH_CATALOG:
972
0
#ifdef LIBXML_CATALOG_ENABLED
973
0
            return(1);
974
#else
975
            return(0);
976
#endif
977
0
        case XML_WITH_XPATH:
978
0
#ifdef LIBXML_XPATH_ENABLED
979
0
            return(1);
980
#else
981
            return(0);
982
#endif
983
0
        case XML_WITH_XPTR:
984
0
#ifdef LIBXML_XPTR_ENABLED
985
0
            return(1);
986
#else
987
            return(0);
988
#endif
989
0
        case XML_WITH_XINCLUDE:
990
0
#ifdef LIBXML_XINCLUDE_ENABLED
991
0
            return(1);
992
#else
993
            return(0);
994
#endif
995
0
        case XML_WITH_ICONV:
996
0
#ifdef LIBXML_ICONV_ENABLED
997
0
            return(1);
998
#else
999
            return(0);
1000
#endif
1001
0
        case XML_WITH_ISO8859X:
1002
0
#ifdef LIBXML_ISO8859X_ENABLED
1003
0
            return(1);
1004
#else
1005
            return(0);
1006
#endif
1007
0
        case XML_WITH_UNICODE:
1008
0
#ifdef LIBXML_UNICODE_ENABLED
1009
0
            return(1);
1010
#else
1011
            return(0);
1012
#endif
1013
0
        case XML_WITH_REGEXP:
1014
0
#ifdef LIBXML_REGEXP_ENABLED
1015
0
            return(1);
1016
#else
1017
            return(0);
1018
#endif
1019
0
        case XML_WITH_AUTOMATA:
1020
0
#ifdef LIBXML_AUTOMATA_ENABLED
1021
0
            return(1);
1022
#else
1023
            return(0);
1024
#endif
1025
0
        case XML_WITH_EXPR:
1026
#ifdef LIBXML_EXPR_ENABLED
1027
            return(1);
1028
#else
1029
0
            return(0);
1030
0
#endif
1031
0
        case XML_WITH_SCHEMAS:
1032
0
#ifdef LIBXML_SCHEMAS_ENABLED
1033
0
            return(1);
1034
#else
1035
            return(0);
1036
#endif
1037
0
        case XML_WITH_SCHEMATRON:
1038
0
#ifdef LIBXML_SCHEMATRON_ENABLED
1039
0
            return(1);
1040
#else
1041
            return(0);
1042
#endif
1043
0
        case XML_WITH_MODULES:
1044
0
#ifdef LIBXML_MODULES_ENABLED
1045
0
            return(1);
1046
#else
1047
            return(0);
1048
#endif
1049
0
        case XML_WITH_DEBUG:
1050
#ifdef LIBXML_DEBUG_ENABLED
1051
            return(1);
1052
#else
1053
0
            return(0);
1054
0
#endif
1055
0
        case XML_WITH_DEBUG_MEM:
1056
#ifdef DEBUG_MEMORY_LOCATION
1057
            return(1);
1058
#else
1059
0
            return(0);
1060
0
#endif
1061
0
        case XML_WITH_DEBUG_RUN:
1062
0
            return(0);
1063
0
        case XML_WITH_ZLIB:
1064
0
#ifdef LIBXML_ZLIB_ENABLED
1065
0
            return(1);
1066
#else
1067
            return(0);
1068
#endif
1069
0
        case XML_WITH_LZMA:
1070
0
#ifdef LIBXML_LZMA_ENABLED
1071
0
            return(1);
1072
#else
1073
            return(0);
1074
#endif
1075
0
        case XML_WITH_ICU:
1076
#ifdef LIBXML_ICU_ENABLED
1077
            return(1);
1078
#else
1079
0
            return(0);
1080
0
#endif
1081
0
        default:
1082
0
      break;
1083
0
     }
1084
0
     return(0);
1085
0
}
1086
1087
/************************************************************************
1088
 *                  *
1089
 *    SAX2 defaulted attributes handling      *
1090
 *                  *
1091
 ************************************************************************/
1092
1093
/**
1094
 * xmlDetectSAX2:
1095
 * @ctxt:  an XML parser context
1096
 *
1097
 * Do the SAX2 detection and specific initialization
1098
 */
1099
static void
1100
3.38M
xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
1101
3.38M
    xmlSAXHandlerPtr sax;
1102
1103
    /* Avoid unused variable warning if features are disabled. */
1104
3.38M
    (void) sax;
1105
1106
3.38M
    if (ctxt == NULL) return;
1107
3.38M
    sax = ctxt->sax;
1108
3.38M
#ifdef LIBXML_SAX1_ENABLED
1109
3.38M
    if ((sax) &&  (sax->initialized == XML_SAX2_MAGIC) &&
1110
3.38M
        ((sax->startElementNs != NULL) ||
1111
912k
         (sax->endElementNs != NULL) ||
1112
912k
         ((sax->startElement == NULL) && (sax->endElement == NULL))))
1113
912k
        ctxt->sax2 = 1;
1114
#else
1115
    ctxt->sax2 = 1;
1116
#endif /* LIBXML_SAX1_ENABLED */
1117
1118
3.38M
    ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
1119
3.38M
    ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
1120
3.38M
    ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
1121
3.38M
    if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
1122
3.38M
    (ctxt->str_xml_ns == NULL)) {
1123
0
        xmlErrMemory(ctxt, NULL);
1124
0
    }
1125
3.38M
}
1126
1127
typedef struct _xmlDefAttrs xmlDefAttrs;
1128
typedef xmlDefAttrs *xmlDefAttrsPtr;
1129
struct _xmlDefAttrs {
1130
    int nbAttrs;  /* number of defaulted attributes on that element */
1131
    int maxAttrs;       /* the size of the array */
1132
#if __STDC_VERSION__ >= 199901L
1133
    /* Using a C99 flexible array member avoids UBSan errors. */
1134
    const xmlChar *values[]; /* array of localname/prefix/values/external */
1135
#else
1136
    const xmlChar *values[5];
1137
#endif
1138
};
1139
1140
/**
1141
 * xmlAttrNormalizeSpace:
1142
 * @src: the source string
1143
 * @dst: the target string
1144
 *
1145
 * Normalize the space in non CDATA attribute values:
1146
 * If the attribute type is not CDATA, then the XML processor MUST further
1147
 * process the normalized attribute value by discarding any leading and
1148
 * trailing space (#x20) characters, and by replacing sequences of space
1149
 * (#x20) characters by a single space (#x20) character.
1150
 * Note that the size of dst need to be at least src, and if one doesn't need
1151
 * to preserve dst (and it doesn't come from a dictionary or read-only) then
1152
 * passing src as dst is just fine.
1153
 *
1154
 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1155
 *         is needed.
1156
 */
1157
static xmlChar *
1158
xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
1159
97.6k
{
1160
97.6k
    if ((src == NULL) || (dst == NULL))
1161
0
        return(NULL);
1162
1163
116k
    while (*src == 0x20) src++;
1164
1.83M
    while (*src != 0) {
1165
1.73M
  if (*src == 0x20) {
1166
356k
      while (*src == 0x20) src++;
1167
95.1k
      if (*src != 0)
1168
77.3k
    *dst++ = 0x20;
1169
1.63M
  } else {
1170
1.63M
      *dst++ = *src++;
1171
1.63M
  }
1172
1.73M
    }
1173
97.6k
    *dst = 0;
1174
97.6k
    if (dst == src)
1175
66.9k
       return(NULL);
1176
30.7k
    return(dst);
1177
97.6k
}
1178
1179
/**
1180
 * xmlAttrNormalizeSpace2:
1181
 * @src: the source string
1182
 *
1183
 * Normalize the space in non CDATA attribute values, a slightly more complex
1184
 * front end to avoid allocation problems when running on attribute values
1185
 * coming from the input.
1186
 *
1187
 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1188
 *         is needed.
1189
 */
1190
static const xmlChar *
1191
xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
1192
33.4k
{
1193
33.4k
    int i;
1194
33.4k
    int remove_head = 0;
1195
33.4k
    int need_realloc = 0;
1196
33.4k
    const xmlChar *cur;
1197
1198
33.4k
    if ((ctxt == NULL) || (src == NULL) || (len == NULL))
1199
0
        return(NULL);
1200
33.4k
    i = *len;
1201
33.4k
    if (i <= 0)
1202
1.11k
        return(NULL);
1203
1204
32.2k
    cur = src;
1205
43.8k
    while (*cur == 0x20) {
1206
11.5k
        cur++;
1207
11.5k
  remove_head++;
1208
11.5k
    }
1209
680k
    while (*cur != 0) {
1210
659k
  if (*cur == 0x20) {
1211
51.6k
      cur++;
1212
51.6k
      if ((*cur == 0x20) || (*cur == 0)) {
1213
11.2k
          need_realloc = 1;
1214
11.2k
    break;
1215
11.2k
      }
1216
51.6k
  } else
1217
608k
      cur++;
1218
659k
    }
1219
32.2k
    if (need_realloc) {
1220
11.2k
        xmlChar *ret;
1221
1222
11.2k
  ret = xmlStrndup(src + remove_head, i - remove_head + 1);
1223
11.2k
  if (ret == NULL) {
1224
0
      xmlErrMemory(ctxt, NULL);
1225
0
      return(NULL);
1226
0
  }
1227
11.2k
  xmlAttrNormalizeSpace(ret, ret);
1228
11.2k
  *len = strlen((const char *)ret);
1229
11.2k
        return(ret);
1230
21.0k
    } else if (remove_head) {
1231
632
        *len -= remove_head;
1232
632
        memmove(src, src + remove_head, 1 + *len);
1233
632
  return(src);
1234
632
    }
1235
20.3k
    return(NULL);
1236
32.2k
}
1237
1238
/**
1239
 * xmlAddDefAttrs:
1240
 * @ctxt:  an XML parser context
1241
 * @fullname:  the element fullname
1242
 * @fullattr:  the attribute fullname
1243
 * @value:  the attribute value
1244
 *
1245
 * Add a defaulted attribute for an element
1246
 */
1247
static void
1248
xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1249
               const xmlChar *fullname,
1250
               const xmlChar *fullattr,
1251
67.1k
               const xmlChar *value) {
1252
67.1k
    xmlDefAttrsPtr defaults;
1253
67.1k
    int len;
1254
67.1k
    const xmlChar *name;
1255
67.1k
    const xmlChar *prefix;
1256
1257
    /*
1258
     * Allows to detect attribute redefinitions
1259
     */
1260
67.1k
    if (ctxt->attsSpecial != NULL) {
1261
43.2k
        if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1262
8.77k
      return;
1263
43.2k
    }
1264
1265
58.3k
    if (ctxt->attsDefault == NULL) {
1266
24.0k
        ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
1267
24.0k
  if (ctxt->attsDefault == NULL)
1268
0
      goto mem_error;
1269
24.0k
    }
1270
1271
    /*
1272
     * split the element name into prefix:localname , the string found
1273
     * are within the DTD and then not associated to namespace names.
1274
     */
1275
58.3k
    name = xmlSplitQName3(fullname, &len);
1276
58.3k
    if (name == NULL) {
1277
47.0k
        name = xmlDictLookup(ctxt->dict, fullname, -1);
1278
47.0k
  prefix = NULL;
1279
47.0k
    } else {
1280
11.2k
        name = xmlDictLookup(ctxt->dict, name, -1);
1281
11.2k
  prefix = xmlDictLookup(ctxt->dict, fullname, len);
1282
11.2k
    }
1283
1284
    /*
1285
     * make sure there is some storage
1286
     */
1287
58.3k
    defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1288
58.3k
    if (defaults == NULL) {
1289
29.3k
        defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
1290
29.3k
                     (4 * 5) * sizeof(const xmlChar *));
1291
29.3k
  if (defaults == NULL)
1292
0
      goto mem_error;
1293
29.3k
  defaults->nbAttrs = 0;
1294
29.3k
  defaults->maxAttrs = 4;
1295
29.3k
  if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1296
29.3k
                          defaults, NULL) < 0) {
1297
0
      xmlFree(defaults);
1298
0
      goto mem_error;
1299
0
  }
1300
29.3k
    } else if (defaults->nbAttrs >= defaults->maxAttrs) {
1301
4.17k
        xmlDefAttrsPtr temp;
1302
1303
4.17k
        temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
1304
4.17k
           (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
1305
4.17k
  if (temp == NULL)
1306
0
      goto mem_error;
1307
4.17k
  defaults = temp;
1308
4.17k
  defaults->maxAttrs *= 2;
1309
4.17k
  if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1310
4.17k
                          defaults, NULL) < 0) {
1311
0
      xmlFree(defaults);
1312
0
      goto mem_error;
1313
0
  }
1314
4.17k
    }
1315
1316
    /*
1317
     * Split the element name into prefix:localname , the string found
1318
     * are within the DTD and hen not associated to namespace names.
1319
     */
1320
58.3k
    name = xmlSplitQName3(fullattr, &len);
1321
58.3k
    if (name == NULL) {
1322
42.2k
        name = xmlDictLookup(ctxt->dict, fullattr, -1);
1323
42.2k
  prefix = NULL;
1324
42.2k
    } else {
1325
16.1k
        name = xmlDictLookup(ctxt->dict, name, -1);
1326
16.1k
  prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1327
16.1k
    }
1328
1329
58.3k
    defaults->values[5 * defaults->nbAttrs] = name;
1330
58.3k
    defaults->values[5 * defaults->nbAttrs + 1] = prefix;
1331
    /* intern the string and precompute the end */
1332
58.3k
    len = xmlStrlen(value);
1333
58.3k
    value = xmlDictLookup(ctxt->dict, value, len);
1334
58.3k
    defaults->values[5 * defaults->nbAttrs + 2] = value;
1335
58.3k
    defaults->values[5 * defaults->nbAttrs + 3] = value + len;
1336
58.3k
    if (ctxt->external)
1337
2.72k
        defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
1338
55.6k
    else
1339
55.6k
        defaults->values[5 * defaults->nbAttrs + 4] = NULL;
1340
58.3k
    defaults->nbAttrs++;
1341
1342
58.3k
    return;
1343
1344
0
mem_error:
1345
0
    xmlErrMemory(ctxt, NULL);
1346
0
    return;
1347
58.3k
}
1348
1349
/**
1350
 * xmlAddSpecialAttr:
1351
 * @ctxt:  an XML parser context
1352
 * @fullname:  the element fullname
1353
 * @fullattr:  the attribute fullname
1354
 * @type:  the attribute type
1355
 *
1356
 * Register this attribute type
1357
 */
1358
static void
1359
xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1360
      const xmlChar *fullname,
1361
      const xmlChar *fullattr,
1362
      int type)
1363
285k
{
1364
285k
    if (ctxt->attsSpecial == NULL) {
1365
51.5k
        ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
1366
51.5k
  if (ctxt->attsSpecial == NULL)
1367
0
      goto mem_error;
1368
51.5k
    }
1369
1370
285k
    if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1371
79.7k
        return;
1372
1373
205k
    xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1374
205k
                     (void *) (ptrdiff_t) type);
1375
205k
    return;
1376
1377
0
mem_error:
1378
0
    xmlErrMemory(ctxt, NULL);
1379
0
    return;
1380
285k
}
1381
1382
/**
1383
 * xmlCleanSpecialAttrCallback:
1384
 *
1385
 * Removes CDATA attributes from the special attribute table
1386
 */
1387
static void
1388
xmlCleanSpecialAttrCallback(void *payload, void *data,
1389
                            const xmlChar *fullname, const xmlChar *fullattr,
1390
204k
                            const xmlChar *unused ATTRIBUTE_UNUSED) {
1391
204k
    xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1392
1393
204k
    if (((ptrdiff_t) payload) == XML_ATTRIBUTE_CDATA) {
1394
88.2k
        xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1395
88.2k
    }
1396
204k
}
1397
1398
/**
1399
 * xmlCleanSpecialAttr:
1400
 * @ctxt:  an XML parser context
1401
 *
1402
 * Trim the list of attributes defined to remove all those of type
1403
 * CDATA as they are not special. This call should be done when finishing
1404
 * to parse the DTD and before starting to parse the document root.
1405
 */
1406
static void
1407
xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1408
296k
{
1409
296k
    if (ctxt->attsSpecial == NULL)
1410
245k
        return;
1411
1412
51.0k
    xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1413
1414
51.0k
    if (xmlHashSize(ctxt->attsSpecial) == 0) {
1415
11.5k
        xmlHashFree(ctxt->attsSpecial, NULL);
1416
11.5k
        ctxt->attsSpecial = NULL;
1417
11.5k
    }
1418
51.0k
    return;
1419
296k
}
1420
1421
/**
1422
 * xmlCheckLanguageID:
1423
 * @lang:  pointer to the string value
1424
 *
1425
 * Checks that the value conforms to the LanguageID production:
1426
 *
1427
 * NOTE: this is somewhat deprecated, those productions were removed from
1428
 *       the XML Second edition.
1429
 *
1430
 * [33] LanguageID ::= Langcode ('-' Subcode)*
1431
 * [34] Langcode ::= ISO639Code |  IanaCode |  UserCode
1432
 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1433
 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1434
 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1435
 * [38] Subcode ::= ([a-z] | [A-Z])+
1436
 *
1437
 * The current REC reference the successors of RFC 1766, currently 5646
1438
 *
1439
 * http://www.rfc-editor.org/rfc/rfc5646.txt
1440
 * langtag       = language
1441
 *                 ["-" script]
1442
 *                 ["-" region]
1443
 *                 *("-" variant)
1444
 *                 *("-" extension)
1445
 *                 ["-" privateuse]
1446
 * language      = 2*3ALPHA            ; shortest ISO 639 code
1447
 *                 ["-" extlang]       ; sometimes followed by
1448
 *                                     ; extended language subtags
1449
 *               / 4ALPHA              ; or reserved for future use
1450
 *               / 5*8ALPHA            ; or registered language subtag
1451
 *
1452
 * extlang       = 3ALPHA              ; selected ISO 639 codes
1453
 *                 *2("-" 3ALPHA)      ; permanently reserved
1454
 *
1455
 * script        = 4ALPHA              ; ISO 15924 code
1456
 *
1457
 * region        = 2ALPHA              ; ISO 3166-1 code
1458
 *               / 3DIGIT              ; UN M.49 code
1459
 *
1460
 * variant       = 5*8alphanum         ; registered variants
1461
 *               / (DIGIT 3alphanum)
1462
 *
1463
 * extension     = singleton 1*("-" (2*8alphanum))
1464
 *
1465
 *                                     ; Single alphanumerics
1466
 *                                     ; "x" reserved for private use
1467
 * singleton     = DIGIT               ; 0 - 9
1468
 *               / %x41-57             ; A - W
1469
 *               / %x59-5A             ; Y - Z
1470
 *               / %x61-77             ; a - w
1471
 *               / %x79-7A             ; y - z
1472
 *
1473
 * it sounds right to still allow Irregular i-xxx IANA and user codes too
1474
 * The parser below doesn't try to cope with extension or privateuse
1475
 * that could be added but that's not interoperable anyway
1476
 *
1477
 * Returns 1 if correct 0 otherwise
1478
 **/
1479
int
1480
xmlCheckLanguageID(const xmlChar * lang)
1481
23.9k
{
1482
23.9k
    const xmlChar *cur = lang, *nxt;
1483
1484
23.9k
    if (cur == NULL)
1485
900
        return (0);
1486
23.0k
    if (((cur[0] == 'i') && (cur[1] == '-')) ||
1487
23.0k
        ((cur[0] == 'I') && (cur[1] == '-')) ||
1488
23.0k
        ((cur[0] == 'x') && (cur[1] == '-')) ||
1489
23.0k
        ((cur[0] == 'X') && (cur[1] == '-'))) {
1490
        /*
1491
         * Still allow IANA code and user code which were coming
1492
         * from the previous version of the XML-1.0 specification
1493
         * it's deprecated but we should not fail
1494
         */
1495
1.54k
        cur += 2;
1496
15.3k
        while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1497
15.3k
               ((cur[0] >= 'a') && (cur[0] <= 'z')))
1498
13.7k
            cur++;
1499
1.54k
        return(cur[0] == 0);
1500
1.54k
    }
1501
21.5k
    nxt = cur;
1502
99.1k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1503
99.1k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1504
77.6k
           nxt++;
1505
21.5k
    if (nxt - cur >= 4) {
1506
        /*
1507
         * Reserved
1508
         */
1509
1.55k
        if ((nxt - cur > 8) || (nxt[0] != 0))
1510
1.14k
            return(0);
1511
408
        return(1);
1512
1.55k
    }
1513
19.9k
    if (nxt - cur < 2)
1514
1.94k
        return(0);
1515
    /* we got an ISO 639 code */
1516
18.0k
    if (nxt[0] == 0)
1517
8.39k
        return(1);
1518
9.65k
    if (nxt[0] != '-')
1519
919
        return(0);
1520
1521
8.73k
    nxt++;
1522
8.73k
    cur = nxt;
1523
    /* now we can have extlang or script or region or variant */
1524
8.73k
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1525
1.17k
        goto region_m49;
1526
1527
40.5k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1528
40.5k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1529
32.9k
           nxt++;
1530
7.55k
    if (nxt - cur == 4)
1531
1.99k
        goto script;
1532
5.56k
    if (nxt - cur == 2)
1533
1.27k
        goto region;
1534
4.28k
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1535
989
        goto variant;
1536
3.29k
    if (nxt - cur != 3)
1537
627
        return(0);
1538
    /* we parsed an extlang */
1539
2.67k
    if (nxt[0] == 0)
1540
327
        return(1);
1541
2.34k
    if (nxt[0] != '-')
1542
312
        return(0);
1543
1544
2.03k
    nxt++;
1545
2.03k
    cur = nxt;
1546
    /* now we can have script or region or variant */
1547
2.03k
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1548
293
        goto region_m49;
1549
1550
29.4k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1551
29.4k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1552
27.6k
           nxt++;
1553
1.74k
    if (nxt - cur == 2)
1554
358
        goto region;
1555
1.38k
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1556
380
        goto variant;
1557
1.00k
    if (nxt - cur != 4)
1558
411
        return(0);
1559
    /* we parsed a script */
1560
2.58k
script:
1561
2.58k
    if (nxt[0] == 0)
1562
259
        return(1);
1563
2.32k
    if (nxt[0] != '-')
1564
649
        return(0);
1565
1566
1.67k
    nxt++;
1567
1.67k
    cur = nxt;
1568
    /* now we can have region or variant */
1569
1.67k
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1570
320
        goto region_m49;
1571
1572
23.1k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1573
23.1k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1574
21.7k
           nxt++;
1575
1576
1.35k
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1577
482
        goto variant;
1578
876
    if (nxt - cur != 2)
1579
562
        return(0);
1580
    /* we parsed a region */
1581
2.73k
region:
1582
2.73k
    if (nxt[0] == 0)
1583
376
        return(1);
1584
2.35k
    if (nxt[0] != '-')
1585
1.10k
        return(0);
1586
1587
1.24k
    nxt++;
1588
1.24k
    cur = nxt;
1589
    /* now we can just have a variant */
1590
12.3k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1591
12.3k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1592
11.1k
           nxt++;
1593
1594
1.24k
    if ((nxt - cur < 5) || (nxt - cur > 8))
1595
706
        return(0);
1596
1597
    /* we parsed a variant */
1598
2.39k
variant:
1599
2.39k
    if (nxt[0] == 0)
1600
561
        return(1);
1601
1.83k
    if (nxt[0] != '-')
1602
1.41k
        return(0);
1603
    /* extensions and private use subtags not checked */
1604
417
    return (1);
1605
1606
1.78k
region_m49:
1607
1.78k
    if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
1608
1.78k
        ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
1609
786
        nxt += 3;
1610
786
        goto region;
1611
786
    }
1612
1.00k
    return(0);
1613
1.78k
}
1614
1615
/************************************************************************
1616
 *                  *
1617
 *    Parser stacks related functions and macros    *
1618
 *                  *
1619
 ************************************************************************/
1620
1621
static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1622
                                            const xmlChar ** str);
1623
1624
#ifdef SAX2
1625
/**
1626
 * nsPush:
1627
 * @ctxt:  an XML parser context
1628
 * @prefix:  the namespace prefix or NULL
1629
 * @URL:  the namespace name
1630
 *
1631
 * Pushes a new parser namespace on top of the ns stack
1632
 *
1633
 * Returns -1 in case of error, -2 if the namespace should be discarded
1634
 *     and the index in the stack otherwise.
1635
 */
1636
static int
1637
nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1638
2.27M
{
1639
2.27M
    if (ctxt->options & XML_PARSE_NSCLEAN) {
1640
194k
        int i;
1641
241k
  for (i = ctxt->nsNr - 2;i >= 0;i -= 2) {
1642
83.3k
      if (ctxt->nsTab[i] == prefix) {
1643
    /* in scope */
1644
36.3k
          if (ctxt->nsTab[i + 1] == URL)
1645
20.2k
        return(-2);
1646
    /* out of scope keep it */
1647
16.0k
    break;
1648
36.3k
      }
1649
83.3k
  }
1650
194k
    }
1651
2.24M
    if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1652
200k
  ctxt->nsMax = 10;
1653
200k
  ctxt->nsNr = 0;
1654
200k
  ctxt->nsTab = (const xmlChar **)
1655
200k
                xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1656
200k
  if (ctxt->nsTab == NULL) {
1657
0
      xmlErrMemory(ctxt, NULL);
1658
0
      ctxt->nsMax = 0;
1659
0
            return (-1);
1660
0
  }
1661
2.04M
    } else if (ctxt->nsNr >= ctxt->nsMax) {
1662
47.1k
        const xmlChar ** tmp;
1663
47.1k
        ctxt->nsMax *= 2;
1664
47.1k
        tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1665
47.1k
            ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1666
47.1k
        if (tmp == NULL) {
1667
0
            xmlErrMemory(ctxt, NULL);
1668
0
      ctxt->nsMax /= 2;
1669
0
            return (-1);
1670
0
        }
1671
47.1k
  ctxt->nsTab = tmp;
1672
47.1k
    }
1673
2.24M
    ctxt->nsTab[ctxt->nsNr++] = prefix;
1674
2.24M
    ctxt->nsTab[ctxt->nsNr++] = URL;
1675
2.24M
    return (ctxt->nsNr);
1676
2.24M
}
1677
/**
1678
 * nsPop:
1679
 * @ctxt: an XML parser context
1680
 * @nr:  the number to pop
1681
 *
1682
 * Pops the top @nr parser prefix/namespace from the ns stack
1683
 *
1684
 * Returns the number of namespaces removed
1685
 */
1686
static int
1687
nsPop(xmlParserCtxtPtr ctxt, int nr)
1688
116k
{
1689
116k
    int i;
1690
1691
116k
    if (ctxt->nsTab == NULL) return(0);
1692
116k
    if (ctxt->nsNr < nr) {
1693
0
        xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1694
0
        nr = ctxt->nsNr;
1695
0
    }
1696
116k
    if (ctxt->nsNr <= 0)
1697
0
        return (0);
1698
1699
367k
    for (i = 0;i < nr;i++) {
1700
251k
         ctxt->nsNr--;
1701
251k
   ctxt->nsTab[ctxt->nsNr] = NULL;
1702
251k
    }
1703
116k
    return(nr);
1704
116k
}
1705
#endif
1706
1707
static int
1708
324k
xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1709
324k
    const xmlChar **atts;
1710
324k
    int *attallocs;
1711
324k
    int maxatts;
1712
1713
324k
    if (ctxt->atts == NULL) {
1714
324k
  maxatts = 55; /* allow for 10 attrs by default */
1715
324k
  atts = (const xmlChar **)
1716
324k
         xmlMalloc(maxatts * sizeof(xmlChar *));
1717
324k
  if (atts == NULL) goto mem_error;
1718
324k
  ctxt->atts = atts;
1719
324k
  attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1720
324k
  if (attallocs == NULL) goto mem_error;
1721
324k
  ctxt->attallocs = attallocs;
1722
324k
  ctxt->maxatts = maxatts;
1723
324k
    } else if (nr + 5 > ctxt->maxatts) {
1724
321
  maxatts = (nr + 5) * 2;
1725
321
  atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1726
321
             maxatts * sizeof(const xmlChar *));
1727
321
  if (atts == NULL) goto mem_error;
1728
321
  ctxt->atts = atts;
1729
321
  attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1730
321
                               (maxatts / 5) * sizeof(int));
1731
321
  if (attallocs == NULL) goto mem_error;
1732
321
  ctxt->attallocs = attallocs;
1733
321
  ctxt->maxatts = maxatts;
1734
321
    }
1735
324k
    return(ctxt->maxatts);
1736
0
mem_error:
1737
0
    xmlErrMemory(ctxt, NULL);
1738
0
    return(-1);
1739
324k
}
1740
1741
/**
1742
 * inputPush:
1743
 * @ctxt:  an XML parser context
1744
 * @value:  the parser input
1745
 *
1746
 * Pushes a new parser input on top of the input stack
1747
 *
1748
 * Returns -1 in case of error, the index in the stack otherwise
1749
 */
1750
int
1751
inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1752
4.11M
{
1753
4.11M
    if ((ctxt == NULL) || (value == NULL))
1754
0
        return(-1);
1755
4.11M
    if (ctxt->inputNr >= ctxt->inputMax) {
1756
10.9k
        ctxt->inputMax *= 2;
1757
10.9k
        ctxt->inputTab =
1758
10.9k
            (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1759
10.9k
                                             ctxt->inputMax *
1760
10.9k
                                             sizeof(ctxt->inputTab[0]));
1761
10.9k
        if (ctxt->inputTab == NULL) {
1762
0
            xmlErrMemory(ctxt, NULL);
1763
0
      ctxt->inputMax /= 2;
1764
0
            return (-1);
1765
0
        }
1766
10.9k
    }
1767
4.11M
    ctxt->inputTab[ctxt->inputNr] = value;
1768
4.11M
    ctxt->input = value;
1769
4.11M
    return (ctxt->inputNr++);
1770
4.11M
}
1771
/**
1772
 * inputPop:
1773
 * @ctxt: an XML parser context
1774
 *
1775
 * Pops the top parser input from the input stack
1776
 *
1777
 * Returns the input just removed
1778
 */
1779
xmlParserInputPtr
1780
inputPop(xmlParserCtxtPtr ctxt)
1781
10.9M
{
1782
10.9M
    xmlParserInputPtr ret;
1783
1784
10.9M
    if (ctxt == NULL)
1785
0
        return(NULL);
1786
10.9M
    if (ctxt->inputNr <= 0)
1787
6.89M
        return (NULL);
1788
4.08M
    ctxt->inputNr--;
1789
4.08M
    if (ctxt->inputNr > 0)
1790
839k
        ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1791
3.24M
    else
1792
3.24M
        ctxt->input = NULL;
1793
4.08M
    ret = ctxt->inputTab[ctxt->inputNr];
1794
4.08M
    ctxt->inputTab[ctxt->inputNr] = NULL;
1795
4.08M
    return (ret);
1796
10.9M
}
1797
/**
1798
 * nodePush:
1799
 * @ctxt:  an XML parser context
1800
 * @value:  the element node
1801
 *
1802
 * Pushes a new element node on top of the node stack
1803
 *
1804
 * Returns -1 in case of error, the index in the stack otherwise
1805
 */
1806
int
1807
nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1808
12.5M
{
1809
12.5M
    if (ctxt == NULL) return(0);
1810
12.5M
    if (ctxt->nodeNr >= ctxt->nodeMax) {
1811
41.2k
        xmlNodePtr *tmp;
1812
1813
41.2k
  tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1814
41.2k
                                      ctxt->nodeMax * 2 *
1815
41.2k
                                      sizeof(ctxt->nodeTab[0]));
1816
41.2k
        if (tmp == NULL) {
1817
0
            xmlErrMemory(ctxt, NULL);
1818
0
            return (-1);
1819
0
        }
1820
41.2k
        ctxt->nodeTab = tmp;
1821
41.2k
  ctxt->nodeMax *= 2;
1822
41.2k
    }
1823
12.5M
    if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1824
12.5M
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
1825
0
  xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
1826
0
     "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
1827
0
        xmlParserMaxDepth);
1828
0
  xmlHaltParser(ctxt);
1829
0
  return(-1);
1830
0
    }
1831
12.5M
    ctxt->nodeTab[ctxt->nodeNr] = value;
1832
12.5M
    ctxt->node = value;
1833
12.5M
    return (ctxt->nodeNr++);
1834
12.5M
}
1835
1836
/**
1837
 * nodePop:
1838
 * @ctxt: an XML parser context
1839
 *
1840
 * Pops the top element node from the node stack
1841
 *
1842
 * Returns the node just removed
1843
 */
1844
xmlNodePtr
1845
nodePop(xmlParserCtxtPtr ctxt)
1846
7.40M
{
1847
7.40M
    xmlNodePtr ret;
1848
1849
7.40M
    if (ctxt == NULL) return(NULL);
1850
7.40M
    if (ctxt->nodeNr <= 0)
1851
262k
        return (NULL);
1852
7.13M
    ctxt->nodeNr--;
1853
7.13M
    if (ctxt->nodeNr > 0)
1854
6.51M
        ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1855
624k
    else
1856
624k
        ctxt->node = NULL;
1857
7.13M
    ret = ctxt->nodeTab[ctxt->nodeNr];
1858
7.13M
    ctxt->nodeTab[ctxt->nodeNr] = NULL;
1859
7.13M
    return (ret);
1860
7.40M
}
1861
1862
/**
1863
 * nameNsPush:
1864
 * @ctxt:  an XML parser context
1865
 * @value:  the element name
1866
 * @prefix:  the element prefix
1867
 * @URI:  the element namespace name
1868
 * @line:  the current line number for error messages
1869
 * @nsNr:  the number of namespaces pushed on the namespace table
1870
 *
1871
 * Pushes a new element name/prefix/URL on top of the name stack
1872
 *
1873
 * Returns -1 in case of error, the index in the stack otherwise
1874
 */
1875
static int
1876
nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1877
           const xmlChar *prefix, const xmlChar *URI, int line, int nsNr)
1878
14.9M
{
1879
14.9M
    xmlStartTag *tag;
1880
1881
14.9M
    if (ctxt->nameNr >= ctxt->nameMax) {
1882
67.5k
        const xmlChar * *tmp;
1883
67.5k
        xmlStartTag *tmp2;
1884
67.5k
        ctxt->nameMax *= 2;
1885
67.5k
        tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1886
67.5k
                                    ctxt->nameMax *
1887
67.5k
                                    sizeof(ctxt->nameTab[0]));
1888
67.5k
        if (tmp == NULL) {
1889
0
      ctxt->nameMax /= 2;
1890
0
      goto mem_error;
1891
0
        }
1892
67.5k
  ctxt->nameTab = tmp;
1893
67.5k
        tmp2 = (xmlStartTag *) xmlRealloc((void * *)ctxt->pushTab,
1894
67.5k
                                    ctxt->nameMax *
1895
67.5k
                                    sizeof(ctxt->pushTab[0]));
1896
67.5k
        if (tmp2 == NULL) {
1897
0
      ctxt->nameMax /= 2;
1898
0
      goto mem_error;
1899
0
        }
1900
67.5k
  ctxt->pushTab = tmp2;
1901
14.8M
    } else if (ctxt->pushTab == NULL) {
1902
2.86M
        ctxt->pushTab = (xmlStartTag *) xmlMalloc(ctxt->nameMax *
1903
2.86M
                                            sizeof(ctxt->pushTab[0]));
1904
2.86M
        if (ctxt->pushTab == NULL)
1905
0
            goto mem_error;
1906
2.86M
    }
1907
14.9M
    ctxt->nameTab[ctxt->nameNr] = value;
1908
14.9M
    ctxt->name = value;
1909
14.9M
    tag = &ctxt->pushTab[ctxt->nameNr];
1910
14.9M
    tag->prefix = prefix;
1911
14.9M
    tag->URI = URI;
1912
14.9M
    tag->line = line;
1913
14.9M
    tag->nsNr = nsNr;
1914
14.9M
    return (ctxt->nameNr++);
1915
0
mem_error:
1916
0
    xmlErrMemory(ctxt, NULL);
1917
0
    return (-1);
1918
14.9M
}
1919
#ifdef LIBXML_PUSH_ENABLED
1920
/**
1921
 * nameNsPop:
1922
 * @ctxt: an XML parser context
1923
 *
1924
 * Pops the top element/prefix/URI name from the name stack
1925
 *
1926
 * Returns the name just removed
1927
 */
1928
static const xmlChar *
1929
nameNsPop(xmlParserCtxtPtr ctxt)
1930
411k
{
1931
411k
    const xmlChar *ret;
1932
1933
411k
    if (ctxt->nameNr <= 0)
1934
0
        return (NULL);
1935
411k
    ctxt->nameNr--;
1936
411k
    if (ctxt->nameNr > 0)
1937
372k
        ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1938
38.7k
    else
1939
38.7k
        ctxt->name = NULL;
1940
411k
    ret = ctxt->nameTab[ctxt->nameNr];
1941
411k
    ctxt->nameTab[ctxt->nameNr] = NULL;
1942
411k
    return (ret);
1943
411k
}
1944
#endif /* LIBXML_PUSH_ENABLED */
1945
1946
/**
1947
 * namePush:
1948
 * @ctxt:  an XML parser context
1949
 * @value:  the element name
1950
 *
1951
 * Pushes a new element name on top of the name stack
1952
 *
1953
 * Returns -1 in case of error, the index in the stack otherwise
1954
 */
1955
int
1956
namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
1957
0
{
1958
0
    if (ctxt == NULL) return (-1);
1959
1960
0
    if (ctxt->nameNr >= ctxt->nameMax) {
1961
0
        const xmlChar * *tmp;
1962
0
        tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1963
0
                                    ctxt->nameMax * 2 *
1964
0
                                    sizeof(ctxt->nameTab[0]));
1965
0
        if (tmp == NULL) {
1966
0
      goto mem_error;
1967
0
        }
1968
0
  ctxt->nameTab = tmp;
1969
0
        ctxt->nameMax *= 2;
1970
0
    }
1971
0
    ctxt->nameTab[ctxt->nameNr] = value;
1972
0
    ctxt->name = value;
1973
0
    return (ctxt->nameNr++);
1974
0
mem_error:
1975
0
    xmlErrMemory(ctxt, NULL);
1976
0
    return (-1);
1977
0
}
1978
/**
1979
 * namePop:
1980
 * @ctxt: an XML parser context
1981
 *
1982
 * Pops the top element name from the name stack
1983
 *
1984
 * Returns the name just removed
1985
 */
1986
const xmlChar *
1987
namePop(xmlParserCtxtPtr ctxt)
1988
9.95M
{
1989
9.95M
    const xmlChar *ret;
1990
1991
9.95M
    if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1992
0
        return (NULL);
1993
9.95M
    ctxt->nameNr--;
1994
9.95M
    if (ctxt->nameNr > 0)
1995
7.12M
        ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1996
2.82M
    else
1997
2.82M
        ctxt->name = NULL;
1998
9.95M
    ret = ctxt->nameTab[ctxt->nameNr];
1999
9.95M
    ctxt->nameTab[ctxt->nameNr] = NULL;
2000
9.95M
    return (ret);
2001
9.95M
}
2002
2003
18.4M
static int spacePush(xmlParserCtxtPtr ctxt, int val) {
2004
18.4M
    if (ctxt->spaceNr >= ctxt->spaceMax) {
2005
75.7k
        int *tmp;
2006
2007
75.7k
  ctxt->spaceMax *= 2;
2008
75.7k
        tmp = (int *) xmlRealloc(ctxt->spaceTab,
2009
75.7k
                           ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
2010
75.7k
        if (tmp == NULL) {
2011
0
      xmlErrMemory(ctxt, NULL);
2012
0
      ctxt->spaceMax /=2;
2013
0
      return(-1);
2014
0
  }
2015
75.7k
  ctxt->spaceTab = tmp;
2016
75.7k
    }
2017
18.4M
    ctxt->spaceTab[ctxt->spaceNr] = val;
2018
18.4M
    ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
2019
18.4M
    return(ctxt->spaceNr++);
2020
18.4M
}
2021
2022
14.2M
static int spacePop(xmlParserCtxtPtr ctxt) {
2023
14.2M
    int ret;
2024
14.2M
    if (ctxt->spaceNr <= 0) return(0);
2025
14.2M
    ctxt->spaceNr--;
2026
14.2M
    if (ctxt->spaceNr > 0)
2027
14.2M
  ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
2028
39.8k
    else
2029
39.8k
        ctxt->space = &ctxt->spaceTab[0];
2030
14.2M
    ret = ctxt->spaceTab[ctxt->spaceNr];
2031
14.2M
    ctxt->spaceTab[ctxt->spaceNr] = -1;
2032
14.2M
    return(ret);
2033
14.2M
}
2034
2035
/*
2036
 * Macros for accessing the content. Those should be used only by the parser,
2037
 * and not exported.
2038
 *
2039
 * Dirty macros, i.e. one often need to make assumption on the context to
2040
 * use them
2041
 *
2042
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
2043
 *           To be used with extreme caution since operations consuming
2044
 *           characters may move the input buffer to a different location !
2045
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
2046
 *           This should be used internally by the parser
2047
 *           only to compare to ASCII values otherwise it would break when
2048
 *           running with UTF-8 encoding.
2049
 *   RAW     same as CUR but in the input buffer, bypass any token
2050
 *           extraction that may have been done
2051
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR it should be used only
2052
 *           to compare on ASCII based substring.
2053
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
2054
 *           strings without newlines within the parser.
2055
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII
2056
 *           defined char within the parser.
2057
 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
2058
 *
2059
 *   NEXT    Skip to the next character, this does the proper decoding
2060
 *           in UTF-8 mode. It also pop-up unfinished entities on the fly.
2061
 *   NEXTL(l) Skip the current unicode character of l xmlChars long.
2062
 *   CUR_CHAR(l) returns the current unicode character (int), set l
2063
 *           to the number of xmlChars used for the encoding [0-5].
2064
 *   CUR_SCHAR  same but operate on a string instead of the context
2065
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
2066
 *            the index
2067
 *   GROW, SHRINK  handling of input buffers
2068
 */
2069
/*
 * Direct accessors on the current input. They all expand to expressions
 * on a variable named `ctxt` that must be in scope at the point of use.
 * RAW and CUR both read the byte at the current position.
 */
#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur
#define BASE_PTR ctxt->input->base

/*
 * CMPn(s, c1, ..., cn) is true when the first n bytes at s are exactly
 * c1..cn. Comparison stops at the first mismatch. Every argument is
 * parenthesized in the expansion so that expressions such as
 * `f ? 'a' : 'b'` can be passed safely; note that s is still evaluated
 * once per compared byte, so it must not have side effects.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((unsigned char *) (s))[ 0 ] == (c1) && \
    ((unsigned char *) (s))[ 1 ] == (c2) && \
    ((unsigned char *) (s))[ 2 ] == (c3) && \
    ((unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((unsigned char *) (s))[ 9 ] == (c10) )
2093
/*
 * SKIP(val): advance the current input by val bytes and the column by
 * the same amount, then refill the input if the new position holds a 0
 * byte. No newline handling: only for ASCII runs without newlines.
 * val is evaluated twice.
 */
#define SKIP(val) do {							\
    ctxt->input->cur += (val),ctxt->input->col+=(val);			\
    if (*ctxt->input->cur == 0)						\
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);			\
  } while (0)

/*
 * SKIPL(val): advance the current input by val bytes one at a time,
 * keeping line/col up to date across newlines, then refill the input if
 * the new position holds a 0 byte. val is parenthesized in the loop
 * condition so that any expression can be passed; it is re-evaluated on
 * every iteration.
 */
#define SKIPL(val) do {							\
    int skipl;								\
    for(skipl=0; skipl<(val); skipl++) {				\
	if (*(ctxt->input->cur) == '\n') {				\
	ctxt->input->line++; ctxt->input->col = 1;			\
	} else ctxt->input->col++;					\
	ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == 0)						\
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);			\
  } while (0)
2111
2112
108M
#define SHRINK if ((ctxt->progressive == 0) &&       \
2113
108M
       (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
2114
108M
       (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
2115
108M
  xmlSHRINK (ctxt);
2116
2117
728k
static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
2118
728k
    xmlParserInputShrink(ctxt->input);
2119
728k
    if (*ctxt->input->cur == 0)
2120
86.8k
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2121
728k
}
2122
2123
313M
#define GROW if ((ctxt->progressive == 0) &&       \
2124
313M
     (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
2125
313M
  xmlGROW (ctxt);
2126
2127
101M
static void xmlGROW (xmlParserCtxtPtr ctxt) {
2128
101M
    ptrdiff_t curEnd = ctxt->input->end - ctxt->input->cur;
2129
101M
    ptrdiff_t curBase = ctxt->input->cur - ctxt->input->base;
2130
2131
101M
    if (((curEnd > XML_MAX_LOOKUP_LIMIT) ||
2132
101M
         (curBase > XML_MAX_LOOKUP_LIMIT)) &&
2133
101M
         ((ctxt->input->buf) &&
2134
0
          (ctxt->input->buf->readcallback != xmlInputReadCallbackNop)) &&
2135
101M
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
2136
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
2137
0
        xmlHaltParser(ctxt);
2138
0
  return;
2139
0
    }
2140
101M
    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2141
101M
    if ((ctxt->input->cur > ctxt->input->end) ||
2142
101M
        (ctxt->input->cur < ctxt->input->base)) {
2143
0
        xmlHaltParser(ctxt);
2144
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "cur index out of bound");
2145
0
  return;
2146
0
    }
2147
101M
    if ((ctxt->input->cur != NULL) && (*ctxt->input->cur == 0))
2148
4.03M
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2149
101M
}
2150
/* Shorthands for the character-level helpers, applied to `ctxt`. */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: advance by exactly one byte and one column, refilling the input
 * when the new position holds a 0 byte. Expands to a brace block, not a
 * do/while statement.
 */
#define NEXT1 {								\
	ctxt->input->col++;						\
	ctxt->input->cur++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }

/*
 * NEXTL(l): step over the current character, which is l bytes long,
 * updating line/col first (a '\n' starts a new line at column 1).
 * Does not refill the input.
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += l;						\
  } while (0)

/* Both take the NAME of an int variable; its address receives the length. */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l)
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)

/*
 * COPY_BUF(l,b,i,v): append character v to buffer b at index i, advancing
 * i. A length l of 1 stores v directly, anything else goes through
 * xmlCopyCharMultiByte(). Expands to an unbraced if/else without a
 * trailing semicolon.
 */
#define COPY_BUF(l,b,i,v)						\
    if (l == 1) b[i++] = v;						\
    else i += xmlCopyCharMultiByte(&b[i],v)

/* Bytes consumed so far: input->consumed plus the offset of cur from base. */
#define CUR_CONSUMED \
    (ctxt->input->consumed + (ctxt->input->cur - ctxt->input->base))
2178
2179
/**
2180
 * xmlSkipBlankChars:
2181
 * @ctxt:  the XML parser context
2182
 *
2183
 * skip all blanks character found at that point in the input streams.
2184
 * It pops up finished entities in the process if allowable at that point.
2185
 *
2186
 * Returns the number of space chars skipped
2187
 */
2188
2189
int
2190
58.3M
xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
2191
58.3M
    int res = 0;
2192
2193
    /*
2194
     * It's Okay to use CUR/NEXT here since all the blanks are on
2195
     * the ASCII range.
2196
     */
2197
58.3M
    if (((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) ||
2198
58.3M
        (ctxt->instate == XML_PARSER_START)) {
2199
47.5M
  const xmlChar *cur;
2200
  /*
2201
   * if we are in the document content, go really fast
2202
   */
2203
47.5M
  cur = ctxt->input->cur;
2204
47.5M
  while (IS_BLANK_CH(*cur)) {
2205
27.5M
      if (*cur == '\n') {
2206
1.43M
    ctxt->input->line++; ctxt->input->col = 1;
2207
26.1M
      } else {
2208
26.1M
    ctxt->input->col++;
2209
26.1M
      }
2210
27.5M
      cur++;
2211
27.5M
      if (res < INT_MAX)
2212
27.5M
    res++;
2213
27.5M
      if (*cur == 0) {
2214
201k
    ctxt->input->cur = cur;
2215
201k
    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2216
201k
    cur = ctxt->input->cur;
2217
201k
      }
2218
27.5M
  }
2219
47.5M
  ctxt->input->cur = cur;
2220
47.5M
    } else {
2221
10.8M
        int expandPE = ((ctxt->external != 0) || (ctxt->inputNr != 1));
2222
2223
27.9M
  while (1) {
2224
27.9M
            if (IS_BLANK_CH(CUR)) { /* CHECKED tstblanks.xml */
2225
16.0M
    NEXT;
2226
16.0M
      } else if (CUR == '%') {
2227
                /*
2228
                 * Need to handle support of entities branching here
2229
                 */
2230
1.24M
          if ((expandPE == 0) || (IS_BLANK_CH(NXT(1))) || (NXT(1) == 0))
2231
187k
                    break;
2232
1.05M
          xmlParsePEReference(ctxt);
2233
10.6M
            } else if (CUR == 0) {
2234
94.8k
                if (ctxt->inputNr <= 1)
2235
35.1k
                    break;
2236
59.6k
                xmlPopInput(ctxt);
2237
10.5M
            } else {
2238
10.5M
                break;
2239
10.5M
            }
2240
2241
            /*
2242
             * Also increase the counter when entering or exiting a PERef.
2243
             * The spec says: "When a parameter-entity reference is recognized
2244
             * in the DTD and included, its replacement text MUST be enlarged
2245
             * by the attachment of one leading and one following space (#x20)
2246
             * character."
2247
             */
2248
17.1M
      if (res < INT_MAX)
2249
17.1M
    res++;
2250
17.1M
        }
2251
10.8M
    }
2252
58.3M
    return(res);
2253
58.3M
}
2254
2255
/************************************************************************
2256
 *                  *
2257
 *    Commodity functions to handle entities      *
2258
 *                  *
2259
 ************************************************************************/
2260
2261
/**
2262
 * xmlPopInput:
2263
 * @ctxt:  an XML parser context
2264
 *
2265
 * xmlPopInput: the current input pointed by ctxt->input came to an end
2266
 *          pop it and return the next char.
2267
 *
2268
 * Returns the current xmlChar in the parser context
2269
 */
2270
xmlChar
2271
76.6k
xmlPopInput(xmlParserCtxtPtr ctxt) {
2272
76.6k
    if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
2273
76.6k
    if (xmlParserDebugEntities)
2274
0
  xmlGenericError(xmlGenericErrorContext,
2275
0
    "Popping input %d\n", ctxt->inputNr);
2276
76.6k
    if ((ctxt->inputNr > 1) && (ctxt->inSubset == 0) &&
2277
76.6k
        (ctxt->instate != XML_PARSER_EOF))
2278
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2279
0
                    "Unfinished entity outside the DTD");
2280
76.6k
    xmlFreeInputStream(inputPop(ctxt));
2281
76.6k
    if (*ctxt->input->cur == 0)
2282
1.00k
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2283
76.6k
    return(CUR);
2284
76.6k
}
2285
2286
/**
2287
 * xmlPushInput:
2288
 * @ctxt:  an XML parser context
2289
 * @input:  an XML parser input fragment (entity, XML fragment ...).
2290
 *
2291
 * xmlPushInput: switch to a new input stream which is stacked on top
2292
 *               of the previous one(s).
2293
 * Returns -1 in case of error or the index in the input stack
2294
 */
2295
int
2296
872k
xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
2297
872k
    int ret;
2298
872k
    if (input == NULL) return(-1);
2299
2300
871k
    if (xmlParserDebugEntities) {
2301
0
  if ((ctxt->input != NULL) && (ctxt->input->filename))
2302
0
      xmlGenericError(xmlGenericErrorContext,
2303
0
        "%s(%d): ", ctxt->input->filename,
2304
0
        ctxt->input->line);
2305
0
  xmlGenericError(xmlGenericErrorContext,
2306
0
    "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
2307
0
    }
2308
871k
    if (((ctxt->inputNr > 40) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2309
871k
        (ctxt->inputNr > 1024)) {
2310
2.83k
        xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2311
764k
        while (ctxt->inputNr > 1)
2312
761k
            xmlFreeInputStream(inputPop(ctxt));
2313
2.83k
  return(-1);
2314
2.83k
    }
2315
869k
    ret = inputPush(ctxt, input);
2316
869k
    if (ctxt->instate == XML_PARSER_EOF)
2317
0
        return(-1);
2318
869k
    GROW;
2319
869k
    return(ret);
2320
869k
}
2321
2322
/**
2323
 * xmlParseCharRef:
2324
 * @ctxt:  an XML parser context
2325
 *
2326
 * DEPRECATED: Internal function, don't use.
2327
 *
2328
 * parse Reference declarations
2329
 *
2330
 * [66] CharRef ::= '&#' [0-9]+ ';' |
2331
 *                  '&#x' [0-9a-fA-F]+ ';'
2332
 *
2333
 * [ WFC: Legal Character ]
2334
 * Characters referred to using character references must match the
2335
 * production for Char.
2336
 *
2337
 * Returns the value parsed (as an int), 0 in case of error
2338
 */
2339
int
2340
1.58M
xmlParseCharRef(xmlParserCtxtPtr ctxt) {
2341
1.58M
    int val = 0;
2342
1.58M
    int count = 0;
2343
2344
    /*
2345
     * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2346
     */
2347
1.58M
    if ((RAW == '&') && (NXT(1) == '#') &&
2348
1.58M
        (NXT(2) == 'x')) {
2349
364k
  SKIP(3);
2350
364k
  GROW;
2351
1.13M
  while (RAW != ';') { /* loop blocked by count */
2352
840k
      if (count++ > 20) {
2353
25.9k
    count = 0;
2354
25.9k
    GROW;
2355
25.9k
                if (ctxt->instate == XML_PARSER_EOF)
2356
0
                    return(0);
2357
25.9k
      }
2358
840k
      if ((RAW >= '0') && (RAW <= '9'))
2359
517k
          val = val * 16 + (CUR - '0');
2360
322k
      else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2361
219k
          val = val * 16 + (CUR - 'a') + 10;
2362
103k
      else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2363
33.7k
          val = val * 16 + (CUR - 'A') + 10;
2364
69.9k
      else {
2365
69.9k
    xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2366
69.9k
    val = 0;
2367
69.9k
    break;
2368
69.9k
      }
2369
770k
      if (val > 0x110000)
2370
287k
          val = 0x110000;
2371
2372
770k
      NEXT;
2373
770k
      count++;
2374
770k
  }
2375
364k
  if (RAW == ';') {
2376
      /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2377
294k
      ctxt->input->col++;
2378
294k
      ctxt->input->cur++;
2379
294k
  }
2380
1.21M
    } else if  ((RAW == '&') && (NXT(1) == '#')) {
2381
1.21M
  SKIP(2);
2382
1.21M
  GROW;
2383
4.84M
  while (RAW != ';') { /* loop blocked by count */
2384
3.74M
      if (count++ > 20) {
2385
35.7k
    count = 0;
2386
35.7k
    GROW;
2387
35.7k
                if (ctxt->instate == XML_PARSER_EOF)
2388
0
                    return(0);
2389
35.7k
      }
2390
3.74M
      if ((RAW >= '0') && (RAW <= '9'))
2391
3.62M
          val = val * 10 + (CUR - '0');
2392
118k
      else {
2393
118k
    xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2394
118k
    val = 0;
2395
118k
    break;
2396
118k
      }
2397
3.62M
      if (val > 0x110000)
2398
383k
          val = 0x110000;
2399
2400
3.62M
      NEXT;
2401
3.62M
      count++;
2402
3.62M
  }
2403
1.21M
  if (RAW == ';') {
2404
      /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2405
1.10M
      ctxt->input->col++;
2406
1.10M
      ctxt->input->cur++;
2407
1.10M
  }
2408
1.21M
    } else {
2409
0
        xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2410
0
    }
2411
2412
    /*
2413
     * [ WFC: Legal Character ]
2414
     * Characters referred to using character references must match the
2415
     * production for Char.
2416
     */
2417
1.58M
    if (val >= 0x110000) {
2418
4.86k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2419
4.86k
                "xmlParseCharRef: character reference out of bounds\n",
2420
4.86k
          val);
2421
1.57M
    } else if (IS_CHAR(val)) {
2422
1.38M
        return(val);
2423
1.38M
    } else {
2424
194k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2425
194k
                          "xmlParseCharRef: invalid xmlChar value %d\n",
2426
194k
                    val);
2427
194k
    }
2428
199k
    return(0);
2429
1.58M
}
2430
2431
/**
2432
 * xmlParseStringCharRef:
2433
 * @ctxt:  an XML parser context
2434
 * @str:  a pointer to an index in the string
2435
 *
2436
 * parse Reference declarations, variant parsing from a string rather
2437
 * than an an input flow.
2438
 *
2439
 * [66] CharRef ::= '&#' [0-9]+ ';' |
2440
 *                  '&#x' [0-9a-fA-F]+ ';'
2441
 *
2442
 * [ WFC: Legal Character ]
2443
 * Characters referred to using character references must match the
2444
 * production for Char.
2445
 *
2446
 * Returns the value parsed (as an int), 0 in case of error, str will be
2447
 *         updated to the current value of the index
2448
 */
2449
static int
2450
1.68M
xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2451
1.68M
    const xmlChar *ptr;
2452
1.68M
    xmlChar cur;
2453
1.68M
    int val = 0;
2454
2455
1.68M
    if ((str == NULL) || (*str == NULL)) return(0);
2456
1.68M
    ptr = *str;
2457
1.68M
    cur = *ptr;
2458
1.68M
    if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2459
758k
  ptr += 3;
2460
758k
  cur = *ptr;
2461
1.73M
  while (cur != ';') { /* Non input consuming loop */
2462
976k
      if ((cur >= '0') && (cur <= '9'))
2463
337k
          val = val * 16 + (cur - '0');
2464
639k
      else if ((cur >= 'a') && (cur <= 'f'))
2465
566k
          val = val * 16 + (cur - 'a') + 10;
2466
72.7k
      else if ((cur >= 'A') && (cur <= 'F'))
2467
70.4k
          val = val * 16 + (cur - 'A') + 10;
2468
2.27k
      else {
2469
2.27k
    xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2470
2.27k
    val = 0;
2471
2.27k
    break;
2472
2.27k
      }
2473
974k
      if (val > 0x110000)
2474
81.3k
          val = 0x110000;
2475
2476
974k
      ptr++;
2477
974k
      cur = *ptr;
2478
974k
  }
2479
758k
  if (cur == ';')
2480
756k
      ptr++;
2481
928k
    } else if  ((cur == '&') && (ptr[1] == '#')){
2482
928k
  ptr += 2;
2483
928k
  cur = *ptr;
2484
3.41M
  while (cur != ';') { /* Non input consuming loops */
2485
2.48M
      if ((cur >= '0') && (cur <= '9'))
2486
2.48M
          val = val * 10 + (cur - '0');
2487
2.69k
      else {
2488
2.69k
    xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2489
2.69k
    val = 0;
2490
2.69k
    break;
2491
2.69k
      }
2492
2.48M
      if (val > 0x110000)
2493
854
          val = 0x110000;
2494
2495
2.48M
      ptr++;
2496
2.48M
      cur = *ptr;
2497
2.48M
  }
2498
928k
  if (cur == ';')
2499
925k
      ptr++;
2500
928k
    } else {
2501
0
  xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2502
0
  return(0);
2503
0
    }
2504
1.68M
    *str = ptr;
2505
2506
    /*
2507
     * [ WFC: Legal Character ]
2508
     * Characters referred to using character references must match the
2509
     * production for Char.
2510
     */
2511
1.68M
    if (val >= 0x110000) {
2512
466
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2513
466
                "xmlParseStringCharRef: character reference out of bounds\n",
2514
466
                val);
2515
1.68M
    } else if (IS_CHAR(val)) {
2516
1.67M
        return(val);
2517
1.67M
    } else {
2518
6.16k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2519
6.16k
        "xmlParseStringCharRef: invalid xmlChar value %d\n",
2520
6.16k
        val);
2521
6.16k
    }
2522
6.62k
    return(0);
2523
1.68M
}
2524
2525
/**
2526
 * xmlParserHandlePEReference:
2527
 * @ctxt:  the parser context
2528
 *
2529
 * [69] PEReference ::= '%' Name ';'
2530
 *
2531
 * [ WFC: No Recursion ]
2532
 * A parsed entity must not contain a recursive
2533
 * reference to itself, either directly or indirectly.
2534
 *
2535
 * [ WFC: Entity Declared ]
2536
 * In a document without any DTD, a document with only an internal DTD
2537
 * subset which contains no parameter entity references, or a document
2538
 * with "standalone='yes'", ...  ... The declaration of a parameter
2539
 * entity must precede any reference to it...
2540
 *
2541
 * [ VC: Entity Declared ]
2542
 * In a document with an external subset or external parameter entities
2543
 * with "standalone='no'", ...  ... The declaration of a parameter entity
2544
 * must precede any reference to it...
2545
 *
2546
 * [ WFC: In DTD ]
2547
 * Parameter-entity references may only appear in the DTD.
2548
 * NOTE: misleading but this is handled.
2549
 *
2550
 * A PEReference may have been detected in the current input stream
2551
 * the handling is done accordingly to
2552
 *      http://www.w3.org/TR/REC-xml#entproc
2553
 * i.e.
2554
 *   - Included in literal in entity values
2555
 *   - Included as Parameter Entity reference within DTDs
2556
 */
2557
void
2558
0
xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
2559
0
    switch(ctxt->instate) {
2560
0
  case XML_PARSER_CDATA_SECTION:
2561
0
      return;
2562
0
        case XML_PARSER_COMMENT:
2563
0
      return;
2564
0
  case XML_PARSER_START_TAG:
2565
0
      return;
2566
0
  case XML_PARSER_END_TAG:
2567
0
      return;
2568
0
        case XML_PARSER_EOF:
2569
0
      xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
2570
0
      return;
2571
0
        case XML_PARSER_PROLOG:
2572
0
  case XML_PARSER_START:
2573
0
  case XML_PARSER_MISC:
2574
0
      xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
2575
0
      return;
2576
0
  case XML_PARSER_ENTITY_DECL:
2577
0
        case XML_PARSER_CONTENT:
2578
0
        case XML_PARSER_ATTRIBUTE_VALUE:
2579
0
        case XML_PARSER_PI:
2580
0
  case XML_PARSER_SYSTEM_LITERAL:
2581
0
  case XML_PARSER_PUBLIC_LITERAL:
2582
      /* we just ignore it there */
2583
0
      return;
2584
0
        case XML_PARSER_EPILOG:
2585
0
      xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
2586
0
      return;
2587
0
  case XML_PARSER_ENTITY_VALUE:
2588
      /*
2589
       * NOTE: in the case of entity values, we don't do the
2590
       *       substitution here since we need the literal
2591
       *       entity value to be able to save the internal
2592
       *       subset of the document.
2593
       *       This will be handled by xmlStringDecodeEntities
2594
       */
2595
0
      return;
2596
0
        case XML_PARSER_DTD:
2597
      /*
2598
       * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2599
       * In the internal DTD subset, parameter-entity references
2600
       * can occur only where markup declarations can occur, not
2601
       * within markup declarations.
2602
       * In that case this is handled in xmlParseMarkupDecl
2603
       */
2604
0
      if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2605
0
    return;
2606
0
      if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
2607
0
    return;
2608
0
            break;
2609
0
        case XML_PARSER_IGNORE:
2610
0
            return;
2611
0
    }
2612
2613
0
    xmlParsePEReference(ctxt);
2614
0
}
2615
/*
 * Macro used to grow the current buffer.
 * The new size is buffer##_size * 2 + n; n is parenthesized in the
 * expansion so that any expression can be passed.
 * buffer##_size is expected to be a size_t
 * mem_error: is expected to handle memory allocation failures, and is
 * also the target when the size computation wraps around. On failure
 * buffer and buffer##_size are left unchanged.
 */
#define growBuffer(buffer, n) {						\
    xmlChar *tmp;							\
    size_t new_size = buffer##_size * 2 + (n);                          \
    if (new_size < buffer##_size) goto mem_error;                       \
    tmp = (xmlChar *) xmlRealloc(buffer, new_size);                     \
    if (tmp == NULL) goto mem_error;					\
    buffer = tmp;							\
    buffer##_size = new_size;                                           \
}
2630
2631
/**
2632
 * xmlStringLenDecodeEntities:
2633
 * @ctxt:  the parser context
2634
 * @str:  the input string
2635
 * @len: the string length
2636
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2637
 * @end:  an end marker xmlChar, 0 if none
2638
 * @end2:  an end marker xmlChar, 0 if none
2639
 * @end3:  an end marker xmlChar, 0 if none
2640
 *
2641
 * Takes a entity string content and process to do the adequate substitutions.
2642
 *
2643
 * [67] Reference ::= EntityRef | CharRef
2644
 *
2645
 * [69] PEReference ::= '%' Name ';'
2646
 *
2647
 * Returns A newly allocated string with the substitution done. The caller
2648
 *      must deallocate it !
2649
 */
2650
xmlChar *
2651
xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2652
5.32M
          int what, xmlChar end, xmlChar  end2, xmlChar end3) {
2653
5.32M
    xmlChar *buffer = NULL;
2654
5.32M
    size_t buffer_size = 0;
2655
5.32M
    size_t nbchars = 0;
2656
2657
5.32M
    xmlChar *current = NULL;
2658
5.32M
    xmlChar *rep = NULL;
2659
5.32M
    const xmlChar *last;
2660
5.32M
    xmlEntityPtr ent;
2661
5.32M
    int c,l;
2662
2663
5.32M
    if ((ctxt == NULL) || (str == NULL) || (len < 0))
2664
0
  return(NULL);
2665
5.32M
    last = str + len;
2666
2667
5.32M
    if (((ctxt->depth > 40) &&
2668
5.32M
         ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2669
5.32M
  (ctxt->depth > 1024)) {
2670
7.81k
  xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2671
7.81k
  return(NULL);
2672
7.81k
    }
2673
2674
    /*
2675
     * allocate a translation buffer.
2676
     */
2677
5.31M
    buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
2678
5.31M
    buffer = (xmlChar *) xmlMallocAtomic(buffer_size);
2679
5.31M
    if (buffer == NULL) goto mem_error;
2680
2681
    /*
2682
     * OK loop until we reach one of the ending char or a size limit.
2683
     * we are operating on already parsed values.
2684
     */
2685
5.31M
    if (str < last)
2686
5.28M
  c = CUR_SCHAR(str, l);
2687
38.6k
    else
2688
38.6k
        c = 0;
2689
393M
    while ((c != 0) && (c != end) && /* non input consuming loop */
2690
393M
           (c != end2) && (c != end3) &&
2691
393M
           (ctxt->instate != XML_PARSER_EOF)) {
2692
2693
389M
  if (c == 0) break;
2694
389M
        if ((c == '&') && (str[1] == '#')) {
2695
1.68M
      int val = xmlParseStringCharRef(ctxt, &str);
2696
1.68M
      if (val == 0)
2697
6.62k
                goto int_error;
2698
1.67M
      COPY_BUF(0,buffer,nbchars,val);
2699
1.67M
      if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2700
70.3k
          growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2701
70.3k
      }
2702
387M
  } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2703
1.38M
      if (xmlParserDebugEntities)
2704
0
    xmlGenericError(xmlGenericErrorContext,
2705
0
      "String decoding Entity Reference: %.30s\n",
2706
0
      str);
2707
1.38M
      ent = xmlParseStringEntityRef(ctxt, &str);
2708
1.38M
      xmlParserEntityCheck(ctxt, 0, ent, 0);
2709
1.38M
      if (ent != NULL)
2710
1.19M
          ctxt->nbentities += ent->checked / 2;
2711
1.38M
      if ((ent != NULL) &&
2712
1.38M
    (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2713
295k
    if (ent->content != NULL) {
2714
295k
        COPY_BUF(0,buffer,nbchars,ent->content[0]);
2715
295k
        if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2716
18.9k
      growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2717
18.9k
        }
2718
295k
    } else {
2719
0
        xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2720
0
          "predefined entity has no content\n");
2721
0
                    goto int_error;
2722
0
    }
2723
1.08M
      } else if ((ent != NULL) && (ent->content != NULL)) {
2724
895k
    ctxt->depth++;
2725
895k
    rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2726
895k
                            0, 0, 0);
2727
895k
    ctxt->depth--;
2728
895k
    if (rep == NULL) {
2729
690k
                    ent->content[0] = 0;
2730
690k
                    goto int_error;
2731
690k
                }
2732
2733
205k
                current = rep;
2734
8.56M
                while (*current != 0) { /* non input consuming loop */
2735
8.36M
                    buffer[nbchars++] = *current++;
2736
8.36M
                    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2737
14.9k
                        if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
2738
0
                            goto int_error;
2739
44.8k
                        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2740
44.8k
                    }
2741
8.36M
                }
2742
205k
                xmlFree(rep);
2743
205k
                rep = NULL;
2744
205k
      } else if (ent != NULL) {
2745
7.90k
    int i = xmlStrlen(ent->name);
2746
7.90k
    const xmlChar *cur = ent->name;
2747
2748
7.90k
    buffer[nbchars++] = '&';
2749
7.90k
    if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) {
2750
18
        growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE);
2751
18
    }
2752
19.2k
    for (;i > 0;i--)
2753
11.3k
        buffer[nbchars++] = *cur++;
2754
7.90k
    buffer[nbchars++] = ';';
2755
7.90k
      }
2756
386M
  } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2757
2.12M
      if (xmlParserDebugEntities)
2758
0
    xmlGenericError(xmlGenericErrorContext,
2759
0
      "String decoding PE Reference: %.30s\n", str);
2760
2.12M
      ent = xmlParseStringPEReference(ctxt, &str);
2761
2.12M
      xmlParserEntityCheck(ctxt, 0, ent, 0);
2762
2.12M
      if (ent != NULL)
2763
687k
          ctxt->nbentities += ent->checked / 2;
2764
2.12M
      if (ent != NULL) {
2765
687k
                if (ent->content == NULL) {
2766
        /*
2767
         * Note: external parsed entities will not be loaded,
2768
         * it is not required for a non-validating parser to
2769
         * complete external PEReferences coming from the
2770
         * internal subset
2771
         */
2772
4.93k
        if (((ctxt->options & XML_PARSE_NOENT) != 0) ||
2773
4.93k
      ((ctxt->options & XML_PARSE_DTDVALID) != 0) ||
2774
4.93k
      (ctxt->validate != 0)) {
2775
4.70k
      xmlLoadEntityContent(ctxt, ent);
2776
4.70k
        } else {
2777
223
      xmlWarningMsg(ctxt, XML_ERR_ENTITY_PROCESSING,
2778
223
      "not validating will not read content for PE entity %s\n",
2779
223
                          ent->name, NULL);
2780
223
        }
2781
4.93k
    }
2782
687k
    ctxt->depth++;
2783
687k
    rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2784
687k
                            0, 0, 0);
2785
687k
    ctxt->depth--;
2786
687k
    if (rep == NULL) {
2787
684k
                    if (ent->content != NULL)
2788
683k
                        ent->content[0] = 0;
2789
684k
                    goto int_error;
2790
684k
                }
2791
3.18k
                current = rep;
2792
2.70M
                while (*current != 0) { /* non input consuming loop */
2793
2.70M
                    buffer[nbchars++] = *current++;
2794
2.70M
                    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2795
3.46k
                        if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
2796
373
                            goto int_error;
2797
9.27k
                        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2798
9.27k
                    }
2799
2.70M
                }
2800
2.81k
                xmlFree(rep);
2801
2.81k
                rep = NULL;
2802
2.81k
      }
2803
383M
  } else {
2804
383M
      COPY_BUF(l,buffer,nbchars,c);
2805
383M
      str += l;
2806
383M
      if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2807
1.38M
          growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2808
1.38M
      }
2809
383M
  }
2810
387M
  if (str < last)
2811
383M
      c = CUR_SCHAR(str, l);
2812
3.89M
  else
2813
3.89M
      c = 0;
2814
387M
    }
2815
3.93M
    buffer[nbchars] = 0;
2816
3.93M
    return(buffer);
2817
2818
0
mem_error:
2819
0
    xmlErrMemory(ctxt, NULL);
2820
1.38M
int_error:
2821
1.38M
    if (rep != NULL)
2822
373
        xmlFree(rep);
2823
1.38M
    if (buffer != NULL)
2824
1.38M
        xmlFree(buffer);
2825
1.38M
    return(NULL);
2826
0
}
2827
2828
/**
2829
 * xmlStringDecodeEntities:
2830
 * @ctxt:  the parser context
2831
 * @str:  the input string
2832
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2833
 * @end:  an end marker xmlChar, 0 if none
2834
 * @end2:  an end marker xmlChar, 0 if none
2835
 * @end3:  an end marker xmlChar, 0 if none
2836
 *
2837
 * Takes a entity string content and process to do the adequate substitutions.
2838
 *
2839
 * [67] Reference ::= EntityRef | CharRef
2840
 *
2841
 * [69] PEReference ::= '%' Name ';'
2842
 *
2843
 * Returns A newly allocated string with the substitution done. The caller
2844
 *      must deallocate it !
2845
 */
2846
xmlChar *
2847
xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2848
5.30M
            xmlChar end, xmlChar  end2, xmlChar end3) {
2849
5.30M
    if ((ctxt == NULL) || (str == NULL)) return(NULL);
2850
5.29M
    return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2851
5.29M
           end, end2, end3));
2852
5.30M
}
2853
2854
/************************************************************************
2855
 *                  *
2856
 *    Commodity functions, cleanup needed ?     *
2857
 *                  *
2858
 ************************************************************************/
2859
2860
/**
2861
 * areBlanks:
2862
 * @ctxt:  an XML parser context
2863
 * @str:  a xmlChar *
2864
 * @len:  the size of @str
2865
 * @blank_chars: we know the chars are blanks
2866
 *
2867
 * Is this a sequence of blank chars that one can ignore ?
2868
 *
2869
 * Returns 1 if ignorable 0 otherwise.
2870
 */
2871
2872
static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2873
8.25M
                     int blank_chars) {
2874
8.25M
    int i, ret;
2875
8.25M
    xmlNodePtr lastChild;
2876
2877
    /*
2878
     * Don't spend time trying to differentiate them, the same callback is
2879
     * used !
2880
     */
2881
8.25M
    if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
2882
314k
  return(0);
2883
2884
    /*
2885
     * Check for xml:space value.
2886
     */
2887
7.94M
    if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2888
7.94M
        (*(ctxt->space) == -2))
2889
2.77M
  return(0);
2890
2891
    /*
2892
     * Check that the string is made of blanks
2893
     */
2894
5.16M
    if (blank_chars == 0) {
2895
10.4M
  for (i = 0;i < len;i++)
2896
9.94M
      if (!(IS_BLANK_CH(str[i]))) return(0);
2897
2.20M
    }
2898
2899
    /*
2900
     * Look if the element is mixed content in the DTD if available
2901
     */
2902
3.44M
    if (ctxt->node == NULL) return(0);
2903
3.36M
    if (ctxt->myDoc != NULL) {
2904
3.36M
  ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2905
3.36M
        if (ret == 0) return(1);
2906
2.99M
        if (ret == 1) return(0);
2907
2.99M
    }
2908
2909
    /*
2910
     * Otherwise, heuristic :-\
2911
     */
2912
2.98M
    if ((RAW != '<') && (RAW != 0xD)) return(0);
2913
2.76M
    if ((ctxt->node->children == NULL) &&
2914
2.76M
  (RAW == '<') && (NXT(1) == '/')) return(0);
2915
2916
2.75M
    lastChild = xmlGetLastChild(ctxt->node);
2917
2.75M
    if (lastChild == NULL) {
2918
1.02M
        if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2919
1.02M
            (ctxt->node->content != NULL)) return(0);
2920
1.73M
    } else if (xmlNodeIsText(lastChild))
2921
611k
        return(0);
2922
1.12M
    else if ((ctxt->node->children != NULL) &&
2923
1.12M
             (xmlNodeIsText(ctxt->node->children)))
2924
164k
        return(0);
2925
1.98M
    return(1);
2926
2.75M
}
2927
2928
/************************************************************************
2929
 *                  *
2930
 *    Extra stuff for namespace support     *
2931
 *  Relates to http://www.w3.org/TR/WD-xml-names      *
2932
 *                  *
2933
 ************************************************************************/
2934
2935
/**
2936
 * xmlSplitQName:
2937
 * @ctxt:  an XML parser context
2938
 * @name:  an XML parser context
2939
 * @prefix:  a xmlChar **
2940
 *
2941
 * parse an UTF8 encoded XML qualified name string
2942
 *
2943
 * [NS 5] QName ::= (Prefix ':')? LocalPart
2944
 *
2945
 * [NS 6] Prefix ::= NCName
2946
 *
2947
 * [NS 7] LocalPart ::= NCName
2948
 *
2949
 * Returns the local part, and prefix is updated
2950
 *   to get the Prefix if any.
2951
 */
2952
2953
xmlChar *
2954
11.9M
xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2955
11.9M
    xmlChar buf[XML_MAX_NAMELEN + 5];
2956
11.9M
    xmlChar *buffer = NULL;
2957
11.9M
    int len = 0;
2958
11.9M
    int max = XML_MAX_NAMELEN;
2959
11.9M
    xmlChar *ret = NULL;
2960
11.9M
    const xmlChar *cur = name;
2961
11.9M
    int c;
2962
2963
11.9M
    if (prefix == NULL) return(NULL);
2964
11.9M
    *prefix = NULL;
2965
2966
11.9M
    if (cur == NULL) return(NULL);
2967
2968
#ifndef XML_XML_NAMESPACE
2969
    /* xml: prefix is not really a namespace */
2970
    if ((cur[0] == 'x') && (cur[1] == 'm') &&
2971
        (cur[2] == 'l') && (cur[3] == ':'))
2972
  return(xmlStrdup(name));
2973
#endif
2974
2975
    /* nasty but well=formed */
2976
11.9M
    if (cur[0] == ':')
2977
33.5k
  return(xmlStrdup(name));
2978
2979
11.9M
    c = *cur++;
2980
53.6M
    while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2981
41.7M
  buf[len++] = c;
2982
41.7M
  c = *cur++;
2983
41.7M
    }
2984
11.9M
    if (len >= max) {
2985
  /*
2986
   * Okay someone managed to make a huge name, so he's ready to pay
2987
   * for the processing speed.
2988
   */
2989
3.68k
  max = len * 2;
2990
2991
3.68k
  buffer = (xmlChar *) xmlMallocAtomic(max);
2992
3.68k
  if (buffer == NULL) {
2993
0
      xmlErrMemory(ctxt, NULL);
2994
0
      return(NULL);
2995
0
  }
2996
3.68k
  memcpy(buffer, buf, len);
2997
3.87M
  while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2998
3.87M
      if (len + 10 > max) {
2999
4.55k
          xmlChar *tmp;
3000
3001
4.55k
    max *= 2;
3002
4.55k
    tmp = (xmlChar *) xmlRealloc(buffer, max);
3003
4.55k
    if (tmp == NULL) {
3004
0
        xmlFree(buffer);
3005
0
        xmlErrMemory(ctxt, NULL);
3006
0
        return(NULL);
3007
0
    }
3008
4.55k
    buffer = tmp;
3009
4.55k
      }
3010
3.87M
      buffer[len++] = c;
3011
3.87M
      c = *cur++;
3012
3.87M
  }
3013
3.68k
  buffer[len] = 0;
3014
3.68k
    }
3015
3016
11.9M
    if ((c == ':') && (*cur == 0)) {
3017
10.3k
        if (buffer != NULL)
3018
171
      xmlFree(buffer);
3019
10.3k
  *prefix = NULL;
3020
10.3k
  return(xmlStrdup(name));
3021
10.3k
    }
3022
3023
11.8M
    if (buffer == NULL)
3024
11.8M
  ret = xmlStrndup(buf, len);
3025
3.51k
    else {
3026
3.51k
  ret = buffer;
3027
3.51k
  buffer = NULL;
3028
3.51k
  max = XML_MAX_NAMELEN;
3029
3.51k
    }
3030
3031
3032
11.8M
    if (c == ':') {
3033
4.91M
  c = *cur;
3034
4.91M
        *prefix = ret;
3035
4.91M
  if (c == 0) {
3036
0
      return(xmlStrndup(BAD_CAST "", 0));
3037
0
  }
3038
4.91M
  len = 0;
3039
3040
  /*
3041
   * Check that the first character is proper to start
3042
   * a new name
3043
   */
3044
4.91M
  if (!(((c >= 0x61) && (c <= 0x7A)) ||
3045
4.91M
        ((c >= 0x41) && (c <= 0x5A)) ||
3046
4.91M
        (c == '_') || (c == ':'))) {
3047
6.38k
      int l;
3048
6.38k
      int first = CUR_SCHAR(cur, l);
3049
3050
6.38k
      if (!IS_LETTER(first) && (first != '_')) {
3051
2.88k
    xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
3052
2.88k
          "Name %s is not XML Namespace compliant\n",
3053
2.88k
          name);
3054
2.88k
      }
3055
6.38k
  }
3056
4.91M
  cur++;
3057
3058
24.2M
  while ((c != 0) && (len < max)) { /* tested bigname2.xml */
3059
19.3M
      buf[len++] = c;
3060
19.3M
      c = *cur++;
3061
19.3M
  }
3062
4.91M
  if (len >= max) {
3063
      /*
3064
       * Okay someone managed to make a huge name, so he's ready to pay
3065
       * for the processing speed.
3066
       */
3067
2.10k
      max = len * 2;
3068
3069
2.10k
      buffer = (xmlChar *) xmlMallocAtomic(max);
3070
2.10k
      if (buffer == NULL) {
3071
0
          xmlErrMemory(ctxt, NULL);
3072
0
    return(NULL);
3073
0
      }
3074
2.10k
      memcpy(buffer, buf, len);
3075
2.22M
      while (c != 0) { /* tested bigname2.xml */
3076
2.22M
    if (len + 10 > max) {
3077
2.79k
        xmlChar *tmp;
3078
3079
2.79k
        max *= 2;
3080
2.79k
        tmp = (xmlChar *) xmlRealloc(buffer, max);
3081
2.79k
        if (tmp == NULL) {
3082
0
      xmlErrMemory(ctxt, NULL);
3083
0
      xmlFree(buffer);
3084
0
      return(NULL);
3085
0
        }
3086
2.79k
        buffer = tmp;
3087
2.79k
    }
3088
2.22M
    buffer[len++] = c;
3089
2.22M
    c = *cur++;
3090
2.22M
      }
3091
2.10k
      buffer[len] = 0;
3092
2.10k
  }
3093
3094
4.91M
  if (buffer == NULL)
3095
4.91M
      ret = xmlStrndup(buf, len);
3096
2.10k
  else {
3097
2.10k
      ret = buffer;
3098
2.10k
  }
3099
4.91M
    }
3100
3101
11.8M
    return(ret);
3102
11.8M
}
3103
3104
/************************************************************************
3105
 *                  *
3106
 *      The parser itself       *
3107
 *  Relates to http://www.w3.org/TR/REC-xml       *
3108
 *                  *
3109
 ************************************************************************/
3110
3111
/************************************************************************
3112
 *                  *
3113
 *  Routines to parse Name, NCName and NmToken      *
3114
 *                  *
3115
 ************************************************************************/
3116
/*
 * Debug-only call counters, compiled in only when DEBUG is defined.
 * nbParseName, nbParseNameComplex, nbParseNCName and nbParseNCNameComplex
 * are incremented on entry of the parsing routine of the same name;
 * nbParseNmToken and nbParseStringName presumably follow the same scheme
 * (their users are not in this part of the file - TODO confirm).
 */
#ifdef DEBUG
3117
static unsigned long nbParseName = 0;
3118
static unsigned long nbParseNmToken = 0;
3119
static unsigned long nbParseNCName = 0;
3120
static unsigned long nbParseNCNameComplex = 0;
3121
static unsigned long nbParseNameComplex = 0;
3122
static unsigned long nbParseStringName = 0;
3123
#endif
3124
3125
/*
3126
 * The two following functions are related to the change of accepted
3127
 * characters for Name and NmToken in the Revision 5 of XML-1.0
3128
 * They correspond to the modified production [4] and the new production [4a]
3129
 * changes in that revision. Also note that the macros used for the
3130
 * productions Letter, Digit, CombiningChar and Extender are not needed
3131
 * anymore.
3132
 * We still keep compatibility to pre-revision5 parsing semantic if the
3133
 * new XML_PARSE_OLD10 option is given to the parser.
3134
 */
3135
static int
3136
4.00M
xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
3137
4.00M
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3138
        /*
3139
   * Use the new checks of production [4] [4a] amd [5] of the
3140
   * Update 5 of XML-1.0
3141
   */
3142
2.32M
  if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3143
2.32M
      (((c >= 'a') && (c <= 'z')) ||
3144
2.11M
       ((c >= 'A') && (c <= 'Z')) ||
3145
2.11M
       (c == '_') || (c == ':') ||
3146
2.11M
       ((c >= 0xC0) && (c <= 0xD6)) ||
3147
2.11M
       ((c >= 0xD8) && (c <= 0xF6)) ||
3148
2.11M
       ((c >= 0xF8) && (c <= 0x2FF)) ||
3149
2.11M
       ((c >= 0x370) && (c <= 0x37D)) ||
3150
2.11M
       ((c >= 0x37F) && (c <= 0x1FFF)) ||
3151
2.11M
       ((c >= 0x200C) && (c <= 0x200D)) ||
3152
2.11M
       ((c >= 0x2070) && (c <= 0x218F)) ||
3153
2.11M
       ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3154
2.11M
       ((c >= 0x3001) && (c <= 0xD7FF)) ||
3155
2.11M
       ((c >= 0xF900) && (c <= 0xFDCF)) ||
3156
2.11M
       ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3157
2.11M
       ((c >= 0x10000) && (c <= 0xEFFFF))))
3158
1.64M
      return(1);
3159
2.32M
    } else {
3160
1.67M
        if (IS_LETTER(c) || (c == '_') || (c == ':'))
3161
1.24M
      return(1);
3162
1.67M
    }
3163
1.11M
    return(0);
3164
4.00M
}
3165
3166
static int
3167
33.3M
xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
3168
33.3M
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3169
        /*
3170
   * Use the new checks of production [4] [4a] amd [5] of the
3171
   * Update 5 of XML-1.0
3172
   */
3173
22.4M
  if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3174
22.4M
      (((c >= 'a') && (c <= 'z')) ||
3175
22.2M
       ((c >= 'A') && (c <= 'Z')) ||
3176
22.2M
       ((c >= '0') && (c <= '9')) || /* !start */
3177
22.2M
       (c == '_') || (c == ':') ||
3178
22.2M
       (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3179
22.2M
       ((c >= 0xC0) && (c <= 0xD6)) ||
3180
22.2M
       ((c >= 0xD8) && (c <= 0xF6)) ||
3181
22.2M
       ((c >= 0xF8) && (c <= 0x2FF)) ||
3182
22.2M
       ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3183
22.2M
       ((c >= 0x370) && (c <= 0x37D)) ||
3184
22.2M
       ((c >= 0x37F) && (c <= 0x1FFF)) ||
3185
22.2M
       ((c >= 0x200C) && (c <= 0x200D)) ||
3186
22.2M
       ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3187
22.2M
       ((c >= 0x2070) && (c <= 0x218F)) ||
3188
22.2M
       ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3189
22.2M
       ((c >= 0x3001) && (c <= 0xD7FF)) ||
3190
22.2M
       ((c >= 0xF900) && (c <= 0xFDCF)) ||
3191
22.2M
       ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3192
22.2M
       ((c >= 0x10000) && (c <= 0xEFFFF))))
3193
20.7M
       return(1);
3194
22.4M
    } else {
3195
10.8M
        if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3196
10.8M
            (c == '.') || (c == '-') ||
3197
10.8M
      (c == '_') || (c == ':') ||
3198
10.8M
      (IS_COMBINING(c)) ||
3199
10.8M
      (IS_EXTENDER(c)))
3200
9.57M
      return(1);
3201
10.8M
    }
3202
2.94M
    return(0);
3203
33.3M
}
3204
3205
static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
3206
                                          int *len, int *alloc, int normalize);
3207
3208
static const xmlChar *
3209
6.32M
xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3210
6.32M
    int len = 0, l;
3211
6.32M
    int c;
3212
6.32M
    int count = 0;
3213
6.32M
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3214
5.45M
                    XML_MAX_TEXT_LENGTH :
3215
6.32M
                    XML_MAX_NAME_LENGTH;
3216
3217
#ifdef DEBUG
3218
    nbParseNameComplex++;
3219
#endif
3220
3221
    /*
3222
     * Handler for more complex cases
3223
     */
3224
6.32M
    GROW;
3225
6.32M
    if (ctxt->instate == XML_PARSER_EOF)
3226
0
        return(NULL);
3227
6.32M
    c = CUR_CHAR(l);
3228
6.32M
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3229
        /*
3230
   * Use the new checks of production [4] [4a] amd [5] of the
3231
   * Update 5 of XML-1.0
3232
   */
3233
3.48M
  if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3234
3.48M
      (!(((c >= 'a') && (c <= 'z')) ||
3235
3.09M
         ((c >= 'A') && (c <= 'Z')) ||
3236
3.09M
         (c == '_') || (c == ':') ||
3237
3.09M
         ((c >= 0xC0) && (c <= 0xD6)) ||
3238
3.09M
         ((c >= 0xD8) && (c <= 0xF6)) ||
3239
3.09M
         ((c >= 0xF8) && (c <= 0x2FF)) ||
3240
3.09M
         ((c >= 0x370) && (c <= 0x37D)) ||
3241
3.09M
         ((c >= 0x37F) && (c <= 0x1FFF)) ||
3242
3.09M
         ((c >= 0x200C) && (c <= 0x200D)) ||
3243
3.09M
         ((c >= 0x2070) && (c <= 0x218F)) ||
3244
3.09M
         ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3245
3.09M
         ((c >= 0x3001) && (c <= 0xD7FF)) ||
3246
3.09M
         ((c >= 0xF900) && (c <= 0xFDCF)) ||
3247
3.09M
         ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3248
3.09M
         ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3249
2.67M
      return(NULL);
3250
2.67M
  }
3251
807k
  len += l;
3252
807k
  NEXTL(l);
3253
807k
  c = CUR_CHAR(l);
3254
19.1M
  while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3255
19.1M
         (((c >= 'a') && (c <= 'z')) ||
3256
18.9M
          ((c >= 'A') && (c <= 'Z')) ||
3257
18.9M
          ((c >= '0') && (c <= '9')) || /* !start */
3258
18.9M
          (c == '_') || (c == ':') ||
3259
18.9M
          (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3260
18.9M
          ((c >= 0xC0) && (c <= 0xD6)) ||
3261
18.9M
          ((c >= 0xD8) && (c <= 0xF6)) ||
3262
18.9M
          ((c >= 0xF8) && (c <= 0x2FF)) ||
3263
18.9M
          ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3264
18.9M
          ((c >= 0x370) && (c <= 0x37D)) ||
3265
18.9M
          ((c >= 0x37F) && (c <= 0x1FFF)) ||
3266
18.9M
          ((c >= 0x200C) && (c <= 0x200D)) ||
3267
18.9M
          ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3268
18.9M
          ((c >= 0x2070) && (c <= 0x218F)) ||
3269
18.9M
          ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3270
18.9M
          ((c >= 0x3001) && (c <= 0xD7FF)) ||
3271
18.9M
          ((c >= 0xF900) && (c <= 0xFDCF)) ||
3272
18.9M
          ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3273
18.9M
          ((c >= 0x10000) && (c <= 0xEFFFF))
3274
18.9M
    )) {
3275
18.3M
      if (count++ > XML_PARSER_CHUNK_SIZE) {
3276
115k
    count = 0;
3277
115k
    GROW;
3278
115k
                if (ctxt->instate == XML_PARSER_EOF)
3279
0
                    return(NULL);
3280
115k
      }
3281
18.3M
            if (len <= INT_MAX - l)
3282
18.3M
          len += l;
3283
18.3M
      NEXTL(l);
3284
18.3M
      c = CUR_CHAR(l);
3285
18.3M
  }
3286
2.83M
    } else {
3287
2.83M
  if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3288
2.83M
      (!IS_LETTER(c) && (c != '_') &&
3289
2.38M
       (c != ':'))) {
3290
2.32M
      return(NULL);
3291
2.32M
  }
3292
512k
  len += l;
3293
512k
  NEXTL(l);
3294
512k
  c = CUR_CHAR(l);
3295
3296
12.4M
  while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3297
12.4M
         ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3298
12.3M
    (c == '.') || (c == '-') ||
3299
12.3M
    (c == '_') || (c == ':') ||
3300
12.3M
    (IS_COMBINING(c)) ||
3301
12.3M
    (IS_EXTENDER(c)))) {
3302
11.9M
      if (count++ > XML_PARSER_CHUNK_SIZE) {
3303
75.1k
    count = 0;
3304
75.1k
    GROW;
3305
75.1k
                if (ctxt->instate == XML_PARSER_EOF)
3306
0
                    return(NULL);
3307
75.1k
      }
3308
11.9M
            if (len <= INT_MAX - l)
3309
11.9M
          len += l;
3310
11.9M
      NEXTL(l);
3311
11.9M
      c = CUR_CHAR(l);
3312
11.9M
  }
3313
512k
    }
3314
1.32M
    if (len > maxLength) {
3315
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3316
0
        return(NULL);
3317
0
    }
3318
1.32M
    if (ctxt->input->cur - ctxt->input->base < len) {
3319
        /*
3320
         * There were a couple of bugs where PERefs lead to to a change
3321
         * of the buffer. Check the buffer size to avoid passing an invalid
3322
         * pointer to xmlDictLookup.
3323
         */
3324
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
3325
0
                    "unexpected change of input buffer");
3326
0
        return (NULL);
3327
0
    }
3328
1.32M
    if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3329
1.73k
        return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
3330
1.31M
    return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3331
1.32M
}
3332
3333
/**
3334
 * xmlParseName:
3335
 * @ctxt:  an XML parser context
3336
 *
3337
 * DEPRECATED: Internal function, don't use.
3338
 *
3339
 * parse an XML name.
3340
 *
3341
 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3342
 *                  CombiningChar | Extender
3343
 *
3344
 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3345
 *
3346
 * [6] Names ::= Name (#x20 Name)*
3347
 *
3348
 * Returns the Name parsed or NULL
3349
 */
3350
3351
const xmlChar *
3352
36.4M
xmlParseName(xmlParserCtxtPtr ctxt) {
3353
36.4M
    const xmlChar *in;
3354
36.4M
    const xmlChar *ret;
3355
36.4M
    size_t count = 0;
3356
36.4M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3357
29.5M
                       XML_MAX_TEXT_LENGTH :
3358
36.4M
                       XML_MAX_NAME_LENGTH;
3359
3360
36.4M
    GROW;
3361
3362
#ifdef DEBUG
3363
    nbParseName++;
3364
#endif
3365
3366
    /*
3367
     * Accelerator for simple ASCII names
3368
     */
3369
36.4M
    in = ctxt->input->cur;
3370
36.4M
    if (((*in >= 0x61) && (*in <= 0x7A)) ||
3371
36.4M
  ((*in >= 0x41) && (*in <= 0x5A)) ||
3372
36.4M
  (*in == '_') || (*in == ':')) {
3373
31.1M
  in++;
3374
185M
  while (((*in >= 0x61) && (*in <= 0x7A)) ||
3375
185M
         ((*in >= 0x41) && (*in <= 0x5A)) ||
3376
185M
         ((*in >= 0x30) && (*in <= 0x39)) ||
3377
185M
         (*in == '_') || (*in == '-') ||
3378
185M
         (*in == ':') || (*in == '.'))
3379
154M
      in++;
3380
31.1M
  if ((*in > 0) && (*in < 0x80)) {
3381
30.0M
      count = in - ctxt->input->cur;
3382
30.0M
            if (count > maxLength) {
3383
0
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3384
0
                return(NULL);
3385
0
            }
3386
30.0M
      ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3387
30.0M
      ctxt->input->cur = in;
3388
30.0M
      ctxt->input->col += count;
3389
30.0M
      if (ret == NULL)
3390
0
          xmlErrMemory(ctxt, NULL);
3391
30.0M
      return(ret);
3392
30.0M
  }
3393
31.1M
    }
3394
    /* accelerator for special cases */
3395
6.32M
    return(xmlParseNameComplex(ctxt));
3396
36.4M
}
3397
3398
static const xmlChar *
3399
472k
xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3400
472k
    int len = 0, l;
3401
472k
    int c;
3402
472k
    int count = 0;
3403
472k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3404
190k
                    XML_MAX_TEXT_LENGTH :
3405
472k
                    XML_MAX_NAME_LENGTH;
3406
472k
    size_t startPosition = 0;
3407
3408
#ifdef DEBUG
3409
    nbParseNCNameComplex++;
3410
#endif
3411
3412
    /*
3413
     * Handler for more complex cases
3414
     */
3415
472k
    GROW;
3416
472k
    startPosition = CUR_PTR - BASE_PTR;
3417
472k
    c = CUR_CHAR(l);
3418
472k
    if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3419
472k
  (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3420
335k
  return(NULL);
3421
335k
    }
3422
3423
4.94M
    while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3424
4.94M
     (xmlIsNameChar(ctxt, c) && (c != ':'))) {
3425
4.81M
  if (count++ > XML_PARSER_CHUNK_SIZE) {
3426
36.6k
      count = 0;
3427
36.6k
      GROW;
3428
36.6k
            if (ctxt->instate == XML_PARSER_EOF)
3429
0
                return(NULL);
3430
36.6k
  }
3431
4.81M
        if (len <= INT_MAX - l)
3432
4.81M
      len += l;
3433
4.81M
  NEXTL(l);
3434
4.81M
  c = CUR_CHAR(l);
3435
4.81M
  if (c == 0) {
3436
10.8k
      count = 0;
3437
      /*
3438
       * when shrinking to extend the buffer we really need to preserve
3439
       * the part of the name we already parsed. Hence rolling back
3440
       * by current length.
3441
       */
3442
10.8k
      ctxt->input->cur -= l;
3443
10.8k
      GROW;
3444
10.8k
            if (ctxt->instate == XML_PARSER_EOF)
3445
0
                return(NULL);
3446
10.8k
      ctxt->input->cur += l;
3447
10.8k
      c = CUR_CHAR(l);
3448
10.8k
  }
3449
4.81M
    }
3450
136k
    if (len > maxLength) {
3451
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3452
0
        return(NULL);
3453
0
    }
3454
136k
    return(xmlDictLookup(ctxt->dict, (BASE_PTR + startPosition), len));
3455
136k
}
3456
3457
/**
3458
 * xmlParseNCName:
3459
 * @ctxt:  an XML parser context
3460
 * @len:  length of the string parsed
3461
 *
3462
 * parse an XML name.
3463
 *
3464
 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3465
 *                      CombiningChar | Extender
3466
 *
3467
 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3468
 *
3469
 * Returns the Name parsed or NULL
3470
 */
3471
3472
static const xmlChar *
3473
7.20M
xmlParseNCName(xmlParserCtxtPtr ctxt) {
3474
7.20M
    const xmlChar *in, *e;
3475
7.20M
    const xmlChar *ret;
3476
7.20M
    size_t count = 0;
3477
7.20M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3478
3.30M
                       XML_MAX_TEXT_LENGTH :
3479
7.20M
                       XML_MAX_NAME_LENGTH;
3480
3481
#ifdef DEBUG
3482
    nbParseNCName++;
3483
#endif
3484
3485
    /*
3486
     * Accelerator for simple ASCII names
3487
     */
3488
7.20M
    in = ctxt->input->cur;
3489
7.20M
    e = ctxt->input->end;
3490
7.20M
    if ((((*in >= 0x61) && (*in <= 0x7A)) ||
3491
7.20M
   ((*in >= 0x41) && (*in <= 0x5A)) ||
3492
7.20M
   (*in == '_')) && (in < e)) {
3493
6.84M
  in++;
3494
46.7M
  while ((((*in >= 0x61) && (*in <= 0x7A)) ||
3495
46.7M
          ((*in >= 0x41) && (*in <= 0x5A)) ||
3496
46.7M
          ((*in >= 0x30) && (*in <= 0x39)) ||
3497
46.7M
          (*in == '_') || (*in == '-') ||
3498
46.7M
          (*in == '.')) && (in < e))
3499
39.8M
      in++;
3500
6.84M
  if (in >= e)
3501
3.60k
      goto complex;
3502
6.84M
  if ((*in > 0) && (*in < 0x80)) {
3503
6.73M
      count = in - ctxt->input->cur;
3504
6.73M
            if (count > maxLength) {
3505
0
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3506
0
                return(NULL);
3507
0
            }
3508
6.73M
      ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3509
6.73M
      ctxt->input->cur = in;
3510
6.73M
      ctxt->input->col += count;
3511
6.73M
      if (ret == NULL) {
3512
0
          xmlErrMemory(ctxt, NULL);
3513
0
      }
3514
6.73M
      return(ret);
3515
6.73M
  }
3516
6.84M
    }
3517
472k
complex:
3518
472k
    return(xmlParseNCNameComplex(ctxt));
3519
7.20M
}
3520
3521
/**
3522
 * xmlParseNameAndCompare:
3523
 * @ctxt:  an XML parser context
3524
 *
3525
 * parse an XML name and compares for match
3526
 * (specialized for endtag parsing)
3527
 *
3528
 * Returns NULL for an illegal name, (xmlChar*) 1 for success
3529
 * and the name for mismatch
3530
 */
3531
3532
static const xmlChar *
3533
4.25M
xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
3534
4.25M
    register const xmlChar *cmp = other;
3535
4.25M
    register const xmlChar *in;
3536
4.25M
    const xmlChar *ret;
3537
3538
4.25M
    GROW;
3539
4.25M
    if (ctxt->instate == XML_PARSER_EOF)
3540
0
        return(NULL);
3541
3542
4.25M
    in = ctxt->input->cur;
3543
17.6M
    while (*in != 0 && *in == *cmp) {
3544
13.3M
  ++in;
3545
13.3M
  ++cmp;
3546
13.3M
    }
3547
4.25M
    if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
3548
  /* success */
3549
2.51M
  ctxt->input->col += in - ctxt->input->cur;
3550
2.51M
  ctxt->input->cur = in;
3551
2.51M
  return (const xmlChar*) 1;
3552
2.51M
    }
3553
    /* failure (or end of input buffer), check with full function */
3554
1.74M
    ret = xmlParseName (ctxt);
3555
    /* strings coming from the dictionary direct compare possible */
3556
1.74M
    if (ret == other) {
3557
38.4k
  return (const xmlChar*) 1;
3558
38.4k
    }
3559
1.70M
    return ret;
3560
1.74M
}
3561
3562
/**
3563
 * xmlParseStringName:
3564
 * @ctxt:  an XML parser context
3565
 * @str:  a pointer to the string pointer (IN/OUT)
3566
 *
3567
 * parse an XML name.
3568
 *
3569
 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3570
 *                  CombiningChar | Extender
3571
 *
3572
 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3573
 *
3574
 * [6] Names ::= Name (#x20 Name)*
3575
 *
3576
 * Returns the Name parsed or NULL. The @str pointer
3577
 * is updated to the current location in the string.
3578
 */
3579
3580
static xmlChar *
3581
3.56M
xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3582
3.56M
    xmlChar buf[XML_MAX_NAMELEN + 5];
3583
3.56M
    const xmlChar *cur = *str;
3584
3.56M
    int len = 0, l;
3585
3.56M
    int c;
3586
3.56M
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3587
3.32M
                    XML_MAX_TEXT_LENGTH :
3588
3.56M
                    XML_MAX_NAME_LENGTH;
3589
3590
#ifdef DEBUG
3591
    nbParseStringName++;
3592
#endif
3593
3594
3.56M
    c = CUR_SCHAR(cur, l);
3595
3.56M
    if (!xmlIsNameStartChar(ctxt, c)) {
3596
838k
  return(NULL);
3597
838k
    }
3598
3599
2.73M
    COPY_BUF(l,buf,len,c);
3600
2.73M
    cur += l;
3601
2.73M
    c = CUR_SCHAR(cur, l);
3602
17.1M
    while (xmlIsNameChar(ctxt, c)) {
3603
14.4M
  COPY_BUF(l,buf,len,c);
3604
14.4M
  cur += l;
3605
14.4M
  c = CUR_SCHAR(cur, l);
3606
14.4M
  if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3607
      /*
3608
       * Okay someone managed to make a huge name, so he's ready to pay
3609
       * for the processing speed.
3610
       */
3611
47.2k
      xmlChar *buffer;
3612
47.2k
      int max = len * 2;
3613
3614
47.2k
      buffer = (xmlChar *) xmlMallocAtomic(max);
3615
47.2k
      if (buffer == NULL) {
3616
0
          xmlErrMemory(ctxt, NULL);
3617
0
    return(NULL);
3618
0
      }
3619
47.2k
      memcpy(buffer, buf, len);
3620
7.80M
      while (xmlIsNameChar(ctxt, c)) {
3621
7.76M
    if (len + 10 > max) {
3622
32.7k
        xmlChar *tmp;
3623
3624
32.7k
        max *= 2;
3625
32.7k
        tmp = (xmlChar *) xmlRealloc(buffer, max);
3626
32.7k
        if (tmp == NULL) {
3627
0
      xmlErrMemory(ctxt, NULL);
3628
0
      xmlFree(buffer);
3629
0
      return(NULL);
3630
0
        }
3631
32.7k
        buffer = tmp;
3632
32.7k
    }
3633
7.76M
    COPY_BUF(l,buffer,len,c);
3634
7.76M
    cur += l;
3635
7.76M
    c = CUR_SCHAR(cur, l);
3636
7.76M
                if (len > maxLength) {
3637
0
                    xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3638
0
                    xmlFree(buffer);
3639
0
                    return(NULL);
3640
0
                }
3641
7.76M
      }
3642
47.2k
      buffer[len] = 0;
3643
47.2k
      *str = cur;
3644
47.2k
      return(buffer);
3645
47.2k
  }
3646
14.4M
    }
3647
2.68M
    if (len > maxLength) {
3648
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3649
0
        return(NULL);
3650
0
    }
3651
2.68M
    *str = cur;
3652
2.68M
    return(xmlStrndup(buf, len));
3653
2.68M
}
3654
3655
/**
3656
 * xmlParseNmtoken:
3657
 * @ctxt:  an XML parser context
3658
 *
3659
 * DEPRECATED: Internal function, don't use.
3660
 *
3661
 * parse an XML Nmtoken.
3662
 *
3663
 * [7] Nmtoken ::= (NameChar)+
3664
 *
3665
 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
3666
 *
3667
 * Returns the Nmtoken parsed or NULL
3668
 */
3669
3670
xmlChar *
3671
133k
xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3672
133k
    xmlChar buf[XML_MAX_NAMELEN + 5];
3673
133k
    int len = 0, l;
3674
133k
    int c;
3675
133k
    int count = 0;
3676
133k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3677
71.3k
                    XML_MAX_TEXT_LENGTH :
3678
133k
                    XML_MAX_NAME_LENGTH;
3679
3680
#ifdef DEBUG
3681
    nbParseNmToken++;
3682
#endif
3683
3684
133k
    GROW;
3685
133k
    if (ctxt->instate == XML_PARSER_EOF)
3686
0
        return(NULL);
3687
133k
    c = CUR_CHAR(l);
3688
3689
735k
    while (xmlIsNameChar(ctxt, c)) {
3690
603k
  if (count++ > XML_PARSER_CHUNK_SIZE) {
3691
0
      count = 0;
3692
0
      GROW;
3693
0
  }
3694
603k
  COPY_BUF(l,buf,len,c);
3695
603k
  NEXTL(l);
3696
603k
  c = CUR_CHAR(l);
3697
603k
  if (c == 0) {
3698
692
      count = 0;
3699
692
      GROW;
3700
692
      if (ctxt->instate == XML_PARSER_EOF)
3701
0
    return(NULL);
3702
692
            c = CUR_CHAR(l);
3703
692
  }
3704
603k
  if (len >= XML_MAX_NAMELEN) {
3705
      /*
3706
       * Okay someone managed to make a huge token, so he's ready to pay
3707
       * for the processing speed.
3708
       */
3709
1.07k
      xmlChar *buffer;
3710
1.07k
      int max = len * 2;
3711
3712
1.07k
      buffer = (xmlChar *) xmlMallocAtomic(max);
3713
1.07k
      if (buffer == NULL) {
3714
0
          xmlErrMemory(ctxt, NULL);
3715
0
    return(NULL);
3716
0
      }
3717
1.07k
      memcpy(buffer, buf, len);
3718
2.75M
      while (xmlIsNameChar(ctxt, c)) {
3719
2.75M
    if (count++ > XML_PARSER_CHUNK_SIZE) {
3720
27.5k
        count = 0;
3721
27.5k
        GROW;
3722
27.5k
                    if (ctxt->instate == XML_PARSER_EOF) {
3723
0
                        xmlFree(buffer);
3724
0
                        return(NULL);
3725
0
                    }
3726
27.5k
    }
3727
2.75M
    if (len + 10 > max) {
3728
2.73k
        xmlChar *tmp;
3729
3730
2.73k
        max *= 2;
3731
2.73k
        tmp = (xmlChar *) xmlRealloc(buffer, max);
3732
2.73k
        if (tmp == NULL) {
3733
0
      xmlErrMemory(ctxt, NULL);
3734
0
      xmlFree(buffer);
3735
0
      return(NULL);
3736
0
        }
3737
2.73k
        buffer = tmp;
3738
2.73k
    }
3739
2.75M
    COPY_BUF(l,buffer,len,c);
3740
2.75M
    NEXTL(l);
3741
2.75M
    c = CUR_CHAR(l);
3742
2.75M
                if (len > maxLength) {
3743
0
                    xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3744
0
                    xmlFree(buffer);
3745
0
                    return(NULL);
3746
0
                }
3747
2.75M
      }
3748
1.07k
      buffer[len] = 0;
3749
1.07k
      return(buffer);
3750
1.07k
  }
3751
603k
    }
3752
132k
    if (len == 0)
3753
16.9k
        return(NULL);
3754
115k
    if (len > maxLength) {
3755
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3756
0
        return(NULL);
3757
0
    }
3758
115k
    return(xmlStrndup(buf, len));
3759
115k
}
3760
3761
/**
3762
 * xmlParseEntityValue:
3763
 * @ctxt:  an XML parser context
3764
 * @orig:  if non-NULL store a copy of the original entity value
3765
 *
3766
 * DEPRECATED: Internal function, don't use.
3767
 *
3768
 * parse a value for ENTITY declarations
3769
 *
3770
 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3771
 *                 "'" ([^%&'] | PEReference | Reference)* "'"
3772
 *
3773
 * Returns the EntityValue parsed with reference substituted or NULL
3774
 */
3775
3776
xmlChar *
3777
361k
xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3778
361k
    xmlChar *buf = NULL;
3779
361k
    int len = 0;
3780
361k
    int size = XML_PARSER_BUFFER_SIZE;
3781
361k
    int c, l;
3782
361k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3783
113k
                    XML_MAX_HUGE_LENGTH :
3784
361k
                    XML_MAX_TEXT_LENGTH;
3785
361k
    xmlChar stop;
3786
361k
    xmlChar *ret = NULL;
3787
361k
    const xmlChar *cur = NULL;
3788
361k
    xmlParserInputPtr input;
3789
3790
361k
    if (RAW == '"') stop = '"';
3791
50.7k
    else if (RAW == '\'') stop = '\'';
3792
0
    else {
3793
0
  xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
3794
0
  return(NULL);
3795
0
    }
3796
361k
    buf = (xmlChar *) xmlMallocAtomic(size);
3797
361k
    if (buf == NULL) {
3798
0
  xmlErrMemory(ctxt, NULL);
3799
0
  return(NULL);
3800
0
    }
3801
3802
    /*
3803
     * The content of the entity definition is copied in a buffer.
3804
     */
3805
3806
361k
    ctxt->instate = XML_PARSER_ENTITY_VALUE;
3807
361k
    input = ctxt->input;
3808
361k
    GROW;
3809
361k
    if (ctxt->instate == XML_PARSER_EOF)
3810
0
        goto error;
3811
361k
    NEXT;
3812
361k
    c = CUR_CHAR(l);
3813
    /*
3814
     * NOTE: 4.4.5 Included in Literal
3815
     * When a parameter entity reference appears in a literal entity
3816
     * value, ... a single or double quote character in the replacement
3817
     * text is always treated as a normal data character and will not
3818
     * terminate the literal.
3819
     * In practice it means we stop the loop only when back at parsing
3820
     * the initial entity and the quote is found
3821
     */
3822
11.1M
    while (((IS_CHAR(c)) && ((c != stop) || /* checked */
3823
11.1M
      (ctxt->input != input))) && (ctxt->instate != XML_PARSER_EOF)) {
3824
10.8M
  if (len + 5 >= size) {
3825
17.9k
      xmlChar *tmp;
3826
3827
17.9k
      size *= 2;
3828
17.9k
      tmp = (xmlChar *) xmlRealloc(buf, size);
3829
17.9k
      if (tmp == NULL) {
3830
0
    xmlErrMemory(ctxt, NULL);
3831
0
                goto error;
3832
0
      }
3833
17.9k
      buf = tmp;
3834
17.9k
  }
3835
10.8M
  COPY_BUF(l,buf,len,c);
3836
10.8M
  NEXTL(l);
3837
3838
10.8M
  GROW;
3839
10.8M
  c = CUR_CHAR(l);
3840
10.8M
  if (c == 0) {
3841
1.47k
      GROW;
3842
1.47k
      c = CUR_CHAR(l);
3843
1.47k
  }
3844
3845
10.8M
        if (len > maxLength) {
3846
0
            xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
3847
0
                           "entity value too long\n");
3848
0
            goto error;
3849
0
        }
3850
10.8M
    }
3851
361k
    buf[len] = 0;
3852
361k
    if (ctxt->instate == XML_PARSER_EOF)
3853
0
        goto error;
3854
361k
    if (c != stop) {
3855
2.29k
        xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
3856
2.29k
        goto error;
3857
2.29k
    }
3858
359k
    NEXT;
3859
3860
    /*
3861
     * Raise problem w.r.t. '&' and '%' being used in non-entities
3862
     * reference constructs. Note Charref will be handled in
3863
     * xmlStringDecodeEntities()
3864
     */
3865
359k
    cur = buf;
3866
8.87M
    while (*cur != 0) { /* non input consuming */
3867
8.52M
  if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3868
59.9k
      xmlChar *name;
3869
59.9k
      xmlChar tmp = *cur;
3870
59.9k
            int nameOk = 0;
3871
3872
59.9k
      cur++;
3873
59.9k
      name = xmlParseStringName(ctxt, &cur);
3874
59.9k
            if (name != NULL) {
3875
57.2k
                nameOk = 1;
3876
57.2k
                xmlFree(name);
3877
57.2k
            }
3878
59.9k
            if ((nameOk == 0) || (*cur != ';')) {
3879
6.22k
    xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
3880
6.22k
      "EntityValue: '%c' forbidden except for entities references\n",
3881
6.22k
                            tmp);
3882
6.22k
                goto error;
3883
6.22k
      }
3884
53.6k
      if ((tmp == '%') && (ctxt->inSubset == 1) &&
3885
53.6k
    (ctxt->inputNr == 1)) {
3886
522
    xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
3887
522
                goto error;
3888
522
      }
3889
53.1k
      if (*cur == 0)
3890
0
          break;
3891
53.1k
  }
3892
8.51M
  cur++;
3893
8.51M
    }
3894
3895
    /*
3896
     * Then PEReference entities are substituted.
3897
     *
3898
     * NOTE: 4.4.7 Bypassed
3899
     * When a general entity reference appears in the EntityValue in
3900
     * an entity declaration, it is bypassed and left as is.
3901
     * so XML_SUBSTITUTE_REF is not set here.
3902
     */
3903
352k
    ++ctxt->depth;
3904
352k
    ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3905
352k
                                  0, 0, 0);
3906
352k
    --ctxt->depth;
3907
352k
    if (orig != NULL) {
3908
352k
        *orig = buf;
3909
352k
        buf = NULL;
3910
352k
    }
3911
3912
361k
error:
3913
361k
    if (buf != NULL)
3914
9.04k
        xmlFree(buf);
3915
361k
    return(ret);
3916
352k
}
3917
3918
/**
3919
 * xmlParseAttValueComplex:
3920
 * @ctxt:  an XML parser context
3921
 * @len:   the resulting attribute len
3922
 * @normalize:  whether to apply the inner normalization
3923
 *
3924
 * parse a value for an attribute, this is the fallback function
3925
 * of xmlParseAttValue() when the attribute parsing requires handling
3926
 * of non-ASCII characters, or normalization compaction.
3927
 *
3928
 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3929
 */
3930
static xmlChar *
3931
1.27M
xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
3932
1.27M
    xmlChar limit = 0;
3933
1.27M
    xmlChar *buf = NULL;
3934
1.27M
    xmlChar *rep = NULL;
3935
1.27M
    size_t len = 0;
3936
1.27M
    size_t buf_size = 0;
3937
1.27M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3938
898k
                       XML_MAX_HUGE_LENGTH :
3939
1.27M
                       XML_MAX_TEXT_LENGTH;
3940
1.27M
    int c, l, in_space = 0;
3941
1.27M
    xmlChar *current = NULL;
3942
1.27M
    xmlEntityPtr ent;
3943
3944
1.27M
    if (NXT(0) == '"') {
3945
740k
  ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3946
740k
  limit = '"';
3947
740k
        NEXT;
3948
740k
    } else if (NXT(0) == '\'') {
3949
536k
  limit = '\'';
3950
536k
  ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3951
536k
        NEXT;
3952
536k
    } else {
3953
0
  xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
3954
0
  return(NULL);
3955
0
    }
3956
3957
    /*
3958
     * allocate a translation buffer.
3959
     */
3960
1.27M
    buf_size = XML_PARSER_BUFFER_SIZE;
3961
1.27M
    buf = (xmlChar *) xmlMallocAtomic(buf_size);
3962
1.27M
    if (buf == NULL) goto mem_error;
3963
3964
    /*
3965
     * OK loop until we reach one of the ending char or a size limit.
3966
     */
3967
1.27M
    c = CUR_CHAR(l);
3968
68.8M
    while (((NXT(0) != limit) && /* checked */
3969
68.8M
            (IS_CHAR(c)) && (c != '<')) &&
3970
68.8M
            (ctxt->instate != XML_PARSER_EOF)) {
3971
67.5M
  if (c == '&') {
3972
1.22M
      in_space = 0;
3973
1.22M
      if (NXT(1) == '#') {
3974
464k
    int val = xmlParseCharRef(ctxt);
3975
3976
464k
    if (val == '&') {
3977
60.5k
        if (ctxt->replaceEntities) {
3978
8.45k
      if (len + 10 > buf_size) {
3979
202
          growBuffer(buf, 10);
3980
202
      }
3981
8.45k
      buf[len++] = '&';
3982
52.0k
        } else {
3983
      /*
3984
       * The reparsing will be done in xmlStringGetNodeList()
3985
       * called by the attribute() function in SAX.c
3986
       */
3987
52.0k
      if (len + 10 > buf_size) {
3988
160
          growBuffer(buf, 10);
3989
160
      }
3990
52.0k
      buf[len++] = '&';
3991
52.0k
      buf[len++] = '#';
3992
52.0k
      buf[len++] = '3';
3993
52.0k
      buf[len++] = '8';
3994
52.0k
      buf[len++] = ';';
3995
52.0k
        }
3996
404k
    } else if (val != 0) {
3997
349k
        if (len + 10 > buf_size) {
3998
2.14k
      growBuffer(buf, 10);
3999
2.14k
        }
4000
349k
        len += xmlCopyChar(0, &buf[len], val);
4001
349k
    }
4002
755k
      } else {
4003
755k
    ent = xmlParseEntityRef(ctxt);
4004
755k
    ctxt->nbentities++;
4005
755k
    if (ent != NULL)
4006
511k
        ctxt->nbentities += ent->owner;
4007
755k
    if ((ent != NULL) &&
4008
755k
        (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
4009
387k
        if (len + 10 > buf_size) {
4010
232
      growBuffer(buf, 10);
4011
232
        }
4012
387k
        if ((ctxt->replaceEntities == 0) &&
4013
387k
            (ent->content[0] == '&')) {
4014
171k
      buf[len++] = '&';
4015
171k
      buf[len++] = '#';
4016
171k
      buf[len++] = '3';
4017
171k
      buf[len++] = '8';
4018
171k
      buf[len++] = ';';
4019
216k
        } else {
4020
216k
      buf[len++] = ent->content[0];
4021
216k
        }
4022
387k
    } else if ((ent != NULL) &&
4023
367k
               (ctxt->replaceEntities != 0)) {
4024
54.9k
        if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
4025
54.9k
      ++ctxt->depth;
4026
54.9k
      rep = xmlStringDecodeEntities(ctxt, ent->content,
4027
54.9k
                  XML_SUBSTITUTE_REF,
4028
54.9k
                  0, 0, 0);
4029
54.9k
      --ctxt->depth;
4030
54.9k
      if (rep != NULL) {
4031
50.5k
          current = rep;
4032
1.75M
          while (*current != 0) { /* non input consuming */
4033
1.70M
                                if ((*current == 0xD) || (*current == 0xA) ||
4034
1.70M
                                    (*current == 0x9)) {
4035
71.7k
                                    buf[len++] = 0x20;
4036
71.7k
                                    current++;
4037
71.7k
                                } else
4038
1.63M
                                    buf[len++] = *current++;
4039
1.70M
        if (len + 10 > buf_size) {
4040
6.20k
            growBuffer(buf, 10);
4041
6.20k
        }
4042
1.70M
          }
4043
50.5k
          xmlFree(rep);
4044
50.5k
          rep = NULL;
4045
50.5k
      }
4046
54.9k
        } else {
4047
0
      if (len + 10 > buf_size) {
4048
0
          growBuffer(buf, 10);
4049
0
      }
4050
0
      if (ent->content != NULL)
4051
0
          buf[len++] = ent->content[0];
4052
0
        }
4053
312k
    } else if (ent != NULL) {
4054
68.2k
        int i = xmlStrlen(ent->name);
4055
68.2k
        const xmlChar *cur = ent->name;
4056
4057
        /*
4058
         * This may look absurd but is needed to detect
4059
         * entities problems
4060
         */
4061
68.2k
        if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
4062
68.2k
      (ent->content != NULL) && (ent->checked == 0)) {
4063
12.5k
      unsigned long oldnbent = ctxt->nbentities, diff;
4064
4065
12.5k
      ++ctxt->depth;
4066
12.5k
      rep = xmlStringDecodeEntities(ctxt, ent->content,
4067
12.5k
              XML_SUBSTITUTE_REF, 0, 0, 0);
4068
12.5k
      --ctxt->depth;
4069
4070
12.5k
                        diff = ctxt->nbentities - oldnbent + 1;
4071
12.5k
                        if (diff > INT_MAX / 2)
4072
0
                            diff = INT_MAX / 2;
4073
12.5k
                        ent->checked = diff * 2;
4074
12.5k
      if (rep != NULL) {
4075
12.2k
          if (xmlStrchr(rep, '<'))
4076
676
              ent->checked |= 1;
4077
12.2k
          xmlFree(rep);
4078
12.2k
          rep = NULL;
4079
12.2k
      } else {
4080
299
                            ent->content[0] = 0;
4081
299
                        }
4082
12.5k
        }
4083
4084
        /*
4085
         * Just output the reference
4086
         */
4087
68.2k
        buf[len++] = '&';
4088
68.4k
        while (len + i + 10 > buf_size) {
4089
580
      growBuffer(buf, i + 10);
4090
580
        }
4091
249k
        for (;i > 0;i--)
4092
180k
      buf[len++] = *cur++;
4093
68.2k
        buf[len++] = ';';
4094
68.2k
    }
4095
755k
      }
4096
66.3M
  } else {
4097
66.3M
      if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
4098
8.99M
          if ((len != 0) || (!normalize)) {
4099
8.81M
        if ((!normalize) || (!in_space)) {
4100
8.40M
      COPY_BUF(l,buf,len,0x20);
4101
8.42M
      while (len + 10 > buf_size) {
4102
53.0k
          growBuffer(buf, 10);
4103
53.0k
      }
4104
8.40M
        }
4105
8.81M
        in_space = 1;
4106
8.81M
    }
4107
57.3M
      } else {
4108
57.3M
          in_space = 0;
4109
57.3M
    COPY_BUF(l,buf,len,c);
4110
57.3M
    if (len + 10 > buf_size) {
4111
342k
        growBuffer(buf, 10);
4112
342k
    }
4113
57.3M
      }
4114
66.3M
      NEXTL(l);
4115
66.3M
  }
4116
67.5M
  GROW;
4117
67.5M
  c = CUR_CHAR(l);
4118
67.5M
        if (len > maxLength) {
4119
0
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4120
0
                           "AttValue length too long\n");
4121
0
            goto mem_error;
4122
0
        }
4123
67.5M
    }
4124
1.27M
    if (ctxt->instate == XML_PARSER_EOF)
4125
0
        goto error;
4126
4127
1.27M
    if ((in_space) && (normalize)) {
4128
24.4k
        while ((len > 0) && (buf[len - 1] == 0x20)) len--;
4129
11.1k
    }
4130
1.27M
    buf[len] = 0;
4131
1.27M
    if (RAW == '<') {
4132
219k
  xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
4133
1.05M
    } else if (RAW != limit) {
4134
195k
  if ((c != 0) && (!IS_CHAR(c))) {
4135
102k
      xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
4136
102k
         "invalid character in attribute value\n");
4137
102k
  } else {
4138
93.5k
      xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4139
93.5k
         "AttValue: ' expected\n");
4140
93.5k
        }
4141
195k
    } else
4142
862k
  NEXT;
4143
4144
1.27M
    if (attlen != NULL) *attlen = len;
4145
1.27M
    return(buf);
4146
4147
0
mem_error:
4148
0
    xmlErrMemory(ctxt, NULL);
4149
0
error:
4150
0
    if (buf != NULL)
4151
0
        xmlFree(buf);
4152
0
    if (rep != NULL)
4153
0
        xmlFree(rep);
4154
0
    return(NULL);
4155
0
}
4156
4157
/**
4158
 * xmlParseAttValue:
4159
 * @ctxt:  an XML parser context
4160
 *
4161
 * DEPRECATED: Internal function, don't use.
4162
 *
4163
 * parse a value for an attribute
4164
 * Note: the parser won't do substitution of entities here, this
4165
 * will be handled later in xmlStringGetNodeList
4166
 *
4167
 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
4168
 *                   "'" ([^<&'] | Reference)* "'"
4169
 *
4170
 * 3.3.3 Attribute-Value Normalization:
4171
 * Before the value of an attribute is passed to the application or
4172
 * checked for validity, the XML processor must normalize it as follows:
4173
 * - a character reference is processed by appending the referenced
4174
 *   character to the attribute value
4175
 * - an entity reference is processed by recursively processing the
4176
 *   replacement text of the entity
4177
 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4178
 *   appending #x20 to the normalized value, except that only a single
4179
 *   #x20 is appended for a "#xD#xA" sequence that is part of an external
4180
 *   parsed entity or the literal entity value of an internal parsed entity
4181
 * - other characters are processed by appending them to the normalized value
4182
 * If the declared value is not CDATA, then the XML processor must further
4183
 * process the normalized attribute value by discarding any leading and
4184
 * trailing space (#x20) characters, and by replacing sequences of space
4185
 * (#x20) characters by a single space (#x20) character.
4186
 * All attributes for which no declaration has been read should be treated
4187
 * by a non-validating parser as if declared CDATA.
4188
 *
4189
 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
4190
 */
4191
4192
4193
xmlChar *
4194
6.73M
xmlParseAttValue(xmlParserCtxtPtr ctxt) {
4195
6.73M
    if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
4196
6.73M
    return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
4197
6.73M
}
4198
4199
/**
4200
 * xmlParseSystemLiteral:
4201
 * @ctxt:  an XML parser context
4202
 *
4203
 * DEPRECATED: Internal function, don't use.
4204
 *
4205
 * parse an XML Literal
4206
 *
4207
 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4208
 *
4209
 * Returns the SystemLiteral parsed or NULL
4210
 */
4211
4212
xmlChar *
4213
175k
xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
4214
175k
    xmlChar *buf = NULL;
4215
175k
    int len = 0;
4216
175k
    int size = XML_PARSER_BUFFER_SIZE;
4217
175k
    int cur, l;
4218
175k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4219
62.5k
                    XML_MAX_TEXT_LENGTH :
4220
175k
                    XML_MAX_NAME_LENGTH;
4221
175k
    xmlChar stop;
4222
175k
    int state = ctxt->instate;
4223
175k
    int count = 0;
4224
4225
175k
    SHRINK;
4226
175k
    if (RAW == '"') {
4227
157k
        NEXT;
4228
157k
  stop = '"';
4229
157k
    } else if (RAW == '\'') {
4230
10.5k
        NEXT;
4231
10.5k
  stop = '\'';
4232
10.5k
    } else {
4233
7.39k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4234
7.39k
  return(NULL);
4235
7.39k
    }
4236
4237
168k
    buf = (xmlChar *) xmlMallocAtomic(size);
4238
168k
    if (buf == NULL) {
4239
0
        xmlErrMemory(ctxt, NULL);
4240
0
  return(NULL);
4241
0
    }
4242
168k
    ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
4243
168k
    cur = CUR_CHAR(l);
4244
8.16M
    while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
4245
7.99M
  if (len + 5 >= size) {
4246
7.71k
      xmlChar *tmp;
4247
4248
7.71k
      size *= 2;
4249
7.71k
      tmp = (xmlChar *) xmlRealloc(buf, size);
4250
7.71k
      if (tmp == NULL) {
4251
0
          xmlFree(buf);
4252
0
    xmlErrMemory(ctxt, NULL);
4253
0
    ctxt->instate = (xmlParserInputState) state;
4254
0
    return(NULL);
4255
0
      }
4256
7.71k
      buf = tmp;
4257
7.71k
  }
4258
7.99M
  count++;
4259
7.99M
  if (count > 50) {
4260
96.1k
      SHRINK;
4261
96.1k
      GROW;
4262
96.1k
      count = 0;
4263
96.1k
            if (ctxt->instate == XML_PARSER_EOF) {
4264
0
          xmlFree(buf);
4265
0
    return(NULL);
4266
0
            }
4267
96.1k
  }
4268
7.99M
  COPY_BUF(l,buf,len,cur);
4269
7.99M
  NEXTL(l);
4270
7.99M
  cur = CUR_CHAR(l);
4271
7.99M
  if (cur == 0) {
4272
2.69k
      GROW;
4273
2.69k
      SHRINK;
4274
2.69k
      cur = CUR_CHAR(l);
4275
2.69k
  }
4276
7.99M
        if (len > maxLength) {
4277
22
            xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
4278
22
            xmlFree(buf);
4279
22
            ctxt->instate = (xmlParserInputState) state;
4280
22
            return(NULL);
4281
22
        }
4282
7.99M
    }
4283
168k
    buf[len] = 0;
4284
168k
    ctxt->instate = (xmlParserInputState) state;
4285
168k
    if (!IS_CHAR(cur)) {
4286
3.78k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4287
164k
    } else {
4288
164k
  NEXT;
4289
164k
    }
4290
168k
    return(buf);
4291
168k
}
4292
4293
/**
4294
 * xmlParsePubidLiteral:
4295
 * @ctxt:  an XML parser context
4296
 *
4297
 * DEPRECATED: Internal function, don't use.
4298
 *
4299
 * parse an XML public literal
4300
 *
4301
 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4302
 *
4303
 * Returns the PubidLiteral parsed or NULL.
4304
 */
4305
4306
xmlChar *
4307
59.0k
xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
4308
59.0k
    xmlChar *buf = NULL;
4309
59.0k
    int len = 0;
4310
59.0k
    int size = XML_PARSER_BUFFER_SIZE;
4311
59.0k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4312
22.8k
                    XML_MAX_TEXT_LENGTH :
4313
59.0k
                    XML_MAX_NAME_LENGTH;
4314
59.0k
    xmlChar cur;
4315
59.0k
    xmlChar stop;
4316
59.0k
    int count = 0;
4317
59.0k
    xmlParserInputState oldstate = ctxt->instate;
4318
4319
59.0k
    SHRINK;
4320
59.0k
    if (RAW == '"') {
4321
50.8k
        NEXT;
4322
50.8k
  stop = '"';
4323
50.8k
    } else if (RAW == '\'') {
4324
7.33k
        NEXT;
4325
7.33k
  stop = '\'';
4326
7.33k
    } else {
4327
835
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4328
835
  return(NULL);
4329
835
    }
4330
58.2k
    buf = (xmlChar *) xmlMallocAtomic(size);
4331
58.2k
    if (buf == NULL) {
4332
0
  xmlErrMemory(ctxt, NULL);
4333
0
  return(NULL);
4334
0
    }
4335
58.2k
    ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
4336
58.2k
    cur = CUR;
4337
3.75M
    while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
4338
3.69M
  if (len + 1 >= size) {
4339
5.05k
      xmlChar *tmp;
4340
4341
5.05k
      size *= 2;
4342
5.05k
      tmp = (xmlChar *) xmlRealloc(buf, size);
4343
5.05k
      if (tmp == NULL) {
4344
0
    xmlErrMemory(ctxt, NULL);
4345
0
    xmlFree(buf);
4346
0
    return(NULL);
4347
0
      }
4348
5.05k
      buf = tmp;
4349
5.05k
  }
4350
3.69M
  buf[len++] = cur;
4351
3.69M
  count++;
4352
3.69M
  if (count > 50) {
4353
46.4k
      SHRINK;
4354
46.4k
      GROW;
4355
46.4k
      count = 0;
4356
46.4k
            if (ctxt->instate == XML_PARSER_EOF) {
4357
0
    xmlFree(buf);
4358
0
    return(NULL);
4359
0
            }
4360
46.4k
  }
4361
3.69M
  NEXT;
4362
3.69M
  cur = CUR;
4363
3.69M
  if (cur == 0) {
4364
1.29k
      GROW;
4365
1.29k
      SHRINK;
4366
1.29k
      cur = CUR;
4367
1.29k
  }
4368
3.69M
        if (len > maxLength) {
4369
0
            xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
4370
0
            xmlFree(buf);
4371
0
            return(NULL);
4372
0
        }
4373
3.69M
    }
4374
58.2k
    buf[len] = 0;
4375
58.2k
    if (cur != stop) {
4376
4.37k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4377
53.8k
    } else {
4378
53.8k
  NEXT;
4379
53.8k
    }
4380
58.2k
    ctxt->instate = oldstate;
4381
58.2k
    return(buf);
4382
58.2k
}
4383
4384
static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
4385
4386
/*
 * Lookup table used by the test in the inner loop of the char data
 * testing. Indexed by byte value, 16 entries per row. An entry equals
 * its own index for 0x09 and for 0x20-0x7F except '&' (0x26),
 * '<' (0x3C) and ']' (0x5D); every other entry, including the other
 * control characters, CR/LF and all non-ASCII bytes, is zero.
 */
static const unsigned char test_char_data[256] = {
    /* 0x00 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
               0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x10 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x20 */ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27,
               0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    /* 0x30 */ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
               0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F,
    /* 0x40 */ 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
               0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    /* 0x50 */ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
               0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F,
    /* 0x60 */ 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
               0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    /* 0x70 */ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
               0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
    /* 0x80 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x90 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xA0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xB0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xC0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xD0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xE0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xF0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
4423
4424
/**
4425
 * xmlParseCharData:
4426
 * @ctxt:  an XML parser context
4427
 * @cdata:  int indicating whether we are within a CDATA section
4428
 *
4429
 * DEPRECATED: Internal function, don't use.
4430
 *
4431
 * parse a CharData section.
4432
 * if we are within a CDATA section ']]>' marks an end of section.
4433
 *
4434
 * The right angle bracket (>) may be represented using the string "&gt;",
4435
 * and must, for compatibility, be escaped using "&gt;" or a character
4436
 * reference when it appears in the string "]]>" in content, when that
4437
 * string is not marking the end of a CDATA section.
4438
 *
4439
 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4440
 */
4441
4442
void
4443
28.6M
xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
4444
28.6M
    const xmlChar *in;
4445
28.6M
    int nbchar = 0;
4446
28.6M
    int line = ctxt->input->line;
4447
28.6M
    int col = ctxt->input->col;
4448
28.6M
    int ccol;
4449
4450
28.6M
    SHRINK;
4451
28.6M
    GROW;
4452
    /*
4453
     * Accelerated common case where input don't need to be
4454
     * modified before passing it to the handler.
4455
     */
4456
28.6M
    if (!cdata) {
4457
28.6M
  in = ctxt->input->cur;
4458
29.1M
  do {
4459
39.8M
get_more_space:
4460
114M
      while (*in == 0x20) { in++; ctxt->input->col++; }
4461
39.8M
      if (*in == 0xA) {
4462
11.1M
    do {
4463
11.1M
        ctxt->input->line++; ctxt->input->col = 1;
4464
11.1M
        in++;
4465
11.1M
    } while (*in == 0xA);
4466
10.6M
    goto get_more_space;
4467
10.6M
      }
4468
29.1M
      if (*in == '<') {
4469
9.08M
    nbchar = in - ctxt->input->cur;
4470
9.08M
    if (nbchar > 0) {
4471
9.04M
        const xmlChar *tmp = ctxt->input->cur;
4472
9.04M
        ctxt->input->cur = in;
4473
4474
9.04M
        if ((ctxt->sax != NULL) &&
4475
9.04M
            (ctxt->sax->ignorableWhitespace !=
4476
9.04M
             ctxt->sax->characters)) {
4477
4.36M
      if (areBlanks(ctxt, tmp, nbchar, 1)) {
4478
2.10M
          if (ctxt->sax->ignorableWhitespace != NULL)
4479
2.10M
        ctxt->sax->ignorableWhitespace(ctxt->userData,
4480
2.10M
                   tmp, nbchar);
4481
2.25M
      } else {
4482
2.25M
          if (ctxt->sax->characters != NULL)
4483
2.25M
        ctxt->sax->characters(ctxt->userData,
4484
2.25M
                  tmp, nbchar);
4485
2.25M
          if (*ctxt->space == -1)
4486
854k
              *ctxt->space = -2;
4487
2.25M
      }
4488
4.68M
        } else if ((ctxt->sax != NULL) &&
4489
4.68M
                   (ctxt->sax->characters != NULL)) {
4490
4.68M
      ctxt->sax->characters(ctxt->userData,
4491
4.68M
                tmp, nbchar);
4492
4.68M
        }
4493
9.04M
    }
4494
9.08M
    return;
4495
9.08M
      }
4496
4497
29.7M
get_more:
4498
29.7M
            ccol = ctxt->input->col;
4499
447M
      while (test_char_data[*in]) {
4500
417M
    in++;
4501
417M
    ccol++;
4502
417M
      }
4503
29.7M
      ctxt->input->col = ccol;
4504
29.7M
      if (*in == 0xA) {
4505
9.43M
    do {
4506
9.43M
        ctxt->input->line++; ctxt->input->col = 1;
4507
9.43M
        in++;
4508
9.43M
    } while (*in == 0xA);
4509
8.98M
    goto get_more;
4510
8.98M
      }
4511
20.8M
      if (*in == ']') {
4512
752k
    if ((in[1] == ']') && (in[2] == '>')) {
4513
46.3k
        xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4514
46.3k
        ctxt->input->cur = in + 1;
4515
46.3k
        return;
4516
46.3k
    }
4517
706k
    in++;
4518
706k
    ctxt->input->col++;
4519
706k
    goto get_more;
4520
752k
      }
4521
20.0M
      nbchar = in - ctxt->input->cur;
4522
20.0M
      if (nbchar > 0) {
4523
16.3M
    if ((ctxt->sax != NULL) &&
4524
16.3M
        (ctxt->sax->ignorableWhitespace !=
4525
16.3M
         ctxt->sax->characters) &&
4526
16.3M
        (IS_BLANK_CH(*ctxt->input->cur))) {
4527
3.19M
        const xmlChar *tmp = ctxt->input->cur;
4528
3.19M
        ctxt->input->cur = in;
4529
4530
3.19M
        if (areBlanks(ctxt, tmp, nbchar, 0)) {
4531
220k
            if (ctxt->sax->ignorableWhitespace != NULL)
4532
220k
          ctxt->sax->ignorableWhitespace(ctxt->userData,
4533
220k
                 tmp, nbchar);
4534
2.97M
        } else {
4535
2.97M
            if (ctxt->sax->characters != NULL)
4536
2.97M
          ctxt->sax->characters(ctxt->userData,
4537
2.97M
              tmp, nbchar);
4538
2.97M
      if (*ctxt->space == -1)
4539
1.79M
          *ctxt->space = -2;
4540
2.97M
        }
4541
3.19M
                    line = ctxt->input->line;
4542
3.19M
                    col = ctxt->input->col;
4543
13.1M
    } else if (ctxt->sax != NULL) {
4544
13.1M
        if (ctxt->sax->characters != NULL)
4545
13.1M
      ctxt->sax->characters(ctxt->userData,
4546
13.1M
                ctxt->input->cur, nbchar);
4547
13.1M
                    line = ctxt->input->line;
4548
13.1M
                    col = ctxt->input->col;
4549
13.1M
    }
4550
                /* something really bad happened in the SAX callback */
4551
16.3M
                if (ctxt->instate != XML_PARSER_CONTENT)
4552
0
                    return;
4553
16.3M
      }
4554
20.0M
      ctxt->input->cur = in;
4555
20.0M
      if (*in == 0xD) {
4556
757k
    in++;
4557
757k
    if (*in == 0xA) {
4558
599k
        ctxt->input->cur = in;
4559
599k
        in++;
4560
599k
        ctxt->input->line++; ctxt->input->col = 1;
4561
599k
        continue; /* while */
4562
599k
    }
4563
157k
    in--;
4564
157k
      }
4565
19.4M
      if (*in == '<') {
4566
8.59M
    return;
4567
8.59M
      }
4568
10.8M
      if (*in == '&') {
4569
4.17M
    return;
4570
4.17M
      }
4571
6.69M
      SHRINK;
4572
6.69M
      GROW;
4573
6.69M
            if (ctxt->instate == XML_PARSER_EOF)
4574
0
    return;
4575
6.69M
      in = ctxt->input->cur;
4576
7.29M
  } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09) || (*in == 0x0a));
4577
6.70M
  nbchar = 0;
4578
6.70M
    }
4579
6.70M
    ctxt->input->line = line;
4580
6.70M
    ctxt->input->col = col;
4581
6.70M
    xmlParseCharDataComplex(ctxt, cdata);
4582
6.70M
}
4583
4584
/**
4585
 * xmlParseCharDataComplex:
4586
 * @ctxt:  an XML parser context
4587
 * @cdata:  int indicating whether we are within a CDATA section
4588
 *
4589
 * parse a CharData section.this is the fallback function
4590
 * of xmlParseCharData() when the parsing requires handling
4591
 * of non-ASCII characters.
4592
 */
4593
static void
4594
6.70M
xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
4595
6.70M
    xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4596
6.70M
    int nbchar = 0;
4597
6.70M
    int cur, l;
4598
6.70M
    int count = 0;
4599
4600
6.70M
    SHRINK;
4601
6.70M
    GROW;
4602
6.70M
    cur = CUR_CHAR(l);
4603
174M
    while ((cur != '<') && /* checked */
4604
174M
           (cur != '&') &&
4605
174M
     (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
4606
167M
  if ((cur == ']') && (NXT(1) == ']') &&
4607
167M
      (NXT(2) == '>')) {
4608
36.9k
      if (cdata) break;
4609
36.9k
      else {
4610
36.9k
    xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4611
36.9k
      }
4612
36.9k
  }
4613
167M
  COPY_BUF(l,buf,nbchar,cur);
4614
  /* move current position before possible calling of ctxt->sax->characters */
4615
167M
  NEXTL(l);
4616
167M
  cur = CUR_CHAR(l);
4617
167M
  if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
4618
240k
      buf[nbchar] = 0;
4619
4620
      /*
4621
       * OK the segment is to be consumed as chars.
4622
       */
4623
240k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4624
55.3k
    if (areBlanks(ctxt, buf, nbchar, 0)) {
4625
688
        if (ctxt->sax->ignorableWhitespace != NULL)
4626
688
      ctxt->sax->ignorableWhitespace(ctxt->userData,
4627
688
                                     buf, nbchar);
4628
54.6k
    } else {
4629
54.6k
        if (ctxt->sax->characters != NULL)
4630
54.6k
      ctxt->sax->characters(ctxt->userData, buf, nbchar);
4631
54.6k
        if ((ctxt->sax->characters !=
4632
54.6k
             ctxt->sax->ignorableWhitespace) &&
4633
54.6k
      (*ctxt->space == -1))
4634
3.46k
      *ctxt->space = -2;
4635
54.6k
    }
4636
55.3k
      }
4637
240k
      nbchar = 0;
4638
            /* something really bad happened in the SAX callback */
4639
240k
            if (ctxt->instate != XML_PARSER_CONTENT)
4640
0
                return;
4641
240k
  }
4642
167M
  count++;
4643
167M
  if (count > 50) {
4644
2.36M
      SHRINK;
4645
2.36M
      GROW;
4646
2.36M
      count = 0;
4647
2.36M
            if (ctxt->instate == XML_PARSER_EOF)
4648
0
    return;
4649
2.36M
  }
4650
167M
    }
4651
6.70M
    if (nbchar != 0) {
4652
3.16M
        buf[nbchar] = 0;
4653
  /*
4654
   * OK the segment is to be consumed as chars.
4655
   */
4656
3.16M
  if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4657
646k
      if (areBlanks(ctxt, buf, nbchar, 0)) {
4658
23.2k
    if (ctxt->sax->ignorableWhitespace != NULL)
4659
23.2k
        ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4660
623k
      } else {
4661
623k
    if (ctxt->sax->characters != NULL)
4662
623k
        ctxt->sax->characters(ctxt->userData, buf, nbchar);
4663
623k
    if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4664
623k
        (*ctxt->space == -1))
4665
156k
        *ctxt->space = -2;
4666
623k
      }
4667
646k
  }
4668
3.16M
    }
4669
6.70M
    if ((cur != 0) && (!IS_CHAR(cur))) {
4670
  /* Generate the error and skip the offending character */
4671
3.61M
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4672
3.61M
                          "PCDATA invalid Char value %d\n",
4673
3.61M
                    cur);
4674
3.61M
  NEXTL(l);
4675
3.61M
    }
4676
6.70M
}
4677
4678
/**
4679
 * xmlParseExternalID:
4680
 * @ctxt:  an XML parser context
4681
 * @publicID:  a xmlChar** receiving PubidLiteral
4682
 * @strict: indicate whether we should restrict parsing to only
4683
 *          production [75], see NOTE below
4684
 *
4685
 * DEPRECATED: Internal function, don't use.
4686
 *
4687
 * Parse an External ID or a Public ID
4688
 *
4689
 * NOTE: Productions [75] and [83] interact badly since [75] can generate
4690
 *       'PUBLIC' S PubidLiteral S SystemLiteral
4691
 *
4692
 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4693
 *                   | 'PUBLIC' S PubidLiteral S SystemLiteral
4694
 *
4695
 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4696
 *
4697
 * Returns the function returns SystemLiteral and in the second
4698
 *                case publicID receives PubidLiteral, is strict is off
4699
 *                it is possible to return NULL and have publicID set.
4700
 */
4701
4702
xmlChar *
4703
427k
xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4704
427k
    xmlChar *URI = NULL;
4705
4706
427k
    SHRINK;
4707
4708
427k
    *publicID = NULL;
4709
427k
    if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
4710
120k
        SKIP(6);
4711
120k
  if (SKIP_BLANKS == 0) {
4712
633
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4713
633
                     "Space required after 'SYSTEM'\n");
4714
633
  }
4715
120k
  URI = xmlParseSystemLiteral(ctxt);
4716
120k
  if (URI == NULL) {
4717
973
      xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4718
973
        }
4719
306k
    } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
4720
59.0k
        SKIP(6);
4721
59.0k
  if (SKIP_BLANKS == 0) {
4722
752
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4723
752
        "Space required after 'PUBLIC'\n");
4724
752
  }
4725
59.0k
  *publicID = xmlParsePubidLiteral(ctxt);
4726
59.0k
  if (*publicID == NULL) {
4727
835
      xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
4728
835
  }
4729
59.0k
  if (strict) {
4730
      /*
4731
       * We don't handle [83] so "S SystemLiteral" is required.
4732
       */
4733
54.6k
      if (SKIP_BLANKS == 0) {
4734
6.20k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4735
6.20k
      "Space required after the Public Identifier\n");
4736
6.20k
      }
4737
54.6k
  } else {
4738
      /*
4739
       * We handle [83] so we return immediately, if
4740
       * "S SystemLiteral" is not detected. We skip blanks if no
4741
             * system literal was found, but this is harmless since we must
4742
             * be at the end of a NotationDecl.
4743
       */
4744
4.42k
      if (SKIP_BLANKS == 0) return(NULL);
4745
744
      if ((CUR != '\'') && (CUR != '"')) return(NULL);
4746
744
  }
4747
54.9k
  URI = xmlParseSystemLiteral(ctxt);
4748
54.9k
  if (URI == NULL) {
4749
6.44k
      xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4750
6.44k
        }
4751
54.9k
    }
4752
423k
    return(URI);
4753
427k
}
4754
4755
/**
4756
 * xmlParseCommentComplex:
4757
 * @ctxt:  an XML parser context
4758
 * @buf:  the already parsed part of the buffer
4759
 * @len:  number of bytes in the buffer
4760
 * @size:  allocated size of the buffer
4761
 *
4762
 * Skip an XML (SGML) comment <!-- .... -->
4763
 *  The spec says that "For compatibility, the string "--" (double-hyphen)
4764
 *  must not occur within comments. "
4765
 * This is the slow routine in case the accelerator for ascii didn't work
4766
 *
4767
 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4768
 */
4769
static void
4770
xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf,
4771
370k
                       size_t len, size_t size) {
4772
370k
    int q, ql;
4773
370k
    int r, rl;
4774
370k
    int cur, l;
4775
370k
    size_t count = 0;
4776
370k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4777
306k
                       XML_MAX_HUGE_LENGTH :
4778
370k
                       XML_MAX_TEXT_LENGTH;
4779
370k
    int inputid;
4780
4781
370k
    inputid = ctxt->input->id;
4782
4783
370k
    if (buf == NULL) {
4784
22.3k
        len = 0;
4785
22.3k
  size = XML_PARSER_BUFFER_SIZE;
4786
22.3k
  buf = (xmlChar *) xmlMallocAtomic(size);
4787
22.3k
  if (buf == NULL) {
4788
0
      xmlErrMemory(ctxt, NULL);
4789
0
      return;
4790
0
  }
4791
22.3k
    }
4792
370k
    GROW; /* Assure there's enough input data */
4793
370k
    q = CUR_CHAR(ql);
4794
370k
    if (q == 0)
4795
70.8k
        goto not_terminated;
4796
299k
    if (!IS_CHAR(q)) {
4797
43.6k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4798
43.6k
                          "xmlParseComment: invalid xmlChar value %d\n",
4799
43.6k
                    q);
4800
43.6k
  xmlFree (buf);
4801
43.6k
  return;
4802
43.6k
    }
4803
255k
    NEXTL(ql);
4804
255k
    r = CUR_CHAR(rl);
4805
255k
    if (r == 0)
4806
10.5k
        goto not_terminated;
4807
245k
    if (!IS_CHAR(r)) {
4808
4.45k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4809
4.45k
                          "xmlParseComment: invalid xmlChar value %d\n",
4810
4.45k
                    q);
4811
4.45k
  xmlFree (buf);
4812
4.45k
  return;
4813
4.45k
    }
4814
240k
    NEXTL(rl);
4815
240k
    cur = CUR_CHAR(l);
4816
240k
    if (cur == 0)
4817
13.3k
        goto not_terminated;
4818
31.6M
    while (IS_CHAR(cur) && /* checked */
4819
31.6M
           ((cur != '>') ||
4820
31.5M
      (r != '-') || (q != '-'))) {
4821
31.4M
  if ((r == '-') && (q == '-')) {
4822
52.4k
      xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
4823
52.4k
  }
4824
31.4M
  if (len + 5 >= size) {
4825
117k
      xmlChar *new_buf;
4826
117k
            size_t new_size;
4827
4828
117k
      new_size = size * 2;
4829
117k
      new_buf = (xmlChar *) xmlRealloc(buf, new_size);
4830
117k
      if (new_buf == NULL) {
4831
0
    xmlFree (buf);
4832
0
    xmlErrMemory(ctxt, NULL);
4833
0
    return;
4834
0
      }
4835
117k
      buf = new_buf;
4836
117k
            size = new_size;
4837
117k
  }
4838
31.4M
  COPY_BUF(ql,buf,len,q);
4839
31.4M
  q = r;
4840
31.4M
  ql = rl;
4841
31.4M
  r = cur;
4842
31.4M
  rl = l;
4843
4844
31.4M
  count++;
4845
31.4M
  if (count > 50) {
4846
532k
      SHRINK;
4847
532k
      GROW;
4848
532k
      count = 0;
4849
532k
            if (ctxt->instate == XML_PARSER_EOF) {
4850
0
    xmlFree(buf);
4851
0
    return;
4852
0
            }
4853
532k
  }
4854
31.4M
  NEXTL(l);
4855
31.4M
  cur = CUR_CHAR(l);
4856
31.4M
  if (cur == 0) {
4857
93.5k
      SHRINK;
4858
93.5k
      GROW;
4859
93.5k
      cur = CUR_CHAR(l);
4860
93.5k
  }
4861
4862
31.4M
        if (len > maxLength) {
4863
0
            xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4864
0
                         "Comment too big found", NULL);
4865
0
            xmlFree (buf);
4866
0
            return;
4867
0
        }
4868
31.4M
    }
4869
227k
    buf[len] = 0;
4870
227k
    if (cur == 0) {
4871
93.5k
  xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4872
93.5k
                       "Comment not terminated \n<!--%.50s\n", buf);
4873
134k
    } else if (!IS_CHAR(cur)) {
4874
52.7k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4875
52.7k
                          "xmlParseComment: invalid xmlChar value %d\n",
4876
52.7k
                    cur);
4877
81.3k
    } else {
4878
81.3k
  if (inputid != ctxt->input->id) {
4879
0
      xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4880
0
               "Comment doesn't start and stop in the same"
4881
0
                           " entity\n");
4882
0
  }
4883
81.3k
        NEXT;
4884
81.3k
  if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4885
81.3k
      (!ctxt->disableSAX))
4886
30.2k
      ctxt->sax->comment(ctxt->userData, buf);
4887
81.3k
    }
4888
227k
    xmlFree(buf);
4889
227k
    return;
4890
94.7k
not_terminated:
4891
94.7k
    xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4892
94.7k
       "Comment not terminated\n", NULL);
4893
94.7k
    xmlFree(buf);
4894
94.7k
    return;
4895
227k
}
4896
4897
/**
4898
 * xmlParseComment:
4899
 * @ctxt:  an XML parser context
4900
 *
4901
 * DEPRECATED: Internal function, don't use.
4902
 *
4903
 * Skip an XML (SGML) comment <!-- .... -->
4904
 *  The spec says that "For compatibility, the string "--" (double-hyphen)
4905
 *  must not occur within comments. "
4906
 *
4907
 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4908
 */
4909
void
4910
1.16M
xmlParseComment(xmlParserCtxtPtr ctxt) {
4911
1.16M
    xmlChar *buf = NULL;
4912
1.16M
    size_t size = XML_PARSER_BUFFER_SIZE;
4913
1.16M
    size_t len = 0;
4914
1.16M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4915
804k
                       XML_MAX_HUGE_LENGTH :
4916
1.16M
                       XML_MAX_TEXT_LENGTH;
4917
1.16M
    xmlParserInputState state;
4918
1.16M
    const xmlChar *in;
4919
1.16M
    size_t nbchar = 0;
4920
1.16M
    int ccol;
4921
1.16M
    int inputid;
4922
4923
    /*
4924
     * Check that there is a comment right here.
4925
     */
4926
1.16M
    if ((RAW != '<') || (NXT(1) != '!') ||
4927
1.16M
        (NXT(2) != '-') || (NXT(3) != '-')) return;
4928
1.16M
    state = ctxt->instate;
4929
1.16M
    ctxt->instate = XML_PARSER_COMMENT;
4930
1.16M
    inputid = ctxt->input->id;
4931
1.16M
    SKIP(4);
4932
1.16M
    SHRINK;
4933
1.16M
    GROW;
4934
4935
    /*
4936
     * Accelerated common case where input don't need to be
4937
     * modified before passing it to the handler.
4938
     */
4939
1.16M
    in = ctxt->input->cur;
4940
1.16M
    do {
4941
1.16M
  if (*in == 0xA) {
4942
101k
      do {
4943
101k
    ctxt->input->line++; ctxt->input->col = 1;
4944
101k
    in++;
4945
101k
      } while (*in == 0xA);
4946
93.6k
  }
4947
2.58M
get_more:
4948
2.58M
        ccol = ctxt->input->col;
4949
66.4M
  while (((*in > '-') && (*in <= 0x7F)) ||
4950
66.4M
         ((*in >= 0x20) && (*in < '-')) ||
4951
66.4M
         (*in == 0x09)) {
4952
63.8M
        in++;
4953
63.8M
        ccol++;
4954
63.8M
  }
4955
2.58M
  ctxt->input->col = ccol;
4956
2.58M
  if (*in == 0xA) {
4957
876k
      do {
4958
876k
    ctxt->input->line++; ctxt->input->col = 1;
4959
876k
    in++;
4960
876k
      } while (*in == 0xA);
4961
852k
      goto get_more;
4962
852k
  }
4963
1.73M
  nbchar = in - ctxt->input->cur;
4964
  /*
4965
   * save current set of data
4966
   */
4967
1.73M
  if (nbchar > 0) {
4968
1.69M
      if ((ctxt->sax != NULL) &&
4969
1.69M
    (ctxt->sax->comment != NULL)) {
4970
1.69M
    if (buf == NULL) {
4971
1.13M
        if ((*in == '-') && (in[1] == '-'))
4972
706k
            size = nbchar + 1;
4973
425k
        else
4974
425k
            size = XML_PARSER_BUFFER_SIZE + nbchar;
4975
1.13M
        buf = (xmlChar *) xmlMallocAtomic(size);
4976
1.13M
        if (buf == NULL) {
4977
0
            xmlErrMemory(ctxt, NULL);
4978
0
      ctxt->instate = state;
4979
0
      return;
4980
0
        }
4981
1.13M
        len = 0;
4982
1.13M
    } else if (len + nbchar + 1 >= size) {
4983
93.9k
        xmlChar *new_buf;
4984
93.9k
        size  += len + nbchar + XML_PARSER_BUFFER_SIZE;
4985
93.9k
        new_buf = (xmlChar *) xmlRealloc(buf, size);
4986
93.9k
        if (new_buf == NULL) {
4987
0
            xmlFree (buf);
4988
0
      xmlErrMemory(ctxt, NULL);
4989
0
      ctxt->instate = state;
4990
0
      return;
4991
0
        }
4992
93.9k
        buf = new_buf;
4993
93.9k
    }
4994
1.69M
    memcpy(&buf[len], ctxt->input->cur, nbchar);
4995
1.69M
    len += nbchar;
4996
1.69M
    buf[len] = 0;
4997
1.69M
      }
4998
1.69M
  }
4999
1.73M
        if (len > maxLength) {
5000
0
            xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
5001
0
                         "Comment too big found", NULL);
5002
0
            xmlFree (buf);
5003
0
            return;
5004
0
        }
5005
1.73M
  ctxt->input->cur = in;
5006
1.73M
  if (*in == 0xA) {
5007
0
      in++;
5008
0
      ctxt->input->line++; ctxt->input->col = 1;
5009
0
  }
5010
1.73M
  if (*in == 0xD) {
5011
58.9k
      in++;
5012
58.9k
      if (*in == 0xA) {
5013
27.2k
    ctxt->input->cur = in;
5014
27.2k
    in++;
5015
27.2k
    ctxt->input->line++; ctxt->input->col = 1;
5016
27.2k
    goto get_more;
5017
27.2k
      }
5018
31.7k
      in--;
5019
31.7k
  }
5020
1.70M
  SHRINK;
5021
1.70M
  GROW;
5022
1.70M
        if (ctxt->instate == XML_PARSER_EOF) {
5023
0
            xmlFree(buf);
5024
0
            return;
5025
0
        }
5026
1.70M
  in = ctxt->input->cur;
5027
1.70M
  if (*in == '-') {
5028
1.33M
      if (in[1] == '-') {
5029
861k
          if (in[2] == '>') {
5030
795k
        if (ctxt->input->id != inputid) {
5031
0
      xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5032
0
                     "comment doesn't start and stop in the"
5033
0
                                       " same entity\n");
5034
0
        }
5035
795k
        SKIP(3);
5036
795k
        if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
5037
795k
            (!ctxt->disableSAX)) {
5038
335k
      if (buf != NULL)
5039
331k
          ctxt->sax->comment(ctxt->userData, buf);
5040
4.10k
      else
5041
4.10k
          ctxt->sax->comment(ctxt->userData, BAD_CAST "");
5042
335k
        }
5043
795k
        if (buf != NULL)
5044
783k
            xmlFree(buf);
5045
795k
        if (ctxt->instate != XML_PARSER_EOF)
5046
795k
      ctxt->instate = state;
5047
795k
        return;
5048
795k
    }
5049
66.0k
    if (buf != NULL) {
5050
61.0k
        xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5051
61.0k
                          "Double hyphen within comment: "
5052
61.0k
                                      "<!--%.50s\n",
5053
61.0k
              buf);
5054
61.0k
    } else
5055
4.98k
        xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5056
4.98k
                          "Double hyphen within comment\n", NULL);
5057
66.0k
                if (ctxt->instate == XML_PARSER_EOF) {
5058
0
                    xmlFree(buf);
5059
0
                    return;
5060
0
                }
5061
66.0k
    in++;
5062
66.0k
    ctxt->input->col++;
5063
66.0k
      }
5064
539k
      in++;
5065
539k
      ctxt->input->col++;
5066
539k
      goto get_more;
5067
1.33M
  }
5068
1.70M
    } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09) || (*in == 0x0a));
5069
370k
    xmlParseCommentComplex(ctxt, buf, len, size);
5070
370k
    ctxt->instate = state;
5071
370k
    return;
5072
1.16M
}
5073
5074
5075
/**
5076
 * xmlParsePITarget:
5077
 * @ctxt:  an XML parser context
5078
 *
5079
 * DEPRECATED: Internal function, don't use.
5080
 *
5081
 * parse the name of a PI
5082
 *
5083
 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
5084
 *
5085
 * Returns the PITarget name or NULL
5086
 */
5087
5088
const xmlChar *
5089
465k
xmlParsePITarget(xmlParserCtxtPtr ctxt) {
5090
465k
    const xmlChar *name;
5091
5092
465k
    name = xmlParseName(ctxt);
5093
465k
    if ((name != NULL) &&
5094
465k
        ((name[0] == 'x') || (name[0] == 'X')) &&
5095
465k
        ((name[1] == 'm') || (name[1] == 'M')) &&
5096
465k
        ((name[2] == 'l') || (name[2] == 'L'))) {
5097
66.2k
  int i;
5098
66.2k
  if ((name[0] == 'x') && (name[1] == 'm') &&
5099
66.2k
      (name[2] == 'l') && (name[3] == 0)) {
5100
52.0k
      xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5101
52.0k
     "XML declaration allowed only at the start of the document\n");
5102
52.0k
      return(name);
5103
52.0k
  } else if (name[3] == 0) {
5104
7.50k
      xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
5105
7.50k
      return(name);
5106
7.50k
  }
5107
20.2k
  for (i = 0;;i++) {
5108
20.2k
      if (xmlW3CPIs[i] == NULL) break;
5109
13.5k
      if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
5110
0
          return(name);
5111
13.5k
  }
5112
6.75k
  xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5113
6.75k
          "xmlParsePITarget: invalid name prefix 'xml'\n",
5114
6.75k
          NULL, NULL);
5115
6.75k
    }
5116
406k
    if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
5117
25.3k
  xmlNsErr(ctxt, XML_NS_ERR_COLON,
5118
25.3k
     "colons are forbidden from PI names '%s'\n", name, NULL, NULL);
5119
25.3k
    }
5120
406k
    return(name);
5121
465k
}
5122
5123
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * Parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype information.
 * This will add the given catalog to the parsing context in order
 * to be used if there is a resolution need further down in the document.
 *
 * The value must have the form  catalog = "URL"  (single or double
 * quotes, optional blanks around the tokens, nothing after the closing
 * quote). Malformed values produce an XML_WAR_CATALOG_PI warning, except
 * for a missing '=' after "catalog", which is ignored silently.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    xmlChar *URL = NULL;
    const xmlChar *p = catalog;
    const xmlChar *start;
    xmlChar quote;

    while (IS_BLANK_CH(*p))
        p++;
    if (xmlStrncmp(p, BAD_CAST"catalog", 7) != 0)
        goto error;
    p += 7;
    while (IS_BLANK_CH(*p))
        p++;
    if (*p != '=')
        return;
    p++;
    while (IS_BLANK_CH(*p))
        p++;

    /* the value has to be quoted; remember which quote opened it */
    quote = *p;
    if ((quote != '\'') && (quote != '"'))
        goto error;
    p++;
    start = p;
    while ((*p != 0) && (*p != quote))
        p++;
    if (*p == 0)
        goto error;
    URL = xmlStrndup(start, p - start);

    /* only blanks may follow the closing quote */
    p++;
    while (IS_BLANK_CH(*p))
        p++;
    if (*p != 0)
        goto error;

    if (URL != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
        xmlFree(URL);
    }
    return;

error:
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
                  "Catalog PI syntax error: %s\n",
                  catalog, NULL);
    if (URL != NULL)
        xmlFree(URL);
}
#endif
5184
5185
/**
5186
 * xmlParsePI:
5187
 * @ctxt:  an XML parser context
5188
 *
5189
 * DEPRECATED: Internal function, don't use.
5190
 *
5191
 * parse an XML Processing Instruction.
5192
 *
5193
 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
5194
 *
5195
 * The processing is transferred to SAX once parsed.
5196
 */
5197
5198
void
5199
465k
xmlParsePI(xmlParserCtxtPtr ctxt) {
5200
465k
    xmlChar *buf = NULL;
5201
465k
    size_t len = 0;
5202
465k
    size_t size = XML_PARSER_BUFFER_SIZE;
5203
465k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
5204
345k
                       XML_MAX_HUGE_LENGTH :
5205
465k
                       XML_MAX_TEXT_LENGTH;
5206
465k
    int cur, l;
5207
465k
    const xmlChar *target;
5208
465k
    xmlParserInputState state;
5209
465k
    int count = 0;
5210
5211
465k
    if ((RAW == '<') && (NXT(1) == '?')) {
5212
465k
  int inputid = ctxt->input->id;
5213
465k
  state = ctxt->instate;
5214
465k
        ctxt->instate = XML_PARSER_PI;
5215
  /*
5216
   * this is a Processing Instruction.
5217
   */
5218
465k
  SKIP(2);
5219
465k
  SHRINK;
5220
5221
  /*
5222
   * Parse the target name and check for special support like
5223
   * namespace.
5224
   */
5225
465k
        target = xmlParsePITarget(ctxt);
5226
465k
  if (target != NULL) {
5227
426k
      if ((RAW == '?') && (NXT(1) == '>')) {
5228
124k
    if (inputid != ctxt->input->id) {
5229
0
        xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5230
0
                             "PI declaration doesn't start and stop in"
5231
0
                                   " the same entity\n");
5232
0
    }
5233
124k
    SKIP(2);
5234
5235
    /*
5236
     * SAX: PI detected.
5237
     */
5238
124k
    if ((ctxt->sax) && (!ctxt->disableSAX) &&
5239
124k
        (ctxt->sax->processingInstruction != NULL))
5240
66.6k
        ctxt->sax->processingInstruction(ctxt->userData,
5241
66.6k
                                         target, NULL);
5242
124k
    if (ctxt->instate != XML_PARSER_EOF)
5243
124k
        ctxt->instate = state;
5244
124k
    return;
5245
124k
      }
5246
301k
      buf = (xmlChar *) xmlMallocAtomic(size);
5247
301k
      if (buf == NULL) {
5248
0
    xmlErrMemory(ctxt, NULL);
5249
0
    ctxt->instate = state;
5250
0
    return;
5251
0
      }
5252
301k
      if (SKIP_BLANKS == 0) {
5253
107k
    xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
5254
107k
        "ParsePI: PI %s space expected\n", target);
5255
107k
      }
5256
301k
      cur = CUR_CHAR(l);
5257
20.8M
      while (IS_CHAR(cur) && /* checked */
5258
20.8M
       ((cur != '?') || (NXT(1) != '>'))) {
5259
20.5M
    if (len + 5 >= size) {
5260
61.8k
        xmlChar *tmp;
5261
61.8k
                    size_t new_size = size * 2;
5262
61.8k
        tmp = (xmlChar *) xmlRealloc(buf, new_size);
5263
61.8k
        if (tmp == NULL) {
5264
0
      xmlErrMemory(ctxt, NULL);
5265
0
      xmlFree(buf);
5266
0
      ctxt->instate = state;
5267
0
      return;
5268
0
        }
5269
61.8k
        buf = tmp;
5270
61.8k
                    size = new_size;
5271
61.8k
    }
5272
20.5M
    count++;
5273
20.5M
    if (count > 50) {
5274
301k
        SHRINK;
5275
301k
        GROW;
5276
301k
                    if (ctxt->instate == XML_PARSER_EOF) {
5277
0
                        xmlFree(buf);
5278
0
                        return;
5279
0
                    }
5280
301k
        count = 0;
5281
301k
    }
5282
20.5M
    COPY_BUF(l,buf,len,cur);
5283
20.5M
    NEXTL(l);
5284
20.5M
    cur = CUR_CHAR(l);
5285
20.5M
    if (cur == 0) {
5286
97.9k
        SHRINK;
5287
97.9k
        GROW;
5288
97.9k
        cur = CUR_CHAR(l);
5289
97.9k
    }
5290
20.5M
                if (len > maxLength) {
5291
0
                    xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5292
0
                                      "PI %s too big found", target);
5293
0
                    xmlFree(buf);
5294
0
                    ctxt->instate = state;
5295
0
                    return;
5296
0
                }
5297
20.5M
      }
5298
301k
      buf[len] = 0;
5299
301k
      if (cur != '?') {
5300
120k
    xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5301
120k
          "ParsePI: PI %s never end ...\n", target);
5302
181k
      } else {
5303
181k
    if (inputid != ctxt->input->id) {
5304
43
        xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5305
43
                             "PI declaration doesn't start and stop in"
5306
43
                                   " the same entity\n");
5307
43
    }
5308
181k
    SKIP(2);
5309
5310
181k
#ifdef LIBXML_CATALOG_ENABLED
5311
181k
    if (((state == XML_PARSER_MISC) ||
5312
181k
               (state == XML_PARSER_START)) &&
5313
181k
        (xmlStrEqual(target, XML_CATALOG_PI))) {
5314
0
        xmlCatalogAllow allow = xmlCatalogGetDefaults();
5315
0
        if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5316
0
      (allow == XML_CATA_ALLOW_ALL))
5317
0
      xmlParseCatalogPI(ctxt, buf);
5318
0
    }
5319
181k
#endif
5320
5321
5322
    /*
5323
     * SAX: PI detected.
5324
     */
5325
181k
    if ((ctxt->sax) && (!ctxt->disableSAX) &&
5326
181k
        (ctxt->sax->processingInstruction != NULL))
5327
100k
        ctxt->sax->processingInstruction(ctxt->userData,
5328
100k
                                         target, buf);
5329
181k
      }
5330
301k
      xmlFree(buf);
5331
301k
  } else {
5332
39.5k
      xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
5333
39.5k
  }
5334
341k
  if (ctxt->instate != XML_PARSER_EOF)
5335
341k
      ctxt->instate = state;
5336
341k
    }
5337
465k
}
5338
5339
/**
5340
 * xmlParseNotationDecl:
5341
 * @ctxt:  an XML parser context
5342
 *
5343
 * DEPRECATED: Internal function, don't use.
5344
 *
5345
 * parse a notation declaration
5346
 *
5347
 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID |  PublicID) S? '>'
5348
 *
5349
 * Hence there is actually 3 choices:
5350
 *     'PUBLIC' S PubidLiteral
5351
 *     'PUBLIC' S PubidLiteral S SystemLiteral
5352
 * and 'SYSTEM' S SystemLiteral
5353
 *
5354
 * See the NOTE on xmlParseExternalID().
5355
 */
5356
5357
void
5358
14.0k
xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
5359
14.0k
    const xmlChar *name;
5360
14.0k
    xmlChar *Pubid;
5361
14.0k
    xmlChar *Systemid;
5362
5363
14.0k
    if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5364
10.2k
  int inputid = ctxt->input->id;
5365
10.2k
  SHRINK;
5366
10.2k
  SKIP(10);
5367
10.2k
  if (SKIP_BLANKS == 0) {
5368
404
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5369
404
         "Space required after '<!NOTATION'\n");
5370
404
      return;
5371
404
  }
5372
5373
9.83k
        name = xmlParseName(ctxt);
5374
9.83k
  if (name == NULL) {
5375
491
      xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5376
491
      return;
5377
491
  }
5378
9.34k
  if (xmlStrchr(name, ':') != NULL) {
5379
487
      xmlNsErr(ctxt, XML_NS_ERR_COLON,
5380
487
         "colons are forbidden from notation names '%s'\n",
5381
487
         name, NULL, NULL);
5382
487
  }
5383
9.34k
  if (SKIP_BLANKS == 0) {
5384
746
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5385
746
         "Space required after the NOTATION name'\n");
5386
746
      return;
5387
746
  }
5388
5389
  /*
5390
   * Parse the IDs.
5391
   */
5392
8.59k
  Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
5393
8.59k
  SKIP_BLANKS;
5394
5395
8.59k
  if (RAW == '>') {
5396
6.32k
      if (inputid != ctxt->input->id) {
5397
0
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5398
0
                         "Notation declaration doesn't start and stop"
5399
0
                               " in the same entity\n");
5400
0
      }
5401
6.32k
      NEXT;
5402
6.32k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5403
6.32k
    (ctxt->sax->notationDecl != NULL))
5404
5.13k
    ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5405
6.32k
  } else {
5406
2.27k
      xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5407
2.27k
  }
5408
8.59k
  if (Systemid != NULL) xmlFree(Systemid);
5409
8.59k
  if (Pubid != NULL) xmlFree(Pubid);
5410
8.59k
    }
5411
14.0k
}
5412
5413
/**
5414
 * xmlParseEntityDecl:
5415
 * @ctxt:  an XML parser context
5416
 *
5417
 * DEPRECATED: Internal function, don't use.
5418
 *
5419
 * parse <!ENTITY declarations
5420
 *
5421
 * [70] EntityDecl ::= GEDecl | PEDecl
5422
 *
5423
 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5424
 *
5425
 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5426
 *
5427
 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5428
 *
5429
 * [74] PEDef ::= EntityValue | ExternalID
5430
 *
5431
 * [76] NDataDecl ::= S 'NDATA' S Name
5432
 *
5433
 * [ VC: Notation Declared ]
5434
 * The Name must match the declared name of a notation.
5435
 */
5436
5437
void
5438
497k
xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
5439
497k
    const xmlChar *name = NULL;
5440
497k
    xmlChar *value = NULL;
5441
497k
    xmlChar *URI = NULL, *literal = NULL;
5442
497k
    const xmlChar *ndata = NULL;
5443
497k
    int isParameter = 0;
5444
497k
    xmlChar *orig = NULL;
5445
5446
    /* GROW; done in the caller */
5447
497k
    if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
5448
493k
  int inputid = ctxt->input->id;
5449
493k
  SHRINK;
5450
493k
  SKIP(8);
5451
493k
  if (SKIP_BLANKS == 0) {
5452
45.4k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5453
45.4k
         "Space required after '<!ENTITY'\n");
5454
45.4k
  }
5455
5456
493k
  if (RAW == '%') {
5457
41.6k
      NEXT;
5458
41.6k
      if (SKIP_BLANKS == 0) {
5459
1.30k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5460
1.30k
             "Space required after '%%'\n");
5461
1.30k
      }
5462
41.6k
      isParameter = 1;
5463
41.6k
  }
5464
5465
493k
        name = xmlParseName(ctxt);
5466
493k
  if (name == NULL) {
5467
38.4k
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5468
38.4k
                     "xmlParseEntityDecl: no name\n");
5469
38.4k
            return;
5470
38.4k
  }
5471
454k
  if (xmlStrchr(name, ':') != NULL) {
5472
2.46k
      xmlNsErr(ctxt, XML_NS_ERR_COLON,
5473
2.46k
         "colons are forbidden from entities names '%s'\n",
5474
2.46k
         name, NULL, NULL);
5475
2.46k
  }
5476
454k
  if (SKIP_BLANKS == 0) {
5477
18.6k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5478
18.6k
         "Space required after the entity name\n");
5479
18.6k
  }
5480
5481
454k
  ctxt->instate = XML_PARSER_ENTITY_DECL;
5482
  /*
5483
   * handle the various case of definitions...
5484
   */
5485
454k
  if (isParameter) {
5486
41.2k
      if ((RAW == '"') || (RAW == '\'')) {
5487
31.8k
          value = xmlParseEntityValue(ctxt, &orig);
5488
31.8k
    if (value) {
5489
28.3k
        if ((ctxt->sax != NULL) &&
5490
28.3k
      (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5491
25.2k
      ctxt->sax->entityDecl(ctxt->userData, name,
5492
25.2k
                        XML_INTERNAL_PARAMETER_ENTITY,
5493
25.2k
            NULL, NULL, value);
5494
28.3k
    }
5495
31.8k
      } else {
5496
9.36k
          URI = xmlParseExternalID(ctxt, &literal, 1);
5497
9.36k
    if ((URI == NULL) && (literal == NULL)) {
5498
874
        xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5499
874
    }
5500
9.36k
    if (URI) {
5501
8.21k
        xmlURIPtr uri;
5502
5503
8.21k
        uri = xmlParseURI((const char *) URI);
5504
8.21k
        if (uri == NULL) {
5505
460
            xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5506
460
             "Invalid URI: %s\n", URI);
5507
      /*
5508
       * This really ought to be a well formedness error
5509
       * but the XML Core WG decided otherwise c.f. issue
5510
       * E26 of the XML erratas.
5511
       */
5512
7.75k
        } else {
5513
7.75k
      if (uri->fragment != NULL) {
5514
          /*
5515
           * Okay this is foolish to block those but not
5516
           * invalid URIs.
5517
           */
5518
106
          xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5519
7.65k
      } else {
5520
7.65k
          if ((ctxt->sax != NULL) &&
5521
7.65k
        (!ctxt->disableSAX) &&
5522
7.65k
        (ctxt->sax->entityDecl != NULL))
5523
7.09k
        ctxt->sax->entityDecl(ctxt->userData, name,
5524
7.09k
              XML_EXTERNAL_PARAMETER_ENTITY,
5525
7.09k
              literal, URI, NULL);
5526
7.65k
      }
5527
7.75k
      xmlFreeURI(uri);
5528
7.75k
        }
5529
8.21k
    }
5530
9.36k
      }
5531
413k
  } else {
5532
413k
      if ((RAW == '"') || (RAW == '\'')) {
5533
329k
          value = xmlParseEntityValue(ctxt, &orig);
5534
329k
    if ((ctxt->sax != NULL) &&
5535
329k
        (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5536
274k
        ctxt->sax->entityDecl(ctxt->userData, name,
5537
274k
        XML_INTERNAL_GENERAL_ENTITY,
5538
274k
        NULL, NULL, value);
5539
    /*
5540
     * For expat compatibility in SAX mode.
5541
     */
5542
329k
    if ((ctxt->myDoc == NULL) ||
5543
329k
        (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5544
10.6k
        if (ctxt->myDoc == NULL) {
5545
1.87k
      ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5546
1.87k
      if (ctxt->myDoc == NULL) {
5547
0
          xmlErrMemory(ctxt, "New Doc failed");
5548
0
          return;
5549
0
      }
5550
1.87k
      ctxt->myDoc->properties = XML_DOC_INTERNAL;
5551
1.87k
        }
5552
10.6k
        if (ctxt->myDoc->intSubset == NULL)
5553
1.87k
      ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5554
1.87k
              BAD_CAST "fake", NULL, NULL);
5555
5556
10.6k
        xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5557
10.6k
                    NULL, NULL, value);
5558
10.6k
    }
5559
329k
      } else {
5560
84.2k
          URI = xmlParseExternalID(ctxt, &literal, 1);
5561
84.2k
    if ((URI == NULL) && (literal == NULL)) {
5562
20.5k
        xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5563
20.5k
    }
5564
84.2k
    if (URI) {
5565
61.0k
        xmlURIPtr uri;
5566
5567
61.0k
        uri = xmlParseURI((const char *)URI);
5568
61.0k
        if (uri == NULL) {
5569
4.25k
            xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5570
4.25k
             "Invalid URI: %s\n", URI);
5571
      /*
5572
       * This really ought to be a well formedness error
5573
       * but the XML Core WG decided otherwise c.f. issue
5574
       * E26 of the XML erratas.
5575
       */
5576
56.7k
        } else {
5577
56.7k
      if (uri->fragment != NULL) {
5578
          /*
5579
           * Okay this is foolish to block those but not
5580
           * invalid URIs.
5581
           */
5582
485
          xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5583
485
      }
5584
56.7k
      xmlFreeURI(uri);
5585
56.7k
        }
5586
61.0k
    }
5587
84.2k
    if ((RAW != '>') && (SKIP_BLANKS == 0)) {
5588
6.40k
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5589
6.40k
           "Space required before 'NDATA'\n");
5590
6.40k
    }
5591
84.2k
    if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
5592
11.7k
        SKIP(5);
5593
11.7k
        if (SKIP_BLANKS == 0) {
5594
866
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5595
866
               "Space required after 'NDATA'\n");
5596
866
        }
5597
11.7k
        ndata = xmlParseName(ctxt);
5598
11.7k
        if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5599
11.7k
            (ctxt->sax->unparsedEntityDecl != NULL))
5600
10.6k
      ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5601
10.6k
            literal, URI, ndata);
5602
72.4k
    } else {
5603
72.4k
        if ((ctxt->sax != NULL) &&
5604
72.4k
            (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5605
62.3k
      ctxt->sax->entityDecl(ctxt->userData, name,
5606
62.3k
            XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5607
62.3k
            literal, URI, NULL);
5608
        /*
5609
         * For expat compatibility in SAX mode.
5610
         * assuming the entity replacement was asked for
5611
         */
5612
72.4k
        if ((ctxt->replaceEntities != 0) &&
5613
72.4k
      ((ctxt->myDoc == NULL) ||
5614
44.4k
      (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5615
1.42k
      if (ctxt->myDoc == NULL) {
5616
255
          ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5617
255
          if (ctxt->myDoc == NULL) {
5618
0
              xmlErrMemory(ctxt, "New Doc failed");
5619
0
        return;
5620
0
          }
5621
255
          ctxt->myDoc->properties = XML_DOC_INTERNAL;
5622
255
      }
5623
5624
1.42k
      if (ctxt->myDoc->intSubset == NULL)
5625
255
          ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5626
255
            BAD_CAST "fake", NULL, NULL);
5627
1.42k
      xmlSAX2EntityDecl(ctxt, name,
5628
1.42k
                  XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5629
1.42k
                  literal, URI, NULL);
5630
1.42k
        }
5631
72.4k
    }
5632
84.2k
      }
5633
413k
  }
5634
454k
  if (ctxt->instate == XML_PARSER_EOF)
5635
0
      goto done;
5636
454k
  SKIP_BLANKS;
5637
454k
  if (RAW != '>') {
5638
14.5k
      xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
5639
14.5k
              "xmlParseEntityDecl: entity %s not terminated\n", name);
5640
14.5k
      xmlHaltParser(ctxt);
5641
440k
  } else {
5642
440k
      if (inputid != ctxt->input->id) {
5643
96
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5644
96
                         "Entity declaration doesn't start and stop in"
5645
96
                               " the same entity\n");
5646
96
      }
5647
440k
      NEXT;
5648
440k
  }
5649
454k
  if (orig != NULL) {
5650
      /*
5651
       * Ugly mechanism to save the raw entity value.
5652
       */
5653
352k
      xmlEntityPtr cur = NULL;
5654
5655
352k
      if (isParameter) {
5656
29.3k
          if ((ctxt->sax != NULL) &&
5657
29.3k
        (ctxt->sax->getParameterEntity != NULL))
5658
29.3k
        cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5659
322k
      } else {
5660
322k
          if ((ctxt->sax != NULL) &&
5661
322k
        (ctxt->sax->getEntity != NULL))
5662
322k
        cur = ctxt->sax->getEntity(ctxt->userData, name);
5663
322k
    if ((cur == NULL) && (ctxt->userData==ctxt)) {
5664
38.9k
        cur = xmlSAX2GetEntity(ctxt, name);
5665
38.9k
    }
5666
322k
      }
5667
352k
            if ((cur != NULL) && (cur->orig == NULL)) {
5668
290k
    cur->orig = orig;
5669
290k
                orig = NULL;
5670
290k
      }
5671
352k
  }
5672
5673
454k
done:
5674
454k
  if (value != NULL) xmlFree(value);
5675
454k
  if (URI != NULL) xmlFree(URI);
5676
454k
  if (literal != NULL) xmlFree(literal);
5677
454k
        if (orig != NULL) xmlFree(orig);
5678
454k
    }
5679
497k
}
5680
5681
/**
5682
 * xmlParseDefaultDecl:
5683
 * @ctxt:  an XML parser context
5684
 * @value:  Receive a possible fixed default value for the attribute
5685
 *
5686
 * DEPRECATED: Internal function, don't use.
5687
 *
5688
 * Parse an attribute default declaration
5689
 *
5690
 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5691
 *
5692
 * [ VC: Required Attribute ]
5693
 * if the default declaration is the keyword #REQUIRED, then the
5694
 * attribute must be specified for all elements of the type in the
5695
 * attribute-list declaration.
5696
 *
5697
 * [ VC: Attribute Default Legal ]
5698
 * The declared default value must meet the lexical constraints of
5699
 * the declared attribute type c.f. xmlValidateAttributeDecl()
5700
 *
5701
 * [ VC: Fixed Attribute Default ]
5702
 * if an attribute has a default value declared with the #FIXED
5703
 * keyword, instances of that attribute must match the default value.
5704
 *
5705
 * [ WFC: No < in Attribute Values ]
5706
 * handled in xmlParseAttValue()
5707
 *
5708
 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5709
 *          or XML_ATTRIBUTE_FIXED.
5710
 */
5711
5712
int
5713
541k
xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5714
541k
    int val;
5715
541k
    xmlChar *ret;
5716
5717
541k
    *value = NULL;
5718
541k
    if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
5719
172k
  SKIP(9);
5720
172k
  return(XML_ATTRIBUTE_REQUIRED);
5721
172k
    }
5722
369k
    if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
5723
206k
  SKIP(8);
5724
206k
  return(XML_ATTRIBUTE_IMPLIED);
5725
206k
    }
5726
162k
    val = XML_ATTRIBUTE_NONE;
5727
162k
    if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
5728
35.5k
  SKIP(6);
5729
35.5k
  val = XML_ATTRIBUTE_FIXED;
5730
35.5k
  if (SKIP_BLANKS == 0) {
5731
412
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5732
412
         "Space required after '#FIXED'\n");
5733
412
  }
5734
35.5k
    }
5735
162k
    ret = xmlParseAttValue(ctxt);
5736
162k
    ctxt->instate = XML_PARSER_DTD;
5737
162k
    if (ret == NULL) {
5738
41.4k
  xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
5739
41.4k
           "Attribute default value declaration error\n");
5740
41.4k
    } else
5741
121k
        *value = ret;
5742
162k
    return(val);
5743
369k
}
5744
5745
/**
5746
 * xmlParseNotationType:
5747
 * @ctxt:  an XML parser context
5748
 *
5749
 * DEPRECATED: Internal function, don't use.
5750
 *
5751
 * parse an Notation attribute type.
5752
 *
5753
 * Note: the leading 'NOTATION' S part has already being parsed...
5754
 *
5755
 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5756
 *
5757
 * [ VC: Notation Attributes ]
5758
 * Values of this type must match one of the notation names included
5759
 * in the declaration; all notation names in the declaration must be declared.
5760
 *
5761
 * Returns: the notation attribute tree built while parsing
5762
 */
5763
5764
xmlEnumerationPtr
5765
38.7k
xmlParseNotationType(xmlParserCtxtPtr ctxt) {
5766
38.7k
    const xmlChar *name;
5767
38.7k
    xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5768
5769
38.7k
    if (RAW != '(') {
5770
568
  xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5771
568
  return(NULL);
5772
568
    }
5773
38.1k
    SHRINK;
5774
49.2k
    do {
5775
49.2k
        NEXT;
5776
49.2k
  SKIP_BLANKS;
5777
49.2k
        name = xmlParseName(ctxt);
5778
49.2k
  if (name == NULL) {
5779
5.12k
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5780
5.12k
         "Name expected in NOTATION declaration\n");
5781
5.12k
            xmlFreeEnumeration(ret);
5782
5.12k
      return(NULL);
5783
5.12k
  }
5784
44.1k
  tmp = ret;
5785
60.1k
  while (tmp != NULL) {
5786
18.2k
      if (xmlStrEqual(name, tmp->name)) {
5787
2.26k
    xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5788
2.26k
    "standalone: attribute notation value token %s duplicated\n",
5789
2.26k
         name, NULL);
5790
2.26k
    if (!xmlDictOwns(ctxt->dict, name))
5791
0
        xmlFree((xmlChar *) name);
5792
2.26k
    break;
5793
2.26k
      }
5794
16.0k
      tmp = tmp->next;
5795
16.0k
  }
5796
44.1k
  if (tmp == NULL) {
5797
41.8k
      cur = xmlCreateEnumeration(name);
5798
41.8k
      if (cur == NULL) {
5799
0
                xmlFreeEnumeration(ret);
5800
0
                return(NULL);
5801
0
            }
5802
41.8k
      if (last == NULL) ret = last = cur;
5803
3.97k
      else {
5804
3.97k
    last->next = cur;
5805
3.97k
    last = cur;
5806
3.97k
      }
5807
41.8k
  }
5808
44.1k
  SKIP_BLANKS;
5809
44.1k
    } while (RAW == '|');
5810
33.0k
    if (RAW != ')') {
5811
1.72k
  xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5812
1.72k
        xmlFreeEnumeration(ret);
5813
1.72k
  return(NULL);
5814
1.72k
    }
5815
31.3k
    NEXT;
5816
31.3k
    return(ret);
5817
33.0k
}
5818
5819
/**
5820
 * xmlParseEnumerationType:
5821
 * @ctxt:  an XML parser context
5822
 *
5823
 * DEPRECATED: Internal function, don't use.
5824
 *
5825
 * parse an Enumeration attribute type.
5826
 *
5827
 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5828
 *
5829
 * [ VC: Enumeration ]
5830
 * Values of this type must match one of the Nmtoken tokens in
5831
 * the declaration
5832
 *
5833
 * Returns: the enumeration attribute tree built while parsing
5834
 */
5835
5836
xmlEnumerationPtr
5837
105k
xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
5838
105k
    xmlChar *name;
5839
105k
    xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5840
5841
105k
    if (RAW != '(') {
5842
35.9k
  xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
5843
35.9k
  return(NULL);
5844
35.9k
    }
5845
69.4k
    SHRINK;
5846
109k
    do {
5847
109k
        NEXT;
5848
109k
  SKIP_BLANKS;
5849
109k
        name = xmlParseNmtoken(ctxt);
5850
109k
  if (name == NULL) {
5851
3.79k
      xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
5852
3.79k
      return(ret);
5853
3.79k
  }
5854
105k
  tmp = ret;
5855
160k
  while (tmp != NULL) {
5856
57.1k
      if (xmlStrEqual(name, tmp->name)) {
5857
2.01k
    xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5858
2.01k
    "standalone: attribute enumeration value token %s duplicated\n",
5859
2.01k
         name, NULL);
5860
2.01k
    if (!xmlDictOwns(ctxt->dict, name))
5861
2.01k
        xmlFree(name);
5862
2.01k
    break;
5863
2.01k
      }
5864
55.1k
      tmp = tmp->next;
5865
55.1k
  }
5866
105k
  if (tmp == NULL) {
5867
103k
      cur = xmlCreateEnumeration(name);
5868
103k
      if (!xmlDictOwns(ctxt->dict, name))
5869
103k
    xmlFree(name);
5870
103k
      if (cur == NULL) {
5871
0
                xmlFreeEnumeration(ret);
5872
0
                return(NULL);
5873
0
            }
5874
103k
      if (last == NULL) ret = last = cur;
5875
34.3k
      else {
5876
34.3k
    last->next = cur;
5877
34.3k
    last = cur;
5878
34.3k
      }
5879
103k
  }
5880
105k
  SKIP_BLANKS;
5881
105k
    } while (RAW == '|');
5882
65.6k
    if (RAW != ')') {
5883
8.80k
  xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
5884
8.80k
  return(ret);
5885
8.80k
    }
5886
56.8k
    NEXT;
5887
56.8k
    return(ret);
5888
65.6k
}
5889
5890
/**
5891
 * xmlParseEnumeratedType:
5892
 * @ctxt:  an XML parser context
5893
 * @tree:  the enumeration tree built while parsing
5894
 *
5895
 * DEPRECATED: Internal function, don't use.
5896
 *
5897
 * parse an Enumerated attribute type.
5898
 *
5899
 * [57] EnumeratedType ::= NotationType | Enumeration
5900
 *
5901
 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5902
 *
5903
 *
5904
 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5905
 */
5906
5907
int
5908
144k
xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5909
144k
    if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5910
39.2k
  SKIP(8);
5911
39.2k
  if (SKIP_BLANKS == 0) {
5912
522
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5913
522
         "Space required after 'NOTATION'\n");
5914
522
      return(0);
5915
522
  }
5916
38.7k
  *tree = xmlParseNotationType(ctxt);
5917
38.7k
  if (*tree == NULL) return(0);
5918
31.3k
  return(XML_ATTRIBUTE_NOTATION);
5919
38.7k
    }
5920
105k
    *tree = xmlParseEnumerationType(ctxt);
5921
105k
    if (*tree == NULL) return(0);
5922
69.1k
    return(XML_ATTRIBUTE_ENUMERATION);
5923
105k
}
5924
5925
/**
5926
 * xmlParseAttributeType:
5927
 * @ctxt:  an XML parser context
5928
 * @tree:  the enumeration tree built while parsing
5929
 *
5930
 * DEPRECATED: Internal function, don't use.
5931
 *
5932
 * parse the Attribute list def for an element
5933
 *
5934
 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5935
 *
5936
 * [55] StringType ::= 'CDATA'
5937
 *
5938
 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5939
 *                        'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5940
 *
5941
 * Validity constraints for attribute values syntax are checked in
5942
 * xmlValidateAttributeValue()
5943
 *
5944
 * [ VC: ID ]
5945
 * Values of type ID must match the Name production. A name must not
5946
 * appear more than once in an XML document as a value of this type;
5947
 * i.e., ID values must uniquely identify the elements which bear them.
5948
 *
5949
 * [ VC: One ID per Element Type ]
5950
 * No element type may have more than one ID attribute specified.
5951
 *
5952
 * [ VC: ID Attribute Default ]
5953
 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
5954
 *
5955
 * [ VC: IDREF ]
5956
 * Values of type IDREF must match the Name production, and values
5957
 * of type IDREFS must match Names; each IDREF Name must match the value
5958
 * of an ID attribute on some element in the XML document; i.e. IDREF
5959
 * values must match the value of some ID attribute.
5960
 *
5961
 * [ VC: Entity Name ]
5962
 * Values of type ENTITY must match the Name production, values
5963
 * of type ENTITIES must match Names; each Entity Name must match the
5964
 * name of an unparsed entity declared in the DTD.
5965
 *
5966
 * [ VC: Name Token ]
5967
 * Values of type NMTOKEN must match the Nmtoken production; values
5968
 * of type NMTOKENS must match Nmtokens.
5969
 *
5970
 * Returns the attribute type
5971
 */
5972
int
5973
601k
xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5974
601k
    SHRINK;
5975
601k
    if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
5976
171k
  SKIP(5);
5977
171k
  return(XML_ATTRIBUTE_CDATA);
5978
429k
     } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
5979
15.5k
  SKIP(6);
5980
15.5k
  return(XML_ATTRIBUTE_IDREFS);
5981
414k
     } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
5982
34.8k
  SKIP(5);
5983
34.8k
  return(XML_ATTRIBUTE_IDREF);
5984
379k
     } else if ((RAW == 'I') && (NXT(1) == 'D')) {
5985
62.4k
        SKIP(2);
5986
62.4k
  return(XML_ATTRIBUTE_ID);
5987
317k
     } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
5988
1.96k
  SKIP(6);
5989
1.96k
  return(XML_ATTRIBUTE_ENTITY);
5990
315k
     } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
5991
30.2k
  SKIP(8);
5992
30.2k
  return(XML_ATTRIBUTE_ENTITIES);
5993
284k
     } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
5994
35.2k
  SKIP(8);
5995
35.2k
  return(XML_ATTRIBUTE_NMTOKENS);
5996
249k
     } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
5997
104k
  SKIP(7);
5998
104k
  return(XML_ATTRIBUTE_NMTOKEN);
5999
104k
     }
6000
144k
     return(xmlParseEnumeratedType(ctxt, tree));
6001
601k
}
6002
6003
/**
6004
 * xmlParseAttributeListDecl:
6005
 * @ctxt:  an XML parser context
6006
 *
6007
 * DEPRECATED: Internal function, don't use.
6008
 *
6009
 * : parse the Attribute list def for an element
6010
 *
6011
 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
6012
 *
6013
 * [53] AttDef ::= S Name S AttType S DefaultDecl
6014
 *
6015
 */
6016
void
6017
566k
xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
6018
566k
    const xmlChar *elemName;
6019
566k
    const xmlChar *attrName;
6020
566k
    xmlEnumerationPtr tree;
6021
6022
566k
    if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
6023
558k
  int inputid = ctxt->input->id;
6024
6025
558k
  SKIP(9);
6026
558k
  if (SKIP_BLANKS == 0) {
6027
6.52k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6028
6.52k
                     "Space required after '<!ATTLIST'\n");
6029
6.52k
  }
6030
558k
        elemName = xmlParseName(ctxt);
6031
558k
  if (elemName == NULL) {
6032
6.60k
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6033
6.60k
         "ATTLIST: no name for Element\n");
6034
6.60k
      return;
6035
6.60k
  }
6036
552k
  SKIP_BLANKS;
6037
552k
  GROW;
6038
1.04M
  while ((RAW != '>') && (ctxt->instate != XML_PARSER_EOF)) {
6039
660k
      int type;
6040
660k
      int def;
6041
660k
      xmlChar *defaultValue = NULL;
6042
6043
660k
      GROW;
6044
660k
            tree = NULL;
6045
660k
      attrName = xmlParseName(ctxt);
6046
660k
      if (attrName == NULL) {
6047
50.9k
    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6048
50.9k
             "ATTLIST: no name for Attribute\n");
6049
50.9k
    break;
6050
50.9k
      }
6051
609k
      GROW;
6052
609k
      if (SKIP_BLANKS == 0) {
6053
8.33k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6054
8.33k
            "Space required after the attribute name\n");
6055
8.33k
    break;
6056
8.33k
      }
6057
6058
601k
      type = xmlParseAttributeType(ctxt, &tree);
6059
601k
      if (type <= 0) {
6060
44.1k
          break;
6061
44.1k
      }
6062
6063
557k
      GROW;
6064
557k
      if (SKIP_BLANKS == 0) {
6065
15.7k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6066
15.7k
             "Space required after the attribute type\n");
6067
15.7k
          if (tree != NULL)
6068
12.6k
        xmlFreeEnumeration(tree);
6069
15.7k
    break;
6070
15.7k
      }
6071
6072
541k
      def = xmlParseDefaultDecl(ctxt, &defaultValue);
6073
541k
      if (def <= 0) {
6074
0
                if (defaultValue != NULL)
6075
0
        xmlFree(defaultValue);
6076
0
          if (tree != NULL)
6077
0
        xmlFreeEnumeration(tree);
6078
0
          break;
6079
0
      }
6080
541k
      if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
6081
86.3k
          xmlAttrNormalizeSpace(defaultValue, defaultValue);
6082
6083
541k
      GROW;
6084
541k
            if (RAW != '>') {
6085
194k
    if (SKIP_BLANKS == 0) {
6086
53.2k
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6087
53.2k
      "Space required after the attribute default value\n");
6088
53.2k
        if (defaultValue != NULL)
6089
4.50k
      xmlFree(defaultValue);
6090
53.2k
        if (tree != NULL)
6091
22.1k
      xmlFreeEnumeration(tree);
6092
53.2k
        break;
6093
53.2k
    }
6094
194k
      }
6095
488k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6096
488k
    (ctxt->sax->attributeDecl != NULL))
6097
426k
    ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
6098
426k
                          type, def, defaultValue, tree);
6099
61.6k
      else if (tree != NULL)
6100
8.26k
    xmlFreeEnumeration(tree);
6101
6102
488k
      if ((ctxt->sax2) && (defaultValue != NULL) &&
6103
488k
          (def != XML_ATTRIBUTE_IMPLIED) &&
6104
488k
    (def != XML_ATTRIBUTE_REQUIRED)) {
6105
67.1k
    xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
6106
67.1k
      }
6107
488k
      if (ctxt->sax2) {
6108
285k
    xmlAddSpecialAttr(ctxt, elemName, attrName, type);
6109
285k
      }
6110
488k
      if (defaultValue != NULL)
6111
116k
          xmlFree(defaultValue);
6112
488k
      GROW;
6113
488k
  }
6114
552k
  if (RAW == '>') {
6115
385k
      if (inputid != ctxt->input->id) {
6116
212
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6117
212
                               "Attribute list declaration doesn't start and"
6118
212
                               " stop in the same entity\n");
6119
212
      }
6120
385k
      NEXT;
6121
385k
  }
6122
552k
    }
6123
566k
}
6124
6125
/**
6126
 * xmlParseElementMixedContentDecl:
6127
 * @ctxt:  an XML parser context
6128
 * @inputchk:  the input used for the current entity, needed for boundary checks
6129
 *
6130
 * DEPRECATED: Internal function, don't use.
6131
 *
6132
 * parse the declaration for a Mixed Element content
6133
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6134
 *
6135
 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
6136
 *                '(' S? '#PCDATA' S? ')'
6137
 *
6138
 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
6139
 *
6140
 * [ VC: No Duplicate Types ]
6141
 * The same name must not appear more than once in a single
6142
 * mixed-content declaration.
6143
 *
6144
 * returns: the list of the xmlElementContentPtr describing the element choices
6145
 */
6146
xmlElementContentPtr
6147
142k
xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6148
142k
    xmlElementContentPtr ret = NULL, cur = NULL, n;
6149
142k
    const xmlChar *elem = NULL;
6150
6151
142k
    GROW;
6152
142k
    if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6153
142k
  SKIP(7);
6154
142k
  SKIP_BLANKS;
6155
142k
  SHRINK;
6156
142k
  if (RAW == ')') {
6157
125k
      if (ctxt->input->id != inputchk) {
6158
0
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6159
0
                               "Element content declaration doesn't start and"
6160
0
                               " stop in the same entity\n");
6161
0
      }
6162
125k
      NEXT;
6163
125k
      ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6164
125k
      if (ret == NULL)
6165
0
          return(NULL);
6166
125k
      if (RAW == '*') {
6167
379
    ret->ocur = XML_ELEMENT_CONTENT_MULT;
6168
379
    NEXT;
6169
379
      }
6170
125k
      return(ret);
6171
125k
  }
6172
17.0k
  if ((RAW == '(') || (RAW == '|')) {
6173
15.1k
      ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6174
15.1k
      if (ret == NULL) return(NULL);
6175
15.1k
  }
6176
62.3k
  while ((RAW == '|') && (ctxt->instate != XML_PARSER_EOF)) {
6177
46.4k
      NEXT;
6178
46.4k
      if (elem == NULL) {
6179
14.9k
          ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6180
14.9k
    if (ret == NULL) {
6181
0
        xmlFreeDocElementContent(ctxt->myDoc, cur);
6182
0
                    return(NULL);
6183
0
                }
6184
14.9k
    ret->c1 = cur;
6185
14.9k
    if (cur != NULL)
6186
14.9k
        cur->parent = ret;
6187
14.9k
    cur = ret;
6188
31.4k
      } else {
6189
31.4k
          n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6190
31.4k
    if (n == NULL) {
6191
0
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6192
0
                    return(NULL);
6193
0
                }
6194
31.4k
    n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6195
31.4k
    if (n->c1 != NULL)
6196
31.4k
        n->c1->parent = n;
6197
31.4k
          cur->c2 = n;
6198
31.4k
    if (n != NULL)
6199
31.4k
        n->parent = cur;
6200
31.4k
    cur = n;
6201
31.4k
      }
6202
46.4k
      SKIP_BLANKS;
6203
46.4k
      elem = xmlParseName(ctxt);
6204
46.4k
      if (elem == NULL) {
6205
1.13k
    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6206
1.13k
      "xmlParseElementMixedContentDecl : Name expected\n");
6207
1.13k
    xmlFreeDocElementContent(ctxt->myDoc, ret);
6208
1.13k
    return(NULL);
6209
1.13k
      }
6210
45.2k
      SKIP_BLANKS;
6211
45.2k
      GROW;
6212
45.2k
  }
6213
15.9k
  if ((RAW == ')') && (NXT(1) == '*')) {
6214
12.8k
      if (elem != NULL) {
6215
12.8k
    cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
6216
12.8k
                                   XML_ELEMENT_CONTENT_ELEMENT);
6217
12.8k
    if (cur->c2 != NULL)
6218
12.8k
        cur->c2->parent = cur;
6219
12.8k
            }
6220
12.8k
            if (ret != NULL)
6221
12.8k
                ret->ocur = XML_ELEMENT_CONTENT_MULT;
6222
12.8k
      if (ctxt->input->id != inputchk) {
6223
0
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6224
0
                               "Element content declaration doesn't start and"
6225
0
                               " stop in the same entity\n");
6226
0
      }
6227
12.8k
      SKIP(2);
6228
12.8k
  } else {
6229
3.04k
      xmlFreeDocElementContent(ctxt->myDoc, ret);
6230
3.04k
      xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
6231
3.04k
      return(NULL);
6232
3.04k
  }
6233
6234
15.9k
    } else {
6235
0
  xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
6236
0
    }
6237
12.8k
    return(ret);
6238
142k
}
6239
6240
/**
6241
 * xmlParseElementChildrenContentDeclPriv:
6242
 * @ctxt:  an XML parser context
6243
 * @inputchk:  the input used for the current entity, needed for boundary checks
6244
 * @depth: the level of recursion
6245
 *
6246
 * parse the declaration for a Mixed Element content
6247
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6248
 *
6249
 *
6250
 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6251
 *
6252
 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6253
 *
6254
 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6255
 *
6256
 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6257
 *
6258
 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6259
 * TODO Parameter-entity replacement text must be properly nested
6260
 *  with parenthesized groups. That is to say, if either of the
6261
 *  opening or closing parentheses in a choice, seq, or Mixed
6262
 *  construct is contained in the replacement text for a parameter
6263
 *  entity, both must be contained in the same replacement text. For
6264
 *  interoperability, if a parameter-entity reference appears in a
6265
 *  choice, seq, or Mixed construct, its replacement text should not
6266
 *  be empty, and neither the first nor last non-blank character of
6267
 *  the replacement text should be a connector (| or ,).
6268
 *
6269
 * Returns the tree of xmlElementContentPtr describing the element
6270
 *          hierarchy.
6271
 */
6272
static xmlElementContentPtr
6273
xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
6274
320k
                                       int depth) {
6275
320k
    xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
6276
320k
    const xmlChar *elem;
6277
320k
    xmlChar type = 0;
6278
6279
320k
    if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
6280
320k
        (depth >  2048)) {
6281
0
        xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED,
6282
0
"xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n",
6283
0
                          depth);
6284
0
  return(NULL);
6285
0
    }
6286
320k
    SKIP_BLANKS;
6287
320k
    GROW;
6288
320k
    if (RAW == '(') {
6289
27.8k
  int inputid = ctxt->input->id;
6290
6291
        /* Recurse on first child */
6292
27.8k
  NEXT;
6293
27.8k
  SKIP_BLANKS;
6294
27.8k
        cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6295
27.8k
                                                           depth + 1);
6296
27.8k
        if (cur == NULL)
6297
5.26k
            return(NULL);
6298
22.5k
  SKIP_BLANKS;
6299
22.5k
  GROW;
6300
293k
    } else {
6301
293k
  elem = xmlParseName(ctxt);
6302
293k
  if (elem == NULL) {
6303
4.53k
      xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6304
4.53k
      return(NULL);
6305
4.53k
  }
6306
288k
        cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6307
288k
  if (cur == NULL) {
6308
0
      xmlErrMemory(ctxt, NULL);
6309
0
      return(NULL);
6310
0
  }
6311
288k
  GROW;
6312
288k
  if (RAW == '?') {
6313
4.84k
      cur->ocur = XML_ELEMENT_CONTENT_OPT;
6314
4.84k
      NEXT;
6315
283k
  } else if (RAW == '*') {
6316
39.1k
      cur->ocur = XML_ELEMENT_CONTENT_MULT;
6317
39.1k
      NEXT;
6318
244k
  } else if (RAW == '+') {
6319
16.1k
      cur->ocur = XML_ELEMENT_CONTENT_PLUS;
6320
16.1k
      NEXT;
6321
228k
  } else {
6322
228k
      cur->ocur = XML_ELEMENT_CONTENT_ONCE;
6323
228k
  }
6324
288k
  GROW;
6325
288k
    }
6326
311k
    SKIP_BLANKS;
6327
311k
    SHRINK;
6328
635k
    while ((RAW != ')') && (ctxt->instate != XML_PARSER_EOF)) {
6329
        /*
6330
   * Each loop we parse one separator and one element.
6331
   */
6332
367k
        if (RAW == ',') {
6333
134k
      if (type == 0) type = CUR;
6334
6335
      /*
6336
       * Detect "Name | Name , Name" error
6337
       */
6338
63.2k
      else if (type != CUR) {
6339
290
    xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6340
290
        "xmlParseElementChildrenContentDecl : '%c' expected\n",
6341
290
                      type);
6342
290
    if ((last != NULL) && (last != ret))
6343
290
        xmlFreeDocElementContent(ctxt->myDoc, last);
6344
290
    if (ret != NULL)
6345
290
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6346
290
    return(NULL);
6347
290
      }
6348
134k
      NEXT;
6349
6350
134k
      op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
6351
134k
      if (op == NULL) {
6352
0
    if ((last != NULL) && (last != ret))
6353
0
        xmlFreeDocElementContent(ctxt->myDoc, last);
6354
0
          xmlFreeDocElementContent(ctxt->myDoc, ret);
6355
0
    return(NULL);
6356
0
      }
6357
134k
      if (last == NULL) {
6358
71.5k
    op->c1 = ret;
6359
71.5k
    if (ret != NULL)
6360
71.5k
        ret->parent = op;
6361
71.5k
    ret = cur = op;
6362
71.5k
      } else {
6363
62.9k
          cur->c2 = op;
6364
62.9k
    if (op != NULL)
6365
62.9k
        op->parent = cur;
6366
62.9k
    op->c1 = last;
6367
62.9k
    if (last != NULL)
6368
62.9k
        last->parent = op;
6369
62.9k
    cur =op;
6370
62.9k
    last = NULL;
6371
62.9k
      }
6372
232k
  } else if (RAW == '|') {
6373
209k
      if (type == 0) type = CUR;
6374
6375
      /*
6376
       * Detect "Name , Name | Name" error
6377
       */
6378
145k
      else if (type != CUR) {
6379
286
    xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6380
286
        "xmlParseElementChildrenContentDecl : '%c' expected\n",
6381
286
          type);
6382
286
    if ((last != NULL) && (last != ret))
6383
286
        xmlFreeDocElementContent(ctxt->myDoc, last);
6384
286
    if (ret != NULL)
6385
286
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6386
286
    return(NULL);
6387
286
      }
6388
208k
      NEXT;
6389
6390
208k
      op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6391
208k
      if (op == NULL) {
6392
0
    if ((last != NULL) && (last != ret))
6393
0
        xmlFreeDocElementContent(ctxt->myDoc, last);
6394
0
    if (ret != NULL)
6395
0
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6396
0
    return(NULL);
6397
0
      }
6398
208k
      if (last == NULL) {
6399
63.8k
    op->c1 = ret;
6400
63.8k
    if (ret != NULL)
6401
63.8k
        ret->parent = op;
6402
63.8k
    ret = cur = op;
6403
145k
      } else {
6404
145k
          cur->c2 = op;
6405
145k
    if (op != NULL)
6406
145k
        op->parent = cur;
6407
145k
    op->c1 = last;
6408
145k
    if (last != NULL)
6409
145k
        last->parent = op;
6410
145k
    cur =op;
6411
145k
    last = NULL;
6412
145k
      }
6413
208k
  } else {
6414
23.6k
      xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
6415
23.6k
      if ((last != NULL) && (last != ret))
6416
3.10k
          xmlFreeDocElementContent(ctxt->myDoc, last);
6417
23.6k
      if (ret != NULL)
6418
23.6k
    xmlFreeDocElementContent(ctxt->myDoc, ret);
6419
23.6k
      return(NULL);
6420
23.6k
  }
6421
343k
  GROW;
6422
343k
  SKIP_BLANKS;
6423
343k
  GROW;
6424
343k
  if (RAW == '(') {
6425
29.1k
      int inputid = ctxt->input->id;
6426
      /* Recurse on second child */
6427
29.1k
      NEXT;
6428
29.1k
      SKIP_BLANKS;
6429
29.1k
      last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6430
29.1k
                                                          depth + 1);
6431
29.1k
            if (last == NULL) {
6432
2.66k
    if (ret != NULL)
6433
2.66k
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6434
2.66k
    return(NULL);
6435
2.66k
            }
6436
26.4k
      SKIP_BLANKS;
6437
314k
  } else {
6438
314k
      elem = xmlParseName(ctxt);
6439
314k
      if (elem == NULL) {
6440
16.2k
    xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6441
16.2k
    if (ret != NULL)
6442
16.2k
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6443
16.2k
    return(NULL);
6444
16.2k
      }
6445
298k
      last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6446
298k
      if (last == NULL) {
6447
0
    if (ret != NULL)
6448
0
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6449
0
    return(NULL);
6450
0
      }
6451
298k
      if (RAW == '?') {
6452
76.9k
    last->ocur = XML_ELEMENT_CONTENT_OPT;
6453
76.9k
    NEXT;
6454
221k
      } else if (RAW == '*') {
6455
7.58k
    last->ocur = XML_ELEMENT_CONTENT_MULT;
6456
7.58k
    NEXT;
6457
213k
      } else if (RAW == '+') {
6458
10.2k
    last->ocur = XML_ELEMENT_CONTENT_PLUS;
6459
10.2k
    NEXT;
6460
203k
      } else {
6461
203k
    last->ocur = XML_ELEMENT_CONTENT_ONCE;
6462
203k
      }
6463
298k
  }
6464
324k
  SKIP_BLANKS;
6465
324k
  GROW;
6466
324k
    }
6467
267k
    if ((cur != NULL) && (last != NULL)) {
6468
112k
        cur->c2 = last;
6469
112k
  if (last != NULL)
6470
112k
      last->parent = cur;
6471
112k
    }
6472
267k
    if (ctxt->input->id != inputchk) {
6473
3
  xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6474
3
                       "Element content declaration doesn't start and stop in"
6475
3
                       " the same entity\n");
6476
3
    }
6477
267k
    NEXT;
6478
267k
    if (RAW == '?') {
6479
7.85k
  if (ret != NULL) {
6480
7.85k
      if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6481
7.85k
          (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6482
61
          ret->ocur = XML_ELEMENT_CONTENT_MULT;
6483
7.79k
      else
6484
7.79k
          ret->ocur = XML_ELEMENT_CONTENT_OPT;
6485
7.85k
  }
6486
7.85k
  NEXT;
6487
260k
    } else if (RAW == '*') {
6488
113k
  if (ret != NULL) {
6489
113k
      ret->ocur = XML_ELEMENT_CONTENT_MULT;
6490
113k
      cur = ret;
6491
      /*
6492
       * Some normalization:
6493
       * (a | b* | c?)* == (a | b | c)*
6494
       */
6495
294k
      while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6496
181k
    if ((cur->c1 != NULL) &&
6497
181k
              ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6498
181k
         (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6499
33.3k
        cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6500
181k
    if ((cur->c2 != NULL) &&
6501
181k
              ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6502
181k
         (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6503
5.34k
        cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6504
181k
    cur = cur->c2;
6505
181k
      }
6506
113k
  }
6507
113k
  NEXT;
6508
146k
    } else if (RAW == '+') {
6509
10.0k
  if (ret != NULL) {
6510
10.0k
      int found = 0;
6511
6512
10.0k
      if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6513
10.0k
          (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6514
90
          ret->ocur = XML_ELEMENT_CONTENT_MULT;
6515
9.92k
      else
6516
9.92k
          ret->ocur = XML_ELEMENT_CONTENT_PLUS;
6517
      /*
6518
       * Some normalization:
6519
       * (a | b*)+ == (a | b)*
6520
       * (a | b?)+ == (a | b)*
6521
       */
6522
14.2k
      while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6523
4.19k
    if ((cur->c1 != NULL) &&
6524
4.19k
              ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6525
4.19k
         (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6526
839
        cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6527
839
        found = 1;
6528
839
    }
6529
4.19k
    if ((cur->c2 != NULL) &&
6530
4.19k
              ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6531
4.19k
         (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6532
351
        cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6533
351
        found = 1;
6534
351
    }
6535
4.19k
    cur = cur->c2;
6536
4.19k
      }
6537
10.0k
      if (found)
6538
1.00k
    ret->ocur = XML_ELEMENT_CONTENT_MULT;
6539
10.0k
  }
6540
10.0k
  NEXT;
6541
10.0k
    }
6542
267k
    return(ret);
6543
311k
}
6544
6545
/**
6546
 * xmlParseElementChildrenContentDecl:
6547
 * @ctxt:  an XML parser context
6548
 * @inputchk:  the input used for the current entity, needed for boundary checks
6549
 *
6550
 * DEPRECATED: Internal function, don't use.
6551
 *
6552
 * parse the declaration for a Mixed Element content
6553
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6554
 *
6555
 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6556
 *
6557
 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6558
 *
6559
 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6560
 *
6561
 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6562
 *
6563
 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6564
 * TODO Parameter-entity replacement text must be properly nested
6565
 *  with parenthesized groups. That is to say, if either of the
6566
 *  opening or closing parentheses in a choice, seq, or Mixed
6567
 *  construct is contained in the replacement text for a parameter
6568
 *  entity, both must be contained in the same replacement text. For
6569
 *  interoperability, if a parameter-entity reference appears in a
6570
 *  choice, seq, or Mixed construct, its replacement text should not
6571
 *  be empty, and neither the first nor last non-blank character of
6572
 *  the replacement text should be a connector (| or ,).
6573
 *
6574
 * Returns the tree of xmlElementContentPtr describing the element
6575
 *          hierarchy.
6576
 */
6577
xmlElementContentPtr
6578
0
xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6579
    /* stub left for API/ABI compat */
6580
0
    return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6581
0
}
6582
6583
/**
6584
 * xmlParseElementContentDecl:
6585
 * @ctxt:  an XML parser context
6586
 * @name:  the name of the element being defined.
6587
 * @result:  the Element Content pointer will be stored here if any
6588
 *
6589
 * DEPRECATED: Internal function, don't use.
6590
 *
6591
 * parse the declaration for an Element content either Mixed or Children,
6592
 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
6593
 *
6594
 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6595
 *
6596
 * returns: the type of element content XML_ELEMENT_TYPE_xxx
6597
 */
6598
6599
int
6600
xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
6601
406k
                           xmlElementContentPtr *result) {
6602
6603
406k
    xmlElementContentPtr tree = NULL;
6604
406k
    int inputid = ctxt->input->id;
6605
406k
    int res;
6606
6607
406k
    *result = NULL;
6608
6609
406k
    if (RAW != '(') {
6610
0
  xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6611
0
    "xmlParseElementContentDecl : %s '(' expected\n", name);
6612
0
  return(-1);
6613
0
    }
6614
406k
    NEXT;
6615
406k
    GROW;
6616
406k
    if (ctxt->instate == XML_PARSER_EOF)
6617
0
        return(-1);
6618
406k
    SKIP_BLANKS;
6619
406k
    if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6620
142k
        tree = xmlParseElementMixedContentDecl(ctxt, inputid);
6621
142k
  res = XML_ELEMENT_TYPE_MIXED;
6622
263k
    } else {
6623
263k
        tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
6624
263k
  res = XML_ELEMENT_TYPE_ELEMENT;
6625
263k
    }
6626
406k
    SKIP_BLANKS;
6627
406k
    *result = tree;
6628
406k
    return(res);
6629
406k
}
6630
6631
/**
6632
 * xmlParseElementDecl:
6633
 * @ctxt:  an XML parser context
6634
 *
6635
 * DEPRECATED: Internal function, don't use.
6636
 *
6637
 * parse an Element declaration.
6638
 *
6639
 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6640
 *
6641
 * [ VC: Unique Element Type Declaration ]
6642
 * No element type may be declared more than once
6643
 *
6644
 * Returns the type of the element, or -1 in case of error
6645
 */
6646
int
6647
629k
xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
6648
629k
    const xmlChar *name;
6649
629k
    int ret = -1;
6650
629k
    xmlElementContentPtr content  = NULL;
6651
6652
    /* GROW; done in the caller */
6653
629k
    if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
6654
623k
  int inputid = ctxt->input->id;
6655
6656
623k
  SKIP(9);
6657
623k
  if (SKIP_BLANKS == 0) {
6658
1.92k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6659
1.92k
               "Space required after 'ELEMENT'\n");
6660
1.92k
      return(-1);
6661
1.92k
  }
6662
621k
        name = xmlParseName(ctxt);
6663
621k
  if (name == NULL) {
6664
2.62k
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6665
2.62k
         "xmlParseElementDecl: no name for Element\n");
6666
2.62k
      return(-1);
6667
2.62k
  }
6668
618k
  if (SKIP_BLANKS == 0) {
6669
7.94k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6670
7.94k
         "Space required after the element name\n");
6671
7.94k
  }
6672
618k
  if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
6673
162k
      SKIP(5);
6674
      /*
6675
       * Element must always be empty.
6676
       */
6677
162k
      ret = XML_ELEMENT_TYPE_EMPTY;
6678
456k
  } else if ((RAW == 'A') && (NXT(1) == 'N') &&
6679
456k
             (NXT(2) == 'Y')) {
6680
3.35k
      SKIP(3);
6681
      /*
6682
       * Element is a generic container.
6683
       */
6684
3.35k
      ret = XML_ELEMENT_TYPE_ANY;
6685
452k
  } else if (RAW == '(') {
6686
406k
      ret = xmlParseElementContentDecl(ctxt, name, &content);
6687
406k
  } else {
6688
      /*
6689
       * [ WFC: PEs in Internal Subset ] error handling.
6690
       */
6691
46.7k
      if ((RAW == '%') && (ctxt->external == 0) &&
6692
46.7k
          (ctxt->inputNr == 1)) {
6693
398
    xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
6694
398
    "PEReference: forbidden within markup decl in internal subset\n");
6695
46.3k
      } else {
6696
46.3k
    xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6697
46.3k
          "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6698
46.3k
            }
6699
46.7k
      return(-1);
6700
46.7k
  }
6701
6702
571k
  SKIP_BLANKS;
6703
6704
571k
  if (RAW != '>') {
6705
37.0k
      xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
6706
37.0k
      if (content != NULL) {
6707
4.74k
    xmlFreeDocElementContent(ctxt->myDoc, content);
6708
4.74k
      }
6709
534k
  } else {
6710
534k
      if (inputid != ctxt->input->id) {
6711
45
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6712
45
                               "Element declaration doesn't start and stop in"
6713
45
                               " the same entity\n");
6714
45
      }
6715
6716
534k
      NEXT;
6717
534k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6718
534k
    (ctxt->sax->elementDecl != NULL)) {
6719
481k
    if (content != NULL)
6720
314k
        content->parent = NULL;
6721
481k
          ctxt->sax->elementDecl(ctxt->userData, name, ret,
6722
481k
                           content);
6723
481k
    if ((content != NULL) && (content->parent == NULL)) {
6724
        /*
6725
         * this is a trick: if xmlAddElementDecl is called,
6726
         * instead of copying the full tree it is plugged directly
6727
         * if called from the parser. Avoid duplicating the
6728
         * interfaces or change the API/ABI
6729
         */
6730
26.2k
        xmlFreeDocElementContent(ctxt->myDoc, content);
6731
26.2k
    }
6732
481k
      } else if (content != NULL) {
6733
38.2k
    xmlFreeDocElementContent(ctxt->myDoc, content);
6734
38.2k
      }
6735
534k
  }
6736
571k
    }
6737
578k
    return(ret);
6738
629k
}
6739
6740
/**
6741
 * xmlParseConditionalSections
6742
 * @ctxt:  an XML parser context
6743
 *
6744
 * [61] conditionalSect ::= includeSect | ignoreSect
6745
 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6746
 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6747
 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6748
 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6749
 */
6750
6751
static void
6752
8.11k
xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
6753
8.11k
    int *inputIds = NULL;
6754
8.11k
    size_t inputIdsSize = 0;
6755
8.11k
    size_t depth = 0;
6756
6757
51.7k
    while (ctxt->instate != XML_PARSER_EOF) {
6758
51.5k
        if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6759
24.8k
            int id = ctxt->input->id;
6760
6761
24.8k
            SKIP(3);
6762
24.8k
            SKIP_BLANKS;
6763
6764
24.8k
            if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
6765
20.3k
                SKIP(7);
6766
20.3k
                SKIP_BLANKS;
6767
20.3k
                if (RAW != '[') {
6768
83
                    xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6769
83
                    xmlHaltParser(ctxt);
6770
83
                    goto error;
6771
83
                }
6772
20.2k
                if (ctxt->input->id != id) {
6773
48
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6774
48
                                   "All markup of the conditional section is"
6775
48
                                   " not in the same entity\n");
6776
48
                }
6777
20.2k
                NEXT;
6778
6779
20.2k
                if (inputIdsSize <= depth) {
6780
5.77k
                    int *tmp;
6781
6782
5.77k
                    inputIdsSize = (inputIdsSize == 0 ? 4 : inputIdsSize * 2);
6783
5.77k
                    tmp = (int *) xmlRealloc(inputIds,
6784
5.77k
                            inputIdsSize * sizeof(int));
6785
5.77k
                    if (tmp == NULL) {
6786
0
                        xmlErrMemory(ctxt, NULL);
6787
0
                        goto error;
6788
0
                    }
6789
5.77k
                    inputIds = tmp;
6790
5.77k
                }
6791
20.2k
                inputIds[depth] = id;
6792
20.2k
                depth++;
6793
20.2k
            } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
6794
3.45k
                int state;
6795
3.45k
                xmlParserInputState instate;
6796
3.45k
                size_t ignoreDepth = 0;
6797
6798
3.45k
                SKIP(6);
6799
3.45k
                SKIP_BLANKS;
6800
3.45k
                if (RAW != '[') {
6801
86
                    xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6802
86
                    xmlHaltParser(ctxt);
6803
86
                    goto error;
6804
86
                }
6805
3.37k
                if (ctxt->input->id != id) {
6806
12
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6807
12
                                   "All markup of the conditional section is"
6808
12
                                   " not in the same entity\n");
6809
12
                }
6810
3.37k
                NEXT;
6811
6812
                /*
6813
                 * Parse up to the end of the conditional section but disable
6814
                 * SAX event generating DTD building in the meantime
6815
                 */
6816
3.37k
                state = ctxt->disableSAX;
6817
3.37k
                instate = ctxt->instate;
6818
3.37k
                if (ctxt->recovery == 0) ctxt->disableSAX = 1;
6819
3.37k
                ctxt->instate = XML_PARSER_IGNORE;
6820
6821
1.72M
                while (RAW != 0) {
6822
1.72M
                    if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6823
8.48k
                        SKIP(3);
6824
8.48k
                        ignoreDepth++;
6825
                        /* Check for integer overflow */
6826
8.48k
                        if (ignoreDepth == 0) {
6827
0
                            xmlErrMemory(ctxt, NULL);
6828
0
                            goto error;
6829
0
                        }
6830
1.71M
                    } else if ((RAW == ']') && (NXT(1) == ']') &&
6831
1.71M
                               (NXT(2) == '>')) {
6832
7.42k
                        if (ignoreDepth == 0)
6833
1.83k
                            break;
6834
5.58k
                        SKIP(3);
6835
5.58k
                        ignoreDepth--;
6836
1.71M
                    } else {
6837
1.71M
                        NEXT;
6838
1.71M
                    }
6839
1.72M
                }
6840
6841
3.37k
                ctxt->disableSAX = state;
6842
3.37k
                ctxt->instate = instate;
6843
6844
3.37k
    if (RAW == 0) {
6845
1.53k
        xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
6846
1.53k
                    goto error;
6847
1.53k
    }
6848
1.83k
                if (ctxt->input->id != id) {
6849
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6850
0
                                   "All markup of the conditional section is"
6851
0
                                   " not in the same entity\n");
6852
0
                }
6853
1.83k
                SKIP(3);
6854
1.83k
            } else {
6855
1.06k
                xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
6856
1.06k
                xmlHaltParser(ctxt);
6857
1.06k
                goto error;
6858
1.06k
            }
6859
26.7k
        } else if ((depth > 0) &&
6860
26.7k
                   (RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6861
10.8k
            depth--;
6862
10.8k
            if (ctxt->input->id != inputIds[depth]) {
6863
270
                xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6864
270
                               "All markup of the conditional section is not"
6865
270
                               " in the same entity\n");
6866
270
            }
6867
10.8k
            SKIP(3);
6868
15.8k
        } else {
6869
15.8k
            int id = ctxt->input->id;
6870
15.8k
            unsigned long cons = CUR_CONSUMED;
6871
6872
15.8k
            xmlParseMarkupDecl(ctxt);
6873
6874
15.8k
            if ((id == ctxt->input->id) && (cons == CUR_CONSUMED)) {
6875
1.80k
                xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6876
1.80k
                xmlHaltParser(ctxt);
6877
1.80k
                goto error;
6878
1.80k
            }
6879
15.8k
        }
6880
6881
46.9k
        if (depth == 0)
6882
3.35k
            break;
6883
6884
43.6k
        SKIP_BLANKS;
6885
43.6k
        GROW;
6886
43.6k
    }
6887
6888
8.11k
error:
6889
8.11k
    xmlFree(inputIds);
6890
8.11k
}
6891
6892
/**
6893
 * xmlParseMarkupDecl:
6894
 * @ctxt:  an XML parser context
6895
 *
6896
 * DEPRECATED: Internal function, don't use.
6897
 *
6898
 * parse Markup declarations
6899
 *
6900
 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6901
 *                     NotationDecl | PI | Comment
6902
 *
6903
 * [ VC: Proper Declaration/PE Nesting ]
6904
 * Parameter-entity replacement text must be properly nested with
6905
 * markup declarations. That is to say, if either the first character
6906
 * or the last character of a markup declaration (markupdecl above) is
6907
 * contained in the replacement text for a parameter-entity reference,
6908
 * both must be contained in the same replacement text.
6909
 *
6910
 * [ WFC: PEs in Internal Subset ]
6911
 * In the internal DTD subset, parameter-entity references can occur
6912
 * only where markup declarations can occur, not within markup declarations.
6913
 * (This does not apply to references that occur in external parameter
6914
 * entities or to the external subset.)
6915
 */
6916
void
6917
2.30M
xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
6918
2.30M
    GROW;
6919
2.30M
    if (CUR == '<') {
6920
2.00M
        if (NXT(1) == '!') {
6921
1.88M
      switch (NXT(2)) {
6922
1.12M
          case 'E':
6923
1.12M
        if (NXT(3) == 'L')
6924
629k
      xmlParseElementDecl(ctxt);
6925
499k
        else if (NXT(3) == 'N')
6926
497k
      xmlParseEntityDecl(ctxt);
6927
1.12M
        break;
6928
566k
          case 'A':
6929
566k
        xmlParseAttributeListDecl(ctxt);
6930
566k
        break;
6931
14.0k
          case 'N':
6932
14.0k
        xmlParseNotationDecl(ctxt);
6933
14.0k
        break;
6934
174k
          case '-':
6935
174k
        xmlParseComment(ctxt);
6936
174k
        break;
6937
4.02k
    default:
6938
        /* there is an error but it will be detected later */
6939
4.02k
        break;
6940
1.88M
      }
6941
1.88M
  } else if (NXT(1) == '?') {
6942
107k
      xmlParsePI(ctxt);
6943
107k
  }
6944
2.00M
    }
6945
6946
    /*
6947
     * detect requirement to exit there and act accordingly
6948
     * and avoid having instate overridden later on
6949
     */
6950
2.30M
    if (ctxt->instate == XML_PARSER_EOF)
6951
14.5k
        return;
6952
6953
2.29M
    ctxt->instate = XML_PARSER_DTD;
6954
2.29M
}
6955
6956
/**
6957
 * xmlParseTextDecl:
6958
 * @ctxt:  an XML parser context
6959
 *
6960
 * DEPRECATED: Internal function, don't use.
6961
 *
6962
 * parse an XML declaration header for external entities
6963
 *
6964
 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
6965
 */
6966
6967
void
6968
12.4k
xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
6969
12.4k
    xmlChar *version;
6970
12.4k
    const xmlChar *encoding;
6971
12.4k
    int oldstate;
6972
6973
    /*
6974
     * We know that '<?xml' is here.
6975
     */
6976
12.4k
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
6977
12.3k
  SKIP(5);
6978
12.3k
    } else {
6979
128
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
6980
128
  return;
6981
128
    }
6982
6983
    /* Avoid expansion of parameter entities when skipping blanks. */
6984
12.3k
    oldstate = ctxt->instate;
6985
12.3k
    ctxt->instate = XML_PARSER_START;
6986
6987
12.3k
    if (SKIP_BLANKS == 0) {
6988
0
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6989
0
           "Space needed after '<?xml'\n");
6990
0
    }
6991
6992
    /*
6993
     * We may have the VersionInfo here.
6994
     */
6995
12.3k
    version = xmlParseVersionInfo(ctxt);
6996
12.3k
    if (version == NULL)
6997
761
  version = xmlCharStrdup(XML_DEFAULT_VERSION);
6998
11.6k
    else {
6999
11.6k
  if (SKIP_BLANKS == 0) {
7000
402
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7001
402
               "Space needed here\n");
7002
402
  }
7003
11.6k
    }
7004
12.3k
    ctxt->input->version = version;
7005
7006
    /*
7007
     * We must have the encoding declaration
7008
     */
7009
12.3k
    encoding = xmlParseEncodingDecl(ctxt);
7010
12.3k
    if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7011
  /*
7012
   * The XML REC instructs us to stop parsing right here
7013
   */
7014
93
        ctxt->instate = oldstate;
7015
93
        return;
7016
93
    }
7017
12.2k
    if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
7018
1.99k
  xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
7019
1.99k
           "Missing encoding in text declaration\n");
7020
1.99k
    }
7021
7022
12.2k
    SKIP_BLANKS;
7023
12.2k
    if ((RAW == '?') && (NXT(1) == '>')) {
7024
10.4k
        SKIP(2);
7025
10.4k
    } else if (RAW == '>') {
7026
        /* Deprecated old WD ... */
7027
186
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
7028
186
  NEXT;
7029
1.64k
    } else {
7030
1.64k
        int c;
7031
7032
1.64k
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
7033
144k
        while ((c = CUR) != 0) {
7034
144k
            NEXT;
7035
144k
            if (c == '>')
7036
1.34k
                break;
7037
144k
        }
7038
1.64k
    }
7039
7040
12.2k
    ctxt->instate = oldstate;
7041
12.2k
}
7042
7043
/**
7044
 * xmlParseExternalSubset:
7045
 * @ctxt:  an XML parser context
7046
 * @ExternalID: the external identifier
7047
 * @SystemID: the system identifier (or URL)
7048
 *
7049
 * parse Markup declarations from an external subset
7050
 *
7051
 * [30] extSubset ::= textDecl? extSubsetDecl
7052
 *
7053
 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
7054
 */
7055
void
7056
xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
7057
29.2k
                       const xmlChar *SystemID) {
7058
29.2k
    xmlDetectSAX2(ctxt);
7059
29.2k
    GROW;
7060
7061
29.2k
    if ((ctxt->encoding == NULL) &&
7062
29.2k
        (ctxt->input->end - ctxt->input->cur >= 4)) {
7063
29.1k
        xmlChar start[4];
7064
29.1k
  xmlCharEncoding enc;
7065
7066
29.1k
  start[0] = RAW;
7067
29.1k
  start[1] = NXT(1);
7068
29.1k
  start[2] = NXT(2);
7069
29.1k
  start[3] = NXT(3);
7070
29.1k
  enc = xmlDetectCharEncoding(start, 4);
7071
29.1k
  if (enc != XML_CHAR_ENCODING_NONE)
7072
3.60k
      xmlSwitchEncoding(ctxt, enc);
7073
29.1k
    }
7074
7075
29.2k
    if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
7076
3.30k
  xmlParseTextDecl(ctxt);
7077
3.30k
  if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7078
      /*
7079
       * The XML REC instructs us to stop parsing right here
7080
       */
7081
78
      xmlHaltParser(ctxt);
7082
78
      return;
7083
78
  }
7084
3.30k
    }
7085
29.1k
    if (ctxt->myDoc == NULL) {
7086
0
        ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
7087
0
  if (ctxt->myDoc == NULL) {
7088
0
      xmlErrMemory(ctxt, "New Doc failed");
7089
0
      return;
7090
0
  }
7091
0
  ctxt->myDoc->properties = XML_DOC_INTERNAL;
7092
0
    }
7093
29.1k
    if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
7094
0
        xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
7095
7096
29.1k
    ctxt->instate = XML_PARSER_DTD;
7097
29.1k
    ctxt->external = 1;
7098
29.1k
    SKIP_BLANKS;
7099
149k
    while (((RAW == '<') && (NXT(1) == '?')) ||
7100
149k
           ((RAW == '<') && (NXT(1) == '!')) ||
7101
149k
     (RAW == '%')) {
7102
123k
  int id = ctxt->input->id;
7103
123k
  unsigned long cons = CUR_CONSUMED;
7104
7105
123k
  GROW;
7106
123k
        if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7107
8.11k
      xmlParseConditionalSections(ctxt);
7108
8.11k
  } else
7109
115k
      xmlParseMarkupDecl(ctxt);
7110
123k
        SKIP_BLANKS;
7111
7112
123k
  if ((id == ctxt->input->id) && (cons == CUR_CONSUMED)) {
7113
3.73k
      xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7114
3.73k
      break;
7115
3.73k
  }
7116
123k
    }
7117
7118
29.1k
    if (RAW != 0) {
7119
9.04k
  xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7120
9.04k
    }
7121
7122
29.1k
}
7123
7124
/**
7125
 * xmlParseReference:
7126
 * @ctxt:  an XML parser context
7127
 *
7128
 * DEPRECATED: Internal function, don't use.
7129
 *
7130
 * parse and handle entity references in content, depending on the SAX
7131
 * interface, this may end-up in a call to character() if this is a
7132
 * CharRef, a predefined entity, if there is no reference() callback.
7133
 * or if the parser was asked to switch to that mode.
7134
 *
7135
 * [67] Reference ::= EntityRef | CharRef
7136
 */
7137
void
7138
5.92M
xmlParseReference(xmlParserCtxtPtr ctxt) {
7139
5.92M
    xmlEntityPtr ent;
7140
5.92M
    xmlChar *val;
7141
5.92M
    int was_checked;
7142
5.92M
    xmlNodePtr list = NULL;
7143
5.92M
    xmlParserErrors ret = XML_ERR_OK;
7144
7145
7146
5.92M
    if (RAW != '&')
7147
0
        return;
7148
7149
    /*
7150
     * Simple case of a CharRef
7151
     */
7152
5.92M
    if (NXT(1) == '#') {
7153
1.11M
  int i = 0;
7154
1.11M
  xmlChar out[16];
7155
1.11M
  int hex = NXT(2);
7156
1.11M
  int value = xmlParseCharRef(ctxt);
7157
7158
1.11M
  if (value == 0)
7159
144k
      return;
7160
974k
  if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
7161
      /*
7162
       * So we are using non-UTF-8 buffers
7163
       * Check that the char fit on 8bits, if not
7164
       * generate a CharRef.
7165
       */
7166
589k
      if (value <= 0xFF) {
7167
569k
    out[0] = value;
7168
569k
    out[1] = 0;
7169
569k
    if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7170
569k
        (!ctxt->disableSAX))
7171
114k
        ctxt->sax->characters(ctxt->userData, out, 1);
7172
569k
      } else {
7173
20.0k
    if ((hex == 'x') || (hex == 'X'))
7174
3.51k
        snprintf((char *)out, sizeof(out), "#x%X", value);
7175
16.5k
    else
7176
16.5k
        snprintf((char *)out, sizeof(out), "#%d", value);
7177
20.0k
    if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7178
20.0k
        (!ctxt->disableSAX))
7179
7.93k
        ctxt->sax->reference(ctxt->userData, out);
7180
20.0k
      }
7181
589k
  } else {
7182
      /*
7183
       * Just encode the value in UTF-8
7184
       */
7185
385k
      COPY_BUF(0 ,out, i, value);
7186
385k
      out[i] = 0;
7187
385k
      if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7188
385k
    (!ctxt->disableSAX))
7189
160k
    ctxt->sax->characters(ctxt->userData, out, i);
7190
385k
  }
7191
974k
  return;
7192
1.11M
    }
7193
7194
    /*
7195
     * We are seeing an entity reference
7196
     */
7197
4.80M
    ent = xmlParseEntityRef(ctxt);
7198
4.80M
    if (ent == NULL) return;
7199
3.62M
    if (!ctxt->wellFormed)
7200
1.00M
  return;
7201
2.61M
    was_checked = ent->checked;
7202
7203
    /* special case of predefined entities */
7204
2.61M
    if ((ent->name == NULL) ||
7205
2.61M
        (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
7206
62.3k
  val = ent->content;
7207
62.3k
  if (val == NULL) return;
7208
  /*
7209
   * inline the entity.
7210
   */
7211
62.3k
  if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7212
62.3k
      (!ctxt->disableSAX))
7213
62.3k
      ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
7214
62.3k
  return;
7215
62.3k
    }
7216
7217
    /*
7218
     * The first reference to the entity trigger a parsing phase
7219
     * where the ent->children is filled with the result from
7220
     * the parsing.
7221
     * Note: external parsed entities will not be loaded, it is not
7222
     * required for a non-validating parser, unless the parsing option
7223
     * of validating, or substituting entities were given. Doing so is
7224
     * far more secure as the parser will only process data coming from
7225
     * the document entity by default.
7226
     */
7227
2.55M
    if (((ent->checked == 0) ||
7228
2.55M
         ((ent->children == NULL) && (ctxt->options & XML_PARSE_NOENT))) &&
7229
2.55M
        ((ent->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY) ||
7230
2.46M
         (ctxt->options & (XML_PARSE_NOENT | XML_PARSE_DTDVALID)))) {
7231
2.45M
  unsigned long oldnbent = ctxt->nbentities, diff;
7232
7233
  /*
7234
   * This is a bit hackish but this seems the best
7235
   * way to make sure both SAX and DOM entity support
7236
   * behaves okay.
7237
   */
7238
2.45M
  void *user_data;
7239
2.45M
  if (ctxt->userData == ctxt)
7240
2.45M
      user_data = NULL;
7241
0
  else
7242
0
      user_data = ctxt->userData;
7243
7244
  /*
7245
   * Check that this entity is well formed
7246
   * 4.3.2: An internal general parsed entity is well-formed
7247
   * if its replacement text matches the production labeled
7248
   * content.
7249
   */
7250
2.45M
  if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7251
305k
      ctxt->depth++;
7252
305k
      ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content,
7253
305k
                                                user_data, &list);
7254
305k
      ctxt->depth--;
7255
7256
2.15M
  } else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7257
2.15M
      ctxt->depth++;
7258
2.15M
      ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax,
7259
2.15M
                                     user_data, ctxt->depth, ent->URI,
7260
2.15M
             ent->ExternalID, &list);
7261
2.15M
      ctxt->depth--;
7262
2.15M
  } else {
7263
0
      ret = XML_ERR_ENTITY_PE_INTERNAL;
7264
0
      xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7265
0
       "invalid entity type found\n", NULL);
7266
0
  }
7267
7268
  /*
7269
   * Store the number of entities needing parsing for this entity
7270
   * content and do checkings
7271
   */
7272
2.45M
        diff = ctxt->nbentities - oldnbent + 1;
7273
2.45M
        if (diff > INT_MAX / 2)
7274
0
            diff = INT_MAX / 2;
7275
2.45M
        ent->checked = diff * 2;
7276
2.45M
  if ((ent->content != NULL) && (xmlStrchr(ent->content, '<')))
7277
155k
      ent->checked |= 1;
7278
2.45M
  if (ret == XML_ERR_ENTITY_LOOP) {
7279
674k
      xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7280
674k
            xmlHaltParser(ctxt);
7281
674k
      xmlFreeNodeList(list);
7282
674k
      return;
7283
674k
  }
7284
1.78M
  if (xmlParserEntityCheck(ctxt, 0, ent, 0)) {
7285
24
      xmlFreeNodeList(list);
7286
24
      return;
7287
24
  }
7288
7289
1.78M
  if ((ret == XML_ERR_OK) && (list != NULL)) {
7290
31.8k
      if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
7291
31.8k
       (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
7292
31.8k
    (ent->children == NULL)) {
7293
31.4k
    ent->children = list;
7294
                /*
7295
                 * Prune it directly in the generated document
7296
                 * except for single text nodes.
7297
                 */
7298
31.4k
                if ((ctxt->replaceEntities == 0) ||
7299
31.4k
                    (ctxt->parseMode == XML_PARSE_READER) ||
7300
31.4k
                    ((list->type == XML_TEXT_NODE) &&
7301
24.0k
                     (list->next == NULL))) {
7302
24.0k
                    ent->owner = 1;
7303
74.8k
                    while (list != NULL) {
7304
50.7k
                        list->parent = (xmlNodePtr) ent;
7305
50.7k
                        if (list->doc != ent->doc)
7306
0
                            xmlSetTreeDoc(list, ent->doc);
7307
50.7k
                        if (list->next == NULL)
7308
24.0k
                            ent->last = list;
7309
50.7k
                        list = list->next;
7310
50.7k
                    }
7311
24.0k
                    list = NULL;
7312
24.0k
                } else {
7313
7.39k
                    ent->owner = 0;
7314
35.4k
                    while (list != NULL) {
7315
28.0k
                        list->parent = (xmlNodePtr) ctxt->node;
7316
28.0k
                        list->doc = ctxt->myDoc;
7317
28.0k
                        if (list->next == NULL)
7318
7.39k
                            ent->last = list;
7319
28.0k
                        list = list->next;
7320
28.0k
                    }
7321
7.39k
                    list = ent->children;
7322
#ifdef LIBXML_LEGACY_ENABLED
7323
                    if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7324
                        xmlAddEntityReference(ent, list, NULL);
7325
#endif /* LIBXML_LEGACY_ENABLED */
7326
7.39k
                }
7327
31.4k
      } else {
7328
339
    xmlFreeNodeList(list);
7329
339
    list = NULL;
7330
339
      }
7331
1.75M
  } else if ((ret != XML_ERR_OK) &&
7332
1.75M
       (ret != XML_WAR_UNDECLARED_ENTITY)) {
7333
1.70M
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7334
1.70M
         "Entity '%s' failed to parse\n", ent->name);
7335
1.70M
            if (ent->content != NULL)
7336
34.2k
                ent->content[0] = 0;
7337
1.70M
      xmlParserEntityCheck(ctxt, 0, ent, 0);
7338
1.70M
  } else if (list != NULL) {
7339
0
      xmlFreeNodeList(list);
7340
0
      list = NULL;
7341
0
  }
7342
1.78M
  if (ent->checked == 0)
7343
0
      ent->checked = 2;
7344
7345
        /* Prevent entity from being parsed and expanded twice (Bug 760367). */
7346
1.78M
        was_checked = 0;
7347
1.78M
    } else if (ent->checked != 1) {
7348
96.7k
  ctxt->nbentities += ent->checked / 2;
7349
96.7k
    }
7350
7351
    /*
7352
     * Now that the entity content has been gathered
7353
     * provide it to the application, this can take different forms based
7354
     * on the parsing modes.
7355
     */
7356
1.88M
    if (ent->children == NULL) {
7357
  /*
7358
   * Probably running in SAX mode and the callbacks don't
7359
   * build the entity content. So unless we already went
7360
   * though parsing for first checking go though the entity
7361
   * content to generate callbacks associated to the entity
7362
   */
7363
1.77M
  if (was_checked != 0) {
7364
18.7k
      void *user_data;
7365
      /*
7366
       * This is a bit hackish but this seems the best
7367
       * way to make sure both SAX and DOM entity support
7368
       * behaves okay.
7369
       */
7370
18.7k
      if (ctxt->userData == ctxt)
7371
18.7k
    user_data = NULL;
7372
0
      else
7373
0
    user_data = ctxt->userData;
7374
7375
18.7k
      if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7376
435
    ctxt->depth++;
7377
435
    ret = xmlParseBalancedChunkMemoryInternal(ctxt,
7378
435
           ent->content, user_data, NULL);
7379
435
    ctxt->depth--;
7380
18.3k
      } else if (ent->etype ==
7381
18.3k
           XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7382
18.3k
    ctxt->depth++;
7383
18.3k
    ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
7384
18.3k
         ctxt->sax, user_data, ctxt->depth,
7385
18.3k
         ent->URI, ent->ExternalID, NULL);
7386
18.3k
    ctxt->depth--;
7387
18.3k
      } else {
7388
0
    ret = XML_ERR_ENTITY_PE_INTERNAL;
7389
0
    xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7390
0
           "invalid entity type found\n", NULL);
7391
0
      }
7392
18.7k
      if (ret == XML_ERR_ENTITY_LOOP) {
7393
0
    xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7394
0
    return;
7395
0
      }
7396
18.7k
  }
7397
1.77M
  if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7398
1.77M
      (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7399
      /*
7400
       * Entity reference callback comes second, it's somewhat
7401
       * superfluous but a compatibility to historical behaviour
7402
       */
7403
32.8k
      ctxt->sax->reference(ctxt->userData, ent->name);
7404
32.8k
  }
7405
1.77M
  return;
7406
1.77M
    }
7407
7408
    /*
7409
     * If we didn't get any children for the entity being built
7410
     */
7411
103k
    if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7412
103k
  (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7413
  /*
7414
   * Create a node.
7415
   */
7416
34.3k
  ctxt->sax->reference(ctxt->userData, ent->name);
7417
34.3k
  return;
7418
34.3k
    }
7419
7420
69.1k
    if ((ctxt->replaceEntities) || (ent->children == NULL))  {
7421
  /*
7422
   * There is a problem on the handling of _private for entities
7423
   * (bug 155816): Should we copy the content of the field from
7424
   * the entity (possibly overwriting some value set by the user
7425
   * when a copy is created), should we leave it alone, or should
7426
   * we try to take care of different situations?  The problem
7427
   * is exacerbated by the usage of this field by the xmlReader.
7428
   * To fix this bug, we look at _private on the created node
7429
   * and, if it's NULL, we copy in whatever was in the entity.
7430
   * If it's not NULL we leave it alone.  This is somewhat of a
7431
   * hack - maybe we should have further tests to determine
7432
   * what to do.
7433
   */
7434
69.1k
  if ((ctxt->node != NULL) && (ent->children != NULL)) {
7435
      /*
7436
       * Seems we are generating the DOM content, do
7437
       * a simple tree copy for all references except the first
7438
       * In the first occurrence list contains the replacement.
7439
       */
7440
69.1k
      if (((list == NULL) && (ent->owner == 0)) ||
7441
69.1k
    (ctxt->parseMode == XML_PARSE_READER)) {
7442
16.9k
    xmlNodePtr nw = NULL, cur, firstChild = NULL;
7443
7444
    /*
7445
     * We are copying here, make sure there is no abuse
7446
     */
7447
16.9k
    ctxt->sizeentcopy += ent->length + 5;
7448
16.9k
    if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
7449
0
        return;
7450
7451
    /*
7452
     * when operating on a reader, the entities definitions
7453
     * are always owning the entities subtree.
7454
    if (ctxt->parseMode == XML_PARSE_READER)
7455
        ent->owner = 1;
7456
     */
7457
7458
16.9k
    cur = ent->children;
7459
39.1k
    while (cur != NULL) {
7460
39.1k
        nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7461
39.1k
        if (nw != NULL) {
7462
39.1k
      if (nw->_private == NULL)
7463
39.1k
          nw->_private = cur->_private;
7464
39.1k
      if (firstChild == NULL){
7465
16.9k
          firstChild = nw;
7466
16.9k
      }
7467
39.1k
      nw = xmlAddChild(ctxt->node, nw);
7468
39.1k
        }
7469
39.1k
        if (cur == ent->last) {
7470
      /*
7471
       * needed to detect some strange empty
7472
       * node cases in the reader tests
7473
       */
7474
16.9k
      if ((ctxt->parseMode == XML_PARSE_READER) &&
7475
16.9k
          (nw != NULL) &&
7476
16.9k
          (nw->type == XML_ELEMENT_NODE) &&
7477
16.9k
          (nw->children == NULL))
7478
1.47k
          nw->extra = 1;
7479
7480
16.9k
      break;
7481
16.9k
        }
7482
22.1k
        cur = cur->next;
7483
22.1k
    }
7484
#ifdef LIBXML_LEGACY_ENABLED
7485
    if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7486
      xmlAddEntityReference(ent, firstChild, nw);
7487
#endif /* LIBXML_LEGACY_ENABLED */
7488
52.1k
      } else if ((list == NULL) || (ctxt->inputNr > 0)) {
7489
52.1k
    xmlNodePtr nw = NULL, cur, next, last,
7490
52.1k
         firstChild = NULL;
7491
7492
    /*
7493
     * We are copying here, make sure there is no abuse
7494
     */
7495
52.1k
    ctxt->sizeentcopy += ent->length + 5;
7496
52.1k
    if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
7497
0
        return;
7498
7499
    /*
7500
     * Copy the entity child list and make it the new
7501
     * entity child list. The goal is to make sure any
7502
     * ID or REF referenced will be the one from the
7503
     * document content and not the entity copy.
7504
     */
7505
52.1k
    cur = ent->children;
7506
52.1k
    ent->children = NULL;
7507
52.1k
    last = ent->last;
7508
52.1k
    ent->last = NULL;
7509
100k
    while (cur != NULL) {
7510
100k
        next = cur->next;
7511
100k
        cur->next = NULL;
7512
100k
        cur->parent = NULL;
7513
100k
        nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7514
100k
        if (nw != NULL) {
7515
100k
      if (nw->_private == NULL)
7516
100k
          nw->_private = cur->_private;
7517
100k
      if (firstChild == NULL){
7518
52.1k
          firstChild = cur;
7519
52.1k
      }
7520
100k
      xmlAddChild((xmlNodePtr) ent, nw);
7521
100k
      xmlAddChild(ctxt->node, cur);
7522
100k
        }
7523
100k
        if (cur == last)
7524
52.1k
      break;
7525
48.2k
        cur = next;
7526
48.2k
    }
7527
52.1k
    if (ent->owner == 0)
7528
7.39k
        ent->owner = 1;
7529
#ifdef LIBXML_LEGACY_ENABLED
7530
    if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7531
      xmlAddEntityReference(ent, firstChild, nw);
7532
#endif /* LIBXML_LEGACY_ENABLED */
7533
52.1k
      } else {
7534
0
    const xmlChar *nbktext;
7535
7536
    /*
7537
     * the name change is to avoid coalescing of the
7538
     * node with a possible previous text one which
7539
     * would make ent->children a dangling pointer
7540
     */
7541
0
    nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
7542
0
          -1);
7543
0
    if (ent->children->type == XML_TEXT_NODE)
7544
0
        ent->children->name = nbktext;
7545
0
    if ((ent->last != ent->children) &&
7546
0
        (ent->last->type == XML_TEXT_NODE))
7547
0
        ent->last->name = nbktext;
7548
0
    xmlAddChildList(ctxt->node, ent->children);
7549
0
      }
7550
7551
      /*
7552
       * This is to avoid a nasty side effect, see
7553
       * characters() in SAX.c
7554
       */
7555
69.1k
      ctxt->nodemem = 0;
7556
69.1k
      ctxt->nodelen = 0;
7557
69.1k
      return;
7558
69.1k
  }
7559
69.1k
    }
7560
69.1k
}
7561
7562
/**
7563
 * xmlParseEntityRef:
7564
 * @ctxt:  an XML parser context
7565
 *
7566
 * DEPRECATED: Internal function, don't use.
7567
 *
7568
 * parse ENTITY references declarations
7569
 *
7570
 * [68] EntityRef ::= '&' Name ';'
7571
 *
7572
 * [ WFC: Entity Declared ]
7573
 * In a document without any DTD, a document with only an internal DTD
7574
 * subset which contains no parameter entity references, or a document
7575
 * with "standalone='yes'", the Name given in the entity reference
7576
 * must match that in an entity declaration, except that well-formed
7577
 * documents need not declare any of the following entities: amp, lt,
7578
 * gt, apos, quot.  The declaration of a parameter entity must precede
7579
 * any reference to it.  Similarly, the declaration of a general entity
7580
 * must precede any reference to it which appears in a default value in an
7581
 * attribute-list declaration. Note that if entities are declared in the
7582
 * external subset or in external parameter entities, a non-validating
7583
 * processor is not obligated to read and process their declarations;
7584
 * for such documents, the rule that an entity must be declared is a
7585
 * well-formedness constraint only if standalone='yes'.
7586
 *
7587
 * [ WFC: Parsed Entity ]
7588
 * An entity reference must not contain the name of an unparsed entity
7589
 *
7590
 * Returns the xmlEntityPtr if found, or NULL otherwise.
7591
 */
7592
xmlEntityPtr
7593
5.56M
xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
7594
5.56M
    const xmlChar *name;
7595
5.56M
    xmlEntityPtr ent = NULL;
7596
7597
5.56M
    GROW;
7598
5.56M
    if (ctxt->instate == XML_PARSER_EOF)
7599
0
        return(NULL);
7600
7601
5.56M
    if (RAW != '&')
7602
0
        return(NULL);
7603
5.56M
    NEXT;
7604
5.56M
    name = xmlParseName(ctxt);
7605
5.56M
    if (name == NULL) {
7606
324k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7607
324k
           "xmlParseEntityRef: no name\n");
7608
324k
        return(NULL);
7609
324k
    }
7610
5.24M
    if (RAW != ';') {
7611
635k
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7612
635k
  return(NULL);
7613
635k
    }
7614
4.60M
    NEXT;
7615
7616
    /*
7617
     * Predefined entities override any extra definition
7618
     */
7619
4.60M
    if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7620
2.52M
        ent = xmlGetPredefinedEntity(name);
7621
2.52M
        if (ent != NULL)
7622
371k
            return(ent);
7623
2.52M
    }
7624
7625
    /*
7626
     * Increase the number of entity references parsed
7627
     */
7628
4.23M
    ctxt->nbentities++;
7629
7630
    /*
7631
     * Ask first SAX for entity resolution, otherwise try the
7632
     * entities which may have stored in the parser context.
7633
     */
7634
4.23M
    if (ctxt->sax != NULL) {
7635
4.23M
  if (ctxt->sax->getEntity != NULL)
7636
4.23M
      ent = ctxt->sax->getEntity(ctxt->userData, name);
7637
4.23M
  if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7638
4.23M
      (ctxt->options & XML_PARSE_OLDSAX))
7639
7.22k
      ent = xmlGetPredefinedEntity(name);
7640
4.23M
  if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7641
4.23M
      (ctxt->userData==ctxt)) {
7642
16.2k
      ent = xmlSAX2GetEntity(ctxt, name);
7643
16.2k
  }
7644
4.23M
    }
7645
4.23M
    if (ctxt->instate == XML_PARSER_EOF)
7646
0
  return(NULL);
7647
    /*
7648
     * [ WFC: Entity Declared ]
7649
     * In a document without any DTD, a document with only an
7650
     * internal DTD subset which contains no parameter entity
7651
     * references, or a document with "standalone='yes'", the
7652
     * Name given in the entity reference must match that in an
7653
     * entity declaration, except that well-formed documents
7654
     * need not declare any of the following entities: amp, lt,
7655
     * gt, apos, quot.
7656
     * The declaration of a parameter entity must precede any
7657
     * reference to it.
7658
     * Similarly, the declaration of a general entity must
7659
     * precede any reference to it which appears in a default
7660
     * value in an attribute-list declaration. Note that if
7661
     * entities are declared in the external subset or in
7662
     * external parameter entities, a non-validating processor
7663
     * is not obligated to read and process their declarations;
7664
     * for such documents, the rule that an entity must be
7665
     * declared is a well-formedness constraint only if
7666
     * standalone='yes'.
7667
     */
7668
4.23M
    if (ent == NULL) {
7669
468k
  if ((ctxt->standalone == 1) ||
7670
468k
      ((ctxt->hasExternalSubset == 0) &&
7671
465k
       (ctxt->hasPErefs == 0))) {
7672
425k
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7673
425k
         "Entity '%s' not defined\n", name);
7674
425k
  } else {
7675
43.0k
      xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7676
43.0k
         "Entity '%s' not defined\n", name);
7677
43.0k
      if ((ctxt->inSubset == 0) &&
7678
43.0k
    (ctxt->sax != NULL) &&
7679
43.0k
    (ctxt->sax->reference != NULL)) {
7680
38.2k
    ctxt->sax->reference(ctxt->userData, name);
7681
38.2k
      }
7682
43.0k
  }
7683
468k
  xmlParserEntityCheck(ctxt, 0, ent, 0);
7684
468k
  ctxt->valid = 0;
7685
468k
    }
7686
7687
    /*
7688
     * [ WFC: Parsed Entity ]
7689
     * An entity reference must not contain the name of an
7690
     * unparsed entity
7691
     */
7692
3.76M
    else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7693
1.33k
  xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7694
1.33k
     "Entity reference to unparsed entity %s\n", name);
7695
1.33k
    }
7696
7697
    /*
7698
     * [ WFC: No External Entity References ]
7699
     * Attribute values cannot contain direct or indirect
7700
     * entity references to external entities.
7701
     */
7702
3.76M
    else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7703
3.76M
       (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7704
6.01k
  xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7705
6.01k
       "Attribute references external entity '%s'\n", name);
7706
6.01k
    }
7707
    /*
7708
     * [ WFC: No < in Attribute Values ]
7709
     * The replacement text of any entity referred to directly or
7710
     * indirectly in an attribute value (other than "&lt;") must
7711
     * not contain a <.
7712
     */
7713
3.75M
    else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7714
3.75M
       (ent != NULL) && 
7715
3.75M
       (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
7716
116k
  if (((ent->checked & 1) || (ent->checked == 0)) &&
7717
116k
       (ent->content != NULL) && (xmlStrchr(ent->content, '<'))) {
7718
4.12k
      xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7719
4.12k
  "'<' in entity '%s' is not allowed in attributes values\n", name);
7720
4.12k
        }
7721
116k
    }
7722
7723
    /*
7724
     * Internal check, no parameter entities here ...
7725
     */
7726
3.64M
    else {
7727
3.64M
  switch (ent->etype) {
7728
0
      case XML_INTERNAL_PARAMETER_ENTITY:
7729
0
      case XML_EXTERNAL_PARAMETER_ENTITY:
7730
0
      xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7731
0
       "Attempt to reference the parameter entity '%s'\n",
7732
0
            name);
7733
0
      break;
7734
3.64M
      default:
7735
3.64M
      break;
7736
3.64M
  }
7737
3.64M
    }
7738
7739
    /*
7740
     * [ WFC: No Recursion ]
7741
     * A parsed entity must not contain a recursive reference
7742
     * to itself, either directly or indirectly.
7743
     * Done somewhere else
7744
     */
7745
4.23M
    return(ent);
7746
4.23M
}
7747
7748
/**
7749
 * xmlParseStringEntityRef:
7750
 * @ctxt:  an XML parser context
7751
 * @str:  a pointer to an index in the string
7752
 *
7753
 * parse ENTITY references declarations, but this version parses it from
7754
 * a string value.
7755
 *
7756
 * [68] EntityRef ::= '&' Name ';'
7757
 *
7758
 * [ WFC: Entity Declared ]
7759
 * In a document without any DTD, a document with only an internal DTD
7760
 * subset which contains no parameter entity references, or a document
7761
 * with "standalone='yes'", the Name given in the entity reference
7762
 * must match that in an entity declaration, except that well-formed
7763
 * documents need not declare any of the following entities: amp, lt,
7764
 * gt, apos, quot.  The declaration of a parameter entity must precede
7765
 * any reference to it.  Similarly, the declaration of a general entity
7766
 * must precede any reference to it which appears in a default value in an
7767
 * attribute-list declaration. Note that if entities are declared in the
7768
 * external subset or in external parameter entities, a non-validating
7769
 * processor is not obligated to read and process their declarations;
7770
 * for such documents, the rule that an entity must be declared is a
7771
 * well-formedness constraint only if standalone='yes'.
7772
 *
7773
 * [ WFC: Parsed Entity ]
7774
 * An entity reference must not contain the name of an unparsed entity
7775
 *
7776
 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7777
 * is updated to the current location in the string.
7778
 */
7779
static xmlEntityPtr
7780
1.38M
xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7781
1.38M
    xmlChar *name;
7782
1.38M
    const xmlChar *ptr;
7783
1.38M
    xmlChar cur;
7784
1.38M
    xmlEntityPtr ent = NULL;
7785
7786
1.38M
    if ((str == NULL) || (*str == NULL))
7787
0
        return(NULL);
7788
1.38M
    ptr = *str;
7789
1.38M
    cur = *ptr;
7790
1.38M
    if (cur != '&')
7791
0
  return(NULL);
7792
7793
1.38M
    ptr++;
7794
1.38M
    name = xmlParseStringName(ctxt, &ptr);
7795
1.38M
    if (name == NULL) {
7796
11.3k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7797
11.3k
           "xmlParseStringEntityRef: no name\n");
7798
11.3k
  *str = ptr;
7799
11.3k
  return(NULL);
7800
11.3k
    }
7801
1.37M
    if (*ptr != ';') {
7802
16.4k
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7803
16.4k
        xmlFree(name);
7804
16.4k
  *str = ptr;
7805
16.4k
  return(NULL);
7806
16.4k
    }
7807
1.35M
    ptr++;
7808
7809
7810
    /*
7811
     * Predefined entities override any extra definition
7812
     */
7813
1.35M
    if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7814
714k
        ent = xmlGetPredefinedEntity(name);
7815
714k
        if (ent != NULL) {
7816
152k
            xmlFree(name);
7817
152k
            *str = ptr;
7818
152k
            return(ent);
7819
152k
        }
7820
714k
    }
7821
7822
    /*
7823
     * Increase the number of entity references parsed
7824
     */
7825
1.20M
    ctxt->nbentities++;
7826
7827
    /*
7828
     * Ask first SAX for entity resolution, otherwise try the
7829
     * entities which may have stored in the parser context.
7830
     */
7831
1.20M
    if (ctxt->sax != NULL) {
7832
1.20M
  if (ctxt->sax->getEntity != NULL)
7833
1.20M
      ent = ctxt->sax->getEntity(ctxt->userData, name);
7834
1.20M
  if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX))
7835
75.0k
      ent = xmlGetPredefinedEntity(name);
7836
1.20M
  if ((ent == NULL) && (ctxt->userData==ctxt)) {
7837
155k
      ent = xmlSAX2GetEntity(ctxt, name);
7838
155k
  }
7839
1.20M
    }
7840
1.20M
    if (ctxt->instate == XML_PARSER_EOF) {
7841
0
  xmlFree(name);
7842
0
  return(NULL);
7843
0
    }
7844
7845
    /*
7846
     * [ WFC: Entity Declared ]
7847
     * In a document without any DTD, a document with only an
7848
     * internal DTD subset which contains no parameter entity
7849
     * references, or a document with "standalone='yes'", the
7850
     * Name given in the entity reference must match that in an
7851
     * entity declaration, except that well-formed documents
7852
     * need not declare any of the following entities: amp, lt,
7853
     * gt, apos, quot.
7854
     * The declaration of a parameter entity must precede any
7855
     * reference to it.
7856
     * Similarly, the declaration of a general entity must
7857
     * precede any reference to it which appears in a default
7858
     * value in an attribute-list declaration. Note that if
7859
     * entities are declared in the external subset or in
7860
     * external parameter entities, a non-validating processor
7861
     * is not obligated to read and process their declarations;
7862
     * for such documents, the rule that an entity must be
7863
     * declared is a well-formedness constraint only if
7864
     * standalone='yes'.
7865
     */
7866
1.20M
    if (ent == NULL) {
7867
155k
  if ((ctxt->standalone == 1) ||
7868
155k
      ((ctxt->hasExternalSubset == 0) &&
7869
148k
       (ctxt->hasPErefs == 0))) {
7870
148k
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7871
148k
         "Entity '%s' not defined\n", name);
7872
148k
  } else {
7873
7.02k
      xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7874
7.02k
        "Entity '%s' not defined\n",
7875
7.02k
        name);
7876
7.02k
  }
7877
155k
  xmlParserEntityCheck(ctxt, 0, ent, 0);
7878
  /* TODO ? check regressions ctxt->valid = 0; */
7879
155k
    }
7880
7881
    /*
7882
     * [ WFC: Parsed Entity ]
7883
     * An entity reference must not contain the name of an
7884
     * unparsed entity
7885
     */
7886
1.04M
    else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7887
446
  xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7888
446
     "Entity reference to unparsed entity %s\n", name);
7889
446
    }
7890
7891
    /*
7892
     * [ WFC: No External Entity References ]
7893
     * Attribute values cannot contain direct or indirect
7894
     * entity references to external entities.
7895
     */
7896
1.04M
    else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7897
1.04M
       (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7898
653
  xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7899
653
   "Attribute references external entity '%s'\n", name);
7900
653
    }
7901
    /*
7902
     * [ WFC: No < in Attribute Values ]
7903
     * The replacement text of any entity referred to directly or
7904
     * indirectly in an attribute value (other than "&lt;") must
7905
     * not contain a <.
7906
     */
7907
1.04M
    else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7908
1.04M
       (ent != NULL) && (ent->content != NULL) &&
7909
1.04M
       (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
7910
1.04M
       (xmlStrchr(ent->content, '<'))) {
7911
200k
  xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7912
200k
     "'<' in entity '%s' is not allowed in attributes values\n",
7913
200k
        name);
7914
200k
    }
7915
7916
    /*
7917
     * Internal check, no parameter entities here ...
7918
     */
7919
845k
    else {
7920
845k
  switch (ent->etype) {
7921
0
      case XML_INTERNAL_PARAMETER_ENTITY:
7922
0
      case XML_EXTERNAL_PARAMETER_ENTITY:
7923
0
    xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7924
0
       "Attempt to reference the parameter entity '%s'\n",
7925
0
          name);
7926
0
      break;
7927
845k
      default:
7928
845k
      break;
7929
845k
  }
7930
845k
    }
7931
7932
    /*
7933
     * [ WFC: No Recursion ]
7934
     * A parsed entity must not contain a recursive reference
7935
     * to itself, either directly or indirectly.
7936
     * Done somewhere else
7937
     */
7938
7939
1.20M
    xmlFree(name);
7940
1.20M
    *str = ptr;
7941
1.20M
    return(ent);
7942
1.20M
}
7943
7944
/**
7945
 * xmlParsePEReference:
7946
 * @ctxt:  an XML parser context
7947
 *
7948
 * DEPRECATED: Internal function, don't use.
7949
 *
7950
 * parse PEReference declarations
7951
 * The entity content is handled directly by pushing it's content as
7952
 * a new input stream.
7953
 *
7954
 * [69] PEReference ::= '%' Name ';'
7955
 *
7956
 * [ WFC: No Recursion ]
7957
 * A parsed entity must not contain a recursive
7958
 * reference to itself, either directly or indirectly.
7959
 *
7960
 * [ WFC: Entity Declared ]
7961
 * In a document without any DTD, a document with only an internal DTD
7962
 * subset which contains no parameter entity references, or a document
7963
 * with "standalone='yes'", ...  ... The declaration of a parameter
7964
 * entity must precede any reference to it...
7965
 *
7966
 * [ VC: Entity Declared ]
7967
 * In a document with an external subset or external parameter entities
7968
 * with "standalone='no'", ...  ... The declaration of a parameter entity
7969
 * must precede any reference to it...
7970
 *
7971
 * [ WFC: In DTD ]
7972
 * Parameter-entity references may only appear in the DTD.
7973
 * NOTE: misleading but this is handled.
7974
 */
7975
void
7976
xmlParsePEReference(xmlParserCtxtPtr ctxt)
7977
3.23M
{
7978
3.23M
    const xmlChar *name;
7979
3.23M
    xmlEntityPtr entity = NULL;
7980
3.23M
    xmlParserInputPtr input;
7981
7982
3.23M
    if (RAW != '%')
7983
2.01M
        return;
7984
1.21M
    NEXT;
7985
1.21M
    name = xmlParseName(ctxt);
7986
1.21M
    if (name == NULL) {
7987
97.3k
  xmlFatalErrMsg(ctxt, XML_ERR_PEREF_NO_NAME, "PEReference: no name\n");
7988
97.3k
  return;
7989
97.3k
    }
7990
1.11M
    if (xmlParserDebugEntities)
7991
0
  xmlGenericError(xmlGenericErrorContext,
7992
0
    "PEReference: %s\n", name);
7993
1.11M
    if (RAW != ';') {
7994
189k
  xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
7995
189k
        return;
7996
189k
    }
7997
7998
929k
    NEXT;
7999
8000
    /*
8001
     * Increase the number of entity references parsed
8002
     */
8003
929k
    ctxt->nbentities++;
8004
8005
    /*
8006
     * Request the entity from SAX
8007
     */
8008
929k
    if ((ctxt->sax != NULL) &&
8009
929k
  (ctxt->sax->getParameterEntity != NULL))
8010
929k
  entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8011
929k
    if (ctxt->instate == XML_PARSER_EOF)
8012
0
  return;
8013
929k
    if (entity == NULL) {
8014
  /*
8015
   * [ WFC: Entity Declared ]
8016
   * In a document without any DTD, a document with only an
8017
   * internal DTD subset which contains no parameter entity
8018
   * references, or a document with "standalone='yes'", ...
8019
   * ... The declaration of a parameter entity must precede
8020
   * any reference to it...
8021
   */
8022
90.1k
  if ((ctxt->standalone == 1) ||
8023
90.1k
      ((ctxt->hasExternalSubset == 0) &&
8024
89.8k
       (ctxt->hasPErefs == 0))) {
8025
2.56k
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8026
2.56k
            "PEReference: %%%s; not found\n",
8027
2.56k
            name);
8028
87.6k
  } else {
8029
      /*
8030
       * [ VC: Entity Declared ]
8031
       * In a document with an external subset or external
8032
       * parameter entities with "standalone='no'", ...
8033
       * ... The declaration of a parameter entity must
8034
       * precede any reference to it...
8035
       */
8036
87.6k
            if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
8037
16.7k
                xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
8038
16.7k
                                 "PEReference: %%%s; not found\n",
8039
16.7k
                                 name, NULL);
8040
16.7k
            } else
8041
70.8k
                xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8042
70.8k
                              "PEReference: %%%s; not found\n",
8043
70.8k
                              name, NULL);
8044
87.6k
            ctxt->valid = 0;
8045
87.6k
  }
8046
90.1k
  xmlParserEntityCheck(ctxt, 0, NULL, 0);
8047
838k
    } else {
8048
  /*
8049
   * Internal checking in case the entity quest barfed
8050
   */
8051
838k
  if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8052
838k
      (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8053
0
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8054
0
      "Internal: %%%s; is not a parameter entity\n",
8055
0
        name, NULL);
8056
838k
  } else {
8057
838k
            xmlChar start[4];
8058
838k
            xmlCharEncoding enc;
8059
8060
838k
      if (xmlParserEntityCheck(ctxt, 0, entity, 0))
8061
478
          return;
8062
8063
838k
      if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
8064
838k
          ((ctxt->options & XML_PARSE_NOENT) == 0) &&
8065
838k
    ((ctxt->options & XML_PARSE_DTDVALID) == 0) &&
8066
838k
    ((ctxt->options & XML_PARSE_DTDLOAD) == 0) &&
8067
838k
    ((ctxt->options & XML_PARSE_DTDATTR) == 0) &&
8068
838k
    (ctxt->replaceEntities == 0) &&
8069
838k
    (ctxt->validate == 0))
8070
60
    return;
8071
8072
838k
      input = xmlNewEntityInputStream(ctxt, entity);
8073
838k
      if (xmlPushInput(ctxt, input) < 0) {
8074
3.02k
                xmlFreeInputStream(input);
8075
3.02k
    return;
8076
3.02k
            }
8077
8078
835k
      if (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) {
8079
                /*
8080
                 * Get the 4 first bytes and decode the charset
8081
                 * if enc != XML_CHAR_ENCODING_NONE
8082
                 * plug some encoding conversion routines.
8083
                 * Note that, since we may have some non-UTF8
8084
                 * encoding (like UTF16, bug 135229), the 'length'
8085
                 * is not known, but we can calculate based upon
8086
                 * the amount of data in the buffer.
8087
                 */
8088
504
                GROW
8089
504
                if (ctxt->instate == XML_PARSER_EOF)
8090
0
                    return;
8091
504
                if ((ctxt->input->end - ctxt->input->cur)>=4) {
8092
421
                    start[0] = RAW;
8093
421
                    start[1] = NXT(1);
8094
421
                    start[2] = NXT(2);
8095
421
                    start[3] = NXT(3);
8096
421
                    enc = xmlDetectCharEncoding(start, 4);
8097
421
                    if (enc != XML_CHAR_ENCODING_NONE) {
8098
111
                        xmlSwitchEncoding(ctxt, enc);
8099
111
                    }
8100
421
                }
8101
8102
504
                if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
8103
504
                    (IS_BLANK_CH(NXT(5)))) {
8104
45
                    xmlParseTextDecl(ctxt);
8105
45
                }
8106
504
            }
8107
835k
  }
8108
838k
    }
8109
925k
    ctxt->hasPErefs = 1;
8110
925k
}
8111
8112
/**
8113
 * xmlLoadEntityContent:
8114
 * @ctxt:  an XML parser context
8115
 * @entity: an unloaded system entity
8116
 *
8117
 * Load the original content of the given system entity from the
8118
 * ExternalID/SystemID given. This is to be used for Included in Literal
8119
 * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
8120
 *
8121
 * Returns 0 in case of success and -1 in case of failure
8122
 */
8123
static int
8124
4.70k
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
8125
4.70k
    xmlParserInputPtr input;
8126
4.70k
    xmlBufferPtr buf;
8127
4.70k
    int l, c;
8128
4.70k
    int count = 0;
8129
8130
4.70k
    if ((ctxt == NULL) || (entity == NULL) ||
8131
4.70k
        ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
8132
4.70k
   (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
8133
4.70k
  (entity->content != NULL)) {
8134
0
  xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8135
0
              "xmlLoadEntityContent parameter error");
8136
0
        return(-1);
8137
0
    }
8138
8139
4.70k
    if (xmlParserDebugEntities)
8140
0
  xmlGenericError(xmlGenericErrorContext,
8141
0
    "Reading %s entity content input\n", entity->name);
8142
8143
4.70k
    buf = xmlBufferCreate();
8144
4.70k
    if (buf == NULL) {
8145
0
  xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8146
0
              "xmlLoadEntityContent parameter error");
8147
0
        return(-1);
8148
0
    }
8149
4.70k
    xmlBufferSetAllocationScheme(buf, XML_BUFFER_ALLOC_DOUBLEIT);
8150
8151
4.70k
    input = xmlNewEntityInputStream(ctxt, entity);
8152
4.70k
    if (input == NULL) {
8153
245
  xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8154
245
              "xmlLoadEntityContent input error");
8155
245
  xmlBufferFree(buf);
8156
245
        return(-1);
8157
245
    }
8158
8159
    /*
8160
     * Push the entity as the current input, read char by char
8161
     * saving to the buffer until the end of the entity or an error
8162
     */
8163
4.46k
    if (xmlPushInput(ctxt, input) < 0) {
8164
0
        xmlBufferFree(buf);
8165
0
  xmlFreeInputStream(input);
8166
0
  return(-1);
8167
0
    }
8168
8169
4.46k
    GROW;
8170
4.46k
    c = CUR_CHAR(l);
8171
6.34M
    while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
8172
6.34M
           (IS_CHAR(c))) {
8173
6.33M
        xmlBufferAdd(buf, ctxt->input->cur, l);
8174
6.33M
  if (count++ > XML_PARSER_CHUNK_SIZE) {
8175
59.9k
      count = 0;
8176
59.9k
      GROW;
8177
59.9k
            if (ctxt->instate == XML_PARSER_EOF) {
8178
0
                xmlBufferFree(buf);
8179
0
                return(-1);
8180
0
            }
8181
59.9k
  }
8182
6.33M
  NEXTL(l);
8183
6.33M
  c = CUR_CHAR(l);
8184
6.33M
  if (c == 0) {
8185
4.00k
      count = 0;
8186
4.00k
      GROW;
8187
4.00k
            if (ctxt->instate == XML_PARSER_EOF) {
8188
0
                xmlBufferFree(buf);
8189
0
                return(-1);
8190
0
            }
8191
4.00k
      c = CUR_CHAR(l);
8192
4.00k
  }
8193
6.33M
    }
8194
8195
4.46k
    if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
8196
3.56k
        xmlPopInput(ctxt);
8197
3.56k
    } else if (!IS_CHAR(c)) {
8198
894
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
8199
894
                          "xmlLoadEntityContent: invalid char value %d\n",
8200
894
                    c);
8201
894
  xmlBufferFree(buf);
8202
894
  return(-1);
8203
894
    }
8204
3.56k
    entity->content = buf->content;
8205
3.56k
    buf->content = NULL;
8206
3.56k
    xmlBufferFree(buf);
8207
8208
3.56k
    return(0);
8209
4.46k
}
8210
8211
/**
8212
 * xmlParseStringPEReference:
8213
 * @ctxt:  an XML parser context
8214
 * @str:  a pointer to an index in the string
8215
 *
8216
 * parse PEReference declarations
8217
 *
8218
 * [69] PEReference ::= '%' Name ';'
8219
 *
8220
 * [ WFC: No Recursion ]
8221
 * A parsed entity must not contain a recursive
8222
 * reference to itself, either directly or indirectly.
8223
 *
8224
 * [ WFC: Entity Declared ]
8225
 * In a document without any DTD, a document with only an internal DTD
8226
 * subset which contains no parameter entity references, or a document
8227
 * with "standalone='yes'", ...  ... The declaration of a parameter
8228
 * entity must precede any reference to it...
8229
 *
8230
 * [ VC: Entity Declared ]
8231
 * In a document with an external subset or external parameter entities
8232
 * with "standalone='no'", ...  ... The declaration of a parameter entity
8233
 * must precede any reference to it...
8234
 *
8235
 * [ WFC: In DTD ]
8236
 * Parameter-entity references may only appear in the DTD.
8237
 * NOTE: misleading but this is handled.
8238
 *
8239
 * Returns the string of the entity content.
8240
 *         str is updated to the current value of the index
8241
 */
8242
static xmlEntityPtr
8243
2.12M
xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
8244
2.12M
    const xmlChar *ptr;
8245
2.12M
    xmlChar cur;
8246
2.12M
    xmlChar *name;
8247
2.12M
    xmlEntityPtr entity = NULL;
8248
8249
2.12M
    if ((str == NULL) || (*str == NULL)) return(NULL);
8250
2.12M
    ptr = *str;
8251
2.12M
    cur = *ptr;
8252
2.12M
    if (cur != '%')
8253
0
        return(NULL);
8254
2.12M
    ptr++;
8255
2.12M
    name = xmlParseStringName(ctxt, &ptr);
8256
2.12M
    if (name == NULL) {
8257
824k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8258
824k
           "xmlParseStringPEReference: no name\n");
8259
824k
  *str = ptr;
8260
824k
  return(NULL);
8261
824k
    }
8262
1.30M
    cur = *ptr;
8263
1.30M
    if (cur != ';') {
8264
377k
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
8265
377k
  xmlFree(name);
8266
377k
  *str = ptr;
8267
377k
  return(NULL);
8268
377k
    }
8269
924k
    ptr++;
8270
8271
    /*
8272
     * Increase the number of entity references parsed
8273
     */
8274
924k
    ctxt->nbentities++;
8275
8276
    /*
8277
     * Request the entity from SAX
8278
     */
8279
924k
    if ((ctxt->sax != NULL) &&
8280
924k
  (ctxt->sax->getParameterEntity != NULL))
8281
924k
  entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8282
924k
    if (ctxt->instate == XML_PARSER_EOF) {
8283
0
  xmlFree(name);
8284
0
  *str = ptr;
8285
0
  return(NULL);
8286
0
    }
8287
924k
    if (entity == NULL) {
8288
  /*
8289
   * [ WFC: Entity Declared ]
8290
   * In a document without any DTD, a document with only an
8291
   * internal DTD subset which contains no parameter entity
8292
   * references, or a document with "standalone='yes'", ...
8293
   * ... The declaration of a parameter entity must precede
8294
   * any reference to it...
8295
   */
8296
236k
  if ((ctxt->standalone == 1) ||
8297
236k
      ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) {
8298
0
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8299
0
     "PEReference: %%%s; not found\n", name);
8300
236k
  } else {
8301
      /*
8302
       * [ VC: Entity Declared ]
8303
       * In a document with an external subset or external
8304
       * parameter entities with "standalone='no'", ...
8305
       * ... The declaration of a parameter entity must
8306
       * precede any reference to it...
8307
       */
8308
236k
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8309
236k
        "PEReference: %%%s; not found\n",
8310
236k
        name, NULL);
8311
236k
      ctxt->valid = 0;
8312
236k
  }
8313
236k
  xmlParserEntityCheck(ctxt, 0, NULL, 0);
8314
687k
    } else {
8315
  /*
8316
   * Internal checking in case the entity quest barfed
8317
   */
8318
687k
  if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8319
687k
      (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8320
0
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8321
0
        "%%%s; is not a parameter entity\n",
8322
0
        name, NULL);
8323
0
  }
8324
687k
    }
8325
924k
    ctxt->hasPErefs = 1;
8326
924k
    xmlFree(name);
8327
924k
    *str = ptr;
8328
924k
    return(entity);
8329
924k
}
8330
8331
/**
8332
 * xmlParseDocTypeDecl:
8333
 * @ctxt:  an XML parser context
8334
 *
8335
 * DEPRECATED: Internal function, don't use.
8336
 *
8337
 * parse a DOCTYPE declaration
8338
 *
8339
 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
8340
 *                      ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8341
 *
8342
 * [ VC: Root Element Type ]
8343
 * The Name in the document type declaration must match the element
8344
 * type of the root element.
8345
 */
8346
8347
void
8348
324k
xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
8349
324k
    const xmlChar *name = NULL;
8350
324k
    xmlChar *ExternalID = NULL;
8351
324k
    xmlChar *URI = NULL;
8352
8353
    /*
8354
     * We know that '<!DOCTYPE' has been detected.
8355
     */
8356
324k
    SKIP(9);
8357
8358
324k
    SKIP_BLANKS;
8359
8360
    /*
8361
     * Parse the DOCTYPE name.
8362
     */
8363
324k
    name = xmlParseName(ctxt);
8364
324k
    if (name == NULL) {
8365
1.35k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8366
1.35k
           "xmlParseDocTypeDecl : no DOCTYPE name !\n");
8367
1.35k
    }
8368
324k
    ctxt->intSubName = name;
8369
8370
324k
    SKIP_BLANKS;
8371
8372
    /*
8373
     * Check for SystemID and ExternalID
8374
     */
8375
324k
    URI = xmlParseExternalID(ctxt, &ExternalID, 1);
8376
8377
324k
    if ((URI != NULL) || (ExternalID != NULL)) {
8378
99.3k
        ctxt->hasExternalSubset = 1;
8379
99.3k
    }
8380
324k
    ctxt->extSubURI = URI;
8381
324k
    ctxt->extSubSystem = ExternalID;
8382
8383
324k
    SKIP_BLANKS;
8384
8385
    /*
8386
     * Create and update the internal subset.
8387
     */
8388
324k
    if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
8389
324k
  (!ctxt->disableSAX))
8390
314k
  ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
8391
324k
    if (ctxt->instate == XML_PARSER_EOF)
8392
0
  return;
8393
8394
    /*
8395
     * Is there any internal subset declarations ?
8396
     * they are handled separately in xmlParseInternalSubset()
8397
     */
8398
324k
    if (RAW == '[')
8399
248k
  return;
8400
8401
    /*
8402
     * We should be at the end of the DOCTYPE declaration.
8403
     */
8404
76.7k
    if (RAW != '>') {
8405
13.5k
  xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8406
13.5k
    }
8407
76.7k
    NEXT;
8408
76.7k
}
8409
8410
/**
8411
 * xmlParseInternalSubset:
8412
 * @ctxt:  an XML parser context
8413
 *
8414
 * parse the internal subset declaration
8415
 *
8416
 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8417
 */
8418
8419
static void
8420
233k
xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
8421
    /*
8422
     * Is there any DTD definition ?
8423
     */
8424
233k
    if (RAW == '[') {
8425
233k
        int baseInputNr = ctxt->inputNr;
8426
233k
        ctxt->instate = XML_PARSER_DTD;
8427
233k
        NEXT;
8428
  /*
8429
   * Parse the succession of Markup declarations and
8430
   * PEReferences.
8431
   * Subsequence (markupdecl | PEReference | S)*
8432
   */
8433
2.34M
  while (((RAW != ']') || (ctxt->inputNr > baseInputNr)) &&
8434
2.34M
               (ctxt->instate != XML_PARSER_EOF)) {
8435
2.17M
      int id = ctxt->input->id;
8436
2.17M
      unsigned long cons = CUR_CONSUMED;
8437
8438
2.17M
      SKIP_BLANKS;
8439
2.17M
      xmlParseMarkupDecl(ctxt);
8440
2.17M
      xmlParsePEReference(ctxt);
8441
8442
            /*
8443
             * Conditional sections are allowed from external entities included
8444
             * by PE References in the internal subset.
8445
             */
8446
2.17M
            if ((ctxt->inputNr > 1) && (ctxt->input->filename != NULL) &&
8447
2.17M
                (RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
8448
0
                xmlParseConditionalSections(ctxt);
8449
0
            }
8450
8451
2.17M
      if ((id == ctxt->input->id) && (cons == CUR_CONSUMED)) {
8452
79.0k
    xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8453
79.0k
       "xmlParseInternalSubset: error detected in Markup declaration\n");
8454
79.0k
                if (ctxt->inputNr > baseInputNr)
8455
13.1k
                    xmlPopInput(ctxt);
8456
65.9k
                else
8457
65.9k
        break;
8458
79.0k
      }
8459
2.17M
  }
8460
233k
  if (RAW == ']') {
8461
156k
      NEXT;
8462
156k
      SKIP_BLANKS;
8463
156k
  }
8464
233k
    }
8465
8466
    /*
8467
     * We should be at the end of the DOCTYPE declaration.
8468
     */
8469
233k
    if (RAW != '>') {
8470
77.3k
  xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8471
77.3k
  return;
8472
77.3k
    }
8473
156k
    NEXT;
8474
156k
}
8475
8476
#ifdef LIBXML_SAX1_ENABLED
8477
/**
8478
 * xmlParseAttribute:
8479
 * @ctxt:  an XML parser context
8480
 * @value:  a xmlChar ** used to store the value of the attribute
8481
 *
8482
 * DEPRECATED: Internal function, don't use.
8483
 *
8484
 * parse an attribute
8485
 *
8486
 * [41] Attribute ::= Name Eq AttValue
8487
 *
8488
 * [ WFC: No External Entity References ]
8489
 * Attribute values cannot contain direct or indirect entity references
8490
 * to external entities.
8491
 *
8492
 * [ WFC: No < in Attribute Values ]
8493
 * The replacement text of any entity referred to directly or indirectly in
8494
 * an attribute value (other than "&lt;") must not contain a <.
8495
 *
8496
 * [ VC: Attribute Value Type ]
8497
 * The attribute must have been declared; the value must be of the type
8498
 * declared for it.
8499
 *
8500
 * [25] Eq ::= S? '=' S?
8501
 *
8502
 * With namespace:
8503
 *
8504
 * [NS 11] Attribute ::= QName Eq AttValue
8505
 *
8506
 * Also the case QName == xmlns:??? is handled independently as a namespace
8507
 * definition.
8508
 *
8509
 * Returns the attribute name, and the value in *value.
8510
 */
8511
8512
const xmlChar *
8513
8.90M
xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
8514
8.90M
    const xmlChar *name;
8515
8.90M
    xmlChar *val;
8516
8517
8.90M
    *value = NULL;
8518
8.90M
    GROW;
8519
8.90M
    name = xmlParseName(ctxt);
8520
8.90M
    if (name == NULL) {
8521
1.50M
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8522
1.50M
                 "error parsing attribute name\n");
8523
1.50M
        return(NULL);
8524
1.50M
    }
8525
8526
    /*
8527
     * read the value
8528
     */
8529
7.40M
    SKIP_BLANKS;
8530
7.40M
    if (RAW == '=') {
8531
6.57M
        NEXT;
8532
6.57M
  SKIP_BLANKS;
8533
6.57M
  val = xmlParseAttValue(ctxt);
8534
6.57M
  ctxt->instate = XML_PARSER_CONTENT;
8535
6.57M
    } else {
8536
830k
  xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8537
830k
         "Specification mandates value for attribute %s\n", name);
8538
830k
  return(NULL);
8539
830k
    }
8540
8541
    /*
8542
     * Check that xml:lang conforms to the specification
8543
     * No more registered as an error, just generate a warning now
8544
     * since this was deprecated in XML second edition
8545
     */
8546
6.57M
    if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8547
14.3k
  if (!xmlCheckLanguageID(val)) {
8548
7.62k
      xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8549
7.62k
              "Malformed value for xml:lang : %s\n",
8550
7.62k
        val, NULL);
8551
7.62k
  }
8552
14.3k
    }
8553
8554
    /*
8555
     * Check that xml:space conforms to the specification
8556
     */
8557
6.57M
    if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8558
1.55k
  if (xmlStrEqual(val, BAD_CAST "default"))
8559
0
      *(ctxt->space) = 0;
8560
1.55k
  else if (xmlStrEqual(val, BAD_CAST "preserve"))
8561
791
      *(ctxt->space) = 1;
8562
768
  else {
8563
768
    xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8564
768
"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8565
768
                                 val, NULL);
8566
768
  }
8567
1.55k
    }
8568
8569
6.57M
    *value = val;
8570
6.57M
    return(name);
8571
7.40M
}
8572
8573
/**
8574
 * xmlParseStartTag:
8575
 * @ctxt:  an XML parser context
8576
 *
8577
 * DEPRECATED: Internal function, don't use.
8578
 *
8579
 * parse a start of tag either for rule element or
8580
 * EmptyElement. In both case we don't parse the tag closing chars.
8581
 *
8582
 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8583
 *
8584
 * [ WFC: Unique Att Spec ]
8585
 * No attribute name may appear more than once in the same start-tag or
8586
 * empty-element tag.
8587
 *
8588
 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8589
 *
8590
 * [ WFC: Unique Att Spec ]
8591
 * No attribute name may appear more than once in the same start-tag or
8592
 * empty-element tag.
8593
 *
8594
 * With namespace:
8595
 *
8596
 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8597
 *
8598
 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8599
 *
8600
 * Returns the element name parsed
8601
 */
8602
8603
const xmlChar *
8604
15.1M
xmlParseStartTag(xmlParserCtxtPtr ctxt) {
8605
15.1M
    const xmlChar *name;
8606
15.1M
    const xmlChar *attname;
8607
15.1M
    xmlChar *attvalue;
8608
15.1M
    const xmlChar **atts = ctxt->atts;
8609
15.1M
    int nbatts = 0;
8610
15.1M
    int maxatts = ctxt->maxatts;
8611
15.1M
    int i;
8612
8613
15.1M
    if (RAW != '<') return(NULL);
8614
15.1M
    NEXT1;
8615
8616
15.1M
    name = xmlParseName(ctxt);
8617
15.1M
    if (name == NULL) {
8618
2.72M
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8619
2.72M
       "xmlParseStartTag: invalid element name\n");
8620
2.72M
        return(NULL);
8621
2.72M
    }
8622
8623
    /*
8624
     * Now parse the attributes, it ends up with the ending
8625
     *
8626
     * (S Attribute)* S?
8627
     */
8628
12.3M
    SKIP_BLANKS;
8629
12.3M
    GROW;
8630
8631
14.4M
    while (((RAW != '>') &&
8632
14.4M
     ((RAW != '/') || (NXT(1) != '>')) &&
8633
14.4M
     (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
8634
8.90M
        int id = ctxt->input->id;
8635
8.90M
  unsigned long cons = CUR_CONSUMED;
8636
8637
8.90M
  attname = xmlParseAttribute(ctxt, &attvalue);
8638
8.90M
        if ((attname != NULL) && (attvalue != NULL)) {
8639
      /*
8640
       * [ WFC: Unique Att Spec ]
8641
       * No attribute name may appear more than once in the same
8642
       * start-tag or empty-element tag.
8643
       */
8644
7.31M
      for (i = 0; i < nbatts;i += 2) {
8645
844k
          if (xmlStrEqual(atts[i], attname)) {
8646
19.3k
        xmlErrAttributeDup(ctxt, NULL, attname);
8647
19.3k
        xmlFree(attvalue);
8648
19.3k
        goto failed;
8649
19.3k
    }
8650
844k
      }
8651
      /*
8652
       * Add the pair to atts
8653
       */
8654
6.46M
      if (atts == NULL) {
8655
1.99M
          maxatts = 22; /* allow for 10 attrs by default */
8656
1.99M
          atts = (const xmlChar **)
8657
1.99M
           xmlMalloc(maxatts * sizeof(xmlChar *));
8658
1.99M
    if (atts == NULL) {
8659
0
        xmlErrMemory(ctxt, NULL);
8660
0
        if (attvalue != NULL)
8661
0
      xmlFree(attvalue);
8662
0
        goto failed;
8663
0
    }
8664
1.99M
    ctxt->atts = atts;
8665
1.99M
    ctxt->maxatts = maxatts;
8666
4.46M
      } else if (nbatts + 4 > maxatts) {
8667
114
          const xmlChar **n;
8668
8669
114
          maxatts *= 2;
8670
114
          n = (const xmlChar **) xmlRealloc((void *) atts,
8671
114
               maxatts * sizeof(const xmlChar *));
8672
114
    if (n == NULL) {
8673
0
        xmlErrMemory(ctxt, NULL);
8674
0
        if (attvalue != NULL)
8675
0
      xmlFree(attvalue);
8676
0
        goto failed;
8677
0
    }
8678
114
    atts = n;
8679
114
    ctxt->atts = atts;
8680
114
    ctxt->maxatts = maxatts;
8681
114
      }
8682
6.46M
      atts[nbatts++] = attname;
8683
6.46M
      atts[nbatts++] = attvalue;
8684
6.46M
      atts[nbatts] = NULL;
8685
6.46M
      atts[nbatts + 1] = NULL;
8686
6.46M
  } else {
8687
2.41M
      if (attvalue != NULL)
8688
0
    xmlFree(attvalue);
8689
2.41M
  }
8690
8691
8.90M
failed:
8692
8693
8.90M
  GROW
8694
8.90M
  if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8695
5.34M
      break;
8696
3.55M
  if (SKIP_BLANKS == 0) {
8697
2.81M
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8698
2.81M
         "attributes construct error\n");
8699
2.81M
  }
8700
3.55M
        if ((cons == CUR_CONSUMED) && (id == ctxt->input->id) &&
8701
3.55M
            (attname == NULL) && (attvalue == NULL)) {
8702
1.50M
      xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
8703
1.50M
         "xmlParseStartTag: problem parsing attributes\n");
8704
1.50M
      break;
8705
1.50M
  }
8706
2.05M
  SHRINK;
8707
2.05M
        GROW;
8708
2.05M
    }
8709
8710
    /*
8711
     * SAX: Start of Element !
8712
     */
8713
12.3M
    if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
8714
12.3M
  (!ctxt->disableSAX)) {
8715
7.05M
  if (nbatts > 0)
8716
4.01M
      ctxt->sax->startElement(ctxt->userData, name, atts);
8717
3.03M
  else
8718
3.03M
      ctxt->sax->startElement(ctxt->userData, name, NULL);
8719
7.05M
    }
8720
8721
12.3M
    if (atts != NULL) {
8722
        /* Free only the content strings */
8723
17.3M
        for (i = 1;i < nbatts;i+=2)
8724
6.46M
      if (atts[i] != NULL)
8725
6.46M
         xmlFree((xmlChar *) atts[i]);
8726
10.8M
    }
8727
12.3M
    return(name);
8728
12.3M
}
8729
8730
/**
8731
 * xmlParseEndTag1:
8732
 * @ctxt:  an XML parser context
8733
 * @line:  line of the start tag
8734
 * @nsNr:  number of namespaces on the start tag
8735
 *
8736
 * parse an end of tag
8737
 *
8738
 * [42] ETag ::= '</' Name S? '>'
8739
 *
8740
 * With namespace
8741
 *
8742
 * [NS 9] ETag ::= '</' QName S? '>'
8743
 */
8744
8745
static void
8746
3.74M
xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
8747
3.74M
    const xmlChar *name;
8748
8749
3.74M
    GROW;
8750
3.74M
    if ((RAW != '<') || (NXT(1) != '/')) {
8751
0
  xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
8752
0
           "xmlParseEndTag: '</' not found\n");
8753
0
  return;
8754
0
    }
8755
3.74M
    SKIP(2);
8756
8757
3.74M
    name = xmlParseNameAndCompare(ctxt,ctxt->name);
8758
8759
    /*
8760
     * We should definitely be at the ending "S? '>'" part
8761
     */
8762
3.74M
    GROW;
8763
3.74M
    SKIP_BLANKS;
8764
3.74M
    if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
8765
993k
  xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
8766
993k
    } else
8767
2.75M
  NEXT1;
8768
8769
    /*
8770
     * [ WFC: Element Type Match ]
8771
     * The Name in an element's end-tag must match the element type in the
8772
     * start-tag.
8773
     *
8774
     */
8775
3.74M
    if (name != (xmlChar*)1) {
8776
1.63M
        if (name == NULL) name = BAD_CAST "unparsable";
8777
1.63M
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
8778
1.63M
         "Opening and ending tag mismatch: %s line %d and %s\n",
8779
1.63M
                    ctxt->name, line, name);
8780
1.63M
    }
8781
8782
    /*
8783
     * SAX: End of Tag
8784
     */
8785
3.74M
    if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8786
3.74M
  (!ctxt->disableSAX))
8787
849k
        ctxt->sax->endElement(ctxt->userData, ctxt->name);
8788
8789
3.74M
    namePop(ctxt);
8790
3.74M
    spacePop(ctxt);
8791
3.74M
    return;
8792
3.74M
}
8793
8794
/**
8795
 * xmlParseEndTag:
8796
 * @ctxt:  an XML parser context
8797
 *
8798
 * DEPRECATED: Internal function, don't use.
8799
 *
8800
 * parse an end of tag
8801
 *
8802
 * [42] ETag ::= '</' Name S? '>'
8803
 *
8804
 * With namespace
8805
 *
8806
 * [NS 9] ETag ::= '</' QName S? '>'
8807
 */
8808
8809
void
8810
0
xmlParseEndTag(xmlParserCtxtPtr ctxt) {
8811
0
    xmlParseEndTag1(ctxt, 0);
8812
0
}
8813
#endif /* LIBXML_SAX1_ENABLED */
8814
8815
/************************************************************************
8816
 *                  *
8817
 *          SAX 2 specific operations       *
8818
 *                  *
8819
 ************************************************************************/
8820
8821
/*
8822
 * xmlGetNamespace:
8823
 * @ctxt:  an XML parser context
8824
 * @prefix:  the prefix to lookup
8825
 *
8826
 * Lookup the namespace name for the @prefix (which ca be NULL)
8827
 * The prefix must come from the @ctxt->dict dictionary
8828
 *
8829
 * Returns the namespace name or NULL if not bound
8830
 */
8831
static const xmlChar *
8832
3.58M
xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
8833
3.58M
    int i;
8834
8835
3.58M
    if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
8836
8.31M
    for (i = ctxt->nsNr - 2;i >= 0;i-=2)
8837
5.68M
        if (ctxt->nsTab[i] == prefix) {
8838
907k
      if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
8839
6.01k
          return(NULL);
8840
901k
      return(ctxt->nsTab[i + 1]);
8841
907k
  }
8842
2.63M
    return(NULL);
8843
3.53M
}
8844
8845
/**
8846
 * xmlParseQName:
8847
 * @ctxt:  an XML parser context
8848
 * @prefix:  pointer to store the prefix part
8849
 *
8850
 * parse an XML Namespace QName
8851
 *
8852
 * [6]  QName  ::= (Prefix ':')? LocalPart
8853
 * [7]  Prefix  ::= NCName
8854
 * [8]  LocalPart  ::= NCName
8855
 *
8856
 * Returns the Name parsed or NULL
8857
 */
8858
8859
static const xmlChar *
8860
5.37M
xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8861
5.37M
    const xmlChar *l, *p;
8862
8863
5.37M
    GROW;
8864
8865
5.37M
    l = xmlParseNCName(ctxt);
8866
5.37M
    if (l == NULL) {
8867
311k
        if (CUR == ':') {
8868
12.0k
      l = xmlParseName(ctxt);
8869
12.0k
      if (l != NULL) {
8870
12.0k
          xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8871
12.0k
             "Failed to parse QName '%s'\n", l, NULL, NULL);
8872
12.0k
    *prefix = NULL;
8873
12.0k
    return(l);
8874
12.0k
      }
8875
12.0k
  }
8876
299k
        return(NULL);
8877
311k
    }
8878
5.05M
    if (CUR == ':') {
8879
1.83M
        NEXT;
8880
1.83M
  p = l;
8881
1.83M
  l = xmlParseNCName(ctxt);
8882
1.83M
  if (l == NULL) {
8883
24.5k
      xmlChar *tmp;
8884
8885
24.5k
            if (ctxt->instate == XML_PARSER_EOF)
8886
0
                return(NULL);
8887
24.5k
            xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8888
24.5k
               "Failed to parse QName '%s:'\n", p, NULL, NULL);
8889
24.5k
      l = xmlParseNmtoken(ctxt);
8890
24.5k
      if (l == NULL) {
8891
13.1k
                if (ctxt->instate == XML_PARSER_EOF)
8892
0
                    return(NULL);
8893
13.1k
    tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
8894
13.1k
            } else {
8895
11.4k
    tmp = xmlBuildQName(l, p, NULL, 0);
8896
11.4k
    xmlFree((char *)l);
8897
11.4k
      }
8898
24.5k
      p = xmlDictLookup(ctxt->dict, tmp, -1);
8899
24.5k
      if (tmp != NULL) xmlFree(tmp);
8900
24.5k
      *prefix = NULL;
8901
24.5k
      return(p);
8902
24.5k
  }
8903
1.80M
  if (CUR == ':') {
8904
11.5k
      xmlChar *tmp;
8905
8906
11.5k
            xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8907
11.5k
               "Failed to parse QName '%s:%s:'\n", p, l, NULL);
8908
11.5k
      NEXT;
8909
11.5k
      tmp = (xmlChar *) xmlParseName(ctxt);
8910
11.5k
      if (tmp != NULL) {
8911
10.0k
          tmp = xmlBuildQName(tmp, l, NULL, 0);
8912
10.0k
    l = xmlDictLookup(ctxt->dict, tmp, -1);
8913
10.0k
    if (tmp != NULL) xmlFree(tmp);
8914
10.0k
    *prefix = p;
8915
10.0k
    return(l);
8916
10.0k
      }
8917
1.52k
            if (ctxt->instate == XML_PARSER_EOF)
8918
0
                return(NULL);
8919
1.52k
      tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
8920
1.52k
      l = xmlDictLookup(ctxt->dict, tmp, -1);
8921
1.52k
      if (tmp != NULL) xmlFree(tmp);
8922
1.52k
      *prefix = p;
8923
1.52k
      return(l);
8924
1.52k
  }
8925
1.79M
  *prefix = p;
8926
1.79M
    } else
8927
3.22M
        *prefix = NULL;
8928
5.02M
    return(l);
8929
5.05M
}
8930
8931
/**
8932
 * xmlParseQNameAndCompare:
8933
 * @ctxt:  an XML parser context
8934
 * @name:  the localname
8935
 * @prefix:  the prefix, if any.
8936
 *
8937
 * parse an XML name and compares for match
8938
 * (specialized for endtag parsing)
8939
 *
8940
 * Returns NULL for an illegal name, (xmlChar*) 1 for success
8941
 * and the name for mismatch
8942
 */
8943
8944
static const xmlChar *
8945
xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8946
347k
                        xmlChar const *prefix) {
8947
347k
    const xmlChar *cmp;
8948
347k
    const xmlChar *in;
8949
347k
    const xmlChar *ret;
8950
347k
    const xmlChar *prefix2;
8951
8952
347k
    if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8953
8954
347k
    GROW;
8955
347k
    in = ctxt->input->cur;
8956
8957
347k
    cmp = prefix;
8958
1.17M
    while (*in != 0 && *in == *cmp) {
8959
831k
  ++in;
8960
831k
  ++cmp;
8961
831k
    }
8962
347k
    if ((*cmp == 0) && (*in == ':')) {
8963
304k
        in++;
8964
304k
  cmp = name;
8965
2.46M
  while (*in != 0 && *in == *cmp) {
8966
2.15M
      ++in;
8967
2.15M
      ++cmp;
8968
2.15M
  }
8969
304k
  if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
8970
      /* success */
8971
284k
            ctxt->input->col += in - ctxt->input->cur;
8972
284k
      ctxt->input->cur = in;
8973
284k
      return((const xmlChar*) 1);
8974
284k
  }
8975
304k
    }
8976
    /*
8977
     * all strings coms from the dictionary, equality can be done directly
8978
     */
8979
63.0k
    ret = xmlParseQName (ctxt, &prefix2);
8980
63.0k
    if ((ret == name) && (prefix == prefix2))
8981
816
  return((const xmlChar*) 1);
8982
62.2k
    return ret;
8983
63.0k
}
8984
8985
/**
8986
 * xmlParseAttValueInternal:
8987
 * @ctxt:  an XML parser context
8988
 * @len:  attribute len result
8989
 * @alloc:  whether the attribute was reallocated as a new string
8990
 * @normalize:  if 1 then further non-CDATA normalization must be done
8991
 *
8992
 * parse a value for an attribute.
8993
 * NOTE: if no normalization is needed, the routine will return pointers
8994
 *       directly from the data buffer.
8995
 *
8996
 * 3.3.3 Attribute-Value Normalization:
8997
 * Before the value of an attribute is passed to the application or
8998
 * checked for validity, the XML processor must normalize it as follows:
8999
 * - a character reference is processed by appending the referenced
9000
 *   character to the attribute value
9001
 * - an entity reference is processed by recursively processing the
9002
 *   replacement text of the entity
9003
 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
9004
 *   appending #x20 to the normalized value, except that only a single
9005
 *   #x20 is appended for a "#xD#xA" sequence that is part of an external
9006
 *   parsed entity or the literal entity value of an internal parsed entity
9007
 * - other characters are processed by appending them to the normalized value
9008
 * If the declared value is not CDATA, then the XML processor must further
9009
 * process the normalized attribute value by discarding any leading and
9010
 * trailing space (#x20) characters, and by replacing sequences of space
9011
 * (#x20) characters by a single space (#x20) character.
9012
 * All attributes for which no declaration has been read should be treated
9013
 * by a non-validating parser as if declared CDATA.
9014
 *
9015
 * Returns the AttValue parsed or NULL. The value has to be freed by the
9016
 *     caller if it was copied, this can be detected by val[*len] == 0.
9017
 */
9018
9019
#define GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end) \
9020
20.1k
    const xmlChar *oldbase = ctxt->input->base;\
9021
20.1k
    GROW;\
9022
20.1k
    if (ctxt->instate == XML_PARSER_EOF)\
9023
20.1k
        return(NULL);\
9024
20.1k
    if (oldbase != ctxt->input->base) {\
9025
0
        ptrdiff_t delta = ctxt->input->base - oldbase;\
9026
0
        start = start + delta;\
9027
0
        in = in + delta;\
9028
0
    }\
9029
20.1k
    end = ctxt->input->end;
9030
9031
static xmlChar *
9032
xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
9033
                         int normalize)
9034
8.45M
{
9035
8.45M
    xmlChar limit = 0;
9036
8.45M
    const xmlChar *in = NULL, *start, *end, *last;
9037
8.45M
    xmlChar *ret = NULL;
9038
8.45M
    int line, col;
9039
8.45M
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
9040
6.33M
                    XML_MAX_HUGE_LENGTH :
9041
8.45M
                    XML_MAX_TEXT_LENGTH;
9042
9043
8.45M
    GROW;
9044
8.45M
    in = (xmlChar *) CUR_PTR;
9045
8.45M
    line = ctxt->input->line;
9046
8.45M
    col = ctxt->input->col;
9047
8.45M
    if (*in != '"' && *in != '\'') {
9048
140k
        xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
9049
140k
        return (NULL);
9050
140k
    }
9051
8.31M
    ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
9052
9053
    /*
9054
     * try to handle in this routine the most common case where no
9055
     * allocation of a new string is required and where content is
9056
     * pure ASCII.
9057
     */
9058
8.31M
    limit = *in++;
9059
8.31M
    col++;
9060
8.31M
    end = ctxt->input->end;
9061
8.31M
    start = in;
9062
8.31M
    if (in >= end) {
9063
4.51k
        GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9064
4.51k
    }
9065
8.31M
    if (normalize) {
9066
        /*
9067
   * Skip any leading spaces
9068
   */
9069
446k
  while ((in < end) && (*in != limit) &&
9070
446k
         ((*in == 0x20) || (*in == 0x9) ||
9071
444k
          (*in == 0xA) || (*in == 0xD))) {
9072
328k
      if (*in == 0xA) {
9073
24.7k
          line++; col = 1;
9074
304k
      } else {
9075
304k
          col++;
9076
304k
      }
9077
328k
      in++;
9078
328k
      start = in;
9079
328k
      if (in >= end) {
9080
162
                GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9081
162
                if ((in - start) > maxLength) {
9082
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9083
0
                                   "AttValue length too long\n");
9084
0
                    return(NULL);
9085
0
                }
9086
162
      }
9087
328k
  }
9088
514k
  while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9089
514k
         (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
9090
399k
      col++;
9091
399k
      if ((*in++ == 0x20) && (*in == 0x20)) break;
9092
396k
      if (in >= end) {
9093
244
                GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9094
244
                if ((in - start) > maxLength) {
9095
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9096
0
                                   "AttValue length too long\n");
9097
0
                    return(NULL);
9098
0
                }
9099
244
      }
9100
396k
  }
9101
117k
  last = in;
9102
  /*
9103
   * skip the trailing blanks
9104
   */
9105
122k
  while ((last[-1] == 0x20) && (last > start)) last--;
9106
438k
  while ((in < end) && (*in != limit) &&
9107
438k
         ((*in == 0x20) || (*in == 0x9) ||
9108
353k
          (*in == 0xA) || (*in == 0xD))) {
9109
320k
      if (*in == 0xA) {
9110
15.4k
          line++, col = 1;
9111
305k
      } else {
9112
305k
          col++;
9113
305k
      }
9114
320k
      in++;
9115
320k
      if (in >= end) {
9116
228
    const xmlChar *oldbase = ctxt->input->base;
9117
228
    GROW;
9118
228
                if (ctxt->instate == XML_PARSER_EOF)
9119
0
                    return(NULL);
9120
228
    if (oldbase != ctxt->input->base) {
9121
0
        ptrdiff_t delta = ctxt->input->base - oldbase;
9122
0
        start = start + delta;
9123
0
        in = in + delta;
9124
0
        last = last + delta;
9125
0
    }
9126
228
    end = ctxt->input->end;
9127
228
                if ((in - start) > maxLength) {
9128
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9129
0
                                   "AttValue length too long\n");
9130
0
                    return(NULL);
9131
0
                }
9132
228
      }
9133
320k
  }
9134
117k
        if ((in - start) > maxLength) {
9135
0
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9136
0
                           "AttValue length too long\n");
9137
0
            return(NULL);
9138
0
        }
9139
117k
  if (*in != limit) goto need_complex;
9140
8.19M
    } else {
9141
145M
  while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9142
145M
         (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
9143
137M
      in++;
9144
137M
      col++;
9145
137M
      if (in >= end) {
9146
15.2k
                GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9147
15.2k
                if ((in - start) > maxLength) {
9148
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9149
0
                                   "AttValue length too long\n");
9150
0
                    return(NULL);
9151
0
                }
9152
15.2k
      }
9153
137M
  }
9154
8.19M
  last = in;
9155
8.19M
        if ((in - start) > maxLength) {
9156
0
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9157
0
                           "AttValue length too long\n");
9158
0
            return(NULL);
9159
0
        }
9160
8.19M
  if (*in != limit) goto need_complex;
9161
8.19M
    }
9162
7.04M
    in++;
9163
7.04M
    col++;
9164
7.04M
    if (len != NULL) {
9165
1.32M
        if (alloc) *alloc = 0;
9166
1.32M
        *len = last - start;
9167
1.32M
        ret = (xmlChar *) start;
9168
5.71M
    } else {
9169
5.71M
        if (alloc) *alloc = 1;
9170
5.71M
        ret = xmlStrndup(start, last - start);
9171
5.71M
    }
9172
7.04M
    CUR_PTR = in;
9173
7.04M
    ctxt->input->line = line;
9174
7.04M
    ctxt->input->col = col;
9175
7.04M
    return ret;
9176
1.27M
need_complex:
9177
1.27M
    if (alloc) *alloc = 1;
9178
1.27M
    return xmlParseAttValueComplex(ctxt, len, normalize);
9179
8.31M
}
9180
9181
/**
9182
 * xmlParseAttribute2:
9183
 * @ctxt:  an XML parser context
9184
 * @pref:  the element prefix
9185
 * @elem:  the element name
9186
 * @prefix:  a xmlChar ** used to store the value of the attribute prefix
9187
 * @value:  a xmlChar ** used to store the value of the attribute
9188
 * @len:  an int * to save the length of the attribute
9189
 * @alloc:  an int * to indicate if the attribute was allocated
9190
 *
9191
 * parse an attribute in the new SAX2 framework.
9192
 *
9193
 * Returns the attribute name, and the value in *value, .
9194
 */
9195
9196
static const xmlChar *
9197
xmlParseAttribute2(xmlParserCtxtPtr ctxt,
9198
                   const xmlChar * pref, const xmlChar * elem,
9199
                   const xmlChar ** prefix, xmlChar ** value,
9200
                   int *len, int *alloc)
9201
1.92M
{
9202
1.92M
    const xmlChar *name;
9203
1.92M
    xmlChar *val, *internal_val = NULL;
9204
1.92M
    int normalize = 0;
9205
9206
1.92M
    *value = NULL;
9207
1.92M
    GROW;
9208
1.92M
    name = xmlParseQName(ctxt, prefix);
9209
1.92M
    if (name == NULL) {
9210
133k
        xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9211
133k
                       "error parsing attribute name\n");
9212
133k
        return (NULL);
9213
133k
    }
9214
9215
    /*
9216
     * get the type if needed
9217
     */
9218
1.79M
    if (ctxt->attsSpecial != NULL) {
9219
299k
        int type;
9220
9221
299k
        type = (int) (ptrdiff_t) xmlHashQLookup2(ctxt->attsSpecial,
9222
299k
                                                 pref, elem, *prefix, name);
9223
299k
        if (type != 0)
9224
118k
            normalize = 1;
9225
299k
    }
9226
9227
    /*
9228
     * read the value
9229
     */
9230
1.79M
    SKIP_BLANKS;
9231
1.79M
    if (RAW == '=') {
9232
1.72M
        NEXT;
9233
1.72M
        SKIP_BLANKS;
9234
1.72M
        val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
9235
1.72M
  if (normalize) {
9236
      /*
9237
       * Sometimes a second normalisation pass for spaces is needed
9238
       * but that only happens if charrefs or entities references
9239
       * have been used in the attribute value, i.e. the attribute
9240
       * value have been extracted in an allocated string already.
9241
       */
9242
118k
      if (*alloc) {
9243
33.4k
          const xmlChar *val2;
9244
9245
33.4k
          val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
9246
33.4k
    if ((val2 != NULL) && (val2 != val)) {
9247
11.2k
        xmlFree(val);
9248
11.2k
        val = (xmlChar *) val2;
9249
11.2k
    }
9250
33.4k
      }
9251
118k
  }
9252
1.72M
        ctxt->instate = XML_PARSER_CONTENT;
9253
1.72M
    } else {
9254
71.6k
        xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
9255
71.6k
                          "Specification mandates value for attribute %s\n",
9256
71.6k
                          name);
9257
71.6k
        return (NULL);
9258
71.6k
    }
9259
9260
1.72M
    if (*prefix == ctxt->str_xml) {
9261
        /*
9262
         * Check that xml:lang conforms to the specification
9263
         * No more registered as an error, just generate a warning now
9264
         * since this was deprecated in XML second edition
9265
         */
9266
45.4k
        if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
9267
9.59k
            internal_val = xmlStrndup(val, *len);
9268
9.59k
            if (!xmlCheckLanguageID(internal_val)) {
9269
5.28k
                xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
9270
5.28k
                              "Malformed value for xml:lang : %s\n",
9271
5.28k
                              internal_val, NULL);
9272
5.28k
            }
9273
9.59k
        }
9274
9275
        /*
9276
         * Check that xml:space conforms to the specification
9277
         */
9278
45.4k
        if (xmlStrEqual(name, BAD_CAST "space")) {
9279
1.55k
            internal_val = xmlStrndup(val, *len);
9280
1.55k
            if (xmlStrEqual(internal_val, BAD_CAST "default"))
9281
0
                *(ctxt->space) = 0;
9282
1.55k
            else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
9283
520
                *(ctxt->space) = 1;
9284
1.03k
            else {
9285
1.03k
                xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
9286
1.03k
                              "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
9287
1.03k
                              internal_val, NULL);
9288
1.03k
            }
9289
1.55k
        }
9290
45.4k
        if (internal_val) {
9291
10.5k
            xmlFree(internal_val);
9292
10.5k
        }
9293
45.4k
    }
9294
9295
1.72M
    *value = val;
9296
1.72M
    return (name);
9297
1.79M
}
9298
/**
9299
 * xmlParseStartTag2:
9300
 * @ctxt:  an XML parser context
9301
 *
9302
 * parse a start of tag either for rule element or
9303
 * EmptyElement. In both case we don't parse the tag closing chars.
9304
 * This routine is called when running SAX2 parsing
9305
 *
9306
 * [40] STag ::= '<' Name (S Attribute)* S? '>'
9307
 *
9308
 * [ WFC: Unique Att Spec ]
9309
 * No attribute name may appear more than once in the same start-tag or
9310
 * empty-element tag.
9311
 *
9312
 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
9313
 *
9314
 * [ WFC: Unique Att Spec ]
9315
 * No attribute name may appear more than once in the same start-tag or
9316
 * empty-element tag.
9317
 *
9318
 * With namespace:
9319
 *
9320
 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
9321
 *
9322
 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
9323
 *
9324
 * Returns the element name parsed
9325
 */
9326
9327
static const xmlChar *
9328
xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
9329
3.38M
                  const xmlChar **URI, int *tlen) {
9330
3.38M
    const xmlChar *localname;
9331
3.38M
    const xmlChar *prefix;
9332
3.38M
    const xmlChar *attname;
9333
3.38M
    const xmlChar *aprefix;
9334
3.38M
    const xmlChar *nsname;
9335
3.38M
    xmlChar *attvalue;
9336
3.38M
    const xmlChar **atts = ctxt->atts;
9337
3.38M
    int maxatts = ctxt->maxatts;
9338
3.38M
    int nratts, nbatts, nbdef, inputid;
9339
3.38M
    int i, j, nbNs, attval;
9340
3.38M
    unsigned long cur;
9341
3.38M
    int nsNr = ctxt->nsNr;
9342
9343
3.38M
    if (RAW != '<') return(NULL);
9344
3.38M
    NEXT1;
9345
9346
    /*
9347
     * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
9348
     *       point since the attribute values may be stored as pointers to
9349
     *       the buffer and calling SHRINK would destroy them !
9350
     *       The Shrinking is only possible once the full set of attribute
9351
     *       callbacks have been done.
9352
     */
9353
3.38M
    SHRINK;
9354
3.38M
    cur = ctxt->input->cur - ctxt->input->base;
9355
3.38M
    inputid = ctxt->input->id;
9356
3.38M
    nbatts = 0;
9357
3.38M
    nratts = 0;
9358
3.38M
    nbdef = 0;
9359
3.38M
    nbNs = 0;
9360
3.38M
    attval = 0;
9361
    /* Forget any namespaces added during an earlier parse of this element. */
9362
3.38M
    ctxt->nsNr = nsNr;
9363
9364
3.38M
    localname = xmlParseQName(ctxt, &prefix);
9365
3.38M
    if (localname == NULL) {
9366
163k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9367
163k
           "StartTag: invalid element name\n");
9368
163k
        return(NULL);
9369
163k
    }
9370
3.21M
    *tlen = ctxt->input->cur - ctxt->input->base - cur;
9371
9372
    /*
9373
     * Now parse the attributes, it ends up with the ending
9374
     *
9375
     * (S Attribute)* S?
9376
     */
9377
3.21M
    SKIP_BLANKS;
9378
3.21M
    GROW;
9379
9380
3.50M
    while (((RAW != '>') &&
9381
3.50M
     ((RAW != '/') || (NXT(1) != '>')) &&
9382
3.50M
     (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
9383
1.92M
  int id = ctxt->input->id;
9384
1.92M
  unsigned long cons = CUR_CONSUMED;
9385
1.92M
  int len = -1, alloc = 0;
9386
9387
1.92M
  attname = xmlParseAttribute2(ctxt, prefix, localname,
9388
1.92M
                               &aprefix, &attvalue, &len, &alloc);
9389
1.92M
        if ((attname == NULL) || (attvalue == NULL))
9390
216k
            goto next_attr;
9391
1.70M
  if (len < 0) len = xmlStrlen(attvalue);
9392
9393
1.70M
        if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9394
37.5k
            const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9395
37.5k
            xmlURIPtr uri;
9396
9397
37.5k
            if (URL == NULL) {
9398
0
                xmlErrMemory(ctxt, "dictionary allocation failure");
9399
0
                if ((attvalue != NULL) && (alloc != 0))
9400
0
                    xmlFree(attvalue);
9401
0
                localname = NULL;
9402
0
                goto done;
9403
0
            }
9404
37.5k
            if (*URL != 0) {
9405
34.9k
                uri = xmlParseURI((const char *) URL);
9406
34.9k
                if (uri == NULL) {
9407
14.8k
                    xmlNsErr(ctxt, XML_WAR_NS_URI,
9408
14.8k
                             "xmlns: '%s' is not a valid URI\n",
9409
14.8k
                                       URL, NULL, NULL);
9410
20.0k
                } else {
9411
20.0k
                    if (uri->scheme == NULL) {
9412
4.55k
                        xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9413
4.55k
                                  "xmlns: URI %s is not absolute\n",
9414
4.55k
                                  URL, NULL, NULL);
9415
4.55k
                    }
9416
20.0k
                    xmlFreeURI(uri);
9417
20.0k
                }
9418
34.9k
                if (URL == ctxt->str_xml_ns) {
9419
0
                    if (attname != ctxt->str_xml) {
9420
0
                        xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9421
0
                     "xml namespace URI cannot be the default namespace\n",
9422
0
                                 NULL, NULL, NULL);
9423
0
                    }
9424
0
                    goto next_attr;
9425
0
                }
9426
34.9k
                if ((len == 29) &&
9427
34.9k
                    (xmlStrEqual(URL,
9428
728
                             BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9429
0
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9430
0
                         "reuse of the xmlns namespace name is forbidden\n",
9431
0
                             NULL, NULL, NULL);
9432
0
                    goto next_attr;
9433
0
                }
9434
34.9k
            }
9435
            /*
9436
             * check that it's not a defined namespace
9437
             */
9438
48.4k
            for (j = 1;j <= nbNs;j++)
9439
14.6k
                if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9440
3.79k
                    break;
9441
37.5k
            if (j <= nbNs)
9442
3.79k
                xmlErrAttributeDup(ctxt, NULL, attname);
9443
33.7k
            else
9444
33.7k
                if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
9445
9446
1.67M
        } else if (aprefix == ctxt->str_xmlns) {
9447
263k
            const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9448
263k
            xmlURIPtr uri;
9449
9450
263k
            if (attname == ctxt->str_xml) {
9451
314
                if (URL != ctxt->str_xml_ns) {
9452
314
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9453
314
                             "xml namespace prefix mapped to wrong URI\n",
9454
314
                             NULL, NULL, NULL);
9455
314
                }
9456
                /*
9457
                 * Do not keep a namespace definition node
9458
                 */
9459
314
                goto next_attr;
9460
314
            }
9461
262k
            if (URL == ctxt->str_xml_ns) {
9462
0
                if (attname != ctxt->str_xml) {
9463
0
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9464
0
                             "xml namespace URI mapped to wrong prefix\n",
9465
0
                             NULL, NULL, NULL);
9466
0
                }
9467
0
                goto next_attr;
9468
0
            }
9469
262k
            if (attname == ctxt->str_xmlns) {
9470
269
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9471
269
                         "redefinition of the xmlns prefix is forbidden\n",
9472
269
                         NULL, NULL, NULL);
9473
269
                goto next_attr;
9474
269
            }
9475
262k
            if ((len == 29) &&
9476
262k
                (xmlStrEqual(URL,
9477
1.67k
                             BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9478
0
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9479
0
                         "reuse of the xmlns namespace name is forbidden\n",
9480
0
                         NULL, NULL, NULL);
9481
0
                goto next_attr;
9482
0
            }
9483
262k
            if ((URL == NULL) || (URL[0] == 0)) {
9484
548
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9485
548
                         "xmlns:%s: Empty XML namespace is not allowed\n",
9486
548
                              attname, NULL, NULL);
9487
548
                goto next_attr;
9488
262k
            } else {
9489
262k
                uri = xmlParseURI((const char *) URL);
9490
262k
                if (uri == NULL) {
9491
24.8k
                    xmlNsErr(ctxt, XML_WAR_NS_URI,
9492
24.8k
                         "xmlns:%s: '%s' is not a valid URI\n",
9493
24.8k
                                       attname, URL, NULL);
9494
237k
                } else {
9495
237k
                    if ((ctxt->pedantic) && (uri->scheme == NULL)) {
9496
1.70k
                        xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9497
1.70k
                                  "xmlns:%s: URI %s is not absolute\n",
9498
1.70k
                                  attname, URL, NULL);
9499
1.70k
                    }
9500
237k
                    xmlFreeURI(uri);
9501
237k
                }
9502
262k
            }
9503
9504
            /*
9505
             * check that it's not a defined namespace
9506
             */
9507
299k
            for (j = 1;j <= nbNs;j++)
9508
41.3k
                if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9509
3.81k
                    break;
9510
262k
            if (j <= nbNs)
9511
3.81k
                xmlErrAttributeDup(ctxt, aprefix, attname);
9512
258k
            else
9513
258k
                if (nsPush(ctxt, attname, URL) > 0) nbNs++;
9514
9515
1.40M
        } else {
9516
            /*
9517
             * Add the pair to atts
9518
             */
9519
1.40M
            if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9520
319k
                if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9521
0
                    goto next_attr;
9522
0
                }
9523
319k
                maxatts = ctxt->maxatts;
9524
319k
                atts = ctxt->atts;
9525
319k
            }
9526
1.40M
            ctxt->attallocs[nratts++] = alloc;
9527
1.40M
            atts[nbatts++] = attname;
9528
1.40M
            atts[nbatts++] = aprefix;
9529
            /*
9530
             * The namespace URI field is used temporarily to point at the
9531
             * base of the current input buffer for non-alloced attributes.
9532
             * When the input buffer is reallocated, all the pointers become
9533
             * invalid, but they can be reconstructed later.
9534
             */
9535
1.40M
            if (alloc)
9536
348k
                atts[nbatts++] = NULL;
9537
1.06M
            else
9538
1.06M
                atts[nbatts++] = ctxt->input->base;
9539
1.40M
            atts[nbatts++] = attvalue;
9540
1.40M
            attvalue += len;
9541
1.40M
            atts[nbatts++] = attvalue;
9542
            /*
9543
             * tag if some deallocation is needed
9544
             */
9545
1.40M
            if (alloc != 0) attval = 1;
9546
1.40M
            attvalue = NULL; /* moved into atts */
9547
1.40M
        }
9548
9549
1.92M
next_attr:
9550
1.92M
        if ((attvalue != NULL) && (alloc != 0)) {
9551
34.5k
            xmlFree(attvalue);
9552
34.5k
            attvalue = NULL;
9553
34.5k
        }
9554
9555
1.92M
  GROW
9556
1.92M
        if (ctxt->instate == XML_PARSER_EOF)
9557
0
            break;
9558
1.92M
  if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9559
1.30M
      break;
9560
621k
  if (SKIP_BLANKS == 0) {
9561
334k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9562
334k
         "attributes construct error\n");
9563
334k
      break;
9564
334k
  }
9565
287k
        if ((cons == CUR_CONSUMED) && (id == ctxt->input->id) &&
9566
287k
            (attname == NULL) && (attvalue == NULL)) {
9567
0
      xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9568
0
           "xmlParseStartTag: problem parsing attributes\n");
9569
0
      break;
9570
0
  }
9571
287k
        GROW;
9572
287k
    }
9573
9574
3.21M
    if (ctxt->input->id != inputid) {
9575
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9576
0
                    "Unexpected change of input\n");
9577
0
        localname = NULL;
9578
0
        goto done;
9579
0
    }
9580
9581
    /* Reconstruct attribute value pointers. */
9582
4.62M
    for (i = 0, j = 0; j < nratts; i += 5, j++) {
9583
1.40M
        if (atts[i+2] != NULL) {
9584
            /*
9585
             * Arithmetic on dangling pointers is technically undefined
9586
             * behavior, but well...
9587
             */
9588
1.06M
            ptrdiff_t offset = ctxt->input->base - atts[i+2];
9589
1.06M
            atts[i+2]  = NULL;    /* Reset repurposed namespace URI */
9590
1.06M
            atts[i+3] += offset;  /* value */
9591
1.06M
            atts[i+4] += offset;  /* valuend */
9592
1.06M
        }
9593
1.40M
    }
9594
9595
    /*
9596
     * The attributes defaulting
9597
     */
9598
3.21M
    if (ctxt->attsDefault != NULL) {
9599
140k
        xmlDefAttrsPtr defaults;
9600
9601
140k
  defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9602
140k
  if (defaults != NULL) {
9603
96.6k
      for (i = 0;i < defaults->nbAttrs;i++) {
9604
66.4k
          attname = defaults->values[5 * i];
9605
66.4k
    aprefix = defaults->values[5 * i + 1];
9606
9607
                /*
9608
     * special work for namespaces defaulted defs
9609
     */
9610
66.4k
    if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9611
        /*
9612
         * check that it's not a defined namespace
9613
         */
9614
5.68k
        for (j = 1;j <= nbNs;j++)
9615
1.88k
            if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9616
1.22k
          break;
9617
5.02k
              if (j <= nbNs) continue;
9618
9619
3.80k
        nsname = xmlGetNamespace(ctxt, NULL);
9620
3.80k
        if (nsname != defaults->values[5 * i + 2]) {
9621
1.60k
      if (nsPush(ctxt, NULL,
9622
1.60k
                 defaults->values[5 * i + 2]) > 0)
9623
1.50k
          nbNs++;
9624
1.60k
        }
9625
61.4k
    } else if (aprefix == ctxt->str_xmlns) {
9626
        /*
9627
         * check that it's not a defined namespace
9628
         */
9629
12.5k
        for (j = 1;j <= nbNs;j++)
9630
4.69k
            if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9631
3.85k
          break;
9632
11.7k
              if (j <= nbNs) continue;
9633
9634
7.90k
        nsname = xmlGetNamespace(ctxt, attname);
9635
7.90k
        if (nsname != defaults->values[2]) {
9636
5.24k
      if (nsPush(ctxt, attname,
9637
5.24k
                 defaults->values[5 * i + 2]) > 0)
9638
4.00k
          nbNs++;
9639
5.24k
        }
9640
49.6k
    } else {
9641
        /*
9642
         * check that it's not a defined attribute
9643
         */
9644
148k
        for (j = 0;j < nbatts;j+=5) {
9645
99.4k
      if ((attname == atts[j]) && (aprefix == atts[j+1]))
9646
489
          break;
9647
99.4k
        }
9648
49.6k
        if (j < nbatts) continue;
9649
9650
49.1k
        if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9651
4.60k
      if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9652
0
                            localname = NULL;
9653
0
                            goto done;
9654
0
      }
9655
4.60k
      maxatts = ctxt->maxatts;
9656
4.60k
      atts = ctxt->atts;
9657
4.60k
        }
9658
49.1k
        atts[nbatts++] = attname;
9659
49.1k
        atts[nbatts++] = aprefix;
9660
49.1k
        if (aprefix == NULL)
9661
37.5k
      atts[nbatts++] = NULL;
9662
11.6k
        else
9663
11.6k
            atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
9664
49.1k
        atts[nbatts++] = defaults->values[5 * i + 2];
9665
49.1k
        atts[nbatts++] = defaults->values[5 * i + 3];
9666
49.1k
        if ((ctxt->standalone == 1) &&
9667
49.1k
            (defaults->values[5 * i + 4] != NULL)) {
9668
0
      xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
9669
0
    "standalone: attribute %s on %s defaulted from external subset\n",
9670
0
                                   attname, localname);
9671
0
        }
9672
49.1k
        nbdef++;
9673
49.1k
    }
9674
66.4k
      }
9675
30.2k
  }
9676
140k
    }
9677
9678
    /*
9679
     * The attributes checkings
9680
     */
9681
4.67M
    for (i = 0; i < nbatts;i += 5) {
9682
        /*
9683
  * The default namespace does not apply to attribute names.
9684
  */
9685
1.45M
  if (atts[i + 1] != NULL) {
9686
345k
      nsname = xmlGetNamespace(ctxt, atts[i + 1]);
9687
345k
      if (nsname == NULL) {
9688
173k
    xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9689
173k
        "Namespace prefix %s for %s on %s is not defined\n",
9690
173k
        atts[i + 1], atts[i], localname);
9691
173k
      }
9692
345k
      atts[i + 2] = nsname;
9693
345k
  } else
9694
1.11M
      nsname = NULL;
9695
  /*
9696
   * [ WFC: Unique Att Spec ]
9697
   * No attribute name may appear more than once in the same
9698
   * start-tag or empty-element tag.
9699
   * As extended by the Namespace in XML REC.
9700
   */
9701
1.79M
        for (j = 0; j < i;j += 5) {
9702
345k
      if (atts[i] == atts[j]) {
9703
21.6k
          if (atts[i+1] == atts[j+1]) {
9704
5.71k
        xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
9705
5.71k
        break;
9706
5.71k
    }
9707
15.8k
    if ((nsname != NULL) && (atts[j + 2] == nsname)) {
9708
286
        xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9709
286
           "Namespaced Attribute %s in '%s' redefined\n",
9710
286
           atts[i], nsname, NULL);
9711
286
        break;
9712
286
    }
9713
15.8k
      }
9714
345k
  }
9715
1.45M
    }
9716
9717
3.21M
    nsname = xmlGetNamespace(ctxt, prefix);
9718
3.21M
    if ((prefix != NULL) && (nsname == NULL)) {
9719
473k
  xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9720
473k
           "Namespace prefix %s on %s is not defined\n",
9721
473k
     prefix, localname, NULL);
9722
473k
    }
9723
3.21M
    *pref = prefix;
9724
3.21M
    *URI = nsname;
9725
9726
    /*
9727
     * SAX: Start of Element !
9728
     */
9729
3.21M
    if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9730
3.21M
  (!ctxt->disableSAX)) {
9731
3.03M
  if (nbNs > 0)
9732
236k
      ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9733
236k
        nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
9734
236k
        nbatts / 5, nbdef, atts);
9735
2.79M
  else
9736
2.79M
      ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9737
2.79M
                    nsname, 0, NULL, nbatts / 5, nbdef, atts);
9738
3.03M
    }
9739
9740
3.21M
done:
9741
    /*
9742
     * Free up attribute allocated strings if needed
9743
     */
9744
3.21M
    if (attval != 0) {
9745
704k
  for (i = 3,j = 0; j < nratts;i += 5,j++)
9746
369k
      if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9747
348k
          xmlFree((xmlChar *) atts[i]);
9748
334k
    }
9749
9750
3.21M
    return(localname);
9751
3.21M
}
9752
9753
/**
9754
 * xmlParseEndTag2:
9755
 * @ctxt:  an XML parser context
9756
 * @line:  line of the start tag
9757
 * @nsNr:  number of namespaces on the start tag
9758
 *
9759
 * parse an end of tag
9760
 *
9761
 * [42] ETag ::= '</' Name S? '>'
9762
 *
9763
 * With namespace
9764
 *
9765
 * [NS 9] ETag ::= '</' QName S? '>'
9766
 */
9767
9768
static void
9769
858k
xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlStartTag *tag) {
9770
858k
    const xmlChar *name;
9771
9772
858k
    GROW;
9773
858k
    if ((RAW != '<') || (NXT(1) != '/')) {
9774
0
  xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
9775
0
  return;
9776
0
    }
9777
858k
    SKIP(2);
9778
9779
858k
    if (tag->prefix == NULL)
9780
510k
        name = xmlParseNameAndCompare(ctxt, ctxt->name);
9781
347k
    else
9782
347k
        name = xmlParseQNameAndCompare(ctxt, ctxt->name, tag->prefix);
9783
9784
    /*
9785
     * We should definitely be at the ending "S? '>'" part
9786
     */
9787
858k
    GROW;
9788
858k
    if (ctxt->instate == XML_PARSER_EOF)
9789
0
        return;
9790
858k
    SKIP_BLANKS;
9791
858k
    if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
9792
64.2k
  xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
9793
64.2k
    } else
9794
793k
  NEXT1;
9795
9796
    /*
9797
     * [ WFC: Element Type Match ]
9798
     * The Name in an element's end-tag must match the element type in the
9799
     * start-tag.
9800
     *
9801
     */
9802
858k
    if (name != (xmlChar*)1) {
9803
136k
        if (name == NULL) name = BAD_CAST "unparsable";
9804
136k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
9805
136k
         "Opening and ending tag mismatch: %s line %d and %s\n",
9806
136k
                    ctxt->name, tag->line, name);
9807
136k
    }
9808
9809
    /*
9810
     * SAX: End of Tag
9811
     */
9812
858k
    if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9813
858k
  (!ctxt->disableSAX))
9814
760k
  ctxt->sax->endElementNs(ctxt->userData, ctxt->name, tag->prefix,
9815
760k
                                tag->URI);
9816
9817
858k
    spacePop(ctxt);
9818
858k
    if (tag->nsNr != 0)
9819
51.2k
  nsPop(ctxt, tag->nsNr);
9820
858k
}
9821
9822
/**
9823
 * xmlParseCDSect:
9824
 * @ctxt:  an XML parser context
9825
 *
9826
 * DEPRECATED: Internal function, don't use.
9827
 *
9828
 * Parse escaped pure raw content.
9829
 *
9830
 * [18] CDSect ::= CDStart CData CDEnd
9831
 *
9832
 * [19] CDStart ::= '<![CDATA['
9833
 *
9834
 * [20] Data ::= (Char* - (Char* ']]>' Char*))
9835
 *
9836
 * [21] CDEnd ::= ']]>'
9837
 */
9838
void
9839
89.9k
xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9840
89.9k
    xmlChar *buf = NULL;
9841
89.9k
    int len = 0;
9842
89.9k
    int size = XML_PARSER_BUFFER_SIZE;
9843
89.9k
    int r, rl;
9844
89.9k
    int s, sl;
9845
89.9k
    int cur, l;
9846
89.9k
    int count = 0;
9847
89.9k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
9848
66.6k
                    XML_MAX_HUGE_LENGTH :
9849
89.9k
                    XML_MAX_TEXT_LENGTH;
9850
9851
    /* Check 2.6.0 was NXT(0) not RAW */
9852
89.9k
    if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9853
89.9k
  SKIP(9);
9854
89.9k
    } else
9855
0
        return;
9856
9857
89.9k
    ctxt->instate = XML_PARSER_CDATA_SECTION;
9858
89.9k
    r = CUR_CHAR(rl);
9859
89.9k
    if (!IS_CHAR(r)) {
9860
20.4k
  xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9861
20.4k
  ctxt->instate = XML_PARSER_CONTENT;
9862
20.4k
        return;
9863
20.4k
    }
9864
69.5k
    NEXTL(rl);
9865
69.5k
    s = CUR_CHAR(sl);
9866
69.5k
    if (!IS_CHAR(s)) {
9867
2.94k
  xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9868
2.94k
  ctxt->instate = XML_PARSER_CONTENT;
9869
2.94k
        return;
9870
2.94k
    }
9871
66.5k
    NEXTL(sl);
9872
66.5k
    cur = CUR_CHAR(l);
9873
66.5k
    buf = (xmlChar *) xmlMallocAtomic(size);
9874
66.5k
    if (buf == NULL) {
9875
0
  xmlErrMemory(ctxt, NULL);
9876
0
  return;
9877
0
    }
9878
12.0M
    while (IS_CHAR(cur) &&
9879
12.0M
           ((r != ']') || (s != ']') || (cur != '>'))) {
9880
12.0M
  if (len + 5 >= size) {
9881
50.2k
      xmlChar *tmp;
9882
9883
50.2k
      tmp = (xmlChar *) xmlRealloc(buf, size * 2);
9884
50.2k
      if (tmp == NULL) {
9885
0
          xmlFree(buf);
9886
0
    xmlErrMemory(ctxt, NULL);
9887
0
    return;
9888
0
      }
9889
50.2k
      buf = tmp;
9890
50.2k
      size *= 2;
9891
50.2k
  }
9892
12.0M
  COPY_BUF(rl,buf,len,r);
9893
12.0M
  r = s;
9894
12.0M
  rl = sl;
9895
12.0M
  s = cur;
9896
12.0M
  sl = l;
9897
12.0M
  count++;
9898
12.0M
  if (count > 50) {
9899
215k
      SHRINK;
9900
215k
      GROW;
9901
215k
            if (ctxt->instate == XML_PARSER_EOF) {
9902
0
    xmlFree(buf);
9903
0
    return;
9904
0
            }
9905
215k
      count = 0;
9906
215k
  }
9907
12.0M
  NEXTL(l);
9908
12.0M
  cur = CUR_CHAR(l);
9909
12.0M
        if (len > maxLength) {
9910
0
            xmlFatalErrMsg(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9911
0
                           "CData section too big found\n");
9912
0
            xmlFree(buf);
9913
0
            return;
9914
0
        }
9915
12.0M
    }
9916
66.5k
    buf[len] = 0;
9917
66.5k
    ctxt->instate = XML_PARSER_CONTENT;
9918
66.5k
    if (cur != '>') {
9919
30.3k
  xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9920
30.3k
                       "CData section not finished\n%.50s\n", buf);
9921
30.3k
  xmlFree(buf);
9922
30.3k
        return;
9923
30.3k
    }
9924
36.2k
    NEXTL(l);
9925
9926
    /*
9927
     * OK the buffer is to be consumed as cdata.
9928
     */
9929
36.2k
    if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9930
17.0k
  if (ctxt->sax->cdataBlock != NULL)
9931
11.1k
      ctxt->sax->cdataBlock(ctxt->userData, buf, len);
9932
5.91k
  else if (ctxt->sax->characters != NULL)
9933
5.91k
      ctxt->sax->characters(ctxt->userData, buf, len);
9934
17.0k
    }
9935
36.2k
    xmlFree(buf);
9936
36.2k
}
9937
9938
/**
9939
 * xmlParseContentInternal:
9940
 * @ctxt:  an XML parser context
9941
 *
9942
 * Parse a content sequence. Stops at EOF or '</'. Leaves checking of
9943
 * unexpected EOF to the caller.
9944
 */
9945
9946
static void
9947
2.62M
xmlParseContentInternal(xmlParserCtxtPtr ctxt) {
9948
2.62M
    int nameNr = ctxt->nameNr;
9949
9950
2.62M
    GROW;
9951
54.4M
    while ((RAW != 0) &&
9952
54.4M
     (ctxt->instate != XML_PARSER_EOF)) {
9953
52.0M
        int id = ctxt->input->id;
9954
52.0M
  unsigned long cons = CUR_CONSUMED;
9955
52.0M
  const xmlChar *cur = ctxt->input->cur;
9956
9957
  /*
9958
   * First case : a Processing Instruction.
9959
   */
9960
52.0M
  if ((*cur == '<') && (cur[1] == '?')) {
9961
253k
      xmlParsePI(ctxt);
9962
253k
  }
9963
9964
  /*
9965
   * Second case : a CDSection
9966
   */
9967
  /* 2.6.0 test was *cur not RAW */
9968
51.8M
  else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9969
89.9k
      xmlParseCDSect(ctxt);
9970
89.9k
  }
9971
9972
  /*
9973
   * Third case :  a comment
9974
   */
9975
51.7M
  else if ((*cur == '<') && (NXT(1) == '!') &&
9976
51.7M
     (NXT(2) == '-') && (NXT(3) == '-')) {
9977
885k
      xmlParseComment(ctxt);
9978
885k
      ctxt->instate = XML_PARSER_CONTENT;
9979
885k
  }
9980
9981
  /*
9982
   * Fourth case :  a sub-element.
9983
   */
9984
50.8M
  else if (*cur == '<') {
9985
19.7M
            if (NXT(1) == '/') {
9986
4.00M
                if (ctxt->nameNr <= nameNr)
9987
172k
                    break;
9988
3.82M
          xmlParseElementEnd(ctxt);
9989
15.7M
            } else {
9990
15.7M
          xmlParseElementStart(ctxt);
9991
15.7M
            }
9992
19.7M
  }
9993
9994
  /*
9995
   * Fifth case : a reference. If if has not been resolved,
9996
   *    parsing returns it's Name, create the node
9997
   */
9998
9999
31.0M
  else if (*cur == '&') {
10000
5.63M
      xmlParseReference(ctxt);
10001
5.63M
  }
10002
10003
  /*
10004
   * Last case, text. Note that References are handled directly.
10005
   */
10006
25.4M
  else {
10007
25.4M
      xmlParseCharData(ctxt, 0);
10008
25.4M
  }
10009
10010
51.9M
  GROW;
10011
51.9M
  SHRINK;
10012
10013
51.9M
  if ((cons == CUR_CONSUMED) && (id == ctxt->input->id)) {
10014
87.8k
      xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
10015
87.8k
                  "detected an error in element content\n");
10016
87.8k
      xmlHaltParser(ctxt);
10017
87.8k
            break;
10018
87.8k
  }
10019
51.9M
    }
10020
2.62M
}
10021
10022
/**
10023
 * xmlParseContent:
10024
 * @ctxt:  an XML parser context
10025
 *
10026
 * Parse a content sequence. Stops at EOF or '</'.
10027
 *
10028
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10029
 */
10030
10031
void
10032
2.41M
xmlParseContent(xmlParserCtxtPtr ctxt) {
10033
2.41M
    int nameNr = ctxt->nameNr;
10034
10035
2.41M
    xmlParseContentInternal(ctxt);
10036
10037
2.41M
    if ((ctxt->instate != XML_PARSER_EOF) && (ctxt->nameNr > nameNr)) {
10038
1.11M
        const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
10039
1.11M
        int line = ctxt->pushTab[ctxt->nameNr - 1].line;
10040
1.11M
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
10041
1.11M
                "Premature end of data in tag %s line %d\n",
10042
1.11M
    name, line, NULL);
10043
1.11M
    }
10044
2.41M
}
10045
10046
/**
10047
 * xmlParseElement:
10048
 * @ctxt:  an XML parser context
10049
 *
10050
 * DEPRECATED: Internal function, don't use.
10051
 *
10052
 * parse an XML element
10053
 *
10054
 * [39] element ::= EmptyElemTag | STag content ETag
10055
 *
10056
 * [ WFC: Element Type Match ]
10057
 * The Name in an element's end-tag must match the element type in the
10058
 * start-tag.
10059
 *
10060
 */
10061
10062
void
10063
271k
xmlParseElement(xmlParserCtxtPtr ctxt) {
10064
271k
    if (xmlParseElementStart(ctxt) != 0)
10065
57.5k
        return;
10066
10067
213k
    xmlParseContentInternal(ctxt);
10068
213k
    if (ctxt->instate == XML_PARSER_EOF)
10069
3.26k
  return;
10070
10071
210k
    if (CUR == 0) {
10072
132k
        const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
10073
132k
        int line = ctxt->pushTab[ctxt->nameNr - 1].line;
10074
132k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
10075
132k
                "Premature end of data in tag %s line %d\n",
10076
132k
    name, line, NULL);
10077
132k
        return;
10078
132k
    }
10079
10080
78.3k
    xmlParseElementEnd(ctxt);
10081
78.3k
}
10082
10083
/**
10084
 * xmlParseElementStart:
10085
 * @ctxt:  an XML parser context
10086
 *
10087
 * Parse the start of an XML element. Returns -1 in case of error, 0 if an
10088
 * opening tag was parsed, 1 if an empty element was parsed.
10089
 */
10090
static int
10091
16.0M
xmlParseElementStart(xmlParserCtxtPtr ctxt) {
10092
16.0M
    const xmlChar *name;
10093
16.0M
    const xmlChar *prefix = NULL;
10094
16.0M
    const xmlChar *URI = NULL;
10095
16.0M
    xmlParserNodeInfo node_info;
10096
16.0M
    int line, tlen = 0;
10097
16.0M
    xmlNodePtr ret;
10098
16.0M
    int nsNr = ctxt->nsNr;
10099
10100
16.0M
    if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) &&
10101
16.0M
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
10102
0
  xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
10103
0
     "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
10104
0
        xmlParserMaxDepth);
10105
0
  xmlHaltParser(ctxt);
10106
0
  return(-1);
10107
0
    }
10108
10109
    /* Capture start position */
10110
16.0M
    if (ctxt->record_info) {
10111
0
        node_info.begin_pos = ctxt->input->consumed +
10112
0
                          (CUR_PTR - ctxt->input->base);
10113
0
  node_info.begin_line = ctxt->input->line;
10114
0
    }
10115
10116
16.0M
    if (ctxt->spaceNr == 0)
10117
0
  spacePush(ctxt, -1);
10118
16.0M
    else if (*ctxt->space == -2)
10119
4.25M
  spacePush(ctxt, -1);
10120
11.8M
    else
10121
11.8M
  spacePush(ctxt, *ctxt->space);
10122
10123
16.0M
    line = ctxt->input->line;
10124
16.0M
#ifdef LIBXML_SAX1_ENABLED
10125
16.0M
    if (ctxt->sax2)
10126
2.07M
#endif /* LIBXML_SAX1_ENABLED */
10127
2.07M
        name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
10128
13.9M
#ifdef LIBXML_SAX1_ENABLED
10129
13.9M
    else
10130
13.9M
  name = xmlParseStartTag(ctxt);
10131
16.0M
#endif /* LIBXML_SAX1_ENABLED */
10132
16.0M
    if (ctxt->instate == XML_PARSER_EOF)
10133
1.69k
  return(-1);
10134
16.0M
    if (name == NULL) {
10135
2.85M
  spacePop(ctxt);
10136
2.85M
        return(-1);
10137
2.85M
    }
10138
13.2M
    nameNsPush(ctxt, name, prefix, URI, line, ctxt->nsNr - nsNr);
10139
13.2M
    ret = ctxt->node;
10140
10141
13.2M
#ifdef LIBXML_VALID_ENABLED
10142
    /*
10143
     * [ VC: Root Element Type ]
10144
     * The Name in the document type declaration must match the element
10145
     * type of the root element.
10146
     */
10147
13.2M
    if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
10148
13.2M
        ctxt->node && (ctxt->node == ctxt->myDoc->children))
10149
0
        ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
10150
13.2M
#endif /* LIBXML_VALID_ENABLED */
10151
10152
    /*
10153
     * Check for an Empty Element.
10154
     */
10155
13.2M
    if ((RAW == '/') && (NXT(1) == '>')) {
10156
3.83M
        SKIP(2);
10157
3.83M
  if (ctxt->sax2) {
10158
589k
      if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
10159
589k
    (!ctxt->disableSAX))
10160
555k
    ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
10161
589k
#ifdef LIBXML_SAX1_ENABLED
10162
3.24M
  } else {
10163
3.24M
      if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
10164
3.24M
    (!ctxt->disableSAX))
10165
2.29M
    ctxt->sax->endElement(ctxt->userData, name);
10166
3.24M
#endif /* LIBXML_SAX1_ENABLED */
10167
3.24M
  }
10168
3.83M
  namePop(ctxt);
10169
3.83M
  spacePop(ctxt);
10170
3.83M
  if (nsNr != ctxt->nsNr)
10171
44.2k
      nsPop(ctxt, ctxt->nsNr - nsNr);
10172
3.83M
  if ( ret != NULL && ctxt->record_info ) {
10173
0
     node_info.end_pos = ctxt->input->consumed +
10174
0
            (CUR_PTR - ctxt->input->base);
10175
0
     node_info.end_line = ctxt->input->line;
10176
0
     node_info.node = ret;
10177
0
     xmlParserAddNodeInfo(ctxt, &node_info);
10178
0
  }
10179
3.83M
  return(1);
10180
3.83M
    }
10181
9.37M
    if (RAW == '>') {
10182
7.45M
        NEXT1;
10183
7.45M
    } else {
10184
1.92M
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
10185
1.92M
         "Couldn't find end of Start Tag %s line %d\n",
10186
1.92M
                    name, line, NULL);
10187
10188
  /*
10189
   * end of parsing of this node.
10190
   */
10191
1.92M
  nodePop(ctxt);
10192
1.92M
  namePop(ctxt);
10193
1.92M
  spacePop(ctxt);
10194
1.92M
  if (nsNr != ctxt->nsNr)
10195
13.4k
      nsPop(ctxt, ctxt->nsNr - nsNr);
10196
10197
  /*
10198
   * Capture end position and add node
10199
   */
10200
1.92M
  if ( ret != NULL && ctxt->record_info ) {
10201
0
     node_info.end_pos = ctxt->input->consumed +
10202
0
            (CUR_PTR - ctxt->input->base);
10203
0
     node_info.end_line = ctxt->input->line;
10204
0
     node_info.node = ret;
10205
0
     xmlParserAddNodeInfo(ctxt, &node_info);
10206
0
  }
10207
1.92M
  return(-1);
10208
1.92M
    }
10209
10210
7.45M
    return(0);
10211
9.37M
}
10212
10213
/**
10214
 * xmlParseElementEnd:
10215
 * @ctxt:  an XML parser context
10216
 *
10217
 * Parse the end of an XML element.
10218
 */
10219
static void
10220
3.90M
xmlParseElementEnd(xmlParserCtxtPtr ctxt) {
10221
3.90M
    xmlParserNodeInfo node_info;
10222
3.90M
    xmlNodePtr ret = ctxt->node;
10223
10224
3.90M
    if (ctxt->nameNr <= 0)
10225
0
        return;
10226
10227
    /*
10228
     * parse the end of tag: '</' should be here.
10229
     */
10230
3.90M
    if (ctxt->sax2) {
10231
446k
  xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
10232
446k
  namePop(ctxt);
10233
446k
    }
10234
3.45M
#ifdef LIBXML_SAX1_ENABLED
10235
3.45M
    else
10236
3.45M
  xmlParseEndTag1(ctxt, 0);
10237
3.90M
#endif /* LIBXML_SAX1_ENABLED */
10238
10239
    /*
10240
     * Capture end position and add node
10241
     */
10242
3.90M
    if ( ret != NULL && ctxt->record_info ) {
10243
0
       node_info.end_pos = ctxt->input->consumed +
10244
0
                          (CUR_PTR - ctxt->input->base);
10245
0
       node_info.end_line = ctxt->input->line;
10246
0
       node_info.node = ret;
10247
0
       xmlParserAddNodeInfo(ctxt, &node_info);
10248
0
    }
10249
3.90M
}
10250
10251
/**
10252
 * xmlParseVersionNum:
10253
 * @ctxt:  an XML parser context
10254
 *
10255
 * DEPRECATED: Internal function, don't use.
10256
 *
10257
 * parse the XML version value.
10258
 *
10259
 * [26] VersionNum ::= '1.' [0-9]+
10260
 *
10261
 * In practice allow [0-9].[0-9]+ at that level
10262
 *
10263
 * Returns the string giving the XML version number, or NULL
10264
 */
10265
xmlChar *
10266
276k
xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
10267
276k
    xmlChar *buf = NULL;
10268
276k
    int len = 0;
10269
276k
    int size = 10;
10270
276k
    xmlChar cur;
10271
10272
276k
    buf = (xmlChar *) xmlMallocAtomic(size);
10273
276k
    if (buf == NULL) {
10274
0
  xmlErrMemory(ctxt, NULL);
10275
0
  return(NULL);
10276
0
    }
10277
276k
    cur = CUR;
10278
276k
    if (!((cur >= '0') && (cur <= '9'))) {
10279
2.29k
  xmlFree(buf);
10280
2.29k
  return(NULL);
10281
2.29k
    }
10282
274k
    buf[len++] = cur;
10283
274k
    NEXT;
10284
274k
    cur=CUR;
10285
274k
    if (cur != '.') {
10286
6.29k
  xmlFree(buf);
10287
6.29k
  return(NULL);
10288
6.29k
    }
10289
267k
    buf[len++] = cur;
10290
267k
    NEXT;
10291
267k
    cur=CUR;
10292
896k
    while ((cur >= '0') && (cur <= '9')) {
10293
628k
  if (len + 1 >= size) {
10294
1.57k
      xmlChar *tmp;
10295
10296
1.57k
      size *= 2;
10297
1.57k
      tmp = (xmlChar *) xmlRealloc(buf, size);
10298
1.57k
      if (tmp == NULL) {
10299
0
          xmlFree(buf);
10300
0
    xmlErrMemory(ctxt, NULL);
10301
0
    return(NULL);
10302
0
      }
10303
1.57k
      buf = tmp;
10304
1.57k
  }
10305
628k
  buf[len++] = cur;
10306
628k
  NEXT;
10307
628k
  cur=CUR;
10308
628k
    }
10309
267k
    buf[len] = 0;
10310
267k
    return(buf);
10311
267k
}
10312
10313
/**
10314
 * xmlParseVersionInfo:
10315
 * @ctxt:  an XML parser context
10316
 *
10317
 * DEPRECATED: Internal function, don't use.
10318
 *
10319
 * parse the XML version.
10320
 *
10321
 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
10322
 *
10323
 * [25] Eq ::= S? '=' S?
10324
 *
10325
 * Returns the version string, e.g. "1.0"
10326
 */
10327
10328
xmlChar *
10329
304k
xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
10330
304k
    xmlChar *version = NULL;
10331
10332
304k
    if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
10333
284k
  SKIP(7);
10334
284k
  SKIP_BLANKS;
10335
284k
  if (RAW != '=') {
10336
3.65k
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10337
3.65k
      return(NULL);
10338
3.65k
        }
10339
280k
  NEXT;
10340
280k
  SKIP_BLANKS;
10341
280k
  if (RAW == '"') {
10342
246k
      NEXT;
10343
246k
      version = xmlParseVersionNum(ctxt);
10344
246k
      if (RAW != '"') {
10345
11.3k
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10346
11.3k
      } else
10347
235k
          NEXT;
10348
246k
  } else if (RAW == '\''){
10349
29.5k
      NEXT;
10350
29.5k
      version = xmlParseVersionNum(ctxt);
10351
29.5k
      if (RAW != '\'') {
10352
1.39k
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10353
1.39k
      } else
10354
28.1k
          NEXT;
10355
29.5k
  } else {
10356
3.88k
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10357
3.88k
  }
10358
280k
    }
10359
300k
    return(version);
10360
304k
}
10361
10362
/**
10363
 * xmlParseEncName:
10364
 * @ctxt:  an XML parser context
10365
 *
10366
 * DEPRECATED: Internal function, don't use.
10367
 *
10368
 * parse the XML encoding name
10369
 *
10370
 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
10371
 *
10372
 * Returns the encoding name value or NULL
10373
 */
10374
xmlChar *
10375
126k
xmlParseEncName(xmlParserCtxtPtr ctxt) {
10376
126k
    xmlChar *buf = NULL;
10377
126k
    int len = 0;
10378
126k
    int size = 10;
10379
126k
    xmlChar cur;
10380
10381
126k
    cur = CUR;
10382
126k
    if (((cur >= 'a') && (cur <= 'z')) ||
10383
126k
        ((cur >= 'A') && (cur <= 'Z'))) {
10384
125k
  buf = (xmlChar *) xmlMallocAtomic(size);
10385
125k
  if (buf == NULL) {
10386
0
      xmlErrMemory(ctxt, NULL);
10387
0
      return(NULL);
10388
0
  }
10389
10390
125k
  buf[len++] = cur;
10391
125k
  NEXT;
10392
125k
  cur = CUR;
10393
1.79M
  while (((cur >= 'a') && (cur <= 'z')) ||
10394
1.79M
         ((cur >= 'A') && (cur <= 'Z')) ||
10395
1.79M
         ((cur >= '0') && (cur <= '9')) ||
10396
1.79M
         (cur == '.') || (cur == '_') ||
10397
1.79M
         (cur == '-')) {
10398
1.67M
      if (len + 1 >= size) {
10399
39.5k
          xmlChar *tmp;
10400
10401
39.5k
    size *= 2;
10402
39.5k
    tmp = (xmlChar *) xmlRealloc(buf, size);
10403
39.5k
    if (tmp == NULL) {
10404
0
        xmlErrMemory(ctxt, NULL);
10405
0
        xmlFree(buf);
10406
0
        return(NULL);
10407
0
    }
10408
39.5k
    buf = tmp;
10409
39.5k
      }
10410
1.67M
      buf[len++] = cur;
10411
1.67M
      NEXT;
10412
1.67M
      cur = CUR;
10413
1.67M
      if (cur == 0) {
10414
880
          SHRINK;
10415
880
    GROW;
10416
880
    cur = CUR;
10417
880
      }
10418
1.67M
        }
10419
125k
  buf[len] = 0;
10420
125k
    } else {
10421
1.03k
  xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
10422
1.03k
    }
10423
126k
    return(buf);
10424
126k
}
10425
10426
/**
10427
 * xmlParseEncodingDecl:
10428
 * @ctxt:  an XML parser context
10429
 *
10430
 * DEPRECATED: Internal function, don't use.
10431
 *
10432
 * parse the XML encoding declaration
10433
 *
10434
 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' |  "'" EncName "'")
10435
 *
10436
 * this setups the conversion filters.
10437
 *
10438
 * Returns the encoding value or NULL
10439
 */
10440
10441
const xmlChar *
10442
207k
xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
10443
207k
    xmlChar *encoding = NULL;
10444
10445
207k
    SKIP_BLANKS;
10446
207k
    if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
10447
128k
  SKIP(8);
10448
128k
  SKIP_BLANKS;
10449
128k
  if (RAW != '=') {
10450
981
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10451
981
      return(NULL);
10452
981
        }
10453
127k
  NEXT;
10454
127k
  SKIP_BLANKS;
10455
127k
  if (RAW == '"') {
10456
109k
      NEXT;
10457
109k
      encoding = xmlParseEncName(ctxt);
10458
109k
      if (RAW != '"') {
10459
5.12k
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10460
5.12k
    xmlFree((xmlChar *) encoding);
10461
5.12k
    return(NULL);
10462
5.12k
      } else
10463
104k
          NEXT;
10464
109k
  } else if (RAW == '\''){
10465
16.4k
      NEXT;
10466
16.4k
      encoding = xmlParseEncName(ctxt);
10467
16.4k
      if (RAW != '\'') {
10468
1.12k
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10469
1.12k
    xmlFree((xmlChar *) encoding);
10470
1.12k
    return(NULL);
10471
1.12k
      } else
10472
15.3k
          NEXT;
10473
16.4k
  } else {
10474
1.00k
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10475
1.00k
  }
10476
10477
        /*
10478
         * Non standard parsing, allowing the user to ignore encoding
10479
         */
10480
120k
        if (ctxt->options & XML_PARSE_IGNORE_ENC) {
10481
55.1k
      xmlFree((xmlChar *) encoding);
10482
55.1k
            return(NULL);
10483
55.1k
  }
10484
10485
  /*
10486
   * UTF-16 encoding switch has already taken place at this stage,
10487
   * more over the little-endian/big-endian selection is already done
10488
   */
10489
65.8k
        if ((encoding != NULL) &&
10490
65.8k
      ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
10491
65.3k
       (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
10492
      /*
10493
       * If no encoding was passed to the parser, that we are
10494
       * using UTF-16 and no decoder is present i.e. the
10495
       * document is apparently UTF-8 compatible, then raise an
10496
       * encoding mismatch fatal error
10497
       */
10498
1.00k
      if ((ctxt->encoding == NULL) &&
10499
1.00k
          (ctxt->input->buf != NULL) &&
10500
1.00k
          (ctxt->input->buf->encoder == NULL)) {
10501
1.00k
    xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING,
10502
1.00k
      "Document labelled UTF-16 but has UTF-8 content\n");
10503
1.00k
      }
10504
1.00k
      if (ctxt->encoding != NULL)
10505
0
    xmlFree((xmlChar *) ctxt->encoding);
10506
1.00k
      ctxt->encoding = encoding;
10507
1.00k
  }
10508
  /*
10509
   * UTF-8 encoding is handled natively
10510
   */
10511
64.8k
        else if ((encoding != NULL) &&
10512
64.8k
      ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
10513
64.3k
       (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
10514
36.4k
      if (ctxt->encoding != NULL)
10515
0
    xmlFree((xmlChar *) ctxt->encoding);
10516
36.4k
      ctxt->encoding = encoding;
10517
36.4k
  }
10518
28.3k
  else if (encoding != NULL) {
10519
27.9k
      xmlCharEncodingHandlerPtr handler;
10520
10521
27.9k
      if (ctxt->input->encoding != NULL)
10522
0
    xmlFree((xmlChar *) ctxt->input->encoding);
10523
27.9k
      ctxt->input->encoding = encoding;
10524
10525
27.9k
            handler = xmlFindCharEncodingHandler((const char *) encoding);
10526
27.9k
      if (handler != NULL) {
10527
27.1k
    if (xmlSwitchToEncoding(ctxt, handler) < 0) {
10528
        /* failed to convert */
10529
315
        ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
10530
315
        return(NULL);
10531
315
    }
10532
27.1k
      } else {
10533
777
    xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
10534
777
      "Unsupported encoding %s\n", encoding);
10535
777
    return(NULL);
10536
777
      }
10537
27.9k
  }
10538
65.8k
    }
10539
144k
    return(encoding);
10540
207k
}
10541
10542
/**
10543
 * xmlParseSDDecl:
10544
 * @ctxt:  an XML parser context
10545
 *
10546
 * DEPRECATED: Internal function, don't use.
10547
 *
10548
 * parse the XML standalone declaration
10549
 *
10550
 * [32] SDDecl ::= S 'standalone' Eq
10551
 *                 (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
10552
 *
10553
 * [ VC: Standalone Document Declaration ]
10554
 * TODO The standalone document declaration must have the value "no"
10555
 * if any external markup declarations contain declarations of:
10556
 *  - attributes with default values, if elements to which these
10557
 *    attributes apply appear in the document without specifications
10558
 *    of values for these attributes, or
10559
 *  - entities (other than amp, lt, gt, apos, quot), if references
10560
 *    to those entities appear in the document, or
10561
 *  - attributes with values subject to normalization, where the
10562
 *    attribute appears in the document with a value which will change
10563
 *    as a result of normalization, or
10564
 *  - element types with element content, if white space occurs directly
10565
 *    within any instance of those types.
10566
 *
10567
 * Returns:
10568
 *   1 if standalone="yes"
10569
 *   0 if standalone="no"
10570
 *  -2 if standalone attribute is missing or invalid
10571
 *    (A standalone value of -2 means that the XML declaration was found,
10572
 *     but no value was specified for the standalone attribute).
10573
 */
10574
10575
int
10576
179k
xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
10577
179k
    int standalone = -2;
10578
10579
179k
    SKIP_BLANKS;
10580
179k
    if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
10581
29.5k
  SKIP(10);
10582
29.5k
        SKIP_BLANKS;
10583
29.5k
  if (RAW != '=') {
10584
567
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10585
567
      return(standalone);
10586
567
        }
10587
28.9k
  NEXT;
10588
28.9k
  SKIP_BLANKS;
10589
28.9k
        if (RAW == '\''){
10590
12.4k
      NEXT;
10591
12.4k
      if ((RAW == 'n') && (NXT(1) == 'o')) {
10592
8.25k
          standalone = 0;
10593
8.25k
                SKIP(2);
10594
8.25k
      } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10595
4.18k
                 (NXT(2) == 's')) {
10596
3.03k
          standalone = 1;
10597
3.03k
    SKIP(3);
10598
3.03k
            } else {
10599
1.14k
    xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10600
1.14k
      }
10601
12.4k
      if (RAW != '\'') {
10602
1.61k
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10603
1.61k
      } else
10604
10.8k
          NEXT;
10605
16.5k
  } else if (RAW == '"'){
10606
16.2k
      NEXT;
10607
16.2k
      if ((RAW == 'n') && (NXT(1) == 'o')) {
10608
6.44k
          standalone = 0;
10609
6.44k
    SKIP(2);
10610
9.77k
      } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10611
9.77k
                 (NXT(2) == 's')) {
10612
8.80k
          standalone = 1;
10613
8.80k
                SKIP(3);
10614
8.80k
            } else {
10615
969
    xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10616
969
      }
10617
16.2k
      if (RAW != '"') {
10618
1.46k
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10619
1.46k
      } else
10620
14.7k
          NEXT;
10621
16.2k
  } else {
10622
318
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10623
318
        }
10624
28.9k
    }
10625
178k
    return(standalone);
10626
179k
}
10627
10628
/**
10629
 * xmlParseXMLDecl:
10630
 * @ctxt:  an XML parser context
10631
 *
10632
 * DEPRECATED: Internal function, don't use.
10633
 *
10634
 * parse an XML declaration header
10635
 *
10636
 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10637
 */
10638
10639
void
10640
291k
xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
10641
291k
    xmlChar *version;
10642
10643
    /*
10644
     * This value for standalone indicates that the document has an
10645
     * XML declaration but it does not have a standalone attribute.
10646
     * It will be overwritten later if a standalone attribute is found.
10647
     */
10648
291k
    ctxt->input->standalone = -2;
10649
10650
    /*
10651
     * We know that '<?xml' is here.
10652
     */
10653
291k
    SKIP(5);
10654
10655
291k
    if (!IS_BLANK_CH(RAW)) {
10656
0
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10657
0
                 "Blank needed after '<?xml'\n");
10658
0
    }
10659
291k
    SKIP_BLANKS;
10660
10661
    /*
10662
     * We must have the VersionInfo here.
10663
     */
10664
291k
    version = xmlParseVersionInfo(ctxt);
10665
291k
    if (version == NULL) {
10666
35.3k
  xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
10667
256k
    } else {
10668
256k
  if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10669
      /*
10670
       * Changed here for XML-1.0 5th edition
10671
       */
10672
3.41k
      if (ctxt->options & XML_PARSE_OLD10) {
10673
1.29k
    xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10674
1.29k
                "Unsupported version '%s'\n",
10675
1.29k
                version);
10676
2.11k
      } else {
10677
2.11k
          if ((version[0] == '1') && ((version[1] == '.'))) {
10678
1.72k
        xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10679
1.72k
                      "Unsupported version '%s'\n",
10680
1.72k
          version, NULL);
10681
1.72k
    } else {
10682
391
        xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10683
391
              "Unsupported version '%s'\n",
10684
391
              version);
10685
391
    }
10686
2.11k
      }
10687
3.41k
  }
10688
256k
  if (ctxt->version != NULL)
10689
0
      xmlFree((void *) ctxt->version);
10690
256k
  ctxt->version = version;
10691
256k
    }
10692
10693
    /*
10694
     * We may have the encoding declaration
10695
     */
10696
291k
    if (!IS_BLANK_CH(RAW)) {
10697
136k
        if ((RAW == '?') && (NXT(1) == '>')) {
10698
96.1k
      SKIP(2);
10699
96.1k
      return;
10700
96.1k
  }
10701
40.6k
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10702
40.6k
    }
10703
195k
    xmlParseEncodingDecl(ctxt);
10704
195k
    if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10705
195k
         (ctxt->instate == XML_PARSER_EOF)) {
10706
  /*
10707
   * The XML REC instructs us to stop parsing right here
10708
   */
10709
999
        return;
10710
999
    }
10711
10712
    /*
10713
     * We may have the standalone status.
10714
     */
10715
194k
    if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
10716
16.9k
        if ((RAW == '?') && (NXT(1) == '>')) {
10717
15.3k
      SKIP(2);
10718
15.3k
      return;
10719
15.3k
  }
10720
1.62k
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10721
1.62k
    }
10722
10723
    /*
10724
     * We can grow the input buffer freely at that point
10725
     */
10726
179k
    GROW;
10727
10728
179k
    SKIP_BLANKS;
10729
179k
    ctxt->input->standalone = xmlParseSDDecl(ctxt);
10730
10731
179k
    SKIP_BLANKS;
10732
179k
    if ((RAW == '?') && (NXT(1) == '>')) {
10733
108k
        SKIP(2);
10734
108k
    } else if (RAW == '>') {
10735
        /* Deprecated old WD ... */
10736
1.38k
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10737
1.38k
  NEXT;
10738
68.9k
    } else {
10739
68.9k
        int c;
10740
10741
68.9k
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10742
4.22M
        while ((c = CUR) != 0) {
10743
4.21M
            NEXT;
10744
4.21M
            if (c == '>')
10745
57.9k
                break;
10746
4.21M
        }
10747
68.9k
    }
10748
179k
}
10749
10750
/**
10751
 * xmlParseMisc:
10752
 * @ctxt:  an XML parser context
10753
 *
10754
 * DEPRECATED: Internal function, don't use.
10755
 *
10756
 * parse an XML Misc* optional field.
10757
 *
10758
 * [27] Misc ::= Comment | PI |  S
10759
 */
10760
10761
void
10762
765k
xmlParseMisc(xmlParserCtxtPtr ctxt) {
10763
826k
    while (ctxt->instate != XML_PARSER_EOF) {
10764
826k
        SKIP_BLANKS;
10765
826k
        GROW;
10766
826k
        if ((RAW == '<') && (NXT(1) == '?')) {
10767
37.5k
      xmlParsePI(ctxt);
10768
788k
        } else if (CMP4(CUR_PTR, '<', '!', '-', '-')) {
10769
23.2k
      xmlParseComment(ctxt);
10770
765k
        } else {
10771
765k
            break;
10772
765k
        }
10773
826k
    }
10774
765k
}
10775
10776
/**
10777
 * xmlParseDocument:
10778
 * @ctxt:  an XML parser context
10779
 *
10780
 * parse an XML document (and build a tree if using the standard SAX
10781
 * interface).
10782
 *
10783
 * [1] document ::= prolog element Misc*
10784
 *
10785
 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
10786
 *
10787
 * Returns 0, -1 in case of error. the parser context is augmented
10788
 *                as a result of the parsing.
10789
 */
10790
10791
int
10792
359k
xmlParseDocument(xmlParserCtxtPtr ctxt) {
10793
359k
    xmlChar start[4];
10794
359k
    xmlCharEncoding enc;
10795
10796
359k
    xmlInitParser();
10797
10798
359k
    if ((ctxt == NULL) || (ctxt->input == NULL))
10799
0
        return(-1);
10800
10801
359k
    GROW;
10802
10803
    /*
10804
     * SAX: detecting the level.
10805
     */
10806
359k
    xmlDetectSAX2(ctxt);
10807
10808
    /*
10809
     * SAX: beginning of the document processing.
10810
     */
10811
359k
    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10812
359k
        ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10813
359k
    if (ctxt->instate == XML_PARSER_EOF)
10814
0
  return(-1);
10815
10816
359k
    if ((ctxt->encoding == NULL) &&
10817
359k
        ((ctxt->input->end - ctxt->input->cur) >= 4)) {
10818
  /*
10819
   * Get the 4 first bytes and decode the charset
10820
   * if enc != XML_CHAR_ENCODING_NONE
10821
   * plug some encoding conversion routines.
10822
   */
10823
357k
  start[0] = RAW;
10824
357k
  start[1] = NXT(1);
10825
357k
  start[2] = NXT(2);
10826
357k
  start[3] = NXT(3);
10827
357k
  enc = xmlDetectCharEncoding(&start[0], 4);
10828
357k
  if (enc != XML_CHAR_ENCODING_NONE) {
10829
141k
      xmlSwitchEncoding(ctxt, enc);
10830
141k
  }
10831
357k
    }
10832
10833
10834
359k
    if (CUR == 0) {
10835
1.54k
  xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10836
1.54k
  return(-1);
10837
1.54k
    }
10838
10839
    /*
10840
     * Check for the XMLDecl in the Prolog.
10841
     * do not GROW here to avoid the detected encoder to decode more
10842
     * than just the first line, unless the amount of data is really
10843
     * too small to hold "<?xml version="1.0" encoding="foo"
10844
     */
10845
357k
    if ((ctxt->input->end - ctxt->input->cur) < 35) {
10846
14.6k
       GROW;
10847
14.6k
    }
10848
357k
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10849
10850
  /*
10851
   * Note that we will switch encoding on the fly.
10852
   */
10853
129k
  xmlParseXMLDecl(ctxt);
10854
129k
  if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10855
129k
      (ctxt->instate == XML_PARSER_EOF)) {
10856
      /*
10857
       * The XML REC instructs us to stop parsing right here
10858
       */
10859
627
      return(-1);
10860
627
  }
10861
128k
  ctxt->standalone = ctxt->input->standalone;
10862
128k
  SKIP_BLANKS;
10863
228k
    } else {
10864
228k
  ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10865
228k
    }
10866
357k
    if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10867
342k
        ctxt->sax->startDocument(ctxt->userData);
10868
357k
    if (ctxt->instate == XML_PARSER_EOF)
10869
0
  return(-1);
10870
357k
    if ((ctxt->myDoc != NULL) && (ctxt->input != NULL) &&
10871
357k
        (ctxt->input->buf != NULL) && (ctxt->input->buf->compressed >= 0)) {
10872
0
  ctxt->myDoc->compression = ctxt->input->buf->compressed;
10873
0
    }
10874
10875
    /*
10876
     * The Misc part of the Prolog
10877
     */
10878
357k
    xmlParseMisc(ctxt);
10879
10880
    /*
10881
     * Then possibly doc type declaration(s) and more Misc
10882
     * (doctypedecl Misc*)?
10883
     */
10884
357k
    GROW;
10885
357k
    if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
10886
10887
147k
  ctxt->inSubset = 1;
10888
147k
  xmlParseDocTypeDecl(ctxt);
10889
147k
  if (RAW == '[') {
10890
115k
      ctxt->instate = XML_PARSER_DTD;
10891
115k
      xmlParseInternalSubset(ctxt);
10892
115k
      if (ctxt->instate == XML_PARSER_EOF)
10893
8.30k
    return(-1);
10894
115k
  }
10895
10896
  /*
10897
   * Create and update the external subset.
10898
   */
10899
139k
  ctxt->inSubset = 2;
10900
139k
  if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10901
139k
      (!ctxt->disableSAX))
10902
108k
      ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10903
108k
                                ctxt->extSubSystem, ctxt->extSubURI);
10904
139k
  if (ctxt->instate == XML_PARSER_EOF)
10905
2.03k
      return(-1);
10906
137k
  ctxt->inSubset = 0;
10907
10908
137k
        xmlCleanSpecialAttr(ctxt);
10909
10910
137k
  ctxt->instate = XML_PARSER_PROLOG;
10911
137k
  xmlParseMisc(ctxt);
10912
137k
    }
10913
10914
    /*
10915
     * Time to start parsing the tree itself
10916
     */
10917
346k
    GROW;
10918
346k
    if (RAW != '<') {
10919
75.4k
  xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10920
75.4k
           "Start tag expected, '<' not found\n");
10921
271k
    } else {
10922
271k
  ctxt->instate = XML_PARSER_CONTENT;
10923
271k
  xmlParseElement(ctxt);
10924
271k
  ctxt->instate = XML_PARSER_EPILOG;
10925
10926
10927
  /*
10928
   * The Misc part at the end
10929
   */
10930
271k
  xmlParseMisc(ctxt);
10931
10932
271k
  if (RAW != 0) {
10933
64.2k
      xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
10934
64.2k
  }
10935
271k
  ctxt->instate = XML_PARSER_EOF;
10936
271k
    }
10937
10938
    /*
10939
     * SAX: end of the document processing.
10940
     */
10941
346k
    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10942
346k
        ctxt->sax->endDocument(ctxt->userData);
10943
10944
    /*
10945
     * Remove locally kept entity definitions if the tree was not built
10946
     */
10947
346k
    if ((ctxt->myDoc != NULL) &&
10948
346k
  (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
10949
1.21k
  xmlFreeDoc(ctxt->myDoc);
10950
1.21k
  ctxt->myDoc = NULL;
10951
1.21k
    }
10952
10953
346k
    if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) {
10954
46.8k
        ctxt->myDoc->properties |= XML_DOC_WELLFORMED;
10955
46.8k
  if (ctxt->valid)
10956
30.2k
      ctxt->myDoc->properties |= XML_DOC_DTDVALID;
10957
46.8k
  if (ctxt->nsWellFormed)
10958
45.4k
      ctxt->myDoc->properties |= XML_DOC_NSVALID;
10959
46.8k
  if (ctxt->options & XML_PARSE_OLD10)
10960
18.3k
      ctxt->myDoc->properties |= XML_DOC_OLD10;
10961
46.8k
    }
10962
346k
    if (! ctxt->wellFormed) {
10963
299k
  ctxt->valid = 0;
10964
299k
  return(-1);
10965
299k
    }
10966
46.8k
    return(0);
10967
346k
}
10968
10969
/**
10970
 * xmlParseExtParsedEnt:
10971
 * @ctxt:  an XML parser context
10972
 *
10973
 * parse a general parsed entity
10974
 * An external general parsed entity is well-formed if it matches the
10975
 * production labeled extParsedEnt.
10976
 *
10977
 * [78] extParsedEnt ::= TextDecl? content
10978
 *
10979
 * Returns 0, -1 in case of error. the parser context is augmented
10980
 *                as a result of the parsing.
10981
 */
10982
10983
int
10984
0
xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
10985
0
    xmlChar start[4];
10986
0
    xmlCharEncoding enc;
10987
10988
0
    if ((ctxt == NULL) || (ctxt->input == NULL))
10989
0
        return(-1);
10990
10991
0
    xmlDetectSAX2(ctxt);
10992
10993
0
    GROW;
10994
10995
    /*
10996
     * SAX: beginning of the document processing.
10997
     */
10998
0
    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10999
0
        ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
11000
11001
    /*
11002
     * Get the 4 first bytes and decode the charset
11003
     * if enc != XML_CHAR_ENCODING_NONE
11004
     * plug some encoding conversion routines.
11005
     */
11006
0
    if ((ctxt->input->end - ctxt->input->cur) >= 4) {
11007
0
  start[0] = RAW;
11008
0
  start[1] = NXT(1);
11009
0
  start[2] = NXT(2);
11010
0
  start[3] = NXT(3);
11011
0
  enc = xmlDetectCharEncoding(start, 4);
11012
0
  if (enc != XML_CHAR_ENCODING_NONE) {
11013
0
      xmlSwitchEncoding(ctxt, enc);
11014
0
  }
11015
0
    }
11016
11017
11018
0
    if (CUR == 0) {
11019
0
  xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11020
0
    }
11021
11022
    /*
11023
     * Check for the XMLDecl in the Prolog.
11024
     */
11025
0
    GROW;
11026
0
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
11027
11028
  /*
11029
   * Note that we will switch encoding on the fly.
11030
   */
11031
0
  xmlParseXMLDecl(ctxt);
11032
0
  if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
11033
      /*
11034
       * The XML REC instructs us to stop parsing right here
11035
       */
11036
0
      return(-1);
11037
0
  }
11038
0
  SKIP_BLANKS;
11039
0
    } else {
11040
0
  ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11041
0
    }
11042
0
    if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
11043
0
        ctxt->sax->startDocument(ctxt->userData);
11044
0
    if (ctxt->instate == XML_PARSER_EOF)
11045
0
  return(-1);
11046
11047
    /*
11048
     * Doing validity checking on chunk doesn't make sense
11049
     */
11050
0
    ctxt->instate = XML_PARSER_CONTENT;
11051
0
    ctxt->validate = 0;
11052
0
    ctxt->loadsubset = 0;
11053
0
    ctxt->depth = 0;
11054
11055
0
    xmlParseContent(ctxt);
11056
0
    if (ctxt->instate == XML_PARSER_EOF)
11057
0
  return(-1);
11058
11059
0
    if ((RAW == '<') && (NXT(1) == '/')) {
11060
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11061
0
    } else if (RAW != 0) {
11062
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
11063
0
    }
11064
11065
    /*
11066
     * SAX: end of the document processing.
11067
     */
11068
0
    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11069
0
        ctxt->sax->endDocument(ctxt->userData);
11070
11071
0
    if (! ctxt->wellFormed) return(-1);
11072
0
    return(0);
11073
0
}
11074
11075
#ifdef LIBXML_PUSH_ENABLED
11076
/************************************************************************
11077
 *                  *
11078
 *    Progressive parsing interfaces        *
11079
 *                  *
11080
 ************************************************************************/
11081
11082
/**
11083
 * xmlParseLookupSequence:
11084
 * @ctxt:  an XML parser context
11085
 * @first:  the first char to lookup
11086
 * @next:  the next char to lookup or zero
11087
 * @third:  the next char to lookup or zero
11088
 *
11089
 * Try to find if a sequence (first, next, third) or  just (first next) or
11090
 * (first) is available in the input stream.
11091
 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
11092
 * to avoid rescanning sequences of bytes, it DOES change the state of the
11093
 * parser, do not use liberally.
11094
 *
11095
 * Returns the index to the current parsing point if the full sequence
11096
 *      is available, -1 otherwise.
11097
 */
11098
static int
11099
xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
11100
1.06M
                       xmlChar next, xmlChar third) {
11101
1.06M
    int base, len;
11102
1.06M
    xmlParserInputPtr in;
11103
1.06M
    const xmlChar *buf;
11104
11105
1.06M
    in = ctxt->input;
11106
1.06M
    if (in == NULL) return(-1);
11107
1.06M
    base = in->cur - in->base;
11108
1.06M
    if (base < 0) return(-1);
11109
1.06M
    if (ctxt->checkIndex > base)
11110
253k
        base = ctxt->checkIndex;
11111
1.06M
    if (in->buf == NULL) {
11112
0
  buf = in->base;
11113
0
  len = in->length;
11114
1.06M
    } else {
11115
1.06M
  buf = xmlBufContent(in->buf->buffer);
11116
1.06M
  len = xmlBufUse(in->buf->buffer);
11117
1.06M
    }
11118
    /* take into account the sequence length */
11119
1.06M
    if (third) len -= 2;
11120
878k
    else if (next) len --;
11121
185M
    for (;base < len;base++) {
11122
184M
        if (buf[base] == first) {
11123
1.26M
      if (third != 0) {
11124
327k
    if ((buf[base + 1] != next) ||
11125
327k
        (buf[base + 2] != third)) continue;
11126
935k
      } else if (next != 0) {
11127
519k
    if (buf[base + 1] != next) continue;
11128
519k
      }
11129
739k
      ctxt->checkIndex = 0;
11130
#ifdef DEBUG_PUSH
11131
      if (next == 0)
11132
    xmlGenericError(xmlGenericErrorContext,
11133
      "PP: lookup '%c' found at %d\n",
11134
      first, base);
11135
      else if (third == 0)
11136
    xmlGenericError(xmlGenericErrorContext,
11137
      "PP: lookup '%c%c' found at %d\n",
11138
      first, next, base);
11139
      else
11140
    xmlGenericError(xmlGenericErrorContext,
11141
      "PP: lookup '%c%c%c' found at %d\n",
11142
      first, next, third, base);
11143
#endif
11144
739k
      return(base - (in->cur - in->base));
11145
1.26M
  }
11146
184M
    }
11147
325k
    ctxt->checkIndex = base;
11148
#ifdef DEBUG_PUSH
11149
    if (next == 0)
11150
  xmlGenericError(xmlGenericErrorContext,
11151
    "PP: lookup '%c' failed\n", first);
11152
    else if (third == 0)
11153
  xmlGenericError(xmlGenericErrorContext,
11154
    "PP: lookup '%c%c' failed\n", first, next);
11155
    else
11156
  xmlGenericError(xmlGenericErrorContext,
11157
    "PP: lookup '%c%c%c' failed\n", first, next, third);
11158
#endif
11159
325k
    return(-1);
11160
1.06M
}
11161
11162
/**
11163
 * xmlParseGetLasts:
11164
 * @ctxt:  an XML parser context
11165
 * @lastlt:  pointer to store the last '<' from the input
11166
 * @lastgt:  pointer to store the last '>' from the input
11167
 *
11168
 * Lookup the last < and > in the current chunk
11169
 */
11170
static void
11171
xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
11172
2.33M
                 const xmlChar **lastgt) {
11173
2.33M
    const xmlChar *tmp;
11174
11175
2.33M
    if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
11176
0
  xmlGenericError(xmlGenericErrorContext,
11177
0
        "Internal error: xmlParseGetLasts\n");
11178
0
  return;
11179
0
    }
11180
2.33M
    if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
11181
1.50M
        tmp = ctxt->input->end;
11182
1.50M
  tmp--;
11183
691M
  while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
11184
1.50M
  if (tmp < ctxt->input->base) {
11185
104k
      *lastlt = NULL;
11186
104k
      *lastgt = NULL;
11187
1.40M
  } else {
11188
1.40M
      *lastlt = tmp;
11189
1.40M
      tmp++;
11190
153M
      while ((tmp < ctxt->input->end) && (*tmp != '>')) {
11191
152M
          if (*tmp == '\'') {
11192
69.2k
        tmp++;
11193
42.3M
        while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
11194
69.2k
        if (tmp < ctxt->input->end) tmp++;
11195
152M
    } else if (*tmp == '"') {
11196
531k
        tmp++;
11197
66.1M
        while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
11198
531k
        if (tmp < ctxt->input->end) tmp++;
11199
531k
    } else
11200
151M
        tmp++;
11201
152M
      }
11202
1.40M
      if (tmp < ctxt->input->end)
11203
672k
          *lastgt = tmp;
11204
727k
      else {
11205
727k
          tmp = *lastlt;
11206
727k
    tmp--;
11207
73.3M
    while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
11208
727k
    if (tmp >= ctxt->input->base)
11209
683k
        *lastgt = tmp;
11210
44.5k
    else
11211
44.5k
        *lastgt = NULL;
11212
727k
      }
11213
1.40M
  }
11214
1.50M
    } else {
11215
830k
        *lastlt = NULL;
11216
830k
  *lastgt = NULL;
11217
830k
    }
11218
2.33M
}
11219
/**
11220
 * xmlCheckCdataPush:
11221
 * @cur: pointer to the block of characters
11222
 * @len: length of the block in bytes
11223
 * @complete: 1 if complete CDATA block is passed in, 0 if partial block
11224
 *
11225
 * Check that the block of characters is okay as SCdata content [20]
11226
 *
11227
 * Returns the number of bytes to pass if okay, a negative index where an
11228
 *         UTF-8 error occurred otherwise
11229
 */
11230
static int
11231
59.6k
xmlCheckCdataPush(const xmlChar *utf, int len, int complete) {
11232
59.6k
    int ix;
11233
59.6k
    unsigned char c;
11234
59.6k
    int codepoint;
11235
11236
59.6k
    if ((utf == NULL) || (len <= 0))
11237
3.35k
        return(0);
11238
11239
3.92M
    for (ix = 0; ix < len;) {      /* string is 0-terminated */
11240
3.91M
        c = utf[ix];
11241
3.91M
        if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
11242
2.61M
      if (c >= 0x20)
11243
2.53M
    ix++;
11244
72.7k
      else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
11245
66.6k
          ix++;
11246
6.10k
      else
11247
6.10k
          return(-ix);
11248
2.61M
  } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
11249
374k
      if (ix + 2 > len) return(complete ? -ix : ix);
11250
373k
      if ((utf[ix+1] & 0xc0 ) != 0x80)
11251
3.63k
          return(-ix);
11252
369k
      codepoint = (utf[ix] & 0x1f) << 6;
11253
369k
      codepoint |= utf[ix+1] & 0x3f;
11254
369k
      if (!xmlIsCharQ(codepoint))
11255
2.21k
          return(-ix);
11256
367k
      ix += 2;
11257
926k
  } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
11258
435k
      if (ix + 3 > len) return(complete ? -ix : ix);
11259
433k
      if (((utf[ix+1] & 0xc0) != 0x80) ||
11260
433k
          ((utf[ix+2] & 0xc0) != 0x80))
11261
5.08k
        return(-ix);
11262
428k
      codepoint = (utf[ix] & 0xf) << 12;
11263
428k
      codepoint |= (utf[ix+1] & 0x3f) << 6;
11264
428k
      codepoint |= utf[ix+2] & 0x3f;
11265
428k
      if (!xmlIsCharQ(codepoint))
11266
1.36k
          return(-ix);
11267
426k
      ix += 3;
11268
490k
  } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
11269
485k
      if (ix + 4 > len) return(complete ? -ix : ix);
11270
483k
      if (((utf[ix+1] & 0xc0) != 0x80) ||
11271
483k
          ((utf[ix+2] & 0xc0) != 0x80) ||
11272
483k
    ((utf[ix+3] & 0xc0) != 0x80))
11273
7.62k
        return(-ix);
11274
475k
      codepoint = (utf[ix] & 0x7) << 18;
11275
475k
      codepoint |= (utf[ix+1] & 0x3f) << 12;
11276
475k
      codepoint |= (utf[ix+2] & 0x3f) << 6;
11277
475k
      codepoint |= utf[ix+3] & 0x3f;
11278
475k
      if (!xmlIsCharQ(codepoint))
11279
3.56k
          return(-ix);
11280
471k
      ix += 4;
11281
471k
  } else       /* unknown encoding */
11282
5.30k
      return(-ix);
11283
3.91M
      }
11284
14.9k
      return(ix);
11285
56.2k
}
11286
11287
/**
11288
 * xmlParseTryOrFinish:
11289
 * @ctxt:  an XML parser context
11290
 * @terminate:  last chunk indicator
11291
 *
11292
 * Try to progress on parsing
11293
 *
11294
 * Returns zero if no parsing was possible
11295
 */
11296
static int
11297
1.92M
xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
11298
1.92M
    int ret = 0;
11299
1.92M
    int avail, tlen;
11300
1.92M
    xmlChar cur, next;
11301
1.92M
    const xmlChar *lastlt, *lastgt;
11302
11303
1.92M
    if (ctxt->input == NULL)
11304
0
        return(0);
11305
11306
#ifdef DEBUG_PUSH
11307
    switch (ctxt->instate) {
11308
  case XML_PARSER_EOF:
11309
      xmlGenericError(xmlGenericErrorContext,
11310
        "PP: try EOF\n"); break;
11311
  case XML_PARSER_START:
11312
      xmlGenericError(xmlGenericErrorContext,
11313
        "PP: try START\n"); break;
11314
  case XML_PARSER_MISC:
11315
      xmlGenericError(xmlGenericErrorContext,
11316
        "PP: try MISC\n");break;
11317
  case XML_PARSER_COMMENT:
11318
      xmlGenericError(xmlGenericErrorContext,
11319
        "PP: try COMMENT\n");break;
11320
  case XML_PARSER_PROLOG:
11321
      xmlGenericError(xmlGenericErrorContext,
11322
        "PP: try PROLOG\n");break;
11323
  case XML_PARSER_START_TAG:
11324
      xmlGenericError(xmlGenericErrorContext,
11325
        "PP: try START_TAG\n");break;
11326
  case XML_PARSER_CONTENT:
11327
      xmlGenericError(xmlGenericErrorContext,
11328
        "PP: try CONTENT\n");break;
11329
  case XML_PARSER_CDATA_SECTION:
11330
      xmlGenericError(xmlGenericErrorContext,
11331
        "PP: try CDATA_SECTION\n");break;
11332
  case XML_PARSER_END_TAG:
11333
      xmlGenericError(xmlGenericErrorContext,
11334
        "PP: try END_TAG\n");break;
11335
  case XML_PARSER_ENTITY_DECL:
11336
      xmlGenericError(xmlGenericErrorContext,
11337
        "PP: try ENTITY_DECL\n");break;
11338
  case XML_PARSER_ENTITY_VALUE:
11339
      xmlGenericError(xmlGenericErrorContext,
11340
        "PP: try ENTITY_VALUE\n");break;
11341
  case XML_PARSER_ATTRIBUTE_VALUE:
11342
      xmlGenericError(xmlGenericErrorContext,
11343
        "PP: try ATTRIBUTE_VALUE\n");break;
11344
  case XML_PARSER_DTD:
11345
      xmlGenericError(xmlGenericErrorContext,
11346
        "PP: try DTD\n");break;
11347
  case XML_PARSER_EPILOG:
11348
      xmlGenericError(xmlGenericErrorContext,
11349
        "PP: try EPILOG\n");break;
11350
  case XML_PARSER_PI:
11351
      xmlGenericError(xmlGenericErrorContext,
11352
        "PP: try PI\n");break;
11353
        case XML_PARSER_IGNORE:
11354
            xmlGenericError(xmlGenericErrorContext,
11355
        "PP: try IGNORE\n");break;
11356
    }
11357
#endif
11358
11359
1.92M
    if ((ctxt->input != NULL) &&
11360
1.92M
        (ctxt->input->cur - ctxt->input->base > 4096)) {
11361
10.6k
  xmlSHRINK(ctxt);
11362
10.6k
  ctxt->checkIndex = 0;
11363
10.6k
    }
11364
1.92M
    xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11365
11366
12.8M
    while (ctxt->instate != XML_PARSER_EOF) {
11367
12.7M
  if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
11368
93.4k
      return(0);
11369
11370
12.6M
  if (ctxt->input == NULL) break;
11371
12.6M
  if (ctxt->input->buf == NULL)
11372
0
      avail = ctxt->input->length -
11373
0
              (ctxt->input->cur - ctxt->input->base);
11374
12.6M
  else {
11375
      /*
11376
       * If we are operating on converted input, try to flush
11377
       * remaining chars to avoid them stalling in the non-converted
11378
       * buffer. But do not do this in document start where
11379
       * encoding="..." may not have been read and we work on a
11380
       * guessed encoding.
11381
       */
11382
12.6M
      if ((ctxt->instate != XML_PARSER_START) &&
11383
12.6M
          (ctxt->input->buf->raw != NULL) &&
11384
12.6M
    (xmlBufIsEmpty(ctxt->input->buf->raw) == 0)) {
11385
68.9k
                size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
11386
68.9k
                                                 ctxt->input);
11387
68.9k
    size_t current = ctxt->input->cur - ctxt->input->base;
11388
11389
68.9k
    xmlParserInputBufferPush(ctxt->input->buf, 0, "");
11390
68.9k
                xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
11391
68.9k
                                      base, current);
11392
68.9k
      }
11393
12.6M
      avail = xmlBufUse(ctxt->input->buf->buffer) -
11394
12.6M
        (ctxt->input->cur - ctxt->input->base);
11395
12.6M
  }
11396
12.6M
        if (avail < 1)
11397
248k
      goto done;
11398
12.4M
        switch (ctxt->instate) {
11399
0
            case XML_PARSER_EOF:
11400
          /*
11401
     * Document parsing is done !
11402
     */
11403
0
          goto done;
11404
828k
            case XML_PARSER_START:
11405
828k
    if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
11406
240k
        xmlChar start[4];
11407
240k
        xmlCharEncoding enc;
11408
11409
        /*
11410
         * Very first chars read from the document flow.
11411
         */
11412
240k
        if (avail < 4)
11413
4.06k
      goto done;
11414
11415
        /*
11416
         * Get the 4 first bytes and decode the charset
11417
         * if enc != XML_CHAR_ENCODING_NONE
11418
         * plug some encoding conversion routines,
11419
         * else xmlSwitchEncoding will set to (default)
11420
         * UTF8.
11421
         */
11422
236k
        start[0] = RAW;
11423
236k
        start[1] = NXT(1);
11424
236k
        start[2] = NXT(2);
11425
236k
        start[3] = NXT(3);
11426
236k
        enc = xmlDetectCharEncoding(start, 4);
11427
236k
        xmlSwitchEncoding(ctxt, enc);
11428
236k
        break;
11429
240k
    }
11430
11431
587k
    if (avail < 2)
11432
228
        goto done;
11433
587k
    cur = ctxt->input->cur[0];
11434
587k
    next = ctxt->input->cur[1];
11435
587k
    if (cur == 0) {
11436
470
        if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11437
470
      ctxt->sax->setDocumentLocator(ctxt->userData,
11438
470
                  &xmlDefaultSAXLocator);
11439
470
        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11440
470
        xmlHaltParser(ctxt);
11441
#ifdef DEBUG_PUSH
11442
        xmlGenericError(xmlGenericErrorContext,
11443
          "PP: entering EOF\n");
11444
#endif
11445
470
        if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11446
470
      ctxt->sax->endDocument(ctxt->userData);
11447
470
        goto done;
11448
470
    }
11449
587k
          if ((cur == '<') && (next == '?')) {
11450
        /* PI or XML decl */
11451
297k
        if (avail < 5) return(ret);
11452
297k
        if ((!terminate) &&
11453
297k
            (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11454
116k
      return(ret);
11455
180k
        if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11456
180k
      ctxt->sax->setDocumentLocator(ctxt->userData,
11457
180k
                  &xmlDefaultSAXLocator);
11458
180k
        if ((ctxt->input->cur[2] == 'x') &&
11459
180k
      (ctxt->input->cur[3] == 'm') &&
11460
180k
      (ctxt->input->cur[4] == 'l') &&
11461
180k
      (IS_BLANK_CH(ctxt->input->cur[5]))) {
11462
162k
      ret += 5;
11463
#ifdef DEBUG_PUSH
11464
      xmlGenericError(xmlGenericErrorContext,
11465
        "PP: Parsing XML Decl\n");
11466
#endif
11467
162k
      xmlParseXMLDecl(ctxt);
11468
162k
      if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
11469
          /*
11470
           * The XML REC instructs us to stop parsing right
11471
           * here
11472
           */
11473
372
          xmlHaltParser(ctxt);
11474
372
          return(0);
11475
372
      }
11476
162k
      ctxt->standalone = ctxt->input->standalone;
11477
162k
      if ((ctxt->encoding == NULL) &&
11478
162k
          (ctxt->input->encoding != NULL))
11479
13.7k
          ctxt->encoding = xmlStrdup(ctxt->input->encoding);
11480
162k
      if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11481
162k
          (!ctxt->disableSAX))
11482
153k
          ctxt->sax->startDocument(ctxt->userData);
11483
162k
      ctxt->instate = XML_PARSER_MISC;
11484
#ifdef DEBUG_PUSH
11485
      xmlGenericError(xmlGenericErrorContext,
11486
        "PP: entering MISC\n");
11487
#endif
11488
162k
        } else {
11489
18.3k
      ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11490
18.3k
      if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11491
18.3k
          (!ctxt->disableSAX))
11492
18.3k
          ctxt->sax->startDocument(ctxt->userData);
11493
18.3k
      ctxt->instate = XML_PARSER_MISC;
11494
#ifdef DEBUG_PUSH
11495
      xmlGenericError(xmlGenericErrorContext,
11496
        "PP: entering MISC\n");
11497
#endif
11498
18.3k
        }
11499
289k
    } else {
11500
289k
        if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11501
289k
      ctxt->sax->setDocumentLocator(ctxt->userData,
11502
289k
                  &xmlDefaultSAXLocator);
11503
289k
        ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11504
289k
        if (ctxt->version == NULL) {
11505
0
            xmlErrMemory(ctxt, NULL);
11506
0
      break;
11507
0
        }
11508
289k
        if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11509
289k
            (!ctxt->disableSAX))
11510
289k
      ctxt->sax->startDocument(ctxt->userData);
11511
289k
        ctxt->instate = XML_PARSER_MISC;
11512
#ifdef DEBUG_PUSH
11513
        xmlGenericError(xmlGenericErrorContext,
11514
          "PP: entering MISC\n");
11515
#endif
11516
289k
    }
11517
469k
    break;
11518
2.81M
            case XML_PARSER_START_TAG: {
11519
2.81M
          const xmlChar *name;
11520
2.81M
    const xmlChar *prefix = NULL;
11521
2.81M
    const xmlChar *URI = NULL;
11522
2.81M
                int line = ctxt->input->line;
11523
2.81M
    int nsNr = ctxt->nsNr;
11524
11525
2.81M
    if ((avail < 2) && (ctxt->inputNr == 1))
11526
0
        goto done;
11527
2.81M
    cur = ctxt->input->cur[0];
11528
2.81M
          if (cur != '<') {
11529
29.1k
        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11530
29.1k
        xmlHaltParser(ctxt);
11531
29.1k
        if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11532
29.1k
      ctxt->sax->endDocument(ctxt->userData);
11533
29.1k
        goto done;
11534
29.1k
    }
11535
2.79M
    if (!terminate) {
11536
2.63M
        if (ctxt->progressive) {
11537
            /* > can be found unescaped in attribute values */
11538
2.63M
            if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
11539
368k
          goto done;
11540
2.63M
        } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11541
0
      goto done;
11542
0
        }
11543
2.63M
    }
11544
2.42M
    if (ctxt->spaceNr == 0)
11545
32.7k
        spacePush(ctxt, -1);
11546
2.38M
    else if (*ctxt->space == -2)
11547
242k
        spacePush(ctxt, -1);
11548
2.14M
    else
11549
2.14M
        spacePush(ctxt, *ctxt->space);
11550
2.42M
#ifdef LIBXML_SAX1_ENABLED
11551
2.42M
    if (ctxt->sax2)
11552
1.31M
#endif /* LIBXML_SAX1_ENABLED */
11553
1.31M
        name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
11554
1.11M
#ifdef LIBXML_SAX1_ENABLED
11555
1.11M
    else
11556
1.11M
        name = xmlParseStartTag(ctxt);
11557
2.42M
#endif /* LIBXML_SAX1_ENABLED */
11558
2.42M
    if (ctxt->instate == XML_PARSER_EOF)
11559
0
        goto done;
11560
2.42M
    if (name == NULL) {
11561
29.4k
        spacePop(ctxt);
11562
29.4k
        xmlHaltParser(ctxt);
11563
29.4k
        if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11564
29.4k
      ctxt->sax->endDocument(ctxt->userData);
11565
29.4k
        goto done;
11566
29.4k
    }
11567
2.39M
#ifdef LIBXML_VALID_ENABLED
11568
    /*
11569
     * [ VC: Root Element Type ]
11570
     * The Name in the document type declaration must match
11571
     * the element type of the root element.
11572
     */
11573
2.39M
    if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
11574
2.39M
        ctxt->node && (ctxt->node == ctxt->myDoc->children))
11575
0
        ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
11576
2.39M
#endif /* LIBXML_VALID_ENABLED */
11577
11578
    /*
11579
     * Check for an Empty Element.
11580
     */
11581
2.39M
    if ((RAW == '/') && (NXT(1) == '>')) {
11582
673k
        SKIP(2);
11583
11584
673k
        if (ctxt->sax2) {
11585
352k
      if ((ctxt->sax != NULL) &&
11586
352k
          (ctxt->sax->endElementNs != NULL) &&
11587
352k
          (!ctxt->disableSAX))
11588
350k
          ctxt->sax->endElementNs(ctxt->userData, name,
11589
350k
                                  prefix, URI);
11590
352k
      if (ctxt->nsNr - nsNr > 0)
11591
7.26k
          nsPop(ctxt, ctxt->nsNr - nsNr);
11592
352k
#ifdef LIBXML_SAX1_ENABLED
11593
352k
        } else {
11594
320k
      if ((ctxt->sax != NULL) &&
11595
320k
          (ctxt->sax->endElement != NULL) &&
11596
320k
          (!ctxt->disableSAX))
11597
320k
          ctxt->sax->endElement(ctxt->userData, name);
11598
320k
#endif /* LIBXML_SAX1_ENABLED */
11599
320k
        }
11600
673k
        if (ctxt->instate == XML_PARSER_EOF)
11601
0
      goto done;
11602
673k
        spacePop(ctxt);
11603
673k
        if (ctxt->nameNr == 0) {
11604
11.2k
      ctxt->instate = XML_PARSER_EPILOG;
11605
661k
        } else {
11606
661k
      ctxt->instate = XML_PARSER_CONTENT;
11607
661k
        }
11608
673k
                    ctxt->progressive = 1;
11609
673k
        break;
11610
673k
    }
11611
1.71M
    if (RAW == '>') {
11612
1.37M
        NEXT;
11613
1.37M
    } else {
11614
345k
        xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
11615
345k
           "Couldn't find end of Start Tag %s\n",
11616
345k
           name);
11617
345k
        nodePop(ctxt);
11618
345k
        spacePop(ctxt);
11619
345k
    }
11620
1.71M
                nameNsPush(ctxt, name, prefix, URI, line, ctxt->nsNr - nsNr);
11621
11622
1.71M
    ctxt->instate = XML_PARSER_CONTENT;
11623
1.71M
                ctxt->progressive = 1;
11624
1.71M
                break;
11625
2.39M
      }
11626
6.78M
            case XML_PARSER_CONTENT: {
11627
6.78M
    int id;
11628
6.78M
    unsigned long cons;
11629
6.78M
    if ((avail < 2) && (ctxt->inputNr == 1))
11630
45.0k
        goto done;
11631
6.74M
    cur = ctxt->input->cur[0];
11632
6.74M
    next = ctxt->input->cur[1];
11633
11634
6.74M
    id = ctxt->input->id;
11635
6.74M
          cons = CUR_CONSUMED;
11636
6.74M
    if ((cur == '<') && (next == '/')) {
11637
698k
        ctxt->instate = XML_PARSER_END_TAG;
11638
698k
        break;
11639
6.04M
          } else if ((cur == '<') && (next == '?')) {
11640
40.1k
        if ((!terminate) &&
11641
40.1k
            (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11642
12.4k
                        ctxt->progressive = XML_PARSER_PI;
11643
12.4k
      goto done;
11644
12.4k
                    }
11645
27.6k
        xmlParsePI(ctxt);
11646
27.6k
        ctxt->instate = XML_PARSER_CONTENT;
11647
27.6k
                    ctxt->progressive = 1;
11648
6.00M
    } else if ((cur == '<') && (next != '!')) {
11649
2.06M
        ctxt->instate = XML_PARSER_START_TAG;
11650
2.06M
        break;
11651
3.93M
    } else if ((cur == '<') && (next == '!') &&
11652
3.93M
               (ctxt->input->cur[2] == '-') &&
11653
3.93M
         (ctxt->input->cur[3] == '-')) {
11654
72.6k
        int term;
11655
11656
72.6k
              if (avail < 4)
11657
0
            goto done;
11658
72.6k
        ctxt->input->cur += 4;
11659
72.6k
        term = xmlParseLookupSequence(ctxt, '-', '-', '>');
11660
72.6k
        ctxt->input->cur -= 4;
11661
72.6k
        if ((!terminate) && (term < 0)) {
11662
19.7k
                        ctxt->progressive = XML_PARSER_COMMENT;
11663
19.7k
      goto done;
11664
19.7k
                    }
11665
52.8k
        xmlParseComment(ctxt);
11666
52.8k
        ctxt->instate = XML_PARSER_CONTENT;
11667
52.8k
                    ctxt->progressive = 1;
11668
3.86M
    } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
11669
3.86M
        (ctxt->input->cur[2] == '[') &&
11670
3.86M
        (ctxt->input->cur[3] == 'C') &&
11671
3.86M
        (ctxt->input->cur[4] == 'D') &&
11672
3.86M
        (ctxt->input->cur[5] == 'A') &&
11673
3.86M
        (ctxt->input->cur[6] == 'T') &&
11674
3.86M
        (ctxt->input->cur[7] == 'A') &&
11675
3.86M
        (ctxt->input->cur[8] == '[')) {
11676
20.7k
        SKIP(9);
11677
20.7k
        ctxt->instate = XML_PARSER_CDATA_SECTION;
11678
20.7k
        break;
11679
3.84M
    } else if ((cur == '<') && (next == '!') &&
11680
3.84M
               (avail < 9)) {
11681
19.9k
        goto done;
11682
3.82M
    } else if (cur == '&') {
11683
372k
        if ((!terminate) &&
11684
372k
            (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
11685
83.3k
      goto done;
11686
289k
        xmlParseReference(ctxt);
11687
3.45M
    } else {
11688
        /* TODO Avoid the extra copy, handle directly !!! */
11689
        /*
11690
         * Goal of the following test is:
11691
         *  - minimize calls to the SAX 'character' callback
11692
         *    when they are mergeable
11693
         *  - handle an problem for isBlank when we only parse
11694
         *    a sequence of blank chars and the next one is
11695
         *    not available to check against '<' presence.
11696
         *  - tries to homogenize the differences in SAX
11697
         *    callbacks between the push and pull versions
11698
         *    of the parser.
11699
         */
11700
3.45M
        if ((ctxt->inputNr == 1) &&
11701
3.45M
            (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
11702
2.78M
      if (!terminate) {
11703
2.64M
          if (ctxt->progressive) {
11704
2.64M
        if ((lastlt == NULL) ||
11705
2.64M
            (ctxt->input->cur > lastlt))
11706
259k
            goto done;
11707
2.64M
          } else if (xmlParseLookupSequence(ctxt,
11708
0
                                            '<', 0, 0) < 0) {
11709
0
        goto done;
11710
0
          }
11711
2.64M
      }
11712
2.78M
                    }
11713
3.19M
        ctxt->checkIndex = 0;
11714
3.19M
        xmlParseCharData(ctxt, 0);
11715
3.19M
    }
11716
3.56M
    if ((cons == CUR_CONSUMED) && (id == ctxt->input->id)) {
11717
83.8k
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
11718
83.8k
                    "detected an error in element content\n");
11719
83.8k
        xmlHaltParser(ctxt);
11720
83.8k
        break;
11721
83.8k
    }
11722
3.47M
    break;
11723
3.56M
      }
11724
3.47M
            case XML_PARSER_END_TAG:
11725
733k
    if (avail < 2)
11726
0
        goto done;
11727
733k
    if (!terminate) {
11728
689k
        if (ctxt->progressive) {
11729
            /* > can be found unescaped in attribute values */
11730
689k
            if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
11731
36.7k
          goto done;
11732
689k
        } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11733
0
      goto done;
11734
0
        }
11735
689k
    }
11736
696k
    if (ctxt->sax2) {
11737
411k
              xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
11738
411k
        nameNsPop(ctxt);
11739
411k
    }
11740
284k
#ifdef LIBXML_SAX1_ENABLED
11741
284k
      else
11742
284k
        xmlParseEndTag1(ctxt, 0);
11743
696k
#endif /* LIBXML_SAX1_ENABLED */
11744
696k
    if (ctxt->instate == XML_PARSER_EOF) {
11745
        /* Nothing */
11746
696k
    } else if (ctxt->nameNr == 0) {
11747
101k
        ctxt->instate = XML_PARSER_EPILOG;
11748
595k
    } else {
11749
595k
        ctxt->instate = XML_PARSER_CONTENT;
11750
595k
    }
11751
696k
    break;
11752
73.9k
            case XML_PARSER_CDATA_SECTION: {
11753
          /*
11754
     * The Push mode need to have the SAX callback for
11755
     * cdataBlock merge back contiguous callbacks.
11756
     */
11757
73.9k
    int base;
11758
11759
73.9k
    base = xmlParseLookupSequence(ctxt, ']', ']', '>');
11760
73.9k
    if (base < 0) {
11761
47.1k
        if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
11762
32.9k
            int tmp;
11763
11764
32.9k
      tmp = xmlCheckCdataPush(ctxt->input->cur,
11765
32.9k
                              XML_PARSER_BIG_BUFFER_SIZE, 0);
11766
32.9k
      if (tmp < 0) {
11767
2.63k
          tmp = -tmp;
11768
2.63k
          ctxt->input->cur += tmp;
11769
2.63k
          goto encoding_error;
11770
2.63k
      }
11771
30.2k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
11772
30.2k
          if (ctxt->sax->cdataBlock != NULL)
11773
17.3k
        ctxt->sax->cdataBlock(ctxt->userData,
11774
17.3k
                              ctxt->input->cur, tmp);
11775
12.8k
          else if (ctxt->sax->characters != NULL)
11776
12.8k
        ctxt->sax->characters(ctxt->userData,
11777
12.8k
                              ctxt->input->cur, tmp);
11778
30.2k
      }
11779
30.2k
      if (ctxt->instate == XML_PARSER_EOF)
11780
0
          goto done;
11781
30.2k
      SKIPL(tmp);
11782
30.2k
      ctxt->checkIndex = 0;
11783
30.2k
        }
11784
44.5k
        goto done;
11785
47.1k
    } else {
11786
26.7k
        int tmp;
11787
11788
26.7k
        tmp = xmlCheckCdataPush(ctxt->input->cur, base, 1);
11789
26.7k
        if ((tmp < 0) || (tmp != base)) {
11790
15.3k
      tmp = -tmp;
11791
15.3k
      ctxt->input->cur += tmp;
11792
15.3k
      goto encoding_error;
11793
15.3k
        }
11794
11.4k
        if ((ctxt->sax != NULL) && (base == 0) &&
11795
11.4k
            (ctxt->sax->cdataBlock != NULL) &&
11796
11.4k
            (!ctxt->disableSAX)) {
11797
      /*
11798
       * Special case to provide identical behaviour
11799
       * between pull and push parsers on enpty CDATA
11800
       * sections
11801
       */
11802
2.25k
       if ((ctxt->input->cur - ctxt->input->base >= 9) &&
11803
2.25k
           (!strncmp((const char *)&ctxt->input->cur[-9],
11804
2.25k
                     "<![CDATA[", 9)))
11805
2.24k
           ctxt->sax->cdataBlock(ctxt->userData,
11806
2.24k
                                 BAD_CAST "", 0);
11807
9.17k
        } else if ((ctxt->sax != NULL) && (base > 0) &&
11808
9.17k
      (!ctxt->disableSAX)) {
11809
8.07k
      if (ctxt->sax->cdataBlock != NULL)
11810
6.14k
          ctxt->sax->cdataBlock(ctxt->userData,
11811
6.14k
              ctxt->input->cur, base);
11812
1.92k
      else if (ctxt->sax->characters != NULL)
11813
1.92k
          ctxt->sax->characters(ctxt->userData,
11814
1.92k
              ctxt->input->cur, base);
11815
8.07k
        }
11816
11.4k
        if (ctxt->instate == XML_PARSER_EOF)
11817
0
      goto done;
11818
11.4k
        SKIPL(base + 3);
11819
11.4k
        ctxt->checkIndex = 0;
11820
11.4k
        ctxt->instate = XML_PARSER_CONTENT;
11821
#ifdef DEBUG_PUSH
11822
        xmlGenericError(xmlGenericErrorContext,
11823
          "PP: entering CONTENT\n");
11824
#endif
11825
11.4k
    }
11826
11.4k
    break;
11827
73.9k
      }
11828
551k
            case XML_PARSER_MISC:
11829
551k
    SKIP_BLANKS;
11830
551k
    if (ctxt->input->buf == NULL)
11831
0
        avail = ctxt->input->length -
11832
0
                (ctxt->input->cur - ctxt->input->base);
11833
551k
    else
11834
551k
        avail = xmlBufUse(ctxt->input->buf->buffer) -
11835
551k
                (ctxt->input->cur - ctxt->input->base);
11836
551k
    if (avail < 2)
11837
6.56k
        goto done;
11838
544k
    cur = ctxt->input->cur[0];
11839
544k
    next = ctxt->input->cur[1];
11840
544k
          if ((cur == '<') && (next == '?')) {
11841
35.3k
        if ((!terminate) &&
11842
35.3k
            (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11843
3.31k
                        ctxt->progressive = XML_PARSER_PI;
11844
3.31k
      goto done;
11845
3.31k
                    }
11846
#ifdef DEBUG_PUSH
11847
        xmlGenericError(xmlGenericErrorContext,
11848
          "PP: Parsing PI\n");
11849
#endif
11850
32.0k
        xmlParsePI(ctxt);
11851
32.0k
        if (ctxt->instate == XML_PARSER_EOF)
11852
0
      goto done;
11853
32.0k
        ctxt->instate = XML_PARSER_MISC;
11854
32.0k
                    ctxt->progressive = 1;
11855
32.0k
        ctxt->checkIndex = 0;
11856
509k
    } else if ((cur == '<') && (next == '!') &&
11857
509k
        (ctxt->input->cur[2] == '-') &&
11858
509k
        (ctxt->input->cur[3] == '-')) {
11859
22.7k
        if ((!terminate) &&
11860
22.7k
            (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11861
9.49k
                        ctxt->progressive = XML_PARSER_COMMENT;
11862
9.49k
      goto done;
11863
9.49k
                    }
11864
#ifdef DEBUG_PUSH
11865
        xmlGenericError(xmlGenericErrorContext,
11866
          "PP: Parsing Comment\n");
11867
#endif
11868
13.2k
        xmlParseComment(ctxt);
11869
13.2k
        if (ctxt->instate == XML_PARSER_EOF)
11870
0
      goto done;
11871
13.2k
        ctxt->instate = XML_PARSER_MISC;
11872
13.2k
                    ctxt->progressive = 1;
11873
13.2k
        ctxt->checkIndex = 0;
11874
486k
    } else if ((cur == '<') && (next == '!') &&
11875
486k
        (ctxt->input->cur[2] == 'D') &&
11876
486k
        (ctxt->input->cur[3] == 'O') &&
11877
486k
        (ctxt->input->cur[4] == 'C') &&
11878
486k
        (ctxt->input->cur[5] == 'T') &&
11879
486k
        (ctxt->input->cur[6] == 'Y') &&
11880
486k
        (ctxt->input->cur[7] == 'P') &&
11881
486k
        (ctxt->input->cur[8] == 'E')) {
11882
198k
        if ((!terminate) &&
11883
198k
            (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) {
11884
21.0k
                        ctxt->progressive = XML_PARSER_DTD;
11885
21.0k
      goto done;
11886
21.0k
                    }
11887
#ifdef DEBUG_PUSH
11888
        xmlGenericError(xmlGenericErrorContext,
11889
          "PP: Parsing internal subset\n");
11890
#endif
11891
177k
        ctxt->inSubset = 1;
11892
177k
                    ctxt->progressive = 0;
11893
177k
        ctxt->checkIndex = 0;
11894
177k
        xmlParseDocTypeDecl(ctxt);
11895
177k
        if (ctxt->instate == XML_PARSER_EOF)
11896
0
      goto done;
11897
177k
        if (RAW == '[') {
11898
133k
      ctxt->instate = XML_PARSER_DTD;
11899
#ifdef DEBUG_PUSH
11900
      xmlGenericError(xmlGenericErrorContext,
11901
        "PP: entering DTD\n");
11902
#endif
11903
133k
        } else {
11904
      /*
11905
       * Create and update the external subset.
11906
       */
11907
43.5k
      ctxt->inSubset = 2;
11908
43.5k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11909
43.5k
          (ctxt->sax->externalSubset != NULL))
11910
41.4k
          ctxt->sax->externalSubset(ctxt->userData,
11911
41.4k
            ctxt->intSubName, ctxt->extSubSystem,
11912
41.4k
            ctxt->extSubURI);
11913
43.5k
      ctxt->inSubset = 0;
11914
43.5k
      xmlCleanSpecialAttr(ctxt);
11915
43.5k
      ctxt->instate = XML_PARSER_PROLOG;
11916
#ifdef DEBUG_PUSH
11917
      xmlGenericError(xmlGenericErrorContext,
11918
        "PP: entering PROLOG\n");
11919
#endif
11920
43.5k
        }
11921
288k
    } else if ((cur == '<') && (next == '!') &&
11922
288k
               (avail < 9)) {
11923
12.2k
        goto done;
11924
276k
    } else {
11925
276k
        ctxt->instate = XML_PARSER_START_TAG;
11926
276k
        ctxt->progressive = XML_PARSER_START_TAG;
11927
276k
        xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11928
#ifdef DEBUG_PUSH
11929
        xmlGenericError(xmlGenericErrorContext,
11930
          "PP: entering START_TAG\n");
11931
#endif
11932
276k
    }
11933
498k
    break;
11934
498k
            case XML_PARSER_PROLOG:
11935
159k
    SKIP_BLANKS;
11936
159k
    if (ctxt->input->buf == NULL)
11937
0
        avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11938
159k
    else
11939
159k
        avail = xmlBufUse(ctxt->input->buf->buffer) -
11940
159k
                            (ctxt->input->cur - ctxt->input->base);
11941
159k
    if (avail < 2)
11942
3.15k
        goto done;
11943
156k
    cur = ctxt->input->cur[0];
11944
156k
    next = ctxt->input->cur[1];
11945
156k
          if ((cur == '<') && (next == '?')) {
11946
7.66k
        if ((!terminate) &&
11947
7.66k
            (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11948
2.51k
                        ctxt->progressive = XML_PARSER_PI;
11949
2.51k
      goto done;
11950
2.51k
                    }
11951
#ifdef DEBUG_PUSH
11952
        xmlGenericError(xmlGenericErrorContext,
11953
          "PP: Parsing PI\n");
11954
#endif
11955
5.15k
        xmlParsePI(ctxt);
11956
5.15k
        if (ctxt->instate == XML_PARSER_EOF)
11957
0
      goto done;
11958
5.15k
        ctxt->instate = XML_PARSER_PROLOG;
11959
5.15k
                    ctxt->progressive = 1;
11960
148k
    } else if ((cur == '<') && (next == '!') &&
11961
148k
        (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11962
19.1k
        if ((!terminate) &&
11963
19.1k
            (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11964
2.58k
                        ctxt->progressive = XML_PARSER_COMMENT;
11965
2.58k
      goto done;
11966
2.58k
                    }
11967
#ifdef DEBUG_PUSH
11968
        xmlGenericError(xmlGenericErrorContext,
11969
          "PP: Parsing Comment\n");
11970
#endif
11971
16.5k
        xmlParseComment(ctxt);
11972
16.5k
        if (ctxt->instate == XML_PARSER_EOF)
11973
0
      goto done;
11974
16.5k
        ctxt->instate = XML_PARSER_PROLOG;
11975
16.5k
                    ctxt->progressive = 1;
11976
129k
    } else if ((cur == '<') && (next == '!') &&
11977
129k
               (avail < 4)) {
11978
262
        goto done;
11979
129k
    } else {
11980
129k
        ctxt->instate = XML_PARSER_START_TAG;
11981
129k
        if (ctxt->progressive == 0)
11982
113k
      ctxt->progressive = XML_PARSER_START_TAG;
11983
129k
        xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11984
#ifdef DEBUG_PUSH
11985
        xmlGenericError(xmlGenericErrorContext,
11986
          "PP: entering START_TAG\n");
11987
#endif
11988
129k
    }
11989
151k
    break;
11990
151k
            case XML_PARSER_EPILOG:
11991
121k
    SKIP_BLANKS;
11992
121k
    if (ctxt->input->buf == NULL)
11993
0
        avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11994
121k
    else
11995
121k
        avail = xmlBufUse(ctxt->input->buf->buffer) -
11996
121k
                            (ctxt->input->cur - ctxt->input->base);
11997
121k
    if (avail < 2)
11998
98.5k
        goto done;
11999
22.6k
    cur = ctxt->input->cur[0];
12000
22.6k
    next = ctxt->input->cur[1];
12001
22.6k
          if ((cur == '<') && (next == '?')) {
12002
4.82k
        if ((!terminate) &&
12003
4.82k
            (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
12004
2.26k
                        ctxt->progressive = XML_PARSER_PI;
12005
2.26k
      goto done;
12006
2.26k
                    }
12007
#ifdef DEBUG_PUSH
12008
        xmlGenericError(xmlGenericErrorContext,
12009
          "PP: Parsing PI\n");
12010
#endif
12011
2.55k
        xmlParsePI(ctxt);
12012
2.55k
        if (ctxt->instate == XML_PARSER_EOF)
12013
0
      goto done;
12014
2.55k
        ctxt->instate = XML_PARSER_EPILOG;
12015
2.55k
                    ctxt->progressive = 1;
12016
17.8k
    } else if ((cur == '<') && (next == '!') &&
12017
17.8k
        (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
12018
3.61k
        if ((!terminate) &&
12019
3.61k
            (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
12020
2.17k
                        ctxt->progressive = XML_PARSER_COMMENT;
12021
2.17k
      goto done;
12022
2.17k
                    }
12023
#ifdef DEBUG_PUSH
12024
        xmlGenericError(xmlGenericErrorContext,
12025
          "PP: Parsing Comment\n");
12026
#endif
12027
1.44k
        xmlParseComment(ctxt);
12028
1.44k
        if (ctxt->instate == XML_PARSER_EOF)
12029
0
      goto done;
12030
1.44k
        ctxt->instate = XML_PARSER_EPILOG;
12031
1.44k
                    ctxt->progressive = 1;
12032
14.2k
    } else if ((cur == '<') && (next == '!') &&
12033
14.2k
               (avail < 4)) {
12034
2.10k
        goto done;
12035
12.1k
    } else {
12036
12.1k
        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
12037
12.1k
        xmlHaltParser(ctxt);
12038
#ifdef DEBUG_PUSH
12039
        xmlGenericError(xmlGenericErrorContext,
12040
          "PP: entering EOF\n");
12041
#endif
12042
12.1k
        if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
12043
12.1k
      ctxt->sax->endDocument(ctxt->userData);
12044
12.1k
        goto done;
12045
12.1k
    }
12046
4.00k
    break;
12047
346k
            case XML_PARSER_DTD: {
12048
          /*
12049
     * Sorry but progressive parsing of the internal subset
12050
     * is not expected to be supported. We first check that
12051
     * the full content of the internal subset is available and
12052
     * the parsing is launched only at that point.
12053
     * Internal subset ends up with "']' S? '>'" in an unescaped
12054
     * section and not in a ']]>' sequence which are conditional
12055
     * sections (whoever argued to keep that crap in XML deserve
12056
     * a place in hell !).
12057
     */
12058
346k
    int base, i;
12059
346k
    xmlChar *buf;
12060
346k
          xmlChar quote = 0;
12061
346k
                size_t use;
12062
12063
346k
    base = ctxt->input->cur - ctxt->input->base;
12064
346k
    if (base < 0) return(0);
12065
346k
    if (ctxt->checkIndex > base)
12066
165k
        base = ctxt->checkIndex;
12067
346k
    buf = xmlBufContent(ctxt->input->buf->buffer);
12068
346k
                use = xmlBufUse(ctxt->input->buf->buffer);
12069
154M
    for (;(unsigned int) base < use; base++) {
12070
154M
        if (quote != 0) {
12071
80.7M
            if (buf[base] == quote)
12072
1.04M
          quote = 0;
12073
80.7M
      continue;
12074
80.7M
        }
12075
73.8M
        if ((quote == 0) && (buf[base] == '<')) {
12076
2.05M
            int found  = 0;
12077
      /* special handling of comments */
12078
2.05M
            if (((unsigned int) base + 4 < use) &&
12079
2.05M
          (buf[base + 1] == '!') &&
12080
2.05M
          (buf[base + 2] == '-') &&
12081
2.05M
          (buf[base + 3] == '-')) {
12082
31.0M
          for (;(unsigned int) base + 3 < use; base++) {
12083
30.9M
        if ((buf[base] == '-') &&
12084
30.9M
            (buf[base + 1] == '-') &&
12085
30.9M
            (buf[base + 2] == '>')) {
12086
366k
            found = 1;
12087
366k
            base += 2;
12088
366k
            break;
12089
366k
        }
12090
30.9M
                }
12091
395k
          if (!found) {
12092
#if 0
12093
              fprintf(stderr, "unfinished comment\n");
12094
#endif
12095
28.2k
              break; /* for */
12096
28.2k
                }
12097
366k
                continue;
12098
395k
      }
12099
2.05M
        }
12100
73.4M
        if (buf[base] == '"') {
12101
960k
            quote = '"';
12102
960k
      continue;
12103
960k
        }
12104
72.4M
        if (buf[base] == '\'') {
12105
140k
            quote = '\'';
12106
140k
      continue;
12107
140k
        }
12108
72.3M
        if (buf[base] == ']') {
12109
#if 0
12110
            fprintf(stderr, "%c%c%c%c: ", buf[base],
12111
              buf[base + 1], buf[base + 2], buf[base + 3]);
12112
#endif
12113
141k
            if ((unsigned int) base +1 >= use)
12114
494
          break;
12115
141k
      if (buf[base + 1] == ']') {
12116
          /* conditional crap, skip both ']' ! */
12117
6.39k
          base++;
12118
6.39k
          continue;
12119
6.39k
      }
12120
281k
            for (i = 1; (unsigned int) base + i < use; i++) {
12121
281k
          if (buf[base + i] == '>') {
12122
#if 0
12123
              fprintf(stderr, "found\n");
12124
#endif
12125
118k
              goto found_end_int_subset;
12126
118k
          }
12127
162k
          if (!IS_BLANK_CH(buf[base + i])) {
12128
#if 0
12129
              fprintf(stderr, "not found\n");
12130
#endif
12131
15.8k
              goto not_end_of_int_subset;
12132
15.8k
          }
12133
162k
      }
12134
#if 0
12135
      fprintf(stderr, "end of stream\n");
12136
#endif
12137
199
            break;
12138
12139
134k
        }
12140
72.2M
not_end_of_int_subset:
12141
72.2M
                    continue; /* for */
12142
72.3M
    }
12143
    /*
12144
     * We didn't found the end of the Internal subset
12145
     */
12146
228k
                if (quote == 0)
12147
174k
                    ctxt->checkIndex = base;
12148
53.5k
                else
12149
53.5k
                    ctxt->checkIndex = 0;
12150
#ifdef DEBUG_PUSH
12151
    if (next == 0)
12152
        xmlGenericError(xmlGenericErrorContext,
12153
          "PP: lookup of int subset end filed\n");
12154
#endif
12155
228k
          goto done;
12156
12157
118k
found_end_int_subset:
12158
118k
                ctxt->checkIndex = 0;
12159
118k
    xmlParseInternalSubset(ctxt);
12160
118k
    if (ctxt->instate == XML_PARSER_EOF)
12161
3.36k
        goto done;
12162
115k
    ctxt->inSubset = 2;
12163
115k
    if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
12164
115k
        (ctxt->sax->externalSubset != NULL))
12165
98.3k
        ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
12166
98.3k
          ctxt->extSubSystem, ctxt->extSubURI);
12167
115k
    ctxt->inSubset = 0;
12168
115k
    xmlCleanSpecialAttr(ctxt);
12169
115k
    if (ctxt->instate == XML_PARSER_EOF)
12170
1.33k
        goto done;
12171
113k
    ctxt->instate = XML_PARSER_PROLOG;
12172
113k
    ctxt->checkIndex = 0;
12173
#ifdef DEBUG_PUSH
12174
    xmlGenericError(xmlGenericErrorContext,
12175
      "PP: entering PROLOG\n");
12176
#endif
12177
113k
                break;
12178
115k
      }
12179
0
            case XML_PARSER_COMMENT:
12180
0
    xmlGenericError(xmlGenericErrorContext,
12181
0
      "PP: internal error, state == COMMENT\n");
12182
0
    ctxt->instate = XML_PARSER_CONTENT;
12183
#ifdef DEBUG_PUSH
12184
    xmlGenericError(xmlGenericErrorContext,
12185
      "PP: entering CONTENT\n");
12186
#endif
12187
0
    break;
12188
0
            case XML_PARSER_IGNORE:
12189
0
    xmlGenericError(xmlGenericErrorContext,
12190
0
      "PP: internal error, state == IGNORE");
12191
0
          ctxt->instate = XML_PARSER_DTD;
12192
#ifdef DEBUG_PUSH
12193
    xmlGenericError(xmlGenericErrorContext,
12194
      "PP: entering DTD\n");
12195
#endif
12196
0
          break;
12197
0
            case XML_PARSER_PI:
12198
0
    xmlGenericError(xmlGenericErrorContext,
12199
0
      "PP: internal error, state == PI\n");
12200
0
    ctxt->instate = XML_PARSER_CONTENT;
12201
#ifdef DEBUG_PUSH
12202
    xmlGenericError(xmlGenericErrorContext,
12203
      "PP: entering CONTENT\n");
12204
#endif
12205
0
    break;
12206
0
            case XML_PARSER_ENTITY_DECL:
12207
0
    xmlGenericError(xmlGenericErrorContext,
12208
0
      "PP: internal error, state == ENTITY_DECL\n");
12209
0
    ctxt->instate = XML_PARSER_DTD;
12210
#ifdef DEBUG_PUSH
12211
    xmlGenericError(xmlGenericErrorContext,
12212
      "PP: entering DTD\n");
12213
#endif
12214
0
    break;
12215
0
            case XML_PARSER_ENTITY_VALUE:
12216
0
    xmlGenericError(xmlGenericErrorContext,
12217
0
      "PP: internal error, state == ENTITY_VALUE\n");
12218
0
    ctxt->instate = XML_PARSER_CONTENT;
12219
#ifdef DEBUG_PUSH
12220
    xmlGenericError(xmlGenericErrorContext,
12221
      "PP: entering DTD\n");
12222
#endif
12223
0
    break;
12224
0
            case XML_PARSER_ATTRIBUTE_VALUE:
12225
0
    xmlGenericError(xmlGenericErrorContext,
12226
0
      "PP: internal error, state == ATTRIBUTE_VALUE\n");
12227
0
    ctxt->instate = XML_PARSER_START_TAG;
12228
#ifdef DEBUG_PUSH
12229
    xmlGenericError(xmlGenericErrorContext,
12230
      "PP: entering START_TAG\n");
12231
#endif
12232
0
    break;
12233
0
            case XML_PARSER_SYSTEM_LITERAL:
12234
0
    xmlGenericError(xmlGenericErrorContext,
12235
0
      "PP: internal error, state == SYSTEM_LITERAL\n");
12236
0
    ctxt->instate = XML_PARSER_START_TAG;
12237
#ifdef DEBUG_PUSH
12238
    xmlGenericError(xmlGenericErrorContext,
12239
      "PP: entering START_TAG\n");
12240
#endif
12241
0
    break;
12242
0
            case XML_PARSER_PUBLIC_LITERAL:
12243
0
    xmlGenericError(xmlGenericErrorContext,
12244
0
      "PP: internal error, state == PUBLIC_LITERAL\n");
12245
0
    ctxt->instate = XML_PARSER_START_TAG;
12246
#ifdef DEBUG_PUSH
12247
    xmlGenericError(xmlGenericErrorContext,
12248
      "PP: entering START_TAG\n");
12249
#endif
12250
0
    break;
12251
12.4M
  }
12252
12.4M
    }
12253
1.70M
done:
12254
#ifdef DEBUG_PUSH
12255
    xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
12256
#endif
12257
1.70M
    return(ret);
12258
17.9k
encoding_error:
12259
17.9k
    {
12260
17.9k
        char buffer[150];
12261
12262
17.9k
  snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
12263
17.9k
      ctxt->input->cur[0], ctxt->input->cur[1],
12264
17.9k
      ctxt->input->cur[2], ctxt->input->cur[3]);
12265
17.9k
  __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
12266
17.9k
         "Input is not proper UTF-8, indicate encoding !\n%s",
12267
17.9k
         BAD_CAST buffer, NULL);
12268
17.9k
    }
12269
17.9k
    return(0);
12270
1.92M
}
12271
12272
/**
12273
 * xmlParseCheckTransition:
12274
 * @ctxt:  an XML parser context
12275
 * @chunk:  a char array
12276
 * @size:  the size in byte of the chunk
12277
 *
12278
 * Check depending on the current parser state if the chunk given must be
12279
 * processed immediately or one need more data to advance on parsing.
12280
 *
12281
 * Returns -1 in case of error, 0 if the push is not needed and 1 if needed
12282
 */
12283
static int
12284
1.88M
xmlParseCheckTransition(xmlParserCtxtPtr ctxt, const char *chunk, int size) {
12285
1.88M
    if ((ctxt == NULL) || (chunk == NULL) || (size < 0))
12286
0
        return(-1);
12287
1.88M
    if (ctxt->instate == XML_PARSER_START_TAG) {
12288
589k
        if (memchr(chunk, '>', size) != NULL)
12289
286k
            return(1);
12290
302k
        return(0);
12291
589k
    }
12292
1.29M
    if (ctxt->progressive == XML_PARSER_COMMENT) {
12293
56.1k
        if (memchr(chunk, '>', size) != NULL)
12294
27.5k
            return(1);
12295
28.6k
        return(0);
12296
56.1k
    }
12297
1.23M
    if (ctxt->instate == XML_PARSER_CDATA_SECTION) {
12298
107k
        if (memchr(chunk, '>', size) != NULL)
12299
47.4k
            return(1);
12300
59.7k
        return(0);
12301
107k
    }
12302
1.13M
    if (ctxt->progressive == XML_PARSER_PI) {
12303
26.4k
        if (memchr(chunk, '>', size) != NULL)
12304
15.9k
            return(1);
12305
10.4k
        return(0);
12306
26.4k
    }
12307
1.10M
    if (ctxt->instate == XML_PARSER_END_TAG) {
12308
37.8k
        if (memchr(chunk, '>', size) != NULL)
12309
24.6k
            return(1);
12310
13.1k
        return(0);
12311
37.8k
    }
12312
1.06M
    if ((ctxt->progressive == XML_PARSER_DTD) ||
12313
1.06M
        (ctxt->instate == XML_PARSER_DTD)) {
12314
325k
        if (memchr(chunk, '>', size) != NULL)
12315
211k
            return(1);
12316
113k
        return(0);
12317
325k
    }
12318
742k
    return(1);
12319
1.06M
}
12320
12321
/**
12322
 * xmlParseChunk:
12323
 * @ctxt:  an XML parser context
12324
 * @chunk:  an char array
12325
 * @size:  the size in byte of the chunk
12326
 * @terminate:  last chunk indicator
12327
 *
12328
 * Parse a Chunk of memory
12329
 *
12330
 * Returns zero if no error, the xmlParserErrors otherwise.
12331
 */
12332
int
12333
xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
12334
3.02M
              int terminate) {
12335
3.02M
    int end_in_lf = 0;
12336
3.02M
    int remain = 0;
12337
3.02M
    size_t old_avail = 0;
12338
3.02M
    size_t avail = 0;
12339
12340
3.02M
    if (ctxt == NULL)
12341
0
        return(XML_ERR_INTERNAL_ERROR);
12342
3.02M
    if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12343
578k
        return(ctxt->errNo);
12344
2.44M
    if (ctxt->instate == XML_PARSER_EOF)
12345
184
        return(-1);
12346
2.44M
    if (ctxt->instate == XML_PARSER_START)
12347
577k
        xmlDetectSAX2(ctxt);
12348
2.44M
    if ((size > 0) && (chunk != NULL) && (!terminate) &&
12349
2.44M
        (chunk[size - 1] == '\r')) {
12350
4.03k
  end_in_lf = 1;
12351
4.03k
  size--;
12352
4.03k
    }
12353
12354
2.45M
xmldecl_done:
12355
12356
2.45M
    if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
12357
2.45M
        (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF))  {
12358
2.14M
  size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12359
2.14M
  size_t cur = ctxt->input->cur - ctxt->input->base;
12360
2.14M
  int res;
12361
12362
2.14M
        old_avail = xmlBufUse(ctxt->input->buf->buffer);
12363
        /*
12364
         * Specific handling if we autodetected an encoding, we should not
12365
         * push more than the first line ... which depend on the encoding
12366
         * And only push the rest once the final encoding was detected
12367
         */
12368
2.14M
        if ((ctxt->instate == XML_PARSER_START) && (ctxt->input != NULL) &&
12369
2.14M
            (ctxt->input->buf != NULL) && (ctxt->input->buf->encoder != NULL)) {
12370
24.5k
            unsigned int len = 45;
12371
12372
24.5k
            if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12373
24.5k
                               BAD_CAST "UTF-16")) ||
12374
24.5k
                (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12375
6.91k
                               BAD_CAST "UTF16")))
12376
17.6k
                len = 90;
12377
6.91k
            else if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12378
6.91k
                                    BAD_CAST "UCS-4")) ||
12379
6.91k
                     (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12380
6.75k
                                    BAD_CAST "UCS4")))
12381
154
                len = 180;
12382
12383
24.5k
            if (ctxt->input->buf->rawconsumed < len)
12384
2.20k
                len -= ctxt->input->buf->rawconsumed;
12385
12386
            /*
12387
             * Change size for reading the initial declaration only
12388
             * if size is greater than len. Otherwise, memmove in xmlBufferAdd
12389
             * will blindly copy extra bytes from memory.
12390
             */
12391
24.5k
            if ((unsigned int) size > len) {
12392
16.5k
                remain = size - len;
12393
16.5k
                size = len;
12394
16.5k
            } else {
12395
8.00k
                remain = 0;
12396
8.00k
            }
12397
24.5k
        }
12398
2.14M
  res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12399
2.14M
        xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
12400
2.14M
  if (res < 0) {
12401
599
      ctxt->errNo = XML_PARSER_EOF;
12402
599
      xmlHaltParser(ctxt);
12403
599
      return (XML_PARSER_EOF);
12404
599
  }
12405
#ifdef DEBUG_PUSH
12406
  xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12407
#endif
12408
12409
2.14M
    } else if (ctxt->instate != XML_PARSER_EOF) {
12410
315k
  if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
12411
315k
      xmlParserInputBufferPtr in = ctxt->input->buf;
12412
315k
      if ((in->encoder != NULL) && (in->buffer != NULL) &&
12413
315k
        (in->raw != NULL)) {
12414
11.2k
    int nbchars;
12415
11.2k
    size_t base = xmlBufGetInputBase(in->buffer, ctxt->input);
12416
11.2k
    size_t current = ctxt->input->cur - ctxt->input->base;
12417
12418
11.2k
    nbchars = xmlCharEncInput(in, terminate);
12419
11.2k
    xmlBufSetInputBaseCur(in->buffer, ctxt->input, base, current);
12420
11.2k
    if (nbchars < 0) {
12421
        /* TODO 2.6.0 */
12422
421
        xmlGenericError(xmlGenericErrorContext,
12423
421
            "xmlParseChunk: encoder error\n");
12424
421
                    xmlHaltParser(ctxt);
12425
421
        return(XML_ERR_INVALID_ENCODING);
12426
421
    }
12427
11.2k
      }
12428
315k
  }
12429
315k
    }
12430
2.45M
    if (remain != 0) {
12431
16.3k
        xmlParseTryOrFinish(ctxt, 0);
12432
2.44M
    } else {
12433
2.44M
        if ((ctxt->input != NULL) && (ctxt->input->buf != NULL))
12434
2.44M
            avail = xmlBufUse(ctxt->input->buf->buffer);
12435
        /*
12436
         * Depending on the current state it may not be such
12437
         * a good idea to try parsing if there is nothing in the chunk
12438
         * which would be worth doing a parser state transition and we
12439
         * need to wait for more data
12440
         */
12441
2.44M
        if ((terminate) || (avail > XML_MAX_TEXT_LENGTH) ||
12442
2.44M
            (old_avail == 0) || (avail == 0) ||
12443
2.44M
            (xmlParseCheckTransition(ctxt,
12444
1.88M
                       (const char *)&ctxt->input->base[old_avail],
12445
1.88M
                                     avail - old_avail)))
12446
1.91M
            xmlParseTryOrFinish(ctxt, terminate);
12447
2.44M
    }
12448
2.45M
    if (ctxt->instate == XML_PARSER_EOF)
12449
163k
        return(ctxt->errNo);
12450
12451
2.29M
    if ((ctxt->input != NULL) &&
12452
2.29M
         (((ctxt->input->end - ctxt->input->cur) > XML_MAX_LOOKUP_LIMIT) ||
12453
2.29M
         ((ctxt->input->cur - ctxt->input->base) > XML_MAX_LOOKUP_LIMIT)) &&
12454
2.29M
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
12455
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
12456
0
        xmlHaltParser(ctxt);
12457
0
    }
12458
2.29M
    if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12459
95.0k
        return(ctxt->errNo);
12460
12461
2.19M
    if (remain != 0) {
12462
15.8k
        chunk += size;
12463
15.8k
        size = remain;
12464
15.8k
        remain = 0;
12465
15.8k
        goto xmldecl_done;
12466
15.8k
    }
12467
2.18M
    if ((end_in_lf == 1) && (ctxt->input != NULL) &&
12468
2.18M
        (ctxt->input->buf != NULL)) {
12469
3.25k
  size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
12470
3.25k
           ctxt->input);
12471
3.25k
  size_t current = ctxt->input->cur - ctxt->input->base;
12472
12473
3.25k
  xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
12474
12475
3.25k
  xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
12476
3.25k
            base, current);
12477
3.25k
    }
12478
2.18M
    if (terminate) {
12479
  /*
12480
   * Check for termination
12481
   */
12482
158k
  int cur_avail = 0;
12483
12484
158k
  if (ctxt->input != NULL) {
12485
158k
      if (ctxt->input->buf == NULL)
12486
0
    cur_avail = ctxt->input->length -
12487
0
          (ctxt->input->cur - ctxt->input->base);
12488
158k
      else
12489
158k
    cur_avail = xmlBufUse(ctxt->input->buf->buffer) -
12490
158k
                    (ctxt->input->cur - ctxt->input->base);
12491
158k
  }
12492
12493
158k
  if ((ctxt->instate != XML_PARSER_EOF) &&
12494
158k
      (ctxt->instate != XML_PARSER_EPILOG)) {
12495
66.3k
      xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
12496
66.3k
  }
12497
158k
  if ((ctxt->instate == XML_PARSER_EPILOG) && (cur_avail > 0)) {
12498
924
      xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
12499
924
  }
12500
158k
  if (ctxt->instate != XML_PARSER_EOF) {
12501
158k
      if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
12502
158k
    ctxt->sax->endDocument(ctxt->userData);
12503
158k
  }
12504
158k
  ctxt->instate = XML_PARSER_EOF;
12505
158k
    }
12506
2.18M
    if (ctxt->wellFormed == 0)
12507
627k
  return((xmlParserErrors) ctxt->errNo);
12508
1.55M
    else
12509
1.55M
        return(0);
12510
2.18M
}
12511
12512
/************************************************************************
12513
 *                  *
12514
 *    I/O front end functions to the parser     *
12515
 *                  *
12516
 ************************************************************************/
12517
12518
/**
12519
 * xmlCreatePushParserCtxt:
12520
 * @sax:  a SAX handler
12521
 * @user_data:  The user data returned on SAX callbacks
12522
 * @chunk:  a pointer to an array of chars
12523
 * @size:  number of chars in the array
12524
 * @filename:  an optional file name or URI
12525
 *
12526
 * Create a parser context for using the XML parser in push mode.
12527
 * If @buffer and @size are non-NULL, the data is used to detect
12528
 * the encoding.  The remaining characters will be parsed so they
12529
 * don't need to be fed in again through xmlParseChunk.
12530
 * To allow content encoding detection, @size should be >= 4
12531
 * The value of @filename is used for fetching external entities
12532
 * and error/warning reports.
12533
 *
12534
 * Returns the new parser context or NULL
12535
 */
12536
12537
xmlParserCtxtPtr
12538
xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12539
473k
                        const char *chunk, int size, const char *filename) {
12540
473k
    xmlParserCtxtPtr ctxt;
12541
473k
    xmlParserInputPtr inputStream;
12542
473k
    xmlParserInputBufferPtr buf;
12543
473k
    xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
12544
12545
    /*
12546
     * plug some encoding conversion routines
12547
     */
12548
473k
    if ((chunk != NULL) && (size >= 4))
12549
234k
  enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
12550
12551
473k
    buf = xmlAllocParserInputBuffer(enc);
12552
473k
    if (buf == NULL) return(NULL);
12553
12554
473k
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
12555
473k
    if (ctxt == NULL) {
12556
0
        xmlErrMemory(NULL, "creating parser: out of memory\n");
12557
0
  xmlFreeParserInputBuffer(buf);
12558
0
  return(NULL);
12559
0
    }
12560
473k
    ctxt->dictNames = 1;
12561
473k
    if (filename == NULL) {
12562
235k
  ctxt->directory = NULL;
12563
237k
    } else {
12564
237k
        ctxt->directory = xmlParserGetDirectory(filename);
12565
237k
    }
12566
12567
473k
    inputStream = xmlNewInputStream(ctxt);
12568
473k
    if (inputStream == NULL) {
12569
0
  xmlFreeParserCtxt(ctxt);
12570
0
  xmlFreeParserInputBuffer(buf);
12571
0
  return(NULL);
12572
0
    }
12573
12574
473k
    if (filename == NULL)
12575
235k
  inputStream->filename = NULL;
12576
237k
    else {
12577
237k
  inputStream->filename = (char *)
12578
237k
      xmlCanonicPath((const xmlChar *) filename);
12579
237k
  if (inputStream->filename == NULL) {
12580
0
      xmlFreeParserCtxt(ctxt);
12581
0
      xmlFreeParserInputBuffer(buf);
12582
0
      return(NULL);
12583
0
  }
12584
237k
    }
12585
473k
    inputStream->buf = buf;
12586
473k
    xmlBufResetInput(inputStream->buf->buffer, inputStream);
12587
473k
    inputPush(ctxt, inputStream);
12588
12589
    /*
12590
     * If the caller didn't provide an initial 'chunk' for determining
12591
     * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
12592
     * that it can be automatically determined later
12593
     */
12594
473k
    if ((size == 0) || (chunk == NULL)) {
12595
238k
  ctxt->charset = XML_CHAR_ENCODING_NONE;
12596
238k
    } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
12597
234k
  size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12598
234k
  size_t cur = ctxt->input->cur - ctxt->input->base;
12599
12600
234k
  xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12601
12602
234k
        xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
12603
#ifdef DEBUG_PUSH
12604
  xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12605
#endif
12606
234k
    }
12607
12608
473k
    if (enc != XML_CHAR_ENCODING_NONE) {
12609
87.7k
        xmlSwitchEncoding(ctxt, enc);
12610
87.7k
    }
12611
12612
473k
    return(ctxt);
12613
473k
}
12614
#endif /* LIBXML_PUSH_ENABLED */
12615
12616
/**
12617
 * xmlHaltParser:
12618
 * @ctxt:  an XML parser context
12619
 *
12620
 * Blocks further parser processing don't override error
12621
 * for internal use
12622
 */
12623
static void
12624
1.17M
xmlHaltParser(xmlParserCtxtPtr ctxt) {
12625
1.17M
    if (ctxt == NULL)
12626
0
        return;
12627
1.17M
    ctxt->instate = XML_PARSER_EOF;
12628
1.17M
    ctxt->disableSAX = 1;
12629
1.17M
    while (ctxt->inputNr > 1)
12630
1.50k
        xmlFreeInputStream(inputPop(ctxt));
12631
1.17M
    if (ctxt->input != NULL) {
12632
        /*
12633
   * in case there was a specific allocation deallocate before
12634
   * overriding base
12635
   */
12636
1.17M
        if (ctxt->input->free != NULL) {
12637
0
      ctxt->input->free((xmlChar *) ctxt->input->base);
12638
0
      ctxt->input->free = NULL;
12639
0
  }
12640
1.17M
        if (ctxt->input->buf != NULL) {
12641
1.06M
            xmlFreeParserInputBuffer(ctxt->input->buf);
12642
1.06M
            ctxt->input->buf = NULL;
12643
1.06M
        }
12644
1.17M
  ctxt->input->cur = BAD_CAST"";
12645
1.17M
        ctxt->input->length = 0;
12646
1.17M
  ctxt->input->base = ctxt->input->cur;
12647
1.17M
        ctxt->input->end = ctxt->input->cur;
12648
1.17M
    }
12649
1.17M
}
12650
12651
/**
12652
 * xmlStopParser:
12653
 * @ctxt:  an XML parser context
12654
 *
12655
 * Blocks further parser processing
12656
 */
12657
void
12658
236k
xmlStopParser(xmlParserCtxtPtr ctxt) {
12659
236k
    if (ctxt == NULL)
12660
0
        return;
12661
236k
    xmlHaltParser(ctxt);
12662
236k
    ctxt->errNo = XML_ERR_USER_STOP;
12663
236k
}
12664
12665
/**
12666
 * xmlCreateIOParserCtxt:
12667
 * @sax:  a SAX handler
12668
 * @user_data:  The user data returned on SAX callbacks
12669
 * @ioread:  an I/O read function
12670
 * @ioclose:  an I/O close function
12671
 * @ioctx:  an I/O handler
12672
 * @enc:  the charset encoding if known
12673
 *
12674
 * Create a parser context for using the XML parser with an existing
12675
 * I/O stream
12676
 *
12677
 * Returns the new parser context or NULL
12678
 */
12679
xmlParserCtxtPtr
12680
xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12681
  xmlInputReadCallback   ioread, xmlInputCloseCallback  ioclose,
12682
0
  void *ioctx, xmlCharEncoding enc) {
12683
0
    xmlParserCtxtPtr ctxt;
12684
0
    xmlParserInputPtr inputStream;
12685
0
    xmlParserInputBufferPtr buf;
12686
12687
0
    if (ioread == NULL) return(NULL);
12688
12689
0
    buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
12690
0
    if (buf == NULL) {
12691
0
        if (ioclose != NULL)
12692
0
            ioclose(ioctx);
12693
0
        return (NULL);
12694
0
    }
12695
12696
0
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
12697
0
    if (ctxt == NULL) {
12698
0
  xmlFreeParserInputBuffer(buf);
12699
0
  return(NULL);
12700
0
    }
12701
12702
0
    inputStream = xmlNewIOInputStream(ctxt, buf, enc);
12703
0
    if (inputStream == NULL) {
12704
0
  xmlFreeParserCtxt(ctxt);
12705
0
  return(NULL);
12706
0
    }
12707
0
    inputPush(ctxt, inputStream);
12708
12709
0
    return(ctxt);
12710
0
}
12711
12712
#ifdef LIBXML_VALID_ENABLED
12713
/************************************************************************
12714
 *                  *
12715
 *    Front ends when parsing a DTD       *
12716
 *                  *
12717
 ************************************************************************/
12718
12719
/**
12720
 * xmlIOParseDTD:
12721
 * @sax:  the SAX handler block or NULL
12722
 * @input:  an Input Buffer
12723
 * @enc:  the charset encoding if known
12724
 *
12725
 * Load and parse a DTD
12726
 *
12727
 * Returns the resulting xmlDtdPtr or NULL in case of error.
12728
 * @input will be freed by the function in any case.
12729
 */
12730
12731
xmlDtdPtr
12732
xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
12733
0
        xmlCharEncoding enc) {
12734
0
    xmlDtdPtr ret = NULL;
12735
0
    xmlParserCtxtPtr ctxt;
12736
0
    xmlParserInputPtr pinput = NULL;
12737
0
    xmlChar start[4];
12738
12739
0
    if (input == NULL)
12740
0
  return(NULL);
12741
12742
0
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
12743
0
    if (ctxt == NULL) {
12744
0
        xmlFreeParserInputBuffer(input);
12745
0
  return(NULL);
12746
0
    }
12747
12748
    /* We are loading a DTD */
12749
0
    ctxt->options |= XML_PARSE_DTDLOAD;
12750
12751
0
    xmlDetectSAX2(ctxt);
12752
12753
    /*
12754
     * generate a parser input from the I/O handler
12755
     */
12756
12757
0
    pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12758
0
    if (pinput == NULL) {
12759
0
        xmlFreeParserInputBuffer(input);
12760
0
  xmlFreeParserCtxt(ctxt);
12761
0
  return(NULL);
12762
0
    }
12763
12764
    /*
12765
     * plug some encoding conversion routines here.
12766
     */
12767
0
    if (xmlPushInput(ctxt, pinput) < 0) {
12768
0
  xmlFreeParserCtxt(ctxt);
12769
0
  return(NULL);
12770
0
    }
12771
0
    if (enc != XML_CHAR_ENCODING_NONE) {
12772
0
        xmlSwitchEncoding(ctxt, enc);
12773
0
    }
12774
12775
0
    pinput->filename = NULL;
12776
0
    pinput->line = 1;
12777
0
    pinput->col = 1;
12778
0
    pinput->base = ctxt->input->cur;
12779
0
    pinput->cur = ctxt->input->cur;
12780
0
    pinput->free = NULL;
12781
12782
    /*
12783
     * let's parse that entity knowing it's an external subset.
12784
     */
12785
0
    ctxt->inSubset = 2;
12786
0
    ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12787
0
    if (ctxt->myDoc == NULL) {
12788
0
  xmlErrMemory(ctxt, "New Doc failed");
12789
0
  return(NULL);
12790
0
    }
12791
0
    ctxt->myDoc->properties = XML_DOC_INTERNAL;
12792
0
    ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12793
0
                                 BAD_CAST "none", BAD_CAST "none");
12794
12795
0
    if ((enc == XML_CHAR_ENCODING_NONE) &&
12796
0
        ((ctxt->input->end - ctxt->input->cur) >= 4)) {
12797
  /*
12798
   * Get the 4 first bytes and decode the charset
12799
   * if enc != XML_CHAR_ENCODING_NONE
12800
   * plug some encoding conversion routines.
12801
   */
12802
0
  start[0] = RAW;
12803
0
  start[1] = NXT(1);
12804
0
  start[2] = NXT(2);
12805
0
  start[3] = NXT(3);
12806
0
  enc = xmlDetectCharEncoding(start, 4);
12807
0
  if (enc != XML_CHAR_ENCODING_NONE) {
12808
0
      xmlSwitchEncoding(ctxt, enc);
12809
0
  }
12810
0
    }
12811
12812
0
    xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
12813
12814
0
    if (ctxt->myDoc != NULL) {
12815
0
  if (ctxt->wellFormed) {
12816
0
      ret = ctxt->myDoc->extSubset;
12817
0
      ctxt->myDoc->extSubset = NULL;
12818
0
      if (ret != NULL) {
12819
0
    xmlNodePtr tmp;
12820
12821
0
    ret->doc = NULL;
12822
0
    tmp = ret->children;
12823
0
    while (tmp != NULL) {
12824
0
        tmp->doc = NULL;
12825
0
        tmp = tmp->next;
12826
0
    }
12827
0
      }
12828
0
  } else {
12829
0
      ret = NULL;
12830
0
  }
12831
0
        xmlFreeDoc(ctxt->myDoc);
12832
0
        ctxt->myDoc = NULL;
12833
0
    }
12834
0
    xmlFreeParserCtxt(ctxt);
12835
12836
0
    return(ret);
12837
0
}
12838
12839
/**
12840
 * xmlSAXParseDTD:
12841
 * @sax:  the SAX handler block
12842
 * @ExternalID:  a NAME* containing the External ID of the DTD
12843
 * @SystemID:  a NAME* containing the URL to the DTD
12844
 *
12845
 * DEPRECATED: Don't use.
12846
 *
12847
 * Load and parse an external subset.
12848
 *
12849
 * Returns the resulting xmlDtdPtr or NULL in case of error.
12850
 */
12851
12852
xmlDtdPtr
12853
xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
12854
0
                          const xmlChar *SystemID) {
12855
0
    xmlDtdPtr ret = NULL;
12856
0
    xmlParserCtxtPtr ctxt;
12857
0
    xmlParserInputPtr input = NULL;
12858
0
    xmlCharEncoding enc;
12859
0
    xmlChar* systemIdCanonic;
12860
12861
0
    if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
12862
12863
0
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
12864
0
    if (ctxt == NULL) {
12865
0
  return(NULL);
12866
0
    }
12867
12868
    /* We are loading a DTD */
12869
0
    ctxt->options |= XML_PARSE_DTDLOAD;
12870
12871
    /*
12872
     * Canonicalise the system ID
12873
     */
12874
0
    systemIdCanonic = xmlCanonicPath(SystemID);
12875
0
    if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
12876
0
  xmlFreeParserCtxt(ctxt);
12877
0
  return(NULL);
12878
0
    }
12879
12880
    /*
12881
     * Ask the Entity resolver to load the damn thing
12882
     */
12883
12884
0
    if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
12885
0
  input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
12886
0
                                   systemIdCanonic);
12887
0
    if (input == NULL) {
12888
0
  xmlFreeParserCtxt(ctxt);
12889
0
  if (systemIdCanonic != NULL)
12890
0
      xmlFree(systemIdCanonic);
12891
0
  return(NULL);
12892
0
    }
12893
12894
    /*
12895
     * plug some encoding conversion routines here.
12896
     */
12897
0
    if (xmlPushInput(ctxt, input) < 0) {
12898
0
  xmlFreeParserCtxt(ctxt);
12899
0
  if (systemIdCanonic != NULL)
12900
0
      xmlFree(systemIdCanonic);
12901
0
  return(NULL);
12902
0
    }
12903
0
    if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12904
0
  enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
12905
0
  xmlSwitchEncoding(ctxt, enc);
12906
0
    }
12907
12908
0
    if (input->filename == NULL)
12909
0
  input->filename = (char *) systemIdCanonic;
12910
0
    else
12911
0
  xmlFree(systemIdCanonic);
12912
0
    input->line = 1;
12913
0
    input->col = 1;
12914
0
    input->base = ctxt->input->cur;
12915
0
    input->cur = ctxt->input->cur;
12916
0
    input->free = NULL;
12917
12918
    /*
12919
     * let's parse that entity knowing it's an external subset.
12920
     */
12921
0
    ctxt->inSubset = 2;
12922
0
    ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12923
0
    if (ctxt->myDoc == NULL) {
12924
0
  xmlErrMemory(ctxt, "New Doc failed");
12925
0
  xmlFreeParserCtxt(ctxt);
12926
0
  return(NULL);
12927
0
    }
12928
0
    ctxt->myDoc->properties = XML_DOC_INTERNAL;
12929
0
    ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12930
0
                                 ExternalID, SystemID);
12931
0
    xmlParseExternalSubset(ctxt, ExternalID, SystemID);
12932
12933
0
    if (ctxt->myDoc != NULL) {
12934
0
  if (ctxt->wellFormed) {
12935
0
      ret = ctxt->myDoc->extSubset;
12936
0
      ctxt->myDoc->extSubset = NULL;
12937
0
      if (ret != NULL) {
12938
0
    xmlNodePtr tmp;
12939
12940
0
    ret->doc = NULL;
12941
0
    tmp = ret->children;
12942
0
    while (tmp != NULL) {
12943
0
        tmp->doc = NULL;
12944
0
        tmp = tmp->next;
12945
0
    }
12946
0
      }
12947
0
  } else {
12948
0
      ret = NULL;
12949
0
  }
12950
0
        xmlFreeDoc(ctxt->myDoc);
12951
0
        ctxt->myDoc = NULL;
12952
0
    }
12953
0
    xmlFreeParserCtxt(ctxt);
12954
12955
0
    return(ret);
12956
0
}
12957
12958
12959
/**
12960
 * xmlParseDTD:
12961
 * @ExternalID:  a NAME* containing the External ID of the DTD
12962
 * @SystemID:  a NAME* containing the URL to the DTD
12963
 *
12964
 * Load and parse an external subset.
12965
 *
12966
 * Returns the resulting xmlDtdPtr or NULL in case of error.
12967
 */
12968
12969
xmlDtdPtr
12970
0
xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
12971
0
    return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
12972
0
}
12973
#endif /* LIBXML_VALID_ENABLED */
12974
12975
/************************************************************************
12976
 *                  *
12977
 *    Front ends when parsing an Entity     *
12978
 *                  *
12979
 ************************************************************************/
12980
12981
/**
12982
 * xmlParseCtxtExternalEntity:
12983
 * @ctx:  the existing parsing context
12984
 * @URL:  the URL for the entity to load
12985
 * @ID:  the System ID for the entity to load
12986
 * @lst:  the return value for the set of parsed nodes
12987
 *
12988
 * Parse an external general entity within an existing parsing context
12989
 * An external general parsed entity is well-formed if it matches the
12990
 * production labeled extParsedEnt.
12991
 *
12992
 * [78] extParsedEnt ::= TextDecl? content
12993
 *
12994
 * Returns 0 if the entity is well formed, -1 in case of args problem and
12995
 *    the parser error code otherwise
12996
 */
12997
12998
int
12999
xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
13000
0
                 const xmlChar *ID, xmlNodePtr *lst) {
13001
0
    void *userData;
13002
13003
0
    if (ctx == NULL) return(-1);
13004
    /*
13005
     * If the user provided their own SAX callbacks, then reuse the
13006
     * userData callback field, otherwise the expected setup in a
13007
     * DOM builder is to have userData == ctxt
13008
     */
13009
0
    if (ctx->userData == ctx)
13010
0
        userData = NULL;
13011
0
    else
13012
0
        userData = ctx->userData;
13013
0
    return xmlParseExternalEntityPrivate(ctx->myDoc, ctx, ctx->sax,
13014
0
                                         userData, ctx->depth + 1,
13015
0
                                         URL, ID, lst);
13016
0
}
13017
13018
/**
13019
 * xmlParseExternalEntityPrivate:
13020
 * @doc:  the document the chunk pertains to
13021
 * @oldctxt:  the previous parser context if available
13022
 * @sax:  the SAX handler block (possibly NULL)
13023
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
13024
 * @depth:  Used for loop detection, use 0
13025
 * @URL:  the URL for the entity to load
13026
 * @ID:  the System ID for the entity to load
13027
 * @list:  the return value for the set of parsed nodes
13028
 *
13029
 * Private version of xmlParseExternalEntity()
13030
 *
13031
 * Returns 0 if the entity is well formed, -1 in case of args problem and
13032
 *    the parser error code otherwise
13033
 */
13034
13035
static xmlParserErrors
13036
xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
13037
                xmlSAXHandlerPtr sax,
13038
          void *user_data, int depth, const xmlChar *URL,
13039
2.17M
          const xmlChar *ID, xmlNodePtr *list) {
13040
2.17M
    xmlParserCtxtPtr ctxt;
13041
2.17M
    xmlDocPtr newDoc;
13042
2.17M
    xmlNodePtr newRoot;
13043
2.17M
    xmlParserErrors ret = XML_ERR_OK;
13044
2.17M
    xmlChar start[4];
13045
2.17M
    xmlCharEncoding enc;
13046
13047
2.17M
    if (((depth > 40) &&
13048
2.17M
  ((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13049
2.17M
  (depth > 1024)) {
13050
3.40k
  return(XML_ERR_ENTITY_LOOP);
13051
3.40k
    }
13052
13053
2.16M
    if (list != NULL)
13054
2.14M
        *list = NULL;
13055
2.16M
    if ((URL == NULL) && (ID == NULL))
13056
269
  return(XML_ERR_INTERNAL_ERROR);
13057
2.16M
    if (doc == NULL)
13058
0
  return(XML_ERR_INTERNAL_ERROR);
13059
13060
2.16M
    ctxt = xmlCreateEntityParserCtxtInternal(sax, user_data, URL, ID, NULL,
13061
2.16M
                                             oldctxt);
13062
2.16M
    if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
13063
2.11M
    xmlDetectSAX2(ctxt);
13064
13065
2.11M
    newDoc = xmlNewDoc(BAD_CAST "1.0");
13066
2.11M
    if (newDoc == NULL) {
13067
0
  xmlFreeParserCtxt(ctxt);
13068
0
  return(XML_ERR_INTERNAL_ERROR);
13069
0
    }
13070
2.11M
    newDoc->properties = XML_DOC_INTERNAL;
13071
2.11M
    if (doc) {
13072
2.11M
        newDoc->intSubset = doc->intSubset;
13073
2.11M
        newDoc->extSubset = doc->extSubset;
13074
2.11M
        if (doc->dict) {
13075
1.01M
            newDoc->dict = doc->dict;
13076
1.01M
            xmlDictReference(newDoc->dict);
13077
1.01M
        }
13078
2.11M
        if (doc->URL != NULL) {
13079
1.38M
            newDoc->URL = xmlStrdup(doc->URL);
13080
1.38M
        }
13081
2.11M
    }
13082
2.11M
    newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13083
2.11M
    if (newRoot == NULL) {
13084
0
  if (sax != NULL)
13085
0
  xmlFreeParserCtxt(ctxt);
13086
0
  newDoc->intSubset = NULL;
13087
0
  newDoc->extSubset = NULL;
13088
0
        xmlFreeDoc(newDoc);
13089
0
  return(XML_ERR_INTERNAL_ERROR);
13090
0
    }
13091
2.11M
    xmlAddChild((xmlNodePtr) newDoc, newRoot);
13092
2.11M
    nodePush(ctxt, newDoc->children);
13093
2.11M
    if (doc == NULL) {
13094
0
        ctxt->myDoc = newDoc;
13095
2.11M
    } else {
13096
2.11M
        ctxt->myDoc = doc;
13097
2.11M
        newRoot->doc = doc;
13098
2.11M
    }
13099
13100
    /*
13101
     * Get the 4 first bytes and decode the charset
13102
     * if enc != XML_CHAR_ENCODING_NONE
13103
     * plug some encoding conversion routines.
13104
     */
13105
2.11M
    GROW;
13106
2.11M
    if ((ctxt->input->end - ctxt->input->cur) >= 4) {
13107
2.10M
  start[0] = RAW;
13108
2.10M
  start[1] = NXT(1);
13109
2.10M
  start[2] = NXT(2);
13110
2.10M
  start[3] = NXT(3);
13111
2.10M
  enc = xmlDetectCharEncoding(start, 4);
13112
2.10M
  if (enc != XML_CHAR_ENCODING_NONE) {
13113
12.7k
      xmlSwitchEncoding(ctxt, enc);
13114
12.7k
  }
13115
2.10M
    }
13116
13117
    /*
13118
     * Parse a possible text declaration first
13119
     */
13120
2.11M
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
13121
9.14k
  xmlParseTextDecl(ctxt);
13122
        /*
13123
         * An XML-1.0 document can't reference an entity not XML-1.0
13124
         */
13125
9.14k
        if ((xmlStrEqual(oldctxt->version, BAD_CAST "1.0")) &&
13126
9.14k
            (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
13127
96
            xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
13128
96
                           "Version mismatch between document and entity\n");
13129
96
        }
13130
9.14k
    }
13131
13132
2.11M
    ctxt->instate = XML_PARSER_CONTENT;
13133
2.11M
    ctxt->depth = depth;
13134
2.11M
    if (oldctxt != NULL) {
13135
2.11M
  ctxt->_private = oldctxt->_private;
13136
2.11M
  ctxt->loadsubset = oldctxt->loadsubset;
13137
2.11M
  ctxt->validate = oldctxt->validate;
13138
2.11M
  ctxt->valid = oldctxt->valid;
13139
2.11M
  ctxt->replaceEntities = oldctxt->replaceEntities;
13140
2.11M
        if (oldctxt->validate) {
13141
1.95M
            ctxt->vctxt.error = oldctxt->vctxt.error;
13142
1.95M
            ctxt->vctxt.warning = oldctxt->vctxt.warning;
13143
1.95M
            ctxt->vctxt.userData = oldctxt->vctxt.userData;
13144
1.95M
        }
13145
2.11M
  ctxt->external = oldctxt->external;
13146
2.11M
        if (ctxt->dict) xmlDictFree(ctxt->dict);
13147
2.11M
        ctxt->dict = oldctxt->dict;
13148
2.11M
        ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13149
2.11M
        ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13150
2.11M
        ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13151
2.11M
        ctxt->dictNames = oldctxt->dictNames;
13152
2.11M
        ctxt->attsDefault = oldctxt->attsDefault;
13153
2.11M
        ctxt->attsSpecial = oldctxt->attsSpecial;
13154
2.11M
        ctxt->linenumbers = oldctxt->linenumbers;
13155
2.11M
  ctxt->record_info = oldctxt->record_info;
13156
2.11M
  ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
13157
2.11M
  ctxt->node_seq.length = oldctxt->node_seq.length;
13158
2.11M
  ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
13159
2.11M
    } else {
13160
  /*
13161
   * Doing validity checking on chunk without context
13162
   * doesn't make sense
13163
   */
13164
0
  ctxt->_private = NULL;
13165
0
  ctxt->validate = 0;
13166
0
  ctxt->external = 2;
13167
0
  ctxt->loadsubset = 0;
13168
0
    }
13169
13170
2.11M
    xmlParseContent(ctxt);
13171
13172
2.11M
    if ((RAW == '<') && (NXT(1) == '/')) {
13173
93.6k
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13174
2.01M
    } else if (RAW != 0) {
13175
1.58k
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13176
1.58k
    }
13177
2.11M
    if (ctxt->node != newDoc->children) {
13178
1.55M
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13179
1.55M
    }
13180
13181
2.11M
    if (!ctxt->wellFormed) {
13182
2.09M
        if (ctxt->errNo == 0)
13183
0
      ret = XML_ERR_INTERNAL_ERROR;
13184
2.09M
  else
13185
2.09M
      ret = (xmlParserErrors)ctxt->errNo;
13186
2.09M
    } else {
13187
14.6k
  if (list != NULL) {
13188
11.5k
      xmlNodePtr cur;
13189
13190
      /*
13191
       * Return the newly created nodeset after unlinking it from
13192
       * they pseudo parent.
13193
       */
13194
11.5k
      cur = newDoc->children->children;
13195
11.5k
      *list = cur;
13196
28.7k
      while (cur != NULL) {
13197
17.2k
    cur->parent = NULL;
13198
17.2k
    cur = cur->next;
13199
17.2k
      }
13200
11.5k
            newDoc->children->children = NULL;
13201
11.5k
  }
13202
14.6k
  ret = XML_ERR_OK;
13203
14.6k
    }
13204
13205
    /*
13206
     * Record in the parent context the number of entities replacement
13207
     * done when parsing that reference.
13208
     */
13209
2.11M
    if (oldctxt != NULL)
13210
2.11M
        oldctxt->nbentities += ctxt->nbentities;
13211
13212
    /*
13213
     * Also record the size of the entity parsed
13214
     */
13215
2.11M
    if (ctxt->input != NULL && oldctxt != NULL) {
13216
2.11M
  oldctxt->sizeentities += ctxt->input->consumed;
13217
2.11M
  oldctxt->sizeentities += (ctxt->input->cur - ctxt->input->base);
13218
2.11M
    }
13219
    /*
13220
     * And record the last error if any
13221
     */
13222
2.11M
    if ((oldctxt != NULL) && (ctxt->lastError.code != XML_ERR_OK))
13223
2.09M
        xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13224
13225
2.11M
    if (oldctxt != NULL) {
13226
2.11M
        ctxt->dict = NULL;
13227
2.11M
        ctxt->attsDefault = NULL;
13228
2.11M
        ctxt->attsSpecial = NULL;
13229
2.11M
        oldctxt->validate = ctxt->validate;
13230
2.11M
        oldctxt->valid = ctxt->valid;
13231
2.11M
        oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
13232
2.11M
        oldctxt->node_seq.length = ctxt->node_seq.length;
13233
2.11M
        oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
13234
2.11M
    }
13235
2.11M
    ctxt->node_seq.maximum = 0;
13236
2.11M
    ctxt->node_seq.length = 0;
13237
2.11M
    ctxt->node_seq.buffer = NULL;
13238
2.11M
    xmlFreeParserCtxt(ctxt);
13239
2.11M
    newDoc->intSubset = NULL;
13240
2.11M
    newDoc->extSubset = NULL;
13241
2.11M
    xmlFreeDoc(newDoc);
13242
13243
2.11M
    return(ret);
13244
2.11M
}
13245
13246
#ifdef LIBXML_SAX1_ENABLED
13247
/**
13248
 * xmlParseExternalEntity:
13249
 * @doc:  the document the chunk pertains to
13250
 * @sax:  the SAX handler block (possibly NULL)
13251
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
13252
 * @depth:  Used for loop detection, use 0
13253
 * @URL:  the URL for the entity to load
13254
 * @ID:  the System ID for the entity to load
13255
 * @lst:  the return value for the set of parsed nodes
13256
 *
13257
 * Parse an external general entity
13258
 * An external general parsed entity is well-formed if it matches the
13259
 * production labeled extParsedEnt.
13260
 *
13261
 * [78] extParsedEnt ::= TextDecl? content
13262
 *
13263
 * Returns 0 if the entity is well formed, -1 in case of args problem and
13264
 *    the parser error code otherwise
13265
 */
13266
13267
int
13268
xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
13269
0
    int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
13270
0
    return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
13271
0
                           ID, lst));
13272
0
}
13273
13274
/**
13275
 * xmlParseBalancedChunkMemory:
13276
 * @doc:  the document the chunk pertains to (must not be NULL)
13277
 * @sax:  the SAX handler block (possibly NULL)
13278
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
13279
 * @depth:  Used for loop detection, use 0
13280
 * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
13281
 * @lst:  the return value for the set of parsed nodes
13282
 *
13283
 * Parse a well-balanced chunk of an XML document
13284
 * called by the parser
13285
 * The allowed sequence for the Well Balanced Chunk is the one defined by
13286
 * the content production in the XML grammar:
13287
 *
13288
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13289
 *
13290
 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13291
 *    the parser error code otherwise
13292
 */
13293
13294
int
13295
xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13296
0
     void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
13297
0
    return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
13298
0
                                                depth, string, lst, 0 );
13299
0
}
13300
#endif /* LIBXML_SAX1_ENABLED */
13301
13302
/**
13303
 * xmlParseBalancedChunkMemoryInternal:
13304
 * @oldctxt:  the existing parsing context
13305
 * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
13306
 * @user_data:  the user data field for the parser context
13307
 * @lst:  the return value for the set of parsed nodes
13308
 *
13309
 *
13310
 * Parse a well-balanced chunk of an XML document
13311
 * called by the parser
13312
 * The allowed sequence for the Well Balanced Chunk is the one defined by
13313
 * the content production in the XML grammar:
13314
 *
13315
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13316
 *
13317
 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13318
 * error code otherwise
13319
 *
13320
 * In case recover is set to 1, the nodelist will not be empty even if
13321
 * the parsed chunk is not well balanced.
13322
 */
13323
static xmlParserErrors
13324
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
13325
306k
  const xmlChar *string, void *user_data, xmlNodePtr *lst) {
13326
306k
    xmlParserCtxtPtr ctxt;
13327
306k
    xmlDocPtr newDoc = NULL;
13328
306k
    xmlNodePtr newRoot;
13329
306k
    xmlSAXHandlerPtr oldsax = NULL;
13330
306k
    xmlNodePtr content = NULL;
13331
306k
    xmlNodePtr last = NULL;
13332
306k
    int size;
13333
306k
    xmlParserErrors ret = XML_ERR_OK;
13334
306k
#ifdef SAX2
13335
306k
    int i;
13336
306k
#endif
13337
13338
306k
    if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13339
306k
        (oldctxt->depth >  1024)) {
13340
1.46k
  return(XML_ERR_ENTITY_LOOP);
13341
1.46k
    }
13342
13343
13344
304k
    if (lst != NULL)
13345
304k
        *lst = NULL;
13346
304k
    if (string == NULL)
13347
76
        return(XML_ERR_INTERNAL_ERROR);
13348
13349
304k
    size = xmlStrlen(string);
13350
13351
304k
    ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13352
304k
    if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
13353
303k
    if (user_data != NULL)
13354
0
  ctxt->userData = user_data;
13355
303k
    else
13356
303k
  ctxt->userData = ctxt;
13357
303k
    if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
13358
303k
    ctxt->dict = oldctxt->dict;
13359
303k
    ctxt->input_id = oldctxt->input_id + 1;
13360
303k
    ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13361
303k
    ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13362
303k
    ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13363
13364
303k
#ifdef SAX2
13365
    /* propagate namespaces down the entity */
13366
2.27M
    for (i = 0;i < oldctxt->nsNr;i += 2) {
13367
1.97M
        nsPush(ctxt, oldctxt->nsTab[i], oldctxt->nsTab[i+1]);
13368
1.97M
    }
13369
303k
#endif
13370
13371
303k
    oldsax = ctxt->sax;
13372
303k
    ctxt->sax = oldctxt->sax;
13373
303k
    xmlDetectSAX2(ctxt);
13374
303k
    ctxt->replaceEntities = oldctxt->replaceEntities;
13375
303k
    ctxt->options = oldctxt->options;
13376
13377
303k
    ctxt->_private = oldctxt->_private;
13378
303k
    if (oldctxt->myDoc == NULL) {
13379
0
  newDoc = xmlNewDoc(BAD_CAST "1.0");
13380
0
  if (newDoc == NULL) {
13381
0
      ctxt->sax = oldsax;
13382
0
      ctxt->dict = NULL;
13383
0
      xmlFreeParserCtxt(ctxt);
13384
0
      return(XML_ERR_INTERNAL_ERROR);
13385
0
  }
13386
0
  newDoc->properties = XML_DOC_INTERNAL;
13387
0
  newDoc->dict = ctxt->dict;
13388
0
  xmlDictReference(newDoc->dict);
13389
0
  ctxt->myDoc = newDoc;
13390
303k
    } else {
13391
303k
  ctxt->myDoc = oldctxt->myDoc;
13392
303k
        content = ctxt->myDoc->children;
13393
303k
  last = ctxt->myDoc->last;
13394
303k
    }
13395
303k
    newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
13396
303k
    if (newRoot == NULL) {
13397
0
  ctxt->sax = oldsax;
13398
0
  ctxt->dict = NULL;
13399
0
  xmlFreeParserCtxt(ctxt);
13400
0
  if (newDoc != NULL) {
13401
0
      xmlFreeDoc(newDoc);
13402
0
  }
13403
0
  return(XML_ERR_INTERNAL_ERROR);
13404
0
    }
13405
303k
    ctxt->myDoc->children = NULL;
13406
303k
    ctxt->myDoc->last = NULL;
13407
303k
    xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
13408
303k
    nodePush(ctxt, ctxt->myDoc->children);
13409
303k
    ctxt->instate = XML_PARSER_CONTENT;
13410
303k
    ctxt->depth = oldctxt->depth + 1;
13411
13412
303k
    ctxt->validate = 0;
13413
303k
    ctxt->loadsubset = oldctxt->loadsubset;
13414
303k
    if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
13415
  /*
13416
   * ID/IDREF registration will be done in xmlValidateElement below
13417
   */
13418
158k
  ctxt->loadsubset |= XML_SKIP_IDS;
13419
158k
    }
13420
303k
    ctxt->dictNames = oldctxt->dictNames;
13421
303k
    ctxt->attsDefault = oldctxt->attsDefault;
13422
303k
    ctxt->attsSpecial = oldctxt->attsSpecial;
13423
13424
303k
    xmlParseContent(ctxt);
13425
303k
    if ((RAW == '<') && (NXT(1) == '/')) {
13426
136
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13427
303k
    } else if (RAW != 0) {
13428
114
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13429
114
    }
13430
303k
    if (ctxt->node != ctxt->myDoc->children) {
13431
118k
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13432
118k
    }
13433
13434
303k
    if (!ctxt->wellFormed) {
13435
278k
        if (ctxt->errNo == 0)
13436
0
      ret = XML_ERR_INTERNAL_ERROR;
13437
278k
  else
13438
278k
      ret = (xmlParserErrors)ctxt->errNo;
13439
278k
    } else {
13440
25.6k
      ret = XML_ERR_OK;
13441
25.6k
    }
13442
13443
303k
    if ((lst != NULL) && (ret == XML_ERR_OK)) {
13444
25.5k
  xmlNodePtr cur;
13445
13446
  /*
13447
   * Return the newly created nodeset after unlinking it from
13448
   * they pseudo parent.
13449
   */
13450
25.5k
  cur = ctxt->myDoc->children->children;
13451
25.5k
  *lst = cur;
13452
87.4k
  while (cur != NULL) {
13453
61.9k
#ifdef LIBXML_VALID_ENABLED
13454
61.9k
      if ((oldctxt->validate) && (oldctxt->wellFormed) &&
13455
61.9k
    (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
13456
61.9k
    (cur->type == XML_ELEMENT_NODE)) {
13457
13.8k
    oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
13458
13.8k
      oldctxt->myDoc, cur);
13459
13.8k
      }
13460
61.9k
#endif /* LIBXML_VALID_ENABLED */
13461
61.9k
      cur->parent = NULL;
13462
61.9k
      cur = cur->next;
13463
61.9k
  }
13464
25.5k
  ctxt->myDoc->children->children = NULL;
13465
25.5k
    }
13466
303k
    if (ctxt->myDoc != NULL) {
13467
303k
  xmlFreeNode(ctxt->myDoc->children);
13468
303k
        ctxt->myDoc->children = content;
13469
303k
        ctxt->myDoc->last = last;
13470
303k
    }
13471
13472
    /*
13473
     * Record in the parent context the number of entities replacement
13474
     * done when parsing that reference.
13475
     */
13476
303k
    if (oldctxt != NULL)
13477
303k
        oldctxt->nbentities += ctxt->nbentities;
13478
13479
    /*
13480
     * Also record the last error if any
13481
     */
13482
303k
    if (ctxt->lastError.code != XML_ERR_OK)
13483
278k
        xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13484
13485
303k
    ctxt->sax = oldsax;
13486
303k
    ctxt->dict = NULL;
13487
303k
    ctxt->attsDefault = NULL;
13488
303k
    ctxt->attsSpecial = NULL;
13489
303k
    xmlFreeParserCtxt(ctxt);
13490
303k
    if (newDoc != NULL) {
13491
0
  xmlFreeDoc(newDoc);
13492
0
    }
13493
13494
303k
    return(ret);
13495
303k
}
13496
13497
/**
13498
 * xmlParseInNodeContext:
13499
 * @node:  the context node
13500
 * @data:  the input string
13501
 * @datalen:  the input string length in bytes
13502
 * @options:  a combination of xmlParserOption
13503
 * @lst:  the return value for the set of parsed nodes
13504
 *
13505
 * Parse a well-balanced chunk of an XML document
13506
 * within the context (DTD, namespaces, etc ...) of the given node.
13507
 *
13508
 * The allowed sequence for the data is a Well Balanced Chunk defined by
13509
 * the content production in the XML grammar:
13510
 *
13511
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13512
 *
13513
 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13514
 * error code otherwise
13515
 */
13516
xmlParserErrors
13517
xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
13518
0
                      int options, xmlNodePtr *lst) {
13519
0
#ifdef SAX2
13520
0
    xmlParserCtxtPtr ctxt;
13521
0
    xmlDocPtr doc = NULL;
13522
0
    xmlNodePtr fake, cur;
13523
0
    int nsnr = 0;
13524
13525
0
    xmlParserErrors ret = XML_ERR_OK;
13526
13527
    /*
13528
     * check all input parameters, grab the document
13529
     */
13530
0
    if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
13531
0
        return(XML_ERR_INTERNAL_ERROR);
13532
0
    switch (node->type) {
13533
0
        case XML_ELEMENT_NODE:
13534
0
        case XML_ATTRIBUTE_NODE:
13535
0
        case XML_TEXT_NODE:
13536
0
        case XML_CDATA_SECTION_NODE:
13537
0
        case XML_ENTITY_REF_NODE:
13538
0
        case XML_PI_NODE:
13539
0
        case XML_COMMENT_NODE:
13540
0
        case XML_DOCUMENT_NODE:
13541
0
        case XML_HTML_DOCUMENT_NODE:
13542
0
      break;
13543
0
  default:
13544
0
      return(XML_ERR_INTERNAL_ERROR);
13545
13546
0
    }
13547
0
    while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
13548
0
           (node->type != XML_DOCUMENT_NODE) &&
13549
0
     (node->type != XML_HTML_DOCUMENT_NODE))
13550
0
  node = node->parent;
13551
0
    if (node == NULL)
13552
0
  return(XML_ERR_INTERNAL_ERROR);
13553
0
    if (node->type == XML_ELEMENT_NODE)
13554
0
  doc = node->doc;
13555
0
    else
13556
0
        doc = (xmlDocPtr) node;
13557
0
    if (doc == NULL)
13558
0
  return(XML_ERR_INTERNAL_ERROR);
13559
13560
    /*
13561
     * allocate a context and set-up everything not related to the
13562
     * node position in the tree
13563
     */
13564
0
    if (doc->type == XML_DOCUMENT_NODE)
13565
0
  ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
13566
0
#ifdef LIBXML_HTML_ENABLED
13567
0
    else if (doc->type == XML_HTML_DOCUMENT_NODE) {
13568
0
  ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
13569
        /*
13570
         * When parsing in context, it makes no sense to add implied
13571
         * elements like html/body/etc...
13572
         */
13573
0
        options |= HTML_PARSE_NOIMPLIED;
13574
0
    }
13575
0
#endif
13576
0
    else
13577
0
        return(XML_ERR_INTERNAL_ERROR);
13578
13579
0
    if (ctxt == NULL)
13580
0
        return(XML_ERR_NO_MEMORY);
13581
13582
    /*
13583
     * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
13584
     * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
13585
     * we must wait until the last moment to free the original one.
13586
     */
13587
0
    if (doc->dict != NULL) {
13588
0
        if (ctxt->dict != NULL)
13589
0
      xmlDictFree(ctxt->dict);
13590
0
  ctxt->dict = doc->dict;
13591
0
    } else
13592
0
        options |= XML_PARSE_NODICT;
13593
13594
0
    if (doc->encoding != NULL) {
13595
0
        xmlCharEncodingHandlerPtr hdlr;
13596
13597
0
        if (ctxt->encoding != NULL)
13598
0
      xmlFree((xmlChar *) ctxt->encoding);
13599
0
        ctxt->encoding = xmlStrdup((const xmlChar *) doc->encoding);
13600
13601
0
        hdlr = xmlFindCharEncodingHandler((const char *) doc->encoding);
13602
0
        if (hdlr != NULL) {
13603
0
            xmlSwitchToEncoding(ctxt, hdlr);
13604
0
  } else {
13605
0
            return(XML_ERR_UNSUPPORTED_ENCODING);
13606
0
        }
13607
0
    }
13608
13609
0
    xmlCtxtUseOptionsInternal(ctxt, options, NULL);
13610
0
    xmlDetectSAX2(ctxt);
13611
0
    ctxt->myDoc = doc;
13612
    /* parsing in context, i.e. as within existing content */
13613
0
    ctxt->input_id = 2;
13614
0
    ctxt->instate = XML_PARSER_CONTENT;
13615
13616
0
    fake = xmlNewDocComment(node->doc, NULL);
13617
0
    if (fake == NULL) {
13618
0
        xmlFreeParserCtxt(ctxt);
13619
0
  return(XML_ERR_NO_MEMORY);
13620
0
    }
13621
0
    xmlAddChild(node, fake);
13622
13623
0
    if (node->type == XML_ELEMENT_NODE) {
13624
0
  nodePush(ctxt, node);
13625
  /*
13626
   * initialize the SAX2 namespaces stack
13627
   */
13628
0
  cur = node;
13629
0
  while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
13630
0
      xmlNsPtr ns = cur->nsDef;
13631
0
      const xmlChar *iprefix, *ihref;
13632
13633
0
      while (ns != NULL) {
13634
0
    if (ctxt->dict) {
13635
0
        iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
13636
0
        ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
13637
0
    } else {
13638
0
        iprefix = ns->prefix;
13639
0
        ihref = ns->href;
13640
0
    }
13641
13642
0
          if (xmlGetNamespace(ctxt, iprefix) == NULL) {
13643
0
        nsPush(ctxt, iprefix, ihref);
13644
0
        nsnr++;
13645
0
    }
13646
0
    ns = ns->next;
13647
0
      }
13648
0
      cur = cur->parent;
13649
0
  }
13650
0
    }
13651
13652
0
    if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
13653
  /*
13654
   * ID/IDREF registration will be done in xmlValidateElement below
13655
   */
13656
0
  ctxt->loadsubset |= XML_SKIP_IDS;
13657
0
    }
13658
13659
0
#ifdef LIBXML_HTML_ENABLED
13660
0
    if (doc->type == XML_HTML_DOCUMENT_NODE)
13661
0
        __htmlParseContent(ctxt);
13662
0
    else
13663
0
#endif
13664
0
  xmlParseContent(ctxt);
13665
13666
0
    nsPop(ctxt, nsnr);
13667
0
    if ((RAW == '<') && (NXT(1) == '/')) {
13668
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13669
0
    } else if (RAW != 0) {
13670
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13671
0
    }
13672
0
    if ((ctxt->node != NULL) && (ctxt->node != node)) {
13673
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13674
0
  ctxt->wellFormed = 0;
13675
0
    }
13676
13677
0
    if (!ctxt->wellFormed) {
13678
0
        if (ctxt->errNo == 0)
13679
0
      ret = XML_ERR_INTERNAL_ERROR;
13680
0
  else
13681
0
      ret = (xmlParserErrors)ctxt->errNo;
13682
0
    } else {
13683
0
        ret = XML_ERR_OK;
13684
0
    }
13685
13686
    /*
13687
     * Return the newly created nodeset after unlinking it from
13688
     * the pseudo sibling.
13689
     */
13690
13691
0
    cur = fake->next;
13692
0
    fake->next = NULL;
13693
0
    node->last = fake;
13694
13695
0
    if (cur != NULL) {
13696
0
  cur->prev = NULL;
13697
0
    }
13698
13699
0
    *lst = cur;
13700
13701
0
    while (cur != NULL) {
13702
0
  cur->parent = NULL;
13703
0
  cur = cur->next;
13704
0
    }
13705
13706
0
    xmlUnlinkNode(fake);
13707
0
    xmlFreeNode(fake);
13708
13709
13710
0
    if (ret != XML_ERR_OK) {
13711
0
        xmlFreeNodeList(*lst);
13712
0
  *lst = NULL;
13713
0
    }
13714
13715
0
    if (doc->dict != NULL)
13716
0
        ctxt->dict = NULL;
13717
0
    xmlFreeParserCtxt(ctxt);
13718
13719
0
    return(ret);
13720
#else /* !SAX2 */
13721
    return(XML_ERR_INTERNAL_ERROR);
13722
#endif
13723
0
}
13724
13725
#ifdef LIBXML_SAX1_ENABLED
13726
/**
13727
 * xmlParseBalancedChunkMemoryRecover:
13728
 * @doc:  the document the chunk pertains to (must not be NULL)
13729
 * @sax:  the SAX handler block (possibly NULL)
13730
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
13731
 * @depth:  Used for loop detection, use 0
13732
 * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
13733
 * @lst:  the return value for the set of parsed nodes
13734
 * @recover: return nodes even if the data is broken (use 0)
13735
 *
13736
 *
13737
 * Parse a well-balanced chunk of an XML document
13738
 * called by the parser
13739
 * The allowed sequence for the Well Balanced Chunk is the one defined by
13740
 * the content production in the XML grammar:
13741
 *
13742
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13743
 *
13744
 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13745
 *    the parser error code otherwise
13746
 *
13747
 * In case recover is set to 1, the nodelist will not be empty even if
13748
 * the parsed chunk is not well balanced, assuming the parsing succeeded to
13749
 * some extent.
13750
 */
13751
int
13752
xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13753
     void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
13754
0
     int recover) {
13755
0
    xmlParserCtxtPtr ctxt;
13756
0
    xmlDocPtr newDoc;
13757
0
    xmlSAXHandlerPtr oldsax = NULL;
13758
0
    xmlNodePtr content, newRoot;
13759
0
    int size;
13760
0
    int ret = 0;
13761
13762
0
    if (depth > 40) {
13763
0
  return(XML_ERR_ENTITY_LOOP);
13764
0
    }
13765
13766
13767
0
    if (lst != NULL)
13768
0
        *lst = NULL;
13769
0
    if (string == NULL)
13770
0
        return(-1);
13771
13772
0
    size = xmlStrlen(string);
13773
13774
0
    ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13775
0
    if (ctxt == NULL) return(-1);
13776
0
    ctxt->userData = ctxt;
13777
0
    if (sax != NULL) {
13778
0
  oldsax = ctxt->sax;
13779
0
        ctxt->sax = sax;
13780
0
  if (user_data != NULL)
13781
0
      ctxt->userData = user_data;
13782
0
    }
13783
0
    newDoc = xmlNewDoc(BAD_CAST "1.0");
13784
0
    if (newDoc == NULL) {
13785
0
  xmlFreeParserCtxt(ctxt);
13786
0
  return(-1);
13787
0
    }
13788
0
    newDoc->properties = XML_DOC_INTERNAL;
13789
0
    if ((doc != NULL) && (doc->dict != NULL)) {
13790
0
        xmlDictFree(ctxt->dict);
13791
0
  ctxt->dict = doc->dict;
13792
0
  xmlDictReference(ctxt->dict);
13793
0
  ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13794
0
  ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13795
0
  ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13796
0
  ctxt->dictNames = 1;
13797
0
    } else {
13798
0
  xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL);
13799
0
    }
13800
    /* doc == NULL is only supported for historic reasons */
13801
0
    if (doc != NULL) {
13802
0
  newDoc->intSubset = doc->intSubset;
13803
0
  newDoc->extSubset = doc->extSubset;
13804
0
    }
13805
0
    newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13806
0
    if (newRoot == NULL) {
13807
0
  if (sax != NULL)
13808
0
      ctxt->sax = oldsax;
13809
0
  xmlFreeParserCtxt(ctxt);
13810
0
  newDoc->intSubset = NULL;
13811
0
  newDoc->extSubset = NULL;
13812
0
        xmlFreeDoc(newDoc);
13813
0
  return(-1);
13814
0
    }
13815
0
    xmlAddChild((xmlNodePtr) newDoc, newRoot);
13816
0
    nodePush(ctxt, newRoot);
13817
    /* doc == NULL is only supported for historic reasons */
13818
0
    if (doc == NULL) {
13819
0
  ctxt->myDoc = newDoc;
13820
0
    } else {
13821
0
  ctxt->myDoc = newDoc;
13822
0
  newDoc->children->doc = doc;
13823
  /* Ensure that doc has XML spec namespace */
13824
0
  xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
13825
0
  newDoc->oldNs = doc->oldNs;
13826
0
    }
13827
0
    ctxt->instate = XML_PARSER_CONTENT;
13828
0
    ctxt->input_id = 2;
13829
0
    ctxt->depth = depth;
13830
13831
    /*
13832
     * Doing validity checking on chunk doesn't make sense
13833
     */
13834
0
    ctxt->validate = 0;
13835
0
    ctxt->loadsubset = 0;
13836
0
    xmlDetectSAX2(ctxt);
13837
13838
0
    if ( doc != NULL ){
13839
0
        content = doc->children;
13840
0
        doc->children = NULL;
13841
0
        xmlParseContent(ctxt);
13842
0
        doc->children = content;
13843
0
    }
13844
0
    else {
13845
0
        xmlParseContent(ctxt);
13846
0
    }
13847
0
    if ((RAW == '<') && (NXT(1) == '/')) {
13848
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13849
0
    } else if (RAW != 0) {
13850
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13851
0
    }
13852
0
    if (ctxt->node != newDoc->children) {
13853
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13854
0
    }
13855
13856
0
    if (!ctxt->wellFormed) {
13857
0
        if (ctxt->errNo == 0)
13858
0
      ret = 1;
13859
0
  else
13860
0
      ret = ctxt->errNo;
13861
0
    } else {
13862
0
      ret = 0;
13863
0
    }
13864
13865
0
    if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
13866
0
  xmlNodePtr cur;
13867
13868
  /*
13869
   * Return the newly created nodeset after unlinking it from
13870
   * they pseudo parent.
13871
   */
13872
0
  cur = newDoc->children->children;
13873
0
  *lst = cur;
13874
0
  while (cur != NULL) {
13875
0
      xmlSetTreeDoc(cur, doc);
13876
0
      cur->parent = NULL;
13877
0
      cur = cur->next;
13878
0
  }
13879
0
  newDoc->children->children = NULL;
13880
0
    }
13881
13882
0
    if (sax != NULL)
13883
0
  ctxt->sax = oldsax;
13884
0
    xmlFreeParserCtxt(ctxt);
13885
0
    newDoc->intSubset = NULL;
13886
0
    newDoc->extSubset = NULL;
13887
    /* This leaks the namespace list if doc == NULL */
13888
0
    newDoc->oldNs = NULL;
13889
0
    xmlFreeDoc(newDoc);
13890
13891
0
    return(ret);
13892
0
}
13893
13894
/**
13895
 * xmlSAXParseEntity:
13896
 * @sax:  the SAX handler block
13897
 * @filename:  the filename
13898
 *
13899
 * DEPRECATED: Don't use.
13900
 *
13901
 * parse an XML external entity out of context and build a tree.
13902
 * It use the given SAX function block to handle the parsing callback.
13903
 * If sax is NULL, fallback to the default DOM tree building routines.
13904
 *
13905
 * [78] extParsedEnt ::= TextDecl? content
13906
 *
13907
 * This correspond to a "Well Balanced" chunk
13908
 *
13909
 * Returns the resulting document tree
13910
 */
13911
13912
xmlDocPtr
13913
0
xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
13914
0
    xmlDocPtr ret;
13915
0
    xmlParserCtxtPtr ctxt;
13916
13917
0
    ctxt = xmlCreateFileParserCtxt(filename);
13918
0
    if (ctxt == NULL) {
13919
0
  return(NULL);
13920
0
    }
13921
0
    if (sax != NULL) {
13922
0
  if (ctxt->sax != NULL)
13923
0
      xmlFree(ctxt->sax);
13924
0
        ctxt->sax = sax;
13925
0
        ctxt->userData = NULL;
13926
0
    }
13927
13928
0
    xmlParseExtParsedEnt(ctxt);
13929
13930
0
    if (ctxt->wellFormed)
13931
0
  ret = ctxt->myDoc;
13932
0
    else {
13933
0
        ret = NULL;
13934
0
        xmlFreeDoc(ctxt->myDoc);
13935
0
        ctxt->myDoc = NULL;
13936
0
    }
13937
0
    if (sax != NULL)
13938
0
        ctxt->sax = NULL;
13939
0
    xmlFreeParserCtxt(ctxt);
13940
13941
0
    return(ret);
13942
0
}
13943
13944
/**
13945
 * xmlParseEntity:
13946
 * @filename:  the filename
13947
 *
13948
 * parse an XML external entity out of context and build a tree.
13949
 *
13950
 * [78] extParsedEnt ::= TextDecl? content
13951
 *
13952
 * This correspond to a "Well Balanced" chunk
13953
 *
13954
 * Returns the resulting document tree
13955
 */
13956
13957
xmlDocPtr
13958
0
xmlParseEntity(const char *filename) {
13959
0
    return(xmlSAXParseEntity(NULL, filename));
13960
0
}
13961
#endif /* LIBXML_SAX1_ENABLED */
13962
13963
/**
13964
 * xmlCreateEntityParserCtxtInternal:
13965
 * @URL:  the entity URL
13966
 * @ID:  the entity PUBLIC ID
13967
 * @base:  a possible base for the target URI
13968
 * @pctx:  parser context used to set options on new context
13969
 *
13970
 * Create a parser context for an external entity
13971
 * Automatic support for ZLIB/Compress compressed document is provided
13972
 * by default if found at compile-time.
13973
 *
13974
 * Returns the new parser context or NULL
13975
 */
13976
static xmlParserCtxtPtr
13977
xmlCreateEntityParserCtxtInternal(xmlSAXHandlerPtr sax, void *userData,
13978
        const xmlChar *URL, const xmlChar *ID, const xmlChar *base,
13979
2.16M
        xmlParserCtxtPtr pctx) {
13980
2.16M
    xmlParserCtxtPtr ctxt;
13981
2.16M
    xmlParserInputPtr inputStream;
13982
2.16M
    char *directory = NULL;
13983
2.16M
    xmlChar *uri;
13984
13985
2.16M
    ctxt = xmlNewSAXParserCtxt(sax, userData);
13986
2.16M
    if (ctxt == NULL) {
13987
0
  return(NULL);
13988
0
    }
13989
13990
2.16M
    if (pctx != NULL) {
13991
2.16M
        ctxt->options = pctx->options;
13992
2.16M
        ctxt->_private = pctx->_private;
13993
  /*
13994
   * this is a subparser of pctx, so the input_id should be
13995
   * incremented to distinguish from main entity
13996
   */
13997
2.16M
  ctxt->input_id = pctx->input_id + 1;
13998
2.16M
    }
13999
14000
    /* Don't read from stdin. */
14001
2.16M
    if (xmlStrcmp(URL, BAD_CAST "-") == 0)
14002
0
        URL = BAD_CAST "./-";
14003
14004
2.16M
    uri = xmlBuildURI(URL, base);
14005
14006
2.16M
    if (uri == NULL) {
14007
7.48k
  inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
14008
7.48k
  if (inputStream == NULL) {
14009
7.31k
      xmlFreeParserCtxt(ctxt);
14010
7.31k
      return(NULL);
14011
7.31k
  }
14012
14013
168
  inputPush(ctxt, inputStream);
14014
14015
168
  if ((ctxt->directory == NULL) && (directory == NULL))
14016
168
      directory = xmlParserGetDirectory((char *)URL);
14017
168
  if ((ctxt->directory == NULL) && (directory != NULL))
14018
168
      ctxt->directory = directory;
14019
2.16M
    } else {
14020
2.16M
  inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
14021
2.16M
  if (inputStream == NULL) {
14022
49.6k
      xmlFree(uri);
14023
49.6k
      xmlFreeParserCtxt(ctxt);
14024
49.6k
      return(NULL);
14025
49.6k
  }
14026
14027
2.11M
  inputPush(ctxt, inputStream);
14028
14029
2.11M
  if ((ctxt->directory == NULL) && (directory == NULL))
14030
2.11M
      directory = xmlParserGetDirectory((char *)uri);
14031
2.11M
  if ((ctxt->directory == NULL) && (directory != NULL))
14032
2.11M
      ctxt->directory = directory;
14033
2.11M
  xmlFree(uri);
14034
2.11M
    }
14035
2.11M
    return(ctxt);
14036
2.16M
}
14037
14038
/**
14039
 * xmlCreateEntityParserCtxt:
14040
 * @URL:  the entity URL
14041
 * @ID:  the entity PUBLIC ID
14042
 * @base:  a possible base for the target URI
14043
 *
14044
 * Create a parser context for an external entity
14045
 * Automatic support for ZLIB/Compress compressed document is provided
14046
 * by default if found at compile-time.
14047
 *
14048
 * Returns the new parser context or NULL
14049
 */
14050
xmlParserCtxtPtr
14051
xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
14052
0
                    const xmlChar *base) {
14053
0
    return xmlCreateEntityParserCtxtInternal(NULL, NULL, URL, ID, base, NULL);
14054
14055
0
}
14056
14057
/************************************************************************
14058
 *                  *
14059
 *    Front ends when parsing from a file     *
14060
 *                  *
14061
 ************************************************************************/
14062
14063
/**
14064
 * xmlCreateURLParserCtxt:
14065
 * @filename:  the filename or URL
14066
 * @options:  a combination of xmlParserOption
14067
 *
14068
 * Create a parser context for a file or URL content.
14069
 * Automatic support for ZLIB/Compress compressed document is provided
14070
 * by default if found at compile-time and for file accesses
14071
 *
14072
 * Returns the new parser context or NULL
14073
 */
14074
xmlParserCtxtPtr
14075
xmlCreateURLParserCtxt(const char *filename, int options)
14076
0
{
14077
0
    xmlParserCtxtPtr ctxt;
14078
0
    xmlParserInputPtr inputStream;
14079
0
    char *directory = NULL;
14080
14081
0
    ctxt = xmlNewParserCtxt();
14082
0
    if (ctxt == NULL) {
14083
0
  xmlErrMemory(NULL, "cannot allocate parser context");
14084
0
  return(NULL);
14085
0
    }
14086
14087
0
    if (options)
14088
0
  xmlCtxtUseOptionsInternal(ctxt, options, NULL);
14089
0
    ctxt->linenumbers = 1;
14090
14091
0
    inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
14092
0
    if (inputStream == NULL) {
14093
0
  xmlFreeParserCtxt(ctxt);
14094
0
  return(NULL);
14095
0
    }
14096
14097
0
    inputPush(ctxt, inputStream);
14098
0
    if ((ctxt->directory == NULL) && (directory == NULL))
14099
0
        directory = xmlParserGetDirectory(filename);
14100
0
    if ((ctxt->directory == NULL) && (directory != NULL))
14101
0
        ctxt->directory = directory;
14102
14103
0
    return(ctxt);
14104
0
}
14105
14106
/**
14107
 * xmlCreateFileParserCtxt:
14108
 * @filename:  the filename
14109
 *
14110
 * Create a parser context for a file content.
14111
 * Automatic support for ZLIB/Compress compressed document is provided
14112
 * by default if found at compile-time.
14113
 *
14114
 * Returns the new parser context or NULL
14115
 */
14116
xmlParserCtxtPtr
14117
xmlCreateFileParserCtxt(const char *filename)
14118
0
{
14119
0
    return(xmlCreateURLParserCtxt(filename, 0));
14120
0
}
14121
14122
#ifdef LIBXML_SAX1_ENABLED
14123
/**
14124
 * xmlSAXParseFileWithData:
14125
 * @sax:  the SAX handler block
14126
 * @filename:  the filename
14127
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
14128
 *             documents
14129
 * @data:  the userdata
14130
 *
14131
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
14132
 *
14133
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14134
 * compressed document is provided by default if found at compile-time.
14135
 * It use the given SAX function block to handle the parsing callback.
14136
 * If sax is NULL, fallback to the default DOM tree building routines.
14137
 *
14138
 * User data (void *) is stored within the parser context in the
14139
 * context's _private member, so it is available nearly everywhere in libxml
14140
 *
14141
 * Returns the resulting document tree
14142
 */
14143
14144
xmlDocPtr
14145
xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
14146
0
                        int recovery, void *data) {
14147
0
    xmlDocPtr ret;
14148
0
    xmlParserCtxtPtr ctxt;
14149
14150
0
    xmlInitParser();
14151
14152
0
    ctxt = xmlCreateFileParserCtxt(filename);
14153
0
    if (ctxt == NULL) {
14154
0
  return(NULL);
14155
0
    }
14156
0
    if (sax != NULL) {
14157
0
  if (ctxt->sax != NULL)
14158
0
      xmlFree(ctxt->sax);
14159
0
        ctxt->sax = sax;
14160
0
    }
14161
0
    xmlDetectSAX2(ctxt);
14162
0
    if (data!=NULL) {
14163
0
  ctxt->_private = data;
14164
0
    }
14165
14166
0
    if (ctxt->directory == NULL)
14167
0
        ctxt->directory = xmlParserGetDirectory(filename);
14168
14169
0
    ctxt->recovery = recovery;
14170
14171
0
    xmlParseDocument(ctxt);
14172
14173
0
    if ((ctxt->wellFormed) || recovery) {
14174
0
        ret = ctxt->myDoc;
14175
0
  if ((ret != NULL) && (ctxt->input->buf != NULL)) {
14176
0
      if (ctxt->input->buf->compressed > 0)
14177
0
    ret->compression = 9;
14178
0
      else
14179
0
    ret->compression = ctxt->input->buf->compressed;
14180
0
  }
14181
0
    }
14182
0
    else {
14183
0
       ret = NULL;
14184
0
       xmlFreeDoc(ctxt->myDoc);
14185
0
       ctxt->myDoc = NULL;
14186
0
    }
14187
0
    if (sax != NULL)
14188
0
        ctxt->sax = NULL;
14189
0
    xmlFreeParserCtxt(ctxt);
14190
14191
0
    return(ret);
14192
0
}
14193
14194
/**
14195
 * xmlSAXParseFile:
14196
 * @sax:  the SAX handler block
14197
 * @filename:  the filename
14198
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
14199
 *             documents
14200
 *
14201
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
14202
 *
14203
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14204
 * compressed document is provided by default if found at compile-time.
14205
 * It use the given SAX function block to handle the parsing callback.
14206
 * If sax is NULL, fallback to the default DOM tree building routines.
14207
 *
14208
 * Returns the resulting document tree
14209
 */
14210
14211
xmlDocPtr
14212
xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
14213
0
                          int recovery) {
14214
0
    return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
14215
0
}
14216
14217
/**
14218
 * xmlRecoverDoc:
14219
 * @cur:  a pointer to an array of xmlChar
14220
 *
14221
 * DEPRECATED: Use xmlReadDoc with XML_PARSE_RECOVER.
14222
 *
14223
 * parse an XML in-memory document and build a tree.
14224
 * In the case the document is not Well Formed, a attempt to build a
14225
 * tree is tried anyway
14226
 *
14227
 * Returns the resulting document tree or NULL in case of failure
14228
 */
14229
14230
xmlDocPtr
14231
0
xmlRecoverDoc(const xmlChar *cur) {
14232
0
    return(xmlSAXParseDoc(NULL, cur, 1));
14233
0
}
14234
14235
/**
14236
 * xmlParseFile:
14237
 * @filename:  the filename
14238
 *
14239
 * DEPRECATED: Use xmlReadFile.
14240
 *
14241
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14242
 * compressed document is provided by default if found at compile-time.
14243
 *
14244
 * Returns the resulting document tree if the file was wellformed,
14245
 * NULL otherwise.
14246
 */
14247
14248
xmlDocPtr
14249
0
xmlParseFile(const char *filename) {
14250
0
    return(xmlSAXParseFile(NULL, filename, 0));
14251
0
}
14252
14253
/**
14254
 * xmlRecoverFile:
14255
 * @filename:  the filename
14256
 *
14257
 * DEPRECATED: Use xmlReadFile with XML_PARSE_RECOVER.
14258
 *
14259
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14260
 * compressed document is provided by default if found at compile-time.
14261
 * In the case the document is not Well Formed, it attempts to build
14262
 * a tree anyway
14263
 *
14264
 * Returns the resulting document tree or NULL in case of failure
14265
 */
14266
14267
xmlDocPtr
14268
0
xmlRecoverFile(const char *filename) {
14269
0
    return(xmlSAXParseFile(NULL, filename, 1));
14270
0
}
14271
14272
14273
/**
14274
 * xmlSetupParserForBuffer:
14275
 * @ctxt:  an XML parser context
14276
 * @buffer:  a xmlChar * buffer
14277
 * @filename:  a file name
14278
 *
14279
 * DEPRECATED: Don't use.
14280
 *
14281
 * Setup the parser context to parse a new buffer; Clears any prior
14282
 * contents from the parser context. The buffer parameter must not be
14283
 * NULL, but the filename parameter can be
14284
 */
14285
void
14286
xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
14287
                             const char* filename)
14288
0
{
14289
0
    xmlParserInputPtr input;
14290
14291
0
    if ((ctxt == NULL) || (buffer == NULL))
14292
0
        return;
14293
14294
0
    input = xmlNewInputStream(ctxt);
14295
0
    if (input == NULL) {
14296
0
        xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
14297
0
        xmlClearParserCtxt(ctxt);
14298
0
        return;
14299
0
    }
14300
14301
0
    xmlClearParserCtxt(ctxt);
14302
0
    if (filename != NULL)
14303
0
        input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
14304
0
    input->base = buffer;
14305
0
    input->cur = buffer;
14306
0
    input->end = &buffer[xmlStrlen(buffer)];
14307
0
    inputPush(ctxt, input);
14308
0
}
14309
14310
/**
14311
 * xmlSAXUserParseFile:
14312
 * @sax:  a SAX handler
14313
 * @user_data:  The user data returned on SAX callbacks
14314
 * @filename:  a file name
14315
 *
14316
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
14317
 *
14318
 * parse an XML file and call the given SAX handler routines.
14319
 * Automatic support for ZLIB/Compress compressed document is provided
14320
 *
14321
 * Returns 0 in case of success or a error number otherwise
14322
 */
14323
int
14324
xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
14325
0
                    const char *filename) {
14326
0
    int ret = 0;
14327
0
    xmlParserCtxtPtr ctxt;
14328
14329
0
    ctxt = xmlCreateFileParserCtxt(filename);
14330
0
    if (ctxt == NULL) return -1;
14331
0
    if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14332
0
  xmlFree(ctxt->sax);
14333
0
    ctxt->sax = sax;
14334
0
    xmlDetectSAX2(ctxt);
14335
14336
0
    if (user_data != NULL)
14337
0
  ctxt->userData = user_data;
14338
14339
0
    xmlParseDocument(ctxt);
14340
14341
0
    if (ctxt->wellFormed)
14342
0
  ret = 0;
14343
0
    else {
14344
0
        if (ctxt->errNo != 0)
14345
0
      ret = ctxt->errNo;
14346
0
  else
14347
0
      ret = -1;
14348
0
    }
14349
0
    if (sax != NULL)
14350
0
  ctxt->sax = NULL;
14351
0
    if (ctxt->myDoc != NULL) {
14352
0
        xmlFreeDoc(ctxt->myDoc);
14353
0
  ctxt->myDoc = NULL;
14354
0
    }
14355
0
    xmlFreeParserCtxt(ctxt);
14356
14357
0
    return ret;
14358
0
}
14359
#endif /* LIBXML_SAX1_ENABLED */
14360
14361
/************************************************************************
14362
 *                  *
14363
 *    Front ends when parsing from memory     *
14364
 *                  *
14365
 ************************************************************************/
14366
14367
/**
14368
 * xmlCreateMemoryParserCtxt:
14369
 * @buffer:  a pointer to a char array
14370
 * @size:  the size of the array
14371
 *
14372
 * Create a parser context for an XML in-memory document.
14373
 *
14374
 * Returns the new parser context or NULL
14375
 */
14376
xmlParserCtxtPtr
14377
551k
xmlCreateMemoryParserCtxt(const char *buffer, int size) {
14378
551k
    xmlParserCtxtPtr ctxt;
14379
551k
    xmlParserInputPtr input;
14380
551k
    xmlParserInputBufferPtr buf;
14381
14382
551k
    if (buffer == NULL)
14383
0
  return(NULL);
14384
551k
    if (size <= 0)
14385
678
  return(NULL);
14386
14387
550k
    ctxt = xmlNewParserCtxt();
14388
550k
    if (ctxt == NULL)
14389
0
  return(NULL);
14390
14391
    /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
14392
550k
    buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
14393
550k
    if (buf == NULL) {
14394
0
  xmlFreeParserCtxt(ctxt);
14395
0
  return(NULL);
14396
0
    }
14397
14398
550k
    input = xmlNewInputStream(ctxt);
14399
550k
    if (input == NULL) {
14400
0
  xmlFreeParserInputBuffer(buf);
14401
0
  xmlFreeParserCtxt(ctxt);
14402
0
  return(NULL);
14403
0
    }
14404
14405
550k
    input->filename = NULL;
14406
550k
    input->buf = buf;
14407
550k
    xmlBufResetInput(input->buf->buffer, input);
14408
14409
550k
    inputPush(ctxt, input);
14410
550k
    return(ctxt);
14411
550k
}
14412
14413
#ifdef LIBXML_SAX1_ENABLED
14414
/**
14415
 * xmlSAXParseMemoryWithData:
14416
 * @sax:  the SAX handler block
14417
 * @buffer:  an pointer to a char array
14418
 * @size:  the size of the array
14419
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
14420
 *             documents
14421
 * @data:  the userdata
14422
 *
14423
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
14424
 *
14425
 * parse an XML in-memory block and use the given SAX function block
14426
 * to handle the parsing callback. If sax is NULL, fallback to the default
14427
 * DOM tree building routines.
14428
 *
14429
 * User data (void *) is stored within the parser context in the
14430
 * context's _private member, so it is available nearly everywhere in libxml
14431
 *
14432
 * Returns the resulting document tree
14433
 */
14434
14435
xmlDocPtr
14436
xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
14437
0
            int size, int recovery, void *data) {
14438
0
    xmlDocPtr ret;
14439
0
    xmlParserCtxtPtr ctxt;
14440
14441
0
    xmlInitParser();
14442
14443
0
    ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14444
0
    if (ctxt == NULL) return(NULL);
14445
0
    if (sax != NULL) {
14446
0
  if (ctxt->sax != NULL)
14447
0
      xmlFree(ctxt->sax);
14448
0
        ctxt->sax = sax;
14449
0
    }
14450
0
    xmlDetectSAX2(ctxt);
14451
0
    if (data!=NULL) {
14452
0
  ctxt->_private=data;
14453
0
    }
14454
14455
0
    ctxt->recovery = recovery;
14456
14457
0
    xmlParseDocument(ctxt);
14458
14459
0
    if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14460
0
    else {
14461
0
       ret = NULL;
14462
0
       xmlFreeDoc(ctxt->myDoc);
14463
0
       ctxt->myDoc = NULL;
14464
0
    }
14465
0
    if (sax != NULL)
14466
0
  ctxt->sax = NULL;
14467
0
    xmlFreeParserCtxt(ctxt);
14468
14469
0
    return(ret);
14470
0
}
14471
14472
/**
14473
 * xmlSAXParseMemory:
14474
 * @sax:  the SAX handler block
14475
 * @buffer:  an pointer to a char array
14476
 * @size:  the size of the array
14477
 * @recovery:  work in recovery mode, i.e. tries to read not Well Formed
14478
 *             documents
14479
 *
14480
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
14481
 *
14482
 * parse an XML in-memory block and use the given SAX function block
14483
 * to handle the parsing callback. If sax is NULL, fallback to the default
14484
 * DOM tree building routines.
14485
 *
14486
 * Returns the resulting document tree
14487
 */
14488
xmlDocPtr
14489
xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
14490
0
            int size, int recovery) {
14491
0
    return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
14492
0
}
14493
14494
/**
14495
 * xmlParseMemory:
14496
 * @buffer:  an pointer to a char array
14497
 * @size:  the size of the array
14498
 *
14499
 * DEPRECATED: Use xmlReadMemory.
14500
 *
14501
 * parse an XML in-memory block and build a tree.
14502
 *
14503
 * Returns the resulting document tree
14504
 */
14505
14506
0
xmlDocPtr xmlParseMemory(const char *buffer, int size) {
14507
0
   return(xmlSAXParseMemory(NULL, buffer, size, 0));
14508
0
}
14509
14510
/**
14511
 * xmlRecoverMemory:
14512
 * @buffer:  an pointer to a char array
14513
 * @size:  the size of the array
14514
 *
14515
 * DEPRECATED: Use xmlReadMemory with XML_PARSE_RECOVER.
14516
 *
14517
 * parse an XML in-memory block and build a tree.
14518
 * In the case the document is not Well Formed, an attempt to
14519
 * build a tree is tried anyway
14520
 *
14521
 * Returns the resulting document tree or NULL in case of error
14522
 */
14523
14524
0
xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
14525
0
   return(xmlSAXParseMemory(NULL, buffer, size, 1));
14526
0
}
14527
14528
/**
14529
 * xmlSAXUserParseMemory:
14530
 * @sax:  a SAX handler
14531
 * @user_data:  The user data returned on SAX callbacks
14532
 * @buffer:  an in-memory XML document input
14533
 * @size:  the length of the XML document in bytes
14534
 *
14535
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
14536
 *
14537
 * parse an XML in-memory buffer and call the given SAX handler routines.
14538
 *
14539
 * Returns 0 in case of success or a error number otherwise
14540
 */
14541
int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
14542
0
        const char *buffer, int size) {
14543
0
    int ret = 0;
14544
0
    xmlParserCtxtPtr ctxt;
14545
14546
0
    xmlInitParser();
14547
14548
0
    ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14549
0
    if (ctxt == NULL) return -1;
14550
0
    if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14551
0
        xmlFree(ctxt->sax);
14552
0
    ctxt->sax = sax;
14553
0
    xmlDetectSAX2(ctxt);
14554
14555
0
    if (user_data != NULL)
14556
0
  ctxt->userData = user_data;
14557
14558
0
    xmlParseDocument(ctxt);
14559
14560
0
    if (ctxt->wellFormed)
14561
0
  ret = 0;
14562
0
    else {
14563
0
        if (ctxt->errNo != 0)
14564
0
      ret = ctxt->errNo;
14565
0
  else
14566
0
      ret = -1;
14567
0
    }
14568
0
    if (sax != NULL)
14569
0
        ctxt->sax = NULL;
14570
0
    if (ctxt->myDoc != NULL) {
14571
0
        xmlFreeDoc(ctxt->myDoc);
14572
0
  ctxt->myDoc = NULL;
14573
0
    }
14574
0
    xmlFreeParserCtxt(ctxt);
14575
14576
0
    return ret;
14577
0
}
14578
#endif /* LIBXML_SAX1_ENABLED */
14579
14580
/**
14581
 * xmlCreateDocParserCtxt:
14582
 * @cur:  a pointer to an array of xmlChar
14583
 *
14584
 * Creates a parser context for an XML in-memory document.
14585
 *
14586
 * Returns the new parser context or NULL
14587
 */
14588
xmlParserCtxtPtr
14589
0
xmlCreateDocParserCtxt(const xmlChar *cur) {
14590
0
    int len;
14591
14592
0
    if (cur == NULL)
14593
0
  return(NULL);
14594
0
    len = xmlStrlen(cur);
14595
0
    return(xmlCreateMemoryParserCtxt((const char *)cur, len));
14596
0
}
14597
14598
#ifdef LIBXML_SAX1_ENABLED
14599
/**
14600
 * xmlSAXParseDoc:
14601
 * @sax:  the SAX handler block
14602
 * @cur:  a pointer to an array of xmlChar
14603
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
14604
 *             documents
14605
 *
14606
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadDoc.
14607
 *
14608
 * parse an XML in-memory document and build a tree.
14609
 * It use the given SAX function block to handle the parsing callback.
14610
 * If sax is NULL, fallback to the default DOM tree building routines.
14611
 *
14612
 * Returns the resulting document tree
14613
 */
14614
14615
xmlDocPtr
14616
0
xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
14617
0
    xmlDocPtr ret;
14618
0
    xmlParserCtxtPtr ctxt;
14619
0
    xmlSAXHandlerPtr oldsax = NULL;
14620
14621
0
    if (cur == NULL) return(NULL);
14622
14623
14624
0
    ctxt = xmlCreateDocParserCtxt(cur);
14625
0
    if (ctxt == NULL) return(NULL);
14626
0
    if (sax != NULL) {
14627
0
        oldsax = ctxt->sax;
14628
0
        ctxt->sax = sax;
14629
0
        ctxt->userData = NULL;
14630
0
    }
14631
0
    xmlDetectSAX2(ctxt);
14632
14633
0
    xmlParseDocument(ctxt);
14634
0
    if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14635
0
    else {
14636
0
       ret = NULL;
14637
0
       xmlFreeDoc(ctxt->myDoc);
14638
0
       ctxt->myDoc = NULL;
14639
0
    }
14640
0
    if (sax != NULL)
14641
0
  ctxt->sax = oldsax;
14642
0
    xmlFreeParserCtxt(ctxt);
14643
14644
0
    return(ret);
14645
0
}
14646
14647
/**
14648
 * xmlParseDoc:
14649
 * @cur:  a pointer to an array of xmlChar
14650
 *
14651
 * DEPRECATED: Use xmlReadDoc.
14652
 *
14653
 * parse an XML in-memory document and build a tree.
14654
 *
14655
 * Returns the resulting document tree
14656
 */
14657
14658
xmlDocPtr
14659
0
xmlParseDoc(const xmlChar *cur) {
14660
0
    return(xmlSAXParseDoc(NULL, cur, 0));
14661
0
}
14662
#endif /* LIBXML_SAX1_ENABLED */
14663
14664
#ifdef LIBXML_LEGACY_ENABLED
14665
/************************************************************************
14666
 *                  *
14667
 *  Specific function to keep track of entities references    *
14668
 *  and used by the XSLT debugger         *
14669
 *                  *
14670
 ************************************************************************/
14671
14672
/*
 * Callback invoked by xmlAddEntityReference(); installed through
 * xmlSetEntityReferenceFunc(). NULL means no callback is registered.
 */
static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
14673
14674
/**
14675
 * xmlAddEntityReference:
14676
 * @ent : A valid entity
14677
 * @firstNode : A valid first node for children of entity
14678
 * @lastNode : A valid last node of children entity
14679
 *
14680
 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
14681
 */
14682
static void
14683
xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
14684
                      xmlNodePtr lastNode)
14685
{
14686
    if (xmlEntityRefFunc != NULL) {
14687
        (*xmlEntityRefFunc) (ent, firstNode, lastNode);
14688
    }
14689
}
14690
14691
14692
/**
14693
 * xmlSetEntityReferenceFunc:
14694
 * @func: A valid function
14695
 *
14696
 * Set the function to call call back when a xml reference has been made
14697
 */
14698
void
14699
xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
14700
{
14701
    xmlEntityRefFunc = func;
14702
}
14703
#endif /* LIBXML_LEGACY_ENABLED */
14704
14705
/************************************************************************
14706
 *                  *
14707
 *        Miscellaneous       *
14708
 *                  *
14709
 ************************************************************************/
14710
14711
/*
 * Non-zero once xmlInitParser() has run its initialization sequence;
 * reset to zero by xmlCleanupParser().
 */
static int xmlParserInitialized = 0;
14712
14713
/**
14714
 * xmlInitParser:
14715
 *
14716
 * Initialization function for the XML parser.
14717
 * This is not reentrant. Call once before processing in case of
14718
 * use in multithreaded programs.
14719
 */
14720
14721
void
14722
4.67M
xmlInitParser(void) {
14723
4.67M
    if (xmlParserInitialized != 0)
14724
4.65M
  return;
14725
14726
#if defined(_WIN32) && (!defined(LIBXML_STATIC) || defined(LIBXML_STATIC_FOR_DLL))
14727
    if (xmlFree == free)
14728
        atexit(xmlCleanupParser);
14729
#endif
14730
14731
14.9k
#ifdef LIBXML_THREAD_ENABLED
14732
14.9k
    __xmlGlobalInitMutexLock();
14733
14.9k
    if (xmlParserInitialized == 0) {
14734
14.9k
#endif
14735
14.9k
  xmlInitThreads();
14736
14.9k
  xmlInitGlobals();
14737
14.9k
  xmlInitMemory();
14738
14.9k
        xmlInitializeDict();
14739
14.9k
  xmlInitCharEncodingHandlers();
14740
14.9k
  xmlDefaultSAXHandlerInit();
14741
14.9k
  xmlRegisterDefaultInputCallbacks();
14742
14.9k
#ifdef LIBXML_OUTPUT_ENABLED
14743
14.9k
  xmlRegisterDefaultOutputCallbacks();
14744
14.9k
#endif /* LIBXML_OUTPUT_ENABLED */
14745
14.9k
#ifdef LIBXML_HTML_ENABLED
14746
14.9k
  htmlInitAutoClose();
14747
14.9k
  htmlDefaultSAXHandlerInit();
14748
14.9k
#endif
14749
14.9k
#if defined(LIBXML_XPATH_ENABLED) || defined(LIBXML_SCHEMAS_ENABLED)
14750
14.9k
  xmlXPathInit();
14751
14.9k
#endif
14752
14.9k
  xmlParserInitialized = 1;
14753
14.9k
#ifdef LIBXML_THREAD_ENABLED
14754
14.9k
    }
14755
14.9k
    __xmlGlobalInitMutexUnlock();
14756
14.9k
#endif
14757
14.9k
}
14758
14759
/**
14760
 * xmlCleanupParser:
14761
 *
14762
 * This function name is somewhat misleading. It does not clean up
14763
 * parser state, it cleans up memory allocated by the library itself.
14764
 * It is a cleanup function for the XML library. It tries to reclaim all
14765
 * related global memory allocated for the library processing.
14766
 * It doesn't deallocate any document related memory. One should
14767
 * call xmlCleanupParser() only when the process has finished using
14768
 * the library and all XML/HTML documents built with it.
14769
 * See also xmlInitParser() which has the opposite function of preparing
14770
 * the library for operations.
14771
 *
14772
 * WARNING: if your application is multithreaded or has plugin support
14773
 *          calling this may crash the application if another thread or
14774
 *          a plugin is still using libxml2. It's sometimes very hard to
14775
 *          guess if libxml2 is in use in the application, some libraries
14776
 *          or plugins may use it without notice. In case of doubt abstain
14777
 *          from calling this function or do it just before calling exit()
14778
 *          to avoid leak reports from valgrind !
14779
 */
14780
14781
void
14782
0
xmlCleanupParser(void) {
14783
0
    if (!xmlParserInitialized)
14784
0
  return;
14785
14786
0
    xmlCleanupCharEncodingHandlers();
14787
0
#ifdef LIBXML_CATALOG_ENABLED
14788
0
    xmlCatalogCleanup();
14789
0
#endif
14790
0
    xmlDictCleanup();
14791
0
    xmlCleanupInputCallbacks();
14792
0
#ifdef LIBXML_OUTPUT_ENABLED
14793
0
    xmlCleanupOutputCallbacks();
14794
0
#endif
14795
0
#ifdef LIBXML_SCHEMAS_ENABLED
14796
0
    xmlSchemaCleanupTypes();
14797
0
    xmlRelaxNGCleanupTypes();
14798
0
#endif
14799
0
    xmlCleanupGlobals();
14800
0
    xmlCleanupThreads(); /* must be last if called not from the main thread */
14801
0
    xmlCleanupMemory();
14802
0
    xmlParserInitialized = 0;
14803
0
}
14804
14805
#if defined(HAVE_ATTRIBUTE_DESTRUCTOR) && !defined(LIBXML_STATIC) && \
    !defined(_WIN32)
/*
 * Destructor hook: runs xmlCleanupParser(), but only while the
 * library's deallocator is still the C library free().
 */
static void
ATTRIBUTE_DESTRUCTOR
xmlDestructor(void) {
    /*
     * Calling custom deallocation functions in a destructor can cause
     * problems, for example with Nokogiri.
     */
    if (xmlFree != free)
        return;

    xmlCleanupParser();
}
#endif
14818
14819
/************************************************************************
14820
 *                  *
14821
 *  New set (2.6.0) of simpler and more flexible APIs   *
14822
 *                  *
14823
 ************************************************************************/
14824
14825
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. The macro relies on a variable named "dict" being
 * visible at the point of use; a NULL "dict" means the string is
 * always freed. A NULL @str is ignored.
 *
 * The expansion is a single statement (do/while(0)), so the macro can be
 * used safely as the body of an if/else; terminate the use with ';'.
 */
#define DICT_FREE(str)                                          \
    do {                                                        \
        if ((str) && ((!dict) ||                                \
            (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))  \
            xmlFree((char *)(str));                             \
    } while (0)
14836
14837
/**
14838
 * xmlCtxtReset:
14839
 * @ctxt: an XML parser context
14840
 *
14841
 * Reset a parser context
14842
 */
14843
void
14844
xmlCtxtReset(xmlParserCtxtPtr ctxt)
14845
0
{
14846
0
    xmlParserInputPtr input;
14847
0
    xmlDictPtr dict;
14848
14849
0
    if (ctxt == NULL)
14850
0
        return;
14851
14852
0
    dict = ctxt->dict;
14853
14854
0
    while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
14855
0
        xmlFreeInputStream(input);
14856
0
    }
14857
0
    ctxt->inputNr = 0;
14858
0
    ctxt->input = NULL;
14859
14860
0
    ctxt->spaceNr = 0;
14861
0
    if (ctxt->spaceTab != NULL) {
14862
0
  ctxt->spaceTab[0] = -1;
14863
0
  ctxt->space = &ctxt->spaceTab[0];
14864
0
    } else {
14865
0
        ctxt->space = NULL;
14866
0
    }
14867
14868
14869
0
    ctxt->nodeNr = 0;
14870
0
    ctxt->node = NULL;
14871
14872
0
    ctxt->nameNr = 0;
14873
0
    ctxt->name = NULL;
14874
14875
0
    ctxt->nsNr = 0;
14876
14877
0
    DICT_FREE(ctxt->version);
14878
0
    ctxt->version = NULL;
14879
0
    DICT_FREE(ctxt->encoding);
14880
0
    ctxt->encoding = NULL;
14881
0
    DICT_FREE(ctxt->directory);
14882
0
    ctxt->directory = NULL;
14883
0
    DICT_FREE(ctxt->extSubURI);
14884
0
    ctxt->extSubURI = NULL;
14885
0
    DICT_FREE(ctxt->extSubSystem);
14886
0
    ctxt->extSubSystem = NULL;
14887
0
    if (ctxt->myDoc != NULL)
14888
0
        xmlFreeDoc(ctxt->myDoc);
14889
0
    ctxt->myDoc = NULL;
14890
14891
0
    ctxt->standalone = -1;
14892
0
    ctxt->hasExternalSubset = 0;
14893
0
    ctxt->hasPErefs = 0;
14894
0
    ctxt->html = 0;
14895
0
    ctxt->external = 0;
14896
0
    ctxt->instate = XML_PARSER_START;
14897
0
    ctxt->token = 0;
14898
14899
0
    ctxt->wellFormed = 1;
14900
0
    ctxt->nsWellFormed = 1;
14901
0
    ctxt->disableSAX = 0;
14902
0
    ctxt->valid = 1;
14903
#if 0
14904
    ctxt->vctxt.userData = ctxt;
14905
    ctxt->vctxt.error = xmlParserValidityError;
14906
    ctxt->vctxt.warning = xmlParserValidityWarning;
14907
#endif
14908
0
    ctxt->record_info = 0;
14909
0
    ctxt->checkIndex = 0;
14910
0
    ctxt->inSubset = 0;
14911
0
    ctxt->errNo = XML_ERR_OK;
14912
0
    ctxt->depth = 0;
14913
0
    ctxt->charset = XML_CHAR_ENCODING_UTF8;
14914
0
    ctxt->catalogs = NULL;
14915
0
    ctxt->nbentities = 0;
14916
0
    ctxt->sizeentities = 0;
14917
0
    ctxt->sizeentcopy = 0;
14918
0
    xmlInitNodeInfoSeq(&ctxt->node_seq);
14919
14920
0
    if (ctxt->attsDefault != NULL) {
14921
0
        xmlHashFree(ctxt->attsDefault, xmlHashDefaultDeallocator);
14922
0
        ctxt->attsDefault = NULL;
14923
0
    }
14924
0
    if (ctxt->attsSpecial != NULL) {
14925
0
        xmlHashFree(ctxt->attsSpecial, NULL);
14926
0
        ctxt->attsSpecial = NULL;
14927
0
    }
14928
14929
0
#ifdef LIBXML_CATALOG_ENABLED
14930
0
    if (ctxt->catalogs != NULL)
14931
0
  xmlCatalogFreeLocal(ctxt->catalogs);
14932
0
#endif
14933
0
    if (ctxt->lastError.code != XML_ERR_OK)
14934
0
        xmlResetError(&ctxt->lastError);
14935
0
}
14936
14937
/**
14938
 * xmlCtxtResetPush:
14939
 * @ctxt: an XML parser context
14940
 * @chunk:  a pointer to an array of chars
14941
 * @size:  number of chars in the array
14942
 * @filename:  an optional file name or URI
14943
 * @encoding:  the document encoding, or NULL
14944
 *
14945
 * Reset a push parser context
14946
 *
14947
 * Returns 0 in case of success and 1 in case of error
14948
 */
14949
int
14950
xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
14951
                 int size, const char *filename, const char *encoding)
14952
0
{
14953
0
    xmlParserInputPtr inputStream;
14954
0
    xmlParserInputBufferPtr buf;
14955
0
    xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
14956
14957
0
    if (ctxt == NULL)
14958
0
        return(1);
14959
14960
0
    if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
14961
0
        enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
14962
14963
0
    buf = xmlAllocParserInputBuffer(enc);
14964
0
    if (buf == NULL)
14965
0
        return(1);
14966
14967
0
    if (ctxt == NULL) {
14968
0
        xmlFreeParserInputBuffer(buf);
14969
0
        return(1);
14970
0
    }
14971
14972
0
    xmlCtxtReset(ctxt);
14973
14974
0
    if (filename == NULL) {
14975
0
        ctxt->directory = NULL;
14976
0
    } else {
14977
0
        ctxt->directory = xmlParserGetDirectory(filename);
14978
0
    }
14979
14980
0
    inputStream = xmlNewInputStream(ctxt);
14981
0
    if (inputStream == NULL) {
14982
0
        xmlFreeParserInputBuffer(buf);
14983
0
        return(1);
14984
0
    }
14985
14986
0
    if (filename == NULL)
14987
0
        inputStream->filename = NULL;
14988
0
    else
14989
0
        inputStream->filename = (char *)
14990
0
            xmlCanonicPath((const xmlChar *) filename);
14991
0
    inputStream->buf = buf;
14992
0
    xmlBufResetInput(buf->buffer, inputStream);
14993
14994
0
    inputPush(ctxt, inputStream);
14995
14996
0
    if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
14997
0
        (ctxt->input->buf != NULL)) {
14998
0
  size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
14999
0
        size_t cur = ctxt->input->cur - ctxt->input->base;
15000
15001
0
        xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
15002
15003
0
        xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
15004
#ifdef DEBUG_PUSH
15005
        xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
15006
#endif
15007
0
    }
15008
15009
0
    if (encoding != NULL) {
15010
0
        xmlCharEncodingHandlerPtr hdlr;
15011
15012
0
        if (ctxt->encoding != NULL)
15013
0
      xmlFree((xmlChar *) ctxt->encoding);
15014
0
        ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
15015
15016
0
        hdlr = xmlFindCharEncodingHandler(encoding);
15017
0
        if (hdlr != NULL) {
15018
0
            xmlSwitchToEncoding(ctxt, hdlr);
15019
0
  } else {
15020
0
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
15021
0
            "Unsupported encoding %s\n", BAD_CAST encoding);
15022
0
        }
15023
0
    } else if (enc != XML_CHAR_ENCODING_NONE) {
15024
0
        xmlSwitchEncoding(ctxt, enc);
15025
0
    }
15026
15027
0
    return(0);
15028
0
}
15029
15030
15031
/**
15032
 * xmlCtxtUseOptionsInternal:
15033
 * @ctxt: an XML parser context
15034
 * @options:  a combination of xmlParserOption
15035
 * @encoding:  the user provided encoding to use
15036
 *
15037
 * Applies the options to the parser context
15038
 *
15039
 * Returns 0 in case of success, the set of unknown or unimplemented options
15040
 *         in case of error.
15041
 */
15042
static int
15043
xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding)
15044
976k
{
15045
976k
    if (ctxt == NULL)
15046
0
        return(-1);
15047
976k
    if (encoding != NULL) {
15048
0
        if (ctxt->encoding != NULL)
15049
0
      xmlFree((xmlChar *) ctxt->encoding);
15050
0
        ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
15051
0
    }
15052
976k
    if (options & XML_PARSE_RECOVER) {
15053
637k
        ctxt->recovery = 1;
15054
637k
        options -= XML_PARSE_RECOVER;
15055
637k
  ctxt->options |= XML_PARSE_RECOVER;
15056
637k
    } else
15057
338k
        ctxt->recovery = 0;
15058
976k
    if (options & XML_PARSE_DTDLOAD) {
15059
715k
        ctxt->loadsubset = XML_DETECT_IDS;
15060
715k
        options -= XML_PARSE_DTDLOAD;
15061
715k
  ctxt->options |= XML_PARSE_DTDLOAD;
15062
715k
    } else
15063
260k
        ctxt->loadsubset = 0;
15064
976k
    if (options & XML_PARSE_DTDATTR) {
15065
433k
        ctxt->loadsubset |= XML_COMPLETE_ATTRS;
15066
433k
        options -= XML_PARSE_DTDATTR;
15067
433k
  ctxt->options |= XML_PARSE_DTDATTR;
15068
433k
    }
15069
976k
    if (options & XML_PARSE_NOENT) {
15070
576k
        ctxt->replaceEntities = 1;
15071
        /* ctxt->loadsubset |= XML_DETECT_IDS; */
15072
576k
        options -= XML_PARSE_NOENT;
15073
576k
  ctxt->options |= XML_PARSE_NOENT;
15074
576k
    } else
15075
399k
        ctxt->replaceEntities = 0;
15076
976k
    if (options & XML_PARSE_PEDANTIC) {
15077
167k
        ctxt->pedantic = 1;
15078
167k
        options -= XML_PARSE_PEDANTIC;
15079
167k
  ctxt->options |= XML_PARSE_PEDANTIC;
15080
167k
    } else
15081
809k
        ctxt->pedantic = 0;
15082
976k
    if (options & XML_PARSE_NOBLANKS) {
15083
453k
        ctxt->keepBlanks = 0;
15084
453k
        ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
15085
453k
        options -= XML_PARSE_NOBLANKS;
15086
453k
  ctxt->options |= XML_PARSE_NOBLANKS;
15087
453k
    } else
15088
523k
        ctxt->keepBlanks = 1;
15089
976k
    if (options & XML_PARSE_DTDVALID) {
15090
390k
        ctxt->validate = 1;
15091
390k
        if (options & XML_PARSE_NOWARNING)
15092
201k
            ctxt->vctxt.warning = NULL;
15093
390k
        if (options & XML_PARSE_NOERROR)
15094
280k
            ctxt->vctxt.error = NULL;
15095
390k
        options -= XML_PARSE_DTDVALID;
15096
390k
  ctxt->options |= XML_PARSE_DTDVALID;
15097
390k
    } else
15098
585k
        ctxt->validate = 0;
15099
976k
    if (options & XML_PARSE_NOWARNING) {
15100
494k
        ctxt->sax->warning = NULL;
15101
494k
        options -= XML_PARSE_NOWARNING;
15102
494k
    }
15103
976k
    if (options & XML_PARSE_NOERROR) {
15104
629k
        ctxt->sax->error = NULL;
15105
629k
        ctxt->sax->fatalError = NULL;
15106
629k
        options -= XML_PARSE_NOERROR;
15107
629k
    }
15108
976k
#ifdef LIBXML_SAX1_ENABLED
15109
976k
    if (options & XML_PARSE_SAX1) {
15110
507k
        ctxt->sax->startElement = xmlSAX2StartElement;
15111
507k
        ctxt->sax->endElement = xmlSAX2EndElement;
15112
507k
        ctxt->sax->startElementNs = NULL;
15113
507k
        ctxt->sax->endElementNs = NULL;
15114
507k
        ctxt->sax->initialized = 1;
15115
507k
        options -= XML_PARSE_SAX1;
15116
507k
  ctxt->options |= XML_PARSE_SAX1;
15117
507k
    }
15118
976k
#endif /* LIBXML_SAX1_ENABLED */
15119
976k
    if (options & XML_PARSE_NODICT) {
15120
358k
        ctxt->dictNames = 0;
15121
358k
        options -= XML_PARSE_NODICT;
15122
358k
  ctxt->options |= XML_PARSE_NODICT;
15123
617k
    } else {
15124
617k
        ctxt->dictNames = 1;
15125
617k
    }
15126
976k
    if (options & XML_PARSE_NOCDATA) {
15127
481k
        ctxt->sax->cdataBlock = NULL;
15128
481k
        options -= XML_PARSE_NOCDATA;
15129
481k
  ctxt->options |= XML_PARSE_NOCDATA;
15130
481k
    }
15131
976k
    if (options & XML_PARSE_NSCLEAN) {
15132
657k
  ctxt->options |= XML_PARSE_NSCLEAN;
15133
657k
        options -= XML_PARSE_NSCLEAN;
15134
657k
    }
15135
976k
    if (options & XML_PARSE_NONET) {
15136
480k
  ctxt->options |= XML_PARSE_NONET;
15137
480k
        options -= XML_PARSE_NONET;
15138
480k
    }
15139
976k
    if (options & XML_PARSE_COMPACT) {
15140
603k
  ctxt->options |= XML_PARSE_COMPACT;
15141
603k
        options -= XML_PARSE_COMPACT;
15142
603k
    }
15143
976k
    if (options & XML_PARSE_OLD10) {
15144
420k
  ctxt->options |= XML_PARSE_OLD10;
15145
420k
        options -= XML_PARSE_OLD10;
15146
420k
    }
15147
976k
    if (options & XML_PARSE_NOBASEFIX) {
15148
479k
  ctxt->options |= XML_PARSE_NOBASEFIX;
15149
479k
        options -= XML_PARSE_NOBASEFIX;
15150
479k
    }
15151
976k
    if (options & XML_PARSE_HUGE) {
15152
406k
  ctxt->options |= XML_PARSE_HUGE;
15153
406k
        options -= XML_PARSE_HUGE;
15154
406k
        if (ctxt->dict != NULL)
15155
406k
            xmlDictSetLimit(ctxt->dict, 0);
15156
406k
    }
15157
976k
    if (options & XML_PARSE_OLDSAX) {
15158
353k
  ctxt->options |= XML_PARSE_OLDSAX;
15159
353k
        options -= XML_PARSE_OLDSAX;
15160
353k
    }
15161
976k
    if (options & XML_PARSE_IGNORE_ENC) {
15162
613k
  ctxt->options |= XML_PARSE_IGNORE_ENC;
15163
613k
        options -= XML_PARSE_IGNORE_ENC;
15164
613k
    }
15165
976k
    if (options & XML_PARSE_BIG_LINES) {
15166
454k
  ctxt->options |= XML_PARSE_BIG_LINES;
15167
454k
        options -= XML_PARSE_BIG_LINES;
15168
454k
    }
15169
976k
    ctxt->linenumbers = 1;
15170
976k
    return (options);
15171
976k
}
15172
15173
/**
15174
 * xmlCtxtUseOptions:
15175
 * @ctxt: an XML parser context
15176
 * @options:  a combination of xmlParserOption
15177
 *
15178
 * Applies the options to the parser context
15179
 *
15180
 * Returns 0 in case of success, the set of unknown or unimplemented options
15181
 *         in case of error.
15182
 */
15183
int
15184
xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
15185
729k
{
15186
729k
   return(xmlCtxtUseOptionsInternal(ctxt, options, NULL));
15187
729k
}
15188
15189
/**
15190
 * xmlDoRead:
15191
 * @ctxt:  an XML parser context
15192
 * @URL:  the base URL to use for the document
15193
 * @encoding:  the document encoding, or NULL
15194
 * @options:  a combination of xmlParserOption
15195
 * @reuse:  keep the context for reuse
15196
 *
15197
 * Common front-end for the xmlRead functions
15198
 *
15199
 * Returns the resulting document tree or NULL
15200
 */
15201
static xmlDocPtr
15202
xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
15203
          int options, int reuse)
15204
246k
{
15205
246k
    xmlDocPtr ret;
15206
15207
246k
    xmlCtxtUseOptionsInternal(ctxt, options, encoding);
15208
246k
    if (encoding != NULL) {
15209
0
        xmlCharEncodingHandlerPtr hdlr;
15210
15211
0
  hdlr = xmlFindCharEncodingHandler(encoding);
15212
0
  if (hdlr != NULL)
15213
0
      xmlSwitchToEncoding(ctxt, hdlr);
15214
0
    }
15215
246k
    if ((URL != NULL) && (ctxt->input != NULL) &&
15216
246k
        (ctxt->input->filename == NULL))
15217
246k
        ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
15218
246k
    xmlParseDocument(ctxt);
15219
246k
    if ((ctxt->wellFormed) || ctxt->recovery)
15220
179k
        ret = ctxt->myDoc;
15221
67.8k
    else {
15222
67.8k
        ret = NULL;
15223
67.8k
  if (ctxt->myDoc != NULL) {
15224
63.3k
      xmlFreeDoc(ctxt->myDoc);
15225
63.3k
  }
15226
67.8k
    }
15227
246k
    ctxt->myDoc = NULL;
15228
246k
    if (!reuse) {
15229
246k
  xmlFreeParserCtxt(ctxt);
15230
246k
    }
15231
15232
246k
    return (ret);
15233
246k
}
15234
15235
/**
15236
 * xmlReadDoc:
15237
 * @cur:  a pointer to a zero terminated string
15238
 * @URL:  the base URL to use for the document
15239
 * @encoding:  the document encoding, or NULL
15240
 * @options:  a combination of xmlParserOption
15241
 *
15242
 * parse an XML in-memory document and build a tree.
15243
 *
15244
 * Returns the resulting document tree
15245
 */
15246
xmlDocPtr
15247
xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
15248
0
{
15249
0
    xmlParserCtxtPtr ctxt;
15250
15251
0
    if (cur == NULL)
15252
0
        return (NULL);
15253
0
    xmlInitParser();
15254
15255
0
    ctxt = xmlCreateDocParserCtxt(cur);
15256
0
    if (ctxt == NULL)
15257
0
        return (NULL);
15258
0
    return (xmlDoRead(ctxt, URL, encoding, options, 0));
15259
0
}
15260
15261
/**
15262
 * xmlReadFile:
15263
 * @filename:  a file or URL
15264
 * @encoding:  the document encoding, or NULL
15265
 * @options:  a combination of xmlParserOption
15266
 *
15267
 * parse an XML file from the filesystem or the network.
15268
 *
15269
 * Returns the resulting document tree
15270
 */
15271
xmlDocPtr
15272
xmlReadFile(const char *filename, const char *encoding, int options)
15273
0
{
15274
0
    xmlParserCtxtPtr ctxt;
15275
15276
0
    xmlInitParser();
15277
0
    ctxt = xmlCreateURLParserCtxt(filename, options);
15278
0
    if (ctxt == NULL)
15279
0
        return (NULL);
15280
0
    return (xmlDoRead(ctxt, NULL, encoding, options, 0));
15281
0
}
15282
15283
/**
15284
 * xmlReadMemory:
15285
 * @buffer:  a pointer to a char array
15286
 * @size:  the size of the array
15287
 * @URL:  the base URL to use for the document
15288
 * @encoding:  the document encoding, or NULL
15289
 * @options:  a combination of xmlParserOption
15290
 *
15291
 * parse an XML in-memory document and build a tree.
15292
 *
15293
 * Returns the resulting document tree
15294
 */
15295
xmlDocPtr
15296
xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
15297
247k
{
15298
247k
    xmlParserCtxtPtr ctxt;
15299
15300
247k
    xmlInitParser();
15301
247k
    ctxt = xmlCreateMemoryParserCtxt(buffer, size);
15302
247k
    if (ctxt == NULL)
15303
28
        return (NULL);
15304
246k
    return (xmlDoRead(ctxt, URL, encoding, options, 0));
15305
247k
}
15306
15307
/**
15308
 * xmlReadFd:
15309
 * @fd:  an open file descriptor
15310
 * @URL:  the base URL to use for the document
15311
 * @encoding:  the document encoding, or NULL
15312
 * @options:  a combination of xmlParserOption
15313
 *
15314
 * parse an XML from a file descriptor and build a tree.
15315
 * NOTE that the file descriptor will not be closed when the
15316
 *      reader is closed or reset.
15317
 *
15318
 * Returns the resulting document tree
15319
 */
15320
xmlDocPtr
15321
xmlReadFd(int fd, const char *URL, const char *encoding, int options)
15322
0
{
15323
0
    xmlParserCtxtPtr ctxt;
15324
0
    xmlParserInputBufferPtr input;
15325
0
    xmlParserInputPtr stream;
15326
15327
0
    if (fd < 0)
15328
0
        return (NULL);
15329
0
    xmlInitParser();
15330
15331
0
    input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15332
0
    if (input == NULL)
15333
0
        return (NULL);
15334
0
    input->closecallback = NULL;
15335
0
    ctxt = xmlNewParserCtxt();
15336
0
    if (ctxt == NULL) {
15337
0
        xmlFreeParserInputBuffer(input);
15338
0
        return (NULL);
15339
0
    }
15340
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15341
0
    if (stream == NULL) {
15342
0
        xmlFreeParserInputBuffer(input);
15343
0
  xmlFreeParserCtxt(ctxt);
15344
0
        return (NULL);
15345
0
    }
15346
0
    inputPush(ctxt, stream);
15347
0
    return (xmlDoRead(ctxt, URL, encoding, options, 0));
15348
0
}
15349
15350
/**
15351
 * xmlReadIO:
15352
 * @ioread:  an I/O read function
15353
 * @ioclose:  an I/O close function
15354
 * @ioctx:  an I/O handler
15355
 * @URL:  the base URL to use for the document
15356
 * @encoding:  the document encoding, or NULL
15357
 * @options:  a combination of xmlParserOption
15358
 *
15359
 * parse an XML document from I/O functions and source and build a tree.
15360
 *
15361
 * Returns the resulting document tree
15362
 */
15363
xmlDocPtr
15364
xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
15365
          void *ioctx, const char *URL, const char *encoding, int options)
15366
0
{
15367
0
    xmlParserCtxtPtr ctxt;
15368
0
    xmlParserInputBufferPtr input;
15369
0
    xmlParserInputPtr stream;
15370
15371
0
    if (ioread == NULL)
15372
0
        return (NULL);
15373
0
    xmlInitParser();
15374
15375
0
    input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15376
0
                                         XML_CHAR_ENCODING_NONE);
15377
0
    if (input == NULL) {
15378
0
        if (ioclose != NULL)
15379
0
            ioclose(ioctx);
15380
0
        return (NULL);
15381
0
    }
15382
0
    ctxt = xmlNewParserCtxt();
15383
0
    if (ctxt == NULL) {
15384
0
        xmlFreeParserInputBuffer(input);
15385
0
        return (NULL);
15386
0
    }
15387
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15388
0
    if (stream == NULL) {
15389
0
        xmlFreeParserInputBuffer(input);
15390
0
  xmlFreeParserCtxt(ctxt);
15391
0
        return (NULL);
15392
0
    }
15393
0
    inputPush(ctxt, stream);
15394
0
    return (xmlDoRead(ctxt, URL, encoding, options, 0));
15395
0
}
15396
15397
/**
15398
 * xmlCtxtReadDoc:
15399
 * @ctxt:  an XML parser context
15400
 * @cur:  a pointer to a zero terminated string
15401
 * @URL:  the base URL to use for the document
15402
 * @encoding:  the document encoding, or NULL
15403
 * @options:  a combination of xmlParserOption
15404
 *
15405
 * parse an XML in-memory document and build a tree.
15406
 * This reuses the existing @ctxt parser context
15407
 *
15408
 * Returns the resulting document tree
15409
 */
15410
xmlDocPtr
15411
xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
15412
               const char *URL, const char *encoding, int options)
15413
0
{
15414
0
    if (cur == NULL)
15415
0
        return (NULL);
15416
0
    return (xmlCtxtReadMemory(ctxt, (const char *) cur, xmlStrlen(cur), URL,
15417
0
                              encoding, options));
15418
0
}
15419
15420
/**
15421
 * xmlCtxtReadFile:
15422
 * @ctxt:  an XML parser context
15423
 * @filename:  a file or URL
15424
 * @encoding:  the document encoding, or NULL
15425
 * @options:  a combination of xmlParserOption
15426
 *
15427
 * parse an XML file from the filesystem or the network.
15428
 * This reuses the existing @ctxt parser context
15429
 *
15430
 * Returns the resulting document tree
15431
 */
15432
xmlDocPtr
15433
xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
15434
                const char *encoding, int options)
15435
0
{
15436
0
    xmlParserInputPtr stream;
15437
15438
0
    if (filename == NULL)
15439
0
        return (NULL);
15440
0
    if (ctxt == NULL)
15441
0
        return (NULL);
15442
0
    xmlInitParser();
15443
15444
0
    xmlCtxtReset(ctxt);
15445
15446
0
    stream = xmlLoadExternalEntity(filename, NULL, ctxt);
15447
0
    if (stream == NULL) {
15448
0
        return (NULL);
15449
0
    }
15450
0
    inputPush(ctxt, stream);
15451
0
    return (xmlDoRead(ctxt, NULL, encoding, options, 1));
15452
0
}
15453
15454
/**
15455
 * xmlCtxtReadMemory:
15456
 * @ctxt:  an XML parser context
15457
 * @buffer:  a pointer to a char array
15458
 * @size:  the size of the array
15459
 * @URL:  the base URL to use for the document
15460
 * @encoding:  the document encoding, or NULL
15461
 * @options:  a combination of xmlParserOption
15462
 *
15463
 * parse an XML in-memory document and build a tree.
15464
 * This reuses the existing @ctxt parser context
15465
 *
15466
 * Returns the resulting document tree
15467
 */
15468
xmlDocPtr
15469
xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
15470
                  const char *URL, const char *encoding, int options)
15471
0
{
15472
0
    xmlParserInputBufferPtr input;
15473
0
    xmlParserInputPtr stream;
15474
15475
0
    if (ctxt == NULL)
15476
0
        return (NULL);
15477
0
    if (buffer == NULL)
15478
0
        return (NULL);
15479
0
    xmlInitParser();
15480
15481
0
    xmlCtxtReset(ctxt);
15482
15483
0
    input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
15484
0
    if (input == NULL) {
15485
0
  return(NULL);
15486
0
    }
15487
15488
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15489
0
    if (stream == NULL) {
15490
0
  xmlFreeParserInputBuffer(input);
15491
0
  return(NULL);
15492
0
    }
15493
15494
0
    inputPush(ctxt, stream);
15495
0
    return (xmlDoRead(ctxt, URL, encoding, options, 1));
15496
0
}
15497
15498
/**
15499
 * xmlCtxtReadFd:
15500
 * @ctxt:  an XML parser context
15501
 * @fd:  an open file descriptor
15502
 * @URL:  the base URL to use for the document
15503
 * @encoding:  the document encoding, or NULL
15504
 * @options:  a combination of xmlParserOption
15505
 *
15506
 * parse an XML from a file descriptor and build a tree.
15507
 * This reuses the existing @ctxt parser context
15508
 * NOTE that the file descriptor will not be closed when the
15509
 *      reader is closed or reset.
15510
 *
15511
 * Returns the resulting document tree
15512
 */
15513
xmlDocPtr
15514
xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
15515
              const char *URL, const char *encoding, int options)
15516
0
{
15517
0
    xmlParserInputBufferPtr input;
15518
0
    xmlParserInputPtr stream;
15519
15520
0
    if (fd < 0)
15521
0
        return (NULL);
15522
0
    if (ctxt == NULL)
15523
0
        return (NULL);
15524
0
    xmlInitParser();
15525
15526
0
    xmlCtxtReset(ctxt);
15527
15528
15529
0
    input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15530
0
    if (input == NULL)
15531
0
        return (NULL);
15532
0
    input->closecallback = NULL;
15533
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15534
0
    if (stream == NULL) {
15535
0
        xmlFreeParserInputBuffer(input);
15536
0
        return (NULL);
15537
0
    }
15538
0
    inputPush(ctxt, stream);
15539
0
    return (xmlDoRead(ctxt, URL, encoding, options, 1));
15540
0
}
15541
15542
/**
15543
 * xmlCtxtReadIO:
15544
 * @ctxt:  an XML parser context
15545
 * @ioread:  an I/O read function
15546
 * @ioclose:  an I/O close function
15547
 * @ioctx:  an I/O handler
15548
 * @URL:  the base URL to use for the document
15549
 * @encoding:  the document encoding, or NULL
15550
 * @options:  a combination of xmlParserOption
15551
 *
15552
 * parse an XML document from I/O functions and source and build a tree.
15553
 * This reuses the existing @ctxt parser context
15554
 *
15555
 * Returns the resulting document tree
15556
 */
15557
xmlDocPtr
15558
xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
15559
              xmlInputCloseCallback ioclose, void *ioctx,
15560
        const char *URL,
15561
              const char *encoding, int options)
15562
0
{
15563
0
    xmlParserInputBufferPtr input;
15564
0
    xmlParserInputPtr stream;
15565
15566
0
    if (ioread == NULL)
15567
0
        return (NULL);
15568
0
    if (ctxt == NULL)
15569
0
        return (NULL);
15570
0
    xmlInitParser();
15571
15572
0
    xmlCtxtReset(ctxt);
15573
15574
0
    input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15575
0
                                         XML_CHAR_ENCODING_NONE);
15576
0
    if (input == NULL) {
15577
0
        if (ioclose != NULL)
15578
0
            ioclose(ioctx);
15579
0
        return (NULL);
15580
0
    }
15581
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15582
0
    if (stream == NULL) {
15583
0
        xmlFreeParserInputBuffer(input);
15584
0
        return (NULL);
15585
0
    }
15586
0
    inputPush(ctxt, stream);
15587
0
    return (xmlDoRead(ctxt, URL, encoding, options, 1));
15588
0
}
15589