Coverage Report

Created: 2026-04-29 07:28

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libxml2-2.9.7/parser.c
Line
Count
Source
1
/*
2
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3
 *            implemented on top of the SAX interfaces
4
 *
5
 * References:
6
 *   The XML specification:
7
 *     http://www.w3.org/TR/REC-xml
8
 *   Original 1.0 version:
9
 *     http://www.w3.org/TR/1998/REC-xml-19980210
10
 *   XML second edition working draft
11
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
12
 *
13
 * Okay this is a big file, the parser core is around 7000 lines, then it
14
 * is followed by the progressive parser top routines, then the various
15
 * high level APIs to call the parser and a few miscellaneous functions.
16
 * A number of helper functions and deprecated ones have been moved to
17
 * parserInternals.c to reduce this file size.
18
 * As much as possible the functions are associated with their relative
19
 * production in the XML specification. A few productions defining the
20
 * different ranges of characters are actually implemented either in
21
 * parserInternals.h or parserInternals.c
22
 * The DOM tree build is realized from the default SAX callbacks in
23
 * the module SAX.c.
24
 * The routines doing the validation checks are in valid.c and called either
25
 * from the SAX callbacks or as standalone functions using a preparsed
26
 * document.
27
 *
28
 * See Copyright for the status of this software.
29
 *
30
 * daniel@veillard.com
31
 */
32
33
/* To avoid EBCDIC trouble when parsing on zOS */
34
#if defined(__MVS__)
35
#pragma convert("ISO8859-1")
36
#endif
37
38
#define IN_LIBXML
39
#include "libxml.h"
40
41
#if defined(_WIN32) && !defined (__CYGWIN__)
42
#define XML_DIR_SEP '\\'
43
#else
44
#define XML_DIR_SEP '/'
45
#endif
46
47
#include <stdlib.h>
48
#include <limits.h>
49
#include <string.h>
50
#include <stdarg.h>
51
#include <stddef.h>
52
#include <libxml/xmlmemory.h>
53
#include <libxml/threads.h>
54
#include <libxml/globals.h>
55
#include <libxml/tree.h>
56
#include <libxml/parser.h>
57
#include <libxml/parserInternals.h>
58
#include <libxml/valid.h>
59
#include <libxml/entities.h>
60
#include <libxml/xmlerror.h>
61
#include <libxml/encoding.h>
62
#include <libxml/xmlIO.h>
63
#include <libxml/uri.h>
64
#ifdef LIBXML_CATALOG_ENABLED
65
#include <libxml/catalog.h>
66
#endif
67
#ifdef LIBXML_SCHEMAS_ENABLED
68
#include <libxml/xmlschemastypes.h>
69
#include <libxml/relaxng.h>
70
#endif
71
#ifdef HAVE_CTYPE_H
72
#include <ctype.h>
73
#endif
74
#ifdef HAVE_STDLIB_H
75
#include <stdlib.h>
76
#endif
77
#ifdef HAVE_SYS_STAT_H
78
#include <sys/stat.h>
79
#endif
80
#ifdef HAVE_FCNTL_H
81
#include <fcntl.h>
82
#endif
83
#ifdef HAVE_UNISTD_H
84
#include <unistd.h>
85
#endif
86
#ifdef HAVE_ZLIB_H
87
#include <zlib.h>
88
#endif
89
#ifdef HAVE_LZMA_H
90
#include <lzma.h>
91
#endif
92
93
#include "buf.h"
94
#include "enc.h"
95
96
static void
97
xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info);
98
99
static xmlParserCtxtPtr
100
xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
101
                    const xmlChar *base, xmlParserCtxtPtr pctx);
102
103
static void xmlHaltParser(xmlParserCtxtPtr ctxt);
104
105
/************************************************************************
106
 *                  *
107
 *  Arbitrary limits set in the parser. See XML_PARSE_HUGE    *
108
 *                  *
109
 ************************************************************************/
110
111
59.3k
#define XML_PARSER_BIG_ENTITY 1000
112
#define XML_PARSER_LOT_ENTITY 5000
113
114
/*
115
 * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity
116
 *    replacement over the size in byte of the input indicates that you have
117
 *    an exponential behaviour. A value of 10 corresponds to at least 3 entity
118
 *    replacement per byte of input.
119
 */
120
736k
#define XML_PARSER_NON_LINEAR 10
121
122
/*
123
 * xmlParserEntityCheck
124
 *
125
 * Function to check non-linear entity expansion behaviour
126
 * This is here to detect and stop exponential linear entity expansion
127
 * This is not a limitation of the parser but a safety
128
 * boundary feature. It can be disabled with the XML_PARSE_HUGE
129
 * parser option.
130
 */
131
static int
132
xmlParserEntityCheck(xmlParserCtxtPtr ctxt, size_t size,
133
                     xmlEntityPtr ent, size_t replacement)
134
2.36M
{
135
2.36M
    size_t consumed = 0;
136
137
2.36M
    if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE))
138
0
        return (0);
139
2.36M
    if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
140
574k
        return (1);
141
142
    /*
143
     * This may look absurd but is needed to detect
144
     * entities problems
145
     */
146
1.78M
    if ((ent != NULL) && (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
147
731k
  (ent->content != NULL) && (ent->checked == 0) &&
148
19.8k
  (ctxt->errNo != XML_ERR_ENTITY_LOOP)) {
149
19.8k
  unsigned long oldnbent = ctxt->nbentities;
150
19.8k
  xmlChar *rep;
151
152
19.8k
  ent->checked = 1;
153
154
19.8k
        ++ctxt->depth;
155
19.8k
  rep = xmlStringDecodeEntities(ctxt, ent->content,
156
19.8k
          XML_SUBSTITUTE_REF, 0, 0, 0);
157
19.8k
        --ctxt->depth;
158
19.8k
  if (ctxt->errNo == XML_ERR_ENTITY_LOOP) {
159
10.0k
      ent->content[0] = 0;
160
10.0k
  }
161
162
19.8k
  ent->checked = (ctxt->nbentities - oldnbent + 1) * 2;
163
19.8k
  if (rep != NULL) {
164
16.5k
      if (xmlStrchr(rep, '<'))
165
4.12k
    ent->checked |= 1;
166
16.5k
      xmlFree(rep);
167
16.5k
      rep = NULL;
168
16.5k
  }
169
19.8k
    }
170
1.78M
    if (replacement != 0) {
171
0
  if (replacement < XML_MAX_TEXT_LENGTH)
172
0
      return(0);
173
174
        /*
175
   * If the volume of entity copy reaches 10 times the
176
   * amount of parsed data and over the large text threshold
177
   * then that's very likely to be an abuse.
178
   */
179
0
        if (ctxt->input != NULL) {
180
0
      consumed = ctxt->input->consumed +
181
0
                 (ctxt->input->cur - ctxt->input->base);
182
0
  }
183
0
        consumed += ctxt->sizeentities;
184
185
0
        if (replacement < XML_PARSER_NON_LINEAR * consumed)
186
0
      return(0);
187
1.78M
    } else if (size != 0) {
188
        /*
189
         * Do the check based on the replacement size of the entity
190
         */
191
59.3k
        if (size < XML_PARSER_BIG_ENTITY)
192
51.8k
      return(0);
193
194
        /*
195
         * A limit on the amount of text data reasonably used
196
         */
197
7.51k
        if (ctxt->input != NULL) {
198
7.51k
            consumed = ctxt->input->consumed +
199
7.51k
                (ctxt->input->cur - ctxt->input->base);
200
7.51k
        }
201
7.51k
        consumed += ctxt->sizeentities;
202
203
7.51k
        if ((size < XML_PARSER_NON_LINEAR * consumed) &&
204
7.47k
      (ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed))
205
6.76k
            return (0);
206
1.72M
    } else if (ent != NULL) {
207
        /*
208
         * use the number of parsed entities in the replacement
209
         */
210
721k
        size = ent->checked / 2;
211
212
        /*
213
         * The amount of data parsed counting entities size only once
214
         */
215
721k
        if (ctxt->input != NULL) {
216
721k
            consumed = ctxt->input->consumed +
217
721k
                (ctxt->input->cur - ctxt->input->base);
218
721k
        }
219
721k
        consumed += ctxt->sizeentities;
220
221
        /*
222
         * Check the density of entities for the amount of data
223
   * knowing an entity reference will take at least 3 bytes
224
         */
225
721k
        if (size * 3 < consumed * XML_PARSER_NON_LINEAR)
226
653k
            return (0);
227
1.00M
    } else {
228
        /*
229
         * strange we got no data for checking
230
         */
231
1.00M
  if (((ctxt->lastError.code != XML_ERR_UNDECLARED_ENTITY) &&
232
407k
       (ctxt->lastError.code != XML_WAR_UNDECLARED_ENTITY)) ||
233
949k
      (ctxt->nbentities <= 10000))
234
700k
      return (0);
235
1.00M
    }
236
376k
    xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
237
376k
    return (1);
238
1.78M
}
239
240
/**
241
 * xmlParserMaxDepth:
242
 *
243
 * arbitrary depth limit for the XML documents that we allow to
244
 * process. This is not a limitation of the parser but a safety
245
 * boundary feature. It can be disabled with the XML_PARSE_HUGE
246
 * parser option.
247
 */
248
unsigned int xmlParserMaxDepth = 256;
249
250
251
252
#define SAX2 1
253
1.15G
#define XML_PARSER_BIG_BUFFER_SIZE 300
254
3.12G
#define XML_PARSER_BUFFER_SIZE 100
255
152k
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
256
257
/**
258
 * XML_PARSER_CHUNK_SIZE
259
 *
260
 * When calling GROW that's the minimal amount of data
261
 * the parser expected to have received. It is not a hard
262
 * limit but an optimization when reading strings like Names
263
 * It is not strictly needed as long as the input's available characters
264
 * are followed by 0, which should be provided by the I/O level
265
 */
266
389M
#define XML_PARSER_CHUNK_SIZE 100
267
268
/*
269
 * List of XML prefixed PI allowed by W3C specs
270
 */
271
272
static const char *xmlW3CPIs[] = {
273
    "xml-stylesheet",
274
    "xml-model",
275
    NULL
276
};
277
278
279
/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
280
static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
281
                                              const xmlChar **str);
282
283
static xmlParserErrors
284
xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
285
                xmlSAXHandlerPtr sax,
286
          void *user_data, int depth, const xmlChar *URL,
287
          const xmlChar *ID, xmlNodePtr *list);
288
289
static int
290
xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
291
                          const char *encoding);
292
#ifdef LIBXML_LEGACY_ENABLED
293
static void
294
xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
295
                      xmlNodePtr lastNode);
296
#endif /* LIBXML_LEGACY_ENABLED */
297
298
static xmlParserErrors
299
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
300
          const xmlChar *string, void *user_data, xmlNodePtr *lst);
301
302
static int
303
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
304
305
/************************************************************************
306
 *                  *
307
 *    Some factorized error routines        *
308
 *                  *
309
 ************************************************************************/
310
311
/**
312
 * xmlErrAttributeDup:
313
 * @ctxt:  an XML parser context
314
 * @prefix:  the attribute prefix
315
 * @localname:  the attribute localname
316
 *
317
 * Handle a redefinition of attribute error
318
 */
319
static void
320
xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
321
                   const xmlChar * localname)
322
130k
{
323
130k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
324
25.1k
        (ctxt->instate == XML_PARSER_EOF))
325
0
  return;
326
130k
    if (ctxt != NULL)
327
130k
  ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
328
329
130k
    if (prefix == NULL)
330
104k
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
331
104k
                        XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
332
104k
                        (const char *) localname, NULL, NULL, 0, 0,
333
104k
                        "Attribute %s redefined\n", localname);
334
25.6k
    else
335
25.6k
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
336
25.6k
                        XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
337
25.6k
                        (const char *) prefix, (const char *) localname,
338
25.6k
                        NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
339
25.6k
                        localname);
340
130k
    if (ctxt != NULL) {
341
130k
  ctxt->wellFormed = 0;
342
130k
  if (ctxt->recovery == 0)
343
25.2k
      ctxt->disableSAX = 1;
344
130k
    }
345
130k
}
346
347
/**
348
 * xmlFatalErr:
349
 * @ctxt:  an XML parser context
350
 * @error:  the error number
351
 * @extra:  extra information string
352
 *
353
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
354
 */
355
static void
356
xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
357
5.50M
{
358
5.50M
    const char *errmsg;
359
360
5.50M
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
361
767k
        (ctxt->instate == XML_PARSER_EOF))
362
258k
  return;
363
5.24M
    switch (error) {
364
134k
        case XML_ERR_INVALID_HEX_CHARREF:
365
134k
            errmsg = "CharRef: invalid hexadecimal value";
366
134k
            break;
367
277k
        case XML_ERR_INVALID_DEC_CHARREF:
368
277k
            errmsg = "CharRef: invalid decimal value";
369
277k
            break;
370
0
        case XML_ERR_INVALID_CHARREF:
371
0
            errmsg = "CharRef: invalid value";
372
0
            break;
373
191k
        case XML_ERR_INTERNAL_ERROR:
374
191k
            errmsg = "internal error";
375
191k
            break;
376
0
        case XML_ERR_PEREF_AT_EOF:
377
0
            errmsg = "PEReference at end of document";
378
0
            break;
379
0
        case XML_ERR_PEREF_IN_PROLOG:
380
0
            errmsg = "PEReference in prolog";
381
0
            break;
382
0
        case XML_ERR_PEREF_IN_EPILOG:
383
0
            errmsg = "PEReference in epilog";
384
0
            break;
385
0
        case XML_ERR_PEREF_NO_NAME:
386
0
            errmsg = "PEReference: no name";
387
0
            break;
388
204k
        case XML_ERR_PEREF_SEMICOL_MISSING:
389
204k
            errmsg = "PEReference: expecting ';'";
390
204k
            break;
391
398k
        case XML_ERR_ENTITY_LOOP:
392
398k
            errmsg = "Detected an entity reference loop";
393
398k
            break;
394
0
        case XML_ERR_ENTITY_NOT_STARTED:
395
0
            errmsg = "EntityValue: \" or ' expected";
396
0
            break;
397
1.66k
        case XML_ERR_ENTITY_PE_INTERNAL:
398
1.66k
            errmsg = "PEReferences forbidden in internal subset";
399
1.66k
            break;
400
491
        case XML_ERR_ENTITY_NOT_FINISHED:
401
491
            errmsg = "EntityValue: \" or ' expected";
402
491
            break;
403
251k
        case XML_ERR_ATTRIBUTE_NOT_STARTED:
404
251k
            errmsg = "AttValue: \" or ' expected";
405
251k
            break;
406
638k
        case XML_ERR_LT_IN_ATTRIBUTE:
407
638k
            errmsg = "Unescaped '<' not allowed in attributes values";
408
638k
            break;
409
12.2k
        case XML_ERR_LITERAL_NOT_STARTED:
410
12.2k
            errmsg = "SystemLiteral \" or ' expected";
411
12.2k
            break;
412
24.2k
        case XML_ERR_LITERAL_NOT_FINISHED:
413
24.2k
            errmsg = "Unfinished System or Public ID \" or ' expected";
414
24.2k
            break;
415
47.8k
        case XML_ERR_MISPLACED_CDATA_END:
416
47.8k
            errmsg = "Sequence ']]>' not allowed in content";
417
47.8k
            break;
418
8.46k
        case XML_ERR_URI_REQUIRED:
419
8.46k
            errmsg = "SYSTEM or PUBLIC, the URI is missing";
420
8.46k
            break;
421
4.47k
        case XML_ERR_PUBID_REQUIRED:
422
4.47k
            errmsg = "PUBLIC, the Public Identifier is missing";
423
4.47k
            break;
424
176k
        case XML_ERR_HYPHEN_IN_COMMENT:
425
176k
            errmsg = "Comment must not contain '--' (double-hyphen)";
426
176k
            break;
427
237k
        case XML_ERR_PI_NOT_STARTED:
428
237k
            errmsg = "xmlParsePI : no target name";
429
237k
            break;
430
7.79k
        case XML_ERR_RESERVED_XML_NAME:
431
7.79k
            errmsg = "Invalid PI name";
432
7.79k
            break;
433
3.46k
        case XML_ERR_NOTATION_NOT_STARTED:
434
3.46k
            errmsg = "NOTATION: Name expected here";
435
3.46k
            break;
436
34.1k
        case XML_ERR_NOTATION_NOT_FINISHED:
437
34.1k
            errmsg = "'>' required to close NOTATION declaration";
438
34.1k
            break;
439
7.89k
        case XML_ERR_VALUE_REQUIRED:
440
7.89k
            errmsg = "Entity value required";
441
7.89k
            break;
442
5.66k
        case XML_ERR_URI_FRAGMENT:
443
5.66k
            errmsg = "Fragment not allowed";
444
5.66k
            break;
445
31.6k
        case XML_ERR_ATTLIST_NOT_STARTED:
446
31.6k
            errmsg = "'(' required to start ATTLIST enumeration";
447
31.6k
            break;
448
3.31k
        case XML_ERR_NMTOKEN_REQUIRED:
449
3.31k
            errmsg = "NmToken expected in ATTLIST enumeration";
450
3.31k
            break;
451
6.22k
        case XML_ERR_ATTLIST_NOT_FINISHED:
452
6.22k
            errmsg = "')' required to finish ATTLIST enumeration";
453
6.22k
            break;
454
12.2k
        case XML_ERR_MIXED_NOT_STARTED:
455
12.2k
            errmsg = "MixedContentDecl : '|' or ')*' expected";
456
12.2k
            break;
457
0
        case XML_ERR_PCDATA_REQUIRED:
458
0
            errmsg = "MixedContentDecl : '#PCDATA' expected";
459
0
            break;
460
62.4k
        case XML_ERR_ELEMCONTENT_NOT_STARTED:
461
62.4k
            errmsg = "ContentDecl : Name or '(' expected";
462
62.4k
            break;
463
255k
        case XML_ERR_ELEMCONTENT_NOT_FINISHED:
464
255k
            errmsg = "ContentDecl : ',' '|' or ')' expected";
465
255k
            break;
466
0
        case XML_ERR_PEREF_IN_INT_SUBSET:
467
0
            errmsg =
468
0
                "PEReference: forbidden within markup decl in internal subset";
469
0
            break;
470
1.09M
        case XML_ERR_GT_REQUIRED:
471
1.09M
            errmsg = "expected '>'";
472
1.09M
            break;
473
43
        case XML_ERR_CONDSEC_INVALID:
474
43
            errmsg = "XML conditional section '[' expected";
475
43
            break;
476
338
        case XML_ERR_EXT_SUBSET_NOT_FINISHED:
477
338
            errmsg = "Content error in the external subset";
478
338
            break;
479
133
        case XML_ERR_CONDSEC_INVALID_KEYWORD:
480
133
            errmsg =
481
133
                "conditional section INCLUDE or IGNORE keyword expected";
482
133
            break;
483
8.47k
        case XML_ERR_CONDSEC_NOT_FINISHED:
484
8.47k
            errmsg = "XML conditional section not closed";
485
8.47k
            break;
486
0
        case XML_ERR_XMLDECL_NOT_STARTED:
487
0
            errmsg = "Text declaration '<?xml' required";
488
0
            break;
489
20.3k
        case XML_ERR_XMLDECL_NOT_FINISHED:
490
20.3k
            errmsg = "parsing XML declaration: '?>' expected";
491
20.3k
            break;
492
0
        case XML_ERR_EXT_ENTITY_STANDALONE:
493
0
            errmsg = "external parsed entities cannot be standalone";
494
0
            break;
495
791k
        case XML_ERR_ENTITYREF_SEMICOL_MISSING:
496
791k
            errmsg = "EntityRef: expecting ';'";
497
791k
            break;
498
21.3k
        case XML_ERR_DOCTYPE_NOT_FINISHED:
499
21.3k
            errmsg = "DOCTYPE improperly terminated";
500
21.3k
            break;
501
0
        case XML_ERR_LTSLASH_REQUIRED:
502
0
            errmsg = "EndTag: '</' not found";
503
0
            break;
504
724
        case XML_ERR_EQUAL_REQUIRED:
505
724
            errmsg = "expected '='";
506
724
            break;
507
1.73k
        case XML_ERR_STRING_NOT_CLOSED:
508
1.73k
            errmsg = "String not closed expecting \" or '";
509
1.73k
            break;
510
411
        case XML_ERR_STRING_NOT_STARTED:
511
411
            errmsg = "String not started expecting ' or \"";
512
411
            break;
513
189
        case XML_ERR_ENCODING_NAME:
514
189
            errmsg = "Invalid XML encoding name";
515
189
            break;
516
177
        case XML_ERR_STANDALONE_VALUE:
517
177
            errmsg = "standalone accepts only 'yes' or 'no'";
518
177
            break;
519
26.2k
        case XML_ERR_DOCUMENT_EMPTY:
520
26.2k
            errmsg = "Document is empty";
521
26.2k
            break;
522
80.7k
        case XML_ERR_DOCUMENT_END:
523
80.7k
            errmsg = "Extra content at the end of the document";
524
80.7k
            break;
525
123k
        case XML_ERR_NOT_WELL_BALANCED:
526
123k
            errmsg = "chunk is not well balanced";
527
123k
            break;
528
0
        case XML_ERR_EXTRA_CONTENT:
529
0
            errmsg = "extra content at the end of well balanced chunk";
530
0
            break;
531
20.6k
        case XML_ERR_VERSION_MISSING:
532
20.6k
            errmsg = "Malformed declaration expecting version";
533
20.6k
            break;
534
14.4k
        case XML_ERR_NAME_TOO_LONG:
535
14.4k
            errmsg = "Name too long use XML_PARSE_HUGE option";
536
14.4k
            break;
537
#if 0
538
        case:
539
            errmsg = "";
540
            break;
541
#endif
542
722
        default:
543
722
            errmsg = "Unregistered error message";
544
5.24M
    }
545
5.24M
    if (ctxt != NULL)
546
5.24M
  ctxt->errNo = error;
547
5.24M
    if (info == NULL) {
548
5.03M
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
549
5.03M
                        XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s\n",
550
5.03M
                        errmsg);
551
5.03M
    } else {
552
206k
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
553
206k
                        XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s: %s\n",
554
206k
                        errmsg, info);
555
206k
    }
556
5.24M
    if (ctxt != NULL) {
557
5.24M
  ctxt->wellFormed = 0;
558
5.24M
  if (ctxt->recovery == 0)
559
566k
      ctxt->disableSAX = 1;
560
5.24M
    }
561
5.24M
}
562
563
/**
564
 * xmlFatalErrMsg:
565
 * @ctxt:  an XML parser context
566
 * @error:  the error number
567
 * @msg:  the error message
568
 *
569
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
570
 */
571
static void LIBXML_ATTR_FORMAT(3,0)
572
xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
573
               const char *msg)
574
10.4M
{
575
10.4M
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
576
1.16M
        (ctxt->instate == XML_PARSER_EOF))
577
2
  return;
578
10.4M
    if (ctxt != NULL)
579
10.4M
  ctxt->errNo = error;
580
10.4M
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
581
10.4M
                    XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
582
10.4M
    if (ctxt != NULL) {
583
10.4M
  ctxt->wellFormed = 0;
584
10.4M
  if (ctxt->recovery == 0)
585
1.17M
      ctxt->disableSAX = 1;
586
10.4M
    }
587
10.4M
}
588
589
/**
590
 * xmlWarningMsg:
591
 * @ctxt:  an XML parser context
592
 * @error:  the error number
593
 * @msg:  the error message
594
 * @str1:  extra data
595
 * @str2:  extra data
596
 *
597
 * Handle a warning.
598
 */
599
static void LIBXML_ATTR_FORMAT(3,0)
600
xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
601
              const char *msg, const xmlChar *str1, const xmlChar *str2)
602
138k
{
603
138k
    xmlStructuredErrorFunc schannel = NULL;
604
605
138k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
606
8.15k
        (ctxt->instate == XML_PARSER_EOF))
607
0
  return;
608
138k
    if ((ctxt != NULL) && (ctxt->sax != NULL) &&
609
138k
        (ctxt->sax->initialized == XML_SAX2_MAGIC))
610
138k
        schannel = ctxt->sax->serror;
611
138k
    if (ctxt != NULL) {
612
138k
        __xmlRaiseError(schannel,
613
138k
                    (ctxt->sax) ? ctxt->sax->warning : NULL,
614
138k
                    ctxt->userData,
615
138k
                    ctxt, NULL, XML_FROM_PARSER, error,
616
138k
                    XML_ERR_WARNING, NULL, 0,
617
138k
        (const char *) str1, (const char *) str2, NULL, 0, 0,
618
138k
        msg, (const char *) str1, (const char *) str2);
619
138k
    } else {
620
0
        __xmlRaiseError(schannel, NULL, NULL,
621
0
                    ctxt, NULL, XML_FROM_PARSER, error,
622
0
                    XML_ERR_WARNING, NULL, 0,
623
0
        (const char *) str1, (const char *) str2, NULL, 0, 0,
624
0
        msg, (const char *) str1, (const char *) str2);
625
0
    }
626
138k
}
627
628
/**
629
 * xmlValidityError:
630
 * @ctxt:  an XML parser context
631
 * @error:  the error number
632
 * @msg:  the error message
633
 * @str1:  extra data
634
 *
635
 * Handle a validity error.
636
 */
637
static void LIBXML_ATTR_FORMAT(3,0)
638
xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
639
              const char *msg, const xmlChar *str1, const xmlChar *str2)
640
25.1k
{
641
25.1k
    xmlStructuredErrorFunc schannel = NULL;
642
643
25.1k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
644
0
        (ctxt->instate == XML_PARSER_EOF))
645
0
  return;
646
25.1k
    if (ctxt != NULL) {
647
25.1k
  ctxt->errNo = error;
648
25.1k
  if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
649
25.1k
      schannel = ctxt->sax->serror;
650
25.1k
    }
651
25.1k
    if (ctxt != NULL) {
652
25.1k
        __xmlRaiseError(schannel,
653
25.1k
                    ctxt->vctxt.error, ctxt->vctxt.userData,
654
25.1k
                    ctxt, NULL, XML_FROM_DTD, error,
655
25.1k
                    XML_ERR_ERROR, NULL, 0, (const char *) str1,
656
25.1k
        (const char *) str2, NULL, 0, 0,
657
25.1k
        msg, (const char *) str1, (const char *) str2);
658
25.1k
  ctxt->valid = 0;
659
25.1k
    } else {
660
0
        __xmlRaiseError(schannel, NULL, NULL,
661
0
                    ctxt, NULL, XML_FROM_DTD, error,
662
0
                    XML_ERR_ERROR, NULL, 0, (const char *) str1,
663
0
        (const char *) str2, NULL, 0, 0,
664
0
        msg, (const char *) str1, (const char *) str2);
665
0
    }
666
25.1k
}
667
668
/**
669
 * xmlFatalErrMsgInt:
670
 * @ctxt:  an XML parser context
671
 * @error:  the error number
672
 * @msg:  the error message
673
 * @val:  an integer value
674
 *
675
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
676
 */
677
static void LIBXML_ATTR_FORMAT(3,0)
678
xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
679
                  const char *msg, int val)
680
15.2M
{
681
15.2M
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
682
132k
        (ctxt->instate == XML_PARSER_EOF))
683
0
  return;
684
15.2M
    if (ctxt != NULL)
685
15.2M
  ctxt->errNo = error;
686
15.2M
    __xmlRaiseError(NULL, NULL, NULL,
687
15.2M
                    ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
688
15.2M
                    NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
689
15.2M
    if (ctxt != NULL) {
690
15.2M
  ctxt->wellFormed = 0;
691
15.2M
  if (ctxt->recovery == 0)
692
133k
      ctxt->disableSAX = 1;
693
15.2M
    }
694
15.2M
}
695
696
/**
697
 * xmlFatalErrMsgStrIntStr:
698
 * @ctxt:  an XML parser context
699
 * @error:  the error number
700
 * @msg:  the error message
701
 * @str1:  an string info
702
 * @val:  an integer value
703
 * @str2:  an string info
704
 *
705
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
706
 */
707
static void LIBXML_ATTR_FORMAT(3,0)
708
xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
709
                  const char *msg, const xmlChar *str1, int val,
710
      const xmlChar *str2)
711
4.03M
{
712
4.03M
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
713
1.68M
        (ctxt->instate == XML_PARSER_EOF))
714
0
  return;
715
4.03M
    if (ctxt != NULL)
716
4.03M
  ctxt->errNo = error;
717
4.03M
    __xmlRaiseError(NULL, NULL, NULL,
718
4.03M
                    ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
719
4.03M
                    NULL, 0, (const char *) str1, (const char *) str2,
720
4.03M
        NULL, val, 0, msg, str1, val, str2);
721
4.03M
    if (ctxt != NULL) {
722
4.03M
  ctxt->wellFormed = 0;
723
4.03M
  if (ctxt->recovery == 0)
724
1.68M
      ctxt->disableSAX = 1;
725
4.03M
    }
726
4.03M
}
727
728
/**
729
 * xmlFatalErrMsgStr:
730
 * @ctxt:  an XML parser context
731
 * @error:  the error number
732
 * @msg:  the error message
733
 * @val:  a string value
734
 *
735
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
736
 */
737
static void LIBXML_ATTR_FORMAT(3,0)
738
xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
739
                  const char *msg, const xmlChar * val)
740
7.08M
{
741
7.08M
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
742
320k
        (ctxt->instate == XML_PARSER_EOF))
743
0
  return;
744
7.08M
    if (ctxt != NULL)
745
7.08M
  ctxt->errNo = error;
746
7.08M
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
747
7.08M
                    XML_FROM_PARSER, error, XML_ERR_FATAL,
748
7.08M
                    NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
749
7.08M
                    val);
750
7.08M
    if (ctxt != NULL) {
751
7.08M
  ctxt->wellFormed = 0;
752
7.08M
  if (ctxt->recovery == 0)
753
446k
      ctxt->disableSAX = 1;
754
7.08M
    }
755
7.08M
}
756
757
/**
758
 * xmlErrMsgStr:
759
 * @ctxt:  an XML parser context
760
 * @error:  the error number
761
 * @msg:  the error message
762
 * @val:  a string value
763
 *
764
 * Handle a non fatal parser error
765
 */
766
static void LIBXML_ATTR_FORMAT(3,0)
767
xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
768
                  const char *msg, const xmlChar * val)
769
300k
{
770
300k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
771
0
        (ctxt->instate == XML_PARSER_EOF))
772
0
  return;
773
300k
    if (ctxt != NULL)
774
300k
  ctxt->errNo = error;
775
300k
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
776
300k
                    XML_FROM_PARSER, error, XML_ERR_ERROR,
777
300k
                    NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
778
300k
                    val);
779
300k
}
780
781
/**
782
 * xmlNsErr:
783
 * @ctxt:  an XML parser context
784
 * @error:  the error number
785
 * @msg:  the message
786
 * @info1:  extra information string
787
 * @info2:  extra information string
788
 *
789
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
790
 */
791
static void LIBXML_ATTR_FORMAT(3,0)
792
xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
793
         const char *msg,
794
         const xmlChar * info1, const xmlChar * info2,
795
         const xmlChar * info3)
796
2.37M
{
797
2.37M
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
798
369k
        (ctxt->instate == XML_PARSER_EOF))
799
0
  return;
800
2.37M
    if (ctxt != NULL)
801
2.37M
  ctxt->errNo = error;
802
2.37M
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
803
2.37M
                    XML_ERR_ERROR, NULL, 0, (const char *) info1,
804
2.37M
                    (const char *) info2, (const char *) info3, 0, 0, msg,
805
2.37M
                    info1, info2, info3);
806
2.37M
    if (ctxt != NULL)
807
2.37M
  ctxt->nsWellFormed = 0;
808
2.37M
}
809
810
/**
811
 * xmlNsWarn
812
 * @ctxt:  an XML parser context
813
 * @error:  the error number
814
 * @msg:  the message
815
 * @info1:  extra information string
816
 * @info2:  extra information string
817
 *
818
 * Handle a namespace warning error
819
 */
820
static void LIBXML_ATTR_FORMAT(3,0)
821
xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
822
         const char *msg,
823
         const xmlChar * info1, const xmlChar * info2,
824
         const xmlChar * info3)
825
92.0k
{
826
92.0k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
827
33.5k
        (ctxt->instate == XML_PARSER_EOF))
828
0
  return;
829
92.0k
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
830
92.0k
                    XML_ERR_WARNING, NULL, 0, (const char *) info1,
831
92.0k
                    (const char *) info2, (const char *) info3, 0, 0, msg,
832
92.0k
                    info1, info2, info3);
833
92.0k
}
834
835
/************************************************************************
836
 *                  *
837
 *    Library wide options          *
838
 *                  *
839
 ************************************************************************/
840
841
/**
842
  * xmlHasFeature:
843
  * @feature: the feature to be examined
844
  *
845
  * Examines if the library has been compiled with a given feature.
846
  *
847
  * Returns a non-zero value if the feature exist, otherwise zero.
848
  * Returns zero (0) if the feature does not exist or an unknown
849
  * unknown feature is requested, non-zero otherwise.
850
  */
851
int
852
xmlHasFeature(xmlFeature feature)
853
0
{
854
0
    switch (feature) {
855
0
  case XML_WITH_THREAD:
856
0
#ifdef LIBXML_THREAD_ENABLED
857
0
      return(1);
858
#else
859
      return(0);
860
#endif
861
0
        case XML_WITH_TREE:
862
0
#ifdef LIBXML_TREE_ENABLED
863
0
            return(1);
864
#else
865
            return(0);
866
#endif
867
0
        case XML_WITH_OUTPUT:
868
0
#ifdef LIBXML_OUTPUT_ENABLED
869
0
            return(1);
870
#else
871
            return(0);
872
#endif
873
0
        case XML_WITH_PUSH:
874
0
#ifdef LIBXML_PUSH_ENABLED
875
0
            return(1);
876
#else
877
            return(0);
878
#endif
879
0
        case XML_WITH_READER:
880
0
#ifdef LIBXML_READER_ENABLED
881
0
            return(1);
882
#else
883
            return(0);
884
#endif
885
0
        case XML_WITH_PATTERN:
886
0
#ifdef LIBXML_PATTERN_ENABLED
887
0
            return(1);
888
#else
889
            return(0);
890
#endif
891
0
        case XML_WITH_WRITER:
892
0
#ifdef LIBXML_WRITER_ENABLED
893
0
            return(1);
894
#else
895
            return(0);
896
#endif
897
0
        case XML_WITH_SAX1:
898
0
#ifdef LIBXML_SAX1_ENABLED
899
0
            return(1);
900
#else
901
            return(0);
902
#endif
903
0
        case XML_WITH_FTP:
904
0
#ifdef LIBXML_FTP_ENABLED
905
0
            return(1);
906
#else
907
            return(0);
908
#endif
909
0
        case XML_WITH_HTTP:
910
0
#ifdef LIBXML_HTTP_ENABLED
911
0
            return(1);
912
#else
913
            return(0);
914
#endif
915
0
        case XML_WITH_VALID:
916
0
#ifdef LIBXML_VALID_ENABLED
917
0
            return(1);
918
#else
919
            return(0);
920
#endif
921
0
        case XML_WITH_HTML:
922
0
#ifdef LIBXML_HTML_ENABLED
923
0
            return(1);
924
#else
925
            return(0);
926
#endif
927
0
        case XML_WITH_LEGACY:
928
0
#ifdef LIBXML_LEGACY_ENABLED
929
0
            return(1);
930
#else
931
            return(0);
932
#endif
933
0
        case XML_WITH_C14N:
934
0
#ifdef LIBXML_C14N_ENABLED
935
0
            return(1);
936
#else
937
            return(0);
938
#endif
939
0
        case XML_WITH_CATALOG:
940
0
#ifdef LIBXML_CATALOG_ENABLED
941
0
            return(1);
942
#else
943
            return(0);
944
#endif
945
0
        case XML_WITH_XPATH:
946
0
#ifdef LIBXML_XPATH_ENABLED
947
0
            return(1);
948
#else
949
            return(0);
950
#endif
951
0
        case XML_WITH_XPTR:
952
0
#ifdef LIBXML_XPTR_ENABLED
953
0
            return(1);
954
#else
955
            return(0);
956
#endif
957
0
        case XML_WITH_XINCLUDE:
958
0
#ifdef LIBXML_XINCLUDE_ENABLED
959
0
            return(1);
960
#else
961
            return(0);
962
#endif
963
0
        case XML_WITH_ICONV:
964
0
#ifdef LIBXML_ICONV_ENABLED
965
0
            return(1);
966
#else
967
            return(0);
968
#endif
969
0
        case XML_WITH_ISO8859X:
970
0
#ifdef LIBXML_ISO8859X_ENABLED
971
0
            return(1);
972
#else
973
            return(0);
974
#endif
975
0
        case XML_WITH_UNICODE:
976
0
#ifdef LIBXML_UNICODE_ENABLED
977
0
            return(1);
978
#else
979
            return(0);
980
#endif
981
0
        case XML_WITH_REGEXP:
982
0
#ifdef LIBXML_REGEXP_ENABLED
983
0
            return(1);
984
#else
985
            return(0);
986
#endif
987
0
        case XML_WITH_AUTOMATA:
988
0
#ifdef LIBXML_AUTOMATA_ENABLED
989
0
            return(1);
990
#else
991
            return(0);
992
#endif
993
0
        case XML_WITH_EXPR:
994
0
#ifdef LIBXML_EXPR_ENABLED
995
0
            return(1);
996
#else
997
            return(0);
998
#endif
999
0
        case XML_WITH_SCHEMAS:
1000
0
#ifdef LIBXML_SCHEMAS_ENABLED
1001
0
            return(1);
1002
#else
1003
            return(0);
1004
#endif
1005
0
        case XML_WITH_SCHEMATRON:
1006
0
#ifdef LIBXML_SCHEMATRON_ENABLED
1007
0
            return(1);
1008
#else
1009
            return(0);
1010
#endif
1011
0
        case XML_WITH_MODULES:
1012
0
#ifdef LIBXML_MODULES_ENABLED
1013
0
            return(1);
1014
#else
1015
            return(0);
1016
#endif
1017
0
        case XML_WITH_DEBUG:
1018
0
#ifdef LIBXML_DEBUG_ENABLED
1019
0
            return(1);
1020
#else
1021
            return(0);
1022
#endif
1023
0
        case XML_WITH_DEBUG_MEM:
1024
#ifdef DEBUG_MEMORY_LOCATION
1025
            return(1);
1026
#else
1027
0
            return(0);
1028
0
#endif
1029
0
        case XML_WITH_DEBUG_RUN:
1030
#ifdef LIBXML_DEBUG_RUNTIME
1031
            return(1);
1032
#else
1033
0
            return(0);
1034
0
#endif
1035
0
        case XML_WITH_ZLIB:
1036
#ifdef LIBXML_ZLIB_ENABLED
1037
            return(1);
1038
#else
1039
0
            return(0);
1040
0
#endif
1041
0
        case XML_WITH_LZMA:
1042
#ifdef LIBXML_LZMA_ENABLED
1043
            return(1);
1044
#else
1045
0
            return(0);
1046
0
#endif
1047
0
        case XML_WITH_ICU:
1048
#ifdef LIBXML_ICU_ENABLED
1049
            return(1);
1050
#else
1051
0
            return(0);
1052
0
#endif
1053
0
        default:
1054
0
      break;
1055
0
     }
1056
0
     return(0);
1057
0
}
1058
1059
/************************************************************************
1060
 *                  *
1061
 *    SAX2 defaulted attributes handling      *
1062
 *                  *
1063
 ************************************************************************/
1064
1065
/**
1066
 * xmlDetectSAX2:
1067
 * @ctxt:  an XML parser context
1068
 *
1069
 * Do the SAX2 detection and specific intialization
1070
 */
1071
static void
1072
2.60M
xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
1073
2.60M
    if (ctxt == NULL) return;
1074
2.60M
#ifdef LIBXML_SAX1_ENABLED
1075
2.60M
    if ((ctxt->sax) &&  (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
1076
2.60M
        ((ctxt->sax->startElementNs != NULL) ||
1077
2.60M
         (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
1078
#else
1079
    ctxt->sax2 = 1;
1080
#endif /* LIBXML_SAX1_ENABLED */
1081
1082
2.60M
    ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
1083
2.60M
    ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
1084
2.60M
    ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
1085
2.60M
    if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
1086
2.60M
    (ctxt->str_xml_ns == NULL)) {
1087
0
        xmlErrMemory(ctxt, NULL);
1088
0
    }
1089
2.60M
}
1090
1091
typedef struct _xmlDefAttrs xmlDefAttrs;
1092
typedef xmlDefAttrs *xmlDefAttrsPtr;
1093
struct _xmlDefAttrs {
1094
    int nbAttrs;  /* number of defaulted attributes on that element */
1095
    int maxAttrs;       /* the size of the array */
1096
#if __STDC_VERSION__ >= 199901L
1097
    /* Using a C99 flexible array member avoids UBSan errors. */
1098
    const xmlChar *values[]; /* array of localname/prefix/values/external */
1099
#else
1100
    const xmlChar *values[5];
1101
#endif
1102
};
1103
1104
/**
1105
 * xmlAttrNormalizeSpace:
1106
 * @src: the source string
1107
 * @dst: the target string
1108
 *
1109
 * Normalize the space in non CDATA attribute values:
1110
 * If the attribute type is not CDATA, then the XML processor MUST further
1111
 * process the normalized attribute value by discarding any leading and
1112
 * trailing space (#x20) characters, and by replacing sequences of space
1113
 * (#x20) characters by a single space (#x20) character.
1114
 * Note that the size of dst need to be at least src, and if one doesn't need
1115
 * to preserve dst (and it doesn't come from a dictionary or read-only) then
1116
 * passing src as dst is just fine.
1117
 *
1118
 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1119
 *         is needed.
1120
 */
1121
static xmlChar *
1122
xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
1123
210k
{
1124
210k
    if ((src == NULL) || (dst == NULL))
1125
0
        return(NULL);
1126
1127
248k
    while (*src == 0x20) src++;
1128
50.4M
    while (*src != 0) {
1129
50.2M
  if (*src == 0x20) {
1130
825k
      while (*src == 0x20) src++;
1131
278k
      if (*src != 0)
1132
238k
    *dst++ = 0x20;
1133
49.9M
  } else {
1134
49.9M
      *dst++ = *src++;
1135
49.9M
  }
1136
50.2M
    }
1137
210k
    *dst = 0;
1138
210k
    if (dst == src)
1139
130k
       return(NULL);
1140
79.9k
    return(dst);
1141
210k
}
1142
1143
/**
1144
 * xmlAttrNormalizeSpace2:
1145
 * @src: the source string
1146
 *
1147
 * Normalize the space in non CDATA attribute values, a slightly more complex
1148
 * front end to avoid allocation problems when running on attribute values
1149
 * coming from the input.
1150
 *
1151
 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1152
 *         is needed.
1153
 */
1154
static const xmlChar *
1155
xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
1156
72.7k
{
1157
72.7k
    int i;
1158
72.7k
    int remove_head = 0;
1159
72.7k
    int need_realloc = 0;
1160
72.7k
    const xmlChar *cur;
1161
1162
72.7k
    if ((ctxt == NULL) || (src == NULL) || (len == NULL))
1163
0
        return(NULL);
1164
72.7k
    i = *len;
1165
72.7k
    if (i <= 0)
1166
12.9k
        return(NULL);
1167
1168
59.8k
    cur = src;
1169
88.9k
    while (*cur == 0x20) {
1170
29.0k
        cur++;
1171
29.0k
  remove_head++;
1172
29.0k
    }
1173
741k
    while (*cur != 0) {
1174
693k
  if (*cur == 0x20) {
1175
45.5k
      cur++;
1176
45.5k
      if ((*cur == 0x20) || (*cur == 0)) {
1177
11.9k
          need_realloc = 1;
1178
11.9k
    break;
1179
11.9k
      }
1180
45.5k
  } else
1181
647k
      cur++;
1182
693k
    }
1183
59.8k
    if (need_realloc) {
1184
11.9k
        xmlChar *ret;
1185
1186
11.9k
  ret = xmlStrndup(src + remove_head, i - remove_head + 1);
1187
11.9k
  if (ret == NULL) {
1188
0
      xmlErrMemory(ctxt, NULL);
1189
0
      return(NULL);
1190
0
  }
1191
11.9k
  xmlAttrNormalizeSpace(ret, ret);
1192
11.9k
  *len = (int) strlen((const char *)ret);
1193
11.9k
        return(ret);
1194
47.9k
    } else if (remove_head) {
1195
13.4k
        *len -= remove_head;
1196
13.4k
        memmove(src, src + remove_head, 1 + *len);
1197
13.4k
  return(src);
1198
13.4k
    }
1199
34.4k
    return(NULL);
1200
59.8k
}
1201
1202
/**
1203
 * xmlAddDefAttrs:
1204
 * @ctxt:  an XML parser context
1205
 * @fullname:  the element fullname
1206
 * @fullattr:  the attribute fullname
1207
 * @value:  the attribute value
1208
 *
1209
 * Add a defaulted attribute for an element
1210
 */
1211
static void
1212
xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1213
               const xmlChar *fullname,
1214
               const xmlChar *fullattr,
1215
202k
               const xmlChar *value) {
1216
202k
    xmlDefAttrsPtr defaults;
1217
202k
    int len;
1218
202k
    const xmlChar *name;
1219
202k
    const xmlChar *prefix;
1220
1221
    /*
1222
     * Allows to detect attribute redefinitions
1223
     */
1224
202k
    if (ctxt->attsSpecial != NULL) {
1225
190k
        if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1226
115k
      return;
1227
190k
    }
1228
1229
86.7k
    if (ctxt->attsDefault == NULL) {
1230
12.7k
        ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
1231
12.7k
  if (ctxt->attsDefault == NULL)
1232
0
      goto mem_error;
1233
12.7k
    }
1234
1235
    /*
1236
     * split the element name into prefix:localname , the string found
1237
     * are within the DTD and then not associated to namespace names.
1238
     */
1239
86.7k
    name = xmlSplitQName3(fullname, &len);
1240
86.7k
    if (name == NULL) {
1241
48.7k
        name = xmlDictLookup(ctxt->dict, fullname, -1);
1242
48.7k
  prefix = NULL;
1243
48.7k
    } else {
1244
38.0k
        name = xmlDictLookup(ctxt->dict, name, -1);
1245
38.0k
  prefix = xmlDictLookup(ctxt->dict, fullname, len);
1246
38.0k
    }
1247
1248
    /*
1249
     * make sure there is some storage
1250
     */
1251
86.7k
    defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1252
86.7k
    if (defaults == NULL) {
1253
17.9k
        defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
1254
17.9k
                     (4 * 5) * sizeof(const xmlChar *));
1255
17.9k
  if (defaults == NULL)
1256
0
      goto mem_error;
1257
17.9k
  defaults->nbAttrs = 0;
1258
17.9k
  defaults->maxAttrs = 4;
1259
17.9k
  if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1260
17.9k
                          defaults, NULL) < 0) {
1261
0
      xmlFree(defaults);
1262
0
      goto mem_error;
1263
0
  }
1264
68.8k
    } else if (defaults->nbAttrs >= defaults->maxAttrs) {
1265
10.0k
        xmlDefAttrsPtr temp;
1266
1267
10.0k
        temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
1268
10.0k
           (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
1269
10.0k
  if (temp == NULL)
1270
0
      goto mem_error;
1271
10.0k
  defaults = temp;
1272
10.0k
  defaults->maxAttrs *= 2;
1273
10.0k
  if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1274
10.0k
                          defaults, NULL) < 0) {
1275
0
      xmlFree(defaults);
1276
0
      goto mem_error;
1277
0
  }
1278
10.0k
    }
1279
1280
    /*
1281
     * Split the element name into prefix:localname , the string found
1282
     * are within the DTD and hen not associated to namespace names.
1283
     */
1284
86.7k
    name = xmlSplitQName3(fullattr, &len);
1285
86.7k
    if (name == NULL) {
1286
47.3k
        name = xmlDictLookup(ctxt->dict, fullattr, -1);
1287
47.3k
  prefix = NULL;
1288
47.3k
    } else {
1289
39.4k
        name = xmlDictLookup(ctxt->dict, name, -1);
1290
39.4k
  prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1291
39.4k
    }
1292
1293
86.7k
    defaults->values[5 * defaults->nbAttrs] = name;
1294
86.7k
    defaults->values[5 * defaults->nbAttrs + 1] = prefix;
1295
    /* intern the string and precompute the end */
1296
86.7k
    len = xmlStrlen(value);
1297
86.7k
    value = xmlDictLookup(ctxt->dict, value, len);
1298
86.7k
    defaults->values[5 * defaults->nbAttrs + 2] = value;
1299
86.7k
    defaults->values[5 * defaults->nbAttrs + 3] = value + len;
1300
86.7k
    if (ctxt->external)
1301
0
        defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
1302
86.7k
    else
1303
86.7k
        defaults->values[5 * defaults->nbAttrs + 4] = NULL;
1304
86.7k
    defaults->nbAttrs++;
1305
1306
86.7k
    return;
1307
1308
0
mem_error:
1309
0
    xmlErrMemory(ctxt, NULL);
1310
0
    return;
1311
86.7k
}
1312
1313
/**
1314
 * xmlAddSpecialAttr:
1315
 * @ctxt:  an XML parser context
1316
 * @fullname:  the element fullname
1317
 * @fullattr:  the attribute fullname
1318
 * @type:  the attribute type
1319
 *
1320
 * Register this attribute type
1321
 */
1322
static void
1323
xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1324
      const xmlChar *fullname,
1325
      const xmlChar *fullattr,
1326
      int type)
1327
234k
{
1328
234k
    if (ctxt->attsSpecial == NULL) {
1329
13.7k
        ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
1330
13.7k
  if (ctxt->attsSpecial == NULL)
1331
0
      goto mem_error;
1332
13.7k
    }
1333
1334
234k
    if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1335
142k
        return;
1336
1337
92.0k
    xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1338
92.0k
                     (void *) (ptrdiff_t) type);
1339
92.0k
    return;
1340
1341
0
mem_error:
1342
0
    xmlErrMemory(ctxt, NULL);
1343
0
    return;
1344
234k
}
1345
1346
/**
1347
 * xmlCleanSpecialAttrCallback:
1348
 *
1349
 * Removes CDATA attributes from the special attribute table
1350
 */
1351
static void
1352
xmlCleanSpecialAttrCallback(void *payload, void *data,
1353
                            const xmlChar *fullname, const xmlChar *fullattr,
1354
92.0k
                            const xmlChar *unused ATTRIBUTE_UNUSED) {
1355
92.0k
    xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1356
1357
92.0k
    if (((ptrdiff_t) payload) == XML_ATTRIBUTE_CDATA) {
1358
9.24k
        xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1359
9.24k
    }
1360
92.0k
}
1361
1362
/**
1363
 * xmlCleanSpecialAttr:
1364
 * @ctxt:  an XML parser context
1365
 *
1366
 * Trim the list of attributes defined to remove all those of type
1367
 * CDATA as they are not special. This call should be done when finishing
1368
 * to parse the DTD and before starting to parse the document root.
1369
 */
1370
static void
1371
xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1372
60.8k
{
1373
60.8k
    if (ctxt->attsSpecial == NULL)
1374
47.1k
        return;
1375
1376
13.6k
    xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1377
1378
13.6k
    if (xmlHashSize(ctxt->attsSpecial) == 0) {
1379
183
        xmlHashFree(ctxt->attsSpecial, NULL);
1380
183
        ctxt->attsSpecial = NULL;
1381
183
    }
1382
13.6k
    return;
1383
60.8k
}
1384
1385
/**
1386
 * xmlCheckLanguageID:
1387
 * @lang:  pointer to the string value
1388
 *
1389
 * Checks that the value conforms to the LanguageID production:
1390
 *
1391
 * NOTE: this is somewhat deprecated, those productions were removed from
1392
 *       the XML Second edition.
1393
 *
1394
 * [33] LanguageID ::= Langcode ('-' Subcode)*
1395
 * [34] Langcode ::= ISO639Code |  IanaCode |  UserCode
1396
 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1397
 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1398
 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1399
 * [38] Subcode ::= ([a-z] | [A-Z])+
1400
 *
1401
 * The current REC reference the sucessors of RFC 1766, currently 5646
1402
 *
1403
 * http://www.rfc-editor.org/rfc/rfc5646.txt
1404
 * langtag       = language
1405
 *                 ["-" script]
1406
 *                 ["-" region]
1407
 *                 *("-" variant)
1408
 *                 *("-" extension)
1409
 *                 ["-" privateuse]
1410
 * language      = 2*3ALPHA            ; shortest ISO 639 code
1411
 *                 ["-" extlang]       ; sometimes followed by
1412
 *                                     ; extended language subtags
1413
 *               / 4ALPHA              ; or reserved for future use
1414
 *               / 5*8ALPHA            ; or registered language subtag
1415
 *
1416
 * extlang       = 3ALPHA              ; selected ISO 639 codes
1417
 *                 *2("-" 3ALPHA)      ; permanently reserved
1418
 *
1419
 * script        = 4ALPHA              ; ISO 15924 code
1420
 *
1421
 * region        = 2ALPHA              ; ISO 3166-1 code
1422
 *               / 3DIGIT              ; UN M.49 code
1423
 *
1424
 * variant       = 5*8alphanum         ; registered variants
1425
 *               / (DIGIT 3alphanum)
1426
 *
1427
 * extension     = singleton 1*("-" (2*8alphanum))
1428
 *
1429
 *                                     ; Single alphanumerics
1430
 *                                     ; "x" reserved for private use
1431
 * singleton     = DIGIT               ; 0 - 9
1432
 *               / %x41-57             ; A - W
1433
 *               / %x59-5A             ; Y - Z
1434
 *               / %x61-77             ; a - w
1435
 *               / %x79-7A             ; y - z
1436
 *
1437
 * it sounds right to still allow Irregular i-xxx IANA and user codes too
1438
 * The parser below doesn't try to cope with extension or privateuse
1439
 * that could be added but that's not interoperable anyway
1440
 *
1441
 * Returns 1 if correct 0 otherwise
1442
 **/
1443
int
1444
xmlCheckLanguageID(const xmlChar * lang)
1445
0
{
1446
0
    const xmlChar *cur = lang, *nxt;
1447
1448
0
    if (cur == NULL)
1449
0
        return (0);
1450
0
    if (((cur[0] == 'i') && (cur[1] == '-')) ||
1451
0
        ((cur[0] == 'I') && (cur[1] == '-')) ||
1452
0
        ((cur[0] == 'x') && (cur[1] == '-')) ||
1453
0
        ((cur[0] == 'X') && (cur[1] == '-'))) {
1454
        /*
1455
         * Still allow IANA code and user code which were coming
1456
         * from the previous version of the XML-1.0 specification
1457
         * it's deprecated but we should not fail
1458
         */
1459
0
        cur += 2;
1460
0
        while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1461
0
               ((cur[0] >= 'a') && (cur[0] <= 'z')))
1462
0
            cur++;
1463
0
        return(cur[0] == 0);
1464
0
    }
1465
0
    nxt = cur;
1466
0
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1467
0
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1468
0
           nxt++;
1469
0
    if (nxt - cur >= 4) {
1470
        /*
1471
         * Reserved
1472
         */
1473
0
        if ((nxt - cur > 8) || (nxt[0] != 0))
1474
0
            return(0);
1475
0
        return(1);
1476
0
    }
1477
0
    if (nxt - cur < 2)
1478
0
        return(0);
1479
    /* we got an ISO 639 code */
1480
0
    if (nxt[0] == 0)
1481
0
        return(1);
1482
0
    if (nxt[0] != '-')
1483
0
        return(0);
1484
1485
0
    nxt++;
1486
0
    cur = nxt;
1487
    /* now we can have extlang or script or region or variant */
1488
0
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1489
0
        goto region_m49;
1490
1491
0
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1492
0
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1493
0
           nxt++;
1494
0
    if (nxt - cur == 4)
1495
0
        goto script;
1496
0
    if (nxt - cur == 2)
1497
0
        goto region;
1498
0
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1499
0
        goto variant;
1500
0
    if (nxt - cur != 3)
1501
0
        return(0);
1502
    /* we parsed an extlang */
1503
0
    if (nxt[0] == 0)
1504
0
        return(1);
1505
0
    if (nxt[0] != '-')
1506
0
        return(0);
1507
1508
0
    nxt++;
1509
0
    cur = nxt;
1510
    /* now we can have script or region or variant */
1511
0
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1512
0
        goto region_m49;
1513
1514
0
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1515
0
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1516
0
           nxt++;
1517
0
    if (nxt - cur == 2)
1518
0
        goto region;
1519
0
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1520
0
        goto variant;
1521
0
    if (nxt - cur != 4)
1522
0
        return(0);
1523
    /* we parsed a script */
1524
0
script:
1525
0
    if (nxt[0] == 0)
1526
0
        return(1);
1527
0
    if (nxt[0] != '-')
1528
0
        return(0);
1529
1530
0
    nxt++;
1531
0
    cur = nxt;
1532
    /* now we can have region or variant */
1533
0
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1534
0
        goto region_m49;
1535
1536
0
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1537
0
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1538
0
           nxt++;
1539
1540
0
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1541
0
        goto variant;
1542
0
    if (nxt - cur != 2)
1543
0
        return(0);
1544
    /* we parsed a region */
1545
0
region:
1546
0
    if (nxt[0] == 0)
1547
0
        return(1);
1548
0
    if (nxt[0] != '-')
1549
0
        return(0);
1550
1551
0
    nxt++;
1552
0
    cur = nxt;
1553
    /* now we can just have a variant */
1554
0
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1555
0
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1556
0
           nxt++;
1557
1558
0
    if ((nxt - cur < 5) || (nxt - cur > 8))
1559
0
        return(0);
1560
1561
    /* we parsed a variant */
1562
0
variant:
1563
0
    if (nxt[0] == 0)
1564
0
        return(1);
1565
0
    if (nxt[0] != '-')
1566
0
        return(0);
1567
    /* extensions and private use subtags not checked */
1568
0
    return (1);
1569
1570
0
region_m49:
1571
0
    if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
1572
0
        ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
1573
0
        nxt += 3;
1574
0
        goto region;
1575
0
    }
1576
0
    return(0);
1577
0
}
1578
1579
/************************************************************************
1580
 *                  *
1581
 *    Parser stacks related functions and macros    *
1582
 *                  *
1583
 ************************************************************************/
1584
1585
static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1586
                                            const xmlChar ** str);
1587
1588
#ifdef SAX2
1589
/**
1590
 * nsPush:
1591
 * @ctxt:  an XML parser context
1592
 * @prefix:  the namespace prefix or NULL
1593
 * @URL:  the namespace name
1594
 *
1595
 * Pushes a new parser namespace on top of the ns stack
1596
 *
1597
 * Returns -1 in case of error, -2 if the namespace should be discarded
1598
 *     and the index in the stack otherwise.
1599
 */
1600
static int
1601
nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1602
2.04M
{
1603
2.04M
    if (ctxt->options & XML_PARSE_NSCLEAN) {
1604
0
        int i;
1605
0
  for (i = ctxt->nsNr - 2;i >= 0;i -= 2) {
1606
0
      if (ctxt->nsTab[i] == prefix) {
1607
    /* in scope */
1608
0
          if (ctxt->nsTab[i + 1] == URL)
1609
0
        return(-2);
1610
    /* out of scope keep it */
1611
0
    break;
1612
0
      }
1613
0
  }
1614
0
    }
1615
2.04M
    if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1616
132k
  ctxt->nsMax = 10;
1617
132k
  ctxt->nsNr = 0;
1618
132k
  ctxt->nsTab = (const xmlChar **)
1619
132k
                xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1620
132k
  if (ctxt->nsTab == NULL) {
1621
0
      xmlErrMemory(ctxt, NULL);
1622
0
      ctxt->nsMax = 0;
1623
0
            return (-1);
1624
0
  }
1625
1.91M
    } else if (ctxt->nsNr >= ctxt->nsMax) {
1626
35.2k
        const xmlChar ** tmp;
1627
35.2k
        ctxt->nsMax *= 2;
1628
35.2k
        tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1629
35.2k
            ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1630
35.2k
        if (tmp == NULL) {
1631
0
            xmlErrMemory(ctxt, NULL);
1632
0
      ctxt->nsMax /= 2;
1633
0
            return (-1);
1634
0
        }
1635
35.2k
  ctxt->nsTab = tmp;
1636
35.2k
    }
1637
2.04M
    ctxt->nsTab[ctxt->nsNr++] = prefix;
1638
2.04M
    ctxt->nsTab[ctxt->nsNr++] = URL;
1639
2.04M
    return (ctxt->nsNr);
1640
2.04M
}
1641
/**
1642
 * nsPop:
1643
 * @ctxt: an XML parser context
1644
 * @nr:  the number to pop
1645
 *
1646
 * Pops the top @nr parser prefix/namespace from the ns stack
1647
 *
1648
 * Returns the number of namespaces removed
1649
 */
1650
static int
1651
nsPop(xmlParserCtxtPtr ctxt, int nr)
1652
331k
{
1653
331k
    int i;
1654
1655
331k
    if (ctxt->nsTab == NULL) return(0);
1656
331k
    if (ctxt->nsNr < nr) {
1657
0
        xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1658
0
        nr = ctxt->nsNr;
1659
0
    }
1660
331k
    if (ctxt->nsNr <= 0)
1661
0
        return (0);
1662
1663
1.19M
    for (i = 0;i < nr;i++) {
1664
858k
         ctxt->nsNr--;
1665
858k
   ctxt->nsTab[ctxt->nsNr] = NULL;
1666
858k
    }
1667
331k
    return(nr);
1668
331k
}
1669
#endif
1670
1671
static int
1672
137k
xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1673
137k
    const xmlChar **atts;
1674
137k
    int *attallocs;
1675
137k
    int maxatts;
1676
1677
137k
    if (ctxt->atts == NULL) {
1678
134k
  maxatts = 55; /* allow for 10 attrs by default */
1679
134k
  atts = (const xmlChar **)
1680
134k
         xmlMalloc(maxatts * sizeof(xmlChar *));
1681
134k
  if (atts == NULL) goto mem_error;
1682
134k
  ctxt->atts = atts;
1683
134k
  attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1684
134k
  if (attallocs == NULL) goto mem_error;
1685
134k
  ctxt->attallocs = attallocs;
1686
134k
  ctxt->maxatts = maxatts;
1687
134k
    } else if (nr + 5 > ctxt->maxatts) {
1688
3.17k
  maxatts = (nr + 5) * 2;
1689
3.17k
  atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1690
3.17k
             maxatts * sizeof(const xmlChar *));
1691
3.17k
  if (atts == NULL) goto mem_error;
1692
3.17k
  ctxt->atts = atts;
1693
3.17k
  attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1694
3.17k
                               (maxatts / 5) * sizeof(int));
1695
3.17k
  if (attallocs == NULL) goto mem_error;
1696
3.17k
  ctxt->attallocs = attallocs;
1697
3.17k
  ctxt->maxatts = maxatts;
1698
3.17k
    }
1699
137k
    return(ctxt->maxatts);
1700
0
mem_error:
1701
0
    xmlErrMemory(ctxt, NULL);
1702
0
    return(-1);
1703
137k
}
1704
1705
/**
1706
 * inputPush:
1707
 * @ctxt:  an XML parser context
1708
 * @value:  the parser input
1709
 *
1710
 * Pushes a new parser input on top of the input stack
1711
 *
1712
 * Returns -1 in case of error, the index in the stack otherwise
1713
 */
1714
int
1715
inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1716
912k
{
1717
912k
    if ((ctxt == NULL) || (value == NULL))
1718
0
        return(-1);
1719
912k
    if (ctxt->inputNr >= ctxt->inputMax) {
1720
0
        ctxt->inputMax *= 2;
1721
0
        ctxt->inputTab =
1722
0
            (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1723
0
                                             ctxt->inputMax *
1724
0
                                             sizeof(ctxt->inputTab[0]));
1725
0
        if (ctxt->inputTab == NULL) {
1726
0
            xmlErrMemory(ctxt, NULL);
1727
0
      xmlFreeInputStream(value);
1728
0
      ctxt->inputMax /= 2;
1729
0
      value = NULL;
1730
0
            return (-1);
1731
0
        }
1732
0
    }
1733
912k
    ctxt->inputTab[ctxt->inputNr] = value;
1734
912k
    ctxt->input = value;
1735
912k
    return (ctxt->inputNr++);
1736
912k
}
1737
/**
1738
 * inputPop:
1739
 * @ctxt: an XML parser context
1740
 *
1741
 * Pops the top parser input from the input stack
1742
 *
1743
 * Returns the input just removed
1744
 */
1745
xmlParserInputPtr
1746
inputPop(xmlParserCtxtPtr ctxt)
1747
1.78M
{
1748
1.78M
    xmlParserInputPtr ret;
1749
1750
1.78M
    if (ctxt == NULL)
1751
0
        return(NULL);
1752
1.78M
    if (ctxt->inputNr <= 0)
1753
871k
        return (NULL);
1754
912k
    ctxt->inputNr--;
1755
912k
    if (ctxt->inputNr > 0)
1756
476k
        ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1757
435k
    else
1758
435k
        ctxt->input = NULL;
1759
912k
    ret = ctxt->inputTab[ctxt->inputNr];
1760
912k
    ctxt->inputTab[ctxt->inputNr] = NULL;
1761
912k
    return (ret);
1762
1.78M
}
1763
/**
1764
 * nodePush:
1765
 * @ctxt:  an XML parser context
1766
 * @value:  the element node
1767
 *
1768
 * Pushes a new element node on top of the node stack
1769
 *
1770
 * Returns -1 in case of error, the index in the stack otherwise
1771
 */
1772
int
1773
nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1774
19.1M
{
1775
19.1M
    if (ctxt == NULL) return(0);
1776
19.1M
    if (ctxt->nodeNr >= ctxt->nodeMax) {
1777
82.9k
        xmlNodePtr *tmp;
1778
1779
82.9k
  tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1780
82.9k
                                      ctxt->nodeMax * 2 *
1781
82.9k
                                      sizeof(ctxt->nodeTab[0]));
1782
82.9k
        if (tmp == NULL) {
1783
0
            xmlErrMemory(ctxt, NULL);
1784
0
            return (-1);
1785
0
        }
1786
82.9k
        ctxt->nodeTab = tmp;
1787
82.9k
  ctxt->nodeMax *= 2;
1788
82.9k
    }
1789
19.1M
    if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1790
480
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
1791
480
  xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
1792
480
     "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
1793
480
        xmlParserMaxDepth);
1794
480
  xmlHaltParser(ctxt);
1795
480
  return(-1);
1796
480
    }
1797
19.1M
    ctxt->nodeTab[ctxt->nodeNr] = value;
1798
19.1M
    ctxt->node = value;
1799
19.1M
    return (ctxt->nodeNr++);
1800
19.1M
}
1801
1802
/**
1803
 * nodePop:
1804
 * @ctxt: an XML parser context
1805
 *
1806
 * Pops the top element node from the node stack
1807
 *
1808
 * Returns the node just removed
1809
 */
1810
xmlNodePtr
1811
nodePop(xmlParserCtxtPtr ctxt)
1812
18.5M
{
1813
18.5M
    xmlNodePtr ret;
1814
1815
18.5M
    if (ctxt == NULL) return(NULL);
1816
18.5M
    if (ctxt->nodeNr <= 0)
1817
972k
        return (NULL);
1818
17.5M
    ctxt->nodeNr--;
1819
17.5M
    if (ctxt->nodeNr > 0)
1820
15.8M
        ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1821
1.76M
    else
1822
1.76M
        ctxt->node = NULL;
1823
17.5M
    ret = ctxt->nodeTab[ctxt->nodeNr];
1824
17.5M
    ctxt->nodeTab[ctxt->nodeNr] = NULL;
1825
17.5M
    return (ret);
1826
18.5M
}
1827
1828
#ifdef LIBXML_PUSH_ENABLED
1829
/**
1830
 * nameNsPush:
1831
 * @ctxt:  an XML parser context
1832
 * @value:  the element name
1833
 * @prefix:  the element prefix
1834
 * @URI:  the element namespace name
1835
 *
1836
 * Pushes a new element name/prefix/URL on top of the name stack
1837
 *
1838
 * Returns -1 in case of error, the index in the stack otherwise
1839
 */
1840
static int
1841
nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1842
           const xmlChar *prefix, const xmlChar *URI, int nsNr)
1843
13.7M
{
1844
13.7M
    if (ctxt->nameNr >= ctxt->nameMax) {
1845
101k
        const xmlChar * *tmp;
1846
101k
        void **tmp2;
1847
101k
        ctxt->nameMax *= 2;
1848
101k
        tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1849
101k
                                    ctxt->nameMax *
1850
101k
                                    sizeof(ctxt->nameTab[0]));
1851
101k
        if (tmp == NULL) {
1852
0
      ctxt->nameMax /= 2;
1853
0
      goto mem_error;
1854
0
        }
1855
101k
  ctxt->nameTab = tmp;
1856
101k
        tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1857
101k
                                    ctxt->nameMax * 3 *
1858
101k
                                    sizeof(ctxt->pushTab[0]));
1859
101k
        if (tmp2 == NULL) {
1860
0
      ctxt->nameMax /= 2;
1861
0
      goto mem_error;
1862
0
        }
1863
101k
  ctxt->pushTab = tmp2;
1864
101k
    }
1865
13.7M
    ctxt->nameTab[ctxt->nameNr] = value;
1866
13.7M
    ctxt->name = value;
1867
13.7M
    ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1868
13.7M
    ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
1869
13.7M
    ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (ptrdiff_t) nsNr;
1870
13.7M
    return (ctxt->nameNr++);
1871
0
mem_error:
1872
0
    xmlErrMemory(ctxt, NULL);
1873
0
    return (-1);
1874
13.7M
}
1875
/**
1876
 * nameNsPop:
1877
 * @ctxt: an XML parser context
1878
 *
1879
 * Pops the top element/prefix/URI name from the name stack
1880
 *
1881
 * Returns the name just removed
1882
 */
1883
static const xmlChar *
1884
nameNsPop(xmlParserCtxtPtr ctxt)
1885
7.69M
{
1886
7.69M
    const xmlChar *ret;
1887
1888
7.69M
    if (ctxt->nameNr <= 0)
1889
0
        return (NULL);
1890
7.69M
    ctxt->nameNr--;
1891
7.69M
    if (ctxt->nameNr > 0)
1892
7.65M
        ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1893
41.9k
    else
1894
41.9k
        ctxt->name = NULL;
1895
7.69M
    ret = ctxt->nameTab[ctxt->nameNr];
1896
7.69M
    ctxt->nameTab[ctxt->nameNr] = NULL;
1897
7.69M
    return (ret);
1898
7.69M
}
1899
#endif /* LIBXML_PUSH_ENABLED */
1900
1901
/**
1902
 * namePush:
1903
 * @ctxt:  an XML parser context
1904
 * @value:  the element name
1905
 *
1906
 * Pushes a new element name on top of the name stack
1907
 *
1908
 * Returns -1 in case of error, the index in the stack otherwise
1909
 */
1910
int
1911
namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
1912
1.98M
{
1913
1.98M
    if (ctxt == NULL) return (-1);
1914
1915
1.98M
    if (ctxt->nameNr >= ctxt->nameMax) {
1916
42.5k
        const xmlChar * *tmp;
1917
42.5k
        tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1918
42.5k
                                    ctxt->nameMax * 2 *
1919
42.5k
                                    sizeof(ctxt->nameTab[0]));
1920
42.5k
        if (tmp == NULL) {
1921
0
      goto mem_error;
1922
0
        }
1923
42.5k
  ctxt->nameTab = tmp;
1924
42.5k
        ctxt->nameMax *= 2;
1925
42.5k
    }
1926
1.98M
    ctxt->nameTab[ctxt->nameNr] = value;
1927
1.98M
    ctxt->name = value;
1928
1.98M
    return (ctxt->nameNr++);
1929
0
mem_error:
1930
0
    xmlErrMemory(ctxt, NULL);
1931
0
    return (-1);
1932
1.98M
}
1933
/**
1934
 * namePop:
1935
 * @ctxt: an XML parser context
1936
 *
1937
 * Pops the top element name from the name stack
1938
 *
1939
 * Returns the name just removed
1940
 */
1941
const xmlChar *
1942
namePop(xmlParserCtxtPtr ctxt)
1943
1.80M
{
1944
1.80M
    const xmlChar *ret;
1945
1946
1.80M
    if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1947
0
        return (NULL);
1948
1.80M
    ctxt->nameNr--;
1949
1.80M
    if (ctxt->nameNr > 0)
1950
1.49M
        ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1951
310k
    else
1952
310k
        ctxt->name = NULL;
1953
1.80M
    ret = ctxt->nameTab[ctxt->nameNr];
1954
1.80M
    ctxt->nameTab[ctxt->nameNr] = NULL;
1955
1.80M
    return (ret);
1956
1.80M
}
1957
1958
20.4M
/*
 * spacePush: push a value on the space stack, doubling the table when it
 * is full. ctxt->space is left pointing at the new top entry.
 *
 * Returns -1 if the table could not be grown, the index of the new entry
 * otherwise.
 */
static int spacePush(xmlParserCtxtPtr ctxt, int val) {
    int slot;

    if (ctxt->spaceNr >= ctxt->spaceMax) {
        int grownMax = ctxt->spaceMax * 2;
        int *grown;

        grown = (int *) xmlRealloc(ctxt->spaceTab,
                                   grownMax * sizeof(ctxt->spaceTab[0]));
        if (grown == NULL) {
            /* spaceMax keeps its previous value on failure. */
            xmlErrMemory(ctxt, NULL);
            return(-1);
        }
        ctxt->spaceTab = grown;
        ctxt->spaceMax = grownMax;
    }

    slot = ctxt->spaceNr;
    ctxt->spaceTab[slot] = val;
    ctxt->space = &ctxt->spaceTab[slot];
    ctxt->spaceNr = slot + 1;
    return(slot);
}
1976
1977
19.0M
/*
 * spacePop: pop the top value of the space stack. The vacated slot is
 * overwritten with -1 and ctxt->space is moved to the entry below, or to
 * slot 0 when the stack becomes empty.
 *
 * Returns the popped value, or 0 if the stack was already empty.
 */
static int spacePop(xmlParserCtxtPtr ctxt) {
    int top;
    int popped;

    if (ctxt->spaceNr <= 0)
        return(0);

    top = --ctxt->spaceNr;
    ctxt->space = &ctxt->spaceTab[(top > 0) ? (top - 1) : 0];

    popped = ctxt->spaceTab[top];
    ctxt->spaceTab[top] = -1;
    return(popped);
}
1989
1990
/*
1991
 * Macros for accessing the content. Those should be used only by the parser,
1992
 * and not exported.
1993
 *
1994
 * Dirty macros, i.e. one often need to make assumption on the context to
1995
 * use them
1996
 *
1997
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
1998
 *           To be used with extreme caution since operations consuming
1999
 *           characters may move the input buffer to a different location !
2000
 *   CUR     returns the current xmlChar value, i.e. a 8 bit value if compiled
2001
 *           This should be used internally by the parser
2002
 *           only to compare to ASCII values otherwise it would break when
2003
 *           running with UTF-8 encoding.
2004
 *   RAW     same as CUR but in the input buffer, bypass any token
2005
 *           extraction that may have been done
2006
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR it should be used only
2007
 *           to compare on ASCII based substring.
2008
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
2009
 *           strings without newlines within the parser.
2010
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII
2011
 *           defined char within the parser.
2012
 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
2013
 *
2014
 *   NEXT    Skip to the next character, this does the proper decoding
2015
 *           in UTF-8 mode. It also pop-up unfinished entities on the fly.
2016
 *   NEXTL(l) Skip the current unicode character of l xmlChars long.
2017
 *   CUR_CHAR(l) returns the current unicode character (int), set l
2018
 *           to the number of xmlChars used for the encoding [0-5].
2019
 *   CUR_SCHAR  same but operate on a string instead of the context
2020
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
2021
 *            the index
2022
 *   GROW, SHRINK  handling of input buffers
2023
 */
2024
2025
204M
/* Byte at the current position of the current input. */
#define RAW (*ctxt->input->cur)
/* Same expansion as RAW. */
#define CUR (*ctxt->input->cur)
/* Byte located (val) positions away from the current one. */
#define NXT(val) (ctxt->input->cur[(val)])
/* Current read pointer of the current input (usable as an lvalue). */
#define CUR_PTR (ctxt->input->cur)
/* Base pointer of the current input. */
#define BASE_PTR (ctxt->input->base)
2030
2031
/*
 * CMPn(s, c1, ..., cn): true when the first n bytes at s, read as
 * unsigned char, equal c1..cn. Every argument is parenthesized so that an
 * expression argument is cast and compared as a whole.
 * Note that s is evaluated several times.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((unsigned char *) (s))[ 0 ] == (c1) && \
    ((unsigned char *) (s))[ 1 ] == (c2) && \
    ((unsigned char *) (s))[ 2 ] == (c3) && \
    ((unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && \
    ((unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((unsigned char *) (s))[ 9 ] == (c10) )
2048
2049
17.6M
/*
 * SKIP(val): advance the character counter, the read pointer and the
 * column by val, then ask for more input if the new current byte is 0.
 * val is evaluated three times.
 */
#define SKIP(val) do {							\
    ctxt->nbChars += (val);						\
    ctxt->input->cur += (val);						\
    ctxt->input->col += (val);						\
    if (*ctxt->input->cur == 0)						\
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);			\
  } while (0)
2054
2055
749k
/*
 * SKIPL(val): skip val bytes one at a time, updating line and column for
 * every '\n' crossed, then ask for more input if the new current byte
 * is 0. val is parenthesized so that an expression argument is compared
 * as a whole; it is re-evaluated on every loop iteration.
 */
#define SKIPL(val) do {							\
    int skipl;								\
    for(skipl=0; skipl<(val); skipl++) {				\
	if (*(ctxt->input->cur) == '\n') {				\
	ctxt->input->line++; ctxt->input->col = 1;			\
	} else ctxt->input->col++;					\
	ctxt->nbChars++;						\
	ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == 0)						\
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);			\
  } while (0)
2067
2068
97.8M
#define SHRINK if ((ctxt->progressive == 0) &&       \
2069
97.8M
       (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
2070
97.8M
       (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
2071
259k
  xmlSHRINK (ctxt);
2072
2073
282k
static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
2074
282k
    xmlParserInputShrink(ctxt->input);
2075
282k
    if (*ctxt->input->cur == 0)
2076
31.3k
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2077
282k
}
2078
2079
3.54G
#define GROW if ((ctxt->progressive == 0) &&       \
2080
3.54G
     (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
2081
3.54G
  xmlGROW (ctxt);
2082
2083
21.0M
static void xmlGROW (xmlParserCtxtPtr ctxt) {
2084
21.0M
    unsigned long curEnd = ctxt->input->end - ctxt->input->cur;
2085
21.0M
    unsigned long curBase = ctxt->input->cur - ctxt->input->base;
2086
2087
21.0M
    if (((curEnd > (unsigned long) XML_MAX_LOOKUP_LIMIT) ||
2088
21.0M
         (curBase > (unsigned long) XML_MAX_LOOKUP_LIMIT)) &&
2089
45
         ((ctxt->input->buf) && (ctxt->input->buf->readcallback != (xmlInputReadCallback) xmlNop)) &&
2090
30
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
2091
30
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
2092
30
        xmlHaltParser(ctxt);
2093
30
  return;
2094
30
    }
2095
21.0M
    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2096
21.0M
    if ((ctxt->input->cur > ctxt->input->end) ||
2097
21.0M
        (ctxt->input->cur < ctxt->input->base)) {
2098
0
        xmlHaltParser(ctxt);
2099
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "cur index out of bound");
2100
0
  return;
2101
0
    }
2102
21.0M
    if ((ctxt->input->cur != NULL) && (*ctxt->input->cur == 0))
2103
2.15M
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2104
21.0M
}
2105
2106
64.8M
/* Skip blanks at the current position; yields the number skipped. */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

/* Move to the next character through xmlNextChar. */
#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: step over exactly one byte, bumping the character counter and
 * the column, then ask for more input if the new current byte is 0.
 * The line number is not touched.
 */
#define NEXT1 {								\
	ctxt->nbChars++;						\
	ctxt->input->col++;						\
	ctxt->input->cur++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }
2117
2118
5.75G
/*
 * NEXTL(l): step over the current character, which is l bytes long.
 * A '\n' increments the line and resets the column to 1; any other
 * character increments the column.
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += (l);						\
  } while (0)

/* Current character of the input; l receives the value set by xmlCurrentChar. */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
/* Same as CUR_CHAR, but reads from the string s through xmlStringCurrentChar. */
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * COPY_BUF(l,b,i,v): append character v to buffer b at index i and
 * advance i. A single-byte character (l == 1) is stored directly, any
 * other goes through xmlCopyCharMultiByte.
 * Expands to a bare if/else without a trailing semicolon.
 */
#define COPY_BUF(l,b,i,v)						\
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v);				\
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v))
2131
2132
/**
2133
 * xmlSkipBlankChars:
2134
 * @ctxt:  the XML parser context
2135
 *
2136
 * skip all blanks character found at that point in the input streams.
2137
 * It pops up finished entities in the process if allowable at that point.
2138
 *
2139
 * Returns the number of space chars skipped
2140
 */
2141
2142
int
2143
64.8M
xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
2144
64.8M
    int res = 0;
2145
2146
    /*
2147
     * It's Okay to use CUR/NEXT here since all the blanks are on
2148
     * the ASCII range.
2149
     */
2150
64.8M
    if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
2151
57.5M
  const xmlChar *cur;
2152
  /*
2153
   * if we are in the document content, go really fast
2154
   */
2155
57.5M
  cur = ctxt->input->cur;
2156
79.8M
  while (IS_BLANK_CH(*cur)) {
2157
79.8M
      if (*cur == '\n') {
2158
6.30M
    ctxt->input->line++; ctxt->input->col = 1;
2159
73.5M
      } else {
2160
73.5M
    ctxt->input->col++;
2161
73.5M
      }
2162
79.8M
      cur++;
2163
79.8M
      res++;
2164
79.8M
      if (*cur == 0) {
2165
130k
    ctxt->input->cur = cur;
2166
130k
    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2167
130k
    cur = ctxt->input->cur;
2168
130k
      }
2169
79.8M
  }
2170
57.5M
  ctxt->input->cur = cur;
2171
57.5M
    } else {
2172
7.27M
        int expandPE = ((ctxt->external != 0) || (ctxt->inputNr != 1));
2173
2174
12.5M
  while (1) {
2175
12.5M
            if (IS_BLANK_CH(CUR)) { /* CHECKED tstblanks.xml */
2176
4.45M
    NEXT;
2177
8.05M
      } else if (CUR == '%') {
2178
                /*
2179
                 * Need to handle support of entities branching here
2180
                 */
2181
970k
          if ((expandPE == 0) || (IS_BLANK_CH(NXT(1))) || (NXT(1) == 0))
2182
501k
                    break;
2183
468k
          xmlParsePEReference(ctxt);
2184
7.08M
            } else if (CUR == 0) {
2185
325k
                if (ctxt->inputNr <= 1)
2186
11.3k
                    break;
2187
313k
                xmlPopInput(ctxt);
2188
6.76M
            } else {
2189
6.76M
                break;
2190
6.76M
            }
2191
2192
            /*
2193
             * Also increase the counter when entering or exiting a PERef.
2194
             * The spec says: "When a parameter-entity reference is recognized
2195
             * in the DTD and included, its replacement text MUST be enlarged
2196
             * by the attachment of one leading and one following space (#x20)
2197
             * character."
2198
             */
2199
5.23M
      res++;
2200
5.23M
        }
2201
7.27M
    }
2202
64.8M
    return(res);
2203
64.8M
}
2204
2205
/************************************************************************
2206
 *                  *
2207
 *    Commodity functions to handle entities      *
2208
 *                  *
2209
 ************************************************************************/
2210
2211
/**
2212
 * xmlPopInput:
2213
 * @ctxt:  an XML parser context
2214
 *
2215
 * xmlPopInput: the current input pointed by ctxt->input came to an end
2216
 *          pop it and return the next char.
2217
 *
2218
 * Returns the current xmlChar in the parser context
2219
 */
2220
xmlChar
2221
476k
xmlPopInput(xmlParserCtxtPtr ctxt) {
2222
476k
    if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
2223
476k
    if (xmlParserDebugEntities)
2224
0
  xmlGenericError(xmlGenericErrorContext,
2225
0
    "Popping input %d\n", ctxt->inputNr);
2226
476k
    if ((ctxt->inputNr > 1) && (ctxt->inSubset == 0) &&
2227
0
        (ctxt->instate != XML_PARSER_EOF))
2228
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2229
0
                    "Unfinished entity outside the DTD");
2230
476k
    xmlFreeInputStream(inputPop(ctxt));
2231
476k
    if (*ctxt->input->cur == 0)
2232
147
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2233
476k
    return(CUR);
2234
476k
}
2235
2236
/**
2237
 * xmlPushInput:
2238
 * @ctxt:  an XML parser context
2239
 * @input:  an XML parser input fragment (entity, XML fragment ...).
2240
 *
2241
 * xmlPushInput: switch to a new input stream which is stacked on top
2242
 *               of the previous one(s).
2243
 * Returns -1 in case of error or the index in the input stack
2244
 */
2245
int
2246
476k
xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
2247
476k
    int ret;
2248
476k
    if (input == NULL) return(-1);
2249
2250
476k
    if (xmlParserDebugEntities) {
2251
0
  if ((ctxt->input != NULL) && (ctxt->input->filename))
2252
0
      xmlGenericError(xmlGenericErrorContext,
2253
0
        "%s(%d): ", ctxt->input->filename,
2254
0
        ctxt->input->line);
2255
0
  xmlGenericError(xmlGenericErrorContext,
2256
0
    "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
2257
0
    }
2258
476k
    if (((ctxt->inputNr > 40) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2259
476k
        (ctxt->inputNr > 1024)) {
2260
0
        xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2261
0
        while (ctxt->inputNr > 1)
2262
0
            xmlFreeInputStream(inputPop(ctxt));
2263
0
  return(-1);
2264
0
    }
2265
476k
    ret = inputPush(ctxt, input);
2266
476k
    if (ctxt->instate == XML_PARSER_EOF)
2267
0
        return(-1);
2268
476k
    GROW;
2269
476k
    return(ret);
2270
476k
}
2271
2272
/**
2273
 * xmlParseCharRef:
2274
 * @ctxt:  an XML parser context
2275
 *
2276
 * parse Reference declarations
2277
 *
2278
 * [66] CharRef ::= '&#' [0-9]+ ';' |
2279
 *                  '&#x' [0-9a-fA-F]+ ';'
2280
 *
2281
 * [ WFC: Legal Character ]
2282
 * Characters referred to using character references must match the
2283
 * production for Char.
2284
 *
2285
 * Returns the value parsed (as an int), 0 in case of error
2286
 */
2287
int
2288
867k
xmlParseCharRef(xmlParserCtxtPtr ctxt) {
2289
867k
    unsigned int val = 0;
2290
867k
    int count = 0;
2291
867k
    unsigned int outofrange = 0;
2292
2293
    /*
2294
     * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2295
     */
2296
867k
    if ((RAW == '&') && (NXT(1) == '#') &&
2297
867k
        (NXT(2) == 'x')) {
2298
325k
  SKIP(3);
2299
325k
  GROW;
2300
1.38M
  while (RAW != ';') { /* loop blocked by count */
2301
1.16M
      if (count++ > 20) {
2302
23.0k
    count = 0;
2303
23.0k
    GROW;
2304
23.0k
                if (ctxt->instate == XML_PARSER_EOF)
2305
0
                    return(0);
2306
23.0k
      }
2307
1.16M
      if ((RAW >= '0') && (RAW <= '9'))
2308
574k
          val = val * 16 + (CUR - '0');
2309
592k
      else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2310
383k
          val = val * 16 + (CUR - 'a') + 10;
2311
209k
      else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2312
103k
          val = val * 16 + (CUR - 'A') + 10;
2313
106k
      else {
2314
106k
    xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2315
106k
    val = 0;
2316
106k
    break;
2317
106k
      }
2318
1.06M
      if (val > 0x10FFFF)
2319
237k
          outofrange = val;
2320
2321
1.06M
      NEXT;
2322
1.06M
      count++;
2323
1.06M
  }
2324
325k
  if (RAW == ';') {
2325
      /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2326
218k
      ctxt->input->col++;
2327
218k
      ctxt->nbChars ++;
2328
218k
      ctxt->input->cur++;
2329
218k
  }
2330
542k
    } else if  ((RAW == '&') && (NXT(1) == '#')) {
2331
542k
  SKIP(2);
2332
542k
  GROW;
2333
2.15M
  while (RAW != ';') { /* loop blocked by count */
2334
1.87M
      if (count++ > 20) {
2335
26.3k
    count = 0;
2336
26.3k
    GROW;
2337
26.3k
                if (ctxt->instate == XML_PARSER_EOF)
2338
0
                    return(0);
2339
26.3k
      }
2340
1.87M
      if ((RAW >= '0') && (RAW <= '9'))
2341
1.61M
          val = val * 10 + (CUR - '0');
2342
261k
      else {
2343
261k
    xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2344
261k
    val = 0;
2345
261k
    break;
2346
261k
      }
2347
1.61M
      if (val > 0x10FFFF)
2348
329k
          outofrange = val;
2349
2350
1.61M
      NEXT;
2351
1.61M
      count++;
2352
1.61M
  }
2353
542k
  if (RAW == ';') {
2354
      /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2355
280k
      ctxt->input->col++;
2356
280k
      ctxt->nbChars ++;
2357
280k
      ctxt->input->cur++;
2358
280k
  }
2359
542k
    } else {
2360
0
        xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2361
0
    }
2362
2363
    /*
2364
     * [ WFC: Legal Character ]
2365
     * Characters referred to using character references must match the
2366
     * production for Char.
2367
     */
2368
867k
    if ((IS_CHAR(val) && (outofrange == 0))) {
2369
412k
        return(val);
2370
454k
    } else {
2371
454k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2372
454k
                          "xmlParseCharRef: invalid xmlChar value %d\n",
2373
454k
                    val);
2374
454k
    }
2375
454k
    return(0);
2376
867k
}
2377
2378
/**
2379
 * xmlParseStringCharRef:
2380
 * @ctxt:  an XML parser context
2381
 * @str:  a pointer to an index in the string
2382
 *
2383
 * parse Reference declarations, variant parsing from a string rather
2384
 * than an an input flow.
2385
 *
2386
 * [66] CharRef ::= '&#' [0-9]+ ';' |
2387
 *                  '&#x' [0-9a-fA-F]+ ';'
2388
 *
2389
 * [ WFC: Legal Character ]
2390
 * Characters referred to using character references must match the
2391
 * production for Char.
2392
 *
2393
 * Returns the value parsed (as an int), 0 in case of error, str will be
2394
 *         updated to the current value of the index
2395
 */
2396
static int
2397
214k
xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2398
214k
    const xmlChar *ptr;
2399
214k
    xmlChar cur;
2400
214k
    unsigned int val = 0;
2401
214k
    unsigned int outofrange = 0;
2402
2403
214k
    if ((str == NULL) || (*str == NULL)) return(0);
2404
214k
    ptr = *str;
2405
214k
    cur = *ptr;
2406
214k
    if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2407
70.6k
  ptr += 3;
2408
70.6k
  cur = *ptr;
2409
349k
  while (cur != ';') { /* Non input consuming loop */
2410
307k
      if ((cur >= '0') && (cur <= '9'))
2411
193k
          val = val * 16 + (cur - '0');
2412
113k
      else if ((cur >= 'a') && (cur <= 'f'))
2413
31.1k
          val = val * 16 + (cur - 'a') + 10;
2414
82.4k
      else if ((cur >= 'A') && (cur <= 'F'))
2415
54.0k
          val = val * 16 + (cur - 'A') + 10;
2416
28.4k
      else {
2417
28.4k
    xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2418
28.4k
    val = 0;
2419
28.4k
    break;
2420
28.4k
      }
2421
279k
      if (val > 0x10FFFF)
2422
63.8k
          outofrange = val;
2423
2424
279k
      ptr++;
2425
279k
      cur = *ptr;
2426
279k
  }
2427
70.6k
  if (cur == ';')
2428
42.1k
      ptr++;
2429
143k
    } else if  ((cur == '&') && (ptr[1] == '#')){
2430
143k
  ptr += 2;
2431
143k
  cur = *ptr;
2432
496k
  while (cur != ';') { /* Non input consuming loops */
2433
369k
      if ((cur >= '0') && (cur <= '9'))
2434
353k
          val = val * 10 + (cur - '0');
2435
16.0k
      else {
2436
16.0k
    xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2437
16.0k
    val = 0;
2438
16.0k
    break;
2439
16.0k
      }
2440
353k
      if (val > 0x10FFFF)
2441
16.7k
          outofrange = val;
2442
2443
353k
      ptr++;
2444
353k
      cur = *ptr;
2445
353k
  }
2446
143k
  if (cur == ';')
2447
127k
      ptr++;
2448
143k
    } else {
2449
0
  xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2450
0
  return(0);
2451
0
    }
2452
214k
    *str = ptr;
2453
2454
    /*
2455
     * [ WFC: Legal Character ]
2456
     * Characters referred to using character references must match the
2457
     * production for Char.
2458
     */
2459
214k
    if ((IS_CHAR(val) && (outofrange == 0))) {
2460
150k
        return(val);
2461
150k
    } else {
2462
63.9k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2463
63.9k
        "xmlParseStringCharRef: invalid xmlChar value %d\n",
2464
63.9k
        val);
2465
63.9k
    }
2466
63.9k
    return(0);
2467
214k
}
2468
2469
/**
2470
 * xmlParserHandlePEReference:
2471
 * @ctxt:  the parser context
2472
 *
2473
 * [69] PEReference ::= '%' Name ';'
2474
 *
2475
 * [ WFC: No Recursion ]
2476
 * A parsed entity must not contain a recursive
2477
 * reference to itself, either directly or indirectly.
2478
 *
2479
 * [ WFC: Entity Declared ]
2480
 * In a document without any DTD, a document with only an internal DTD
2481
 * subset which contains no parameter entity references, or a document
2482
 * with "standalone='yes'", ...  ... The declaration of a parameter
2483
 * entity must precede any reference to it...
2484
 *
2485
 * [ VC: Entity Declared ]
2486
 * In a document with an external subset or external parameter entities
2487
 * with "standalone='no'", ...  ... The declaration of a parameter entity
2488
 * must precede any reference to it...
2489
 *
2490
 * [ WFC: In DTD ]
2491
 * Parameter-entity references may only appear in the DTD.
2492
 * NOTE: misleading but this is handled.
2493
 *
2494
 * A PEReference may have been detected in the current input stream
2495
 * the handling is done accordingly to
2496
 *      http://www.w3.org/TR/REC-xml#entproc
2497
 * i.e.
2498
 *   - Included in literal in entity values
2499
 *   - Included as Parameter Entity reference within DTDs
2500
 */
2501
void
2502
0
xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
2503
0
    switch(ctxt->instate) {
2504
0
  case XML_PARSER_CDATA_SECTION:
2505
0
      return;
2506
0
        case XML_PARSER_COMMENT:
2507
0
      return;
2508
0
  case XML_PARSER_START_TAG:
2509
0
      return;
2510
0
  case XML_PARSER_END_TAG:
2511
0
      return;
2512
0
        case XML_PARSER_EOF:
2513
0
      xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
2514
0
      return;
2515
0
        case XML_PARSER_PROLOG:
2516
0
  case XML_PARSER_START:
2517
0
  case XML_PARSER_MISC:
2518
0
      xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
2519
0
      return;
2520
0
  case XML_PARSER_ENTITY_DECL:
2521
0
        case XML_PARSER_CONTENT:
2522
0
        case XML_PARSER_ATTRIBUTE_VALUE:
2523
0
        case XML_PARSER_PI:
2524
0
  case XML_PARSER_SYSTEM_LITERAL:
2525
0
  case XML_PARSER_PUBLIC_LITERAL:
2526
      /* we just ignore it there */
2527
0
      return;
2528
0
        case XML_PARSER_EPILOG:
2529
0
      xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
2530
0
      return;
2531
0
  case XML_PARSER_ENTITY_VALUE:
2532
      /*
2533
       * NOTE: in the case of entity values, we don't do the
2534
       *       substitution here since we need the literal
2535
       *       entity value to be able to save the internal
2536
       *       subset of the document.
2537
       *       This will be handled by xmlStringDecodeEntities
2538
       */
2539
0
      return;
2540
0
        case XML_PARSER_DTD:
2541
      /*
2542
       * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2543
       * In the internal DTD subset, parameter-entity references
2544
       * can occur only where markup declarations can occur, not
2545
       * within markup declarations.
2546
       * In that case this is handled in xmlParseMarkupDecl
2547
       */
2548
0
      if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2549
0
    return;
2550
0
      if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
2551
0
    return;
2552
0
            break;
2553
0
        case XML_PARSER_IGNORE:
2554
0
            return;
2555
0
    }
2556
2557
0
    xmlParsePEReference(ctxt);
2558
0
}
2559
2560
/*
 * growBuffer:
 * @buffer:  name of an xmlChar * lvalue holding a heap block; the macro
 *           also reads and writes the companion lvalue <buffer>_size,
 *           which is expected to be a size_t
 * @n:       extra room requested on top of doubling the current size
 *
 * Reallocates @buffer to (<buffer>_size * 2 + @n) bytes and updates
 * <buffer>_size accordingly.
 *
 * The enclosing function must provide a "mem_error" label: the macro
 * jumps there when the new size wraps around or when xmlRealloc() fails.
 * In both cases @buffer and <buffer>_size are left untouched, so the
 * old block is still owned by the caller.
 *
 * @n is parenthesized so that any expression can be passed, and the body
 * is wrapped in do { } while (0) so the macro behaves as one statement.
 */
#define growBuffer(buffer, n) do {                                      \
    xmlChar *tmp;                                                       \
    size_t new_size = buffer##_size * 2 + (n);                          \
    if (new_size < buffer##_size) goto mem_error;                       \
    tmp = (xmlChar *) xmlRealloc(buffer, new_size);                     \
    if (tmp == NULL) goto mem_error;                                    \
    buffer = tmp;                                                       \
    buffer##_size = new_size;                                           \
} while (0)
2574
2575
/**
2576
 * xmlStringLenDecodeEntities:
2577
 * @ctxt:  the parser context
2578
 * @str:  the input string
2579
 * @len: the string length
2580
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2581
 * @end:  an end marker xmlChar, 0 if none
2582
 * @end2:  an end marker xmlChar, 0 if none
2583
 * @end3:  an end marker xmlChar, 0 if none
2584
 *
2585
 * Takes a entity string content and process to do the adequate substitutions.
2586
 *
2587
 * [67] Reference ::= EntityRef | CharRef
2588
 *
2589
 * [69] PEReference ::= '%' Name ';'
2590
 *
2591
 * Returns A newly allocated string with the substitution done. The caller
2592
 *      must deallocate it !
2593
 */
2594
xmlChar *
2595
xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2596
837k
          int what, xmlChar end, xmlChar  end2, xmlChar end3) {
2597
837k
    xmlChar *buffer = NULL;
2598
837k
    size_t buffer_size = 0;
2599
837k
    size_t nbchars = 0;
2600
2601
837k
    xmlChar *current = NULL;
2602
837k
    xmlChar *rep = NULL;
2603
837k
    const xmlChar *last;
2604
837k
    xmlEntityPtr ent;
2605
837k
    int c,l;
2606
2607
837k
    if ((ctxt == NULL) || (str == NULL) || (len < 0))
2608
0
  return(NULL);
2609
837k
    last = str + len;
2610
2611
837k
    if (((ctxt->depth > 40) &&
2612
2.81k
         ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2613
834k
  (ctxt->depth > 1024)) {
2614
2.81k
  xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2615
2.81k
  return(NULL);
2616
2.81k
    }
2617
2618
    /*
2619
     * allocate a translation buffer.
2620
     */
2621
834k
    buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
2622
834k
    buffer = (xmlChar *) xmlMallocAtomic(buffer_size);
2623
834k
    if (buffer == NULL) goto mem_error;
2624
2625
    /*
2626
     * OK loop until we reach one of the ending char or a size limit.
2627
     * we are operating on already parsed values.
2628
     */
2629
834k
    if (str < last)
2630
729k
  c = CUR_SCHAR(str, l);
2631
105k
    else
2632
105k
        c = 0;
2633
2.27G
    while ((c != 0) && (c != end) && /* non input consuming loop */
2634
2.27G
     (c != end2) && (c != end3)) {
2635
2636
2.27G
  if (c == 0) break;
2637
2.27G
        if ((c == '&') && (str[1] == '#')) {
2638
214k
      int val = xmlParseStringCharRef(ctxt, &str);
2639
214k
      if (val == 0)
2640
63.9k
                goto int_error;
2641
150k
      COPY_BUF(0,buffer,nbchars,val);
2642
150k
      if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2643
3.26k
          growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2644
3.26k
      }
2645
2.27G
  } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2646
1.16M
      if (xmlParserDebugEntities)
2647
0
    xmlGenericError(xmlGenericErrorContext,
2648
0
      "String decoding Entity Reference: %.30s\n",
2649
0
      str);
2650
1.16M
      ent = xmlParseStringEntityRef(ctxt, &str);
2651
1.16M
      xmlParserEntityCheck(ctxt, 0, ent, 0);
2652
1.16M
      if (ent != NULL)
2653
754k
          ctxt->nbentities += ent->checked / 2;
2654
1.16M
      if ((ent != NULL) &&
2655
754k
    (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2656
50.3k
    if (ent->content != NULL) {
2657
50.3k
        COPY_BUF(0,buffer,nbchars,ent->content[0]);
2658
50.3k
        if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2659
3.12k
      growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2660
3.12k
        }
2661
50.3k
    } else {
2662
0
        xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2663
0
          "predefined entity has no content\n");
2664
0
                    goto int_error;
2665
0
    }
2666
1.11M
      } else if ((ent != NULL) && (ent->content != NULL)) {
2667
657k
    ctxt->depth++;
2668
657k
    rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2669
657k
                            0, 0, 0);
2670
657k
    ctxt->depth--;
2671
657k
    if (rep == NULL)
2672
97.7k
                    goto int_error;
2673
2674
560k
                current = rep;
2675
844M
                while (*current != 0) { /* non input consuming loop */
2676
844M
                    buffer[nbchars++] = *current++;
2677
844M
                    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2678
59.4k
                        if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
2679
905
                            goto int_error;
2680
175k
                        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2681
175k
                    }
2682
844M
                }
2683
559k
                xmlFree(rep);
2684
559k
                rep = NULL;
2685
559k
      } else if (ent != NULL) {
2686
46.0k
    int i = xmlStrlen(ent->name);
2687
46.0k
    const xmlChar *cur = ent->name;
2688
2689
46.0k
    buffer[nbchars++] = '&';
2690
46.0k
    if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) {
2691
1.79k
        growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE);
2692
1.79k
    }
2693
182k
    for (;i > 0;i--)
2694
135k
        buffer[nbchars++] = *cur++;
2695
46.0k
    buffer[nbchars++] = ';';
2696
46.0k
      }
2697
2.27G
  } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2698
0
      if (xmlParserDebugEntities)
2699
0
    xmlGenericError(xmlGenericErrorContext,
2700
0
      "String decoding PE Reference: %.30s\n", str);
2701
0
      ent = xmlParseStringPEReference(ctxt, &str);
2702
0
      xmlParserEntityCheck(ctxt, 0, ent, 0);
2703
0
      if (ent != NULL)
2704
0
          ctxt->nbentities += ent->checked / 2;
2705
0
      if (ent != NULL) {
2706
0
                if (ent->content == NULL) {
2707
        /*
2708
         * Note: external parsed entities will not be loaded,
2709
         * it is not required for a non-validating parser to
2710
         * complete external PEreferences coming from the
2711
         * internal subset
2712
         */
2713
0
        if (((ctxt->options & XML_PARSE_NOENT) != 0) ||
2714
0
      ((ctxt->options & XML_PARSE_DTDVALID) != 0) ||
2715
0
      (ctxt->validate != 0)) {
2716
0
      xmlLoadEntityContent(ctxt, ent);
2717
0
        } else {
2718
0
      xmlWarningMsg(ctxt, XML_ERR_ENTITY_PROCESSING,
2719
0
      "not validating will not read content for PE entity %s\n",
2720
0
                          ent->name, NULL);
2721
0
        }
2722
0
    }
2723
0
    ctxt->depth++;
2724
0
    rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2725
0
                            0, 0, 0);
2726
0
    ctxt->depth--;
2727
0
    if (rep == NULL)
2728
0
                    goto int_error;
2729
0
                current = rep;
2730
0
                while (*current != 0) { /* non input consuming loop */
2731
0
                    buffer[nbchars++] = *current++;
2732
0
                    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2733
0
                        if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
2734
0
                            goto int_error;
2735
0
                        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2736
0
                    }
2737
0
                }
2738
0
                xmlFree(rep);
2739
0
                rep = NULL;
2740
0
      }
2741
2.27G
  } else {
2742
2.27G
      COPY_BUF(l,buffer,nbchars,c);
2743
2.27G
      str += l;
2744
2.27G
      if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2745
318k
          growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2746
318k
      }
2747
2.27G
  }
2748
2.27G
  if (str < last)
2749
2.27G
      c = CUR_SCHAR(str, l);
2750
566k
  else
2751
566k
      c = 0;
2752
2.27G
    }
2753
671k
    buffer[nbchars] = 0;
2754
671k
    return(buffer);
2755
2756
0
mem_error:
2757
0
    xmlErrMemory(ctxt, NULL);
2758
162k
int_error:
2759
162k
    if (rep != NULL)
2760
905
        xmlFree(rep);
2761
162k
    if (buffer != NULL)
2762
162k
        xmlFree(buffer);
2763
162k
    return(NULL);
2764
0
}
2765
2766
/**
2767
 * xmlStringDecodeEntities:
2768
 * @ctxt:  the parser context
2769
 * @str:  the input string
2770
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2771
 * @end:  an end marker xmlChar, 0 if none
2772
 * @end2:  an end marker xmlChar, 0 if none
2773
 * @end3:  an end marker xmlChar, 0 if none
2774
 *
2775
 * Takes a entity string content and process to do the adequate substitutions.
2776
 *
2777
 * [67] Reference ::= EntityRef | CharRef
2778
 *
2779
 * [69] PEReference ::= '%' Name ';'
2780
 *
2781
 * Returns A newly allocated string with the substitution done. The caller
2782
 *      must deallocate it !
2783
 */
2784
xmlChar *
2785
xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2786
837k
            xmlChar end, xmlChar  end2, xmlChar end3) {
2787
837k
    if ((ctxt == NULL) || (str == NULL)) return(NULL);
2788
837k
    return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2789
837k
           end, end2, end3));
2790
837k
}
2791
2792
/************************************************************************
2793
 *                  *
2794
 *    Commodity functions, cleanup needed ?     *
2795
 *                  *
2796
 ************************************************************************/
2797
2798
/**
2799
 * areBlanks:
2800
 * @ctxt:  an XML parser context
2801
 * @str:  a xmlChar *
2802
 * @len:  the size of @str
2803
 * @blank_chars: we know the chars are blanks
2804
 *
2805
 * Is this a sequence of blank chars that one can ignore ?
2806
 *
2807
 * Returns 1 if ignorable 0 otherwise.
2808
 */
2809
2810
static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2811
15.7M
                     int blank_chars) {
2812
15.7M
    int i, ret;
2813
15.7M
    xmlNodePtr lastChild;
2814
2815
    /*
2816
     * Don't spend time trying to differentiate them, the same callback is
2817
     * used !
2818
     */
2819
15.7M
    if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
2820
0
  return(0);
2821
2822
    /*
2823
     * Check for xml:space value.
2824
     */
2825
15.7M
    if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2826
15.3M
        (*(ctxt->space) == -2))
2827
10.6M
  return(0);
2828
2829
    /*
2830
     * Check that the string is made of blanks
2831
     */
2832
5.14M
    if (blank_chars == 0) {
2833
9.36M
  for (i = 0;i < len;i++)
2834
8.93M
      if (!(IS_BLANK_CH(str[i]))) return(0);
2835
2.32M
    }
2836
2837
    /*
2838
     * Look if the element is mixed content in the DTD if available
2839
     */
2840
3.24M
    if (ctxt->node == NULL) return(0);
2841
3.13M
    if (ctxt->myDoc != NULL) {
2842
3.13M
  ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2843
3.13M
        if (ret == 0) return(1);
2844
3.13M
        if (ret == 1) return(0);
2845
3.13M
    }
2846
2847
    /*
2848
     * Otherwise, heuristic :-\
2849
     */
2850
3.13M
    if ((RAW != '<') && (RAW != 0xD)) return(0);
2851
3.05M
    if ((ctxt->node->children == NULL) &&
2852
925k
  (RAW == '<') && (NXT(1) == '/')) return(0);
2853
2854
2.97M
    lastChild = xmlGetLastChild(ctxt->node);
2855
2.97M
    if (lastChild == NULL) {
2856
846k
        if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2857
0
            (ctxt->node->content != NULL)) return(0);
2858
2.13M
    } else if (xmlNodeIsText(lastChild))
2859
159k
        return(0);
2860
1.97M
    else if ((ctxt->node->children != NULL) &&
2861
1.97M
             (xmlNodeIsText(ctxt->node->children)))
2862
43.3k
        return(0);
2863
2.77M
    return(1);
2864
2.97M
}
2865
2866
/************************************************************************
2867
 *                  *
2868
 *    Extra stuff for namespace support     *
2869
 *  Relates to http://www.w3.org/TR/WD-xml-names      *
2870
 *                  *
2871
 ************************************************************************/
2872
2873
/**
2874
 * xmlSplitQName:
2875
 * @ctxt:  an XML parser context
2876
 * @name:  an XML parser context
2877
 * @prefix:  a xmlChar **
2878
 *
2879
 * parse an UTF8 encoded XML qualified name string
2880
 *
2881
 * [NS 5] QName ::= (Prefix ':')? LocalPart
2882
 *
2883
 * [NS 6] Prefix ::= NCName
2884
 *
2885
 * [NS 7] LocalPart ::= NCName
2886
 *
2887
 * Returns the local part, and prefix is updated
2888
 *   to get the Prefix if any.
2889
 */
2890
2891
xmlChar *
2892
234k
xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2893
234k
    xmlChar buf[XML_MAX_NAMELEN + 5];
2894
234k
    xmlChar *buffer = NULL;
2895
234k
    int len = 0;
2896
234k
    int max = XML_MAX_NAMELEN;
2897
234k
    xmlChar *ret = NULL;
2898
234k
    const xmlChar *cur = name;
2899
234k
    int c;
2900
2901
234k
    if (prefix == NULL) return(NULL);
2902
234k
    *prefix = NULL;
2903
2904
234k
    if (cur == NULL) return(NULL);
2905
2906
#ifndef XML_XML_NAMESPACE
2907
    /* xml: prefix is not really a namespace */
2908
    if ((cur[0] == 'x') && (cur[1] == 'm') &&
2909
        (cur[2] == 'l') && (cur[3] == ':'))
2910
  return(xmlStrdup(name));
2911
#endif
2912
2913
    /* nasty but well=formed */
2914
234k
    if (cur[0] == ':')
2915
16.5k
  return(xmlStrdup(name));
2916
2917
218k
    c = *cur++;
2918
5.10M
    while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2919
4.88M
  buf[len++] = c;
2920
4.88M
  c = *cur++;
2921
4.88M
    }
2922
218k
    if (len >= max) {
2923
  /*
2924
   * Okay someone managed to make a huge name, so he's ready to pay
2925
   * for the processing speed.
2926
   */
2927
18.5k
  max = len * 2;
2928
2929
18.5k
  buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
2930
18.5k
  if (buffer == NULL) {
2931
0
      xmlErrMemory(ctxt, NULL);
2932
0
      return(NULL);
2933
0
  }
2934
18.5k
  memcpy(buffer, buf, len);
2935
3.27M
  while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2936
3.25M
      if (len + 10 > max) {
2937
9.59k
          xmlChar *tmp;
2938
2939
9.59k
    max *= 2;
2940
9.59k
    tmp = (xmlChar *) xmlRealloc(buffer,
2941
9.59k
            max * sizeof(xmlChar));
2942
9.59k
    if (tmp == NULL) {
2943
0
        xmlFree(buffer);
2944
0
        xmlErrMemory(ctxt, NULL);
2945
0
        return(NULL);
2946
0
    }
2947
9.59k
    buffer = tmp;
2948
9.59k
      }
2949
3.25M
      buffer[len++] = c;
2950
3.25M
      c = *cur++;
2951
3.25M
  }
2952
18.5k
  buffer[len] = 0;
2953
18.5k
    }
2954
2955
218k
    if ((c == ':') && (*cur == 0)) {
2956
24.4k
        if (buffer != NULL)
2957
2.98k
      xmlFree(buffer);
2958
24.4k
  *prefix = NULL;
2959
24.4k
  return(xmlStrdup(name));
2960
24.4k
    }
2961
2962
193k
    if (buffer == NULL)
2963
177k
  ret = xmlStrndup(buf, len);
2964
15.6k
    else {
2965
15.6k
  ret = buffer;
2966
15.6k
  buffer = NULL;
2967
15.6k
  max = XML_MAX_NAMELEN;
2968
15.6k
    }
2969
2970
2971
193k
    if (c == ':') {
2972
59.7k
  c = *cur;
2973
59.7k
        *prefix = ret;
2974
59.7k
  if (c == 0) {
2975
0
      return(xmlStrndup(BAD_CAST "", 0));
2976
0
  }
2977
59.7k
  len = 0;
2978
2979
  /*
2980
   * Check that the first character is proper to start
2981
   * a new name
2982
   */
2983
59.7k
  if (!(((c >= 0x61) && (c <= 0x7A)) ||
2984
33.3k
        ((c >= 0x41) && (c <= 0x5A)) ||
2985
28.1k
        (c == '_') || (c == ':'))) {
2986
24.9k
      int l;
2987
24.9k
      int first = CUR_SCHAR(cur, l);
2988
2989
24.9k
      if (!IS_LETTER(first) && (first != '_')) {
2990
7.56k
    xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
2991
7.56k
          "Name %s is not XML Namespace compliant\n",
2992
7.56k
          name);
2993
7.56k
      }
2994
24.9k
  }
2995
59.7k
  cur++;
2996
2997
1.97M
  while ((c != 0) && (len < max)) { /* tested bigname2.xml */
2998
1.91M
      buf[len++] = c;
2999
1.91M
      c = *cur++;
3000
1.91M
  }
3001
59.7k
  if (len >= max) {
3002
      /*
3003
       * Okay someone managed to make a huge name, so he's ready to pay
3004
       * for the processing speed.
3005
       */
3006
9.06k
      max = len * 2;
3007
3008
9.06k
      buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3009
9.06k
      if (buffer == NULL) {
3010
0
          xmlErrMemory(ctxt, NULL);
3011
0
    return(NULL);
3012
0
      }
3013
9.06k
      memcpy(buffer, buf, len);
3014
2.51M
      while (c != 0) { /* tested bigname2.xml */
3015
2.50M
    if (len + 10 > max) {
3016
6.32k
        xmlChar *tmp;
3017
3018
6.32k
        max *= 2;
3019
6.32k
        tmp = (xmlChar *) xmlRealloc(buffer,
3020
6.32k
                max * sizeof(xmlChar));
3021
6.32k
        if (tmp == NULL) {
3022
0
      xmlErrMemory(ctxt, NULL);
3023
0
      xmlFree(buffer);
3024
0
      return(NULL);
3025
0
        }
3026
6.32k
        buffer = tmp;
3027
6.32k
    }
3028
2.50M
    buffer[len++] = c;
3029
2.50M
    c = *cur++;
3030
2.50M
      }
3031
9.06k
      buffer[len] = 0;
3032
9.06k
  }
3033
3034
59.7k
  if (buffer == NULL)
3035
50.6k
      ret = xmlStrndup(buf, len);
3036
9.06k
  else {
3037
9.06k
      ret = buffer;
3038
9.06k
  }
3039
59.7k
    }
3040
3041
193k
    return(ret);
3042
193k
}
3043
3044
/************************************************************************
3045
 *                  *
3046
 *      The parser itself       *
3047
 *  Relates to http://www.w3.org/TR/REC-xml       *
3048
 *                  *
3049
 ************************************************************************/
3050
3051
/************************************************************************
3052
 *                  *
3053
 *  Routines to parse Name, NCName and NmToken      *
3054
 *                  *
3055
 ************************************************************************/
3056
#ifdef DEBUG
/*
 * Call counters only compiled in DEBUG builds. In this part of the file
 * nbParseName, nbParseNameComplex, nbParseNCName and nbParseNCNameComplex
 * are incremented on entry to the routine of the same name; the users of
 * nbParseNmToken and nbParseStringName are not visible here.
 */
3057
static unsigned long nbParseName = 0;
3058
static unsigned long nbParseNmToken = 0;
3059
static unsigned long nbParseNCName = 0;
3060
static unsigned long nbParseNCNameComplex = 0;
3061
static unsigned long nbParseNameComplex = 0;
3062
static unsigned long nbParseStringName = 0;
3063
#endif
3064
3065
/*
3066
 * The two following functions are related to the change of accepted
3067
 * characters for Name and NmToken in the Revision 5 of XML-1.0
3068
 * They correspond to the modified production [4] and the new production [4a]
3069
 * changes in that revision. Also note that the macros used for the
3070
 * productions Letter, Digit, CombiningChar and Extender are not needed
3071
 * anymore.
3072
 * We still keep compatibility to pre-revision5 parsing semantic if the
3073
 * new XML_PARSE_OLD10 option is given to the parser.
3074
 */
3075
static int
3076
6.68M
xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
3077
6.68M
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3078
        /*
3079
   * Use the new checks of production [4] [4a] amd [5] of the
3080
   * Update 5 of XML-1.0
3081
   */
3082
6.68M
  if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3083
6.67M
      (((c >= 'a') && (c <= 'z')) ||
3084
4.85M
       ((c >= 'A') && (c <= 'Z')) ||
3085
4.30M
       (c == '_') || (c == ':') ||
3086
3.89M
       ((c >= 0xC0) && (c <= 0xD6)) ||
3087
3.74M
       ((c >= 0xD8) && (c <= 0xF6)) ||
3088
3.64M
       ((c >= 0xF8) && (c <= 0x2FF)) ||
3089
3.46M
       ((c >= 0x370) && (c <= 0x37D)) ||
3090
3.46M
       ((c >= 0x37F) && (c <= 0x1FFF)) ||
3091
3.29M
       ((c >= 0x200C) && (c <= 0x200D)) ||
3092
3.28M
       ((c >= 0x2070) && (c <= 0x218F)) ||
3093
3.28M
       ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3094
3.28M
       ((c >= 0x3001) && (c <= 0xD7FF)) ||
3095
3.27M
       ((c >= 0xF900) && (c <= 0xFDCF)) ||
3096
3.27M
       ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3097
3.26M
       ((c >= 0x10000) && (c <= 0xEFFFF))))
3098
3.40M
      return(1);
3099
6.68M
    } else {
3100
0
        if (IS_LETTER(c) || (c == '_') || (c == ':'))
3101
0
      return(1);
3102
0
    }
3103
3.27M
    return(0);
3104
6.68M
}
3105
3106
static int
3107
333M
xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
3108
333M
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3109
        /*
3110
   * Use the new checks of production [4] [4a] amd [5] of the
3111
   * Update 5 of XML-1.0
3112
   */
3113
333M
  if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3114
333M
      (((c >= 'a') && (c <= 'z')) ||
3115
296M
       ((c >= 'A') && (c <= 'Z')) ||
3116
293M
       ((c >= '0') && (c <= '9')) || /* !start */
3117
291M
       (c == '_') || (c == ':') ||
3118
290M
       (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3119
290M
       ((c >= 0xC0) && (c <= 0xD6)) ||
3120
288M
       ((c >= 0xD8) && (c <= 0xF6)) ||
3121
286M
       ((c >= 0xF8) && (c <= 0x2FF)) ||
3122
282M
       ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3123
282M
       ((c >= 0x370) && (c <= 0x37D)) ||
3124
282M
       ((c >= 0x37F) && (c <= 0x1FFF)) ||
3125
2.49M
       ((c >= 0x200C) && (c <= 0x200D)) ||
3126
2.48M
       ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3127
2.48M
       ((c >= 0x2070) && (c <= 0x218F)) ||
3128
2.48M
       ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3129
2.47M
       ((c >= 0x3001) && (c <= 0xD7FF)) ||
3130
2.42M
       ((c >= 0xF900) && (c <= 0xFDCF)) ||
3131
2.41M
       ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3132
2.40M
       ((c >= 0x10000) && (c <= 0xEFFFF))))
3133
330M
       return(1);
3134
333M
    } else {
3135
0
        if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3136
0
            (c == '.') || (c == '-') ||
3137
0
      (c == '_') || (c == ':') ||
3138
0
      (IS_COMBINING(c)) ||
3139
0
      (IS_EXTENDER(c)))
3140
0
      return(1);
3141
0
    }
3142
2.47M
    return(0);
3143
333M
}
3144
3145
/*
 * Forward declaration only; the definition is not in this part of the
 * file.
 */
static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
3146
                                          int *len, int *alloc, int normalize);
3147
3148
static const xmlChar *
3149
3.35M
xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3150
3.35M
    int len = 0, l;
3151
3.35M
    int c;
3152
3.35M
    int count = 0;
3153
3154
#ifdef DEBUG
3155
    nbParseNameComplex++;
3156
#endif
3157
3158
    /*
3159
     * Handler for more complex cases
3160
     */
3161
3.35M
    GROW;
3162
3.35M
    if (ctxt->instate == XML_PARSER_EOF)
3163
2
        return(NULL);
3164
3.35M
    c = CUR_CHAR(l);
3165
3.35M
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3166
        /*
3167
   * Use the new checks of production [4] [4a] amd [5] of the
3168
   * Update 5 of XML-1.0
3169
   */
3170
3.35M
  if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3171
3.15M
      (!(((c >= 'a') && (c <= 'z')) ||
3172
2.69M
         ((c >= 'A') && (c <= 'Z')) ||
3173
2.03M
         (c == '_') || (c == ':') ||
3174
1.97M
         ((c >= 0xC0) && (c <= 0xD6)) ||
3175
1.86M
         ((c >= 0xD8) && (c <= 0xF6)) ||
3176
1.76M
         ((c >= 0xF8) && (c <= 0x2FF)) ||
3177
1.60M
         ((c >= 0x370) && (c <= 0x37D)) ||
3178
1.60M
         ((c >= 0x37F) && (c <= 0x1FFF)) ||
3179
1.53M
         ((c >= 0x200C) && (c <= 0x200D)) ||
3180
1.53M
         ((c >= 0x2070) && (c <= 0x218F)) ||
3181
1.53M
         ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3182
1.52M
         ((c >= 0x3001) && (c <= 0xD7FF)) ||
3183
1.52M
         ((c >= 0xF900) && (c <= 0xFDCF)) ||
3184
1.51M
         ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3185
1.71M
         ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3186
1.71M
      return(NULL);
3187
1.71M
  }
3188
1.64M
  len += l;
3189
1.64M
  NEXTL(l);
3190
1.64M
  c = CUR_CHAR(l);
3191
134M
  while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3192
134M
         (((c >= 'a') && (c <= 'z')) ||
3193
97.2M
          ((c >= 'A') && (c <= 'Z')) ||
3194
87.8M
          ((c >= '0') && (c <= '9')) || /* !start */
3195
86.8M
          (c == '_') || (c == ':') ||
3196
84.6M
          (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3197
84.2M
          ((c >= 0xC0) && (c <= 0xD6)) ||
3198
81.5M
          ((c >= 0xD8) && (c <= 0xF6)) ||
3199
80.5M
          ((c >= 0xF8) && (c <= 0x2FF)) ||
3200
71.5M
          ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3201
71.5M
          ((c >= 0x370) && (c <= 0x37D)) ||
3202
71.5M
          ((c >= 0x37F) && (c <= 0x1FFF)) ||
3203
1.51M
          ((c >= 0x200C) && (c <= 0x200D)) ||
3204
1.51M
          ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3205
1.51M
          ((c >= 0x2070) && (c <= 0x218F)) ||
3206
1.50M
          ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3207
1.50M
          ((c >= 0x3001) && (c <= 0xD7FF)) ||
3208
1.41M
          ((c >= 0xF900) && (c <= 0xFDCF)) ||
3209
1.41M
          ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3210
1.39M
          ((c >= 0x10000) && (c <= 0xEFFFF))
3211
134M
    )) {
3212
132M
      if (count++ > XML_PARSER_CHUNK_SIZE) {
3213
1.02M
    count = 0;
3214
1.02M
    GROW;
3215
1.02M
                if (ctxt->instate == XML_PARSER_EOF)
3216
0
                    return(NULL);
3217
1.02M
      }
3218
132M
      len += l;
3219
132M
      NEXTL(l);
3220
132M
      c = CUR_CHAR(l);
3221
132M
  }
3222
1.64M
    } else {
3223
0
  if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3224
0
      (!IS_LETTER(c) && (c != '_') &&
3225
0
       (c != ':'))) {
3226
0
      return(NULL);
3227
0
  }
3228
0
  len += l;
3229
0
  NEXTL(l);
3230
0
  c = CUR_CHAR(l);
3231
3232
0
  while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3233
0
         ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3234
0
    (c == '.') || (c == '-') ||
3235
0
    (c == '_') || (c == ':') ||
3236
0
    (IS_COMBINING(c)) ||
3237
0
    (IS_EXTENDER(c)))) {
3238
0
      if (count++ > XML_PARSER_CHUNK_SIZE) {
3239
0
    count = 0;
3240
0
    GROW;
3241
0
                if (ctxt->instate == XML_PARSER_EOF)
3242
0
                    return(NULL);
3243
0
      }
3244
0
      len += l;
3245
0
      NEXTL(l);
3246
0
      c = CUR_CHAR(l);
3247
0
  }
3248
0
    }
3249
1.64M
    if ((len > XML_MAX_NAME_LENGTH) &&
3250
2.23k
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3251
2.23k
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3252
2.23k
        return(NULL);
3253
2.23k
    }
3254
1.64M
    if (ctxt->input->cur - ctxt->input->base < len) {
3255
        /*
3256
         * There were a couple of bugs where PERefs lead to to a change
3257
         * of the buffer. Check the buffer size to avoid passing an invalid
3258
         * pointer to xmlDictLookup.
3259
         */
3260
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
3261
0
                    "unexpected change of input buffer");
3262
0
        return (NULL);
3263
0
    }
3264
1.64M
    if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3265
5.14k
        return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
3266
1.63M
    return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3267
1.64M
}
3268
3269
/**
3270
 * xmlParseName:
3271
 * @ctxt:  an XML parser context
3272
 *
3273
 * parse an XML name.
3274
 *
3275
 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3276
 *                  CombiningChar | Extender
3277
 *
3278
 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3279
 *
3280
 * [6] Names ::= Name (#x20 Name)*
3281
 *
3282
 * Returns the Name parsed or NULL
3283
 */
3284
3285
const xmlChar *
3286
9.14M
xmlParseName(xmlParserCtxtPtr ctxt) {
3287
9.14M
    const xmlChar *in;
3288
9.14M
    const xmlChar *ret;
3289
9.14M
    int count = 0;
3290
3291
9.14M
    GROW;
3292
3293
#ifdef DEBUG
3294
    nbParseName++;
3295
#endif
3296
3297
    /*
3298
     * Accelerator for simple ASCII names
3299
     */
3300
9.14M
    in = ctxt->input->cur;
3301
9.14M
    if (((*in >= 0x61) && (*in <= 0x7A)) ||
3302
5.31M
  ((*in >= 0x41) && (*in <= 0x5A)) ||
3303
6.95M
  (*in == '_') || (*in == ':')) {
3304
6.95M
  in++;
3305
103M
  while (((*in >= 0x61) && (*in <= 0x7A)) ||
3306
16.9M
         ((*in >= 0x41) && (*in <= 0x5A)) ||
3307
10.6M
         ((*in >= 0x30) && (*in <= 0x39)) ||
3308
8.83M
         (*in == '_') || (*in == '-') ||
3309
7.99M
         (*in == ':') || (*in == '.'))
3310
96.0M
      in++;
3311
6.95M
  if ((*in > 0) && (*in < 0x80)) {
3312
5.78M
      count = in - ctxt->input->cur;
3313
5.78M
            if ((count > XML_MAX_NAME_LENGTH) &&
3314
876
                ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3315
876
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3316
876
                return(NULL);
3317
876
            }
3318
5.78M
      ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3319
5.78M
      ctxt->input->cur = in;
3320
5.78M
      ctxt->nbChars += count;
3321
5.78M
      ctxt->input->col += count;
3322
5.78M
      if (ret == NULL)
3323
0
          xmlErrMemory(ctxt, NULL);
3324
5.78M
      return(ret);
3325
5.78M
  }
3326
6.95M
    }
3327
    /* accelerator for special cases */
3328
3.35M
    return(xmlParseNameComplex(ctxt));
3329
9.14M
}
3330
3331
static const xmlChar *
3332
5.58M
xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3333
5.58M
    int len = 0, l;
3334
5.58M
    int c;
3335
5.58M
    int count = 0;
3336
5.58M
    size_t startPosition = 0;
3337
3338
#ifdef DEBUG
3339
    nbParseNCNameComplex++;
3340
#endif
3341
3342
    /*
3343
     * Handler for more complex cases
3344
     */
3345
5.58M
    GROW;
3346
5.58M
    startPosition = CUR_PTR - BASE_PTR;
3347
5.58M
    c = CUR_CHAR(l);
3348
5.58M
    if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3349
5.28M
  (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3350
3.91M
  return(NULL);
3351
3.91M
    }
3352
3353
210M
    while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3354
210M
     (xmlIsNameChar(ctxt, c) && (c != ':'))) {
3355
208M
  if (count++ > XML_PARSER_CHUNK_SIZE) {
3356
1.88M
            if ((len > XML_MAX_NAME_LENGTH) &&
3357
3.79k
                ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3358
3.79k
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3359
3.79k
                return(NULL);
3360
3.79k
            }
3361
1.88M
      count = 0;
3362
1.88M
      GROW;
3363
1.88M
            if (ctxt->instate == XML_PARSER_EOF)
3364
0
                return(NULL);
3365
1.88M
  }
3366
208M
  len += l;
3367
208M
  NEXTL(l);
3368
208M
  c = CUR_CHAR(l);
3369
208M
  if (c == 0) {
3370
27.6k
      count = 0;
3371
      /*
3372
       * when shrinking to extend the buffer we really need to preserve
3373
       * the part of the name we already parsed. Hence rolling back
3374
       * by current lenght.
3375
       */
3376
27.6k
      ctxt->input->cur -= l;
3377
27.6k
      GROW;
3378
27.6k
      ctxt->input->cur += l;
3379
27.6k
            if (ctxt->instate == XML_PARSER_EOF)
3380
0
                return(NULL);
3381
27.6k
      c = CUR_CHAR(l);
3382
27.6k
  }
3383
208M
    }
3384
1.66M
    if ((len > XML_MAX_NAME_LENGTH) &&
3385
2.00k
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3386
2.00k
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3387
2.00k
        return(NULL);
3388
2.00k
    }
3389
1.66M
    return(xmlDictLookup(ctxt->dict, (BASE_PTR + startPosition), len));
3390
1.66M
}
3391
3392
/**
3393
 * xmlParseNCName:
3394
 * @ctxt:  an XML parser context
3395
 * @len:  length of the string parsed
3396
 *
3397
 * parse an XML name.
3398
 *
3399
 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3400
 *                      CombiningChar | Extender
3401
 *
3402
 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3403
 *
3404
 * Returns the Name parsed or NULL
3405
 */
3406
3407
static const xmlChar *
3408
35.3M
xmlParseNCName(xmlParserCtxtPtr ctxt) {
3409
35.3M
    const xmlChar *in, *e;
3410
35.3M
    const xmlChar *ret;
3411
35.3M
    int count = 0;
3412
3413
#ifdef DEBUG
3414
    nbParseNCName++;
3415
#endif
3416
3417
    /*
3418
     * Accelerator for simple ASCII names
3419
     */
3420
35.3M
    in = ctxt->input->cur;
3421
35.3M
    e = ctxt->input->end;
3422
35.3M
    if ((((*in >= 0x61) && (*in <= 0x7A)) ||
3423
22.6M
   ((*in >= 0x41) && (*in <= 0x5A)) ||
3424
30.8M
   (*in == '_')) && (in < e)) {
3425
30.8M
  in++;
3426
178M
  while ((((*in >= 0x61) && (*in <= 0x7A)) ||
3427
47.6M
          ((*in >= 0x41) && (*in <= 0x5A)) ||
3428
35.3M
          ((*in >= 0x30) && (*in <= 0x39)) ||
3429
32.2M
          (*in == '_') || (*in == '-') ||
3430
147M
          (*in == '.')) && (in < e))
3431
147M
      in++;
3432
30.8M
  if (in >= e)
3433
20.1k
      goto complex;
3434
30.8M
  if ((*in > 0) && (*in < 0x80)) {
3435
29.8M
      count = in - ctxt->input->cur;
3436
29.8M
            if ((count > XML_MAX_NAME_LENGTH) &&
3437
525
                ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3438
525
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3439
525
                return(NULL);
3440
525
            }
3441
29.8M
      ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3442
29.8M
      ctxt->input->cur = in;
3443
29.8M
      ctxt->nbChars += count;
3444
29.8M
      ctxt->input->col += count;
3445
29.8M
      if (ret == NULL) {
3446
0
          xmlErrMemory(ctxt, NULL);
3447
0
      }
3448
29.8M
      return(ret);
3449
29.8M
  }
3450
30.8M
    }
3451
5.58M
complex:
3452
5.58M
    return(xmlParseNCNameComplex(ctxt));
3453
35.3M
}
3454
3455
/**
3456
 * xmlParseNameAndCompare:
3457
 * @ctxt:  an XML parser context
3458
 *
3459
 * parse an XML name and compares for match
3460
 * (specialized for endtag parsing)
3461
 *
3462
 * Returns NULL for an illegal name, (xmlChar*) 1 for success
3463
 * and the name for mismatch
3464
 */
3465
3466
static const xmlChar *
3467
7.50M
xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
3468
7.50M
    register const xmlChar *cmp = other;
3469
7.50M
    register const xmlChar *in;
3470
7.50M
    const xmlChar *ret;
3471
3472
7.50M
    GROW;
3473
7.50M
    if (ctxt->instate == XML_PARSER_EOF)
3474
0
        return(NULL);
3475
3476
7.50M
    in = ctxt->input->cur;
3477
42.1M
    while (*in != 0 && *in == *cmp) {
3478
34.6M
  ++in;
3479
34.6M
  ++cmp;
3480
34.6M
  ctxt->input->col++;
3481
34.6M
    }
3482
7.50M
    if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
3483
  /* success */
3484
5.18M
  ctxt->input->cur = in;
3485
5.18M
  return (const xmlChar*) 1;
3486
5.18M
    }
3487
    /* failure (or end of input buffer), check with full function */
3488
2.32M
    ret = xmlParseName (ctxt);
3489
    /* strings coming from the dictionary direct compare possible */
3490
2.32M
    if (ret == other) {
3491
29.5k
  return (const xmlChar*) 1;
3492
29.5k
    }
3493
2.29M
    return ret;
3494
2.32M
}
3495
3496
/**
3497
 * xmlParseStringName:
3498
 * @ctxt:  an XML parser context
3499
 * @str:  a pointer to the string pointer (IN/OUT)
3500
 *
3501
 * parse an XML name.
3502
 *
3503
 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3504
 *                  CombiningChar | Extender
3505
 *
3506
 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3507
 *
3508
 * [6] Names ::= Name (#x20 Name)*
3509
 *
3510
 * Returns the Name parsed or NULL. The @str pointer
3511
 * is updated to the current location in the string.
3512
 */
3513
3514
static xmlChar *
3515
1.39M
xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3516
1.39M
    xmlChar buf[XML_MAX_NAMELEN + 5];
3517
1.39M
    const xmlChar *cur = *str;
3518
1.39M
    int len = 0, l;
3519
1.39M
    int c;
3520
3521
#ifdef DEBUG
3522
    nbParseStringName++;
3523
#endif
3524
3525
1.39M
    c = CUR_SCHAR(cur, l);
3526
1.39M
    if (!xmlIsNameStartChar(ctxt, c)) {
3527
25.1k
  return(NULL);
3528
25.1k
    }
3529
3530
1.37M
    COPY_BUF(l,buf,len,c);
3531
1.37M
    cur += l;
3532
1.37M
    c = CUR_SCHAR(cur, l);
3533
8.11M
    while (xmlIsNameChar(ctxt, c)) {
3534
6.76M
  COPY_BUF(l,buf,len,c);
3535
6.76M
  cur += l;
3536
6.76M
  c = CUR_SCHAR(cur, l);
3537
6.76M
  if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3538
      /*
3539
       * Okay someone managed to make a huge name, so he's ready to pay
3540
       * for the processing speed.
3541
       */
3542
14.1k
      xmlChar *buffer;
3543
14.1k
      int max = len * 2;
3544
3545
14.1k
      buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3546
14.1k
      if (buffer == NULL) {
3547
0
          xmlErrMemory(ctxt, NULL);
3548
0
    return(NULL);
3549
0
      }
3550
14.1k
      memcpy(buffer, buf, len);
3551
66.5M
      while (xmlIsNameChar(ctxt, c)) {
3552
66.5M
    if (len + 10 > max) {
3553
54.5k
        xmlChar *tmp;
3554
3555
54.5k
                    if ((len > XML_MAX_NAME_LENGTH) &&
3556
2.49k
                        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3557
2.49k
                        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3558
2.49k
      xmlFree(buffer);
3559
2.49k
                        return(NULL);
3560
2.49k
                    }
3561
52.0k
        max *= 2;
3562
52.0k
        tmp = (xmlChar *) xmlRealloc(buffer,
3563
52.0k
                                  max * sizeof(xmlChar));
3564
52.0k
        if (tmp == NULL) {
3565
0
      xmlErrMemory(ctxt, NULL);
3566
0
      xmlFree(buffer);
3567
0
      return(NULL);
3568
0
        }
3569
52.0k
        buffer = tmp;
3570
52.0k
    }
3571
66.5M
    COPY_BUF(l,buffer,len,c);
3572
66.5M
    cur += l;
3573
66.5M
    c = CUR_SCHAR(cur, l);
3574
66.5M
      }
3575
11.6k
      buffer[len] = 0;
3576
11.6k
      *str = cur;
3577
11.6k
      return(buffer);
3578
14.1k
  }
3579
6.76M
    }
3580
1.35M
    if ((len > XML_MAX_NAME_LENGTH) &&
3581
0
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3582
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3583
0
        return(NULL);
3584
0
    }
3585
1.35M
    *str = cur;
3586
1.35M
    return(xmlStrndup(buf, len));
3587
1.35M
}
3588
3589
/**
3590
 * xmlParseNmtoken:
3591
 * @ctxt:  an XML parser context
3592
 *
3593
 * parse an XML Nmtoken.
3594
 *
3595
 * [7] Nmtoken ::= (NameChar)+
3596
 *
3597
 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
3598
 *
3599
 * Returns the Nmtoken parsed or NULL
3600
 */
3601
3602
xmlChar *
3603
251k
xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3604
251k
    xmlChar buf[XML_MAX_NAMELEN + 5];
3605
251k
    int len = 0, l;
3606
251k
    int c;
3607
251k
    int count = 0;
3608
3609
#ifdef DEBUG
3610
    nbParseNmToken++;
3611
#endif
3612
3613
251k
    GROW;
3614
251k
    if (ctxt->instate == XML_PARSER_EOF)
3615
0
        return(NULL);
3616
251k
    c = CUR_CHAR(l);
3617
3618
1.53M
    while (xmlIsNameChar(ctxt, c)) {
3619
1.28M
  if (count++ > XML_PARSER_CHUNK_SIZE) {
3620
0
      count = 0;
3621
0
      GROW;
3622
0
  }
3623
1.28M
  COPY_BUF(l,buf,len,c);
3624
1.28M
  NEXTL(l);
3625
1.28M
  c = CUR_CHAR(l);
3626
1.28M
  if (c == 0) {
3627
1.23k
      count = 0;
3628
1.23k
      GROW;
3629
1.23k
      if (ctxt->instate == XML_PARSER_EOF)
3630
0
    return(NULL);
3631
1.23k
            c = CUR_CHAR(l);
3632
1.23k
  }
3633
1.28M
  if (len >= XML_MAX_NAMELEN) {
3634
      /*
3635
       * Okay someone managed to make a huge token, so he's ready to pay
3636
       * for the processing speed.
3637
       */
3638
9.13k
      xmlChar *buffer;
3639
9.13k
      int max = len * 2;
3640
3641
9.13k
      buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3642
9.13k
      if (buffer == NULL) {
3643
0
          xmlErrMemory(ctxt, NULL);
3644
0
    return(NULL);
3645
0
      }
3646
9.13k
      memcpy(buffer, buf, len);
3647
46.8M
      while (xmlIsNameChar(ctxt, c)) {
3648
46.8M
    if (count++ > XML_PARSER_CHUNK_SIZE) {
3649
459k
        count = 0;
3650
459k
        GROW;
3651
459k
                    if (ctxt->instate == XML_PARSER_EOF) {
3652
0
                        xmlFree(buffer);
3653
0
                        return(NULL);
3654
0
                    }
3655
459k
    }
3656
46.8M
    if (len + 10 > max) {
3657
27.4k
        xmlChar *tmp;
3658
3659
27.4k
                    if ((max > XML_MAX_NAME_LENGTH) &&
3660
1.83k
                        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3661
1.83k
                        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3662
1.83k
                        xmlFree(buffer);
3663
1.83k
                        return(NULL);
3664
1.83k
                    }
3665
25.6k
        max *= 2;
3666
25.6k
        tmp = (xmlChar *) xmlRealloc(buffer,
3667
25.6k
                                  max * sizeof(xmlChar));
3668
25.6k
        if (tmp == NULL) {
3669
0
      xmlErrMemory(ctxt, NULL);
3670
0
      xmlFree(buffer);
3671
0
      return(NULL);
3672
0
        }
3673
25.6k
        buffer = tmp;
3674
25.6k
    }
3675
46.8M
    COPY_BUF(l,buffer,len,c);
3676
46.8M
    NEXTL(l);
3677
46.8M
    c = CUR_CHAR(l);
3678
46.8M
      }
3679
7.29k
      buffer[len] = 0;
3680
7.29k
      return(buffer);
3681
9.13k
  }
3682
1.28M
    }
3683
242k
    if (len == 0)
3684
98.1k
        return(NULL);
3685
144k
    if ((len > XML_MAX_NAME_LENGTH) &&
3686
0
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3687
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3688
0
        return(NULL);
3689
0
    }
3690
144k
    return(xmlStrndup(buf, len));
3691
144k
}
3692
3693
/**
3694
 * xmlParseEntityValue:
3695
 * @ctxt:  an XML parser context
3696
 * @orig:  if non-NULL store a copy of the original entity value
3697
 *
3698
 * parse a value for ENTITY declarations
3699
 *
3700
 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3701
 *                 "'" ([^%&'] | PEReference | Reference)* "'"
3702
 *
3703
 * Returns the EntityValue parsed with reference substituted or NULL
3704
 */
3705
3706
xmlChar *
3707
175k
xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3708
175k
    xmlChar *buf = NULL;
3709
175k
    int len = 0;
3710
175k
    int size = XML_PARSER_BUFFER_SIZE;
3711
175k
    int c, l;
3712
175k
    xmlChar stop;
3713
175k
    xmlChar *ret = NULL;
3714
175k
    const xmlChar *cur = NULL;
3715
175k
    xmlParserInputPtr input;
3716
3717
175k
    if (RAW == '"') stop = '"';
3718
133k
    else if (RAW == '\'') stop = '\'';
3719
0
    else {
3720
0
  xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
3721
0
  return(NULL);
3722
0
    }
3723
175k
    buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3724
175k
    if (buf == NULL) {
3725
0
  xmlErrMemory(ctxt, NULL);
3726
0
  return(NULL);
3727
0
    }
3728
3729
    /*
3730
     * The content of the entity definition is copied in a buffer.
3731
     */
3732
3733
175k
    ctxt->instate = XML_PARSER_ENTITY_VALUE;
3734
175k
    input = ctxt->input;
3735
175k
    GROW;
3736
175k
    if (ctxt->instate == XML_PARSER_EOF)
3737
0
        goto error;
3738
175k
    NEXT;
3739
175k
    c = CUR_CHAR(l);
3740
    /*
3741
     * NOTE: 4.4.5 Included in Literal
3742
     * When a parameter entity reference appears in a literal entity
3743
     * value, ... a single or double quote character in the replacement
3744
     * text is always treated as a normal data character and will not
3745
     * terminate the literal.
3746
     * In practice it means we stop the loop only when back at parsing
3747
     * the initial entity and the quote is found
3748
     */
3749
1.75G
    while (((IS_CHAR(c)) && ((c != stop) || /* checked */
3750
1.75G
      (ctxt->input != input))) && (ctxt->instate != XML_PARSER_EOF)) {
3751
1.75G
  if (len + 5 >= size) {
3752
112k
      xmlChar *tmp;
3753
3754
112k
      size *= 2;
3755
112k
      tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3756
112k
      if (tmp == NULL) {
3757
0
    xmlErrMemory(ctxt, NULL);
3758
0
                goto error;
3759
0
      }
3760
112k
      buf = tmp;
3761
112k
  }
3762
1.75G
  COPY_BUF(l,buf,len,c);
3763
1.75G
  NEXTL(l);
3764
3765
1.75G
  GROW;
3766
1.75G
  c = CUR_CHAR(l);
3767
1.75G
  if (c == 0) {
3768
305
      GROW;
3769
305
      c = CUR_CHAR(l);
3770
305
  }
3771
1.75G
    }
3772
175k
    buf[len] = 0;
3773
175k
    if (ctxt->instate == XML_PARSER_EOF)
3774
8
        goto error;
3775
175k
    if (c != stop) {
3776
491
        xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
3777
491
        goto error;
3778
491
    }
3779
175k
    NEXT;
3780
3781
    /*
3782
     * Raise problem w.r.t. '&' and '%' being used in non-entities
3783
     * reference constructs. Note Charref will be handled in
3784
     * xmlStringDecodeEntities()
3785
     */
3786
175k
    cur = buf;
3787
4.98G
    while (*cur != 0) { /* non input consuming */
3788
4.98G
  if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3789
234k
      xmlChar *name;
3790
234k
      xmlChar tmp = *cur;
3791
234k
            int nameOk = 0;
3792
3793
234k
      cur++;
3794
234k
      name = xmlParseStringName(ctxt, &cur);
3795
234k
            if (name != NULL) {
3796
224k
                nameOk = 1;
3797
224k
                xmlFree(name);
3798
224k
            }
3799
234k
            if ((nameOk == 0) || (*cur != ';')) {
3800
18.8k
    xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
3801
18.8k
      "EntityValue: '%c' forbidden except for entities references\n",
3802
18.8k
                            tmp);
3803
18.8k
                goto error;
3804
18.8k
      }
3805
216k
      if ((tmp == '%') && (ctxt->inSubset == 1) &&
3806
1.66k
    (ctxt->inputNr == 1)) {
3807
1.66k
    xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
3808
1.66k
                goto error;
3809
1.66k
      }
3810
214k
      if (*cur == 0)
3811
0
          break;
3812
214k
  }
3813
4.98G
  cur++;
3814
4.98G
    }
3815
3816
    /*
3817
     * Then PEReference entities are substituted.
3818
     *
3819
     * NOTE: 4.4.7 Bypassed
3820
     * When a general entity reference appears in the EntityValue in
3821
     * an entity declaration, it is bypassed and left as is.
3822
     * so XML_SUBSTITUTE_REF is not set here.
3823
     */
3824
154k
    ++ctxt->depth;
3825
154k
    ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3826
154k
                                  0, 0, 0);
3827
154k
    --ctxt->depth;
3828
154k
    if (orig != NULL) {
3829
154k
        *orig = buf;
3830
154k
        buf = NULL;
3831
154k
    }
3832
3833
175k
error:
3834
175k
    if (buf != NULL)
3835
20.9k
        xmlFree(buf);
3836
175k
    return(ret);
3837
154k
}
3838
3839
/**
3840
 * xmlParseAttValueComplex:
3841
 * @ctxt:  an XML parser context
3842
 * @len:   the resulting attribute len
3843
 * @normalize:  wether to apply the inner normalization
3844
 *
3845
 * parse a value for an attribute, this is the fallback function
3846
 * of xmlParseAttValue() when the attribute parsing requires handling
3847
 * of non-ASCII characters, or normalization compaction.
3848
 *
3849
 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3850
 */
3851
static xmlChar *
3852
1.28M
xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
3853
1.28M
    xmlChar limit = 0;
3854
1.28M
    xmlChar *buf = NULL;
3855
1.28M
    xmlChar *rep = NULL;
3856
1.28M
    size_t len = 0;
3857
1.28M
    size_t buf_size = 0;
3858
1.28M
    int c, l, in_space = 0;
3859
1.28M
    xmlChar *current = NULL;
3860
1.28M
    xmlEntityPtr ent;
3861
3862
1.28M
    if (NXT(0) == '"') {
3863
777k
  ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3864
777k
  limit = '"';
3865
777k
        NEXT;
3866
777k
    } else if (NXT(0) == '\'') {
3867
510k
  limit = '\'';
3868
510k
  ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3869
510k
        NEXT;
3870
510k
    } else {
3871
0
  xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
3872
0
  return(NULL);
3873
0
    }
3874
3875
    /*
3876
     * allocate a translation buffer.
3877
     */
3878
1.28M
    buf_size = XML_PARSER_BUFFER_SIZE;
3879
1.28M
    buf = (xmlChar *) xmlMallocAtomic(buf_size);
3880
1.28M
    if (buf == NULL) goto mem_error;
3881
3882
    /*
3883
     * OK loop until we reach one of the ending char or a size limit.
3884
     */
3885
1.28M
    c = CUR_CHAR(l);
3886
1.52G
    while (((NXT(0) != limit) && /* checked */
3887
1.52G
            (IS_CHAR(c)) && (c != '<')) &&
3888
1.52G
            (ctxt->instate != XML_PARSER_EOF)) {
3889
        /*
3890
         * Impose a reasonable limit on attribute size, unless XML_PARSE_HUGE
3891
         * special option is given
3892
         */
3893
1.52G
        if ((len > XML_MAX_TEXT_LENGTH) &&
3894
8
            ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3895
8
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
3896
8
                           "AttValue length too long\n");
3897
8
            goto mem_error;
3898
8
        }
3899
1.52G
  if (c == 0) break;
3900
1.52G
  if (c == '&') {
3901
1.76M
      in_space = 0;
3902
1.76M
      if (NXT(1) == '#') {
3903
453k
    int val = xmlParseCharRef(ctxt);
3904
3905
453k
    if (val == '&') {
3906
32.8k
        if (ctxt->replaceEntities) {
3907
0
      if (len + 10 > buf_size) {
3908
0
          growBuffer(buf, 10);
3909
0
      }
3910
0
      buf[len++] = '&';
3911
32.8k
        } else {
3912
      /*
3913
       * The reparsing will be done in xmlStringGetNodeList()
3914
       * called by the attribute() function in SAX.c
3915
       */
3916
32.8k
      if (len + 10 > buf_size) {
3917
4.91k
          growBuffer(buf, 10);
3918
4.91k
      }
3919
32.8k
      buf[len++] = '&';
3920
32.8k
      buf[len++] = '#';
3921
32.8k
      buf[len++] = '3';
3922
32.8k
      buf[len++] = '8';
3923
32.8k
      buf[len++] = ';';
3924
32.8k
        }
3925
420k
    } else if (val != 0) {
3926
234k
        if (len + 10 > buf_size) {
3927
4.62k
      growBuffer(buf, 10);
3928
4.62k
        }
3929
234k
        len += xmlCopyChar(0, &buf[len], val);
3930
234k
    }
3931
1.31M
      } else {
3932
1.31M
    ent = xmlParseEntityRef(ctxt);
3933
1.31M
    ctxt->nbentities++;
3934
1.31M
    if (ent != NULL)
3935
415k
        ctxt->nbentities += ent->owner;
3936
1.31M
    if ((ent != NULL) &&
3937
415k
        (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
3938
241k
        if (len + 10 > buf_size) {
3939
3.87k
      growBuffer(buf, 10);
3940
3.87k
        }
3941
241k
        if ((ctxt->replaceEntities == 0) &&
3942
241k
            (ent->content[0] == '&')) {
3943
146k
      buf[len++] = '&';
3944
146k
      buf[len++] = '#';
3945
146k
      buf[len++] = '3';
3946
146k
      buf[len++] = '8';
3947
146k
      buf[len++] = ';';
3948
146k
        } else {
3949
95.0k
      buf[len++] = ent->content[0];
3950
95.0k
        }
3951
1.07M
    } else if ((ent != NULL) &&
3952
174k
               (ctxt->replaceEntities != 0)) {
3953
0
        if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
3954
0
      ++ctxt->depth;
3955
0
      rep = xmlStringDecodeEntities(ctxt, ent->content,
3956
0
                  XML_SUBSTITUTE_REF,
3957
0
                  0, 0, 0);
3958
0
      --ctxt->depth;
3959
0
      if (rep != NULL) {
3960
0
          current = rep;
3961
0
          while (*current != 0) { /* non input consuming */
3962
0
                                if ((*current == 0xD) || (*current == 0xA) ||
3963
0
                                    (*current == 0x9)) {
3964
0
                                    buf[len++] = 0x20;
3965
0
                                    current++;
3966
0
                                } else
3967
0
                                    buf[len++] = *current++;
3968
0
        if (len + 10 > buf_size) {
3969
0
            growBuffer(buf, 10);
3970
0
        }
3971
0
          }
3972
0
          xmlFree(rep);
3973
0
          rep = NULL;
3974
0
      }
3975
0
        } else {
3976
0
      if (len + 10 > buf_size) {
3977
0
          growBuffer(buf, 10);
3978
0
      }
3979
0
      if (ent->content != NULL)
3980
0
          buf[len++] = ent->content[0];
3981
0
        }
3982
1.07M
    } else if (ent != NULL) {
3983
174k
        int i = xmlStrlen(ent->name);
3984
174k
        const xmlChar *cur = ent->name;
3985
3986
        /*
3987
         * This may look absurd but is needed to detect
3988
         * entities problems
3989
         */
3990
174k
        if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
3991
174k
      (ent->content != NULL) && (ent->checked == 0)) {
3992
5.07k
      unsigned long oldnbent = ctxt->nbentities;
3993
3994
5.07k
      ++ctxt->depth;
3995
5.07k
      rep = xmlStringDecodeEntities(ctxt, ent->content,
3996
5.07k
              XML_SUBSTITUTE_REF, 0, 0, 0);
3997
5.07k
      --ctxt->depth;
3998
3999
5.07k
      ent->checked = (ctxt->nbentities - oldnbent + 1) * 2;
4000
5.07k
      if (rep != NULL) {
4001
4.32k
          if (xmlStrchr(rep, '<'))
4002
1.57k
              ent->checked |= 1;
4003
4.32k
          xmlFree(rep);
4004
4.32k
          rep = NULL;
4005
4.32k
      } else {
4006
749
                            ent->content[0] = 0;
4007
749
                        }
4008
5.07k
        }
4009
4010
        /*
4011
         * Just output the reference
4012
         */
4013
174k
        buf[len++] = '&';
4014
178k
        while (len + i + 10 > buf_size) {
4015
8.51k
      growBuffer(buf, i + 10);
4016
8.51k
        }
4017
365k
        for (;i > 0;i--)
4018
190k
      buf[len++] = *cur++;
4019
174k
        buf[len++] = ';';
4020
174k
    }
4021
1.31M
      }
4022
1.52G
  } else {
4023
1.52G
      if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
4024
28.5M
          if ((len != 0) || (!normalize)) {
4025
28.2M
        if ((!normalize) || (!in_space)) {
4026
27.5M
      COPY_BUF(l,buf,len,0x20);
4027
27.6M
      while (len + 10 > buf_size) {
4028
91.5k
          growBuffer(buf, 10);
4029
91.5k
      }
4030
27.5M
        }
4031
28.2M
        in_space = 1;
4032
28.2M
    }
4033
1.49G
      } else {
4034
1.49G
          in_space = 0;
4035
1.49G
    COPY_BUF(l,buf,len,c);
4036
1.49G
    if (len + 10 > buf_size) {
4037
542k
        growBuffer(buf, 10);
4038
542k
    }
4039
1.49G
      }
4040
1.52G
      NEXTL(l);
4041
1.52G
  }
4042
1.52G
  GROW;
4043
1.52G
  c = CUR_CHAR(l);
4044
1.52G
    }
4045
1.28M
    if (ctxt->instate == XML_PARSER_EOF)
4046
4
        goto error;
4047
4048
1.28M
    if ((in_space) && (normalize)) {
4049
37.3k
        while ((len > 0) && (buf[len - 1] == 0x20)) len--;
4050
14.6k
    }
4051
1.28M
    buf[len] = 0;
4052
1.28M
    if (RAW == '<') {
4053
638k
  xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
4054
649k
    } else if (RAW != limit) {
4055
189k
  if ((c != 0) && (!IS_CHAR(c))) {
4056
137k
      xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
4057
137k
         "invalid character in attribute value\n");
4058
137k
  } else {
4059
51.6k
      xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4060
51.6k
         "AttValue: ' expected\n");
4061
51.6k
        }
4062
189k
    } else
4063
459k
  NEXT;
4064
4065
    /*
4066
     * There we potentially risk an overflow, don't allow attribute value of
4067
     * length more than INT_MAX it is a very reasonnable assumption !
4068
     */
4069
1.28M
    if (len >= INT_MAX) {
4070
0
        xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4071
0
                       "AttValue length too long\n");
4072
0
        goto mem_error;
4073
0
    }
4074
4075
1.28M
    if (attlen != NULL) *attlen = (int) len;
4076
1.28M
    return(buf);
4077
4078
8
mem_error:
4079
8
    xmlErrMemory(ctxt, NULL);
4080
12
error:
4081
12
    if (buf != NULL)
4082
12
        xmlFree(buf);
4083
12
    if (rep != NULL)
4084
0
        xmlFree(rep);
4085
12
    return(NULL);
4086
8
}
4087
4088
/**
4089
 * xmlParseAttValue:
4090
 * @ctxt:  an XML parser context
4091
 *
4092
 * parse a value for an attribute
4093
 * Note: the parser won't do substitution of entities here, this
4094
 * will be handled later in xmlStringGetNodeList
4095
 *
4096
 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
4097
 *                   "'" ([^<&'] | Reference)* "'"
4098
 *
4099
 * 3.3.3 Attribute-Value Normalization:
4100
 * Before the value of an attribute is passed to the application or
4101
 * checked for validity, the XML processor must normalize it as follows:
4102
 * - a character reference is processed by appending the referenced
4103
 *   character to the attribute value
4104
 * - an entity reference is processed by recursively processing the
4105
 *   replacement text of the entity
4106
 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4107
 *   appending #x20 to the normalized value, except that only a single
4108
 *   #x20 is appended for a "#xD#xA" sequence that is part of an external
4109
 *   parsed entity or the literal entity value of an internal parsed entity
4110
 * - other characters are processed by appending them to the normalized value
4111
 * If the declared value is not CDATA, then the XML processor must further
4112
 * process the normalized attribute value by discarding any leading and
4113
 * trailing space (#x20) characters, and by replacing sequences of space
4114
 * (#x20) characters by a single space (#x20) character.
4115
 * All attributes for which no declaration has been read should be treated
4116
 * by a non-validating parser as if declared CDATA.
4117
 *
4118
 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
4119
 */
4120
4121
4122
xmlChar *
4123
244k
xmlParseAttValue(xmlParserCtxtPtr ctxt) {
4124
244k
    if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
4125
244k
    return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
4126
244k
}
4127
4128
/**
4129
 * xmlParseSystemLiteral:
4130
 * @ctxt:  an XML parser context
4131
 *
4132
 * parse an XML Literal
4133
 *
4134
 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4135
 *
4136
 * Returns the SystemLiteral parsed or NULL
4137
 */
4138
4139
xmlChar *
4140
50.5k
xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
4141
50.5k
    xmlChar *buf = NULL;
4142
50.5k
    int len = 0;
4143
50.5k
    int size = XML_PARSER_BUFFER_SIZE;
4144
50.5k
    int cur, l;
4145
50.5k
    xmlChar stop;
4146
50.5k
    int state = ctxt->instate;
4147
50.5k
    int count = 0;
4148
4149
50.5k
    SHRINK;
4150
50.5k
    if (RAW == '"') {
4151
26.3k
        NEXT;
4152
26.3k
  stop = '"';
4153
26.3k
    } else if (RAW == '\'') {
4154
16.3k
        NEXT;
4155
16.3k
  stop = '\'';
4156
16.3k
    } else {
4157
7.83k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4158
7.83k
  return(NULL);
4159
7.83k
    }
4160
4161
42.7k
    buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4162
42.7k
    if (buf == NULL) {
4163
0
        xmlErrMemory(ctxt, NULL);
4164
0
  return(NULL);
4165
0
    }
4166
42.7k
    ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
4167
42.7k
    cur = CUR_CHAR(l);
4168
48.0M
    while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
4169
48.0M
  if (len + 5 >= size) {
4170
28.2k
      xmlChar *tmp;
4171
4172
28.2k
            if ((size > XML_MAX_NAME_LENGTH) &&
4173
634
                ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4174
634
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
4175
634
                xmlFree(buf);
4176
634
    ctxt->instate = (xmlParserInputState) state;
4177
634
                return(NULL);
4178
634
            }
4179
27.6k
      size *= 2;
4180
27.6k
      tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4181
27.6k
      if (tmp == NULL) {
4182
0
          xmlFree(buf);
4183
0
    xmlErrMemory(ctxt, NULL);
4184
0
    ctxt->instate = (xmlParserInputState) state;
4185
0
    return(NULL);
4186
0
      }
4187
27.6k
      buf = tmp;
4188
27.6k
  }
4189
48.0M
  count++;
4190
48.0M
  if (count > 50) {
4191
928k
      GROW;
4192
928k
      count = 0;
4193
928k
            if (ctxt->instate == XML_PARSER_EOF) {
4194
0
          xmlFree(buf);
4195
0
    return(NULL);
4196
0
            }
4197
928k
  }
4198
48.0M
  COPY_BUF(l,buf,len,cur);
4199
48.0M
  NEXTL(l);
4200
48.0M
  cur = CUR_CHAR(l);
4201
48.0M
  if (cur == 0) {
4202
3.73k
      GROW;
4203
3.73k
      SHRINK;
4204
3.73k
      cur = CUR_CHAR(l);
4205
3.73k
  }
4206
48.0M
    }
4207
42.0k
    buf[len] = 0;
4208
42.0k
    ctxt->instate = (xmlParserInputState) state;
4209
42.0k
    if (!IS_CHAR(cur)) {
4210
4.56k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4211
37.5k
    } else {
4212
37.5k
  NEXT;
4213
37.5k
    }
4214
42.0k
    return(buf);
4215
42.7k
}
4216
4217
/**
4218
 * xmlParsePubidLiteral:
4219
 * @ctxt:  an XML parser context
4220
 *
4221
 * parse an XML public literal
4222
 *
4223
 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4224
 *
4225
 * Returns the PubidLiteral parsed or NULL.
4226
 */
4227
4228
xmlChar *
4229
47.3k
xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
4230
47.3k
    xmlChar *buf = NULL;
4231
47.3k
    int len = 0;
4232
47.3k
    int size = XML_PARSER_BUFFER_SIZE;
4233
47.3k
    xmlChar cur;
4234
47.3k
    xmlChar stop;
4235
47.3k
    int count = 0;
4236
47.3k
    xmlParserInputState oldstate = ctxt->instate;
4237
4238
47.3k
    SHRINK;
4239
47.3k
    if (RAW == '"') {
4240
40.6k
        NEXT;
4241
40.6k
  stop = '"';
4242
40.6k
    } else if (RAW == '\'') {
4243
2.18k
        NEXT;
4244
2.18k
  stop = '\'';
4245
4.45k
    } else {
4246
4.45k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4247
4.45k
  return(NULL);
4248
4.45k
    }
4249
42.8k
    buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4250
42.8k
    if (buf == NULL) {
4251
0
  xmlErrMemory(ctxt, NULL);
4252
0
  return(NULL);
4253
0
    }
4254
42.8k
    ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
4255
42.8k
    cur = CUR;
4256
3.25M
    while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
4257
3.20M
  if (len + 1 >= size) {
4258
10.0k
      xmlChar *tmp;
4259
4260
10.0k
            if ((size > XML_MAX_NAME_LENGTH) &&
4261
13
                ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4262
13
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
4263
13
                xmlFree(buf);
4264
13
                return(NULL);
4265
13
            }
4266
10.0k
      size *= 2;
4267
10.0k
      tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4268
10.0k
      if (tmp == NULL) {
4269
0
    xmlErrMemory(ctxt, NULL);
4270
0
    xmlFree(buf);
4271
0
    return(NULL);
4272
0
      }
4273
10.0k
      buf = tmp;
4274
10.0k
  }
4275
3.20M
  buf[len++] = cur;
4276
3.20M
  count++;
4277
3.20M
  if (count > 50) {
4278
53.6k
      GROW;
4279
53.6k
      count = 0;
4280
53.6k
            if (ctxt->instate == XML_PARSER_EOF) {
4281
0
    xmlFree(buf);
4282
0
    return(NULL);
4283
0
            }
4284
53.6k
  }
4285
3.20M
  NEXT;
4286
3.20M
  cur = CUR;
4287
3.20M
  if (cur == 0) {
4288
7.36k
      GROW;
4289
7.36k
      SHRINK;
4290
7.36k
      cur = CUR;
4291
7.36k
  }
4292
3.20M
    }
4293
42.8k
    buf[len] = 0;
4294
42.8k
    if (cur != stop) {
4295
19.6k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4296
23.1k
    } else {
4297
23.1k
  NEXT;
4298
23.1k
    }
4299
42.8k
    ctxt->instate = oldstate;
4300
42.8k
    return(buf);
4301
42.8k
}
4302
4303
/* Forward declaration: being static, the function is defined elsewhere in this file. */
static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
4304
4305
/*
4306
 * used for the test in the inner loop of the char data testing
 *
 * 256-entry table indexed by byte value. An entry holds the byte itself
 * for 0x09 and for 0x20..0x7F, except for '&' (0x26), '<' (0x3C) and
 * ']' (0x5D); it holds 0 for those three, for every other control
 * character (LF and CR included) and for all bytes >= 0x80.
 * NOTE(review): a zero entry presumably tells the scanning loop to stop
 * and look at the byte more closely - confirm in xmlParseCharData.
4307
 */
4308
static const unsigned char test_char_data[256] = {
4309
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4310
    0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x9, CR/LF separated */
4311
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4312
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4313
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* & */
4314
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
4315
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
4316
    0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* < */
4317
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
4318
    0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
4319
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
4320
    0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* ] */
4321
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
4322
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
4323
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
4324
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
4325
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* non-ascii */
4326
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4327
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4328
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4329
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4330
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4331
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4332
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4333
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4334
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4335
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4336
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4337
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4338
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4339
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4340
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
4341
};
4342
4343
/**
4344
 * xmlParseCharData:
4345
 * @ctxt:  an XML parser context
4346
 * @cdata:  int indicating whether we are within a CDATA section
4347
 *
4348
 * parse a CharData section.
4349
 * if we are within a CDATA section ']]>' marks an end of section.
4350
 *
4351
 * The right angle bracket (>) may be represented using the string "&gt;",
4352
 * and must, for compatibility, be escaped using "&gt;" or a character
4353
 * reference when it appears in the string "]]>" in content, when that
4354
 * string is not marking the end of a CDATA section.
4355
 *
4356
 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4357
 */
4358
4359
void
4360
32.7M
xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
4361
32.7M
    const xmlChar *in;
4362
32.7M
    int nbchar = 0;
4363
32.7M
    int line = ctxt->input->line;
4364
32.7M
    int col = ctxt->input->col;
4365
32.7M
    int ccol;
4366
4367
32.7M
    SHRINK;
4368
32.7M
    GROW;
4369
    /*
4370
     * Accelerated common case where input don't need to be
4371
     * modified before passing it to the handler.
4372
     */
4373
32.7M
    if (!cdata) {
4374
32.7M
  in = ctxt->input->cur;
4375
36.2M
  do {
4376
38.6M
get_more_space:
4377
72.0M
      while (*in == 0x20) { in++; ctxt->input->col++; }
4378
38.6M
      if (*in == 0xA) {
4379
15.9M
    do {
4380
15.9M
        ctxt->input->line++; ctxt->input->col = 1;
4381
15.9M
        in++;
4382
15.9M
    } while (*in == 0xA);
4383
2.46M
    goto get_more_space;
4384
2.46M
      }
4385
36.2M
      if (*in == '<') {
4386
3.83M
    nbchar = in - ctxt->input->cur;
4387
3.83M
    if (nbchar > 0) {
4388
3.82M
        const xmlChar *tmp = ctxt->input->cur;
4389
3.82M
        ctxt->input->cur = in;
4390
4391
3.82M
        if ((ctxt->sax != NULL) &&
4392
3.82M
            (ctxt->sax->ignorableWhitespace !=
4393
3.82M
             ctxt->sax->characters)) {
4394
3.82M
      if (areBlanks(ctxt, tmp, nbchar, 1)) {
4395
2.51M
          if (ctxt->sax->ignorableWhitespace != NULL)
4396
2.51M
        ctxt->sax->ignorableWhitespace(ctxt->userData,
4397
2.51M
                   tmp, nbchar);
4398
2.51M
      } else {
4399
1.31M
          if (ctxt->sax->characters != NULL)
4400
1.31M
        ctxt->sax->characters(ctxt->userData,
4401
1.31M
                  tmp, nbchar);
4402
1.31M
          if (*ctxt->space == -1)
4403
298k
              *ctxt->space = -2;
4404
1.31M
      }
4405
3.82M
        } else if ((ctxt->sax != NULL) &&
4406
0
                   (ctxt->sax->characters != NULL)) {
4407
0
      ctxt->sax->characters(ctxt->userData,
4408
0
                tmp, nbchar);
4409
0
        }
4410
3.82M
    }
4411
3.83M
    return;
4412
3.83M
      }
4413
4414
34.0M
get_more:
4415
34.0M
            ccol = ctxt->input->col;
4416
265M
      while (test_char_data[*in]) {
4417
230M
    in++;
4418
230M
    ccol++;
4419
230M
      }
4420
34.0M
      ctxt->input->col = ccol;
4421
34.0M
      if (*in == 0xA) {
4422
10.0M
    do {
4423
10.0M
        ctxt->input->line++; ctxt->input->col = 1;
4424
10.0M
        in++;
4425
10.0M
    } while (*in == 0xA);
4426
1.09M
    goto get_more;
4427
1.09M
      }
4428
33.0M
      if (*in == ']') {
4429
624k
    if ((in[1] == ']') && (in[2] == '>')) {
4430
14.6k
        xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4431
14.6k
        ctxt->input->cur = in + 1;
4432
14.6k
        return;
4433
14.6k
    }
4434
609k
    in++;
4435
609k
    ctxt->input->col++;
4436
609k
    goto get_more;
4437
624k
      }
4438
32.3M
      nbchar = in - ctxt->input->cur;
4439
32.3M
      if (nbchar > 0) {
4440
14.3M
    if ((ctxt->sax != NULL) &&
4441
14.3M
        (ctxt->sax->ignorableWhitespace !=
4442
14.3M
         ctxt->sax->characters) &&
4443
14.3M
        (IS_BLANK_CH(*ctxt->input->cur))) {
4444
2.81M
        const xmlChar *tmp = ctxt->input->cur;
4445
2.81M
        ctxt->input->cur = in;
4446
4447
2.81M
        if (areBlanks(ctxt, tmp, nbchar, 0)) {
4448
208k
            if (ctxt->sax->ignorableWhitespace != NULL)
4449
208k
          ctxt->sax->ignorableWhitespace(ctxt->userData,
4450
208k
                 tmp, nbchar);
4451
2.60M
        } else {
4452
2.60M
            if (ctxt->sax->characters != NULL)
4453
2.60M
          ctxt->sax->characters(ctxt->userData,
4454
2.60M
              tmp, nbchar);
4455
2.60M
      if (*ctxt->space == -1)
4456
857k
          *ctxt->space = -2;
4457
2.60M
        }
4458
2.81M
                    line = ctxt->input->line;
4459
2.81M
                    col = ctxt->input->col;
4460
11.4M
    } else if (ctxt->sax != NULL) {
4461
11.4M
        if (ctxt->sax->characters != NULL)
4462
11.4M
      ctxt->sax->characters(ctxt->userData,
4463
11.4M
                ctxt->input->cur, nbchar);
4464
11.4M
                    line = ctxt->input->line;
4465
11.4M
                    col = ctxt->input->col;
4466
11.4M
    }
4467
                /* something really bad happened in the SAX callback */
4468
14.3M
                if (ctxt->instate != XML_PARSER_CONTENT)
4469
0
                    return;
4470
14.3M
      }
4471
32.3M
      ctxt->input->cur = in;
4472
32.3M
      if (*in == 0xD) {
4473
4.14M
    in++;
4474
4.14M
    if (*in == 0xA) {
4475
3.52M
        ctxt->input->cur = in;
4476
3.52M
        in++;
4477
3.52M
        ctxt->input->line++; ctxt->input->col = 1;
4478
3.52M
        continue; /* while */
4479
3.52M
    }
4480
615k
    in--;
4481
615k
      }
4482
28.8M
      if (*in == '<') {
4483
10.4M
    return;
4484
10.4M
      }
4485
18.3M
      if (*in == '&') {
4486
596k
    return;
4487
596k
      }
4488
17.7M
      SHRINK;
4489
17.7M
      GROW;
4490
17.7M
            if (ctxt->instate == XML_PARSER_EOF)
4491
0
    return;
4492
17.7M
      in = ctxt->input->cur;
4493
21.3M
  } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
4494
17.8M
  nbchar = 0;
4495
17.8M
    }
4496
17.8M
    ctxt->input->line = line;
4497
17.8M
    ctxt->input->col = col;
4498
17.8M
    xmlParseCharDataComplex(ctxt, cdata);
4499
17.8M
}
4500
4501
/**
4502
 * xmlParseCharDataComplex:
4503
 * @ctxt:  an XML parser context
4504
 * @cdata:  int indicating whether we are within a CDATA section
4505
 *
4506
 * parse a CharData section.this is the fallback function
4507
 * of xmlParseCharData() when the parsing requires handling
4508
 * of non-ASCII characters.
4509
 */
4510
static void
4511
17.8M
xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
4512
17.8M
    xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4513
17.8M
    int nbchar = 0;
4514
17.8M
    int cur, l;
4515
17.8M
    int count = 0;
4516
4517
17.8M
    SHRINK;
4518
17.8M
    GROW;
4519
17.8M
    cur = CUR_CHAR(l);
4520
1.14G
    while ((cur != '<') && /* checked */
4521
1.14G
           (cur != '&') &&
4522
1.14G
     (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
4523
1.12G
  if ((cur == ']') && (NXT(1) == ']') &&
4524
93.4k
      (NXT(2) == '>')) {
4525
33.2k
      if (cdata) break;
4526
33.2k
      else {
4527
33.2k
    xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4528
33.2k
      }
4529
33.2k
  }
4530
1.12G
  COPY_BUF(l,buf,nbchar,cur);
4531
1.12G
  if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
4532
10.0M
      buf[nbchar] = 0;
4533
4534
      /*
4535
       * OK the segment is to be consumed as chars.
4536
       */
4537
10.0M
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4538
5.96M
    if (areBlanks(ctxt, buf, nbchar, 0)) {
4539
2.28k
        if (ctxt->sax->ignorableWhitespace != NULL)
4540
2.28k
      ctxt->sax->ignorableWhitespace(ctxt->userData,
4541
2.28k
                                     buf, nbchar);
4542
5.95M
    } else {
4543
5.95M
        if (ctxt->sax->characters != NULL)
4544
5.95M
      ctxt->sax->characters(ctxt->userData, buf, nbchar);
4545
5.95M
        if ((ctxt->sax->characters !=
4546
5.95M
             ctxt->sax->ignorableWhitespace) &&
4547
5.95M
      (*ctxt->space == -1))
4548
29.1k
      *ctxt->space = -2;
4549
5.95M
    }
4550
5.96M
      }
4551
10.0M
      nbchar = 0;
4552
            /* something really bad happened in the SAX callback */
4553
10.0M
            if (ctxt->instate != XML_PARSER_CONTENT)
4554
17
                return;
4555
10.0M
  }
4556
1.12G
  count++;
4557
1.12G
  if (count > 50) {
4558
21.2M
      GROW;
4559
21.2M
      count = 0;
4560
21.2M
            if (ctxt->instate == XML_PARSER_EOF)
4561
2
    return;
4562
21.2M
  }
4563
1.12G
  NEXTL(l);
4564
1.12G
  cur = CUR_CHAR(l);
4565
1.12G
    }
4566
17.8M
    if (nbchar != 0) {
4567
3.67M
        buf[nbchar] = 0;
4568
  /*
4569
   * OK the segment is to be consumed as chars.
4570
   */
4571
3.67M
  if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4572
3.18M
      if (areBlanks(ctxt, buf, nbchar, 0)) {
4573
46.7k
    if (ctxt->sax->ignorableWhitespace != NULL)
4574
46.7k
        ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4575
3.13M
      } else {
4576
3.13M
    if (ctxt->sax->characters != NULL)
4577
3.13M
        ctxt->sax->characters(ctxt->userData, buf, nbchar);
4578
3.13M
    if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4579
3.13M
        (*ctxt->space == -1))
4580
1.17M
        *ctxt->space = -2;
4581
3.13M
      }
4582
3.18M
  }
4583
3.67M
    }
4584
17.8M
    if ((cur != 0) && (!IS_CHAR(cur))) {
4585
  /* Generate the error and skip the offending character */
4586
14.6M
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4587
14.6M
                          "PCDATA invalid Char value %d\n",
4588
14.6M
                    cur);
4589
14.6M
  NEXTL(l);
4590
14.6M
    }
4591
17.8M
}
4592
4593
/**
4594
 * xmlParseExternalID:
4595
 * @ctxt:  an XML parser context
4596
 * @publicID:  a xmlChar** receiving PubidLiteral
4597
 * @strict: indicate whether we should restrict parsing to only
4598
 *          production [75], see NOTE below
4599
 *
4600
 * Parse an External ID or a Public ID
4601
 *
4602
 * NOTE: Productions [75] and [83] interact badly since [75] can generate
4603
 *       'PUBLIC' S PubidLiteral S SystemLiteral
4604
 *
4605
 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4606
 *                   | 'PUBLIC' S PubidLiteral S SystemLiteral
4607
 *
4608
 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4609
 *
4610
 * Returns the function returns SystemLiteral and in the second
4611
 *                case publicID receives PubidLiteral, is strict is off
4612
 *                it is possible to return NULL and have publicID set.
4613
 */
4614
4615
xmlChar *
4616
153k
xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4617
153k
    xmlChar *URI = NULL;
4618
4619
153k
    SHRINK;
4620
4621
153k
    *publicID = NULL;
4622
153k
    if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
4623
24.2k
        SKIP(6);
4624
24.2k
  if (SKIP_BLANKS == 0) {
4625
6.26k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4626
6.26k
                     "Space required after 'SYSTEM'\n");
4627
6.26k
  }
4628
24.2k
  URI = xmlParseSystemLiteral(ctxt);
4629
24.2k
  if (URI == NULL) {
4630
1.59k
      xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4631
1.59k
        }
4632
129k
    } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
4633
47.3k
        SKIP(6);
4634
47.3k
  if (SKIP_BLANKS == 0) {
4635
8.21k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4636
8.21k
        "Space required after 'PUBLIC'\n");
4637
8.21k
  }
4638
47.3k
  *publicID = xmlParsePubidLiteral(ctxt);
4639
47.3k
  if (*publicID == NULL) {
4640
4.47k
      xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
4641
4.47k
  }
4642
47.3k
  if (strict) {
4643
      /*
4644
       * We don't handle [83] so "S SystemLiteral" is required.
4645
       */
4646
22.1k
      if (SKIP_BLANKS == 0) {
4647
9.59k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4648
9.59k
      "Space required after the Public Identifier\n");
4649
9.59k
      }
4650
25.1k
  } else {
4651
      /*
4652
       * We handle [83] so we return immediately, if
4653
       * "S SystemLiteral" is not detected. We skip blanks if no
4654
             * system literal was found, but this is harmless since we must
4655
             * be at the end of a NotationDecl.
4656
       */
4657
25.1k
      if (SKIP_BLANKS == 0) return(NULL);
4658
11.6k
      if ((CUR != '\'') && (CUR != '"')) return(NULL);
4659
11.6k
  }
4660
26.3k
  URI = xmlParseSystemLiteral(ctxt);
4661
26.3k
  if (URI == NULL) {
4662
6.87k
      xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4663
6.87k
        }
4664
26.3k
    }
4665
132k
    return(URI);
4666
153k
}
4667
4668
/**
4669
 * xmlParseCommentComplex:
4670
 * @ctxt:  an XML parser context
4671
 * @buf:  the already parsed part of the buffer
4672
 * @len:  number of bytes filles in the buffer
4673
 * @size:  allocated size of the buffer
4674
 *
4675
 * Skip an XML (SGML) comment <!-- .... -->
4676
 *  The spec says that "For compatibility, the string "--" (double-hyphen)
4677
 *  must not occur within comments. "
4678
 * This is the slow routine in case the accelerator for ascii didn't work
4679
 *
4680
 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4681
 */
4682
static void
4683
xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf,
4684
407k
                       size_t len, size_t size) {
4685
407k
    int q, ql;
4686
407k
    int r, rl;
4687
407k
    int cur, l;
4688
407k
    size_t count = 0;
4689
407k
    int inputid;
4690
4691
407k
    inputid = ctxt->input->id;
4692
4693
407k
    if (buf == NULL) {
4694
237k
        len = 0;
4695
237k
  size = XML_PARSER_BUFFER_SIZE;
4696
237k
  buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4697
237k
  if (buf == NULL) {
4698
0
      xmlErrMemory(ctxt, NULL);
4699
0
      return;
4700
0
  }
4701
237k
    }
4702
407k
    GROW; /* Assure there's enough input data */
4703
407k
    q = CUR_CHAR(ql);
4704
407k
    if (q == 0)
4705
6.88k
        goto not_terminated;
4706
400k
    if (!IS_CHAR(q)) {
4707
14.2k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4708
14.2k
                          "xmlParseComment: invalid xmlChar value %d\n",
4709
14.2k
                    q);
4710
14.2k
  xmlFree (buf);
4711
14.2k
  return;
4712
14.2k
    }
4713
386k
    NEXTL(ql);
4714
386k
    r = CUR_CHAR(rl);
4715
386k
    if (r == 0)
4716
1.34k
        goto not_terminated;
4717
385k
    if (!IS_CHAR(r)) {
4718
9.07k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4719
9.07k
                          "xmlParseComment: invalid xmlChar value %d\n",
4720
9.07k
                    q);
4721
9.07k
  xmlFree (buf);
4722
9.07k
  return;
4723
9.07k
    }
4724
376k
    NEXTL(rl);
4725
376k
    cur = CUR_CHAR(l);
4726
376k
    if (cur == 0)
4727
1.22k
        goto not_terminated;
4728
257M
    while (IS_CHAR(cur) && /* checked */
4729
257M
           ((cur != '>') ||
4730
257M
      (r != '-') || (q != '-'))) {
4731
257M
  if ((r == '-') && (q == '-')) {
4732
176k
      xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
4733
176k
  }
4734
257M
        if ((len > XML_MAX_TEXT_LENGTH) &&
4735
6
            ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4736
6
            xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4737
6
                         "Comment too big found", NULL);
4738
6
            xmlFree (buf);
4739
6
            return;
4740
6
        }
4741
257M
  if (len + 5 >= size) {
4742
45.2k
      xmlChar *new_buf;
4743
45.2k
            size_t new_size;
4744
4745
45.2k
      new_size = size * 2;
4746
45.2k
      new_buf = (xmlChar *) xmlRealloc(buf, new_size);
4747
45.2k
      if (new_buf == NULL) {
4748
0
    xmlFree (buf);
4749
0
    xmlErrMemory(ctxt, NULL);
4750
0
    return;
4751
0
      }
4752
45.2k
      buf = new_buf;
4753
45.2k
            size = new_size;
4754
45.2k
  }
4755
257M
  COPY_BUF(ql,buf,len,q);
4756
257M
  q = r;
4757
257M
  ql = rl;
4758
257M
  r = cur;
4759
257M
  rl = l;
4760
4761
257M
  count++;
4762
257M
  if (count > 50) {
4763
5.00M
      GROW;
4764
5.00M
      count = 0;
4765
5.00M
            if (ctxt->instate == XML_PARSER_EOF) {
4766
1
    xmlFree(buf);
4767
1
    return;
4768
1
            }
4769
5.00M
  }
4770
257M
  NEXTL(l);
4771
257M
  cur = CUR_CHAR(l);
4772
257M
  if (cur == 0) {
4773
257k
      SHRINK;
4774
257k
      GROW;
4775
257k
      cur = CUR_CHAR(l);
4776
257k
  }
4777
257M
    }
4778
375k
    buf[len] = 0;
4779
375k
    if (cur == 0) {
4780
257k
  xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4781
257k
                       "Comment not terminated \n<!--%.50s\n", buf);
4782
257k
    } else if (!IS_CHAR(cur)) {
4783
19.0k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4784
19.0k
                          "xmlParseComment: invalid xmlChar value %d\n",
4785
19.0k
                    cur);
4786
98.2k
    } else {
4787
98.2k
  if (inputid != ctxt->input->id) {
4788
0
      xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4789
0
               "Comment doesn't start and stop in the same"
4790
0
                           " entity\n");
4791
0
  }
4792
98.2k
        NEXT;
4793
98.2k
  if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4794
98.2k
      (!ctxt->disableSAX))
4795
25.2k
      ctxt->sax->comment(ctxt->userData, buf);
4796
98.2k
    }
4797
375k
    xmlFree(buf);
4798
375k
    return;
4799
9.45k
not_terminated:
4800
9.45k
    xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4801
9.45k
       "Comment not terminated\n", NULL);
4802
9.45k
    xmlFree(buf);
4803
9.45k
    return;
4804
375k
}
4805
4806
/**
4807
 * xmlParseComment:
4808
 * @ctxt:  an XML parser context
4809
 *
4810
 * Skip an XML (SGML) comment <!-- .... -->
4811
 *  The spec says that "For compatibility, the string "--" (double-hyphen)
4812
 *  must not occur within comments. "
4813
 *
4814
 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4815
 */
4816
void
4817
595k
xmlParseComment(xmlParserCtxtPtr ctxt) {
4818
595k
    xmlChar *buf = NULL;
4819
595k
    size_t size = XML_PARSER_BUFFER_SIZE;
4820
595k
    size_t len = 0;
4821
595k
    xmlParserInputState state;
4822
595k
    const xmlChar *in;
4823
595k
    size_t nbchar = 0;
4824
595k
    int ccol;
4825
595k
    int inputid;
4826
4827
    /*
4828
     * Check that there is a comment right here.
4829
     */
4830
595k
    if ((RAW != '<') || (NXT(1) != '!') ||
4831
595k
        (NXT(2) != '-') || (NXT(3) != '-')) return;
4832
593k
    state = ctxt->instate;
4833
593k
    ctxt->instate = XML_PARSER_COMMENT;
4834
593k
    inputid = ctxt->input->id;
4835
593k
    SKIP(4);
4836
593k
    SHRINK;
4837
593k
    GROW;
4838
4839
    /*
4840
     * Accelerated common case where input don't need to be
4841
     * modified before passing it to the handler.
4842
     */
4843
593k
    in = ctxt->input->cur;
4844
613k
    do {
4845
613k
  if (*in == 0xA) {
4846
277k
      do {
4847
277k
    ctxt->input->line++; ctxt->input->col = 1;
4848
277k
    in++;
4849
277k
      } while (*in == 0xA);
4850
8.57k
  }
4851
1.08M
get_more:
4852
1.08M
        ccol = ctxt->input->col;
4853
7.73M
  while (((*in > '-') && (*in <= 0x7F)) ||
4854
1.71M
         ((*in >= 0x20) && (*in < '-')) ||
4855
6.65M
         (*in == 0x09)) {
4856
6.65M
        in++;
4857
6.65M
        ccol++;
4858
6.65M
  }
4859
1.08M
  ctxt->input->col = ccol;
4860
1.08M
  if (*in == 0xA) {
4861
381k
      do {
4862
381k
    ctxt->input->line++; ctxt->input->col = 1;
4863
381k
    in++;
4864
381k
      } while (*in == 0xA);
4865
16.6k
      goto get_more;
4866
16.6k
  }
4867
1.06M
  nbchar = in - ctxt->input->cur;
4868
  /*
4869
   * save current set of data
4870
   */
4871
1.06M
  if (nbchar > 0) {
4872
662k
      if ((ctxt->sax != NULL) &&
4873
662k
    (ctxt->sax->comment != NULL)) {
4874
662k
    if (buf == NULL) {
4875
257k
        if ((*in == '-') && (in[1] == '-'))
4876
91.4k
            size = nbchar + 1;
4877
165k
        else
4878
165k
            size = XML_PARSER_BUFFER_SIZE + nbchar;
4879
257k
        buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4880
257k
        if (buf == NULL) {
4881
0
            xmlErrMemory(ctxt, NULL);
4882
0
      ctxt->instate = state;
4883
0
      return;
4884
0
        }
4885
257k
        len = 0;
4886
405k
    } else if (len + nbchar + 1 >= size) {
4887
30.4k
        xmlChar *new_buf;
4888
30.4k
        size  += len + nbchar + XML_PARSER_BUFFER_SIZE;
4889
30.4k
        new_buf = (xmlChar *) xmlRealloc(buf,
4890
30.4k
                                         size * sizeof(xmlChar));
4891
30.4k
        if (new_buf == NULL) {
4892
0
            xmlFree (buf);
4893
0
      xmlErrMemory(ctxt, NULL);
4894
0
      ctxt->instate = state;
4895
0
      return;
4896
0
        }
4897
30.4k
        buf = new_buf;
4898
30.4k
    }
4899
662k
    memcpy(&buf[len], ctxt->input->cur, nbchar);
4900
662k
    len += nbchar;
4901
662k
    buf[len] = 0;
4902
662k
      }
4903
662k
  }
4904
1.06M
        if ((len > XML_MAX_TEXT_LENGTH) &&
4905
0
            ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4906
0
            xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4907
0
                         "Comment too big found", NULL);
4908
0
            xmlFree (buf);
4909
0
            return;
4910
0
        }
4911
1.06M
  ctxt->input->cur = in;
4912
1.06M
  if (*in == 0xA) {
4913
0
      in++;
4914
0
      ctxt->input->line++; ctxt->input->col = 1;
4915
0
  }
4916
1.06M
  if (*in == 0xD) {
4917
35.8k
      in++;
4918
35.8k
      if (*in == 0xA) {
4919
23.4k
    ctxt->input->cur = in;
4920
23.4k
    in++;
4921
23.4k
    ctxt->input->line++; ctxt->input->col = 1;
4922
23.4k
    continue; /* while */
4923
23.4k
      }
4924
12.3k
      in--;
4925
12.3k
  }
4926
1.04M
  SHRINK;
4927
1.04M
  GROW;
4928
1.04M
        if (ctxt->instate == XML_PARSER_EOF) {
4929
0
            xmlFree(buf);
4930
0
            return;
4931
0
        }
4932
1.04M
  in = ctxt->input->cur;
4933
1.04M
  if (*in == '-') {
4934
640k
      if (in[1] == '-') {
4935
529k
          if (in[2] == '>') {
4936
186k
        if (ctxt->input->id != inputid) {
4937
0
      xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4938
0
                     "comment doesn't start and stop in the"
4939
0
                                       " same entity\n");
4940
0
        }
4941
186k
        SKIP(3);
4942
186k
        if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4943
186k
            (!ctxt->disableSAX)) {
4944
86.4k
      if (buf != NULL)
4945
54.3k
          ctxt->sax->comment(ctxt->userData, buf);
4946
32.1k
      else
4947
32.1k
          ctxt->sax->comment(ctxt->userData, BAD_CAST "");
4948
86.4k
        }
4949
186k
        if (buf != NULL)
4950
86.6k
            xmlFree(buf);
4951
186k
        if (ctxt->instate != XML_PARSER_EOF)
4952
186k
      ctxt->instate = state;
4953
186k
        return;
4954
186k
    }
4955
343k
    if (buf != NULL) {
4956
328k
        xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
4957
328k
                          "Double hyphen within comment: "
4958
328k
                                      "<!--%.50s\n",
4959
328k
              buf);
4960
328k
    } else
4961
14.5k
        xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
4962
14.5k
                          "Double hyphen within comment\n", NULL);
4963
343k
    in++;
4964
343k
    ctxt->input->col++;
4965
343k
      }
4966
453k
      in++;
4967
453k
      ctxt->input->col++;
4968
453k
      goto get_more;
4969
640k
  }
4970
1.04M
    } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
4971
407k
    xmlParseCommentComplex(ctxt, buf, len, size);
4972
407k
    ctxt->instate = state;
4973
407k
    return;
4974
593k
}
4975
4976
4977
/**
4978
 * xmlParsePITarget:
4979
 * @ctxt:  an XML parser context
4980
 *
4981
 * parse the name of a PI
4982
 *
4983
 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
4984
 *
4985
 * Returns the PITarget name or NULL
4986
 */
4987
4988
const xmlChar *
4989
663k
xmlParsePITarget(xmlParserCtxtPtr ctxt) {
4990
663k
    const xmlChar *name;
4991
4992
663k
    name = xmlParseName(ctxt);
4993
663k
    if ((name != NULL) &&
4994
426k
        ((name[0] == 'x') || (name[0] == 'X')) &&
4995
173k
        ((name[1] == 'm') || (name[1] == 'M')) &&
4996
107k
        ((name[2] == 'l') || (name[2] == 'L'))) {
4997
90.9k
  int i;
4998
90.9k
  if ((name[0] == 'x') && (name[1] == 'm') &&
4999
74.9k
      (name[2] == 'l') && (name[3] == 0)) {
5000
45.0k
      xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5001
45.0k
     "XML declaration allowed only at the start of the document\n");
5002
45.0k
      return(name);
5003
45.9k
  } else if (name[3] == 0) {
5004
7.79k
      xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
5005
7.79k
      return(name);
5006
7.79k
  }
5007
111k
  for (i = 0;;i++) {
5008
111k
      if (xmlW3CPIs[i] == NULL) break;
5009
74.9k
      if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
5010
1.75k
          return(name);
5011
74.9k
  }
5012
36.4k
  xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5013
36.4k
          "xmlParsePITarget: invalid name prefix 'xml'\n",
5014
36.4k
          NULL, NULL);
5015
36.4k
    }
5016
608k
    if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
5017
13.8k
  xmlNsErr(ctxt, XML_NS_ERR_COLON,
5018
13.8k
     "colons are forbidden from PI names '%s'\n", name, NULL, NULL);
5019
13.8k
    }
5020
608k
    return(name);
5021
663k
}
5022
5023
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype information.
 * This will add the given catalog to the parsing context in order
 * to be used if there is a resolution need further down in the document
 *
 * A value that does not match  catalog = "URL"  (single or double
 * quotes, optional blanks, nothing after the closing quote) produces
 * an XML_WAR_CATALOG_PI warning. The one exception is a missing '='
 * after the keyword, which returns silently.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    xmlChar *URL = NULL;
    const xmlChar *p = catalog;
    const xmlChar *start;
    xmlChar quote;

    /* the "catalog" keyword */
    for (; IS_BLANK_CH(*p); p++)
        ;
    if (xmlStrncmp(p, BAD_CAST"catalog", 7) != 0)
        goto error;
    p += 7;

    /* '=' */
    for (; IS_BLANK_CH(*p); p++)
        ;
    if (*p != '=')
        return;
    p++;

    /* opening quote */
    for (; IS_BLANK_CH(*p); p++)
        ;
    quote = *p;
    if ((quote != '\'') && (quote != '"'))
        goto error;
    p++;

    /* the URL runs up to the matching quote */
    start = p;
    for (; (*p != 0) && (*p != quote); p++)
        ;
    if (*p == 0)
        goto error;
    URL = xmlStrndup(start, p - start);
    p++;

    /* only blanks may follow the closing quote */
    for (; IS_BLANK_CH(*p); p++)
        ;
    if (*p != 0)
        goto error;

    if (URL != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
        xmlFree(URL);
    }
    return;

error:
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
                  "Catalog PI syntax error: %s\n",
                  catalog, NULL);
    if (URL != NULL)
        xmlFree(URL);
}
#endif
5084
5085
/**
5086
 * xmlParsePI:
5087
 * @ctxt:  an XML parser context
5088
 *
5089
 * parse an XML Processing Instruction.
5090
 *
5091
 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
5092
 *
5093
 * The processing is transfered to SAX once parsed.
5094
 */
5095
5096
void
5097
663k
xmlParsePI(xmlParserCtxtPtr ctxt) {
5098
663k
    xmlChar *buf = NULL;
5099
663k
    size_t len = 0;
5100
663k
    size_t size = XML_PARSER_BUFFER_SIZE;
5101
663k
    int cur, l;
5102
663k
    const xmlChar *target;
5103
663k
    xmlParserInputState state;
5104
663k
    int count = 0;
5105
5106
663k
    if ((RAW == '<') && (NXT(1) == '?')) {
5107
663k
  int inputid = ctxt->input->id;
5108
663k
  state = ctxt->instate;
5109
663k
        ctxt->instate = XML_PARSER_PI;
5110
  /*
5111
   * this is a Processing Instruction.
5112
   */
5113
663k
  SKIP(2);
5114
663k
  SHRINK;
5115
5116
  /*
5117
   * Parse the target name and check for special support like
5118
   * namespace.
5119
   */
5120
663k
        target = xmlParsePITarget(ctxt);
5121
663k
  if (target != NULL) {
5122
426k
      if ((RAW == '?') && (NXT(1) == '>')) {
5123
87.4k
    if (inputid != ctxt->input->id) {
5124
0
        xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5125
0
                             "PI declaration doesn't start and stop in"
5126
0
                                   " the same entity\n");
5127
0
    }
5128
87.4k
    SKIP(2);
5129
5130
    /*
5131
     * SAX: PI detected.
5132
     */
5133
87.4k
    if ((ctxt->sax) && (!ctxt->disableSAX) &&
5134
68.9k
        (ctxt->sax->processingInstruction != NULL))
5135
68.9k
        ctxt->sax->processingInstruction(ctxt->userData,
5136
68.9k
                                         target, NULL);
5137
87.4k
    if (ctxt->instate != XML_PARSER_EOF)
5138
87.4k
        ctxt->instate = state;
5139
87.4k
    return;
5140
87.4k
      }
5141
338k
      buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
5142
338k
      if (buf == NULL) {
5143
0
    xmlErrMemory(ctxt, NULL);
5144
0
    ctxt->instate = state;
5145
0
    return;
5146
0
      }
5147
338k
      if (SKIP_BLANKS == 0) {
5148
84.3k
    xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
5149
84.3k
        "ParsePI: PI %s space expected\n", target);
5150
84.3k
      }
5151
338k
      cur = CUR_CHAR(l);
5152
548M
      while (IS_CHAR(cur) && /* checked */
5153
548M
       ((cur != '?') || (NXT(1) != '>'))) {
5154
547M
    if (len + 5 >= size) {
5155
81.3k
        xmlChar *tmp;
5156
81.3k
                    size_t new_size = size * 2;
5157
81.3k
        tmp = (xmlChar *) xmlRealloc(buf, new_size);
5158
81.3k
        if (tmp == NULL) {
5159
0
      xmlErrMemory(ctxt, NULL);
5160
0
      xmlFree(buf);
5161
0
      ctxt->instate = state;
5162
0
      return;
5163
0
        }
5164
81.3k
        buf = tmp;
5165
81.3k
                    size = new_size;
5166
81.3k
    }
5167
547M
    count++;
5168
547M
    if (count > 50) {
5169
10.6M
        GROW;
5170
10.6M
                    if (ctxt->instate == XML_PARSER_EOF) {
5171
5
                        xmlFree(buf);
5172
5
                        return;
5173
5
                    }
5174
10.6M
        count = 0;
5175
10.6M
                    if ((len > XML_MAX_TEXT_LENGTH) &&
5176
7
                        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
5177
7
                        xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5178
7
                                          "PI %s too big found", target);
5179
7
                        xmlFree(buf);
5180
7
                        ctxt->instate = state;
5181
7
                        return;
5182
7
                    }
5183
10.6M
    }
5184
547M
    COPY_BUF(l,buf,len,cur);
5185
547M
    NEXTL(l);
5186
547M
    cur = CUR_CHAR(l);
5187
547M
    if (cur == 0) {
5188
13.0k
        SHRINK;
5189
13.0k
        GROW;
5190
13.0k
        cur = CUR_CHAR(l);
5191
13.0k
    }
5192
547M
      }
5193
338k
            if ((len > XML_MAX_TEXT_LENGTH) &&
5194
6
                ((ctxt->options & XML_PARSE_HUGE) == 0)) {
5195
6
                xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5196
6
                                  "PI %s too big found", target);
5197
6
                xmlFree(buf);
5198
6
                ctxt->instate = state;
5199
6
                return;
5200
6
            }
5201
338k
      buf[len] = 0;
5202
338k
      if (cur != '?') {
5203
74.0k
    xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5204
74.0k
          "ParsePI: PI %s never end ...\n", target);
5205
264k
      } else {
5206
264k
    if (inputid != ctxt->input->id) {
5207
745
        xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5208
745
                             "PI declaration doesn't start and stop in"
5209
745
                                   " the same entity\n");
5210
745
    }
5211
264k
    SKIP(2);
5212
5213
264k
#ifdef LIBXML_CATALOG_ENABLED
5214
264k
    if (((state == XML_PARSER_MISC) ||
5215
121k
               (state == XML_PARSER_START)) &&
5216
142k
        (xmlStrEqual(target, XML_CATALOG_PI))) {
5217
48.7k
        xmlCatalogAllow allow = xmlCatalogGetDefaults();
5218
48.7k
        if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5219
48.7k
      (allow == XML_CATA_ALLOW_ALL))
5220
48.7k
      xmlParseCatalogPI(ctxt, buf);
5221
48.7k
    }
5222
264k
#endif
5223
5224
5225
    /*
5226
     * SAX: PI detected.
5227
     */
5228
264k
    if ((ctxt->sax) && (!ctxt->disableSAX) &&
5229
232k
        (ctxt->sax->processingInstruction != NULL))
5230
232k
        ctxt->sax->processingInstruction(ctxt->userData,
5231
232k
                                         target, buf);
5232
264k
      }
5233
338k
      xmlFree(buf);
5234
338k
  } else {
5235
237k
      xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
5236
237k
  }
5237
575k
  if (ctxt->instate != XML_PARSER_EOF)
5238
575k
      ctxt->instate = state;
5239
575k
    }
5240
663k
}
5241
5242
/**
5243
 * xmlParseNotationDecl:
5244
 * @ctxt:  an XML parser context
5245
 *
5246
 * parse a notation declaration
5247
 *
5248
 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID |  PublicID) S? '>'
5249
 *
5250
 * Hence there is actually 3 choices:
5251
 *     'PUBLIC' S PubidLiteral
5252
 *     'PUBLIC' S PubidLiteral S SystemLiteral
5253
 * and 'SYSTEM' S SystemLiteral
5254
 *
5255
 * See the NOTE on xmlParseExternalID().
5256
 */
5257
5258
void
5259
59.4k
xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
5260
59.4k
    const xmlChar *name;
5261
59.4k
    xmlChar *Pubid;
5262
59.4k
    xmlChar *Systemid;
5263
5264
59.4k
    if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5265
48.6k
  int inputid = ctxt->input->id;
5266
48.6k
  SHRINK;
5267
48.6k
  SKIP(10);
5268
48.6k
  if (SKIP_BLANKS == 0) {
5269
1.52k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5270
1.52k
         "Space required after '<!NOTATION'\n");
5271
1.52k
      return;
5272
1.52k
  }
5273
5274
47.1k
        name = xmlParseName(ctxt);
5275
47.1k
  if (name == NULL) {
5276
1.94k
      xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5277
1.94k
      return;
5278
1.94k
  }
5279
45.1k
  if (xmlStrchr(name, ':') != NULL) {
5280
2.91k
      xmlNsErr(ctxt, XML_NS_ERR_COLON,
5281
2.91k
         "colons are forbidden from notation names '%s'\n",
5282
2.91k
         name, NULL, NULL);
5283
2.91k
  }
5284
45.1k
  if (SKIP_BLANKS == 0) {
5285
4.16k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5286
4.16k
         "Space required after the NOTATION name'\n");
5287
4.16k
      return;
5288
4.16k
  }
5289
5290
  /*
5291
   * Parse the IDs.
5292
   */
5293
41.0k
  Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
5294
41.0k
  SKIP_BLANKS;
5295
5296
41.0k
  if (RAW == '>') {
5297
9.03k
      if (inputid != ctxt->input->id) {
5298
1.31k
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5299
1.31k
                         "Notation declaration doesn't start and stop"
5300
1.31k
                               " in the same entity\n");
5301
1.31k
      }
5302
9.03k
      NEXT;
5303
9.03k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5304
9.03k
    (ctxt->sax->notationDecl != NULL))
5305
9.03k
    ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5306
31.9k
  } else {
5307
31.9k
      xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5308
31.9k
  }
5309
41.0k
  if (Systemid != NULL) xmlFree(Systemid);
5310
41.0k
  if (Pubid != NULL) xmlFree(Pubid);
5311
41.0k
    }
5312
59.4k
}
5313
5314
/**
5315
 * xmlParseEntityDecl:
5316
 * @ctxt:  an XML parser context
5317
 *
5318
 * parse <!ENTITY declarations
5319
 *
5320
 * [70] EntityDecl ::= GEDecl | PEDecl
5321
 *
5322
 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5323
 *
5324
 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5325
 *
5326
 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5327
 *
5328
 * [74] PEDef ::= EntityValue | ExternalID
5329
 *
5330
 * [76] NDataDecl ::= S 'NDATA' S Name
5331
 *
5332
 * [ VC: Notation Declared ]
5333
 * The Name must match the declared name of a notation.
5334
 */
5335
5336
void
5337
230k
xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
5338
230k
    const xmlChar *name = NULL;
5339
230k
    xmlChar *value = NULL;
5340
230k
    xmlChar *URI = NULL, *literal = NULL;
5341
230k
    const xmlChar *ndata = NULL;
5342
230k
    int isParameter = 0;
5343
230k
    xmlChar *orig = NULL;
5344
5345
    /* GROW; done in the caller */
5346
230k
    if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
5347
223k
  int inputid = ctxt->input->id;
5348
223k
  SHRINK;
5349
223k
  SKIP(8);
5350
223k
  if (SKIP_BLANKS == 0) {
5351
91.7k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5352
91.7k
         "Space required after '<!ENTITY'\n");
5353
91.7k
  }
5354
5355
223k
  if (RAW == '%') {
5356
37.4k
      NEXT;
5357
37.4k
      if (SKIP_BLANKS == 0) {
5358
23.0k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5359
23.0k
             "Space required after '%%'\n");
5360
23.0k
      }
5361
37.4k
      isParameter = 1;
5362
37.4k
  }
5363
5364
223k
        name = xmlParseName(ctxt);
5365
223k
  if (name == NULL) {
5366
4.68k
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5367
4.68k
                     "xmlParseEntityDecl: no name\n");
5368
4.68k
            return;
5369
4.68k
  }
5370
218k
  if (xmlStrchr(name, ':') != NULL) {
5371
8.06k
      xmlNsErr(ctxt, XML_NS_ERR_COLON,
5372
8.06k
         "colons are forbidden from entities names '%s'\n",
5373
8.06k
         name, NULL, NULL);
5374
8.06k
  }
5375
218k
  if (SKIP_BLANKS == 0) {
5376
78.4k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5377
78.4k
         "Space required after the entity name\n");
5378
78.4k
  }
5379
5380
218k
  ctxt->instate = XML_PARSER_ENTITY_DECL;
5381
  /*
5382
   * handle the various case of definitions...
5383
   */
5384
218k
  if (isParameter) {
5385
37.2k
      if ((RAW == '"') || (RAW == '\'')) {
5386
23.1k
          value = xmlParseEntityValue(ctxt, &orig);
5387
23.1k
    if (value) {
5388
17.2k
        if ((ctxt->sax != NULL) &&
5389
17.2k
      (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5390
17.2k
      ctxt->sax->entityDecl(ctxt->userData, name,
5391
17.2k
                        XML_INTERNAL_PARAMETER_ENTITY,
5392
17.2k
            NULL, NULL, value);
5393
17.2k
    }
5394
23.1k
      } else {
5395
14.0k
          URI = xmlParseExternalID(ctxt, &literal, 1);
5396
14.0k
    if ((URI == NULL) && (literal == NULL)) {
5397
1.88k
        xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5398
1.88k
    }
5399
14.0k
    if (URI) {
5400
9.60k
        xmlURIPtr uri;
5401
5402
9.60k
        uri = xmlParseURI((const char *) URI);
5403
9.60k
        if (uri == NULL) {
5404
2.56k
            xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5405
2.56k
             "Invalid URI: %s\n", URI);
5406
      /*
5407
       * This really ought to be a well formedness error
5408
       * but the XML Core WG decided otherwise c.f. issue
5409
       * E26 of the XML erratas.
5410
       */
5411
7.04k
        } else {
5412
7.04k
      if (uri->fragment != NULL) {
5413
          /*
5414
           * Okay this is foolish to block those but not
5415
           * invalid URIs.
5416
           */
5417
820
          xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5418
6.22k
      } else {
5419
6.22k
          if ((ctxt->sax != NULL) &&
5420
6.22k
        (!ctxt->disableSAX) &&
5421
6.22k
        (ctxt->sax->entityDecl != NULL))
5422
6.22k
        ctxt->sax->entityDecl(ctxt->userData, name,
5423
6.22k
              XML_EXTERNAL_PARAMETER_ENTITY,
5424
6.22k
              literal, URI, NULL);
5425
6.22k
      }
5426
7.04k
      xmlFreeURI(uri);
5427
7.04k
        }
5428
9.60k
    }
5429
14.0k
      }
5430
181k
  } else {
5431
181k
      if ((RAW == '"') || (RAW == '\'')) {
5432
152k
          value = xmlParseEntityValue(ctxt, &orig);
5433
152k
    if ((ctxt->sax != NULL) &&
5434
152k
        (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5435
152k
        ctxt->sax->entityDecl(ctxt->userData, name,
5436
152k
        XML_INTERNAL_GENERAL_ENTITY,
5437
152k
        NULL, NULL, value);
5438
    /*
5439
     * For expat compatibility in SAX mode.
5440
     */
5441
152k
    if ((ctxt->myDoc == NULL) ||
5442
152k
        (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5443
0
        if (ctxt->myDoc == NULL) {
5444
0
      ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5445
0
      if (ctxt->myDoc == NULL) {
5446
0
          xmlErrMemory(ctxt, "New Doc failed");
5447
0
          return;
5448
0
      }
5449
0
      ctxt->myDoc->properties = XML_DOC_INTERNAL;
5450
0
        }
5451
0
        if (ctxt->myDoc->intSubset == NULL)
5452
0
      ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5453
0
              BAD_CAST "fake", NULL, NULL);
5454
5455
0
        xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5456
0
                    NULL, NULL, value);
5457
0
    }
5458
152k
      } else {
5459
29.2k
          URI = xmlParseExternalID(ctxt, &literal, 1);
5460
29.2k
    if ((URI == NULL) && (literal == NULL)) {
5461
6.00k
        xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5462
6.00k
    }
5463
29.2k
    if (URI) {
5464
20.7k
        xmlURIPtr uri;
5465
5466
20.7k
        uri = xmlParseURI((const char *)URI);
5467
20.7k
        if (uri == NULL) {
5468
7.74k
            xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5469
7.74k
             "Invalid URI: %s\n", URI);
5470
      /*
5471
       * This really ought to be a well formedness error
5472
       * but the XML Core WG decided otherwise c.f. issue
5473
       * E26 of the XML erratas.
5474
       */
5475
13.0k
        } else {
5476
13.0k
      if (uri->fragment != NULL) {
5477
          /*
5478
           * Okay this is foolish to block those but not
5479
           * invalid URIs.
5480
           */
5481
4.84k
          xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5482
4.84k
      }
5483
13.0k
      xmlFreeURI(uri);
5484
13.0k
        }
5485
20.7k
    }
5486
29.2k
    if ((RAW != '>') && (SKIP_BLANKS == 0)) {
5487
6.28k
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5488
6.28k
           "Space required before 'NDATA'\n");
5489
6.28k
    }
5490
29.2k
    if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
5491
5.05k
        SKIP(5);
5492
5.05k
        if (SKIP_BLANKS == 0) {
5493
982
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5494
982
               "Space required after 'NDATA'\n");
5495
982
        }
5496
5.05k
        ndata = xmlParseName(ctxt);
5497
5.05k
        if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5498
5.05k
            (ctxt->sax->unparsedEntityDecl != NULL))
5499
5.05k
      ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5500
5.05k
            literal, URI, ndata);
5501
24.1k
    } else {
5502
24.1k
        if ((ctxt->sax != NULL) &&
5503
24.1k
            (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5504
24.1k
      ctxt->sax->entityDecl(ctxt->userData, name,
5505
24.1k
            XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5506
24.1k
            literal, URI, NULL);
5507
        /*
5508
         * For expat compatibility in SAX mode.
5509
         * assuming the entity repalcement was asked for
5510
         */
5511
24.1k
        if ((ctxt->replaceEntities != 0) &&
5512
0
      ((ctxt->myDoc == NULL) ||
5513
0
      (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5514
0
      if (ctxt->myDoc == NULL) {
5515
0
          ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5516
0
          if (ctxt->myDoc == NULL) {
5517
0
              xmlErrMemory(ctxt, "New Doc failed");
5518
0
        return;
5519
0
          }
5520
0
          ctxt->myDoc->properties = XML_DOC_INTERNAL;
5521
0
      }
5522
5523
0
      if (ctxt->myDoc->intSubset == NULL)
5524
0
          ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5525
0
            BAD_CAST "fake", NULL, NULL);
5526
0
      xmlSAX2EntityDecl(ctxt, name,
5527
0
                  XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5528
0
                  literal, URI, NULL);
5529
0
        }
5530
24.1k
    }
5531
29.2k
      }
5532
181k
  }
5533
218k
  if (ctxt->instate == XML_PARSER_EOF)
5534
8
      goto done;
5535
218k
  SKIP_BLANKS;
5536
218k
  if (RAW != '>') {
5537
6.97k
      xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
5538
6.97k
              "xmlParseEntityDecl: entity %s not terminated\n", name);
5539
6.97k
      xmlHaltParser(ctxt);
5540
211k
  } else {
5541
211k
      if (inputid != ctxt->input->id) {
5542
1.06k
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5543
1.06k
                         "Entity declaration doesn't start and stop in"
5544
1.06k
                               " the same entity\n");
5545
1.06k
      }
5546
211k
      NEXT;
5547
211k
  }
5548
218k
  if (orig != NULL) {
5549
      /*
5550
       * Ugly mechanism to save the raw entity value.
5551
       */
5552
154k
      xmlEntityPtr cur = NULL;
5553
5554
154k
      if (isParameter) {
5555
20.6k
          if ((ctxt->sax != NULL) &&
5556
20.6k
        (ctxt->sax->getParameterEntity != NULL))
5557
20.6k
        cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5558
134k
      } else {
5559
134k
          if ((ctxt->sax != NULL) &&
5560
134k
        (ctxt->sax->getEntity != NULL))
5561
134k
        cur = ctxt->sax->getEntity(ctxt->userData, name);
5562
134k
    if ((cur == NULL) && (ctxt->userData==ctxt)) {
5563
0
        cur = xmlSAX2GetEntity(ctxt, name);
5564
0
    }
5565
134k
      }
5566
154k
            if ((cur != NULL) && (cur->orig == NULL)) {
5567
66.5k
    cur->orig = orig;
5568
66.5k
                orig = NULL;
5569
66.5k
      }
5570
154k
  }
5571
5572
218k
done:
5573
218k
  if (value != NULL) xmlFree(value);
5574
218k
  if (URI != NULL) xmlFree(URI);
5575
218k
  if (literal != NULL) xmlFree(literal);
5576
218k
        if (orig != NULL) xmlFree(orig);
5577
218k
    }
5578
230k
}
5579
5580
/**
5581
 * xmlParseDefaultDecl:
5582
 * @ctxt:  an XML parser context
5583
 * @value:  Receive a possible fixed default value for the attribute
5584
 *
5585
 * Parse an attribute default declaration
5586
 *
5587
 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5588
 *
5589
 * [ VC: Required Attribute ]
5590
 * if the default declaration is the keyword #REQUIRED, then the
5591
 * attribute must be specified for all elements of the type in the
5592
 * attribute-list declaration.
5593
 *
5594
 * [ VC: Attribute Default Legal ]
5595
 * The declared default value must meet the lexical constraints of
5596
 * the declared attribute type c.f. xmlValidateAttributeDecl()
5597
 *
5598
 * [ VC: Fixed Attribute Default ]
5599
 * if an attribute has a default value declared with the #FIXED
5600
 * keyword, instances of that attribute must match the default value.
5601
 *
5602
 * [ WFC: No < in Attribute Values ]
5603
 * handled in xmlParseAttValue()
5604
 *
5605
 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5606
 *          or XML_ATTRIBUTE_FIXED.
5607
 */
5608
5609
int
5610
274k
xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5611
274k
    int val;
5612
274k
    xmlChar *ret;
5613
5614
274k
    *value = NULL;
5615
274k
    if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
5616
2.20k
  SKIP(9);
5617
2.20k
  return(XML_ATTRIBUTE_REQUIRED);
5618
2.20k
    }
5619
271k
    if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
5620
27.2k
  SKIP(8);
5621
27.2k
  return(XML_ATTRIBUTE_IMPLIED);
5622
27.2k
    }
5623
244k
    val = XML_ATTRIBUTE_NONE;
5624
244k
    if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
5625
3.80k
  SKIP(6);
5626
3.80k
  val = XML_ATTRIBUTE_FIXED;
5627
3.80k
  if (SKIP_BLANKS == 0) {
5628
558
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5629
558
         "Space required after '#FIXED'\n");
5630
558
  }
5631
3.80k
    }
5632
244k
    ret = xmlParseAttValue(ctxt);
5633
244k
    ctxt->instate = XML_PARSER_DTD;
5634
244k
    if (ret == NULL) {
5635
29.9k
  xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
5636
29.9k
           "Attribute default value declaration error\n");
5637
29.9k
    } else
5638
214k
        *value = ret;
5639
244k
    return(val);
5640
271k
}
5641
5642
/**
5643
 * xmlParseNotationType:
5644
 * @ctxt:  an XML parser context
5645
 *
5646
 * parse an Notation attribute type.
5647
 *
5648
 * Note: the leading 'NOTATION' S part has already being parsed...
5649
 *
5650
 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5651
 *
5652
 * [ VC: Notation Attributes ]
5653
 * Values of this type must match one of the notation names included
5654
 * in the declaration; all notation names in the declaration must be declared.
5655
 *
5656
 * Returns: the notation attribute tree built while parsing
5657
 */
5658
5659
xmlEnumerationPtr
5660
20.9k
xmlParseNotationType(xmlParserCtxtPtr ctxt) {
5661
20.9k
    const xmlChar *name;
5662
20.9k
    xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5663
5664
20.9k
    if (RAW != '(') {
5665
1.51k
  xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5666
1.51k
  return(NULL);
5667
1.51k
    }
5668
19.4k
    SHRINK;
5669
24.0k
    do {
5670
24.0k
        NEXT;
5671
24.0k
  SKIP_BLANKS;
5672
24.0k
        name = xmlParseName(ctxt);
5673
24.0k
  if (name == NULL) {
5674
4.25k
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5675
4.25k
         "Name expected in NOTATION declaration\n");
5676
4.25k
            xmlFreeEnumeration(ret);
5677
4.25k
      return(NULL);
5678
4.25k
  }
5679
19.7k
  tmp = ret;
5680
37.8k
  while (tmp != NULL) {
5681
20.4k
      if (xmlStrEqual(name, tmp->name)) {
5682
2.38k
    xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5683
2.38k
    "standalone: attribute notation value token %s duplicated\n",
5684
2.38k
         name, NULL);
5685
2.38k
    if (!xmlDictOwns(ctxt->dict, name))
5686
0
        xmlFree((xmlChar *) name);
5687
2.38k
    break;
5688
2.38k
      }
5689
18.0k
      tmp = tmp->next;
5690
18.0k
  }
5691
19.7k
  if (tmp == NULL) {
5692
17.3k
      cur = xmlCreateEnumeration(name);
5693
17.3k
      if (cur == NULL) {
5694
0
                xmlFreeEnumeration(ret);
5695
0
                return(NULL);
5696
0
            }
5697
17.3k
      if (last == NULL) ret = last = cur;
5698
2.13k
      else {
5699
2.13k
    last->next = cur;
5700
2.13k
    last = cur;
5701
2.13k
      }
5702
17.3k
  }
5703
19.7k
  SKIP_BLANKS;
5704
19.7k
    } while (RAW == '|');
5705
15.1k
    if (RAW != ')') {
5706
2.21k
  xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5707
2.21k
        xmlFreeEnumeration(ret);
5708
2.21k
  return(NULL);
5709
2.21k
    }
5710
12.9k
    NEXT;
5711
12.9k
    return(ret);
5712
15.1k
}
5713
5714
/**
5715
 * xmlParseEnumerationType:
5716
 * @ctxt:  an XML parser context
5717
 *
5718
 * parse an Enumeration attribute type.
5719
 *
5720
 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5721
 *
5722
 * [ VC: Enumeration ]
5723
 * Values of this type must match one of the Nmtoken tokens in
5724
 * the declaration
5725
 *
5726
 * Returns: the enumeration attribute tree built while parsing
5727
 */
5728
5729
xmlEnumerationPtr
5730
121k
xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
5731
121k
    xmlChar *name;
5732
121k
    xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5733
5734
121k
    if (RAW != '(') {
5735
31.6k
  xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
5736
31.6k
  return(NULL);
5737
31.6k
    }
5738
89.6k
    SHRINK;
5739
120k
    do {
5740
120k
        NEXT;
5741
120k
  SKIP_BLANKS;
5742
120k
        name = xmlParseNmtoken(ctxt);
5743
120k
  if (name == NULL) {
5744
3.31k
      xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
5745
3.31k
      return(ret);
5746
3.31k
  }
5747
116k
  tmp = ret;
5748
197k
  while (tmp != NULL) {
5749
103k
      if (xmlStrEqual(name, tmp->name)) {
5750
22.7k
    xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5751
22.7k
    "standalone: attribute enumeration value token %s duplicated\n",
5752
22.7k
         name, NULL);
5753
22.7k
    if (!xmlDictOwns(ctxt->dict, name))
5754
22.7k
        xmlFree(name);
5755
22.7k
    break;
5756
22.7k
      }
5757
81.0k
      tmp = tmp->next;
5758
81.0k
  }
5759
116k
  if (tmp == NULL) {
5760
94.1k
      cur = xmlCreateEnumeration(name);
5761
94.1k
      if (!xmlDictOwns(ctxt->dict, name))
5762
94.1k
    xmlFree(name);
5763
94.1k
      if (cur == NULL) {
5764
0
                xmlFreeEnumeration(ret);
5765
0
                return(NULL);
5766
0
            }
5767
94.1k
      if (last == NULL) ret = last = cur;
5768
7.50k
      else {
5769
7.50k
    last->next = cur;
5770
7.50k
    last = cur;
5771
7.50k
      }
5772
94.1k
  }
5773
116k
  SKIP_BLANKS;
5774
116k
    } while (RAW == '|');
5775
86.3k
    if (RAW != ')') {
5776
6.22k
  xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
5777
6.22k
  return(ret);
5778
6.22k
    }
5779
80.1k
    NEXT;
5780
80.1k
    return(ret);
5781
86.3k
}
5782
5783
/**
5784
 * xmlParseEnumeratedType:
5785
 * @ctxt:  an XML parser context
5786
 * @tree:  the enumeration tree built while parsing
5787
 *
5788
 * parse an Enumerated attribute type.
5789
 *
5790
 * [57] EnumeratedType ::= NotationType | Enumeration
5791
 *
5792
 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5793
 *
5794
 *
5795
 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5796
 */
5797
5798
int
5799
143k
xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5800
143k
    if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5801
22.2k
  SKIP(8);
5802
22.2k
  if (SKIP_BLANKS == 0) {
5803
1.27k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5804
1.27k
         "Space required after 'NOTATION'\n");
5805
1.27k
      return(0);
5806
1.27k
  }
5807
20.9k
  *tree = xmlParseNotationType(ctxt);
5808
20.9k
  if (*tree == NULL) return(0);
5809
12.9k
  return(XML_ATTRIBUTE_NOTATION);
5810
20.9k
    }
5811
121k
    *tree = xmlParseEnumerationType(ctxt);
5812
121k
    if (*tree == NULL) return(0);
5813
86.6k
    return(XML_ATTRIBUTE_ENUMERATION);
5814
121k
}
5815
5816
/**
5817
 * xmlParseAttributeType:
5818
 * @ctxt:  an XML parser context
5819
 * @tree:  the enumeration tree built while parsing
5820
 *
5821
 * parse the Attribute list def for an element
5822
 *
5823
 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5824
 *
5825
 * [55] StringType ::= 'CDATA'
5826
 *
5827
 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5828
 *                        'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5829
 *
5830
 * Validity constraints for attribute values syntax are checked in
5831
 * xmlValidateAttributeValue()
5832
 *
5833
 * [ VC: ID ]
5834
 * Values of type ID must match the Name production. A name must not
5835
 * appear more than once in an XML document as a value of this type;
5836
 * i.e., ID values must uniquely identify the elements which bear them.
5837
 *
5838
 * [ VC: One ID per Element Type ]
5839
 * No element type may have more than one ID attribute specified.
5840
 *
5841
 * [ VC: ID Attribute Default ]
5842
 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
5843
 *
5844
 * [ VC: IDREF ]
5845
 * Values of type IDREF must match the Name production, and values
5846
 * of type IDREFS must match Names; each IDREF Name must match the value
5847
 * of an ID attribute on some element in the XML document; i.e. IDREF
5848
 * values must match the value of some ID attribute.
5849
 *
5850
 * [ VC: Entity Name ]
5851
 * Values of type ENTITY must match the Name production, values
5852
 * of type ENTITIES must match Names; each Entity Name must match the
5853
 * name of an unparsed entity declared in the DTD.
5854
 *
5855
 * [ VC: Name Token ]
5856
 * Values of type NMTOKEN must match the Nmtoken production; values
5857
 * of type NMTOKENS must match Nmtokens.
5858
 *
5859
 * Returns the attribute type
5860
 */
5861
int
5862
329k
xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5863
329k
    SHRINK;
5864
329k
    if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
5865
21.6k
  SKIP(5);
5866
21.6k
  return(XML_ATTRIBUTE_CDATA);
5867
308k
     } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
5868
32.6k
  SKIP(6);
5869
32.6k
  return(XML_ATTRIBUTE_IDREFS);
5870
275k
     } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
5871
1.95k
  SKIP(5);
5872
1.95k
  return(XML_ATTRIBUTE_IDREF);
5873
273k
     } else if ((RAW == 'I') && (NXT(1) == 'D')) {
5874
104k
        SKIP(2);
5875
104k
  return(XML_ATTRIBUTE_ID);
5876
169k
     } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
5877
5.02k
  SKIP(6);
5878
5.02k
  return(XML_ATTRIBUTE_ENTITY);
5879
164k
     } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
5880
3.07k
  SKIP(8);
5881
3.07k
  return(XML_ATTRIBUTE_ENTITIES);
5882
161k
     } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
5883
1.36k
  SKIP(8);
5884
1.36k
  return(XML_ATTRIBUTE_NMTOKENS);
5885
160k
     } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
5886
16.5k
  SKIP(7);
5887
16.5k
  return(XML_ATTRIBUTE_NMTOKEN);
5888
16.5k
     }
5889
143k
     return(xmlParseEnumeratedType(ctxt, tree));
5890
329k
}
5891
5892
/**
5893
 * xmlParseAttributeListDecl:
5894
 * @ctxt:  an XML parser context
5895
 *
5896
 * : parse the Attribute list def for an element
5897
 *
5898
 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
5899
 *
5900
 * [53] AttDef ::= S Name S AttType S DefaultDecl
5901
 *
5902
 */
5903
void
5904
247k
xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
5905
247k
    const xmlChar *elemName;
5906
247k
    const xmlChar *attrName;
5907
247k
    xmlEnumerationPtr tree;
5908
5909
247k
    if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
5910
237k
  int inputid = ctxt->input->id;
5911
5912
237k
  SKIP(9);
5913
237k
  if (SKIP_BLANKS == 0) {
5914
185k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5915
185k
                     "Space required after '<!ATTLIST'\n");
5916
185k
  }
5917
237k
        elemName = xmlParseName(ctxt);
5918
237k
  if (elemName == NULL) {
5919
2.80k
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5920
2.80k
         "ATTLIST: no name for Element\n");
5921
2.80k
      return;
5922
2.80k
  }
5923
234k
  SKIP_BLANKS;
5924
234k
  GROW;
5925
468k
  while ((RAW != '>') && (ctxt->instate != XML_PARSER_EOF)) {
5926
430k
      int type;
5927
430k
      int def;
5928
430k
      xmlChar *defaultValue = NULL;
5929
5930
430k
      GROW;
5931
430k
            tree = NULL;
5932
430k
      attrName = xmlParseName(ctxt);
5933
430k
      if (attrName == NULL) {
5934
83.8k
    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5935
83.8k
             "ATTLIST: no name for Attribute\n");
5936
83.8k
    break;
5937
83.8k
      }
5938
346k
      GROW;
5939
346k
      if (SKIP_BLANKS == 0) {
5940
17.2k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5941
17.2k
            "Space required after the attribute name\n");
5942
17.2k
    break;
5943
17.2k
      }
5944
5945
329k
      type = xmlParseAttributeType(ctxt, &tree);
5946
329k
      if (type <= 0) {
5947
43.9k
          break;
5948
43.9k
      }
5949
5950
285k
      GROW;
5951
285k
      if (SKIP_BLANKS == 0) {
5952
11.6k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5953
11.6k
             "Space required after the attribute type\n");
5954
11.6k
          if (tree != NULL)
5955
7.00k
        xmlFreeEnumeration(tree);
5956
11.6k
    break;
5957
11.6k
      }
5958
5959
274k
      def = xmlParseDefaultDecl(ctxt, &defaultValue);
5960
274k
      if (def <= 0) {
5961
0
                if (defaultValue != NULL)
5962
0
        xmlFree(defaultValue);
5963
0
          if (tree != NULL)
5964
0
        xmlFreeEnumeration(tree);
5965
0
          break;
5966
0
      }
5967
274k
      if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
5968
198k
          xmlAttrNormalizeSpace(defaultValue, defaultValue);
5969
5970
274k
      GROW;
5971
274k
            if (RAW != '>') {
5972
247k
    if (SKIP_BLANKS == 0) {
5973
39.6k
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5974
39.6k
      "Space required after the attribute default value\n");
5975
39.6k
        if (defaultValue != NULL)
5976
11.9k
      xmlFree(defaultValue);
5977
39.6k
        if (tree != NULL)
5978
17.5k
      xmlFreeEnumeration(tree);
5979
39.6k
        break;
5980
39.6k
    }
5981
247k
      }
5982
234k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5983
234k
    (ctxt->sax->attributeDecl != NULL))
5984
234k
    ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
5985
234k
                          type, def, defaultValue, tree);
5986
0
      else if (tree != NULL)
5987
0
    xmlFreeEnumeration(tree);
5988
5989
234k
      if ((ctxt->sax2) && (defaultValue != NULL) &&
5990
202k
          (def != XML_ATTRIBUTE_IMPLIED) &&
5991
202k
    (def != XML_ATTRIBUTE_REQUIRED)) {
5992
202k
    xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
5993
202k
      }
5994
234k
      if (ctxt->sax2) {
5995
234k
    xmlAddSpecialAttr(ctxt, elemName, attrName, type);
5996
234k
      }
5997
234k
      if (defaultValue != NULL)
5998
202k
          xmlFree(defaultValue);
5999
234k
      GROW;
6000
234k
  }
6001
234k
  if (RAW == '>') {
6002
41.1k
      if (inputid != ctxt->input->id) {
6003
3.27k
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6004
3.27k
                               "Attribute list declaration doesn't start and"
6005
3.27k
                               " stop in the same entity\n");
6006
3.27k
      }
6007
41.1k
      NEXT;
6008
41.1k
  }
6009
234k
    }
6010
247k
}
6011
6012
/**
6013
 * xmlParseElementMixedContentDecl:
6014
 * @ctxt:  an XML parser context
6015
 * @inputchk:  the input used for the current entity, needed for boundary checks
6016
 *
6017
 * parse the declaration for a Mixed Element content
6018
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6019
 *
6020
 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
6021
 *                '(' S? '#PCDATA' S? ')'
6022
 *
6023
 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
6024
 *
6025
 * [ VC: No Duplicate Types ]
6026
 * The same name must not appear more than once in a single
6027
 * mixed-content declaration.
6028
 *
6029
 * returns: the list of the xmlElementContentPtr describing the element choices
6030
 */
6031
xmlElementContentPtr
6032
27.1k
xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6033
27.1k
    xmlElementContentPtr ret = NULL, cur = NULL, n;
6034
27.1k
    const xmlChar *elem = NULL;
6035
6036
27.1k
    GROW;
6037
27.1k
    if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6038
27.1k
  SKIP(7);
6039
27.1k
  SKIP_BLANKS;
6040
27.1k
  SHRINK;
6041
27.1k
  if (RAW == ')') {
6042
11.6k
      if (ctxt->input->id != inputchk) {
6043
3.43k
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6044
3.43k
                               "Element content declaration doesn't start and"
6045
3.43k
                               " stop in the same entity\n");
6046
3.43k
      }
6047
11.6k
      NEXT;
6048
11.6k
      ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6049
11.6k
      if (ret == NULL)
6050
0
          return(NULL);
6051
11.6k
      if (RAW == '*') {
6052
1.36k
    ret->ocur = XML_ELEMENT_CONTENT_MULT;
6053
1.36k
    NEXT;
6054
1.36k
      }
6055
11.6k
      return(ret);
6056
11.6k
  }
6057
15.5k
  if ((RAW == '(') || (RAW == '|')) {
6058
12.3k
      ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6059
12.3k
      if (ret == NULL) return(NULL);
6060
12.3k
  }
6061
34.7k
  while ((RAW == '|') && (ctxt->instate != XML_PARSER_EOF)) {
6062
20.7k
      NEXT;
6063
20.7k
      if (elem == NULL) {
6064
11.7k
          ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6065
11.7k
    if (ret == NULL) return(NULL);
6066
11.7k
    ret->c1 = cur;
6067
11.7k
    if (cur != NULL)
6068
11.7k
        cur->parent = ret;
6069
11.7k
    cur = ret;
6070
11.7k
      } else {
6071
8.96k
          n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6072
8.96k
    if (n == NULL) return(NULL);
6073
8.96k
    n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6074
8.96k
    if (n->c1 != NULL)
6075
8.96k
        n->c1->parent = n;
6076
8.96k
          cur->c2 = n;
6077
8.96k
    if (n != NULL)
6078
8.96k
        n->parent = cur;
6079
8.96k
    cur = n;
6080
8.96k
      }
6081
20.7k
      SKIP_BLANKS;
6082
20.7k
      elem = xmlParseName(ctxt);
6083
20.7k
      if (elem == NULL) {
6084
1.54k
    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6085
1.54k
      "xmlParseElementMixedContentDecl : Name expected\n");
6086
1.54k
    xmlFreeDocElementContent(ctxt->myDoc, ret);
6087
1.54k
    return(NULL);
6088
1.54k
      }
6089
19.1k
      SKIP_BLANKS;
6090
19.1k
      GROW;
6091
19.1k
  }
6092
13.9k
  if ((RAW == ')') && (NXT(1) == '*')) {
6093
1.77k
      if (elem != NULL) {
6094
1.77k
    cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
6095
1.77k
                                   XML_ELEMENT_CONTENT_ELEMENT);
6096
1.77k
    if (cur->c2 != NULL)
6097
1.77k
        cur->c2->parent = cur;
6098
1.77k
            }
6099
1.77k
            if (ret != NULL)
6100
1.77k
                ret->ocur = XML_ELEMENT_CONTENT_MULT;
6101
1.77k
      if (ctxt->input->id != inputchk) {
6102
2
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6103
2
                               "Element content declaration doesn't start and"
6104
2
                               " stop in the same entity\n");
6105
2
      }
6106
1.77k
      SKIP(2);
6107
12.2k
  } else {
6108
12.2k
      xmlFreeDocElementContent(ctxt->myDoc, ret);
6109
12.2k
      xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
6110
12.2k
      return(NULL);
6111
12.2k
  }
6112
6113
13.9k
    } else {
6114
0
  xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
6115
0
    }
6116
1.77k
    return(ret);
6117
27.1k
}
6118
6119
/**
6120
 * xmlParseElementChildrenContentDeclPriv:
6121
 * @ctxt:  an XML parser context
6122
 * @inputchk:  the input used for the current entity, needed for boundary checks
6123
 * @depth: the level of recursion
6124
 *
6125
 * parse the declaration for an element children (non-mixed) content model
6126
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6127
 *
6128
 *
6129
 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6130
 *
6131
 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6132
 *
6133
 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6134
 *
6135
 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6136
 *
6137
 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6138
 * TODO Parameter-entity replacement text must be properly nested
6139
 *  with parenthesized groups. That is to say, if either of the
6140
 *  opening or closing parentheses in a choice, seq, or Mixed
6141
 *  construct is contained in the replacement text for a parameter
6142
 *  entity, both must be contained in the same replacement text. For
6143
 *  interoperability, if a parameter-entity reference appears in a
6144
 *  choice, seq, or Mixed construct, its replacement text should not
6145
 *  be empty, and neither the first nor last non-blank character of
6146
 *  the replacement text should be a connector (| or ,).
6147
 *
6148
 * Returns the tree of xmlElementContentPtr describing the element
6149
 *          hierarchy.
6150
 */
6151
static xmlElementContentPtr
6152
xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
6153
446k
                                       int depth) {
6154
446k
    xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
6155
446k
    const xmlChar *elem;
6156
446k
    xmlChar type = 0;
6157
6158
446k
    if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
6159
445k
        (depth >  2048)) {
6160
622
        xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED,
6161
622
"xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n",
6162
622
                          depth);
6163
622
  return(NULL);
6164
622
    }
6165
445k
    SKIP_BLANKS;
6166
445k
    GROW;
6167
445k
    if (RAW == '(') {
6168
282k
  int inputid = ctxt->input->id;
6169
6170
        /* Recurse on first child */
6171
282k
  NEXT;
6172
282k
  SKIP_BLANKS;
6173
282k
        cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6174
282k
                                                           depth + 1);
6175
282k
  SKIP_BLANKS;
6176
282k
  GROW;
6177
282k
    } else {
6178
163k
  elem = xmlParseName(ctxt);
6179
163k
  if (elem == NULL) {
6180
40.8k
      xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6181
40.8k
      return(NULL);
6182
40.8k
  }
6183
122k
        cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6184
122k
  if (cur == NULL) {
6185
0
      xmlErrMemory(ctxt, NULL);
6186
0
      return(NULL);
6187
0
  }
6188
122k
  GROW;
6189
122k
  if (RAW == '?') {
6190
6.99k
      cur->ocur = XML_ELEMENT_CONTENT_OPT;
6191
6.99k
      NEXT;
6192
115k
  } else if (RAW == '*') {
6193
12.8k
      cur->ocur = XML_ELEMENT_CONTENT_MULT;
6194
12.8k
      NEXT;
6195
102k
  } else if (RAW == '+') {
6196
3.21k
      cur->ocur = XML_ELEMENT_CONTENT_PLUS;
6197
3.21k
      NEXT;
6198
99.1k
  } else {
6199
99.1k
      cur->ocur = XML_ELEMENT_CONTENT_ONCE;
6200
99.1k
  }
6201
122k
  GROW;
6202
122k
    }
6203
405k
    SKIP_BLANKS;
6204
405k
    SHRINK;
6205
569k
    while ((RAW != ')') && (ctxt->instate != XML_PARSER_EOF)) {
6206
        /*
6207
   * Each loop we parse one separator and one element.
6208
   */
6209
458k
        if (RAW == ',') {
6210
70.1k
      if (type == 0) type = CUR;
6211
6212
      /*
6213
       * Detect "Name | Name , Name" error
6214
       */
6215
20.9k
      else if (type != CUR) {
6216
5.20k
    xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6217
5.20k
        "xmlParseElementChildrenContentDecl : '%c' expected\n",
6218
5.20k
                      type);
6219
5.20k
    if ((last != NULL) && (last != ret))
6220
2.21k
        xmlFreeDocElementContent(ctxt->myDoc, last);
6221
5.20k
    if (ret != NULL)
6222
5.20k
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6223
5.20k
    return(NULL);
6224
5.20k
      }
6225
64.9k
      NEXT;
6226
6227
64.9k
      op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
6228
64.9k
      if (op == NULL) {
6229
0
    if ((last != NULL) && (last != ret))
6230
0
        xmlFreeDocElementContent(ctxt->myDoc, last);
6231
0
          xmlFreeDocElementContent(ctxt->myDoc, ret);
6232
0
    return(NULL);
6233
0
      }
6234
64.9k
      if (last == NULL) {
6235
56.3k
    op->c1 = ret;
6236
56.3k
    if (ret != NULL)
6237
41.9k
        ret->parent = op;
6238
56.3k
    ret = cur = op;
6239
56.3k
      } else {
6240
8.67k
          cur->c2 = op;
6241
8.67k
    if (op != NULL)
6242
8.67k
        op->parent = cur;
6243
8.67k
    op->c1 = last;
6244
8.67k
    if (last != NULL)
6245
8.67k
        last->parent = op;
6246
8.67k
    cur =op;
6247
8.67k
    last = NULL;
6248
8.67k
      }
6249
388k
  } else if (RAW == '|') {
6250
132k
      if (type == 0) type = CUR;
6251
6252
      /*
6253
       * Detect "Name , Name | Name" error
6254
       */
6255
47.8k
      else if (type != CUR) {
6256
11.7k
    xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6257
11.7k
        "xmlParseElementChildrenContentDecl : '%c' expected\n",
6258
11.7k
          type);
6259
11.7k
    if ((last != NULL) && (last != ret))
6260
4.30k
        xmlFreeDocElementContent(ctxt->myDoc, last);
6261
11.7k
    if (ret != NULL)
6262
11.7k
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6263
11.7k
    return(NULL);
6264
11.7k
      }
6265
121k
      NEXT;
6266
6267
121k
      op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6268
121k
      if (op == NULL) {
6269
0
    if ((last != NULL) && (last != ret))
6270
0
        xmlFreeDocElementContent(ctxt->myDoc, last);
6271
0
    if (ret != NULL)
6272
0
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6273
0
    return(NULL);
6274
0
      }
6275
121k
      if (last == NULL) {
6276
90.2k
    op->c1 = ret;
6277
90.2k
    if (ret != NULL)
6278
69.3k
        ret->parent = op;
6279
90.2k
    ret = cur = op;
6280
90.2k
      } else {
6281
30.9k
          cur->c2 = op;
6282
30.9k
    if (op != NULL)
6283
30.9k
        op->parent = cur;
6284
30.9k
    op->c1 = last;
6285
30.9k
    if (last != NULL)
6286
30.9k
        last->parent = op;
6287
30.9k
    cur =op;
6288
30.9k
    last = NULL;
6289
30.9k
      }
6290
255k
  } else {
6291
255k
      xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
6292
255k
      if ((last != NULL) && (last != ret))
6293
7.77k
          xmlFreeDocElementContent(ctxt->myDoc, last);
6294
255k
      if (ret != NULL)
6295
74.5k
    xmlFreeDocElementContent(ctxt->myDoc, ret);
6296
255k
      return(NULL);
6297
255k
  }
6298
186k
  GROW;
6299
186k
  SKIP_BLANKS;
6300
186k
  GROW;
6301
186k
  if (RAW == '(') {
6302
108k
      int inputid = ctxt->input->id;
6303
      /* Recurse on second child */
6304
108k
      NEXT;
6305
108k
      SKIP_BLANKS;
6306
108k
      last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6307
108k
                                                          depth + 1);
6308
108k
      SKIP_BLANKS;
6309
108k
  } else {
6310
78.0k
      elem = xmlParseName(ctxt);
6311
78.0k
      if (elem == NULL) {
6312
21.6k
    xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6313
21.6k
    if (ret != NULL)
6314
21.6k
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6315
21.6k
    return(NULL);
6316
21.6k
      }
6317
56.3k
      last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6318
56.3k
      if (last == NULL) {
6319
0
    if (ret != NULL)
6320
0
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6321
0
    return(NULL);
6322
0
      }
6323
56.3k
      if (RAW == '?') {
6324
13.5k
    last->ocur = XML_ELEMENT_CONTENT_OPT;
6325
13.5k
    NEXT;
6326
42.8k
      } else if (RAW == '*') {
6327
3.29k
    last->ocur = XML_ELEMENT_CONTENT_MULT;
6328
3.29k
    NEXT;
6329
39.5k
      } else if (RAW == '+') {
6330
1.50k
    last->ocur = XML_ELEMENT_CONTENT_PLUS;
6331
1.50k
    NEXT;
6332
38.0k
      } else {
6333
38.0k
    last->ocur = XML_ELEMENT_CONTENT_ONCE;
6334
38.0k
      }
6335
56.3k
  }
6336
164k
  SKIP_BLANKS;
6337
164k
  GROW;
6338
164k
    }
6339
110k
    if ((cur != NULL) && (last != NULL)) {
6340
37.9k
        cur->c2 = last;
6341
37.9k
  if (last != NULL)
6342
37.9k
      last->parent = cur;
6343
37.9k
    }
6344
110k
    if (ctxt->input->id != inputchk) {
6345
3.19k
  xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6346
3.19k
                       "Element content declaration doesn't start and stop in"
6347
3.19k
                       " the same entity\n");
6348
3.19k
    }
6349
110k
    NEXT;
6350
110k
    if (RAW == '?') {
6351
17.4k
  if (ret != NULL) {
6352
15.5k
      if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6353
13.7k
          (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6354
4.98k
          ret->ocur = XML_ELEMENT_CONTENT_MULT;
6355
10.5k
      else
6356
10.5k
          ret->ocur = XML_ELEMENT_CONTENT_OPT;
6357
15.5k
  }
6358
17.4k
  NEXT;
6359
93.2k
    } else if (RAW == '*') {
6360
22.8k
  if (ret != NULL) {
6361
18.8k
      ret->ocur = XML_ELEMENT_CONTENT_MULT;
6362
18.8k
      cur = ret;
6363
      /*
6364
       * Some normalization:
6365
       * (a | b* | c?)* == (a | b | c)*
6366
       */
6367
61.0k
      while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6368
42.2k
    if ((cur->c1 != NULL) &&
6369
36.5k
              ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6370
32.9k
         (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6371
11.8k
        cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6372
42.2k
    if ((cur->c2 != NULL) &&
6373
38.2k
              ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6374
34.7k
         (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6375
8.63k
        cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6376
42.2k
    cur = cur->c2;
6377
42.2k
      }
6378
18.8k
  }
6379
22.8k
  NEXT;
6380
70.3k
    } else if (RAW == '+') {
6381
25.8k
  if (ret != NULL) {
6382
22.5k
      int found = 0;
6383
6384
22.5k
      if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6385
19.3k
          (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6386
8.56k
          ret->ocur = XML_ELEMENT_CONTENT_MULT;
6387
14.0k
      else
6388
14.0k
          ret->ocur = XML_ELEMENT_CONTENT_PLUS;
6389
      /*
6390
       * Some normalization:
6391
       * (a | b*)+ == (a | b)*
6392
       * (a | b?)+ == (a | b)*
6393
       */
6394
70.6k
      while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6395
48.0k
    if ((cur->c1 != NULL) &&
6396
38.8k
              ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6397
35.2k
         (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6398
9.21k
        cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6399
9.21k
        found = 1;
6400
9.21k
    }
6401
48.0k
    if ((cur->c2 != NULL) &&
6402
40.7k
              ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6403
36.2k
         (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6404
8.92k
        cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6405
8.92k
        found = 1;
6406
8.92k
    }
6407
48.0k
    cur = cur->c2;
6408
48.0k
      }
6409
22.5k
      if (found)
6410
11.5k
    ret->ocur = XML_ELEMENT_CONTENT_MULT;
6411
22.5k
  }
6412
25.8k
  NEXT;
6413
25.8k
    }
6414
110k
    return(ret);
6415
405k
}
6416
6417
/**
6418
 * xmlParseElementChildrenContentDecl:
6419
 * @ctxt:  an XML parser context
6420
 * @inputchk:  the id of the input used for the current entity, needed for boundary checks
6421
 *
6422
 * parse the declaration for an element children content model
6423
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6424
 *
6425
 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6426
 *
6427
 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6428
 *
6429
 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6430
 *
6431
 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6432
 *
6433
 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6434
 * TODO Parameter-entity replacement text must be properly nested
6435
 *  with parenthesized groups. That is to say, if either of the
6436
 *  opening or closing parentheses in a choice, seq, or Mixed
6437
 *  construct is contained in the replacement text for a parameter
6438
 *  entity, both must be contained in the same replacement text. For
6439
 *  interoperability, if a parameter-entity reference appears in a
6440
 *  choice, seq, or Mixed construct, its replacement text should not
6441
 *  be empty, and neither the first nor last non-blank character of
6442
 *  the replacement text should be a connector (| or ,).
6443
 *
6444
 * Returns the tree of xmlElementContentPtr describing the element
6445
 *          hierarchy, or NULL in case of error.
6446
 */
6447
xmlElementContentPtr
6448
0
xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6449
    /* stub left for API/ABI compat */
6450
0
    return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6451
0
}
6452
6453
/**
6454
 * xmlParseElementContentDecl:
6455
 * @ctxt:  an XML parser context
6456
 * @name:  the name of the element being defined.
6457
 * @result:  the Element Content pointer will be stored here if any
6458
 *
6459
 * parse the declaration for an Element content either Mixed or Children,
6460
 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
6461
 *
6462
 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6463
 *
6464
 * returns: the type of element content XML_ELEMENT_TYPE_xxx, or -1 in case of error
6465
 */
6466
6467
int
6468
xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
6469
82.5k
                           xmlElementContentPtr *result) {
6470
6471
82.5k
    xmlElementContentPtr tree = NULL;
6472
82.5k
    int inputid = ctxt->input->id;
6473
82.5k
    int res;
6474
6475
82.5k
    *result = NULL;
6476
6477
82.5k
    if (RAW != '(') {
6478
0
  xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6479
0
    "xmlParseElementContentDecl : %s '(' expected\n", name);
6480
0
  return(-1);
6481
0
    }
6482
82.5k
    NEXT;
6483
82.5k
    GROW;
6484
82.5k
    if (ctxt->instate == XML_PARSER_EOF)
6485
0
        return(-1);
6486
82.5k
    SKIP_BLANKS;
6487
82.5k
    if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6488
27.1k
        tree = xmlParseElementMixedContentDecl(ctxt, inputid);
6489
27.1k
  res = XML_ELEMENT_TYPE_MIXED;
6490
55.4k
    } else {
6491
55.4k
        tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
6492
55.4k
  res = XML_ELEMENT_TYPE_ELEMENT;
6493
55.4k
    }
6494
82.5k
    SKIP_BLANKS;
6495
82.5k
    *result = tree;
6496
82.5k
    return(res);
6497
82.5k
}
6498
6499
/**
6500
 * xmlParseElementDecl:
6501
 * @ctxt:  an XML parser context
6502
 *
6503
 * parse an Element declaration.
6504
 *
6505
 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6506
 *
6507
 * [ VC: Unique Element Type Declaration ]
6508
 * No element type may be declared more than once
6509
 *
6510
 * Returns the type of the element, or -1 in case of error
6511
 */
6512
int
6513
104k
xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
6514
104k
    const xmlChar *name;
6515
104k
    int ret = -1;
6516
104k
    xmlElementContentPtr content  = NULL;
6517
6518
    /* GROW; done in the caller */
6519
104k
    if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
6520
96.5k
  int inputid = ctxt->input->id;
6521
6522
96.5k
  SKIP(9);
6523
96.5k
  if (SKIP_BLANKS == 0) {
6524
1.69k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6525
1.69k
               "Space required after 'ELEMENT'\n");
6526
1.69k
      return(-1);
6527
1.69k
  }
6528
94.8k
        name = xmlParseName(ctxt);
6529
94.8k
  if (name == NULL) {
6530
3.00k
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6531
3.00k
         "xmlParseElementDecl: no name for Element\n");
6532
3.00k
      return(-1);
6533
3.00k
  }
6534
91.8k
  if (SKIP_BLANKS == 0) {
6535
54.3k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6536
54.3k
         "Space required after the element name\n");
6537
54.3k
  }
6538
91.8k
  if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
6539
986
      SKIP(5);
6540
      /*
6541
       * Element must always be empty.
6542
       */
6543
986
      ret = XML_ELEMENT_TYPE_EMPTY;
6544
90.8k
  } else if ((RAW == 'A') && (NXT(1) == 'N') &&
6545
898
             (NXT(2) == 'Y')) {
6546
448
      SKIP(3);
6547
      /*
6548
       * Element is a generic container.
6549
       */
6550
448
      ret = XML_ELEMENT_TYPE_ANY;
6551
90.4k
  } else if (RAW == '(') {
6552
82.5k
      ret = xmlParseElementContentDecl(ctxt, name, &content);
6553
82.5k
  } else {
6554
      /*
6555
       * [ WFC: PEs in Internal Subset ] error handling.
6556
       */
6557
7.81k
      if ((RAW == '%') && (ctxt->external == 0) &&
6558
693
          (ctxt->inputNr == 1)) {
6559
693
    xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
6560
693
    "PEReference: forbidden within markup decl in internal subset\n");
6561
7.12k
      } else {
6562
7.12k
    xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6563
7.12k
          "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6564
7.12k
            }
6565
7.81k
      return(-1);
6566
7.81k
  }
6567
6568
84.0k
  SKIP_BLANKS;
6569
6570
84.0k
  if (RAW != '>') {
6571
51.7k
      xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
6572
51.7k
      if (content != NULL) {
6573
6.65k
    xmlFreeDocElementContent(ctxt->myDoc, content);
6574
6.65k
      }
6575
51.7k
  } else {
6576
32.3k
      if (inputid != ctxt->input->id) {
6577
4.18k
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6578
4.18k
                               "Element declaration doesn't start and stop in"
6579
4.18k
                               " the same entity\n");
6580
4.18k
      }
6581
6582
32.3k
      NEXT;
6583
32.3k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6584
32.3k
    (ctxt->sax->elementDecl != NULL)) {
6585
32.3k
    if (content != NULL)
6586
15.5k
        content->parent = NULL;
6587
32.3k
          ctxt->sax->elementDecl(ctxt->userData, name, ret,
6588
32.3k
                           content);
6589
32.3k
    if ((content != NULL) && (content->parent == NULL)) {
6590
        /*
6591
         * this is a trick: if xmlAddElementDecl is called,
6592
         * instead of copying the full tree it is plugged directly
6593
         * if called from the parser. Avoid duplicating the
6594
         * interfaces or changing the API/ABI
6595
         */
6596
13.4k
        xmlFreeDocElementContent(ctxt->myDoc, content);
6597
13.4k
    }
6598
32.3k
      } else if (content != NULL) {
6599
0
    xmlFreeDocElementContent(ctxt->myDoc, content);
6600
0
      }
6601
32.3k
  }
6602
84.0k
    }
6603
91.5k
    return(ret);
6604
104k
}
6605
6606
/**
6607
 * xmlParseConditionalSections:
6608
 * @ctxt:  an XML parser context
6609
 *
6610
 * [61] conditionalSect ::= includeSect | ignoreSect
6611
 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6612
 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6613
 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6614
 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6615
 */
6616
6617
static void
6618
262k
xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
6619
262k
    int id = ctxt->input->id;
6620
6621
262k
    SKIP(3);
6622
262k
    SKIP_BLANKS;
6623
262k
    if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
6624
254k
  SKIP(7);
6625
254k
  SKIP_BLANKS;
6626
254k
  if (RAW != '[') {
6627
22
      xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6628
22
      xmlHaltParser(ctxt);
6629
22
      return;
6630
254k
  } else {
6631
254k
      if (ctxt->input->id != id) {
6632
2
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6633
2
                         "All markup of the conditional section is not"
6634
2
                               " in the same entity\n");
6635
2
      }
6636
254k
      NEXT;
6637
254k
  }
6638
254k
  if (xmlParserDebugEntities) {
6639
0
      if ((ctxt->input != NULL) && (ctxt->input->filename))
6640
0
    xmlGenericError(xmlGenericErrorContext,
6641
0
      "%s(%d): ", ctxt->input->filename,
6642
0
      ctxt->input->line);
6643
0
      xmlGenericError(xmlGenericErrorContext,
6644
0
        "Entering INCLUDE Conditional Section\n");
6645
0
  }
6646
6647
254k
        SKIP_BLANKS;
6648
254k
        GROW;
6649
767k
  while (((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
6650
513k
          (NXT(2) != '>'))) && (ctxt->instate != XML_PARSER_EOF)) {
6651
513k
      const xmlChar *check = CUR_PTR;
6652
513k
      unsigned int cons = ctxt->input->consumed;
6653
6654
513k
      if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6655
256k
    xmlParseConditionalSections(ctxt);
6656
256k
      } else
6657
257k
    xmlParseMarkupDecl(ctxt);
6658
6659
513k
            SKIP_BLANKS;
6660
513k
            GROW;
6661
6662
513k
      if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
6663
338
    xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6664
338
    xmlHaltParser(ctxt);
6665
338
    break;
6666
338
      }
6667
513k
  }
6668
254k
  if (xmlParserDebugEntities) {
6669
0
      if ((ctxt->input != NULL) && (ctxt->input->filename))
6670
0
    xmlGenericError(xmlGenericErrorContext,
6671
0
      "%s(%d): ", ctxt->input->filename,
6672
0
      ctxt->input->line);
6673
0
      xmlGenericError(xmlGenericErrorContext,
6674
0
        "Leaving INCLUDE Conditional Section\n");
6675
0
  }
6676
6677
254k
    } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
6678
7.54k
  int state;
6679
7.54k
  xmlParserInputState instate;
6680
7.54k
  int depth = 0;
6681
6682
7.54k
  SKIP(6);
6683
7.54k
  SKIP_BLANKS;
6684
7.54k
  if (RAW != '[') {
6685
21
      xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6686
21
      xmlHaltParser(ctxt);
6687
21
      return;
6688
7.52k
  } else {
6689
7.52k
      if (ctxt->input->id != id) {
6690
244
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6691
244
                         "All markup of the conditional section is not"
6692
244
                               " in the same entity\n");
6693
244
      }
6694
7.52k
      NEXT;
6695
7.52k
  }
6696
7.52k
  if (xmlParserDebugEntities) {
6697
0
      if ((ctxt->input != NULL) && (ctxt->input->filename))
6698
0
    xmlGenericError(xmlGenericErrorContext,
6699
0
      "%s(%d): ", ctxt->input->filename,
6700
0
      ctxt->input->line);
6701
0
      xmlGenericError(xmlGenericErrorContext,
6702
0
        "Entering IGNORE Conditional Section\n");
6703
0
  }
6704
6705
  /*
6706
   * Parse up to the end of the conditional section
6707
   * but disable the SAX events that build the DTD in the meantime
6708
   */
6709
7.52k
  state = ctxt->disableSAX;
6710
7.52k
  instate = ctxt->instate;
6711
7.52k
  if (ctxt->recovery == 0) ctxt->disableSAX = 1;
6712
7.52k
  ctxt->instate = XML_PARSER_IGNORE;
6713
6714
1.70M
  while (((depth >= 0) && (RAW != 0)) &&
6715
1.69M
               (ctxt->instate != XML_PARSER_EOF)) {
6716
1.69M
    if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6717
13.7k
      depth++;
6718
13.7k
      SKIP(3);
6719
13.7k
      continue;
6720
13.7k
    }
6721
1.68M
    if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6722
5.50k
      if (--depth >= 0) SKIP(3);
6723
5.50k
      continue;
6724
5.50k
    }
6725
1.67M
    NEXT;
6726
1.67M
    continue;
6727
1.68M
  }
6728
6729
7.52k
  ctxt->disableSAX = state;
6730
7.52k
  ctxt->instate = instate;
6731
6732
7.52k
  if (xmlParserDebugEntities) {
6733
0
      if ((ctxt->input != NULL) && (ctxt->input->filename))
6734
0
    xmlGenericError(xmlGenericErrorContext,
6735
0
      "%s(%d): ", ctxt->input->filename,
6736
0
      ctxt->input->line);
6737
0
      xmlGenericError(xmlGenericErrorContext,
6738
0
        "Leaving IGNORE Conditional Section\n");
6739
0
  }
6740
6741
7.52k
    } else {
6742
133
  xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
6743
133
  xmlHaltParser(ctxt);
6744
133
  return;
6745
133
    }
6746
6747
262k
    if (RAW == 0)
6748
259k
        SHRINK;
6749
6750
262k
    if (RAW == 0) {
6751
259k
  xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
6752
259k
    } else {
6753
3.20k
  if (ctxt->input->id != id) {
6754
174
      xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6755
174
                     "All markup of the conditional section is not in"
6756
174
                           " the same entity\n");
6757
174
  }
6758
3.20k
  if ((ctxt-> instate != XML_PARSER_EOF) &&
6759
3.20k
      ((ctxt->input->cur + 3) <= ctxt->input->end))
6760
3.20k
      SKIP(3);
6761
3.20k
    }
6762
262k
}
6763
6764
/**
6765
 * xmlParseMarkupDecl:
6766
 * @ctxt:  an XML parser context
6767
 *
6768
 * parse Markup declarations
6769
 *
6770
 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6771
 *                     NotationDecl | PI | Comment
6772
 *
6773
 * [ VC: Proper Declaration/PE Nesting ]
6774
 * Parameter-entity replacement text must be properly nested with
6775
 * markup declarations. That is to say, if either the first character
6776
 * or the last character of a markup declaration (markupdecl above) is
6777
 * contained in the replacement text for a parameter-entity reference,
6778
 * both must be contained in the same replacement text.
6779
 *
6780
 * [ WFC: PEs in Internal Subset ]
6781
 * In the internal DTD subset, parameter-entity references can occur
6782
 * only where markup declarations can occur, not within markup declarations.
6783
 * (This does not apply to references that occur in external parameter
6784
 * entities or to the external subset.)
6785
 */
6786
void
6787
1.56M
xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
6788
1.56M
    GROW;
6789
1.56M
    if (CUR == '<') {
6790
982k
        if (NXT(1) == '!') {
6791
933k
      switch (NXT(2)) {
6792
336k
          case 'E':
6793
336k
        if (NXT(3) == 'L')
6794
104k
      xmlParseElementDecl(ctxt);
6795
232k
        else if (NXT(3) == 'N')
6796
230k
      xmlParseEntityDecl(ctxt);
6797
336k
        break;
6798
247k
          case 'A':
6799
247k
        xmlParseAttributeListDecl(ctxt);
6800
247k
        break;
6801
59.4k
          case 'N':
6802
59.4k
        xmlParseNotationDecl(ctxt);
6803
59.4k
        break;
6804
283k
          case '-':
6805
283k
        xmlParseComment(ctxt);
6806
283k
        break;
6807
6.23k
    default:
6808
        /* there is an error but it will be detected later */
6809
6.23k
        break;
6810
933k
      }
6811
933k
  } else if (NXT(1) == '?') {
6812
42.4k
      xmlParsePI(ctxt);
6813
42.4k
  }
6814
982k
    }
6815
6816
    /*
6817
     * detect requirement to exit there and act accordingly
6818
     * and avoid having instate overridden later on
6819
     */
6820
1.56M
    if (ctxt->instate == XML_PARSER_EOF)
6821
6.98k
        return;
6822
6823
    /*
6824
     * Conditional sections are allowed from entities included
6825
     * by PE References in the internal subset.
6826
     */
6827
1.55M
    if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
6828
827k
        if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6829
6.14k
      xmlParseConditionalSections(ctxt);
6830
6.14k
  }
6831
827k
    }
6832
6833
1.55M
    ctxt->instate = XML_PARSER_DTD;
6834
1.55M
}
6835
6836
/**
6837
 * xmlParseTextDecl:
6838
 * @ctxt:  an XML parser context
6839
 *
6840
 * parse an XML declaration header for external entities
6841
 *
6842
 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
6843
 */
6844
6845
void
6846
0
xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
6847
0
    xmlChar *version;
6848
0
    const xmlChar *encoding;
6849
6850
    /*
6851
     * We know that '<?xml' is here.
6852
     */
6853
0
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
6854
0
  SKIP(5);
6855
0
    } else {
6856
0
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
6857
0
  return;
6858
0
    }
6859
6860
0
    if (SKIP_BLANKS == 0) {
6861
0
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6862
0
           "Space needed after '<?xml'\n");
6863
0
    }
6864
6865
    /*
6866
     * We may have the VersionInfo here.
6867
     */
6868
0
    version = xmlParseVersionInfo(ctxt);
6869
0
    if (version == NULL)
6870
0
  version = xmlCharStrdup(XML_DEFAULT_VERSION);
6871
0
    else {
6872
0
  if (SKIP_BLANKS == 0) {
6873
0
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6874
0
               "Space needed here\n");
6875
0
  }
6876
0
    }
6877
0
    ctxt->input->version = version;
6878
6879
    /*
6880
     * We must have the encoding declaration
6881
     */
6882
0
    encoding = xmlParseEncodingDecl(ctxt);
6883
0
    if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6884
  /*
6885
   * The XML REC instructs us to stop parsing right here
6886
   */
6887
0
        return;
6888
0
    }
6889
0
    if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
6890
0
  xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
6891
0
           "Missing encoding in text declaration\n");
6892
0
    }
6893
6894
0
    SKIP_BLANKS;
6895
0
    if ((RAW == '?') && (NXT(1) == '>')) {
6896
0
        SKIP(2);
6897
0
    } else if (RAW == '>') {
6898
        /* Deprecated old WD ... */
6899
0
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
6900
0
  NEXT;
6901
0
    } else {
6902
0
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
6903
0
  MOVETO_ENDTAG(CUR_PTR);
6904
0
  NEXT;
6905
0
    }
6906
0
}
6907
6908
/**
6909
 * xmlParseExternalSubset:
6910
 * @ctxt:  an XML parser context
6911
 * @ExternalID: the external identifier
6912
 * @SystemID: the system identifier (or URL)
6913
 *
6914
 * parse Markup declarations from an external subset
6915
 *
6916
 * [30] extSubset ::= textDecl? extSubsetDecl
6917
 *
6918
 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
6919
 */
6920
void
6921
xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
6922
0
                       const xmlChar *SystemID) {
6923
0
    xmlDetectSAX2(ctxt);
6924
0
    GROW;
6925
6926
0
    if ((ctxt->encoding == NULL) &&
6927
0
        (ctxt->input->end - ctxt->input->cur >= 4)) {
6928
0
        xmlChar start[4];
6929
0
  xmlCharEncoding enc;
6930
6931
0
  start[0] = RAW;
6932
0
  start[1] = NXT(1);
6933
0
  start[2] = NXT(2);
6934
0
  start[3] = NXT(3);
6935
0
  enc = xmlDetectCharEncoding(start, 4);
6936
0
  if (enc != XML_CHAR_ENCODING_NONE)
6937
0
      xmlSwitchEncoding(ctxt, enc);
6938
0
    }
6939
6940
0
    if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
6941
0
  xmlParseTextDecl(ctxt);
6942
0
  if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6943
      /*
6944
       * The XML REC instructs us to stop parsing right here
6945
       */
6946
0
      xmlHaltParser(ctxt);
6947
0
      return;
6948
0
  }
6949
0
    }
6950
0
    if (ctxt->myDoc == NULL) {
6951
0
        ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
6952
0
  if (ctxt->myDoc == NULL) {
6953
0
      xmlErrMemory(ctxt, "New Doc failed");
6954
0
      return;
6955
0
  }
6956
0
  ctxt->myDoc->properties = XML_DOC_INTERNAL;
6957
0
    }
6958
0
    if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
6959
0
        xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
6960
6961
0
    ctxt->instate = XML_PARSER_DTD;
6962
0
    ctxt->external = 1;
6963
0
    SKIP_BLANKS;
6964
0
    while (((RAW == '<') && (NXT(1) == '?')) ||
6965
0
           ((RAW == '<') && (NXT(1) == '!')) ||
6966
0
     (RAW == '%')) {
6967
0
  const xmlChar *check = CUR_PTR;
6968
0
  unsigned int cons = ctxt->input->consumed;
6969
6970
0
  GROW;
6971
0
        if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6972
0
      xmlParseConditionalSections(ctxt);
6973
0
  } else
6974
0
      xmlParseMarkupDecl(ctxt);
6975
0
        SKIP_BLANKS;
6976
6977
0
  if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
6978
0
      xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6979
0
      break;
6980
0
  }
6981
0
    }
6982
6983
0
    if (RAW != 0) {
6984
0
  xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6985
0
    }
6986
6987
0
}
6988
6989
/**
6990
 * xmlParseReference:
6991
 * @ctxt:  an XML parser context
6992
 *
6993
 * parse and handle entity references in content, depending on the SAX
6994
 * interface, this may end up in a call to characters() if this is a
6995
 * CharRef, a predefined entity, if there is no reference() callback,
6996
 * or if the parser was asked to switch to that mode.
6997
 *
6998
 * [67] Reference ::= EntityRef | CharRef
6999
 */
7000
void
7001
2.15M
xmlParseReference(xmlParserCtxtPtr ctxt) {
7002
2.15M
    xmlEntityPtr ent;
7003
2.15M
    xmlChar *val;
7004
2.15M
    int was_checked;
7005
2.15M
    xmlNodePtr list = NULL;
7006
2.15M
    xmlParserErrors ret = XML_ERR_OK;
7007
7008
7009
2.15M
    if (RAW != '&')
7010
0
        return;
7011
7012
    /*
7013
     * Simple case of a CharRef
7014
     */
7015
2.15M
    if (NXT(1) == '#') {
7016
413k
  int i = 0;
7017
413k
  xmlChar out[10];
7018
413k
  int hex = NXT(2);
7019
413k
  int value = xmlParseCharRef(ctxt);
7020
7021
413k
  if (value == 0)
7022
267k
      return;
7023
145k
  if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
7024
      /*
7025
       * So we are using non-UTF-8 buffers
7026
       * Check that the char fit on 8bits, if not
7027
       * generate a CharRef.
7028
       */
7029
74.8k
      if (value <= 0xFF) {
7030
45.9k
    out[0] = value;
7031
45.9k
    out[1] = 0;
7032
45.9k
    if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7033
45.9k
        (!ctxt->disableSAX))
7034
45.9k
        ctxt->sax->characters(ctxt->userData, out, 1);
7035
45.9k
      } else {
7036
28.8k
    if ((hex == 'x') || (hex == 'X'))
7037
4.66k
        snprintf((char *)out, sizeof(out), "#x%X", value);
7038
24.1k
    else
7039
24.1k
        snprintf((char *)out, sizeof(out), "#%d", value);
7040
28.8k
    if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7041
28.8k
        (!ctxt->disableSAX))
7042
28.8k
        ctxt->sax->reference(ctxt->userData, out);
7043
28.8k
      }
7044
74.8k
  } else {
7045
      /*
7046
       * Just encode the value in UTF-8
7047
       */
7048
70.5k
      COPY_BUF(0 ,out, i, value);
7049
70.5k
      out[i] = 0;
7050
70.5k
      if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7051
70.5k
    (!ctxt->disableSAX))
7052
39.6k
    ctxt->sax->characters(ctxt->userData, out, i);
7053
70.5k
  }
7054
145k
  return;
7055
413k
    }
7056
7057
    /*
7058
     * We are seeing an entity reference
7059
     */
7060
1.74M
    ent = xmlParseEntityRef(ctxt);
7061
1.74M
    if (ent == NULL) return;
7062
696k
    if (!ctxt->wellFormed)
7063
458k
  return;
7064
238k
    was_checked = ent->checked;
7065
7066
    /* special case of predefined entities */
7067
238k
    if ((ent->name == NULL) ||
7068
238k
        (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
7069
13.3k
  val = ent->content;
7070
13.3k
  if (val == NULL) return;
7071
  /*
7072
   * inline the entity.
7073
   */
7074
13.3k
  if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7075
13.3k
      (!ctxt->disableSAX))
7076
13.3k
      ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
7077
13.3k
  return;
7078
13.3k
    }
7079
7080
    /*
7081
     * The first reference to the entity trigger a parsing phase
7082
     * where the ent->children is filled with the result from
7083
     * the parsing.
7084
     * Note: external parsed entities will not be loaded, it is not
7085
     * required for a non-validating parser, unless the parsing option
7086
     * of validating, or substituting entities were given. Doing so is
7087
     * far more secure as the parser will only process data coming from
7088
     * the document entity by default.
7089
     */
7090
225k
    if (((ent->checked == 0) ||
7091
26.5k
         ((ent->children == NULL) && (ctxt->options & XML_PARSE_NOENT))) &&
7092
198k
        ((ent->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY) ||
7093
198k
         (ctxt->options & (XML_PARSE_NOENT | XML_PARSE_DTDVALID)))) {
7094
198k
  unsigned long oldnbent = ctxt->nbentities;
7095
7096
  /*
7097
   * This is a bit hackish but this seems the best
7098
   * way to make sure both SAX and DOM entity support
7099
   * behaves okay.
7100
   */
7101
198k
  void *user_data;
7102
198k
  if (ctxt->userData == ctxt)
7103
198k
      user_data = NULL;
7104
0
  else
7105
0
      user_data = ctxt->userData;
7106
7107
  /*
7108
   * Check that this entity is well formed
7109
   * 4.3.2: An internal general parsed entity is well-formed
7110
   * if its replacement text matches the production labeled
7111
   * content.
7112
   */
7113
198k
  if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7114
198k
      ctxt->depth++;
7115
198k
      ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content,
7116
198k
                                                user_data, &list);
7117
198k
      ctxt->depth--;
7118
7119
198k
  } else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7120
0
      ctxt->depth++;
7121
0
      ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax,
7122
0
                                     user_data, ctxt->depth, ent->URI,
7123
0
             ent->ExternalID, &list);
7124
0
      ctxt->depth--;
7125
0
  } else {
7126
0
      ret = XML_ERR_ENTITY_PE_INTERNAL;
7127
0
      xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7128
0
       "invalid entity type found\n", NULL);
7129
0
  }
7130
7131
  /*
7132
   * Store the number of entities needing parsing for this entity
7133
   * content and do checkings
7134
   */
7135
198k
  ent->checked = (ctxt->nbentities - oldnbent + 1) * 2;
7136
198k
  if ((ent->content != NULL) && (xmlStrchr(ent->content, '<')))
7137
178k
      ent->checked |= 1;
7138
198k
  if (ret == XML_ERR_ENTITY_LOOP) {
7139
19.3k
      xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7140
19.3k
      xmlFreeNodeList(list);
7141
19.3k
      return;
7142
19.3k
  }
7143
178k
  if (xmlParserEntityCheck(ctxt, 0, ent, 0)) {
7144
38.8k
      xmlFreeNodeList(list);
7145
38.8k
      return;
7146
38.8k
  }
7147
7148
139k
  if ((ret == XML_ERR_OK) && (list != NULL)) {
7149
766
      if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
7150
0
       (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
7151
766
    (ent->children == NULL)) {
7152
766
    ent->children = list;
7153
766
    if (ctxt->replaceEntities) {
7154
        /*
7155
         * Prune it directly in the generated document
7156
         * except for single text nodes.
7157
         */
7158
0
        if (((list->type == XML_TEXT_NODE) &&
7159
0
       (list->next == NULL)) ||
7160
0
      (ctxt->parseMode == XML_PARSE_READER)) {
7161
0
      list->parent = (xmlNodePtr) ent;
7162
0
      list = NULL;
7163
0
      ent->owner = 1;
7164
0
        } else {
7165
0
      ent->owner = 0;
7166
0
      while (list != NULL) {
7167
0
          list->parent = (xmlNodePtr) ctxt->node;
7168
0
          list->doc = ctxt->myDoc;
7169
0
          if (list->next == NULL)
7170
0
        ent->last = list;
7171
0
          list = list->next;
7172
0
      }
7173
0
      list = ent->children;
7174
0
#ifdef LIBXML_LEGACY_ENABLED
7175
0
      if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7176
0
        xmlAddEntityReference(ent, list, NULL);
7177
0
#endif /* LIBXML_LEGACY_ENABLED */
7178
0
        }
7179
766
    } else {
7180
766
        ent->owner = 1;
7181
3.85k
        while (list != NULL) {
7182
3.08k
      list->parent = (xmlNodePtr) ent;
7183
3.08k
      xmlSetTreeDoc(list, ent->doc);
7184
3.08k
      if (list->next == NULL)
7185
766
          ent->last = list;
7186
3.08k
      list = list->next;
7187
3.08k
        }
7188
766
    }
7189
766
      } else {
7190
0
    xmlFreeNodeList(list);
7191
0
    list = NULL;
7192
0
      }
7193
139k
  } else if ((ret != XML_ERR_OK) &&
7194
139k
       (ret != XML_WAR_UNDECLARED_ENTITY)) {
7195
138k
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7196
138k
         "Entity '%s' failed to parse\n", ent->name);
7197
138k
      xmlParserEntityCheck(ctxt, 0, ent, 0);
7198
138k
  } else if (list != NULL) {
7199
0
      xmlFreeNodeList(list);
7200
0
      list = NULL;
7201
0
  }
7202
139k
  if (ent->checked == 0)
7203
0
      ent->checked = 2;
7204
7205
        /* Prevent entity from being parsed and expanded twice (Bug 760367). */
7206
139k
        was_checked = 0;
7207
139k
    } else if (ent->checked != 1) {
7208
26.9k
  ctxt->nbentities += ent->checked / 2;
7209
26.9k
    }
7210
7211
    /*
7212
     * Now that the entity content has been gathered
7213
     * provide it to the application, this can take different forms based
7214
     * on the parsing modes.
7215
     */
7216
166k
    if (ent->children == NULL) {
7217
  /*
7218
   * Probably running in SAX mode and the callbacks don't
7219
   * build the entity content. So unless we already went
7220
   * though parsing for first checking go though the entity
7221
   * content to generate callbacks associated to the entity
7222
   */
7223
155k
  if (was_checked != 0) {
7224
16.1k
      void *user_data;
7225
      /*
7226
       * This is a bit hackish but this seems the best
7227
       * way to make sure both SAX and DOM entity support
7228
       * behaves okay.
7229
       */
7230
16.1k
      if (ctxt->userData == ctxt)
7231
16.1k
    user_data = NULL;
7232
0
      else
7233
0
    user_data = ctxt->userData;
7234
7235
16.1k
      if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7236
16.1k
    ctxt->depth++;
7237
16.1k
    ret = xmlParseBalancedChunkMemoryInternal(ctxt,
7238
16.1k
           ent->content, user_data, NULL);
7239
16.1k
    ctxt->depth--;
7240
16.1k
      } else if (ent->etype ==
7241
0
           XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7242
0
    ctxt->depth++;
7243
0
    ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
7244
0
         ctxt->sax, user_data, ctxt->depth,
7245
0
         ent->URI, ent->ExternalID, NULL);
7246
0
    ctxt->depth--;
7247
0
      } else {
7248
0
    ret = XML_ERR_ENTITY_PE_INTERNAL;
7249
0
    xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7250
0
           "invalid entity type found\n", NULL);
7251
0
      }
7252
16.1k
      if (ret == XML_ERR_ENTITY_LOOP) {
7253
38
    xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7254
38
    return;
7255
38
      }
7256
16.1k
  }
7257
155k
  if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7258
155k
      (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7259
      /*
7260
       * Entity reference callback comes second, it's somewhat
7261
       * superfluous but a compatibility to historical behaviour
7262
       */
7263
32.0k
      ctxt->sax->reference(ctxt->userData, ent->name);
7264
32.0k
  }
7265
155k
  return;
7266
155k
    }
7267
7268
    /*
7269
     * If we didn't get any children for the entity being built
7270
     */
7271
11.2k
    if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7272
11.2k
  (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7273
  /*
7274
   * Create a node.
7275
   */
7276
11.2k
  ctxt->sax->reference(ctxt->userData, ent->name);
7277
11.2k
  return;
7278
11.2k
    }
7279
7280
0
    if ((ctxt->replaceEntities) || (ent->children == NULL))  {
7281
  /*
7282
   * There is a problem on the handling of _private for entities
7283
   * (bug 155816): Should we copy the content of the field from
7284
   * the entity (possibly overwriting some value set by the user
7285
   * when a copy is created), should we leave it alone, or should
7286
   * we try to take care of different situations?  The problem
7287
   * is exacerbated by the usage of this field by the xmlReader.
7288
   * To fix this bug, we look at _private on the created node
7289
   * and, if it's NULL, we copy in whatever was in the entity.
7290
   * If it's not NULL we leave it alone.  This is somewhat of a
7291
   * hack - maybe we should have further tests to determine
7292
   * what to do.
7293
   */
7294
0
  if ((ctxt->node != NULL) && (ent->children != NULL)) {
7295
      /*
7296
       * Seems we are generating the DOM content, do
7297
       * a simple tree copy for all references except the first
7298
       * In the first occurrence list contains the replacement.
7299
       */
7300
0
      if (((list == NULL) && (ent->owner == 0)) ||
7301
0
    (ctxt->parseMode == XML_PARSE_READER)) {
7302
0
    xmlNodePtr nw = NULL, cur, firstChild = NULL;
7303
7304
    /*
7305
     * We are copying here, make sure there is no abuse
7306
     */
7307
0
    ctxt->sizeentcopy += ent->length + 5;
7308
0
    if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
7309
0
        return;
7310
7311
    /*
7312
     * when operating on a reader, the entities definitions
7313
     * are always owning the entities subtree.
7314
    if (ctxt->parseMode == XML_PARSE_READER)
7315
        ent->owner = 1;
7316
     */
7317
7318
0
    cur = ent->children;
7319
0
    while (cur != NULL) {
7320
0
        nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7321
0
        if (nw != NULL) {
7322
0
      if (nw->_private == NULL)
7323
0
          nw->_private = cur->_private;
7324
0
      if (firstChild == NULL){
7325
0
          firstChild = nw;
7326
0
      }
7327
0
      nw = xmlAddChild(ctxt->node, nw);
7328
0
        }
7329
0
        if (cur == ent->last) {
7330
      /*
7331
       * needed to detect some strange empty
7332
       * node cases in the reader tests
7333
       */
7334
0
      if ((ctxt->parseMode == XML_PARSE_READER) &&
7335
0
          (nw != NULL) &&
7336
0
          (nw->type == XML_ELEMENT_NODE) &&
7337
0
          (nw->children == NULL))
7338
0
          nw->extra = 1;
7339
7340
0
      break;
7341
0
        }
7342
0
        cur = cur->next;
7343
0
    }
7344
0
#ifdef LIBXML_LEGACY_ENABLED
7345
0
    if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7346
0
      xmlAddEntityReference(ent, firstChild, nw);
7347
0
#endif /* LIBXML_LEGACY_ENABLED */
7348
0
      } else if ((list == NULL) || (ctxt->inputNr > 0)) {
7349
0
    xmlNodePtr nw = NULL, cur, next, last,
7350
0
         firstChild = NULL;
7351
7352
    /*
7353
     * We are copying here, make sure there is no abuse
7354
     */
7355
0
    ctxt->sizeentcopy += ent->length + 5;
7356
0
    if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
7357
0
        return;
7358
7359
    /*
7360
     * Copy the entity child list and make it the new
7361
     * entity child list. The goal is to make sure any
7362
     * ID or REF referenced will be the one from the
7363
     * document content and not the entity copy.
7364
     */
7365
0
    cur = ent->children;
7366
0
    ent->children = NULL;
7367
0
    last = ent->last;
7368
0
    ent->last = NULL;
7369
0
    while (cur != NULL) {
7370
0
        next = cur->next;
7371
0
        cur->next = NULL;
7372
0
        cur->parent = NULL;
7373
0
        nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7374
0
        if (nw != NULL) {
7375
0
      if (nw->_private == NULL)
7376
0
          nw->_private = cur->_private;
7377
0
      if (firstChild == NULL){
7378
0
          firstChild = cur;
7379
0
      }
7380
0
      xmlAddChild((xmlNodePtr) ent, nw);
7381
0
      xmlAddChild(ctxt->node, cur);
7382
0
        }
7383
0
        if (cur == last)
7384
0
      break;
7385
0
        cur = next;
7386
0
    }
7387
0
    if (ent->owner == 0)
7388
0
        ent->owner = 1;
7389
0
#ifdef LIBXML_LEGACY_ENABLED
7390
0
    if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7391
0
      xmlAddEntityReference(ent, firstChild, nw);
7392
0
#endif /* LIBXML_LEGACY_ENABLED */
7393
0
      } else {
7394
0
    const xmlChar *nbktext;
7395
7396
    /*
7397
     * the name change is to avoid coalescing of the
7398
     * node with a possible previous text one which
7399
     * would make ent->children a dangling pointer
7400
     */
7401
0
    nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
7402
0
          -1);
7403
0
    if (ent->children->type == XML_TEXT_NODE)
7404
0
        ent->children->name = nbktext;
7405
0
    if ((ent->last != ent->children) &&
7406
0
        (ent->last->type == XML_TEXT_NODE))
7407
0
        ent->last->name = nbktext;
7408
0
    xmlAddChildList(ctxt->node, ent->children);
7409
0
      }
7410
7411
      /*
7412
       * This is to avoid a nasty side effect, see
7413
       * characters() in SAX.c
7414
       */
7415
0
      ctxt->nodemem = 0;
7416
0
      ctxt->nodelen = 0;
7417
0
      return;
7418
0
  }
7419
0
    }
7420
0
}
7421
7422
/**
7423
 * xmlParseEntityRef:
7424
 * @ctxt:  an XML parser context
7425
 *
7426
 * parse ENTITY references declarations
7427
 *
7428
 * [68] EntityRef ::= '&' Name ';'
7429
 *
7430
 * [ WFC: Entity Declared ]
7431
 * In a document without any DTD, a document with only an internal DTD
7432
 * subset which contains no parameter entity references, or a document
7433
 * with "standalone='yes'", the Name given in the entity reference
7434
 * must match that in an entity declaration, except that well-formed
7435
 * documents need not declare any of the following entities: amp, lt,
7436
 * gt, apos, quot.  The declaration of a parameter entity must precede
7437
 * any reference to it.  Similarly, the declaration of a general entity
7438
 * must precede any reference to it which appears in a default value in an
7439
 * attribute-list declaration. Note that if entities are declared in the
7440
 * external subset or in external parameter entities, a non-validating
7441
 * processor is not obligated to read and process their declarations;
7442
 * for such documents, the rule that an entity must be declared is a
7443
 * well-formedness constraint only if standalone='yes'.
7444
 *
7445
 * [ WFC: Parsed Entity ]
7446
 * An entity reference must not contain the name of an unparsed entity
7447
 *
7448
 * Returns the xmlEntityPtr if found, or NULL otherwise.
7449
 */
7450
xmlEntityPtr
7451
3.05M
xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
7452
3.05M
    const xmlChar *name;
7453
3.05M
    xmlEntityPtr ent = NULL;
7454
7455
3.05M
    GROW;
7456
3.05M
    if (ctxt->instate == XML_PARSER_EOF)
7457
0
        return(NULL);
7458
7459
3.05M
    if (RAW != '&')
7460
0
        return(NULL);
7461
3.05M
    NEXT;
7462
3.05M
    name = xmlParseName(ctxt);
7463
3.05M
    if (name == NULL) {
7464
775k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7465
775k
           "xmlParseEntityRef: no name\n");
7466
775k
        return(NULL);
7467
775k
    }
7468
2.28M
    if (RAW != ';') {
7469
750k
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7470
750k
  return(NULL);
7471
750k
    }
7472
1.53M
    NEXT;
7473
7474
    /*
7475
     * Predefined entities override any extra definition
7476
     */
7477
1.53M
    if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7478
1.53M
        ent = xmlGetPredefinedEntity(name);
7479
1.53M
        if (ent != NULL)
7480
574k
            return(ent);
7481
1.53M
    }
7482
7483
    /*
7484
     * Increase the number of entity references parsed
7485
     */
7486
957k
    ctxt->nbentities++;
7487
7488
    /*
7489
     * Ask first SAX for entity resolution, otherwise try the
7490
     * entities which may have stored in the parser context.
7491
     */
7492
957k
    if (ctxt->sax != NULL) {
7493
957k
  if (ctxt->sax->getEntity != NULL)
7494
957k
      ent = ctxt->sax->getEntity(ctxt->userData, name);
7495
957k
  if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7496
4.74k
      (ctxt->options & XML_PARSE_OLDSAX))
7497
0
      ent = xmlGetPredefinedEntity(name);
7498
957k
  if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7499
4.74k
      (ctxt->userData==ctxt)) {
7500
4.74k
      ent = xmlSAX2GetEntity(ctxt, name);
7501
4.74k
  }
7502
957k
    }
7503
957k
    if (ctxt->instate == XML_PARSER_EOF)
7504
0
  return(NULL);
7505
    /*
7506
     * [ WFC: Entity Declared ]
7507
     * In a document without any DTD, a document with only an
7508
     * internal DTD subset which contains no parameter entity
7509
     * references, or a document with "standalone='yes'", the
7510
     * Name given in the entity reference must match that in an
7511
     * entity declaration, except that well-formed documents
7512
     * need not declare any of the following entities: amp, lt,
7513
     * gt, apos, quot.
7514
     * The declaration of a parameter entity must precede any
7515
     * reference to it.
7516
     * Similarly, the declaration of a general entity must
7517
     * precede any reference to it which appears in a default
7518
     * value in an attribute-list declaration. Note that if
7519
     * entities are declared in the external subset or in
7520
     * external parameter entities, a non-validating processor
7521
     * is not obligated to read and process their declarations;
7522
     * for such documents, the rule that an entity must be
7523
     * declared is a well-formedness constraint only if
7524
     * standalone='yes'.
7525
     */
7526
957k
    if (ent == NULL) {
7527
419k
  if ((ctxt->standalone == 1) ||
7528
416k
      ((ctxt->hasExternalSubset == 0) &&
7529
388k
       (ctxt->hasPErefs == 0))) {
7530
137k
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7531
137k
         "Entity '%s' not defined\n", name);
7532
282k
  } else {
7533
282k
      xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7534
282k
         "Entity '%s' not defined\n", name);
7535
282k
      if ((ctxt->inSubset == 0) &&
7536
37.4k
    (ctxt->sax != NULL) &&
7537
37.4k
    (ctxt->sax->reference != NULL)) {
7538
37.4k
    ctxt->sax->reference(ctxt->userData, name);
7539
37.4k
      }
7540
282k
  }
7541
419k
  xmlParserEntityCheck(ctxt, 0, ent, 0);
7542
419k
  ctxt->valid = 0;
7543
419k
    }
7544
7545
    /*
7546
     * [ WFC: Parsed Entity ]
7547
     * An entity reference must not contain the name of an
7548
     * unparsed entity
7549
     */
7550
538k
    else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7551
315
  xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7552
315
     "Entity reference to unparsed entity %s\n", name);
7553
315
    }
7554
7555
    /*
7556
     * [ WFC: No External Entity References ]
7557
     * Attribute values cannot contain direct or indirect
7558
     * entity references to external entities.
7559
     */
7560
537k
    else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7561
174k
       (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7562
1.03k
  xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7563
1.03k
       "Attribute references external entity '%s'\n", name);
7564
1.03k
    }
7565
    /*
7566
     * [ WFC: No < in Attribute Values ]
7567
     * The replacement text of any entity referred to directly or
7568
     * indirectly in an attribute value (other than "&lt;") must
7569
     * not contain a <.
7570
     */
7571
536k
    else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7572
173k
       (ent != NULL) && 
7573
173k
       (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
7574
173k
  if (((ent->checked & 1) || (ent->checked == 0)) &&
7575
76.6k
       (ent->content != NULL) && (xmlStrchr(ent->content, '<'))) {
7576
60.7k
      xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7577
60.7k
  "'<' in entity '%s' is not allowed in attributes values\n", name);
7578
60.7k
        }
7579
173k
    }
7580
7581
    /*
7582
     * Internal check, no parameter entities here ...
7583
     */
7584
363k
    else {
7585
363k
  switch (ent->etype) {
7586
0
      case XML_INTERNAL_PARAMETER_ENTITY:
7587
0
      case XML_EXTERNAL_PARAMETER_ENTITY:
7588
0
      xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7589
0
       "Attempt to reference the parameter entity '%s'\n",
7590
0
            name);
7591
0
      break;
7592
363k
      default:
7593
363k
      break;
7594
363k
  }
7595
363k
    }
7596
7597
    /*
7598
     * [ WFC: No Recursion ]
7599
     * A parsed entity must not contain a recursive reference
7600
     * to itself, either directly or indirectly.
7601
     * Done somewhere else
7602
     */
7603
957k
    return(ent);
7604
957k
}
7605
7606
/**
7607
 * xmlParseStringEntityRef:
7608
 * @ctxt:  an XML parser context
7609
 * @str:  a pointer to an index in the string
7610
 *
7611
 * parse ENTITY references declarations, but this version parses it from
7612
 * a string value.
7613
 *
7614
 * [68] EntityRef ::= '&' Name ';'
7615
 *
7616
 * [ WFC: Entity Declared ]
7617
 * In a document without any DTD, a document with only an internal DTD
7618
 * subset which contains no parameter entity references, or a document
7619
 * with "standalone='yes'", the Name given in the entity reference
7620
 * must match that in an entity declaration, except that well-formed
7621
 * documents need not declare any of the following entities: amp, lt,
7622
 * gt, apos, quot.  The declaration of a parameter entity must precede
7623
 * any reference to it.  Similarly, the declaration of a general entity
7624
 * must precede any reference to it which appears in a default value in an
7625
 * attribute-list declaration. Note that if entities are declared in the
7626
 * external subset or in external parameter entities, a non-validating
7627
 * processor is not obligated to read and process their declarations;
7628
 * for such documents, the rule that an entity must be declared is a
7629
 * well-formedness constraint only if standalone='yes'.
7630
 *
7631
 * [ WFC: Parsed Entity ]
7632
 * An entity reference must not contain the name of an unparsed entity
7633
 *
7634
 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7635
 * is updated to the current location in the string.
7636
 */
7637
static xmlEntityPtr
7638
1.16M
xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7639
1.16M
    xmlChar *name;
7640
1.16M
    const xmlChar *ptr;
7641
1.16M
    xmlChar cur;
7642
1.16M
    xmlEntityPtr ent = NULL;
7643
7644
1.16M
    if ((str == NULL) || (*str == NULL))
7645
0
        return(NULL);
7646
1.16M
    ptr = *str;
7647
1.16M
    cur = *ptr;
7648
1.16M
    if (cur != '&')
7649
0
  return(NULL);
7650
7651
1.16M
    ptr++;
7652
1.16M
    name = xmlParseStringName(ctxt, &ptr);
7653
1.16M
    if (name == NULL) {
7654
16.8k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7655
16.8k
           "xmlParseStringEntityRef: no name\n");
7656
16.8k
  *str = ptr;
7657
16.8k
  return(NULL);
7658
16.8k
    }
7659
1.14M
    if (*ptr != ';') {
7660
41.4k
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7661
41.4k
        xmlFree(name);
7662
41.4k
  *str = ptr;
7663
41.4k
  return(NULL);
7664
41.4k
    }
7665
1.10M
    ptr++;
7666
7667
7668
    /*
7669
     * Predefined entities override any extra definition
7670
     */
7671
1.10M
    if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7672
1.10M
        ent = xmlGetPredefinedEntity(name);
7673
1.10M
        if (ent != NULL) {
7674
50.3k
            xmlFree(name);
7675
50.3k
            *str = ptr;
7676
50.3k
            return(ent);
7677
50.3k
        }
7678
1.10M
    }
7679
7680
    /*
7681
     * Increate the number of entity references parsed
7682
     */
7683
1.05M
    ctxt->nbentities++;
7684
7685
    /*
7686
     * Ask first SAX for entity resolution, otherwise try the
7687
     * entities which may have stored in the parser context.
7688
     */
7689
1.05M
    if (ctxt->sax != NULL) {
7690
1.05M
  if (ctxt->sax->getEntity != NULL)
7691
1.05M
      ent = ctxt->sax->getEntity(ctxt->userData, name);
7692
1.05M
  if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX))
7693
0
      ent = xmlGetPredefinedEntity(name);
7694
1.05M
  if ((ent == NULL) && (ctxt->userData==ctxt)) {
7695
349k
      ent = xmlSAX2GetEntity(ctxt, name);
7696
349k
  }
7697
1.05M
    }
7698
1.05M
    if (ctxt->instate == XML_PARSER_EOF) {
7699
0
  xmlFree(name);
7700
0
  return(NULL);
7701
0
    }
7702
7703
    /*
7704
     * [ WFC: Entity Declared ]
7705
     * In a document without any DTD, a document with only an
7706
     * internal DTD subset which contains no parameter entity
7707
     * references, or a document with "standalone='yes'", the
7708
     * Name given in the entity reference must match that in an
7709
     * entity declaration, except that well-formed documents
7710
     * need not declare any of the following entities: amp, lt,
7711
     * gt, apos, quot.
7712
     * The declaration of a parameter entity must precede any
7713
     * reference to it.
7714
     * Similarly, the declaration of a general entity must
7715
     * precede any reference to it which appears in a default
7716
     * value in an attribute-list declaration. Note that if
7717
     * entities are declared in the external subset or in
7718
     * external parameter entities, a non-validating processor
7719
     * is not obligated to read and process their declarations;
7720
     * for such documents, the rule that an entity must be
7721
     * declared is a well-formedness constraint only if
7722
     * standalone='yes'.
7723
     */
7724
1.05M
    if (ent == NULL) {
7725
349k
  if ((ctxt->standalone == 1) ||
7726
348k
      ((ctxt->hasExternalSubset == 0) &&
7727
345k
       (ctxt->hasPErefs == 0))) {
7728
342k
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7729
342k
         "Entity '%s' not defined\n", name);
7730
342k
  } else {
7731
7.08k
      xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7732
7.08k
        "Entity '%s' not defined\n",
7733
7.08k
        name);
7734
7.08k
  }
7735
349k
  xmlParserEntityCheck(ctxt, 0, ent, 0);
7736
  /* TODO ? check regressions ctxt->valid = 0; */
7737
349k
    }
7738
7739
    /*
7740
     * [ WFC: Parsed Entity ]
7741
     * An entity reference must not contain the name of an
7742
     * unparsed entity
7743
     */
7744
703k
    else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7745
352
  xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7746
352
     "Entity reference to unparsed entity %s\n", name);
7747
352
    }
7748
7749
    /*
7750
     * [ WFC: No External Entity References ]
7751
     * Attribute values cannot contain direct or indirect
7752
     * entity references to external entities.
7753
     */
7754
703k
    else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7755
703k
       (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7756
1.83k
  xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7757
1.83k
   "Attribute references external entity '%s'\n", name);
7758
1.83k
    }
7759
    /*
7760
     * [ WFC: No < in Attribute Values ]
7761
     * The replacement text of any entity referred to directly or
7762
     * indirectly in an attribute value (other than "&lt;") must
7763
     * not contain a <.
7764
     */
7765
701k
    else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7766
701k
       (ent != NULL) && (ent->content != NULL) &&
7767
657k
       (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
7768
657k
       (xmlStrchr(ent->content, '<'))) {
7769
67.0k
  xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7770
67.0k
     "'<' in entity '%s' is not allowed in attributes values\n",
7771
67.0k
        name);
7772
67.0k
    }
7773
7774
    /*
7775
     * Internal check, no parameter entities here ...
7776
     */
7777
634k
    else {
7778
634k
  switch (ent->etype) {
7779
0
      case XML_INTERNAL_PARAMETER_ENTITY:
7780
0
      case XML_EXTERNAL_PARAMETER_ENTITY:
7781
0
    xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7782
0
       "Attempt to reference the parameter entity '%s'\n",
7783
0
          name);
7784
0
      break;
7785
634k
      default:
7786
634k
      break;
7787
634k
  }
7788
634k
    }
7789
7790
    /*
7791
     * [ WFC: No Recursion ]
7792
     * A parsed entity must not contain a recursive reference
7793
     * to itself, either directly or indirectly.
7794
     * Done somewhere else
7795
     */
7796
7797
1.05M
    xmlFree(name);
7798
1.05M
    *str = ptr;
7799
1.05M
    return(ent);
7800
1.05M
}
7801
7802
/**
7803
 * xmlParsePEReference:
7804
 * @ctxt:  an XML parser context
7805
 *
7806
 * parse PEReference declarations
7807
 * The entity content is handled directly by pushing its content as
7808
 * a new input stream.
7809
 *
7810
 * [69] PEReference ::= '%' Name ';'
7811
 *
7812
 * [ WFC: No Recursion ]
7813
 * A parsed entity must not contain a recursive
7814
 * reference to itself, either directly or indirectly.
7815
 *
7816
 * [ WFC: Entity Declared ]
7817
 * In a document without any DTD, a document with only an internal DTD
7818
 * subset which contains no parameter entity references, or a document
7819
 * with "standalone='yes'", ...  ... The declaration of a parameter
7820
 * entity must precede any reference to it...
7821
 *
7822
 * [ VC: Entity Declared ]
7823
 * In a document with an external subset or external parameter entities
7824
 * with "standalone='no'", ...  ... The declaration of a parameter entity
7825
 * must precede any reference to it...
7826
 *
7827
 * [ WFC: In DTD ]
7828
 * Parameter-entity references may only appear in the DTD.
7829
 * NOTE: misleading but this is handled.
7830
 */
7831
void
7832
xmlParsePEReference(xmlParserCtxtPtr ctxt)
7833
1.77M
{
7834
1.77M
    const xmlChar *name;
7835
1.77M
    xmlEntityPtr entity = NULL;
7836
1.77M
    xmlParserInputPtr input;
7837
7838
1.77M
    if (RAW != '%')
7839
841k
        return;
7840
931k
    NEXT;
7841
931k
    name = xmlParseName(ctxt);
7842
931k
    if (name == NULL) {
7843
193k
  xmlFatalErrMsg(ctxt, XML_ERR_PEREF_NO_NAME, "PEReference: no name\n");
7844
193k
  return;
7845
193k
    }
7846
737k
    if (xmlParserDebugEntities)
7847
0
  xmlGenericError(xmlGenericErrorContext,
7848
0
    "PEReference: %s\n", name);
7849
737k
    if (RAW != ';') {
7850
204k
  xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
7851
204k
        return;
7852
204k
    }
7853
7854
533k
    NEXT;
7855
7856
    /*
7857
     * Increase the number of entity references parsed
7858
     */
7859
533k
    ctxt->nbentities++;
7860
7861
    /*
7862
     * Request the entity from SAX
7863
     */
7864
533k
    if ((ctxt->sax != NULL) &&
7865
533k
  (ctxt->sax->getParameterEntity != NULL))
7866
533k
  entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
7867
533k
    if (ctxt->instate == XML_PARSER_EOF)
7868
0
  return;
7869
533k
    if (entity == NULL) {
7870
  /*
7871
   * [ WFC: Entity Declared ]
7872
   * In a document without any DTD, a document with only an
7873
   * internal DTD subset which contains no parameter entity
7874
   * references, or a document with "standalone='yes'", ...
7875
   * ... The declaration of a parameter entity must precede
7876
   * any reference to it...
7877
   */
7878
55.5k
  if ((ctxt->standalone == 1) ||
7879
55.0k
      ((ctxt->hasExternalSubset == 0) &&
7880
54.6k
       (ctxt->hasPErefs == 0))) {
7881
2.82k
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7882
2.82k
            "PEReference: %%%s; not found\n",
7883
2.82k
            name);
7884
52.7k
  } else {
7885
      /*
7886
       * [ VC: Entity Declared ]
7887
       * In a document with an external subset or external
7888
       * parameter entities with "standalone='no'", ...
7889
       * ... The declaration of a parameter entity must
7890
       * precede any reference to it...
7891
       */
7892
52.7k
            if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
7893
0
                xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
7894
0
                                 "PEReference: %%%s; not found\n",
7895
0
                                 name, NULL);
7896
0
            } else
7897
52.7k
                xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7898
52.7k
                              "PEReference: %%%s; not found\n",
7899
52.7k
                              name, NULL);
7900
52.7k
            ctxt->valid = 0;
7901
52.7k
  }
7902
55.5k
  xmlParserEntityCheck(ctxt, 0, NULL, 0);
7903
478k
    } else {
7904
  /*
7905
   * Internal checking in case the entity quest barfed
7906
   */
7907
478k
  if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7908
1.34k
      (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7909
0
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7910
0
      "Internal: %%%s; is not a parameter entity\n",
7911
0
        name, NULL);
7912
478k
  } else {
7913
478k
            xmlChar start[4];
7914
478k
            xmlCharEncoding enc;
7915
7916
478k
      if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
7917
1.34k
          ((ctxt->options & XML_PARSE_NOENT) == 0) &&
7918
1.34k
    ((ctxt->options & XML_PARSE_DTDVALID) == 0) &&
7919
1.34k
    ((ctxt->options & XML_PARSE_DTDLOAD) == 0) &&
7920
1.34k
    ((ctxt->options & XML_PARSE_DTDATTR) == 0) &&
7921
1.34k
    (ctxt->replaceEntities == 0) &&
7922
1.34k
    (ctxt->validate == 0))
7923
1.34k
    return;
7924
7925
476k
      input = xmlNewEntityInputStream(ctxt, entity);
7926
476k
      if (xmlPushInput(ctxt, input) < 0) {
7927
0
                xmlFreeInputStream(input);
7928
0
    return;
7929
0
            }
7930
7931
476k
      if (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) {
7932
                /*
7933
                 * Get the 4 first bytes and decode the charset
7934
                 * if enc != XML_CHAR_ENCODING_NONE
7935
                 * plug some encoding conversion routines.
7936
                 * Note that, since we may have some non-UTF8
7937
                 * encoding (like UTF16, bug 135229), the 'length'
7938
                 * is not known, but we can calculate based upon
7939
                 * the amount of data in the buffer.
7940
                 */
7941
0
                GROW
7942
0
                if (ctxt->instate == XML_PARSER_EOF)
7943
0
                    return;
7944
0
                if ((ctxt->input->end - ctxt->input->cur)>=4) {
7945
0
                    start[0] = RAW;
7946
0
                    start[1] = NXT(1);
7947
0
                    start[2] = NXT(2);
7948
0
                    start[3] = NXT(3);
7949
0
                    enc = xmlDetectCharEncoding(start, 4);
7950
0
                    if (enc != XML_CHAR_ENCODING_NONE) {
7951
0
                        xmlSwitchEncoding(ctxt, enc);
7952
0
                    }
7953
0
                }
7954
7955
0
                if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
7956
0
                    (IS_BLANK_CH(NXT(5)))) {
7957
0
                    xmlParseTextDecl(ctxt);
7958
0
                }
7959
0
            }
7960
476k
  }
7961
478k
    }
7962
532k
    ctxt->hasPErefs = 1;
7963
532k
}
7964
7965
/**
7966
 * xmlLoadEntityContent:
7967
 * @ctxt:  an XML parser context
7968
 * @entity: an unloaded system entity
7969
 *
7970
 * Load the original content of the given system entity from the
7971
 * ExternalID/SystemID given. This is to be used for Included in Literal
7972
 * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
7973
 *
7974
 * Returns 0 in case of success and -1 in case of failure
7975
 */
7976
static int
7977
0
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
7978
0
    xmlParserInputPtr input;
7979
0
    xmlBufferPtr buf;
7980
0
    int l, c;
7981
0
    int count = 0;
7982
7983
0
    if ((ctxt == NULL) || (entity == NULL) ||
7984
0
        ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
7985
0
   (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
7986
0
  (entity->content != NULL)) {
7987
0
  xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7988
0
              "xmlLoadEntityContent parameter error");
7989
0
        return(-1);
7990
0
    }
7991
7992
0
    if (xmlParserDebugEntities)
7993
0
  xmlGenericError(xmlGenericErrorContext,
7994
0
    "Reading %s entity content input\n", entity->name);
7995
7996
0
    buf = xmlBufferCreate();
7997
0
    if (buf == NULL) {
7998
0
  xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7999
0
              "xmlLoadEntityContent parameter error");
8000
0
        return(-1);
8001
0
    }
8002
8003
0
    input = xmlNewEntityInputStream(ctxt, entity);
8004
0
    if (input == NULL) {
8005
0
  xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8006
0
              "xmlLoadEntityContent input error");
8007
0
  xmlBufferFree(buf);
8008
0
        return(-1);
8009
0
    }
8010
8011
    /*
8012
     * Push the entity as the current input, read char by char
8013
     * saving to the buffer until the end of the entity or an error
8014
     */
8015
0
    if (xmlPushInput(ctxt, input) < 0) {
8016
0
        xmlBufferFree(buf);
8017
0
  return(-1);
8018
0
    }
8019
8020
0
    GROW;
8021
0
    c = CUR_CHAR(l);
8022
0
    while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
8023
0
           (IS_CHAR(c))) {
8024
0
        xmlBufferAdd(buf, ctxt->input->cur, l);
8025
0
  if (count++ > XML_PARSER_CHUNK_SIZE) {
8026
0
      count = 0;
8027
0
      GROW;
8028
0
            if (ctxt->instate == XML_PARSER_EOF) {
8029
0
                xmlBufferFree(buf);
8030
0
                return(-1);
8031
0
            }
8032
0
  }
8033
0
  NEXTL(l);
8034
0
  c = CUR_CHAR(l);
8035
0
  if (c == 0) {
8036
0
      count = 0;
8037
0
      GROW;
8038
0
            if (ctxt->instate == XML_PARSER_EOF) {
8039
0
                xmlBufferFree(buf);
8040
0
                return(-1);
8041
0
            }
8042
0
      c = CUR_CHAR(l);
8043
0
  }
8044
0
    }
8045
8046
0
    if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
8047
0
        xmlPopInput(ctxt);
8048
0
    } else if (!IS_CHAR(c)) {
8049
0
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
8050
0
                          "xmlLoadEntityContent: invalid char value %d\n",
8051
0
                    c);
8052
0
  xmlBufferFree(buf);
8053
0
  return(-1);
8054
0
    }
8055
0
    entity->content = buf->content;
8056
0
    buf->content = NULL;
8057
0
    xmlBufferFree(buf);
8058
8059
0
    return(0);
8060
0
}
8061
8062
/**
8063
 * xmlParseStringPEReference:
8064
 * @ctxt:  an XML parser context
8065
 * @str:  a pointer to an index in the string
8066
 *
8067
 * parse PEReference declarations
8068
 *
8069
 * [69] PEReference ::= '%' Name ';'
8070
 *
8071
 * [ WFC: No Recursion ]
8072
 * A parsed entity must not contain a recursive
8073
 * reference to itself, either directly or indirectly.
8074
 *
8075
 * [ WFC: Entity Declared ]
8076
 * In a document without any DTD, a document with only an internal DTD
8077
 * subset which contains no parameter entity references, or a document
8078
 * with "standalone='yes'", ...  ... The declaration of a parameter
8079
 * entity must precede any reference to it...
8080
 *
8081
 * [ VC: Entity Declared ]
8082
 * In a document with an external subset or external parameter entities
8083
 * with "standalone='no'", ...  ... The declaration of a parameter entity
8084
 * must precede any reference to it...
8085
 *
8086
 * [ WFC: In DTD ]
8087
 * Parameter-entity references may only appear in the DTD.
8088
 * NOTE: misleading but this is handled.
8089
 *
8090
 * Returns the entity found (an xmlEntityPtr), or NULL if none or on error.
8091
 *         str is updated to the current value of the index
8092
 */
8093
static xmlEntityPtr
8094
0
xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
8095
0
    const xmlChar *ptr;
8096
0
    xmlChar cur;
8097
0
    xmlChar *name;
8098
0
    xmlEntityPtr entity = NULL;
8099
8100
0
    if ((str == NULL) || (*str == NULL)) return(NULL);
8101
0
    ptr = *str;
8102
0
    cur = *ptr;
8103
0
    if (cur != '%')
8104
0
        return(NULL);
8105
0
    ptr++;
8106
0
    name = xmlParseStringName(ctxt, &ptr);
8107
0
    if (name == NULL) {
8108
0
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8109
0
           "xmlParseStringPEReference: no name\n");
8110
0
  *str = ptr;
8111
0
  return(NULL);
8112
0
    }
8113
0
    cur = *ptr;
8114
0
    if (cur != ';') {
8115
0
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
8116
0
  xmlFree(name);
8117
0
  *str = ptr;
8118
0
  return(NULL);
8119
0
    }
8120
0
    ptr++;
8121
8122
    /*
8123
     * Increase the number of entity references parsed
8124
     */
8125
0
    ctxt->nbentities++;
8126
8127
    /*
8128
     * Request the entity from SAX
8129
     */
8130
0
    if ((ctxt->sax != NULL) &&
8131
0
  (ctxt->sax->getParameterEntity != NULL))
8132
0
  entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8133
0
    if (ctxt->instate == XML_PARSER_EOF) {
8134
0
  xmlFree(name);
8135
0
  *str = ptr;
8136
0
  return(NULL);
8137
0
    }
8138
0
    if (entity == NULL) {
8139
  /*
8140
   * [ WFC: Entity Declared ]
8141
   * In a document without any DTD, a document with only an
8142
   * internal DTD subset which contains no parameter entity
8143
   * references, or a document with "standalone='yes'", ...
8144
   * ... The declaration of a parameter entity must precede
8145
   * any reference to it...
8146
   */
8147
0
  if ((ctxt->standalone == 1) ||
8148
0
      ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) {
8149
0
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8150
0
     "PEReference: %%%s; not found\n", name);
8151
0
  } else {
8152
      /*
8153
       * [ VC: Entity Declared ]
8154
       * In a document with an external subset or external
8155
       * parameter entities with "standalone='no'", ...
8156
       * ... The declaration of a parameter entity must
8157
       * precede any reference to it...
8158
       */
8159
0
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8160
0
        "PEReference: %%%s; not found\n",
8161
0
        name, NULL);
8162
0
      ctxt->valid = 0;
8163
0
  }
8164
0
  xmlParserEntityCheck(ctxt, 0, NULL, 0);
8165
0
    } else {
8166
  /*
8167
   * Internal checking in case the entity quest barfed
8168
   */
8169
0
  if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8170
0
      (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8171
0
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8172
0
        "%%%s; is not a parameter entity\n",
8173
0
        name, NULL);
8174
0
  }
8175
0
    }
8176
0
    ctxt->hasPErefs = 1;
8177
0
    xmlFree(name);
8178
0
    *str = ptr;
8179
0
    return(entity);
8180
0
}
8181
8182
/**
8183
 * xmlParseDocTypeDecl:
8184
 * @ctxt:  an XML parser context
8185
 *
8186
 * parse a DOCTYPE declaration
8187
 *
8188
 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
8189
 *                      ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8190
 *
8191
 * [ VC: Root Element Type ]
8192
 * The Name in the document type declaration must match the element
8193
 * type of the root element.
8194
 */
8195
8196
void
8197
69.3k
xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
8198
69.3k
    const xmlChar *name = NULL;
8199
69.3k
    xmlChar *ExternalID = NULL;
8200
69.3k
    xmlChar *URI = NULL;
8201
8202
    /*
8203
     * We know that '<!DOCTYPE' has been detected.
8204
     */
8205
69.3k
    SKIP(9);
8206
8207
69.3k
    SKIP_BLANKS;
8208
8209
    /*
8210
     * Parse the DOCTYPE name.
8211
     */
8212
69.3k
    name = xmlParseName(ctxt);
8213
69.3k
    if (name == NULL) {
8214
8.11k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8215
8.11k
           "xmlParseDocTypeDecl : no DOCTYPE name !\n");
8216
8.11k
    }
8217
69.3k
    ctxt->intSubName = name;
8218
8219
69.3k
    SKIP_BLANKS;
8220
8221
    /*
8222
     * Check for SystemID and ExternalID
8223
     */
8224
69.3k
    URI = xmlParseExternalID(ctxt, &ExternalID, 1);
8225
8226
69.3k
    if ((URI != NULL) || (ExternalID != NULL)) {
8227
3.47k
        ctxt->hasExternalSubset = 1;
8228
3.47k
    }
8229
69.3k
    ctxt->extSubURI = URI;
8230
69.3k
    ctxt->extSubSystem = ExternalID;
8231
8232
69.3k
    SKIP_BLANKS;
8233
8234
    /*
8235
     * Create and update the internal subset.
8236
     */
8237
69.3k
    if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
8238
69.3k
  (!ctxt->disableSAX))
8239
69.3k
  ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
8240
69.3k
    if (ctxt->instate == XML_PARSER_EOF)
8241
0
  return;
8242
8243
    /*
8244
     * Are there any internal subset declarations ?
8245
     * they are handled separately in xmlParseInternalSubset()
8246
     */
8247
69.3k
    if (RAW == '[')
8248
57.7k
  return;
8249
8250
    /*
8251
     * We should be at the end of the DOCTYPE declaration.
8252
     */
8253
11.5k
    if (RAW != '>') {
8254
4.98k
  xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8255
4.98k
    }
8256
11.5k
    NEXT;
8257
11.5k
}
8258
8259
/**
8260
 * xmlParseInternalSubset:
8261
 * @ctxt:  an XML parser context
8262
 *
8263
 * parse the internal subset declaration
8264
 *
8265
 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8266
 */
8267
8268
static void
8269
59.4k
xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
8270
    /*
8271
     * Is there any DTD definition ?
8272
     */
8273
59.4k
    if (RAW == '[') {
8274
59.4k
        int baseInputNr = ctxt->inputNr;
8275
59.4k
        ctxt->instate = XML_PARSER_DTD;
8276
59.4k
        NEXT;
8277
  /*
8278
   * Parse the succession of Markup declarations and
8279
   * PEReferences.
8280
   * Subsequence (markupdecl | PEReference | S)*
8281
   */
8282
1.34M
  while (((RAW != ']') || (ctxt->inputNr > baseInputNr)) &&
8283
1.31M
               (ctxt->instate != XML_PARSER_EOF)) {
8284
1.30M
      const xmlChar *check = CUR_PTR;
8285
1.30M
      unsigned int cons = ctxt->input->consumed;
8286
8287
1.30M
      SKIP_BLANKS;
8288
1.30M
      xmlParseMarkupDecl(ctxt);
8289
1.30M
      xmlParsePEReference(ctxt);
8290
8291
1.30M
      if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
8292
179k
    xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8293
179k
       "xmlParseInternalSubset: error detected in Markup declaration\n");
8294
179k
                if (ctxt->inputNr > baseInputNr)
8295
162k
                    xmlPopInput(ctxt);
8296
16.6k
                else
8297
16.6k
        break;
8298
179k
      }
8299
1.30M
  }
8300
59.4k
  if (RAW == ']') {
8301
35.8k
      NEXT;
8302
35.8k
      SKIP_BLANKS;
8303
35.8k
  }
8304
59.4k
    }
8305
8306
    /*
8307
     * We should be at the end of the DOCTYPE declaration.
8308
     */
8309
59.4k
    if (RAW != '>') {
8310
23.3k
  xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8311
23.3k
  return;
8312
23.3k
    }
8313
36.1k
    NEXT;
8314
36.1k
}
8315
8316
#ifdef LIBXML_SAX1_ENABLED
8317
/**
8318
 * xmlParseAttribute:
8319
 * @ctxt:  an XML parser context
8320
 * @value:  a xmlChar ** used to store the value of the attribute
8321
 *
8322
 * parse an attribute
8323
 *
8324
 * [41] Attribute ::= Name Eq AttValue
8325
 *
8326
 * [ WFC: No External Entity References ]
8327
 * Attribute values cannot contain direct or indirect entity references
8328
 * to external entities.
8329
 *
8330
 * [ WFC: No < in Attribute Values ]
8331
 * The replacement text of any entity referred to directly or indirectly in
8332
 * an attribute value (other than "&lt;") must not contain a <.
8333
 *
8334
 * [ VC: Attribute Value Type ]
8335
 * The attribute must have been declared; the value must be of the type
8336
 * declared for it.
8337
 *
8338
 * [25] Eq ::= S? '=' S?
8339
 *
8340
 * With namespace:
8341
 *
8342
 * [NS 11] Attribute ::= QName Eq AttValue
8343
 *
8344
 * Also the case QName == xmlns:??? is handled independently as a namespace
8345
 * definition.
8346
 *
8347
 * Returns the attribute name, and the value in *value.
8348
 */
8349
8350
const xmlChar *
8351
0
xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
8352
0
    const xmlChar *name;
8353
0
    xmlChar *val;
8354
8355
0
    *value = NULL;
8356
0
    GROW;
8357
0
    name = xmlParseName(ctxt);
8358
0
    if (name == NULL) {
8359
0
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8360
0
                 "error parsing attribute name\n");
8361
0
        return(NULL);
8362
0
    }
8363
8364
    /*
8365
     * read the value
8366
     */
8367
0
    SKIP_BLANKS;
8368
0
    if (RAW == '=') {
8369
0
        NEXT;
8370
0
  SKIP_BLANKS;
8371
0
  val = xmlParseAttValue(ctxt);
8372
0
  ctxt->instate = XML_PARSER_CONTENT;
8373
0
    } else {
8374
0
  xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8375
0
         "Specification mandates value for attribute %s\n", name);
8376
0
  return(NULL);
8377
0
    }
8378
8379
    /*
8380
     * Check that xml:lang conforms to the specification
8381
     * No longer registered as an error, just generate a warning now
8382
     * since this was deprecated in XML second edition
8383
     */
8384
0
    if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8385
0
  if (!xmlCheckLanguageID(val)) {
8386
0
      xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8387
0
              "Malformed value for xml:lang : %s\n",
8388
0
        val, NULL);
8389
0
  }
8390
0
    }
8391
8392
    /*
8393
     * Check that xml:space conforms to the specification
8394
     */
8395
0
    if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8396
0
  if (xmlStrEqual(val, BAD_CAST "default"))
8397
0
      *(ctxt->space) = 0;
8398
0
  else if (xmlStrEqual(val, BAD_CAST "preserve"))
8399
0
      *(ctxt->space) = 1;
8400
0
  else {
8401
0
    xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8402
0
"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8403
0
                                 val, NULL);
8404
0
  }
8405
0
    }
8406
8407
0
    *value = val;
8408
0
    return(name);
8409
0
}
8410
8411
/**
8412
 * xmlParseStartTag:
8413
 * @ctxt:  an XML parser context
8414
 *
8415
 * parse a start of tag either for rule element or
8416
 * EmptyElement. In both cases we don't parse the tag closing chars.
8417
 *
8418
 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8419
 *
8420
 * [ WFC: Unique Att Spec ]
8421
 * No attribute name may appear more than once in the same start-tag or
8422
 * empty-element tag.
8423
 *
8424
 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8425
 *
8426
 * [ WFC: Unique Att Spec ]
8427
 * No attribute name may appear more than once in the same start-tag or
8428
 * empty-element tag.
8429
 *
8430
 * With namespace:
8431
 *
8432
 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8433
 *
8434
 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8435
 *
8436
 * Returns the element name parsed
8437
 */
8438
8439
const xmlChar *
8440
0
xmlParseStartTag(xmlParserCtxtPtr ctxt) {
8441
0
    const xmlChar *name;
8442
0
    const xmlChar *attname;
8443
0
    xmlChar *attvalue;
8444
0
    const xmlChar **atts = ctxt->atts;
8445
0
    int nbatts = 0;
8446
0
    int maxatts = ctxt->maxatts;
8447
0
    int i;
8448
8449
0
    if (RAW != '<') return(NULL);
8450
0
    NEXT1;
8451
8452
0
    name = xmlParseName(ctxt);
8453
0
    if (name == NULL) {
8454
0
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8455
0
       "xmlParseStartTag: invalid element name\n");
8456
0
        return(NULL);
8457
0
    }
8458
8459
    /*
8460
     * Now parse the attributes, it ends up with the ending
8461
     *
8462
     * (S Attribute)* S?
8463
     */
8464
0
    SKIP_BLANKS;
8465
0
    GROW;
8466
8467
0
    while (((RAW != '>') &&
8468
0
     ((RAW != '/') || (NXT(1) != '>')) &&
8469
0
     (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
8470
0
  const xmlChar *q = CUR_PTR;
8471
0
  unsigned int cons = ctxt->input->consumed;
8472
8473
0
  attname = xmlParseAttribute(ctxt, &attvalue);
8474
0
        if ((attname != NULL) && (attvalue != NULL)) {
8475
      /*
8476
       * [ WFC: Unique Att Spec ]
8477
       * No attribute name may appear more than once in the same
8478
       * start-tag or empty-element tag.
8479
       */
8480
0
      for (i = 0; i < nbatts;i += 2) {
8481
0
          if (xmlStrEqual(atts[i], attname)) {
8482
0
        xmlErrAttributeDup(ctxt, NULL, attname);
8483
0
        xmlFree(attvalue);
8484
0
        goto failed;
8485
0
    }
8486
0
      }
8487
      /*
8488
       * Add the pair to atts
8489
       */
8490
0
      if (atts == NULL) {
8491
0
          maxatts = 22; /* allow for 10 attrs by default */
8492
0
          atts = (const xmlChar **)
8493
0
           xmlMalloc(maxatts * sizeof(xmlChar *));
8494
0
    if (atts == NULL) {
8495
0
        xmlErrMemory(ctxt, NULL);
8496
0
        if (attvalue != NULL)
8497
0
      xmlFree(attvalue);
8498
0
        goto failed;
8499
0
    }
8500
0
    ctxt->atts = atts;
8501
0
    ctxt->maxatts = maxatts;
8502
0
      } else if (nbatts + 4 > maxatts) {
8503
0
          const xmlChar **n;
8504
8505
0
          maxatts *= 2;
8506
0
          n = (const xmlChar **) xmlRealloc((void *) atts,
8507
0
               maxatts * sizeof(const xmlChar *));
8508
0
    if (n == NULL) {
8509
0
        xmlErrMemory(ctxt, NULL);
8510
0
        if (attvalue != NULL)
8511
0
      xmlFree(attvalue);
8512
0
        goto failed;
8513
0
    }
8514
0
    atts = n;
8515
0
    ctxt->atts = atts;
8516
0
    ctxt->maxatts = maxatts;
8517
0
      }
8518
0
      atts[nbatts++] = attname;
8519
0
      atts[nbatts++] = attvalue;
8520
0
      atts[nbatts] = NULL;
8521
0
      atts[nbatts + 1] = NULL;
8522
0
  } else {
8523
0
      if (attvalue != NULL)
8524
0
    xmlFree(attvalue);
8525
0
  }
8526
8527
0
failed:
8528
8529
0
  GROW
8530
0
  if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8531
0
      break;
8532
0
  if (SKIP_BLANKS == 0) {
8533
0
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8534
0
         "attributes construct error\n");
8535
0
  }
8536
0
        if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
8537
0
            (attname == NULL) && (attvalue == NULL)) {
8538
0
      xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
8539
0
         "xmlParseStartTag: problem parsing attributes\n");
8540
0
      break;
8541
0
  }
8542
0
  SHRINK;
8543
0
        GROW;
8544
0
    }
8545
8546
    /*
8547
     * SAX: Start of Element !
8548
     */
8549
0
    if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
8550
0
  (!ctxt->disableSAX)) {
8551
0
  if (nbatts > 0)
8552
0
      ctxt->sax->startElement(ctxt->userData, name, atts);
8553
0
  else
8554
0
      ctxt->sax->startElement(ctxt->userData, name, NULL);
8555
0
    }
8556
8557
0
    if (atts != NULL) {
8558
        /* Free only the content strings */
8559
0
        for (i = 1;i < nbatts;i+=2)
8560
0
      if (atts[i] != NULL)
8561
0
         xmlFree((xmlChar *) atts[i]);
8562
0
    }
8563
0
    return(name);
8564
0
}
8565
8566
/**
8567
 * xmlParseEndTag1:
8568
 * @ctxt:  an XML parser context
8569
 * @line:  line of the start tag
8570
 * @nsNr:  number of namespaces on the start tag
8571
 *
8572
 * parse an end of tag
8573
 *
8574
 * [42] ETag ::= '</' Name S? '>'
8575
 *
8576
 * With namespace
8577
 *
8578
 * [NS 9] ETag ::= '</' QName S? '>'
8579
 */
8580
8581
static void
8582
0
xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
8583
0
    const xmlChar *name;
8584
8585
0
    GROW;
8586
0
    if ((RAW != '<') || (NXT(1) != '/')) {
8587
0
  xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
8588
0
           "xmlParseEndTag: '</' not found\n");
8589
0
  return;
8590
0
    }
8591
0
    SKIP(2);
8592
8593
0
    name = xmlParseNameAndCompare(ctxt,ctxt->name);
8594
8595
    /*
8596
     * We should definitely be at the ending "S? '>'" part
8597
     */
8598
0
    GROW;
8599
0
    SKIP_BLANKS;
8600
0
    if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
8601
0
  xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
8602
0
    } else
8603
0
  NEXT1;
8604
8605
    /*
8606
     * [ WFC: Element Type Match ]
8607
     * The Name in an element's end-tag must match the element type in the
8608
     * start-tag.
8609
     *
8610
     */
8611
0
    if (name != (xmlChar*)1) {
8612
0
        if (name == NULL) name = BAD_CAST "unparseable";
8613
0
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
8614
0
         "Opening and ending tag mismatch: %s line %d and %s\n",
8615
0
                    ctxt->name, line, name);
8616
0
    }
8617
8618
    /*
8619
     * SAX: End of Tag
8620
     */
8621
0
    if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8622
0
  (!ctxt->disableSAX))
8623
0
        ctxt->sax->endElement(ctxt->userData, ctxt->name);
8624
8625
0
    namePop(ctxt);
8626
0
    spacePop(ctxt);
8627
0
    return;
8628
0
}
8629
8630
/**
8631
 * xmlParseEndTag:
8632
 * @ctxt:  an XML parser context
8633
 *
8634
 * parse an end of tag
8635
 *
8636
 * [42] ETag ::= '</' Name S? '>'
8637
 *
8638
 * With namespace
8639
 *
8640
 * [NS 9] ETag ::= '</' QName S? '>'
8641
 */
8642
8643
void
8644
0
xmlParseEndTag(xmlParserCtxtPtr ctxt) {
8645
0
    xmlParseEndTag1(ctxt, 0);
8646
0
}
8647
#endif /* LIBXML_SAX1_ENABLED */
8648
8649
/************************************************************************
8650
 *                  *
8651
 *          SAX 2 specific operations       *
8652
 *                  *
8653
 ************************************************************************/
8654
8655
/*
8656
 * xmlGetNamespace:
8657
 * @ctxt:  an XML parser context
8658
 * @prefix:  the prefix to lookup
8659
 *
8660
 * Lookup the namespace name for the @prefix (which can be NULL)
8661
 * The prefix must come from the @ctxt->dict dictionary
8662
 *
8663
 * Returns the namespace name or NULL if not bound
8664
 */
8665
static const xmlChar *
8666
21.3M
xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
8667
21.3M
    int i;
8668
8669
21.3M
    if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
8670
1.05G
    for (i = ctxt->nsNr - 2;i >= 0;i-=2)
8671
1.04G
        if (ctxt->nsTab[i] == prefix) {
8672
12.5M
      if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
8673
110k
          return(NULL);
8674
12.4M
      return(ctxt->nsTab[i + 1]);
8675
12.5M
  }
8676
8.40M
    return(NULL);
8677
20.9M
}
8678
8679
/**
8680
 * xmlParseQName:
8681
 * @ctxt:  an XML parser context
8682
 * @prefix:  pointer to store the prefix part
8683
 *
8684
 * parse an XML Namespace QName
8685
 *
8686
 * [6]  QName  ::= (Prefix ':')? LocalPart
8687
 * [7]  Prefix  ::= NCName
8688
 * [8]  LocalPart  ::= NCName
8689
 *
8690
 * Returns the Name parsed or NULL
8691
 */
8692
8693
static const xmlChar *
8694
33.3M
xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8695
33.3M
    const xmlChar *l, *p;
8696
8697
33.3M
    GROW;
8698
8699
33.3M
    l = xmlParseNCName(ctxt);
8700
33.3M
    if (l == NULL) {
8701
3.79M
        if (CUR == ':') {
8702
353k
      l = xmlParseName(ctxt);
8703
353k
      if (l != NULL) {
8704
352k
          xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8705
352k
             "Failed to parse QName '%s'\n", l, NULL, NULL);
8706
352k
    *prefix = NULL;
8707
352k
    return(l);
8708
352k
      }
8709
353k
  }
8710
3.44M
        return(NULL);
8711
3.79M
    }
8712
29.5M
    if (CUR == ':') {
8713
2.02M
        NEXT;
8714
2.02M
  p = l;
8715
2.02M
  l = xmlParseNCName(ctxt);
8716
2.02M
  if (l == NULL) {
8717
131k
      xmlChar *tmp;
8718
8719
131k
            xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8720
131k
               "Failed to parse QName '%s:'\n", p, NULL, NULL);
8721
131k
      l = xmlParseNmtoken(ctxt);
8722
131k
      if (l == NULL)
8723
96.6k
    tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
8724
34.9k
      else {
8725
34.9k
    tmp = xmlBuildQName(l, p, NULL, 0);
8726
34.9k
    xmlFree((char *)l);
8727
34.9k
      }
8728
131k
      p = xmlDictLookup(ctxt->dict, tmp, -1);
8729
131k
      if (tmp != NULL) xmlFree(tmp);
8730
131k
      *prefix = NULL;
8731
131k
      return(p);
8732
131k
  }
8733
1.89M
  if (CUR == ':') {
8734
418k
      xmlChar *tmp;
8735
8736
418k
            xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8737
418k
               "Failed to parse QName '%s:%s:'\n", p, l, NULL);
8738
418k
      NEXT;
8739
418k
      tmp = (xmlChar *) xmlParseName(ctxt);
8740
418k
      if (tmp != NULL) {
8741
401k
          tmp = xmlBuildQName(tmp, l, NULL, 0);
8742
401k
    l = xmlDictLookup(ctxt->dict, tmp, -1);
8743
401k
    if (tmp != NULL) xmlFree(tmp);
8744
401k
    *prefix = p;
8745
401k
    return(l);
8746
401k
      }
8747
17.7k
      tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
8748
17.7k
      l = xmlDictLookup(ctxt->dict, tmp, -1);
8749
17.7k
      if (tmp != NULL) xmlFree(tmp);
8750
17.7k
      *prefix = p;
8751
17.7k
      return(l);
8752
418k
  }
8753
1.47M
  *prefix = p;
8754
1.47M
    } else
8755
27.5M
        *prefix = NULL;
8756
29.0M
    return(l);
8757
29.5M
}
8758
8759
/**
8760
 * xmlParseQNameAndCompare:
8761
 * @ctxt:  an XML parser context
8762
 * @name:  the localname
8763
 * @prefix:  the prefix, if any.
8764
 *
8765
 * parse an XML name and compare it for a match
8766
 * (specialized for endtag parsing)
8767
 *
8768
 * Returns NULL for an illegal name, (xmlChar*) 1 for success
8769
 * and the name for mismatch
8770
 */
8771
8772
static const xmlChar *
8773
xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8774
277k
                        xmlChar const *prefix) {
8775
277k
    const xmlChar *cmp;
8776
277k
    const xmlChar *in;
8777
277k
    const xmlChar *ret;
8778
277k
    const xmlChar *prefix2;
8779
8780
277k
    if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8781
8782
277k
    GROW;
8783
277k
    in = ctxt->input->cur;
8784
8785
277k
    cmp = prefix;
8786
715k
    while (*in != 0 && *in == *cmp) {
8787
437k
  ++in;
8788
437k
  ++cmp;
8789
437k
    }
8790
277k
    if ((*cmp == 0) && (*in == ':')) {
8791
162k
        in++;
8792
162k
  cmp = name;
8793
1.24M
  while (*in != 0 && *in == *cmp) {
8794
1.08M
      ++in;
8795
1.08M
      ++cmp;
8796
1.08M
  }
8797
162k
  if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
8798
      /* success */
8799
123k
      ctxt->input->cur = in;
8800
123k
      return((const xmlChar*) 1);
8801
123k
  }
8802
162k
    }
8803
    /*
8804
     * all strings coms from the dictionary, equality can be done directly
8805
     */
8806
154k
    ret = xmlParseQName (ctxt, &prefix2);
8807
154k
    if ((ret == name) && (prefix == prefix2))
8808
17.7k
  return((const xmlChar*) 1);
8809
136k
    return ret;
8810
154k
}
8811
8812
/**
8813
 * xmlParseAttValueInternal:
8814
 * @ctxt:  an XML parser context
8815
 * @len:  attribute len result
8816
 * @alloc:  whether the attribute was reallocated as a new string
8817
 * @normalize:  if 1 then further non-CDATA normalization must be done
8818
 *
8819
 * parse a value for an attribute.
8820
 * NOTE: if no normalization is needed, the routine will return pointers
8821
 *       directly from the data buffer.
8822
 *
8823
 * 3.3.3 Attribute-Value Normalization:
8824
 * Before the value of an attribute is passed to the application or
8825
 * checked for validity, the XML processor must normalize it as follows:
8826
 * - a character reference is processed by appending the referenced
8827
 *   character to the attribute value
8828
 * - an entity reference is processed by recursively processing the
8829
 *   replacement text of the entity
8830
 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
8831
 *   appending #x20 to the normalized value, except that only a single
8832
 *   #x20 is appended for a "#xD#xA" sequence that is part of an external
8833
 *   parsed entity or the literal entity value of an internal parsed entity
8834
 * - other characters are processed by appending them to the normalized value
8835
 * If the declared value is not CDATA, then the XML processor must further
8836
 * process the normalized attribute value by discarding any leading and
8837
 * trailing space (#x20) characters, and by replacing sequences of space
8838
 * (#x20) characters by a single space (#x20) character.
8839
 * All attributes for which no declaration has been read should be treated
8840
 * by a non-validating parser as if declared CDATA.
8841
 *
8842
 * Returns the AttValue parsed or NULL. The value has to be freed by the
8843
 *     caller if it was copied, this can be detected by val[*len] == 0.
8844
 */
8845
8846
static xmlChar *
8847
xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
8848
                         int normalize)
8849
9.07M
{
8850
9.07M
    xmlChar limit = 0;
8851
9.07M
    const xmlChar *in = NULL, *start, *end, *last;
8852
9.07M
    xmlChar *ret = NULL;
8853
9.07M
    int line, col;
8854
8855
9.07M
    GROW;
8856
9.07M
    in = (xmlChar *) CUR_PTR;
8857
9.07M
    line = ctxt->input->line;
8858
9.07M
    col = ctxt->input->col;
8859
9.07M
    if (*in != '"' && *in != '\'') {
8860
251k
        xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
8861
251k
        return (NULL);
8862
251k
    }
8863
8.82M
    ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
8864
8865
    /*
8866
     * try to handle in this routine the most common case where no
8867
     * allocation of a new string is required and where content is
8868
     * pure ASCII.
8869
     */
8870
8.82M
    limit = *in++;
8871
8.82M
    col++;
8872
8.82M
    end = ctxt->input->end;
8873
8.82M
    start = in;
8874
8.82M
    if (in >= end) {
8875
1.13k
        const xmlChar *oldbase = ctxt->input->base;
8876
1.13k
  GROW;
8877
1.13k
  if (oldbase != ctxt->input->base) {
8878
0
      long delta = ctxt->input->base - oldbase;
8879
0
      start = start + delta;
8880
0
      in = in + delta;
8881
0
  }
8882
1.13k
  end = ctxt->input->end;
8883
1.13k
    }
8884
8.82M
    if (normalize) {
8885
        /*
8886
   * Skip any leading spaces
8887
   */
8888
367k
  while ((in < end) && (*in != limit) &&
8889
362k
         ((*in == 0x20) || (*in == 0x9) ||
8890
282k
          (*in == 0xA) || (*in == 0xD))) {
8891
282k
      if (*in == 0xA) {
8892
187k
          line++; col = 1;
8893
187k
      } else {
8894
94.4k
          col++;
8895
94.4k
      }
8896
282k
      in++;
8897
282k
      start = in;
8898
282k
      if (in >= end) {
8899
434
    const xmlChar *oldbase = ctxt->input->base;
8900
434
    GROW;
8901
434
                if (ctxt->instate == XML_PARSER_EOF)
8902
0
                    return(NULL);
8903
434
    if (oldbase != ctxt->input->base) {
8904
0
        long delta = ctxt->input->base - oldbase;
8905
0
        start = start + delta;
8906
0
        in = in + delta;
8907
0
    }
8908
434
    end = ctxt->input->end;
8909
434
                if (((in - start) > XML_MAX_TEXT_LENGTH) &&
8910
0
                    ((ctxt->options & XML_PARSE_HUGE) == 0)) {
8911
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
8912
0
                                   "AttValue length too long\n");
8913
0
                    return(NULL);
8914
0
                }
8915
434
      }
8916
282k
  }
8917
490k
  while ((in < end) && (*in != limit) && (*in >= 0x20) &&
8918
467k
         (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
8919
409k
      col++;
8920
409k
      if ((*in++ == 0x20) && (*in == 0x20)) break;
8921
404k
      if (in >= end) {
8922
684
    const xmlChar *oldbase = ctxt->input->base;
8923
684
    GROW;
8924
684
                if (ctxt->instate == XML_PARSER_EOF)
8925
0
                    return(NULL);
8926
684
    if (oldbase != ctxt->input->base) {
8927
0
        long delta = ctxt->input->base - oldbase;
8928
0
        start = start + delta;
8929
0
        in = in + delta;
8930
0
    }
8931
684
    end = ctxt->input->end;
8932
684
                if (((in - start) > XML_MAX_TEXT_LENGTH) &&
8933
0
                    ((ctxt->options & XML_PARSE_HUGE) == 0)) {
8934
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
8935
0
                                   "AttValue length too long\n");
8936
0
                    return(NULL);
8937
0
                }
8938
684
      }
8939
404k
  }
8940
85.7k
  last = in;
8941
  /*
8942
   * skip the trailing blanks
8943
   */
8944
95.7k
  while ((last[-1] == 0x20) && (last > start)) last--;
8945
185k
  while ((in < end) && (*in != limit) &&
8946
170k
         ((*in == 0x20) || (*in == 0x9) ||
8947
134k
          (*in == 0xA) || (*in == 0xD))) {
8948
99.4k
      if (*in == 0xA) {
8949
61.7k
          line++, col = 1;
8950
61.7k
      } else {
8951
37.6k
          col++;
8952
37.6k
      }
8953
99.4k
      in++;
8954
99.4k
      if (in >= end) {
8955
523
    const xmlChar *oldbase = ctxt->input->base;
8956
523
    GROW;
8957
523
                if (ctxt->instate == XML_PARSER_EOF)
8958
0
                    return(NULL);
8959
523
    if (oldbase != ctxt->input->base) {
8960
0
        long delta = ctxt->input->base - oldbase;
8961
0
        start = start + delta;
8962
0
        in = in + delta;
8963
0
        last = last + delta;
8964
0
    }
8965
523
    end = ctxt->input->end;
8966
523
                if (((in - start) > XML_MAX_TEXT_LENGTH) &&
8967
0
                    ((ctxt->options & XML_PARSE_HUGE) == 0)) {
8968
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
8969
0
                                   "AttValue length too long\n");
8970
0
                    return(NULL);
8971
0
                }
8972
523
      }
8973
99.4k
  }
8974
85.7k
        if (((in - start) > XML_MAX_TEXT_LENGTH) &&
8975
0
            ((ctxt->options & XML_PARSE_HUGE) == 0)) {
8976
0
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
8977
0
                           "AttValue length too long\n");
8978
0
            return(NULL);
8979
0
        }
8980
85.7k
  if (*in != limit) goto need_complex;
8981
8.73M
    } else {
8982
112M
  while ((in < end) && (*in != limit) && (*in >= 0x20) &&
8983
104M
         (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
8984
104M
      in++;
8985
104M
      col++;
8986
104M
      if (in >= end) {
8987
25.0k
    const xmlChar *oldbase = ctxt->input->base;
8988
25.0k
    GROW;
8989
25.0k
                if (ctxt->instate == XML_PARSER_EOF)
8990
0
                    return(NULL);
8991
25.0k
    if (oldbase != ctxt->input->base) {
8992
0
        long delta = ctxt->input->base - oldbase;
8993
0
        start = start + delta;
8994
0
        in = in + delta;
8995
0
    }
8996
25.0k
    end = ctxt->input->end;
8997
25.0k
                if (((in - start) > XML_MAX_TEXT_LENGTH) &&
8998
0
                    ((ctxt->options & XML_PARSE_HUGE) == 0)) {
8999
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9000
0
                                   "AttValue length too long\n");
9001
0
                    return(NULL);
9002
0
                }
9003
25.0k
      }
9004
104M
  }
9005
8.73M
  last = in;
9006
8.73M
        if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9007
0
            ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9008
0
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9009
0
                           "AttValue length too long\n");
9010
0
            return(NULL);
9011
0
        }
9012
8.73M
  if (*in != limit) goto need_complex;
9013
8.73M
    }
9014
7.53M
    in++;
9015
7.53M
    col++;
9016
7.53M
    if (len != NULL) {
9017
7.42M
        *len = last - start;
9018
7.42M
        ret = (xmlChar *) start;
9019
7.42M
    } else {
9020
113k
        if (alloc) *alloc = 1;
9021
113k
        ret = xmlStrndup(start, last - start);
9022
113k
    }
9023
7.53M
    CUR_PTR = in;
9024
7.53M
    ctxt->input->line = line;
9025
7.53M
    ctxt->input->col = col;
9026
7.53M
    if (alloc) *alloc = 0;
9027
7.53M
    return ret;
9028
1.28M
need_complex:
9029
1.28M
    if (alloc) *alloc = 1;
9030
1.28M
    return xmlParseAttValueComplex(ctxt, len, normalize);
9031
8.82M
}
9032
9033
/**
9034
 * xmlParseAttribute2:
9035
 * @ctxt:  an XML parser context
9036
 * @pref:  the element prefix
9037
 * @elem:  the element name
9038
 * @prefix:  a xmlChar ** used to store the value of the attribute prefix
9039
 * @value:  a xmlChar ** used to store the value of the attribute
9040
 * @len:  an int * to save the length of the attribute
9041
 * @alloc:  an int * to indicate if the attribute was allocated
9042
 *
9043
 * parse an attribute in the new SAX2 framework.
9044
 *
9045
 * Returns the attribute name, and the value in *value, .
9046
 */
9047
9048
static const xmlChar *
9049
xmlParseAttribute2(xmlParserCtxtPtr ctxt,
9050
                   const xmlChar * pref, const xmlChar * elem,
9051
                   const xmlChar ** prefix, xmlChar ** value,
9052
                   int *len, int *alloc)
9053
12.7M
{
9054
12.7M
    const xmlChar *name;
9055
12.7M
    xmlChar *val, *internal_val = NULL;
9056
12.7M
    int normalize = 0;
9057
9058
12.7M
    *value = NULL;
9059
12.7M
    GROW;
9060
12.7M
    name = xmlParseQName(ctxt, prefix);
9061
12.7M
    if (name == NULL) {
9062
3.14M
        xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9063
3.14M
                       "error parsing attribute name\n");
9064
3.14M
        return (NULL);
9065
3.14M
    }
9066
9067
    /*
9068
     * get the type if needed
9069
     */
9070
9.60M
    if (ctxt->attsSpecial != NULL) {
9071
486k
        int type;
9072
9073
486k
        type = (int) (ptrdiff_t) xmlHashQLookup2(ctxt->attsSpecial,
9074
486k
                                                 pref, elem, *prefix, name);
9075
486k
        if (type != 0)
9076
90.6k
            normalize = 1;
9077
486k
    }
9078
9079
    /*
9080
     * read the value
9081
     */
9082
9.60M
    SKIP_BLANKS;
9083
9.60M
    if (RAW == '=') {
9084
8.82M
        NEXT;
9085
8.82M
        SKIP_BLANKS;
9086
8.82M
        val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
9087
8.82M
  if (normalize) {
9088
      /*
9089
       * Sometimes a second normalisation pass for spaces is needed
9090
       * but that only happens if charrefs or entities refernces
9091
       * have been used in the attribute value, i.e. the attribute
9092
       * value have been extracted in an allocated string already.
9093
       */
9094
87.5k
      if (*alloc) {
9095
72.7k
          const xmlChar *val2;
9096
9097
72.7k
          val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
9098
72.7k
    if ((val2 != NULL) && (val2 != val)) {
9099
11.9k
        xmlFree(val);
9100
11.9k
        val = (xmlChar *) val2;
9101
11.9k
    }
9102
72.7k
      }
9103
87.5k
  }
9104
8.82M
        ctxt->instate = XML_PARSER_CONTENT;
9105
8.82M
    } else {
9106
773k
        xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
9107
773k
                          "Specification mandates value for attribute %s\n",
9108
773k
                          name);
9109
773k
        return (NULL);
9110
773k
    }
9111
9112
8.82M
    if (*prefix == ctxt->str_xml) {
9113
        /*
9114
         * Check that xml:lang conforms to the specification
9115
         * No more registered as an error, just generate a warning now
9116
         * since this was deprecated in XML second edition
9117
         */
9118
277k
        if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
9119
0
            internal_val = xmlStrndup(val, *len);
9120
0
            if (!xmlCheckLanguageID(internal_val)) {
9121
0
                xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
9122
0
                              "Malformed value for xml:lang : %s\n",
9123
0
                              internal_val, NULL);
9124
0
            }
9125
0
        }
9126
9127
        /*
9128
         * Check that xml:space conforms to the specification
9129
         */
9130
277k
        if (xmlStrEqual(name, BAD_CAST "space")) {
9131
17.6k
            internal_val = xmlStrndup(val, *len);
9132
17.6k
            if (xmlStrEqual(internal_val, BAD_CAST "default"))
9133
1.47k
                *(ctxt->space) = 0;
9134
16.2k
            else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
9135
10.6k
                *(ctxt->space) = 1;
9136
5.59k
            else {
9137
5.59k
                xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
9138
5.59k
                              "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
9139
5.59k
                              internal_val, NULL);
9140
5.59k
            }
9141
17.6k
        }
9142
277k
        if (internal_val) {
9143
14.6k
            xmlFree(internal_val);
9144
14.6k
        }
9145
277k
    }
9146
9147
8.82M
    *value = val;
9148
8.82M
    return (name);
9149
9.60M
}
9150
/**
9151
 * xmlParseStartTag2:
9152
 * @ctxt:  an XML parser context
9153
 *
9154
 * parse a start of tag either for rule element or
9155
 * EmptyElement. In both case we don't parse the tag closing chars.
9156
 * This routine is called when running SAX2 parsing
9157
 *
9158
 * [40] STag ::= '<' Name (S Attribute)* S? '>'
9159
 *
9160
 * [ WFC: Unique Att Spec ]
9161
 * No attribute name may appear more than once in the same start-tag or
9162
 * empty-element tag.
9163
 *
9164
 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
9165
 *
9166
 * [ WFC: Unique Att Spec ]
9167
 * No attribute name may appear more than once in the same start-tag or
9168
 * empty-element tag.
9169
 *
9170
 * With namespace:
9171
 *
9172
 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
9173
 *
9174
 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
9175
 *
9176
 * Returns the element name parsed
9177
 */
9178
9179
static const xmlChar *
9180
xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
9181
20.4M
                  const xmlChar **URI, int *tlen) {
9182
20.4M
    const xmlChar *localname;
9183
20.4M
    const xmlChar *prefix;
9184
20.4M
    const xmlChar *attname;
9185
20.4M
    const xmlChar *aprefix;
9186
20.4M
    const xmlChar *nsname;
9187
20.4M
    xmlChar *attvalue;
9188
20.4M
    const xmlChar **atts = ctxt->atts;
9189
20.4M
    int maxatts = ctxt->maxatts;
9190
20.4M
    int nratts, nbatts, nbdef, inputid;
9191
20.4M
    int i, j, nbNs, attval;
9192
20.4M
    unsigned long cur;
9193
20.4M
    int nsNr = ctxt->nsNr;
9194
9195
20.4M
    if (RAW != '<') return(NULL);
9196
20.4M
    NEXT1;
9197
9198
    /*
9199
     * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
9200
     *       point since the attribute values may be stored as pointers to
9201
     *       the buffer and calling SHRINK would destroy them !
9202
     *       The Shrinking is only possible once the full set of attribute
9203
     *       callbacks have been done.
9204
     */
9205
20.4M
    SHRINK;
9206
20.4M
    cur = ctxt->input->cur - ctxt->input->base;
9207
20.4M
    inputid = ctxt->input->id;
9208
20.4M
    nbatts = 0;
9209
20.4M
    nratts = 0;
9210
20.4M
    nbdef = 0;
9211
20.4M
    nbNs = 0;
9212
20.4M
    attval = 0;
9213
    /* Forget any namespaces added during an earlier parse of this element. */
9214
20.4M
    ctxt->nsNr = nsNr;
9215
9216
20.4M
    localname = xmlParseQName(ctxt, &prefix);
9217
20.4M
    if (localname == NULL) {
9218
292k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9219
292k
           "StartTag: invalid element name\n");
9220
292k
        return(NULL);
9221
292k
    }
9222
20.1M
    *tlen = ctxt->input->cur - ctxt->input->base - cur;
9223
9224
    /*
9225
     * Now parse the attributes, it ends up with the ending
9226
     *
9227
     * (S Attribute)* S?
9228
     */
9229
20.1M
    SKIP_BLANKS;
9230
20.1M
    GROW;
9231
9232
23.9M
    while (((RAW != '>') &&
9233
16.0M
     ((RAW != '/') || (NXT(1) != '>')) &&
9234
12.9M
     (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
9235
12.7M
  const xmlChar *q = CUR_PTR;
9236
12.7M
  unsigned int cons = ctxt->input->consumed;
9237
12.7M
  int len = -1, alloc = 0;
9238
9239
12.7M
  attname = xmlParseAttribute2(ctxt, prefix, localname,
9240
12.7M
                               &aprefix, &attvalue, &len, &alloc);
9241
12.7M
        if ((attname == NULL) || (attvalue == NULL))
9242
4.13M
            goto next_attr;
9243
8.60M
  if (len < 0) len = xmlStrlen(attvalue);
9244
9245
8.60M
        if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9246
528k
            const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9247
528k
            xmlURIPtr uri;
9248
9249
528k
            if (URL == NULL) {
9250
1
                xmlErrMemory(ctxt, "dictionary allocation failure");
9251
1
                if ((attvalue != NULL) && (alloc != 0))
9252
1
                    xmlFree(attvalue);
9253
1
                return(NULL);
9254
1
            }
9255
528k
            if (*URL != 0) {
9256
513k
                uri = xmlParseURI((const char *) URL);
9257
513k
                if (uri == NULL) {
9258
309k
                    xmlNsErr(ctxt, XML_WAR_NS_URI,
9259
309k
                             "xmlns: '%s' is not a valid URI\n",
9260
309k
                                       URL, NULL, NULL);
9261
309k
                } else {
9262
203k
                    if (uri->scheme == NULL) {
9263
92.0k
                        xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9264
92.0k
                                  "xmlns: URI %s is not absolute\n",
9265
92.0k
                                  URL, NULL, NULL);
9266
92.0k
                    }
9267
203k
                    xmlFreeURI(uri);
9268
203k
                }
9269
513k
                if (URL == ctxt->str_xml_ns) {
9270
914
                    if (attname != ctxt->str_xml) {
9271
914
                        xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9272
914
                     "xml namespace URI cannot be the default namespace\n",
9273
914
                                 NULL, NULL, NULL);
9274
914
                    }
9275
914
                    goto next_attr;
9276
914
                }
9277
512k
                if ((len == 29) &&
9278
9.69k
                    (xmlStrEqual(URL,
9279
9.69k
                             BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9280
2.72k
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9281
2.72k
                         "reuse of the xmlns namespace name is forbidden\n",
9282
2.72k
                             NULL, NULL, NULL);
9283
2.72k
                    goto next_attr;
9284
2.72k
                }
9285
512k
            }
9286
            /*
9287
             * check that it's not a defined namespace
9288
             */
9289
545k
            for (j = 1;j <= nbNs;j++)
9290
49.6k
                if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9291
28.5k
                    break;
9292
524k
            if (j <= nbNs)
9293
28.5k
                xmlErrAttributeDup(ctxt, NULL, attname);
9294
496k
            else
9295
496k
                if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
9296
9297
8.07M
        } else if (aprefix == ctxt->str_xmlns) {
9298
192k
            const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9299
192k
            xmlURIPtr uri;
9300
9301
192k
            if (attname == ctxt->str_xml) {
9302
3.78k
                if (URL != ctxt->str_xml_ns) {
9303
3.21k
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9304
3.21k
                             "xml namespace prefix mapped to wrong URI\n",
9305
3.21k
                             NULL, NULL, NULL);
9306
3.21k
                }
9307
                /*
9308
                 * Do not keep a namespace definition node
9309
                 */
9310
3.78k
                goto next_attr;
9311
3.78k
            }
9312
188k
            if (URL == ctxt->str_xml_ns) {
9313
1.83k
                if (attname != ctxt->str_xml) {
9314
1.83k
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9315
1.83k
                             "xml namespace URI mapped to wrong prefix\n",
9316
1.83k
                             NULL, NULL, NULL);
9317
1.83k
                }
9318
1.83k
                goto next_attr;
9319
1.83k
            }
9320
186k
            if (attname == ctxt->str_xmlns) {
9321
8.12k
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9322
8.12k
                         "redefinition of the xmlns prefix is forbidden\n",
9323
8.12k
                         NULL, NULL, NULL);
9324
8.12k
                goto next_attr;
9325
8.12k
            }
9326
178k
            if ((len == 29) &&
9327
5.15k
                (xmlStrEqual(URL,
9328
5.15k
                             BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9329
1.58k
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9330
1.58k
                         "reuse of the xmlns namespace name is forbidden\n",
9331
1.58k
                         NULL, NULL, NULL);
9332
1.58k
                goto next_attr;
9333
1.58k
            }
9334
176k
            if ((URL == NULL) || (URL[0] == 0)) {
9335
6.44k
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9336
6.44k
                         "xmlns:%s: Empty XML namespace is not allowed\n",
9337
6.44k
                              attname, NULL, NULL);
9338
6.44k
                goto next_attr;
9339
170k
            } else {
9340
170k
                uri = xmlParseURI((const char *) URL);
9341
170k
                if (uri == NULL) {
9342
79.6k
                    xmlNsErr(ctxt, XML_WAR_NS_URI,
9343
79.6k
                         "xmlns:%s: '%s' is not a valid URI\n",
9344
79.6k
                                       attname, URL, NULL);
9345
90.6k
                } else {
9346
90.6k
                    if ((ctxt->pedantic) && (uri->scheme == NULL)) {
9347
0
                        xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9348
0
                                  "xmlns:%s: URI %s is not absolute\n",
9349
0
                                  attname, URL, NULL);
9350
0
                    }
9351
90.6k
                    xmlFreeURI(uri);
9352
90.6k
                }
9353
170k
            }
9354
9355
            /*
9356
             * check that it's not a defined namespace
9357
             */
9358
245k
            for (j = 1;j <= nbNs;j++)
9359
90.4k
                if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9360
14.9k
                    break;
9361
170k
            if (j <= nbNs)
9362
14.9k
                xmlErrAttributeDup(ctxt, aprefix, attname);
9363
155k
            else
9364
155k
                if (nsPush(ctxt, attname, URL) > 0) nbNs++;
9365
9366
7.88M
        } else {
9367
            /*
9368
             * Add the pair to atts
9369
             */
9370
7.88M
            if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9371
133k
                if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9372
0
                    goto next_attr;
9373
0
                }
9374
133k
                maxatts = ctxt->maxatts;
9375
133k
                atts = ctxt->atts;
9376
133k
            }
9377
7.88M
            ctxt->attallocs[nratts++] = alloc;
9378
7.88M
            atts[nbatts++] = attname;
9379
7.88M
            atts[nbatts++] = aprefix;
9380
            /*
9381
             * The namespace URI field is used temporarily to point at the
9382
             * base of the current input buffer for non-alloced attributes.
9383
             * When the input buffer is reallocated, all the pointers become
9384
             * invalid, but they can be reconstructed later.
9385
             */
9386
7.88M
            if (alloc)
9387
776k
                atts[nbatts++] = NULL;
9388
7.11M
            else
9389
7.11M
                atts[nbatts++] = ctxt->input->base;
9390
7.88M
            atts[nbatts++] = attvalue;
9391
7.88M
            attvalue += len;
9392
7.88M
            atts[nbatts++] = attvalue;
9393
            /*
9394
             * tag if some deallocation is needed
9395
             */
9396
7.88M
            if (alloc != 0) attval = 1;
9397
7.88M
            attvalue = NULL; /* moved into atts */
9398
7.88M
        }
9399
9400
12.7M
next_attr:
9401
12.7M
        if ((attvalue != NULL) && (alloc != 0)) {
9402
409k
            xmlFree(attvalue);
9403
409k
            attvalue = NULL;
9404
409k
        }
9405
9406
12.7M
  GROW
9407
12.7M
        if (ctxt->instate == XML_PARSER_EOF)
9408
3
            break;
9409
12.7M
  if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9410
3.91M
      break;
9411
8.83M
  if (SKIP_BLANKS == 0) {
9412
5.09M
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9413
5.09M
         "attributes construct error\n");
9414
5.09M
      break;
9415
5.09M
  }
9416
3.73M
        if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
9417
0
            (attname == NULL) && (attvalue == NULL)) {
9418
0
      xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9419
0
           "xmlParseStartTag: problem parsing attributes\n");
9420
0
      break;
9421
0
  }
9422
3.73M
        GROW;
9423
3.73M
    }
9424
9425
20.1M
    if (ctxt->input->id != inputid) {
9426
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9427
0
                    "Unexpected change of input\n");
9428
0
        localname = NULL;
9429
0
        goto done;
9430
0
    }
9431
9432
    /* Reconstruct attribute value pointers. */
9433
28.0M
    for (i = 0, j = 0; j < nratts; i += 5, j++) {
9434
7.88M
        if (atts[i+2] != NULL) {
9435
            /*
9436
             * Arithmetic on dangling pointers is technically undefined
9437
             * behavior, but well...
9438
             */
9439
7.11M
            ptrdiff_t offset = ctxt->input->base - atts[i+2];
9440
7.11M
            atts[i+2]  = NULL;    /* Reset repurposed namespace URI */
9441
7.11M
            atts[i+3] += offset;  /* value */
9442
7.11M
            atts[i+4] += offset;  /* valuend */
9443
7.11M
        }
9444
7.88M
    }
9445
9446
    /*
9447
     * The attributes defaulting
9448
     */
9449
20.1M
    if (ctxt->attsDefault != NULL) {
9450
904k
        xmlDefAttrsPtr defaults;
9451
9452
904k
  defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9453
904k
  if (defaults != NULL) {
9454
919k
      for (i = 0;i < defaults->nbAttrs;i++) {
9455
673k
          attname = defaults->values[5 * i];
9456
673k
    aprefix = defaults->values[5 * i + 1];
9457
9458
                /*
9459
     * special work for namespaces defaulted defs
9460
     */
9461
673k
    if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9462
        /*
9463
         * check that it's not a defined namespace
9464
         */
9465
128k
        for (j = 1;j <= nbNs;j++)
9466
50.4k
            if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9467
21.6k
          break;
9468
99.7k
              if (j <= nbNs) continue;
9469
9470
78.0k
        nsname = xmlGetNamespace(ctxt, NULL);
9471
78.0k
        if (nsname != defaults->values[5 * i + 2]) {
9472
15.0k
      if (nsPush(ctxt, NULL,
9473
15.0k
                 defaults->values[5 * i + 2]) > 0)
9474
15.0k
          nbNs++;
9475
15.0k
        }
9476
573k
    } else if (aprefix == ctxt->str_xmlns) {
9477
        /*
9478
         * check that it's not a defined namespace
9479
         */
9480
822k
        for (j = 1;j <= nbNs;j++)
9481
562k
            if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9482
10.0k
          break;
9483
270k
              if (j <= nbNs) continue;
9484
9485
260k
        nsname = xmlGetNamespace(ctxt, attname);
9486
260k
        if (nsname != defaults->values[2]) {
9487
241k
      if (nsPush(ctxt, attname,
9488
241k
                 defaults->values[5 * i + 2]) > 0)
9489
241k
          nbNs++;
9490
241k
        }
9491
302k
    } else {
9492
        /*
9493
         * check that it's not a defined attribute
9494
         */
9495
1.02M
        for (j = 0;j < nbatts;j+=5) {
9496
739k
      if ((attname == atts[j]) && (aprefix == atts[j+1]))
9497
22.6k
          break;
9498
739k
        }
9499
302k
        if (j < nbatts) continue;
9500
9501
280k
        if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9502
4.07k
      if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9503
0
          return(NULL);
9504
0
      }
9505
4.07k
      maxatts = ctxt->maxatts;
9506
4.07k
      atts = ctxt->atts;
9507
4.07k
        }
9508
280k
        atts[nbatts++] = attname;
9509
280k
        atts[nbatts++] = aprefix;
9510
280k
        if (aprefix == NULL)
9511
143k
      atts[nbatts++] = NULL;
9512
136k
        else
9513
136k
            atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
9514
280k
        atts[nbatts++] = defaults->values[5 * i + 2];
9515
280k
        atts[nbatts++] = defaults->values[5 * i + 3];
9516
280k
        if ((ctxt->standalone == 1) &&
9517
5.00k
            (defaults->values[5 * i + 4] != NULL)) {
9518
0
      xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
9519
0
    "standalone: attribute %s on %s defaulted from external subset\n",
9520
0
                                   attname, localname);
9521
0
        }
9522
280k
        nbdef++;
9523
280k
    }
9524
673k
      }
9525
246k
  }
9526
904k
    }
9527
9528
    /*
9529
     * The attributes checkings
9530
     */
9531
28.3M
    for (i = 0; i < nbatts;i += 5) {
9532
        /*
9533
  * The default namespace does not apply to attribute names.
9534
  */
9535
8.16M
  if (atts[i + 1] != NULL) {
9536
662k
      nsname = xmlGetNamespace(ctxt, atts[i + 1]);
9537
662k
      if (nsname == NULL) {
9538
212k
    xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9539
212k
        "Namespace prefix %s for %s on %s is not defined\n",
9540
212k
        atts[i + 1], atts[i], localname);
9541
212k
      }
9542
662k
      atts[i + 2] = nsname;
9543
662k
  } else
9544
7.50M
      nsname = NULL;
9545
  /*
9546
   * [ WFC: Unique Att Spec ]
9547
   * No attribute name may appear more than once in the same
9548
   * start-tag or empty-element tag.
9549
   * As extended by the Namespace in XML REC.
9550
   */
9551
16.1M
        for (j = 0; j < i;j += 5) {
9552
8.03M
      if (atts[i] == atts[j]) {
9553
183k
          if (atts[i+1] == atts[j+1]) {
9554
86.8k
        xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
9555
86.8k
        break;
9556
86.8k
    }
9557
96.6k
    if ((nsname != NULL) && (atts[j + 2] == nsname)) {
9558
6.31k
        xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9559
6.31k
           "Namespaced Attribute %s in '%s' redefined\n",
9560
6.31k
           atts[i], nsname, NULL);
9561
6.31k
        break;
9562
6.31k
    }
9563
96.6k
      }
9564
8.03M
  }
9565
8.16M
    }
9566
9567
20.1M
    nsname = xmlGetNamespace(ctxt, prefix);
9568
20.1M
    if ((prefix != NULL) && (nsname == NULL)) {
9569
811k
  xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9570
811k
           "Namespace prefix %s on %s is not defined\n",
9571
811k
     prefix, localname, NULL);
9572
811k
    }
9573
20.1M
    *pref = prefix;
9574
20.1M
    *URI = nsname;
9575
9576
    /*
9577
     * SAX: Start of Element !
9578
     */
9579
20.1M
    if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9580
20.1M
  (!ctxt->disableSAX)) {
9581
18.9M
  if (nbNs > 0)
9582
534k
      ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9583
534k
        nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
9584
534k
        nbatts / 5, nbdef, atts);
9585
18.4M
  else
9586
18.4M
      ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9587
18.4M
                    nsname, 0, NULL, nbatts / 5, nbdef, atts);
9588
18.9M
    }
9589
9590
20.1M
done:
9591
    /*
9592
     * Free up attribute allocated strings if needed
9593
     */
9594
20.1M
    if (attval != 0) {
9595
1.68M
  for (i = 3,j = 0; j < nratts;i += 5,j++)
9596
950k
      if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9597
776k
          xmlFree((xmlChar *) atts[i]);
9598
731k
    }
9599
9600
20.1M
    return(localname);
9601
20.1M
}
9602
9603
/**
9604
 * xmlParseEndTag2:
9605
 * @ctxt:  an XML parser context
9606
 * @line:  line of the start tag
9607
 * @nsNr:  number of namespaces on the start tag
9608
 *
9609
 * parse an end of tag
9610
 *
9611
 * [42] ETag ::= '</' Name S? '>'
9612
 *
9613
 * With namespace
9614
 *
9615
 * [NS 9] ETag ::= '</' QName S? '>'
9616
 */
9617
9618
static void
9619
xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
9620
7.83M
                const xmlChar *URI, int line, int nsNr, int tlen) {
9621
7.83M
    const xmlChar *name;
9622
7.83M
    size_t curLength;
9623
9624
7.83M
    GROW;
9625
7.83M
    if ((RAW != '<') || (NXT(1) != '/')) {
9626
0
  xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
9627
0
  return;
9628
0
    }
9629
7.83M
    SKIP(2);
9630
9631
7.83M
    curLength = ctxt->input->end - ctxt->input->cur;
9632
7.83M
    if ((tlen > 0) && (curLength >= (size_t)tlen) &&
9633
131k
        (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) {
9634
51.2k
        if ((curLength >= (size_t)(tlen + 1)) &&
9635
50.8k
      (ctxt->input->cur[tlen] == '>')) {
9636
42.9k
      ctxt->input->cur += tlen + 1;
9637
42.9k
      ctxt->input->col += tlen + 1;
9638
42.9k
      goto done;
9639
42.9k
  }
9640
8.29k
  ctxt->input->cur += tlen;
9641
8.29k
  ctxt->input->col += tlen;
9642
8.29k
  name = (xmlChar*)1;
9643
7.78M
    } else {
9644
7.78M
  if (prefix == NULL)
9645
7.50M
      name = xmlParseNameAndCompare(ctxt, ctxt->name);
9646
277k
  else
9647
277k
      name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
9648
7.78M
    }
9649
9650
    /*
9651
     * We should definitely be at the ending "S? '>'" part
9652
     */
9653
7.79M
    GROW;
9654
7.79M
    if (ctxt->instate == XML_PARSER_EOF)
9655
0
        return;
9656
7.79M
    SKIP_BLANKS;
9657
7.79M
    if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
9658
1.04M
  xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
9659
1.04M
    } else
9660
6.75M
  NEXT1;
9661
9662
    /*
9663
     * [ WFC: Element Type Match ]
9664
     * The Name in an element's end-tag must match the element type in the
9665
     * start-tag.
9666
     *
9667
     */
9668
7.79M
    if (name != (xmlChar*)1) {
9669
2.43M
        if (name == NULL) name = BAD_CAST "unparseable";
9670
2.43M
        if ((line == 0) && (ctxt->node != NULL))
9671
2.22M
            line = ctxt->node->line;
9672
2.43M
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
9673
2.43M
         "Opening and ending tag mismatch: %s line %d and %s\n",
9674
2.43M
                    ctxt->name, line, name);
9675
2.43M
    }
9676
9677
    /*
9678
     * SAX: End of Tag
9679
     */
9680
7.83M
done:
9681
7.83M
    if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9682
7.83M
  (!ctxt->disableSAX))
9683
7.70M
  ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
9684
9685
7.83M
    spacePop(ctxt);
9686
7.83M
    if (nsNr != 0)
9687
84.9k
  nsPop(ctxt, nsNr);
9688
7.83M
    return;
9689
7.79M
}
9690
9691
/**
9692
 * xmlParseCDSect:
9693
 * @ctxt:  an XML parser context
9694
 *
9695
 * Parse escaped pure raw content.
9696
 *
9697
 * [18] CDSect ::= CDStart CData CDEnd
9698
 *
9699
 * [19] CDStart ::= '<![CDATA['
9700
 *
9701
 * [20] Data ::= (Char* - (Char* ']]>' Char*))
9702
 *
9703
 * [21] CDEnd ::= ']]>'
9704
 */
9705
void
9706
47.8k
xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9707
47.8k
    xmlChar *buf = NULL;
9708
47.8k
    int len = 0;
9709
47.8k
    int size = XML_PARSER_BUFFER_SIZE;
9710
47.8k
    int r, rl;
9711
47.8k
    int s, sl;
9712
47.8k
    int cur, l;
9713
47.8k
    int count = 0;
9714
9715
    /* Check 2.6.0 was NXT(0) not RAW */
9716
47.8k
    if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9717
47.8k
  SKIP(9);
9718
47.8k
    } else
9719
0
        return;
9720
9721
47.8k
    ctxt->instate = XML_PARSER_CDATA_SECTION;
9722
47.8k
    r = CUR_CHAR(rl);
9723
47.8k
    if (!IS_CHAR(r)) {
9724
187
  xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9725
187
  ctxt->instate = XML_PARSER_CONTENT;
9726
187
        return;
9727
187
    }
9728
47.6k
    NEXTL(rl);
9729
47.6k
    s = CUR_CHAR(sl);
9730
47.6k
    if (!IS_CHAR(s)) {
9731
535
  xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9732
535
  ctxt->instate = XML_PARSER_CONTENT;
9733
535
        return;
9734
535
    }
9735
47.1k
    NEXTL(sl);
9736
47.1k
    cur = CUR_CHAR(l);
9737
47.1k
    buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
9738
47.1k
    if (buf == NULL) {
9739
0
  xmlErrMemory(ctxt, NULL);
9740
0
  return;
9741
0
    }
9742
92.3M
    while (IS_CHAR(cur) &&
9743
92.3M
           ((r != ']') || (s != ']') || (cur != '>'))) {
9744
92.3M
  if (len + 5 >= size) {
9745
37.1k
      xmlChar *tmp;
9746
9747
37.1k
            if ((size > XML_MAX_TEXT_LENGTH) &&
9748
0
                ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9749
0
                xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9750
0
                             "CData section too big found", NULL);
9751
0
                xmlFree (buf);
9752
0
                return;
9753
0
            }
9754
37.1k
      tmp = (xmlChar *) xmlRealloc(buf, size * 2 * sizeof(xmlChar));
9755
37.1k
      if (tmp == NULL) {
9756
0
          xmlFree(buf);
9757
0
    xmlErrMemory(ctxt, NULL);
9758
0
    return;
9759
0
      }
9760
37.1k
      buf = tmp;
9761
37.1k
      size *= 2;
9762
37.1k
  }
9763
92.3M
  COPY_BUF(rl,buf,len,r);
9764
92.3M
  r = s;
9765
92.3M
  rl = sl;
9766
92.3M
  s = cur;
9767
92.3M
  sl = l;
9768
92.3M
  count++;
9769
92.3M
  if (count > 50) {
9770
1.79M
      GROW;
9771
1.79M
            if (ctxt->instate == XML_PARSER_EOF) {
9772
4
    xmlFree(buf);
9773
4
    return;
9774
4
            }
9775
1.79M
      count = 0;
9776
1.79M
  }
9777
92.3M
  NEXTL(l);
9778
92.3M
  cur = CUR_CHAR(l);
9779
92.3M
    }
9780
47.1k
    buf[len] = 0;
9781
47.1k
    ctxt->instate = XML_PARSER_CONTENT;
9782
47.1k
    if (cur != '>') {
9783
6.45k
  xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9784
6.45k
                       "CData section not finished\n%.50s\n", buf);
9785
6.45k
  xmlFree(buf);
9786
6.45k
        return;
9787
6.45k
    }
9788
40.6k
    NEXTL(l);
9789
9790
    /*
9791
     * OK the buffer is to be consumed as cdata.
9792
     */
9793
40.6k
    if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9794
9.64k
  if (ctxt->sax->cdataBlock != NULL)
9795
9.64k
      ctxt->sax->cdataBlock(ctxt->userData, buf, len);
9796
0
  else if (ctxt->sax->characters != NULL)
9797
0
      ctxt->sax->characters(ctxt->userData, buf, len);
9798
9.64k
    }
9799
40.6k
    xmlFree(buf);
9800
40.6k
}
9801
9802
/**
9803
 * xmlParseContent:
9804
 * @ctxt:  an XML parser context
9805
 *
9806
 * Parse a content:
9807
 *
9808
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9809
 */
9810
9811
void
9812
1.57M
xmlParseContent(xmlParserCtxtPtr ctxt) {
9813
1.57M
    GROW;
9814
6.38M
    while ((RAW != 0) &&
9815
5.04M
     ((RAW != '<') || (NXT(1) != '/')) &&
9816
4.90M
     (ctxt->instate != XML_PARSER_EOF)) {
9817
4.81M
  const xmlChar *test = CUR_PTR;
9818
4.81M
  unsigned int cons = ctxt->input->consumed;
9819
4.81M
  const xmlChar *cur = ctxt->input->cur;
9820
9821
  /*
9822
   * First case : a Processing Instruction.
9823
   */
9824
4.81M
  if ((*cur == '<') && (cur[1] == '?')) {
9825
92.5k
      xmlParsePI(ctxt);
9826
92.5k
  }
9827
9828
  /*
9829
   * Second case : a CDSection
9830
   */
9831
  /* 2.6.0 test was *cur not RAW */
9832
4.71M
  else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9833
47.8k
      xmlParseCDSect(ctxt);
9834
47.8k
  }
9835
9836
  /*
9837
   * Third case :  a comment
9838
   */
9839
4.67M
  else if ((*cur == '<') && (NXT(1) == '!') &&
9840
312k
     (NXT(2) == '-') && (NXT(3) == '-')) {
9841
187k
      xmlParseComment(ctxt);
9842
187k
      ctxt->instate = XML_PARSER_CONTENT;
9843
187k
  }
9844
9845
  /*
9846
   * Fourth case :  a sub-element.
9847
   */
9848
4.48M
  else if (*cur == '<') {
9849
2.27M
      xmlParseElement(ctxt);
9850
2.27M
  }
9851
9852
  /*
9853
   * Fifth case : a reference. If if has not been resolved,
9854
   *    parsing returns it's Name, create the node
9855
   */
9856
9857
2.20M
  else if (*cur == '&') {
9858
593k
      xmlParseReference(ctxt);
9859
593k
  }
9860
9861
  /*
9862
   * Last case, text. Note that References are handled directly.
9863
   */
9864
1.61M
  else {
9865
1.61M
      xmlParseCharData(ctxt, 0);
9866
1.61M
  }
9867
9868
4.81M
  GROW;
9869
4.81M
  SHRINK;
9870
9871
4.81M
  if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
9872
0
      xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9873
0
                  "detected an error in element content\n");
9874
0
      xmlHaltParser(ctxt);
9875
0
            break;
9876
0
  }
9877
4.81M
    }
9878
1.57M
}
9879
9880
/**
9881
 * xmlParseElement:
9882
 * @ctxt:  an XML parser context
9883
 *
9884
 * parse an XML element, this is highly recursive
9885
 *
9886
 * [39] element ::= EmptyElemTag | STag content ETag
9887
 *
9888
 * [ WFC: Element Type Match ]
9889
 * The Name in an element's end-tag must match the element type in the
9890
 * start-tag.
9891
 *
9892
 */
9893
9894
void
9895
2.27M
xmlParseElement(xmlParserCtxtPtr ctxt) {
9896
2.27M
    const xmlChar *name;
9897
2.27M
    const xmlChar *prefix = NULL;
9898
2.27M
    const xmlChar *URI = NULL;
9899
2.27M
    xmlParserNodeInfo node_info;
9900
2.27M
    int line, tlen = 0;
9901
2.27M
    xmlNodePtr ret;
9902
2.27M
    int nsNr = ctxt->nsNr;
9903
9904
2.27M
    if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) &&
9905
693
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9906
693
  xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
9907
693
     "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
9908
693
        xmlParserMaxDepth);
9909
693
  xmlHaltParser(ctxt);
9910
693
  return;
9911
693
    }
9912
9913
    /* Capture start position */
9914
2.27M
    if (ctxt->record_info) {
9915
0
        node_info.begin_pos = ctxt->input->consumed +
9916
0
                          (CUR_PTR - ctxt->input->base);
9917
0
  node_info.begin_line = ctxt->input->line;
9918
0
    }
9919
9920
2.27M
    if (ctxt->spaceNr == 0)
9921
0
  spacePush(ctxt, -1);
9922
2.27M
    else if (*ctxt->space == -2)
9923
486k
  spacePush(ctxt, -1);
9924
1.78M
    else
9925
1.78M
  spacePush(ctxt, *ctxt->space);
9926
9927
2.27M
    line = ctxt->input->line;
9928
2.27M
#ifdef LIBXML_SAX1_ENABLED
9929
2.27M
    if (ctxt->sax2)
9930
2.27M
#endif /* LIBXML_SAX1_ENABLED */
9931
2.27M
        name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
9932
0
#ifdef LIBXML_SAX1_ENABLED
9933
0
    else
9934
0
  name = xmlParseStartTag(ctxt);
9935
2.27M
#endif /* LIBXML_SAX1_ENABLED */
9936
2.27M
    if (ctxt->instate == XML_PARSER_EOF)
9937
16
  return;
9938
2.27M
    if (name == NULL) {
9939
285k
  spacePop(ctxt);
9940
285k
        return;
9941
285k
    }
9942
1.98M
    namePush(ctxt, name);
9943
1.98M
    ret = ctxt->node;
9944
9945
1.98M
#ifdef LIBXML_VALID_ENABLED
9946
    /*
9947
     * [ VC: Root Element Type ]
9948
     * The Name in the document type declaration must match the element
9949
     * type of the root element.
9950
     */
9951
1.98M
    if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9952
0
        ctxt->node && (ctxt->node == ctxt->myDoc->children))
9953
0
        ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
9954
1.98M
#endif /* LIBXML_VALID_ENABLED */
9955
9956
    /*
9957
     * Check for an Empty Element.
9958
     */
9959
1.98M
    if ((RAW == '/') && (NXT(1) == '>')) {
9960
68.0k
        SKIP(2);
9961
68.0k
  if (ctxt->sax2) {
9962
68.0k
      if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9963
68.0k
    (!ctxt->disableSAX))
9964
27.4k
    ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
9965
68.0k
#ifdef LIBXML_SAX1_ENABLED
9966
68.0k
  } else {
9967
0
      if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
9968
0
    (!ctxt->disableSAX))
9969
0
    ctxt->sax->endElement(ctxt->userData, name);
9970
0
#endif /* LIBXML_SAX1_ENABLED */
9971
0
  }
9972
68.0k
  namePop(ctxt);
9973
68.0k
  spacePop(ctxt);
9974
68.0k
  if (nsNr != ctxt->nsNr)
9975
5.98k
      nsPop(ctxt, ctxt->nsNr - nsNr);
9976
68.0k
  if ( ret != NULL && ctxt->record_info ) {
9977
0
     node_info.end_pos = ctxt->input->consumed +
9978
0
            (CUR_PTR - ctxt->input->base);
9979
0
     node_info.end_line = ctxt->input->line;
9980
0
     node_info.node = ret;
9981
0
     xmlParserAddNodeInfo(ctxt, &node_info);
9982
0
  }
9983
68.0k
  return;
9984
68.0k
    }
9985
1.92M
    if (RAW == '>') {
9986
1.37M
        NEXT1;
9987
1.37M
    } else {
9988
545k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
9989
545k
         "Couldn't find end of Start Tag %s line %d\n",
9990
545k
                    name, line, NULL);
9991
9992
  /*
9993
   * end of parsing of this node.
9994
   */
9995
545k
  nodePop(ctxt);
9996
545k
  namePop(ctxt);
9997
545k
  spacePop(ctxt);
9998
545k
  if (nsNr != ctxt->nsNr)
9999
154k
      nsPop(ctxt, ctxt->nsNr - nsNr);
10000
10001
  /*
10002
   * Capture end position and add node
10003
   */
10004
545k
  if ( ret != NULL && ctxt->record_info ) {
10005
0
     node_info.end_pos = ctxt->input->consumed +
10006
0
            (CUR_PTR - ctxt->input->base);
10007
0
     node_info.end_line = ctxt->input->line;
10008
0
     node_info.node = ret;
10009
0
     xmlParserAddNodeInfo(ctxt, &node_info);
10010
0
  }
10011
545k
  return;
10012
545k
    }
10013
10014
    /*
10015
     * Parse the content of the element:
10016
     */
10017
1.37M
    xmlParseContent(ctxt);
10018
1.37M
    if (ctxt->instate == XML_PARSER_EOF)
10019
181k
  return;
10020
1.19M
    if (!IS_BYTE_CHAR(RAW)) {
10021
1.05M
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
10022
1.05M
   "Premature end of data in tag %s line %d\n",
10023
1.05M
                    name, line, NULL);
10024
10025
  /*
10026
   * end of parsing of this node.
10027
   */
10028
1.05M
  nodePop(ctxt);
10029
1.05M
  namePop(ctxt);
10030
1.05M
  spacePop(ctxt);
10031
1.05M
  if (nsNr != ctxt->nsNr)
10032
78.4k
      nsPop(ctxt, ctxt->nsNr - nsNr);
10033
1.05M
  return;
10034
1.05M
    }
10035
10036
    /*
10037
     * parse the end of tag: '</' should be here.
10038
     */
10039
135k
    if (ctxt->sax2) {
10040
135k
  xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen);
10041
135k
  namePop(ctxt);
10042
135k
    }
10043
0
#ifdef LIBXML_SAX1_ENABLED
10044
0
      else
10045
0
  xmlParseEndTag1(ctxt, line);
10046
135k
#endif /* LIBXML_SAX1_ENABLED */
10047
10048
    /*
10049
     * Capture end position and add node
10050
     */
10051
135k
    if ( ret != NULL && ctxt->record_info ) {
10052
0
       node_info.end_pos = ctxt->input->consumed +
10053
0
                          (CUR_PTR - ctxt->input->base);
10054
0
       node_info.end_line = ctxt->input->line;
10055
0
       node_info.node = ret;
10056
0
       xmlParserAddNodeInfo(ctxt, &node_info);
10057
0
    }
10058
135k
}
10059
10060
/**
10061
 * xmlParseVersionNum:
10062
 * @ctxt:  an XML parser context
10063
 *
10064
 * parse the XML version value.
10065
 *
10066
 * [26] VersionNum ::= '1.' [0-9]+
10067
 *
10068
 * In practice allow [0-9].[0-9]+ at that level
10069
 *
10070
 * Returns the string giving the XML version number, or NULL
10071
 */
10072
xmlChar *
10073
62.5k
xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
10074
62.5k
    xmlChar *buf = NULL;
10075
62.5k
    int len = 0;
10076
62.5k
    int size = 10;
10077
62.5k
    xmlChar cur;
10078
10079
62.5k
    buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
10080
62.5k
    if (buf == NULL) {
10081
0
  xmlErrMemory(ctxt, NULL);
10082
0
  return(NULL);
10083
0
    }
10084
62.5k
    cur = CUR;
10085
62.5k
    if (!((cur >= '0') && (cur <= '9'))) {
10086
283
  xmlFree(buf);
10087
283
  return(NULL);
10088
283
    }
10089
62.2k
    buf[len++] = cur;
10090
62.2k
    NEXT;
10091
62.2k
    cur=CUR;
10092
62.2k
    if (cur != '.') {
10093
158
  xmlFree(buf);
10094
158
  return(NULL);
10095
158
    }
10096
62.1k
    buf[len++] = cur;
10097
62.1k
    NEXT;
10098
62.1k
    cur=CUR;
10099
2.98M
    while ((cur >= '0') && (cur <= '9')) {
10100
2.92M
  if (len + 1 >= size) {
10101
2.20k
      xmlChar *tmp;
10102
10103
2.20k
      size *= 2;
10104
2.20k
      tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
10105
2.20k
      if (tmp == NULL) {
10106
0
          xmlFree(buf);
10107
0
    xmlErrMemory(ctxt, NULL);
10108
0
    return(NULL);
10109
0
      }
10110
2.20k
      buf = tmp;
10111
2.20k
  }
10112
2.92M
  buf[len++] = cur;
10113
2.92M
  NEXT;
10114
2.92M
  cur=CUR;
10115
2.92M
    }
10116
62.1k
    buf[len] = 0;
10117
62.1k
    return(buf);
10118
62.1k
}
10119
10120
/**
10121
 * xmlParseVersionInfo:
10122
 * @ctxt:  an XML parser context
10123
 *
10124
 * parse the XML version.
10125
 *
10126
 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
10127
 *
10128
 * [25] Eq ::= S? '=' S?
10129
 *
10130
 * Returns the version string, e.g. "1.0"
10131
 */
10132
10133
xmlChar *
10134
82.7k
xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
10135
82.7k
    xmlChar *version = NULL;
10136
10137
82.7k
    if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
10138
63.4k
  SKIP(7);
10139
63.4k
  SKIP_BLANKS;
10140
63.4k
  if (RAW != '=') {
10141
591
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10142
591
      return(NULL);
10143
591
        }
10144
62.8k
  NEXT;
10145
62.8k
  SKIP_BLANKS;
10146
62.8k
  if (RAW == '"') {
10147
47.8k
      NEXT;
10148
47.8k
      version = xmlParseVersionNum(ctxt);
10149
47.8k
      if (RAW != '"') {
10150
568
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10151
568
      } else
10152
47.2k
          NEXT;
10153
47.8k
  } else if (RAW == '\''){
10154
14.7k
      NEXT;
10155
14.7k
      version = xmlParseVersionNum(ctxt);
10156
14.7k
      if (RAW != '\'') {
10157
124
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10158
124
      } else
10159
14.5k
          NEXT;
10160
14.7k
  } else {
10161
327
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10162
327
  }
10163
62.8k
    }
10164
82.1k
    return(version);
10165
82.7k
}
10166
10167
/**
10168
 * xmlParseEncName:
10169
 * @ctxt:  an XML parser context
10170
 *
10171
 * parse the XML encoding name
10172
 *
10173
 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
10174
 *
10175
 * Returns the encoding name value or NULL
10176
 */
10177
xmlChar *
10178
64.7k
xmlParseEncName(xmlParserCtxtPtr ctxt) {
10179
64.7k
    xmlChar *buf = NULL;
10180
64.7k
    int len = 0;
10181
64.7k
    int size = 10;
10182
64.7k
    xmlChar cur;
10183
10184
64.7k
    cur = CUR;
10185
64.7k
    if (((cur >= 'a') && (cur <= 'z')) ||
10186
64.5k
        ((cur >= 'A') && (cur <= 'Z'))) {
10187
64.5k
  buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
10188
64.5k
  if (buf == NULL) {
10189
0
      xmlErrMemory(ctxt, NULL);
10190
0
      return(NULL);
10191
0
  }
10192
10193
64.5k
  buf[len++] = cur;
10194
64.5k
  NEXT;
10195
64.5k
  cur = CUR;
10196
5.62M
  while (((cur >= 'a') && (cur <= 'z')) ||
10197
4.62M
         ((cur >= 'A') && (cur <= 'Z')) ||
10198
2.56M
         ((cur >= '0') && (cur <= '9')) ||
10199
110k
         (cur == '.') || (cur == '_') ||
10200
5.56M
         (cur == '-')) {
10201
5.56M
      if (len + 1 >= size) {
10202
3.82k
          xmlChar *tmp;
10203
10204
3.82k
    size *= 2;
10205
3.82k
    tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
10206
3.82k
    if (tmp == NULL) {
10207
0
        xmlErrMemory(ctxt, NULL);
10208
0
        xmlFree(buf);
10209
0
        return(NULL);
10210
0
    }
10211
3.82k
    buf = tmp;
10212
3.82k
      }
10213
5.56M
      buf[len++] = cur;
10214
5.56M
      NEXT;
10215
5.56M
      cur = CUR;
10216
5.56M
      if (cur == 0) {
10217
285
          SHRINK;
10218
285
    GROW;
10219
285
    cur = CUR;
10220
285
      }
10221
5.56M
        }
10222
64.5k
  buf[len] = 0;
10223
64.5k
    } else {
10224
189
  xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
10225
189
    }
10226
64.7k
    return(buf);
10227
64.7k
}
10228
10229
/**
10230
 * xmlParseEncodingDecl:
10231
 * @ctxt:  an XML parser context
10232
 *
10233
 * parse the XML encoding declaration
10234
 *
10235
 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' |  "'" EncName "'")
10236
 *
10237
 * this setups the conversion filters.
10238
 *
10239
 * Returns the encoding value or NULL
10240
 */
10241
10242
const xmlChar *
10243
79.9k
xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
10244
79.9k
    xmlChar *encoding = NULL;
10245
10246
79.9k
    SKIP_BLANKS;
10247
79.9k
    if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
10248
64.8k
  SKIP(8);
10249
64.8k
  SKIP_BLANKS;
10250
64.8k
  if (RAW != '=') {
10251
88
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10252
88
      return(NULL);
10253
88
        }
10254
64.7k
  NEXT;
10255
64.7k
  SKIP_BLANKS;
10256
64.7k
  if (RAW == '"') {
10257
49.7k
      NEXT;
10258
49.7k
      encoding = xmlParseEncName(ctxt);
10259
49.7k
      if (RAW != '"') {
10260
463
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10261
463
    xmlFree((xmlChar *) encoding);
10262
463
    return(NULL);
10263
463
      } else
10264
49.3k
          NEXT;
10265
49.7k
  } else if (RAW == '\''){
10266
14.9k
      NEXT;
10267
14.9k
      encoding = xmlParseEncName(ctxt);
10268
14.9k
      if (RAW != '\'') {
10269
256
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10270
256
    xmlFree((xmlChar *) encoding);
10271
256
    return(NULL);
10272
256
      } else
10273
14.6k
          NEXT;
10274
14.9k
  } else {
10275
68
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10276
68
  }
10277
10278
        /*
10279
         * Non standard parsing, allowing the user to ignore encoding
10280
         */
10281
64.0k
        if (ctxt->options & XML_PARSE_IGNORE_ENC) {
10282
0
      xmlFree((xmlChar *) encoding);
10283
0
            return(NULL);
10284
0
  }
10285
10286
  /*
10287
   * UTF-16 encoding stwich has already taken place at this stage,
10288
   * more over the little-endian/big-endian selection is already done
10289
   */
10290
64.0k
        if ((encoding != NULL) &&
10291
63.9k
      ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
10292
63.9k
       (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
10293
      /*
10294
       * If no encoding was passed to the parser, that we are
10295
       * using UTF-16 and no decoder is present i.e. the
10296
       * document is apparently UTF-8 compatible, then raise an
10297
       * encoding mismatch fatal error
10298
       */
10299
20
      if ((ctxt->encoding == NULL) &&
10300
20
          (ctxt->input->buf != NULL) &&
10301
20
          (ctxt->input->buf->encoder == NULL)) {
10302
14
    xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING,
10303
14
      "Document labelled UTF-16 but has UTF-8 content\n");
10304
14
      }
10305
20
      if (ctxt->encoding != NULL)
10306
0
    xmlFree((xmlChar *) ctxt->encoding);
10307
20
      ctxt->encoding = encoding;
10308
20
  }
10309
  /*
10310
   * UTF-8 encoding is handled natively
10311
   */
10312
64.0k
        else if ((encoding != NULL) &&
10313
63.9k
      ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
10314
33.4k
       (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
10315
33.4k
      if (ctxt->encoding != NULL)
10316
0
    xmlFree((xmlChar *) ctxt->encoding);
10317
33.4k
      ctxt->encoding = encoding;
10318
33.4k
  }
10319
30.6k
  else if (encoding != NULL) {
10320
30.5k
      xmlCharEncodingHandlerPtr handler;
10321
10322
30.5k
      if (ctxt->input->encoding != NULL)
10323
0
    xmlFree((xmlChar *) ctxt->input->encoding);
10324
30.5k
      ctxt->input->encoding = encoding;
10325
10326
30.5k
            handler = xmlFindCharEncodingHandler((const char *) encoding);
10327
30.5k
      if (handler != NULL) {
10328
28.0k
    if (xmlSwitchToEncoding(ctxt, handler) < 0) {
10329
        /* failed to convert */
10330
44
        ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
10331
44
        return(NULL);
10332
44
    }
10333
28.0k
      } else {
10334
2.44k
    xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
10335
2.44k
      "Unsupported encoding %s\n", encoding);
10336
2.44k
    return(NULL);
10337
2.44k
      }
10338
30.5k
  }
10339
64.0k
    }
10340
76.6k
    return(encoding);
10341
79.9k
}
10342
10343
/**
10344
 * xmlParseSDDecl:
10345
 * @ctxt:  an XML parser context
10346
 *
10347
 * parse the XML standalone declaration
10348
 *
10349
 * [32] SDDecl ::= S 'standalone' Eq
10350
 *                 (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
10351
 *
10352
 * [ VC: Standalone Document Declaration ]
10353
 * TODO The standalone document declaration must have the value "no"
10354
 * if any external markup declarations contain declarations of:
10355
 *  - attributes with default values, if elements to which these
10356
 *    attributes apply appear in the document without specifications
10357
 *    of values for these attributes, or
10358
 *  - entities (other than amp, lt, gt, apos, quot), if references
10359
 *    to those entities appear in the document, or
10360
 *  - attributes with values subject to normalization, where the
10361
 *    attribute appears in the document with a value which will change
10362
 *    as a result of normalization, or
10363
 *  - element types with element content, if white space occurs directly
10364
 *    within any instance of those types.
10365
 *
10366
 * Returns:
10367
 *   1 if standalone="yes"
10368
 *   0 if standalone="no"
10369
 *  -2 if standalone attribute is missing or invalid
10370
 *    (A standalone value of -2 means that the XML declaration was found,
10371
 *     but no value was specified for the standalone attribute).
10372
 */
10373
10374
int
10375
53.9k
xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
10376
53.9k
    int standalone = -2;
10377
10378
53.9k
    SKIP_BLANKS;
10379
53.9k
    if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
10380
11.9k
  SKIP(10);
10381
11.9k
        SKIP_BLANKS;
10382
11.9k
  if (RAW != '=') {
10383
45
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10384
45
      return(standalone);
10385
45
        }
10386
11.9k
  NEXT;
10387
11.9k
  SKIP_BLANKS;
10388
11.9k
        if (RAW == '\''){
10389
293
      NEXT;
10390
293
      if ((RAW == 'n') && (NXT(1) == 'o')) {
10391
167
          standalone = 0;
10392
167
                SKIP(2);
10393
167
      } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10394
86
                 (NXT(2) == 's')) {
10395
78
          standalone = 1;
10396
78
    SKIP(3);
10397
78
            } else {
10398
48
    xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10399
48
      }
10400
293
      if (RAW != '\'') {
10401
129
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10402
129
      } else
10403
164
          NEXT;
10404
11.6k
  } else if (RAW == '"'){
10405
11.6k
      NEXT;
10406
11.6k
      if ((RAW == 'n') && (NXT(1) == 'o')) {
10407
24
          standalone = 0;
10408
24
    SKIP(2);
10409
11.5k
      } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10410
11.5k
                 (NXT(2) == 's')) {
10411
11.4k
          standalone = 1;
10412
11.4k
                SKIP(3);
10413
11.4k
            } else {
10414
129
    xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10415
129
      }
10416
11.6k
      if (RAW != '"') {
10417
192
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10418
192
      } else
10419
11.4k
          NEXT;
10420
11.6k
  } else {
10421
16
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10422
16
        }
10423
11.9k
    }
10424
53.9k
    return(standalone);
10425
53.9k
}
10426
10427
/**
10428
 * xmlParseXMLDecl:
10429
 * @ctxt:  an XML parser context
10430
 *
10431
 * parse an XML declaration header
10432
 *
10433
 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10434
 */
10435
10436
void
10437
82.7k
xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
10438
82.7k
    xmlChar *version;
10439
10440
    /*
10441
     * This value for standalone indicates that the document has an
10442
     * XML declaration but it does not have a standalone attribute.
10443
     * It will be overwritten later if a standalone attribute is found.
10444
     */
10445
82.7k
    ctxt->input->standalone = -2;
10446
10447
    /*
10448
     * We know that '<?xml' is here.
10449
     */
10450
82.7k
    SKIP(5);
10451
10452
82.7k
    if (!IS_BLANK_CH(RAW)) {
10453
0
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10454
0
                 "Blank needed after '<?xml'\n");
10455
0
    }
10456
82.7k
    SKIP_BLANKS;
10457
10458
    /*
10459
     * We must have the VersionInfo here.
10460
     */
10461
82.7k
    version = xmlParseVersionInfo(ctxt);
10462
82.7k
    if (version == NULL) {
10463
20.6k
  xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
10464
62.1k
    } else {
10465
62.1k
  if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10466
      /*
10467
       * Changed here for XML-1.0 5th edition
10468
       */
10469
12.9k
      if (ctxt->options & XML_PARSE_OLD10) {
10470
0
    xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10471
0
                "Unsupported version '%s'\n",
10472
0
                version);
10473
12.9k
      } else {
10474
12.9k
          if ((version[0] == '1') && ((version[1] == '.'))) {
10475
12.1k
        xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10476
12.1k
                      "Unsupported version '%s'\n",
10477
12.1k
          version, NULL);
10478
12.1k
    } else {
10479
757
        xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10480
757
              "Unsupported version '%s'\n",
10481
757
              version);
10482
757
    }
10483
12.9k
      }
10484
12.9k
  }
10485
62.1k
  if (ctxt->version != NULL)
10486
0
      xmlFree((void *) ctxt->version);
10487
62.1k
  ctxt->version = version;
10488
62.1k
    }
10489
10490
    /*
10491
     * We may have the encoding declaration
10492
     */
10493
82.7k
    if (!IS_BLANK_CH(RAW)) {
10494
23.4k
        if ((RAW == '?') && (NXT(1) == '>')) {
10495
2.78k
      SKIP(2);
10496
2.78k
      return;
10497
2.78k
  }
10498
20.6k
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10499
20.6k
    }
10500
79.9k
    xmlParseEncodingDecl(ctxt);
10501
79.9k
    if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10502
77.4k
         (ctxt->instate == XML_PARSER_EOF)) {
10503
  /*
10504
   * The XML REC instructs us to stop parsing right here
10505
   */
10506
2.49k
        return;
10507
2.49k
    }
10508
10509
    /*
10510
     * We may have the standalone status.
10511
     */
10512
77.4k
    if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
10513
27.5k
        if ((RAW == '?') && (NXT(1) == '>')) {
10514
23.4k
      SKIP(2);
10515
23.4k
      return;
10516
23.4k
  }
10517
4.09k
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10518
4.09k
    }
10519
10520
    /*
10521
     * We can grow the input buffer freely at that point
10522
     */
10523
53.9k
    GROW;
10524
10525
53.9k
    SKIP_BLANKS;
10526
53.9k
    ctxt->input->standalone = xmlParseSDDecl(ctxt);
10527
10528
53.9k
    SKIP_BLANKS;
10529
53.9k
    if ((RAW == '?') && (NXT(1) == '>')) {
10530
33.6k
        SKIP(2);
10531
33.6k
    } else if (RAW == '>') {
10532
        /* Deprecated old WD ... */
10533
6.63k
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10534
6.63k
  NEXT;
10535
13.6k
    } else {
10536
13.6k
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10537
13.6k
  MOVETO_ENDTAG(CUR_PTR);
10538
13.6k
  NEXT;
10539
13.6k
    }
10540
53.9k
}
10541
10542
/**
10543
 * xmlParseMisc:
10544
 * @ctxt:  an XML parser context
10545
 *
10546
 * parse an XML Misc* optional field.
10547
 *
10548
 * [27] Misc ::= Comment | PI |  S
10549
 */
10550
10551
void
10552
0
xmlParseMisc(xmlParserCtxtPtr ctxt) {
10553
0
    while ((ctxt->instate != XML_PARSER_EOF) &&
10554
0
           (((RAW == '<') && (NXT(1) == '?')) ||
10555
0
            (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
10556
0
            IS_BLANK_CH(CUR))) {
10557
0
        if ((RAW == '<') && (NXT(1) == '?')) {
10558
0
      xmlParsePI(ctxt);
10559
0
  } else if (IS_BLANK_CH(CUR)) {
10560
0
      NEXT;
10561
0
  } else
10562
0
      xmlParseComment(ctxt);
10563
0
    }
10564
0
}
10565
10566
/**
10567
 * xmlParseDocument:
10568
 * @ctxt:  an XML parser context
10569
 *
10570
 * parse an XML document (and build a tree if using the standard SAX
10571
 * interface).
10572
 *
10573
 * [1] document ::= prolog element Misc*
10574
 *
10575
 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
10576
 *
10577
 * Returns 0, -1 in case of error. the parser context is augmented
10578
 *                as a result of the parsing.
10579
 */
10580
10581
int
10582
0
xmlParseDocument(xmlParserCtxtPtr ctxt) {
10583
0
    xmlChar start[4];
10584
0
    xmlCharEncoding enc;
10585
10586
0
    xmlInitParser();
10587
10588
0
    if ((ctxt == NULL) || (ctxt->input == NULL))
10589
0
        return(-1);
10590
10591
0
    GROW;
10592
10593
    /*
10594
     * SAX: detecting the level.
10595
     */
10596
0
    xmlDetectSAX2(ctxt);
10597
10598
    /*
10599
     * SAX: beginning of the document processing.
10600
     */
10601
0
    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10602
0
        ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10603
0
    if (ctxt->instate == XML_PARSER_EOF)
10604
0
  return(-1);
10605
10606
0
    if ((ctxt->encoding == NULL) &&
10607
0
        ((ctxt->input->end - ctxt->input->cur) >= 4)) {
10608
  /*
10609
   * Get the 4 first bytes and decode the charset
10610
   * if enc != XML_CHAR_ENCODING_NONE
10611
   * plug some encoding conversion routines.
10612
   */
10613
0
  start[0] = RAW;
10614
0
  start[1] = NXT(1);
10615
0
  start[2] = NXT(2);
10616
0
  start[3] = NXT(3);
10617
0
  enc = xmlDetectCharEncoding(&start[0], 4);
10618
0
  if (enc != XML_CHAR_ENCODING_NONE) {
10619
0
      xmlSwitchEncoding(ctxt, enc);
10620
0
  }
10621
0
    }
10622
10623
10624
0
    if (CUR == 0) {
10625
0
  xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10626
0
  return(-1);
10627
0
    }
10628
10629
    /*
10630
     * Check for the XMLDecl in the Prolog.
10631
     * do not GROW here to avoid the detected encoder to decode more
10632
     * than just the first line, unless the amount of data is really
10633
     * too small to hold "<?xml version="1.0" encoding="foo"
10634
     */
10635
0
    if ((ctxt->input->end - ctxt->input->cur) < 35) {
10636
0
       GROW;
10637
0
    }
10638
0
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10639
10640
  /*
10641
   * Note that we will switch encoding on the fly.
10642
   */
10643
0
  xmlParseXMLDecl(ctxt);
10644
0
  if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10645
0
      (ctxt->instate == XML_PARSER_EOF)) {
10646
      /*
10647
       * The XML REC instructs us to stop parsing right here
10648
       */
10649
0
      return(-1);
10650
0
  }
10651
0
  ctxt->standalone = ctxt->input->standalone;
10652
0
  SKIP_BLANKS;
10653
0
    } else {
10654
0
  ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10655
0
    }
10656
0
    if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10657
0
        ctxt->sax->startDocument(ctxt->userData);
10658
0
    if (ctxt->instate == XML_PARSER_EOF)
10659
0
  return(-1);
10660
0
    if ((ctxt->myDoc != NULL) && (ctxt->input != NULL) &&
10661
0
        (ctxt->input->buf != NULL) && (ctxt->input->buf->compressed >= 0)) {
10662
0
  ctxt->myDoc->compression = ctxt->input->buf->compressed;
10663
0
    }
10664
10665
    /*
10666
     * The Misc part of the Prolog
10667
     */
10668
0
    GROW;
10669
0
    xmlParseMisc(ctxt);
10670
10671
    /*
10672
     * Then possibly doc type declaration(s) and more Misc
10673
     * (doctypedecl Misc*)?
10674
     */
10675
0
    GROW;
10676
0
    if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
10677
10678
0
  ctxt->inSubset = 1;
10679
0
  xmlParseDocTypeDecl(ctxt);
10680
0
  if (RAW == '[') {
10681
0
      ctxt->instate = XML_PARSER_DTD;
10682
0
      xmlParseInternalSubset(ctxt);
10683
0
      if (ctxt->instate == XML_PARSER_EOF)
10684
0
    return(-1);
10685
0
  }
10686
10687
  /*
10688
   * Create and update the external subset.
10689
   */
10690
0
  ctxt->inSubset = 2;
10691
0
  if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10692
0
      (!ctxt->disableSAX))
10693
0
      ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10694
0
                                ctxt->extSubSystem, ctxt->extSubURI);
10695
0
  if (ctxt->instate == XML_PARSER_EOF)
10696
0
      return(-1);
10697
0
  ctxt->inSubset = 0;
10698
10699
0
        xmlCleanSpecialAttr(ctxt);
10700
10701
0
  ctxt->instate = XML_PARSER_PROLOG;
10702
0
  xmlParseMisc(ctxt);
10703
0
    }
10704
10705
    /*
10706
     * Time to start parsing the tree itself
10707
     */
10708
0
    GROW;
10709
0
    if (RAW != '<') {
10710
0
  xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10711
0
           "Start tag expected, '<' not found\n");
10712
0
    } else {
10713
0
  ctxt->instate = XML_PARSER_CONTENT;
10714
0
  xmlParseElement(ctxt);
10715
0
  ctxt->instate = XML_PARSER_EPILOG;
10716
10717
10718
  /*
10719
   * The Misc part at the end
10720
   */
10721
0
  xmlParseMisc(ctxt);
10722
10723
0
  if (RAW != 0) {
10724
0
      xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
10725
0
  }
10726
0
  ctxt->instate = XML_PARSER_EOF;
10727
0
    }
10728
10729
    /*
10730
     * SAX: end of the document processing.
10731
     */
10732
0
    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10733
0
        ctxt->sax->endDocument(ctxt->userData);
10734
10735
    /*
10736
     * Remove locally kept entity definitions if the tree was not built
10737
     */
10738
0
    if ((ctxt->myDoc != NULL) &&
10739
0
  (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
10740
0
  xmlFreeDoc(ctxt->myDoc);
10741
0
  ctxt->myDoc = NULL;
10742
0
    }
10743
10744
0
    if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) {
10745
0
        ctxt->myDoc->properties |= XML_DOC_WELLFORMED;
10746
0
  if (ctxt->valid)
10747
0
      ctxt->myDoc->properties |= XML_DOC_DTDVALID;
10748
0
  if (ctxt->nsWellFormed)
10749
0
      ctxt->myDoc->properties |= XML_DOC_NSVALID;
10750
0
  if (ctxt->options & XML_PARSE_OLD10)
10751
0
      ctxt->myDoc->properties |= XML_DOC_OLD10;
10752
0
    }
10753
0
    if (! ctxt->wellFormed) {
10754
0
  ctxt->valid = 0;
10755
0
  return(-1);
10756
0
    }
10757
0
    return(0);
10758
0
}
10759
10760
/**
10761
 * xmlParseExtParsedEnt:
10762
 * @ctxt:  an XML parser context
10763
 *
10764
 * parse a general parsed entity
10765
 * An external general parsed entity is well-formed if it matches the
10766
 * production labeled extParsedEnt.
10767
 *
10768
 * [78] extParsedEnt ::= TextDecl? content
10769
 *
10770
 * Returns 0, -1 in case of error. the parser context is augmented
10771
 *                as a result of the parsing.
10772
 */
10773
10774
int
10775
0
xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
10776
0
    xmlChar start[4];
10777
0
    xmlCharEncoding enc;
10778
10779
0
    if ((ctxt == NULL) || (ctxt->input == NULL))
10780
0
        return(-1);
10781
10782
0
    xmlDefaultSAXHandlerInit();
10783
10784
0
    xmlDetectSAX2(ctxt);
10785
10786
0
    GROW;
10787
10788
    /*
10789
     * SAX: beginning of the document processing.
10790
     */
10791
0
    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10792
0
        ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10793
10794
    /*
10795
     * Get the 4 first bytes and decode the charset
10796
     * if enc != XML_CHAR_ENCODING_NONE
10797
     * plug some encoding conversion routines.
10798
     */
10799
0
    if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10800
0
  start[0] = RAW;
10801
0
  start[1] = NXT(1);
10802
0
  start[2] = NXT(2);
10803
0
  start[3] = NXT(3);
10804
0
  enc = xmlDetectCharEncoding(start, 4);
10805
0
  if (enc != XML_CHAR_ENCODING_NONE) {
10806
0
      xmlSwitchEncoding(ctxt, enc);
10807
0
  }
10808
0
    }
10809
10810
10811
0
    if (CUR == 0) {
10812
0
  xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10813
0
    }
10814
10815
    /*
10816
     * Check for the XMLDecl in the Prolog.
10817
     */
10818
0
    GROW;
10819
0
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10820
10821
  /*
10822
   * Note that we will switch encoding on the fly.
10823
   */
10824
0
  xmlParseXMLDecl(ctxt);
10825
0
  if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10826
      /*
10827
       * The XML REC instructs us to stop parsing right here
10828
       */
10829
0
      return(-1);
10830
0
  }
10831
0
  SKIP_BLANKS;
10832
0
    } else {
10833
0
  ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10834
0
    }
10835
0
    if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10836
0
        ctxt->sax->startDocument(ctxt->userData);
10837
0
    if (ctxt->instate == XML_PARSER_EOF)
10838
0
  return(-1);
10839
10840
    /*
10841
     * Doing validity checking on chunk doesn't make sense
10842
     */
10843
0
    ctxt->instate = XML_PARSER_CONTENT;
10844
0
    ctxt->validate = 0;
10845
0
    ctxt->loadsubset = 0;
10846
0
    ctxt->depth = 0;
10847
10848
0
    xmlParseContent(ctxt);
10849
0
    if (ctxt->instate == XML_PARSER_EOF)
10850
0
  return(-1);
10851
10852
0
    if ((RAW == '<') && (NXT(1) == '/')) {
10853
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
10854
0
    } else if (RAW != 0) {
10855
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
10856
0
    }
10857
10858
    /*
10859
     * SAX: end of the document processing.
10860
     */
10861
0
    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10862
0
        ctxt->sax->endDocument(ctxt->userData);
10863
10864
0
    if (! ctxt->wellFormed) return(-1);
10865
0
    return(0);
10866
0
}
10867
10868
#ifdef LIBXML_PUSH_ENABLED
10869
/************************************************************************
10870
 *                  *
10871
 *    Progressive parsing interfaces        *
10872
 *                  *
10873
 ************************************************************************/
10874
10875
/**
10876
 * xmlParseLookupSequence:
10877
 * @ctxt:  an XML parser context
10878
 * @first:  the first char to lookup
10879
 * @next:  the next char to lookup or zero
10880
 * @third:  the next char to lookup or zero
10881
 *
10882
 * Try to find if a sequence (first, next, third) or  just (first next) or
10883
 * (first) is available in the input stream.
10884
 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
10885
 * to avoid rescanning sequences of bytes, it DOES change the state of the
10886
 * parser, do not use liberally.
10887
 *
10888
 * Returns the index to the current parsing point if the full sequence
10889
 *      is available, -1 otherwise.
10890
 */
10891
static int
10892
xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
10893
8.59M
                       xmlChar next, xmlChar third) {
10894
8.59M
    int base, len;
10895
8.59M
    xmlParserInputPtr in;
10896
8.59M
    const xmlChar *buf;
10897
10898
8.59M
    in = ctxt->input;
10899
8.59M
    if (in == NULL) return(-1);
10900
8.59M
    base = in->cur - in->base;
10901
8.59M
    if (base < 0) return(-1);
10902
8.59M
    if (ctxt->checkIndex > base)
10903
7.17M
        base = ctxt->checkIndex;
10904
8.59M
    if (in->buf == NULL) {
10905
0
  buf = in->base;
10906
0
  len = in->length;
10907
8.59M
    } else {
10908
8.59M
  buf = xmlBufContent(in->buf->buffer);
10909
8.59M
  len = xmlBufUse(in->buf->buffer);
10910
8.59M
    }
10911
    /* take into account the sequence length */
10912
8.59M
    if (third) len -= 2;
10913
7.62M
    else if (next) len --;
10914
7.28G
    for (;base < len;base++) {
10915
7.27G
        if (buf[base] == first) {
10916
38.2M
      if (third != 0) {
10917
15.8M
    if ((buf[base + 1] != next) ||
10918
15.0M
        (buf[base + 2] != third)) continue;
10919
22.3M
      } else if (next != 0) {
10920
22.1M
    if (buf[base + 1] != next) continue;
10921
22.1M
      }
10922
1.27M
      ctxt->checkIndex = 0;
10923
#ifdef DEBUG_PUSH
10924
      if (next == 0)
10925
    xmlGenericError(xmlGenericErrorContext,
10926
      "PP: lookup '%c' found at %d\n",
10927
      first, base);
10928
      else if (third == 0)
10929
    xmlGenericError(xmlGenericErrorContext,
10930
      "PP: lookup '%c%c' found at %d\n",
10931
      first, next, base);
10932
      else
10933
    xmlGenericError(xmlGenericErrorContext,
10934
      "PP: lookup '%c%c%c' found at %d\n",
10935
      first, next, third, base);
10936
#endif
10937
1.27M
      return(base - (in->cur - in->base));
10938
38.2M
  }
10939
7.27G
    }
10940
7.31M
    ctxt->checkIndex = base;
10941
#ifdef DEBUG_PUSH
10942
    if (next == 0)
10943
  xmlGenericError(xmlGenericErrorContext,
10944
    "PP: lookup '%c' failed\n", first);
10945
    else if (third == 0)
10946
  xmlGenericError(xmlGenericErrorContext,
10947
    "PP: lookup '%c%c' failed\n", first, next);
10948
    else
10949
  xmlGenericError(xmlGenericErrorContext,
10950
    "PP: lookup '%c%c%c' failed\n", first, next, third);
10951
#endif
10952
7.31M
    return(-1);
10953
8.59M
}
10954
10955
/**
10956
 * xmlParseGetLasts:
10957
 * @ctxt:  an XML parser context
10958
 * @lastlt:  pointer to store the last '<' from the input
10959
 * @lastgt:  pointer to store the last '>' from the input
10960
 *
10961
 * Lookup the last < and > in the current chunk
10962
 */
10963
static void
10964
xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
10965
8.66M
                 const xmlChar **lastgt) {
10966
8.66M
    const xmlChar *tmp;
10967
10968
8.66M
    if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
10969
0
  xmlGenericError(xmlGenericErrorContext,
10970
0
        "Internal error: xmlParseGetLasts\n");
10971
0
  return;
10972
0
    }
10973
8.66M
    if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
10974
1.75M
        tmp = ctxt->input->end;
10975
1.75M
  tmp--;
10976
7.72G
  while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
10977
1.75M
  if (tmp < ctxt->input->base) {
10978
84.8k
      *lastlt = NULL;
10979
84.8k
      *lastgt = NULL;
10980
1.67M
  } else {
10981
1.67M
      *lastlt = tmp;
10982
1.67M
      tmp++;
10983
1.94G
      while ((tmp < ctxt->input->end) && (*tmp != '>')) {
10984
1.94G
          if (*tmp == '\'') {
10985
1.18M
        tmp++;
10986
324M
        while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
10987
1.18M
        if (tmp < ctxt->input->end) tmp++;
10988
1.94G
    } else if (*tmp == '"') {
10989
1.95M
        tmp++;
10990
4.45G
        while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
10991
1.95M
        if (tmp < ctxt->input->end) tmp++;
10992
1.95M
    } else
10993
1.93G
        tmp++;
10994
1.94G
      }
10995
1.67M
      if (tmp < ctxt->input->end)
10996
617k
          *lastgt = tmp;
10997
1.05M
      else {
10998
1.05M
          tmp = *lastlt;
10999
1.05M
    tmp--;
11000
222M
    while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
11001
1.05M
    if (tmp >= ctxt->input->base)
11002
997k
        *lastgt = tmp;
11003
59.0k
    else
11004
59.0k
        *lastgt = NULL;
11005
1.05M
      }
11006
1.67M
  }
11007
6.90M
    } else {
11008
6.90M
        *lastlt = NULL;
11009
6.90M
  *lastgt = NULL;
11010
6.90M
    }
11011
8.66M
}
11012
/**
11013
 * xmlCheckCdataPush:
11014
 * @cur: pointer to the block of characters
11015
 * @len: length of the block in bytes
11016
 * @complete: 1 if complete CDATA block is passed in, 0 if partial block
11017
 *
11018
 * Check that the block of characters is okay as SCdata content [20]
11019
 *
11020
 * Returns the number of bytes to pass if okay, a negative index where an
11021
 *         UTF-8 error occurred otherwise
11022
 */
11023
static int
11024
750k
xmlCheckCdataPush(const xmlChar *utf, int len, int complete) {
11025
750k
    int ix;
11026
750k
    unsigned char c;
11027
750k
    int codepoint;
11028
11029
750k
    if ((utf == NULL) || (len <= 0))
11030
464k
        return(0);
11031
11032
116M
    for (ix = 0; ix < len;) {      /* string is 0-terminated */
11033
115M
        c = utf[ix];
11034
115M
        if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
11035
39.9M
      if (c >= 0x20)
11036
38.8M
    ix++;
11037
1.10M
      else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
11038
1.09M
          ix++;
11039
1.99k
      else
11040
1.99k
          return(-ix);
11041
75.8M
  } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
11042
474k
      if (ix + 2 > len) return(complete ? -ix : ix);
11043
473k
      if ((utf[ix+1] & 0xc0 ) != 0x80)
11044
1.28k
          return(-ix);
11045
471k
      codepoint = (utf[ix] & 0x1f) << 6;
11046
471k
      codepoint |= utf[ix+1] & 0x3f;
11047
471k
      if (!xmlIsCharQ(codepoint))
11048
754
          return(-ix);
11049
471k
      ix += 2;
11050
75.3M
  } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
11051
75.3M
      if (ix + 3 > len) return(complete ? -ix : ix);
11052
75.3M
      if (((utf[ix+1] & 0xc0) != 0x80) ||
11053
75.2M
          ((utf[ix+2] & 0xc0) != 0x80))
11054
1.96k
        return(-ix);
11055
75.2M
      codepoint = (utf[ix] & 0xf) << 12;
11056
75.2M
      codepoint |= (utf[ix+1] & 0x3f) << 6;
11057
75.2M
      codepoint |= utf[ix+2] & 0x3f;
11058
75.2M
      if (!xmlIsCharQ(codepoint))
11059
2.01k
          return(-ix);
11060
75.2M
      ix += 3;
11061
75.2M
  } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
11062
39.4k
      if (ix + 4 > len) return(complete ? -ix : ix);
11063
39.1k
      if (((utf[ix+1] & 0xc0) != 0x80) ||
11064
37.9k
          ((utf[ix+2] & 0xc0) != 0x80) ||
11065
36.7k
    ((utf[ix+3] & 0xc0) != 0x80))
11066
4.04k
        return(-ix);
11067
35.1k
      codepoint = (utf[ix] & 0x7) << 18;
11068
35.1k
      codepoint |= (utf[ix+1] & 0x3f) << 12;
11069
35.1k
      codepoint |= (utf[ix+2] & 0x3f) << 6;
11070
35.1k
      codepoint |= utf[ix+3] & 0x3f;
11071
35.1k
      if (!xmlIsCharQ(codepoint))
11072
2.05k
          return(-ix);
11073
33.0k
      ix += 4;
11074
33.0k
  } else       /* unknown encoding */
11075
1.66k
      return(-ix);
11076
115M
      }
11077
265k
      return(ix);
11078
285k
}
11079
11080
/**
11081
 * xmlParseTryOrFinish:
11082
 * @ctxt:  an XML parser context
11083
 * @terminate:  last chunk indicator
11084
 *
11085
 * Try to progress on parsing
11086
 *
11087
 * Returns zero if no parsing was possible
11088
 */
11089
static int
11090
8.45M
xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
11091
8.45M
    int ret = 0;
11092
8.45M
    int avail, tlen;
11093
8.45M
    xmlChar cur, next;
11094
8.45M
    const xmlChar *lastlt, *lastgt;
11095
11096
8.45M
    if (ctxt->input == NULL)
11097
0
        return(0);
11098
11099
#ifdef DEBUG_PUSH
11100
    switch (ctxt->instate) {
11101
  case XML_PARSER_EOF:
11102
      xmlGenericError(xmlGenericErrorContext,
11103
        "PP: try EOF\n"); break;
11104
  case XML_PARSER_START:
11105
      xmlGenericError(xmlGenericErrorContext,
11106
        "PP: try START\n"); break;
11107
  case XML_PARSER_MISC:
11108
      xmlGenericError(xmlGenericErrorContext,
11109
        "PP: try MISC\n");break;
11110
  case XML_PARSER_COMMENT:
11111
      xmlGenericError(xmlGenericErrorContext,
11112
        "PP: try COMMENT\n");break;
11113
  case XML_PARSER_PROLOG:
11114
      xmlGenericError(xmlGenericErrorContext,
11115
        "PP: try PROLOG\n");break;
11116
  case XML_PARSER_START_TAG:
11117
      xmlGenericError(xmlGenericErrorContext,
11118
        "PP: try START_TAG\n");break;
11119
  case XML_PARSER_CONTENT:
11120
      xmlGenericError(xmlGenericErrorContext,
11121
        "PP: try CONTENT\n");break;
11122
  case XML_PARSER_CDATA_SECTION:
11123
      xmlGenericError(xmlGenericErrorContext,
11124
        "PP: try CDATA_SECTION\n");break;
11125
  case XML_PARSER_END_TAG:
11126
      xmlGenericError(xmlGenericErrorContext,
11127
        "PP: try END_TAG\n");break;
11128
  case XML_PARSER_ENTITY_DECL:
11129
      xmlGenericError(xmlGenericErrorContext,
11130
        "PP: try ENTITY_DECL\n");break;
11131
  case XML_PARSER_ENTITY_VALUE:
11132
      xmlGenericError(xmlGenericErrorContext,
11133
        "PP: try ENTITY_VALUE\n");break;
11134
  case XML_PARSER_ATTRIBUTE_VALUE:
11135
      xmlGenericError(xmlGenericErrorContext,
11136
        "PP: try ATTRIBUTE_VALUE\n");break;
11137
  case XML_PARSER_DTD:
11138
      xmlGenericError(xmlGenericErrorContext,
11139
        "PP: try DTD\n");break;
11140
  case XML_PARSER_EPILOG:
11141
      xmlGenericError(xmlGenericErrorContext,
11142
        "PP: try EPILOG\n");break;
11143
  case XML_PARSER_PI:
11144
      xmlGenericError(xmlGenericErrorContext,
11145
        "PP: try PI\n");break;
11146
        case XML_PARSER_IGNORE:
11147
            xmlGenericError(xmlGenericErrorContext,
11148
        "PP: try IGNORE\n");break;
11149
    }
11150
#endif
11151
11152
8.45M
    if ((ctxt->input != NULL) &&
11153
8.45M
        (ctxt->input->cur - ctxt->input->base > 4096)) {
11154
136k
  xmlSHRINK(ctxt);
11155
136k
  ctxt->checkIndex = 0;
11156
136k
    }
11157
8.45M
    xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11158
11159
95.3M
    while (ctxt->instate != XML_PARSER_EOF) {
11160
95.3M
  if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
11161
5.53k
      return(0);
11162
11163
95.3M
  if (ctxt->input == NULL) break;
11164
95.3M
  if (ctxt->input->buf == NULL)
11165
0
      avail = ctxt->input->length -
11166
0
              (ctxt->input->cur - ctxt->input->base);
11167
95.3M
  else {
11168
      /*
11169
       * If we are operating on converted input, try to flush
11170
       * remainng chars to avoid them stalling in the non-converted
11171
       * buffer. But do not do this in document start where
11172
       * encoding="..." may not have been read and we work on a
11173
       * guessed encoding.
11174
       */
11175
95.3M
      if ((ctxt->instate != XML_PARSER_START) &&
11176
88.6M
          (ctxt->input->buf->raw != NULL) &&
11177
8.27M
    (xmlBufIsEmpty(ctxt->input->buf->raw) == 0)) {
11178
485k
                size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
11179
485k
                                                 ctxt->input);
11180
485k
    size_t current = ctxt->input->cur - ctxt->input->base;
11181
11182
485k
    xmlParserInputBufferPush(ctxt->input->buf, 0, "");
11183
485k
                xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
11184
485k
                                      base, current);
11185
485k
      }
11186
95.3M
      avail = xmlBufUse(ctxt->input->buf->buffer) -
11187
95.3M
        (ctxt->input->cur - ctxt->input->base);
11188
95.3M
  }
11189
95.3M
        if (avail < 1)
11190
262k
      goto done;
11191
95.0M
        switch (ctxt->instate) {
11192
0
            case XML_PARSER_EOF:
11193
          /*
11194
     * Document parsing is done !
11195
     */
11196
0
          goto done;
11197
6.67M
            case XML_PARSER_START:
11198
6.67M
    if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
11199
205
        xmlChar start[4];
11200
205
        xmlCharEncoding enc;
11201
11202
        /*
11203
         * Very first chars read from the document flow.
11204
         */
11205
205
        if (avail < 4)
11206
205
      goto done;
11207
11208
        /*
11209
         * Get the 4 first bytes and decode the charset
11210
         * if enc != XML_CHAR_ENCODING_NONE
11211
         * plug some encoding conversion routines,
11212
         * else xmlSwitchEncoding will set to (default)
11213
         * UTF8.
11214
         */
11215
0
        start[0] = RAW;
11216
0
        start[1] = NXT(1);
11217
0
        start[2] = NXT(2);
11218
0
        start[3] = NXT(3);
11219
0
        enc = xmlDetectCharEncoding(start, 4);
11220
0
        xmlSwitchEncoding(ctxt, enc);
11221
0
        break;
11222
205
    }
11223
11224
6.67M
    if (avail < 2)
11225
82
        goto done;
11226
6.67M
    cur = ctxt->input->cur[0];
11227
6.67M
    next = ctxt->input->cur[1];
11228
6.67M
    if (cur == 0) {
11229
369
        if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11230
369
      ctxt->sax->setDocumentLocator(ctxt->userData,
11231
369
                  &xmlDefaultSAXLocator);
11232
369
        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11233
369
        xmlHaltParser(ctxt);
11234
#ifdef DEBUG_PUSH
11235
        xmlGenericError(xmlGenericErrorContext,
11236
          "PP: entering EOF\n");
11237
#endif
11238
369
        if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11239
369
      ctxt->sax->endDocument(ctxt->userData);
11240
369
        goto done;
11241
369
    }
11242
6.67M
          if ((cur == '<') && (next == '?')) {
11243
        /* PI or XML decl */
11244
6.53M
        if (avail < 5) return(ret);
11245
6.53M
        if ((!terminate) &&
11246
6.51M
            (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11247
6.44M
      return(ret);
11248
91.9k
        if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11249
91.9k
      ctxt->sax->setDocumentLocator(ctxt->userData,
11250
91.9k
                  &xmlDefaultSAXLocator);
11251
91.9k
        if ((ctxt->input->cur[2] == 'x') &&
11252
85.4k
      (ctxt->input->cur[3] == 'm') &&
11253
83.8k
      (ctxt->input->cur[4] == 'l') &&
11254
83.2k
      (IS_BLANK_CH(ctxt->input->cur[5]))) {
11255
82.7k
      ret += 5;
11256
#ifdef DEBUG_PUSH
11257
      xmlGenericError(xmlGenericErrorContext,
11258
        "PP: Parsing XML Decl\n");
11259
#endif
11260
82.7k
      xmlParseXMLDecl(ctxt);
11261
82.7k
      if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
11262
          /*
11263
           * The XML REC instructs us to stop parsing right
11264
           * here
11265
           */
11266
2.49k
          xmlHaltParser(ctxt);
11267
2.49k
          return(0);
11268
2.49k
      }
11269
80.2k
      ctxt->standalone = ctxt->input->standalone;
11270
80.2k
      if ((ctxt->encoding == NULL) &&
11271
46.8k
          (ctxt->input->encoding != NULL))
11272
28.0k
          ctxt->encoding = xmlStrdup(ctxt->input->encoding);
11273
80.2k
      if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11274
80.2k
          (!ctxt->disableSAX))
11275
79.8k
          ctxt->sax->startDocument(ctxt->userData);
11276
80.2k
      ctxt->instate = XML_PARSER_MISC;
11277
#ifdef DEBUG_PUSH
11278
      xmlGenericError(xmlGenericErrorContext,
11279
        "PP: entering MISC\n");
11280
#endif
11281
80.2k
        } else {
11282
9.14k
      ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11283
9.14k
      if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11284
9.14k
          (!ctxt->disableSAX))
11285
9.14k
          ctxt->sax->startDocument(ctxt->userData);
11286
9.14k
      ctxt->instate = XML_PARSER_MISC;
11287
#ifdef DEBUG_PUSH
11288
      xmlGenericError(xmlGenericErrorContext,
11289
        "PP: entering MISC\n");
11290
#endif
11291
9.14k
        }
11292
142k
    } else {
11293
142k
        if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11294
142k
      ctxt->sax->setDocumentLocator(ctxt->userData,
11295
142k
                  &xmlDefaultSAXLocator);
11296
142k
        ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11297
142k
        if (ctxt->version == NULL) {
11298
0
            xmlErrMemory(ctxt, NULL);
11299
0
      break;
11300
0
        }
11301
142k
        if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11302
142k
            (!ctxt->disableSAX))
11303
142k
      ctxt->sax->startDocument(ctxt->userData);
11304
142k
        ctxt->instate = XML_PARSER_MISC;
11305
#ifdef DEBUG_PUSH
11306
        xmlGenericError(xmlGenericErrorContext,
11307
          "PP: entering MISC\n");
11308
#endif
11309
142k
    }
11310
231k
    break;
11311
18.4M
            case XML_PARSER_START_TAG: {
11312
18.4M
          const xmlChar *name;
11313
18.4M
    const xmlChar *prefix = NULL;
11314
18.4M
    const xmlChar *URI = NULL;
11315
18.4M
    int nsNr = ctxt->nsNr;
11316
11317
18.4M
    if ((avail < 2) && (ctxt->inputNr == 1))
11318
0
        goto done;
11319
18.4M
    cur = ctxt->input->cur[0];
11320
18.4M
          if (cur != '<') {
11321
25.8k
        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11322
25.8k
        xmlHaltParser(ctxt);
11323
25.8k
        if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11324
25.8k
      ctxt->sax->endDocument(ctxt->userData);
11325
25.8k
        goto done;
11326
25.8k
    }
11327
18.4M
    if (!terminate) {
11328
8.05M
        if (ctxt->progressive) {
11329
            /* > can be found unescaped in attribute values */
11330
8.05M
            if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
11331
273k
          goto done;
11332
8.05M
        } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11333
0
      goto done;
11334
0
        }
11335
8.05M
    }
11336
18.1M
    if (ctxt->spaceNr == 0)
11337
501k
        spacePush(ctxt, -1);
11338
17.6M
    else if (*ctxt->space == -2)
11339
7.16M
        spacePush(ctxt, -1);
11340
10.5M
    else
11341
10.5M
        spacePush(ctxt, *ctxt->space);
11342
18.1M
#ifdef LIBXML_SAX1_ENABLED
11343
18.1M
    if (ctxt->sax2)
11344
18.1M
#endif /* LIBXML_SAX1_ENABLED */
11345
18.1M
        name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
11346
0
#ifdef LIBXML_SAX1_ENABLED
11347
0
    else
11348
0
        name = xmlParseStartTag(ctxt);
11349
18.1M
#endif /* LIBXML_SAX1_ENABLED */
11350
18.1M
    if (ctxt->instate == XML_PARSER_EOF)
11351
468
        goto done;
11352
18.1M
    if (name == NULL) {
11353
6.96k
        spacePop(ctxt);
11354
6.96k
        xmlHaltParser(ctxt);
11355
6.96k
        if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11356
6.96k
      ctxt->sax->endDocument(ctxt->userData);
11357
6.96k
        goto done;
11358
6.96k
    }
11359
18.1M
#ifdef LIBXML_VALID_ENABLED
11360
    /*
11361
     * [ VC: Root Element Type ]
11362
     * The Name in the document type declaration must match
11363
     * the element type of the root element.
11364
     */
11365
18.1M
    if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
11366
0
        ctxt->node && (ctxt->node == ctxt->myDoc->children))
11367
0
        ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
11368
18.1M
#endif /* LIBXML_VALID_ENABLED */
11369
11370
    /*
11371
     * Check for an Empty Element.
11372
     */
11373
18.1M
    if ((RAW == '/') && (NXT(1) == '>')) {
11374
4.44M
        SKIP(2);
11375
11376
4.44M
        if (ctxt->sax2) {
11377
4.44M
      if ((ctxt->sax != NULL) &&
11378
4.44M
          (ctxt->sax->endElementNs != NULL) &&
11379
4.44M
          (!ctxt->disableSAX))
11380
4.44M
          ctxt->sax->endElementNs(ctxt->userData, name,
11381
4.44M
                                  prefix, URI);
11382
4.44M
      if (ctxt->nsNr - nsNr > 0)
11383
7.47k
          nsPop(ctxt, ctxt->nsNr - nsNr);
11384
4.44M
#ifdef LIBXML_SAX1_ENABLED
11385
4.44M
        } else {
11386
0
      if ((ctxt->sax != NULL) &&
11387
0
          (ctxt->sax->endElement != NULL) &&
11388
0
          (!ctxt->disableSAX))
11389
0
          ctxt->sax->endElement(ctxt->userData, name);
11390
0
#endif /* LIBXML_SAX1_ENABLED */
11391
0
        }
11392
4.44M
        if (ctxt->instate == XML_PARSER_EOF)
11393
0
      goto done;
11394
4.44M
        spacePop(ctxt);
11395
4.44M
        if (ctxt->nameNr == 0) {
11396
5.67k
      ctxt->instate = XML_PARSER_EPILOG;
11397
4.44M
        } else {
11398
4.44M
      ctxt->instate = XML_PARSER_CONTENT;
11399
4.44M
        }
11400
4.44M
                    ctxt->progressive = 1;
11401
4.44M
        break;
11402
4.44M
    }
11403
13.7M
    if (RAW == '>') {
11404
8.96M
        NEXT;
11405
8.96M
    } else {
11406
4.77M
        xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
11407
4.77M
           "Couldn't find end of Start Tag %s\n",
11408
4.77M
           name);
11409
4.77M
        nodePop(ctxt);
11410
4.77M
        spacePop(ctxt);
11411
4.77M
    }
11412
13.7M
    if (ctxt->sax2)
11413
13.7M
        nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
11414
0
#ifdef LIBXML_SAX1_ENABLED
11415
0
    else
11416
0
        namePush(ctxt, name);
11417
13.7M
#endif /* LIBXML_SAX1_ENABLED */
11418
11419
13.7M
    ctxt->instate = XML_PARSER_CONTENT;
11420
13.7M
                ctxt->progressive = 1;
11421
13.7M
                break;
11422
18.1M
      }
11423
60.5M
            case XML_PARSER_CONTENT: {
11424
60.5M
    const xmlChar *test;
11425
60.5M
    unsigned int cons;
11426
60.5M
    if ((avail < 2) && (ctxt->inputNr == 1))
11427
44.4k
        goto done;
11428
60.4M
    cur = ctxt->input->cur[0];
11429
60.4M
    next = ctxt->input->cur[1];
11430
11431
60.4M
    test = CUR_PTR;
11432
60.4M
          cons = ctxt->input->consumed;
11433
60.4M
    if ((cur == '<') && (next == '/')) {
11434
7.69M
        ctxt->instate = XML_PARSER_END_TAG;
11435
7.69M
        break;
11436
52.7M
          } else if ((cur == '<') && (next == '?')) {
11437
657k
        if ((!terminate) &&
11438
373k
            (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11439
326k
                        ctxt->progressive = XML_PARSER_PI;
11440
326k
      goto done;
11441
326k
                    }
11442
331k
        xmlParsePI(ctxt);
11443
331k
        ctxt->instate = XML_PARSER_CONTENT;
11444
331k
                    ctxt->progressive = 1;
11445
52.1M
    } else if ((cur == '<') && (next != '!')) {
11446
18.0M
        ctxt->instate = XML_PARSER_START_TAG;
11447
18.0M
        break;
11448
34.1M
    } else if ((cur == '<') && (next == '!') &&
11449
867k
               (ctxt->input->cur[2] == '-') &&
11450
129k
         (ctxt->input->cur[3] == '-')) {
11451
128k
        int term;
11452
11453
128k
              if (avail < 4)
11454
0
            goto done;
11455
128k
        ctxt->input->cur += 4;
11456
128k
        term = xmlParseLookupSequence(ctxt, '-', '-', '>');
11457
128k
        ctxt->input->cur -= 4;
11458
128k
        if ((!terminate) && (term < 0)) {
11459
35.9k
                        ctxt->progressive = XML_PARSER_COMMENT;
11460
35.9k
      goto done;
11461
35.9k
                    }
11462
92.7k
        xmlParseComment(ctxt);
11463
92.7k
        ctxt->instate = XML_PARSER_CONTENT;
11464
92.7k
                    ctxt->progressive = 1;
11465
33.9M
    } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
11466
738k
        (ctxt->input->cur[2] == '[') &&
11467
734k
        (ctxt->input->cur[3] == 'C') &&
11468
732k
        (ctxt->input->cur[4] == 'D') &&
11469
730k
        (ctxt->input->cur[5] == 'A') &&
11470
728k
        (ctxt->input->cur[6] == 'T') &&
11471
726k
        (ctxt->input->cur[7] == 'A') &&
11472
724k
        (ctxt->input->cur[8] == '[')) {
11473
722k
        SKIP(9);
11474
722k
        ctxt->instate = XML_PARSER_CDATA_SECTION;
11475
722k
        break;
11476
33.2M
    } else if ((cur == '<') && (next == '!') &&
11477
16.7k
               (avail < 9)) {
11478
15.2k
        goto done;
11479
33.2M
    } else if (cur == '&') {
11480
1.91M
        if ((!terminate) &&
11481
458k
            (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
11482
352k
      goto done;
11483
1.56M
        xmlParseReference(ctxt);
11484
31.3M
    } else {
11485
        /* TODO Avoid the extra copy, handle directly !!! */
11486
        /*
11487
         * Goal of the following test is:
11488
         *  - minimize calls to the SAX 'character' callback
11489
         *    when they are mergeable
11490
         *  - handle a problem for isBlank when we only parse
11491
         *    a sequence of blank chars and the next one is
11492
         *    not available to check against '<' presence.
11493
         *  - tries to homogenize the differences in SAX
11494
         *    callbacks between the push and pull versions
11495
         *    of the parser.
11496
         */
11497
31.3M
        if ((ctxt->inputNr == 1) &&
11498
31.3M
            (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
11499
3.91M
      if (!terminate) {
11500
3.43M
          if (ctxt->progressive) {
11501
3.43M
        if ((lastlt == NULL) ||
11502
3.43M
            (ctxt->input->cur > lastlt))
11503
217k
            goto done;
11504
3.43M
          } else if (xmlParseLookupSequence(ctxt,
11505
0
                                            '<', 0, 0) < 0) {
11506
0
        goto done;
11507
0
          }
11508
3.43M
      }
11509
3.91M
                    }
11510
31.1M
        ctxt->checkIndex = 0;
11511
31.1M
        xmlParseCharData(ctxt, 0);
11512
31.1M
    }
11513
33.0M
    if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
11514
12.3k
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
11515
12.3k
                    "detected an error in element content\n");
11516
12.3k
        xmlHaltParser(ctxt);
11517
12.3k
        break;
11518
12.3k
    }
11519
33.0M
    break;
11520
33.0M
      }
11521
33.0M
            case XML_PARSER_END_TAG:
11522
7.72M
    if (avail < 2)
11523
0
        goto done;
11524
7.72M
    if (!terminate) {
11525
2.80M
        if (ctxt->progressive) {
11526
            /* > can be found unescaped in attribute values */
11527
2.80M
            if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
11528
30.7k
          goto done;
11529
2.80M
        } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11530
0
      goto done;
11531
0
        }
11532
2.80M
    }
11533
7.69M
    if (ctxt->sax2) {
11534
7.69M
        xmlParseEndTag2(ctxt,
11535
7.69M
                (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
11536
7.69M
                (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
11537
7.69M
                (int) (ptrdiff_t)
11538
7.69M
                                ctxt->pushTab[ctxt->nameNr * 3 - 1], 0);
11539
7.69M
        nameNsPop(ctxt);
11540
7.69M
    }
11541
0
#ifdef LIBXML_SAX1_ENABLED
11542
0
      else
11543
0
        xmlParseEndTag1(ctxt, 0);
11544
7.69M
#endif /* LIBXML_SAX1_ENABLED */
11545
7.69M
    if (ctxt->instate == XML_PARSER_EOF) {
11546
        /* Nothing */
11547
7.69M
    } else if (ctxt->nameNr == 0) {
11548
41.9k
        ctxt->instate = XML_PARSER_EPILOG;
11549
7.65M
    } else {
11550
7.65M
        ctxt->instate = XML_PARSER_CONTENT;
11551
7.65M
    }
11552
7.69M
    break;
11553
812k
            case XML_PARSER_CDATA_SECTION: {
11554
          /*
11555
     * The Push mode need to have the SAX callback for
11556
     * cdataBlock merge back contiguous callbacks.
11557
     */
11558
812k
    int base;
11559
11560
812k
    base = xmlParseLookupSequence(ctxt, ']', ']', '>');
11561
812k
    if (base < 0) {
11562
92.0k
        if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
11563
30.3k
            int tmp;
11564
11565
30.3k
      tmp = xmlCheckCdataPush(ctxt->input->cur,
11566
30.3k
                              XML_PARSER_BIG_BUFFER_SIZE, 0);
11567
30.3k
      if (tmp < 0) {
11568
267
          tmp = -tmp;
11569
267
          ctxt->input->cur += tmp;
11570
267
          goto encoding_error;
11571
267
      }
11572
30.0k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
11573
30.0k
          if (ctxt->sax->cdataBlock != NULL)
11574
30.0k
        ctxt->sax->cdataBlock(ctxt->userData,
11575
30.0k
                              ctxt->input->cur, tmp);
11576
0
          else if (ctxt->sax->characters != NULL)
11577
0
        ctxt->sax->characters(ctxt->userData,
11578
0
                              ctxt->input->cur, tmp);
11579
30.0k
      }
11580
30.0k
      if (ctxt->instate == XML_PARSER_EOF)
11581
0
          goto done;
11582
30.0k
      SKIPL(tmp);
11583
30.0k
      ctxt->checkIndex = 0;
11584
30.0k
        }
11585
91.8k
        goto done;
11586
720k
    } else {
11587
720k
        int tmp;
11588
11589
720k
        tmp = xmlCheckCdataPush(ctxt->input->cur, base, 1);
11590
720k
        if ((tmp < 0) || (tmp != base)) {
11591
1.10k
      tmp = -tmp;
11592
1.10k
      ctxt->input->cur += tmp;
11593
1.10k
      goto encoding_error;
11594
1.10k
        }
11595
719k
        if ((ctxt->sax != NULL) && (base == 0) &&
11596
464k
            (ctxt->sax->cdataBlock != NULL) &&
11597
464k
            (!ctxt->disableSAX)) {
11598
      /*
11599
       * Special case to provide identical behaviour
11600
       * between pull and push parsers on empty CDATA
11601
       * sections
11602
       */
11603
464k
       if ((ctxt->input->cur - ctxt->input->base >= 9) &&
11604
464k
           (!strncmp((const char *)&ctxt->input->cur[-9],
11605
464k
                     "<![CDATA[", 9)))
11606
464k
           ctxt->sax->cdataBlock(ctxt->userData,
11607
464k
                                 BAD_CAST "", 0);
11608
464k
        } else if ((ctxt->sax != NULL) && (base > 0) &&
11609
254k
      (!ctxt->disableSAX)) {
11610
254k
      if (ctxt->sax->cdataBlock != NULL)
11611
254k
          ctxt->sax->cdataBlock(ctxt->userData,
11612
254k
              ctxt->input->cur, base);
11613
0
      else if (ctxt->sax->characters != NULL)
11614
0
          ctxt->sax->characters(ctxt->userData,
11615
0
              ctxt->input->cur, base);
11616
254k
        }
11617
719k
        if (ctxt->instate == XML_PARSER_EOF)
11618
0
      goto done;
11619
719k
        SKIPL(base + 3);
11620
719k
        ctxt->checkIndex = 0;
11621
719k
        ctxt->instate = XML_PARSER_CONTENT;
11622
#ifdef DEBUG_PUSH
11623
        xmlGenericError(xmlGenericErrorContext,
11624
          "PP: entering CONTENT\n");
11625
#endif
11626
719k
    }
11627
719k
    break;
11628
812k
      }
11629
719k
            case XML_PARSER_MISC:
11630
583k
    SKIP_BLANKS;
11631
583k
    if (ctxt->input->buf == NULL)
11632
0
        avail = ctxt->input->length -
11633
0
                (ctxt->input->cur - ctxt->input->base);
11634
583k
    else
11635
583k
        avail = xmlBufUse(ctxt->input->buf->buffer) -
11636
583k
                (ctxt->input->cur - ctxt->input->base);
11637
583k
    if (avail < 2)
11638
114k
        goto done;
11639
469k
    cur = ctxt->input->cur[0];
11640
469k
    next = ctxt->input->cur[1];
11641
469k
          if ((cur == '<') && (next == '?')) {
11642
199k
        if ((!terminate) &&
11643
191k
            (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11644
17.7k
                        ctxt->progressive = XML_PARSER_PI;
11645
17.7k
      goto done;
11646
17.7k
                    }
11647
#ifdef DEBUG_PUSH
11648
        xmlGenericError(xmlGenericErrorContext,
11649
          "PP: Parsing PI\n");
11650
#endif
11651
181k
        xmlParsePI(ctxt);
11652
181k
        if (ctxt->instate == XML_PARSER_EOF)
11653
1
      goto done;
11654
181k
        ctxt->instate = XML_PARSER_MISC;
11655
181k
                    ctxt->progressive = 1;
11656
181k
        ctxt->checkIndex = 0;
11657
269k
    } else if ((cur == '<') && (next == '!') &&
11658
116k
        (ctxt->input->cur[2] == '-') &&
11659
28.7k
        (ctxt->input->cur[3] == '-')) {
11660
26.9k
        if ((!terminate) &&
11661
22.2k
            (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11662
4.18k
                        ctxt->progressive = XML_PARSER_COMMENT;
11663
4.18k
      goto done;
11664
4.18k
                    }
11665
#ifdef DEBUG_PUSH
11666
        xmlGenericError(xmlGenericErrorContext,
11667
          "PP: Parsing Comment\n");
11668
#endif
11669
22.7k
        xmlParseComment(ctxt);
11670
22.7k
        if (ctxt->instate == XML_PARSER_EOF)
11671
0
      goto done;
11672
22.7k
        ctxt->instate = XML_PARSER_MISC;
11673
22.7k
                    ctxt->progressive = 1;
11674
22.7k
        ctxt->checkIndex = 0;
11675
242k
    } else if ((cur == '<') && (next == '!') &&
11676
89.5k
        (ctxt->input->cur[2] == 'D') &&
11677
87.1k
        (ctxt->input->cur[3] == 'O') &&
11678
85.0k
        (ctxt->input->cur[4] == 'C') &&
11679
82.9k
        (ctxt->input->cur[5] == 'T') &&
11680
79.7k
        (ctxt->input->cur[6] == 'Y') &&
11681
77.7k
        (ctxt->input->cur[7] == 'P') &&
11682
76.1k
        (ctxt->input->cur[8] == 'E')) {
11683
74.6k
        if ((!terminate) &&
11684
64.0k
            (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) {
11685
5.33k
                        ctxt->progressive = XML_PARSER_DTD;
11686
5.33k
      goto done;
11687
5.33k
                    }
11688
#ifdef DEBUG_PUSH
11689
        xmlGenericError(xmlGenericErrorContext,
11690
          "PP: Parsing internal subset\n");
11691
#endif
11692
69.3k
        ctxt->inSubset = 1;
11693
69.3k
                    ctxt->progressive = 0;
11694
69.3k
        ctxt->checkIndex = 0;
11695
69.3k
        xmlParseDocTypeDecl(ctxt);
11696
69.3k
        if (ctxt->instate == XML_PARSER_EOF)
11697
0
      goto done;
11698
69.3k
        if (RAW == '[') {
11699
61.0k
      ctxt->instate = XML_PARSER_DTD;
11700
#ifdef DEBUG_PUSH
11701
      xmlGenericError(xmlGenericErrorContext,
11702
        "PP: entering DTD\n");
11703
#endif
11704
61.0k
        } else {
11705
      /*
11706
       * Create and update the external subset.
11707
       */
11708
8.30k
      ctxt->inSubset = 2;
11709
8.30k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11710
8.30k
          (ctxt->sax->externalSubset != NULL))
11711
8.30k
          ctxt->sax->externalSubset(ctxt->userData,
11712
8.30k
            ctxt->intSubName, ctxt->extSubSystem,
11713
8.30k
            ctxt->extSubURI);
11714
8.30k
      ctxt->inSubset = 0;
11715
8.30k
      xmlCleanSpecialAttr(ctxt);
11716
8.30k
      ctxt->instate = XML_PARSER_PROLOG;
11717
#ifdef DEBUG_PUSH
11718
      xmlGenericError(xmlGenericErrorContext,
11719
        "PP: entering PROLOG\n");
11720
#endif
11721
8.30k
        }
11722
168k
    } else if ((cur == '<') && (next == '!') &&
11723
14.8k
               (avail < 9)) {
11724
14.6k
        goto done;
11725
153k
    } else {
11726
153k
        ctxt->instate = XML_PARSER_START_TAG;
11727
153k
        ctxt->progressive = XML_PARSER_START_TAG;
11728
153k
        xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11729
#ifdef DEBUG_PUSH
11730
        xmlGenericError(xmlGenericErrorContext,
11731
          "PP: entering START_TAG\n");
11732
#endif
11733
153k
    }
11734
427k
    break;
11735
427k
            case XML_PARSER_PROLOG:
11736
80.5k
    SKIP_BLANKS;
11737
80.5k
    if (ctxt->input->buf == NULL)
11738
0
        avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11739
80.5k
    else
11740
80.5k
        avail = xmlBufUse(ctxt->input->buf->buffer) -
11741
80.5k
                            (ctxt->input->cur - ctxt->input->base);
11742
80.5k
    if (avail < 2)
11743
3.28k
        goto done;
11744
77.2k
    cur = ctxt->input->cur[0];
11745
77.2k
    next = ctxt->input->cur[1];
11746
77.2k
          if ((cur == '<') && (next == '?')) {
11747
13.4k
        if ((!terminate) &&
11748
12.3k
            (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11749
2.72k
                        ctxt->progressive = XML_PARSER_PI;
11750
2.72k
      goto done;
11751
2.72k
                    }
11752
#ifdef DEBUG_PUSH
11753
        xmlGenericError(xmlGenericErrorContext,
11754
          "PP: Parsing PI\n");
11755
#endif
11756
10.6k
        xmlParsePI(ctxt);
11757
10.6k
        if (ctxt->instate == XML_PARSER_EOF)
11758
0
      goto done;
11759
10.6k
        ctxt->instate = XML_PARSER_PROLOG;
11760
10.6k
                    ctxt->progressive = 1;
11761
63.8k
    } else if ((cur == '<') && (next == '!') &&
11762
9.54k
        (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11763
5.08k
        if ((!terminate) &&
11764
3.95k
            (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11765
1.86k
                        ctxt->progressive = XML_PARSER_COMMENT;
11766
1.86k
      goto done;
11767
1.86k
                    }
11768
#ifdef DEBUG_PUSH
11769
        xmlGenericError(xmlGenericErrorContext,
11770
          "PP: Parsing Comment\n");
11771
#endif
11772
3.21k
        xmlParseComment(ctxt);
11773
3.21k
        if (ctxt->instate == XML_PARSER_EOF)
11774
0
      goto done;
11775
3.21k
        ctxt->instate = XML_PARSER_PROLOG;
11776
3.21k
                    ctxt->progressive = 1;
11777
58.7k
    } else if ((cur == '<') && (next == '!') &&
11778
4.46k
               (avail < 4)) {
11779
3.31k
        goto done;
11780
55.4k
    } else {
11781
55.4k
        ctxt->instate = XML_PARSER_START_TAG;
11782
55.4k
        if (ctxt->progressive == 0)
11783
54.2k
      ctxt->progressive = XML_PARSER_START_TAG;
11784
55.4k
        xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11785
#ifdef DEBUG_PUSH
11786
        xmlGenericError(xmlGenericErrorContext,
11787
          "PP: entering START_TAG\n");
11788
#endif
11789
55.4k
    }
11790
69.3k
    break;
11791
69.3k
            case XML_PARSER_EPILOG:
11792
42.3k
    SKIP_BLANKS;
11793
42.3k
    if (ctxt->input->buf == NULL)
11794
0
        avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11795
42.3k
    else
11796
42.3k
        avail = xmlBufUse(ctxt->input->buf->buffer) -
11797
42.3k
                            (ctxt->input->cur - ctxt->input->base);
11798
42.3k
    if (avail < 2)
11799
15.7k
        goto done;
11800
26.5k
    cur = ctxt->input->cur[0];
11801
26.5k
    next = ctxt->input->cur[1];
11802
26.5k
          if ((cur == '<') && (next == '?')) {
11803
13.8k
        if ((!terminate) &&
11804
12.2k
            (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11805
8.61k
                        ctxt->progressive = XML_PARSER_PI;
11806
8.61k
      goto done;
11807
8.61k
                    }
11808
#ifdef DEBUG_PUSH
11809
        xmlGenericError(xmlGenericErrorContext,
11810
          "PP: Parsing PI\n");
11811
#endif
11812
5.27k
        xmlParsePI(ctxt);
11813
5.27k
        if (ctxt->instate == XML_PARSER_EOF)
11814
0
      goto done;
11815
5.27k
        ctxt->instate = XML_PARSER_EPILOG;
11816
5.27k
                    ctxt->progressive = 1;
11817
12.6k
    } else if ((cur == '<') && (next == '!') &&
11818
11.9k
        (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11819
7.07k
        if ((!terminate) &&
11820
5.72k
            (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11821
1.48k
                        ctxt->progressive = XML_PARSER_COMMENT;
11822
1.48k
      goto done;
11823
1.48k
                    }
11824
#ifdef DEBUG_PUSH
11825
        xmlGenericError(xmlGenericErrorContext,
11826
          "PP: Parsing Comment\n");
11827
#endif
11828
5.58k
        xmlParseComment(ctxt);
11829
5.58k
        if (ctxt->instate == XML_PARSER_EOF)
11830
0
      goto done;
11831
5.58k
        ctxt->instate = XML_PARSER_EPILOG;
11832
5.58k
                    ctxt->progressive = 1;
11833
5.60k
    } else if ((cur == '<') && (next == '!') &&
11834
4.84k
               (avail < 4)) {
11835
4.80k
        goto done;
11836
4.80k
    } else {
11837
804
        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
11838
804
        xmlHaltParser(ctxt);
11839
#ifdef DEBUG_PUSH
11840
        xmlGenericError(xmlGenericErrorContext,
11841
          "PP: entering EOF\n");
11842
#endif
11843
804
        if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11844
804
      ctxt->sax->endDocument(ctxt->userData);
11845
804
        goto done;
11846
804
    }
11847
10.8k
    break;
11848
154k
            case XML_PARSER_DTD: {
11849
          /*
11850
     * Sorry but progressive parsing of the internal subset
11851
     * is not expected to be supported. We first check that
11852
     * the full content of the internal subset is available and
11853
     * the parsing is launched only at that point.
11854
     * Internal subset ends up with "']' S? '>'" in an unescaped
11855
     * section and not in a ']]>' sequence which are conditional
11856
     * sections (whoever argued to keep that crap in XML deserve
11857
     * a place in hell !).
11858
     */
11859
154k
    int base, i;
11860
154k
    xmlChar *buf;
11861
154k
          xmlChar quote = 0;
11862
154k
                size_t use;
11863
11864
154k
    base = ctxt->input->cur - ctxt->input->base;
11865
154k
    if (base < 0) return(0);
11866
154k
    if (ctxt->checkIndex > base)
11867
32.7k
        base = ctxt->checkIndex;
11868
154k
    buf = xmlBufContent(ctxt->input->buf->buffer);
11869
154k
                use = xmlBufUse(ctxt->input->buf->buffer);
11870
33.5G
    for (;(unsigned int) base < use; base++) {
11871
33.5G
        if (quote != 0) {
11872
29.1G
            if (buf[base] == quote)
11873
18.0M
          quote = 0;
11874
29.1G
      continue;
11875
29.1G
        }
11876
4.36G
        if ((quote == 0) && (buf[base] == '<')) {
11877
20.7M
            int found  = 0;
11878
      /* special handling of comments */
11879
20.7M
            if (((unsigned int) base + 4 < use) &&
11880
20.7M
          (buf[base + 1] == '!') &&
11881
12.7M
          (buf[base + 2] == '-') &&
11882
134k
          (buf[base + 3] == '-')) {
11883
841M
          for (;(unsigned int) base + 3 < use; base++) {
11884
841M
        if ((buf[base] == '-') &&
11885
2.52M
            (buf[base + 1] == '-') &&
11886
1.07M
            (buf[base + 2] == '>')) {
11887
127k
            found = 1;
11888
127k
            base += 2;
11889
127k
            break;
11890
127k
        }
11891
841M
                }
11892
132k
          if (!found) {
11893
#if 0
11894
              fprintf(stderr, "unfinished comment\n");
11895
#endif
11896
4.76k
              break; /* for */
11897
4.76k
                }
11898
127k
                continue;
11899
132k
      }
11900
20.7M
        }
11901
4.36G
        if (buf[base] == '"') {
11902
4.45M
            quote = '"';
11903
4.45M
      continue;
11904
4.45M
        }
11905
4.36G
        if (buf[base] == '\'') {
11906
13.6M
            quote = '\'';
11907
13.6M
      continue;
11908
13.6M
        }
11909
4.34G
        if (buf[base] == ']') {
11910
#if 0
11911
            fprintf(stderr, "%c%c%c%c: ", buf[base],
11912
              buf[base + 1], buf[base + 2], buf[base + 3]);
11913
#endif
11914
1.84M
            if ((unsigned int) base +1 >= use)
11915
1.09k
          break;
11916
1.84M
      if (buf[base + 1] == ']') {
11917
          /* conditional crap, skip both ']' ! */
11918
1.20M
          base++;
11919
1.20M
          continue;
11920
1.20M
      }
11921
2.80M
            for (i = 1; (unsigned int) base + i < use; i++) {
11922
2.80M
          if (buf[base + i] == '>') {
11923
#if 0
11924
              fprintf(stderr, "found\n");
11925
#endif
11926
59.4k
              goto found_end_int_subset;
11927
59.4k
          }
11928
2.74M
          if (!IS_BLANK_CH(buf[base + i])) {
11929
#if 0
11930
              fprintf(stderr, "not found\n");
11931
#endif
11932
579k
              goto not_end_of_int_subset;
11933
579k
          }
11934
2.74M
      }
11935
#if 0
11936
      fprintf(stderr, "end of stream\n");
11937
#endif
11938
964
            break;
11939
11940
639k
        }
11941
4.34G
not_end_of_int_subset:
11942
4.34G
                    continue; /* for */
11943
4.34G
    }
11944
    /*
11945
     * We didn't find the end of the Internal subset
11946
     */
11947
95.3k
                if (quote == 0)
11948
33.6k
                    ctxt->checkIndex = base;
11949
61.7k
                else
11950
61.7k
                    ctxt->checkIndex = 0;
11951
#ifdef DEBUG_PUSH
11952
    if (next == 0)
11953
        xmlGenericError(xmlGenericErrorContext,
11954
          "PP: lookup of int subset end filed\n");
11955
#endif
11956
95.3k
          goto done;
11957
11958
59.4k
found_end_int_subset:
11959
59.4k
                ctxt->checkIndex = 0;
11960
59.4k
    xmlParseInternalSubset(ctxt);
11961
59.4k
    if (ctxt->instate == XML_PARSER_EOF)
11962
6.97k
        goto done;
11963
52.5k
    ctxt->inSubset = 2;
11964
52.5k
    if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11965
51.9k
        (ctxt->sax->externalSubset != NULL))
11966
51.9k
        ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
11967
51.9k
          ctxt->extSubSystem, ctxt->extSubURI);
11968
52.5k
    ctxt->inSubset = 0;
11969
52.5k
    xmlCleanSpecialAttr(ctxt);
11970
52.5k
    if (ctxt->instate == XML_PARSER_EOF)
11971
0
        goto done;
11972
52.5k
    ctxt->instate = XML_PARSER_PROLOG;
11973
52.5k
    ctxt->checkIndex = 0;
11974
#ifdef DEBUG_PUSH
11975
    xmlGenericError(xmlGenericErrorContext,
11976
      "PP: entering PROLOG\n");
11977
#endif
11978
52.5k
                break;
11979
52.5k
      }
11980
0
            case XML_PARSER_COMMENT:
11981
0
    xmlGenericError(xmlGenericErrorContext,
11982
0
      "PP: internal error, state == COMMENT\n");
11983
0
    ctxt->instate = XML_PARSER_CONTENT;
11984
#ifdef DEBUG_PUSH
11985
    xmlGenericError(xmlGenericErrorContext,
11986
      "PP: entering CONTENT\n");
11987
#endif
11988
0
    break;
11989
0
            case XML_PARSER_IGNORE:
11990
0
    xmlGenericError(xmlGenericErrorContext,
11991
0
      "PP: internal error, state == IGNORE");
11992
0
          ctxt->instate = XML_PARSER_DTD;
11993
#ifdef DEBUG_PUSH
11994
    xmlGenericError(xmlGenericErrorContext,
11995
      "PP: entering DTD\n");
11996
#endif
11997
0
          break;
11998
0
            case XML_PARSER_PI:
11999
0
    xmlGenericError(xmlGenericErrorContext,
12000
0
      "PP: internal error, state == PI\n");
12001
0
    ctxt->instate = XML_PARSER_CONTENT;
12002
#ifdef DEBUG_PUSH
12003
    xmlGenericError(xmlGenericErrorContext,
12004
      "PP: entering CONTENT\n");
12005
#endif
12006
0
    break;
12007
0
            case XML_PARSER_ENTITY_DECL:
12008
0
    xmlGenericError(xmlGenericErrorContext,
12009
0
      "PP: internal error, state == ENTITY_DECL\n");
12010
0
    ctxt->instate = XML_PARSER_DTD;
12011
#ifdef DEBUG_PUSH
12012
    xmlGenericError(xmlGenericErrorContext,
12013
      "PP: entering DTD\n");
12014
#endif
12015
0
    break;
12016
0
            case XML_PARSER_ENTITY_VALUE:
12017
0
    xmlGenericError(xmlGenericErrorContext,
12018
0
      "PP: internal error, state == ENTITY_VALUE\n");
12019
0
    ctxt->instate = XML_PARSER_CONTENT;
12020
#ifdef DEBUG_PUSH
12021
    xmlGenericError(xmlGenericErrorContext,
12022
      "PP: entering DTD\n");
12023
#endif
12024
0
    break;
12025
0
            case XML_PARSER_ATTRIBUTE_VALUE:
12026
0
    xmlGenericError(xmlGenericErrorContext,
12027
0
      "PP: internal error, state == ATTRIBUTE_VALUE\n");
12028
0
    ctxt->instate = XML_PARSER_START_TAG;
12029
#ifdef DEBUG_PUSH
12030
    xmlGenericError(xmlGenericErrorContext,
12031
      "PP: entering START_TAG\n");
12032
#endif
12033
0
    break;
12034
0
            case XML_PARSER_SYSTEM_LITERAL:
12035
0
    xmlGenericError(xmlGenericErrorContext,
12036
0
      "PP: internal error, state == SYSTEM_LITERAL\n");
12037
0
    ctxt->instate = XML_PARSER_START_TAG;
12038
#ifdef DEBUG_PUSH
12039
    xmlGenericError(xmlGenericErrorContext,
12040
      "PP: entering START_TAG\n");
12041
#endif
12042
0
    break;
12043
0
            case XML_PARSER_PUBLIC_LITERAL:
12044
0
    xmlGenericError(xmlGenericErrorContext,
12045
0
      "PP: internal error, state == PUBLIC_LITERAL\n");
12046
0
    ctxt->instate = XML_PARSER_START_TAG;
12047
#ifdef DEBUG_PUSH
12048
    xmlGenericError(xmlGenericErrorContext,
12049
      "PP: entering START_TAG\n");
12050
#endif
12051
0
    break;
12052
95.0M
  }
12053
95.0M
    }
12054
1.99M
done:
12055
#ifdef DEBUG_PUSH
12056
    xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
12057
#endif
12058
1.99M
    return(ret);
12059
1.37k
encoding_error:
12060
1.37k
    {
12061
1.37k
        char buffer[150];
12062
12063
1.37k
  snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
12064
1.37k
      ctxt->input->cur[0], ctxt->input->cur[1],
12065
1.37k
      ctxt->input->cur[2], ctxt->input->cur[3]);
12066
1.37k
  __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
12067
1.37k
         "Input is not proper UTF-8, indicate encoding !\n%s",
12068
1.37k
         BAD_CAST buffer, NULL);
12069
1.37k
    }
12070
1.37k
    return(0);
12071
8.45M
}
12072
12073
/**
12074
 * xmlParseCheckTransition:
12075
 * @ctxt:  an XML parser context
12076
 * @chunk:  a char array
12077
 * @size:  the size in byte of the chunk
12078
 *
12079
 * Check depending on the current parser state if the chunk given must be
12080
 * processed immediately or one need more data to advance on parsing.
12081
 *
12082
 * Returns -1 in case of error, 0 if the push is not needed and 1 if needed
12083
 */
12084
static int
12085
6.57M
xmlParseCheckTransition(xmlParserCtxtPtr ctxt, const char *chunk, int size) {
12086
6.57M
    if ((ctxt == NULL) || (chunk == NULL) || (size < 0))
12087
0
        return(-1);
12088
6.57M
    if (ctxt->instate == XML_PARSER_START_TAG) {
12089
1.12M
        if (memchr(chunk, '>', size) != NULL)
12090
222k
            return(1);
12091
906k
        return(0);
12092
1.12M
    }
12093
5.44M
    if (ctxt->progressive == XML_PARSER_COMMENT) {
12094
59.6k
        if (memchr(chunk, '>', size) != NULL)
12095
40.5k
            return(1);
12096
19.1k
        return(0);
12097
59.6k
    }
12098
5.38M
    if (ctxt->instate == XML_PARSER_CDATA_SECTION) {
12099
123k
        if (memchr(chunk, '>', size) != NULL)
12100
90.6k
            return(1);
12101
32.6k
        return(0);
12102
123k
    }
12103
5.26M
    if (ctxt->progressive == XML_PARSER_PI) {
12104
535k
        if (memchr(chunk, '>', size) != NULL)
12105
331k
            return(1);
12106
204k
        return(0);
12107
535k
    }
12108
4.73M
    if (ctxt->instate == XML_PARSER_END_TAG) {
12109
64.9k
        if (memchr(chunk, '>', size) != NULL)
12110
26.7k
            return(1);
12111
38.2k
        return(0);
12112
64.9k
    }
12113
4.66M
    if ((ctxt->progressive == XML_PARSER_DTD) ||
12114
4.37M
        (ctxt->instate == XML_PARSER_DTD)) {
12115
1.44M
        if (memchr(chunk, '>', size) != NULL)
12116
87.6k
            return(1);
12117
1.35M
        return(0);
12118
1.44M
    }
12119
3.22M
    return(1);
12120
4.66M
}
12121
12122
/**
12123
 * xmlParseChunk:
12124
 * @ctxt:  an XML parser context
12125
 * @chunk:  a char array
12126
 * @size:  the size in byte of the chunk
12127
 * @terminate:  last chunk indicator
12128
 *
12129
 * Parse a Chunk of memory
12130
 *
12131
 * Returns zero if no error, the xmlParserErrors otherwise.
12132
 */
12133
int
12134
xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
12135
6.76M
              int terminate) {
12136
6.76M
    int end_in_lf = 0;
12137
6.76M
    int remain = 0;
12138
6.76M
    size_t old_avail = 0;
12139
6.76M
    size_t avail = 0;
12140
12141
6.76M
    if (ctxt == NULL)
12142
0
        return(XML_ERR_INTERNAL_ERROR);
12143
6.76M
    if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12144
10.3k
        return(ctxt->errNo);
12145
6.75M
    if (ctxt->instate == XML_PARSER_EOF)
12146
18.9k
        return(-1);
12147
6.73M
    if (ctxt->instate == XML_PARSER_START)
12148
2.40M
        xmlDetectSAX2(ctxt);
12149
6.73M
    if ((size > 0) && (chunk != NULL) && (!terminate) &&
12150
6.57M
        (chunk[size - 1] == '\r')) {
12151
20.9k
  end_in_lf = 1;
12152
20.9k
  size--;
12153
20.9k
    }
12154
12155
11.0M
xmldecl_done:
12156
12157
11.0M
    if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
12158
10.9M
        (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF))  {
12159
10.9M
  size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12160
10.9M
  size_t cur = ctxt->input->cur - ctxt->input->base;
12161
10.9M
  int res;
12162
12163
10.9M
        old_avail = xmlBufUse(ctxt->input->buf->buffer);
12164
        /*
12165
         * Specific handling if we autodetected an encoding, we should not
12166
         * push more than the first line ... which depend on the encoding
12167
         * And only push the rest once the final encoding was detected
12168
         */
12169
10.9M
        if ((ctxt->instate == XML_PARSER_START) && (ctxt->input != NULL) &&
12170
6.65M
            (ctxt->input->buf != NULL) && (ctxt->input->buf->encoder != NULL)) {
12171
5.13M
            unsigned int len = 45;
12172
12173
5.13M
            if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12174
5.13M
                               BAD_CAST "UTF-16")) ||
12175
4.96k
                (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12176
4.96k
                               BAD_CAST "UTF16")))
12177
5.12M
                len = 90;
12178
4.96k
            else if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12179
4.96k
                                    BAD_CAST "UCS-4")) ||
12180
3.34k
                     (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12181
3.34k
                                    BAD_CAST "UCS4")))
12182
1.62k
                len = 180;
12183
12184
5.13M
            if (ctxt->input->buf->rawconsumed < len)
12185
4.00k
                len -= ctxt->input->buf->rawconsumed;
12186
12187
            /*
12188
             * Change size for reading the initial declaration only
12189
             * if size is greater than len. Otherwise, memmove in xmlBufferAdd
12190
             * will blindly copy extra bytes from memory.
12191
             */
12192
5.13M
            if ((unsigned int) size > len) {
12193
4.27M
                remain = size - len;
12194
4.27M
                size = len;
12195
4.27M
            } else {
12196
857k
                remain = 0;
12197
857k
            }
12198
5.13M
        }
12199
10.9M
  res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12200
10.9M
  if (res < 0) {
12201
421
      ctxt->errNo = XML_PARSER_EOF;
12202
421
      xmlHaltParser(ctxt);
12203
421
      return (XML_PARSER_EOF);
12204
421
  }
12205
10.9M
        xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
12206
#ifdef DEBUG_PUSH
12207
  xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12208
#endif
12209
12210
10.9M
    } else if (ctxt->instate != XML_PARSER_EOF) {
12211
94.0k
  if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
12212
94.0k
      xmlParserInputBufferPtr in = ctxt->input->buf;
12213
94.0k
      if ((in->encoder != NULL) && (in->buffer != NULL) &&
12214
7.35k
        (in->raw != NULL)) {
12215
7.35k
    int nbchars;
12216
7.35k
    size_t base = xmlBufGetInputBase(in->buffer, ctxt->input);
12217
7.35k
    size_t current = ctxt->input->cur - ctxt->input->base;
12218
12219
7.35k
    nbchars = xmlCharEncInput(in, terminate);
12220
7.35k
    if (nbchars < 0) {
12221
        /* TODO 2.6.0 */
12222
232
        xmlGenericError(xmlGenericErrorContext,
12223
232
            "xmlParseChunk: encoder error\n");
12224
232
        return(XML_ERR_INVALID_ENCODING);
12225
232
    }
12226
7.12k
    xmlBufSetInputBaseCur(in->buffer, ctxt->input, base, current);
12227
7.12k
      }
12228
94.0k
  }
12229
94.0k
    }
12230
11.0M
    if (remain != 0) {
12231
4.27M
        xmlParseTryOrFinish(ctxt, 0);
12232
6.73M
    } else {
12233
6.73M
        if ((ctxt->input != NULL) && (ctxt->input->buf != NULL))
12234
6.73M
            avail = xmlBufUse(ctxt->input->buf->buffer);
12235
        /*
12236
         * Depending on the current state it may not be such
12237
         * a good idea to try parsing if there is nothing in the chunk
12238
         * which would be worth doing a parser state transition and we
12239
         * need to wait for more data
12240
         */
12241
6.73M
        if ((terminate) || (avail > XML_MAX_TEXT_LENGTH) ||
12242
6.57M
            (old_avail == 0) || (avail == 0) ||
12243
6.57M
            (xmlParseCheckTransition(ctxt,
12244
6.57M
                       (const char *)&ctxt->input->base[old_avail],
12245
6.57M
                                     avail - old_avail)))
12246
4.17M
            xmlParseTryOrFinish(ctxt, terminate);
12247
6.73M
    }
12248
11.0M
    if (ctxt->instate == XML_PARSER_EOF)
12249
56.2k
        return(ctxt->errNo);
12250
12251
10.9M
    if ((ctxt->input != NULL) &&
12252
10.9M
         (((ctxt->input->end - ctxt->input->cur) > XML_MAX_LOOKUP_LIMIT) ||
12253
10.9M
         ((ctxt->input->cur - ctxt->input->base) > XML_MAX_LOOKUP_LIMIT)) &&
12254
98
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
12255
98
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
12256
98
        xmlHaltParser(ctxt);
12257
98
    }
12258
10.9M
    if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12259
5.63k
        return(ctxt->errNo);
12260
12261
10.9M
    if (remain != 0) {
12262
4.27M
        chunk += size;
12263
4.27M
        size = remain;
12264
4.27M
        remain = 0;
12265
4.27M
        goto xmldecl_done;
12266
4.27M
    }
12267
6.67M
    if ((end_in_lf == 1) && (ctxt->input != NULL) &&
12268
20.7k
        (ctxt->input->buf != NULL)) {
12269
20.7k
  size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
12270
20.7k
           ctxt->input);
12271
20.7k
  size_t current = ctxt->input->cur - ctxt->input->base;
12272
12273
20.7k
  xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
12274
12275
20.7k
  xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
12276
20.7k
            base, current);
12277
20.7k
    }
12278
6.67M
    if (terminate) {
12279
  /*
12280
   * Check for termination
12281
   */
12282
126k
  int cur_avail = 0;
12283
12284
126k
  if (ctxt->input != NULL) {
12285
126k
      if (ctxt->input->buf == NULL)
12286
0
    cur_avail = ctxt->input->length -
12287
0
          (ctxt->input->cur - ctxt->input->base);
12288
126k
      else
12289
126k
    cur_avail = xmlBufUse(ctxt->input->buf->buffer) -
12290
126k
                    (ctxt->input->cur - ctxt->input->base);
12291
126k
  }
12292
12293
126k
  if ((ctxt->instate != XML_PARSER_EOF) &&
12294
126k
      (ctxt->instate != XML_PARSER_EPILOG)) {
12295
79.5k
      xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
12296
79.5k
  }
12297
126k
  if ((ctxt->instate == XML_PARSER_EPILOG) && (cur_avail > 0)) {
12298
353
      xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
12299
353
  }
12300
126k
  if (ctxt->instate != XML_PARSER_EOF) {
12301
126k
      if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
12302
126k
    ctxt->sax->endDocument(ctxt->userData);
12303
126k
  }
12304
126k
  ctxt->instate = XML_PARSER_EOF;
12305
126k
    }
12306
6.67M
    if (ctxt->wellFormed == 0)
12307
111k
  return((xmlParserErrors) ctxt->errNo);
12308
6.56M
    else
12309
6.56M
        return(0);
12310
6.67M
}
12311
12312
/************************************************************************
12313
 *                  *
12314
 *    I/O front end functions to the parser     *
12315
 *                  *
12316
 ************************************************************************/
12317
12318
/**
12319
 * xmlCreatePushParserCtxt:
12320
 * @sax:  a SAX handler
12321
 * @user_data:  The user data returned on SAX callbacks
12322
 * @chunk:  a pointer to an array of chars
12323
 * @size:  number of chars in the array
12324
 * @filename:  an optional file name or URI
12325
 *
12326
 * Create a parser context for using the XML parser in push mode.
12327
 * If @buffer and @size are non-NULL, the data is used to detect
12328
 * the encoding.  The remaining characters will be parsed so they
12329
 * don't need to be fed in again through xmlParseChunk.
12330
 * To allow content encoding detection, @size should be >= 4
12331
 * The value of @filename is used for fetching external entities
12332
 * and error/warning reports.
12333
 *
12334
 * Returns the new parser context or NULL
12335
 */
12336
12337
xmlParserCtxtPtr
12338
xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12339
234k
                        const char *chunk, int size, const char *filename) {
12340
234k
    xmlParserCtxtPtr ctxt;
12341
234k
    xmlParserInputPtr inputStream;
12342
234k
    xmlParserInputBufferPtr buf;
12343
234k
    xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
12344
12345
    /*
12346
     * plug some encoding conversion routines
12347
     */
12348
234k
    if ((chunk != NULL) && (size >= 4))
12349
234k
  enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
12350
12351
234k
    buf = xmlAllocParserInputBuffer(enc);
12352
234k
    if (buf == NULL) return(NULL);
12353
12354
234k
    ctxt = xmlNewParserCtxt();
12355
234k
    if (ctxt == NULL) {
12356
0
        xmlErrMemory(NULL, "creating parser: out of memory\n");
12357
0
  xmlFreeParserInputBuffer(buf);
12358
0
  return(NULL);
12359
0
    }
12360
234k
    ctxt->dictNames = 1;
12361
234k
    ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
12362
234k
    if (ctxt->pushTab == NULL) {
12363
0
        xmlErrMemory(ctxt, NULL);
12364
0
  xmlFreeParserInputBuffer(buf);
12365
0
  xmlFreeParserCtxt(ctxt);
12366
0
  return(NULL);
12367
0
    }
12368
234k
    if (sax != NULL) {
12369
234k
#ifdef LIBXML_SAX1_ENABLED
12370
234k
  if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
12371
234k
#endif /* LIBXML_SAX1_ENABLED */
12372
234k
      xmlFree(ctxt->sax);
12373
234k
  ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
12374
234k
  if (ctxt->sax == NULL) {
12375
0
      xmlErrMemory(ctxt, NULL);
12376
0
      xmlFreeParserInputBuffer(buf);
12377
0
      xmlFreeParserCtxt(ctxt);
12378
0
      return(NULL);
12379
0
  }
12380
234k
  memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
12381
234k
  if (sax->initialized == XML_SAX2_MAGIC)
12382
234k
      memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
12383
0
  else
12384
0
      memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12385
234k
  if (user_data != NULL)
12386
0
      ctxt->userData = user_data;
12387
234k
    }
12388
234k
    if (filename == NULL) {
12389
138k
  ctxt->directory = NULL;
12390
138k
    } else {
12391
96.8k
        ctxt->directory = xmlParserGetDirectory(filename);
12392
96.8k
    }
12393
12394
234k
    inputStream = xmlNewInputStream(ctxt);
12395
234k
    if (inputStream == NULL) {
12396
0
  xmlFreeParserCtxt(ctxt);
12397
0
  xmlFreeParserInputBuffer(buf);
12398
0
  return(NULL);
12399
0
    }
12400
12401
234k
    if (filename == NULL)
12402
138k
  inputStream->filename = NULL;
12403
96.8k
    else {
12404
96.8k
  inputStream->filename = (char *)
12405
96.8k
      xmlCanonicPath((const xmlChar *) filename);
12406
96.8k
  if (inputStream->filename == NULL) {
12407
0
      xmlFreeParserCtxt(ctxt);
12408
0
      xmlFreeParserInputBuffer(buf);
12409
0
      return(NULL);
12410
0
  }
12411
96.8k
    }
12412
234k
    inputStream->buf = buf;
12413
234k
    xmlBufResetInput(inputStream->buf->buffer, inputStream);
12414
234k
    inputPush(ctxt, inputStream);
12415
12416
    /*
12417
     * If the caller didn't provide an initial 'chunk' for determining
12418
     * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
12419
     * that it can be automatically determined later
12420
     */
12421
234k
    if ((size == 0) || (chunk == NULL)) {
12422
206
  ctxt->charset = XML_CHAR_ENCODING_NONE;
12423
234k
    } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
12424
234k
  size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12425
234k
  size_t cur = ctxt->input->cur - ctxt->input->base;
12426
12427
234k
  xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12428
12429
234k
        xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
12430
#ifdef DEBUG_PUSH
12431
  xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12432
#endif
12433
234k
    }
12434
12435
234k
    if (enc != XML_CHAR_ENCODING_NONE) {
12436
89.6k
        xmlSwitchEncoding(ctxt, enc);
12437
89.6k
    }
12438
12439
234k
    return(ctxt);
12440
234k
}
12441
#endif /* LIBXML_PUSH_ENABLED */
12442
12443
/**
12444
 * xmlHaltParser:
12445
 * @ctxt:  an XML parser context
12446
 *
12447
 * Blocks further parser processing don't override error
12448
 * for internal use
12449
 */
12450
static void
12451
111k
xmlHaltParser(xmlParserCtxtPtr ctxt) {
12452
111k
    if (ctxt == NULL)
12453
0
        return;
12454
111k
    ctxt->instate = XML_PARSER_EOF;
12455
111k
    ctxt->disableSAX = 1;
12456
111k
    while (ctxt->inputNr > 1)
12457
177
        xmlFreeInputStream(inputPop(ctxt));
12458
111k
    if (ctxt->input != NULL) {
12459
        /*
12460
   * in case there was a specific allocation deallocate before
12461
   * overriding base
12462
   */
12463
111k
        if (ctxt->input->free != NULL) {
12464
0
      ctxt->input->free((xmlChar *) ctxt->input->base);
12465
0
      ctxt->input->free = NULL;
12466
0
  }
12467
111k
  ctxt->input->cur = BAD_CAST"";
12468
111k
  ctxt->input->base = ctxt->input->cur;
12469
111k
        ctxt->input->end = ctxt->input->cur;
12470
111k
    }
12471
111k
}
12472
12473
/**
12474
 * xmlStopParser:
12475
 * @ctxt:  an XML parser context
12476
 *
12477
 * Blocks further parser processing
12478
 */
12479
void
12480
53.6k
xmlStopParser(xmlParserCtxtPtr ctxt) {
12481
53.6k
    if (ctxt == NULL)
12482
0
        return;
12483
53.6k
    xmlHaltParser(ctxt);
12484
53.6k
    ctxt->errNo = XML_ERR_USER_STOP;
12485
53.6k
}
12486
12487
/**
12488
 * xmlCreateIOParserCtxt:
12489
 * @sax:  a SAX handler
12490
 * @user_data:  The user data returned on SAX callbacks
12491
 * @ioread:  an I/O read function
12492
 * @ioclose:  an I/O close function
12493
 * @ioctx:  an I/O handler
12494
 * @enc:  the charset encoding if known
12495
 *
12496
 * Create a parser context for using the XML parser with an existing
12497
 * I/O stream
12498
 *
12499
 * Returns the new parser context or NULL
12500
 */
12501
xmlParserCtxtPtr
12502
xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12503
  xmlInputReadCallback   ioread, xmlInputCloseCallback  ioclose,
12504
0
  void *ioctx, xmlCharEncoding enc) {
12505
0
    xmlParserCtxtPtr ctxt;
12506
0
    xmlParserInputPtr inputStream;
12507
0
    xmlParserInputBufferPtr buf;
12508
12509
0
    if (ioread == NULL) return(NULL);
12510
12511
0
    buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
12512
0
    if (buf == NULL) {
12513
0
        if (ioclose != NULL)
12514
0
            ioclose(ioctx);
12515
0
        return (NULL);
12516
0
    }
12517
12518
0
    ctxt = xmlNewParserCtxt();
12519
0
    if (ctxt == NULL) {
12520
0
  xmlFreeParserInputBuffer(buf);
12521
0
  return(NULL);
12522
0
    }
12523
0
    if (sax != NULL) {
12524
0
#ifdef LIBXML_SAX1_ENABLED
12525
0
  if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
12526
0
#endif /* LIBXML_SAX1_ENABLED */
12527
0
      xmlFree(ctxt->sax);
12528
0
  ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
12529
0
  if (ctxt->sax == NULL) {
12530
0
      xmlErrMemory(ctxt, NULL);
12531
0
      xmlFreeParserCtxt(ctxt);
12532
0
      return(NULL);
12533
0
  }
12534
0
  memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
12535
0
  if (sax->initialized == XML_SAX2_MAGIC)
12536
0
      memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
12537
0
  else
12538
0
      memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12539
0
  if (user_data != NULL)
12540
0
      ctxt->userData = user_data;
12541
0
    }
12542
12543
0
    inputStream = xmlNewIOInputStream(ctxt, buf, enc);
12544
0
    if (inputStream == NULL) {
12545
0
  xmlFreeParserCtxt(ctxt);
12546
0
  return(NULL);
12547
0
    }
12548
0
    inputPush(ctxt, inputStream);
12549
12550
0
    return(ctxt);
12551
0
}
12552
12553
#ifdef LIBXML_VALID_ENABLED
12554
/************************************************************************
12555
 *                  *
12556
 *    Front ends when parsing a DTD       *
12557
 *                  *
12558
 ************************************************************************/
12559
12560
/**
12561
 * xmlIOParseDTD:
12562
 * @sax:  the SAX handler block or NULL
12563
 * @input:  an Input Buffer
12564
 * @enc:  the charset encoding if known
12565
 *
12566
 * Load and parse a DTD
12567
 *
12568
 * Returns the resulting xmlDtdPtr or NULL in case of error.
12569
 * @input will be freed by the function in any case.
12570
 */
12571
12572
xmlDtdPtr
12573
xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
12574
0
        xmlCharEncoding enc) {
12575
0
    xmlDtdPtr ret = NULL;
12576
0
    xmlParserCtxtPtr ctxt;
12577
0
    xmlParserInputPtr pinput = NULL;
12578
0
    xmlChar start[4];
12579
12580
0
    if (input == NULL)
12581
0
  return(NULL);
12582
12583
0
    ctxt = xmlNewParserCtxt();
12584
0
    if (ctxt == NULL) {
12585
0
        xmlFreeParserInputBuffer(input);
12586
0
  return(NULL);
12587
0
    }
12588
12589
    /* We are loading a DTD */
12590
0
    ctxt->options |= XML_PARSE_DTDLOAD;
12591
12592
    /*
12593
     * Set-up the SAX context
12594
     */
12595
0
    if (sax != NULL) {
12596
0
  if (ctxt->sax != NULL)
12597
0
      xmlFree(ctxt->sax);
12598
0
        ctxt->sax = sax;
12599
0
        ctxt->userData = ctxt;
12600
0
    }
12601
0
    xmlDetectSAX2(ctxt);
12602
12603
    /*
12604
     * generate a parser input from the I/O handler
12605
     */
12606
12607
0
    pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12608
0
    if (pinput == NULL) {
12609
0
        if (sax != NULL) ctxt->sax = NULL;
12610
0
        xmlFreeParserInputBuffer(input);
12611
0
  xmlFreeParserCtxt(ctxt);
12612
0
  return(NULL);
12613
0
    }
12614
12615
    /*
12616
     * plug some encoding conversion routines here.
12617
     */
12618
0
    if (xmlPushInput(ctxt, pinput) < 0) {
12619
0
        if (sax != NULL) ctxt->sax = NULL;
12620
0
  xmlFreeParserCtxt(ctxt);
12621
0
  return(NULL);
12622
0
    }
12623
0
    if (enc != XML_CHAR_ENCODING_NONE) {
12624
0
        xmlSwitchEncoding(ctxt, enc);
12625
0
    }
12626
12627
0
    pinput->filename = NULL;
12628
0
    pinput->line = 1;
12629
0
    pinput->col = 1;
12630
0
    pinput->base = ctxt->input->cur;
12631
0
    pinput->cur = ctxt->input->cur;
12632
0
    pinput->free = NULL;
12633
12634
    /*
12635
     * let's parse that entity knowing it's an external subset.
12636
     */
12637
0
    ctxt->inSubset = 2;
12638
0
    ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12639
0
    if (ctxt->myDoc == NULL) {
12640
0
  xmlErrMemory(ctxt, "New Doc failed");
12641
0
  return(NULL);
12642
0
    }
12643
0
    ctxt->myDoc->properties = XML_DOC_INTERNAL;
12644
0
    ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12645
0
                                 BAD_CAST "none", BAD_CAST "none");
12646
12647
0
    if ((enc == XML_CHAR_ENCODING_NONE) &&
12648
0
        ((ctxt->input->end - ctxt->input->cur) >= 4)) {
12649
  /*
12650
   * Get the 4 first bytes and decode the charset
12651
   * if enc != XML_CHAR_ENCODING_NONE
12652
   * plug some encoding conversion routines.
12653
   */
12654
0
  start[0] = RAW;
12655
0
  start[1] = NXT(1);
12656
0
  start[2] = NXT(2);
12657
0
  start[3] = NXT(3);
12658
0
  enc = xmlDetectCharEncoding(start, 4);
12659
0
  if (enc != XML_CHAR_ENCODING_NONE) {
12660
0
      xmlSwitchEncoding(ctxt, enc);
12661
0
  }
12662
0
    }
12663
12664
0
    xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
12665
12666
0
    if (ctxt->myDoc != NULL) {
12667
0
  if (ctxt->wellFormed) {
12668
0
      ret = ctxt->myDoc->extSubset;
12669
0
      ctxt->myDoc->extSubset = NULL;
12670
0
      if (ret != NULL) {
12671
0
    xmlNodePtr tmp;
12672
12673
0
    ret->doc = NULL;
12674
0
    tmp = ret->children;
12675
0
    while (tmp != NULL) {
12676
0
        tmp->doc = NULL;
12677
0
        tmp = tmp->next;
12678
0
    }
12679
0
      }
12680
0
  } else {
12681
0
      ret = NULL;
12682
0
  }
12683
0
        xmlFreeDoc(ctxt->myDoc);
12684
0
        ctxt->myDoc = NULL;
12685
0
    }
12686
0
    if (sax != NULL) ctxt->sax = NULL;
12687
0
    xmlFreeParserCtxt(ctxt);
12688
12689
0
    return(ret);
12690
0
}
12691
12692
/**
12693
 * xmlSAXParseDTD:
12694
 * @sax:  the SAX handler block
12695
 * @ExternalID:  a NAME* containing the External ID of the DTD
12696
 * @SystemID:  a NAME* containing the URL to the DTD
12697
 *
12698
 * Load and parse an external subset.
12699
 *
12700
 * Returns the resulting xmlDtdPtr or NULL in case of error.
12701
 */
12702
12703
xmlDtdPtr
12704
xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
12705
0
                          const xmlChar *SystemID) {
12706
0
    xmlDtdPtr ret = NULL;
12707
0
    xmlParserCtxtPtr ctxt;
12708
0
    xmlParserInputPtr input = NULL;
12709
0
    xmlCharEncoding enc;
12710
0
    xmlChar* systemIdCanonic;
12711
12712
0
    if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
12713
12714
0
    ctxt = xmlNewParserCtxt();
12715
0
    if (ctxt == NULL) {
12716
0
  return(NULL);
12717
0
    }
12718
12719
    /* We are loading a DTD */
12720
0
    ctxt->options |= XML_PARSE_DTDLOAD;
12721
12722
    /*
12723
     * Set-up the SAX context
12724
     */
12725
0
    if (sax != NULL) {
12726
0
  if (ctxt->sax != NULL)
12727
0
      xmlFree(ctxt->sax);
12728
0
        ctxt->sax = sax;
12729
0
        ctxt->userData = ctxt;
12730
0
    }
12731
12732
    /*
12733
     * Canonicalise the system ID
12734
     */
12735
0
    systemIdCanonic = xmlCanonicPath(SystemID);
12736
0
    if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
12737
0
  xmlFreeParserCtxt(ctxt);
12738
0
  return(NULL);
12739
0
    }
12740
12741
    /*
12742
     * Ask the Entity resolver to load the damn thing
12743
     */
12744
12745
0
    if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
12746
0
  input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
12747
0
                                   systemIdCanonic);
12748
0
    if (input == NULL) {
12749
0
        if (sax != NULL) ctxt->sax = NULL;
12750
0
  xmlFreeParserCtxt(ctxt);
12751
0
  if (systemIdCanonic != NULL)
12752
0
      xmlFree(systemIdCanonic);
12753
0
  return(NULL);
12754
0
    }
12755
12756
    /*
12757
     * plug some encoding conversion routines here.
12758
     */
12759
0
    if (xmlPushInput(ctxt, input) < 0) {
12760
0
        if (sax != NULL) ctxt->sax = NULL;
12761
0
  xmlFreeParserCtxt(ctxt);
12762
0
  if (systemIdCanonic != NULL)
12763
0
      xmlFree(systemIdCanonic);
12764
0
  return(NULL);
12765
0
    }
12766
0
    if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12767
0
  enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
12768
0
  xmlSwitchEncoding(ctxt, enc);
12769
0
    }
12770
12771
0
    if (input->filename == NULL)
12772
0
  input->filename = (char *) systemIdCanonic;
12773
0
    else
12774
0
  xmlFree(systemIdCanonic);
12775
0
    input->line = 1;
12776
0
    input->col = 1;
12777
0
    input->base = ctxt->input->cur;
12778
0
    input->cur = ctxt->input->cur;
12779
0
    input->free = NULL;
12780
12781
    /*
12782
     * let's parse that entity knowing it's an external subset.
12783
     */
12784
0
    ctxt->inSubset = 2;
12785
0
    ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12786
0
    if (ctxt->myDoc == NULL) {
12787
0
  xmlErrMemory(ctxt, "New Doc failed");
12788
0
        if (sax != NULL) ctxt->sax = NULL;
12789
0
  xmlFreeParserCtxt(ctxt);
12790
0
  return(NULL);
12791
0
    }
12792
0
    ctxt->myDoc->properties = XML_DOC_INTERNAL;
12793
0
    ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12794
0
                                 ExternalID, SystemID);
12795
0
    xmlParseExternalSubset(ctxt, ExternalID, SystemID);
12796
12797
0
    if (ctxt->myDoc != NULL) {
12798
0
  if (ctxt->wellFormed) {
12799
0
      ret = ctxt->myDoc->extSubset;
12800
0
      ctxt->myDoc->extSubset = NULL;
12801
0
      if (ret != NULL) {
12802
0
    xmlNodePtr tmp;
12803
12804
0
    ret->doc = NULL;
12805
0
    tmp = ret->children;
12806
0
    while (tmp != NULL) {
12807
0
        tmp->doc = NULL;
12808
0
        tmp = tmp->next;
12809
0
    }
12810
0
      }
12811
0
  } else {
12812
0
      ret = NULL;
12813
0
  }
12814
0
        xmlFreeDoc(ctxt->myDoc);
12815
0
        ctxt->myDoc = NULL;
12816
0
    }
12817
0
    if (sax != NULL) ctxt->sax = NULL;
12818
0
    xmlFreeParserCtxt(ctxt);
12819
12820
0
    return(ret);
12821
0
}
12822
12823
12824
/**
12825
 * xmlParseDTD:
12826
 * @ExternalID:  a NAME* containing the External ID of the DTD
12827
 * @SystemID:  a NAME* containing the URL to the DTD
12828
 *
12829
 * Load and parse an external subset.
12830
 *
12831
 * Returns the resulting xmlDtdPtr or NULL in case of error.
12832
 */
12833
12834
xmlDtdPtr
12835
0
xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
12836
0
    return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
12837
0
}
12838
#endif /* LIBXML_VALID_ENABLED */
12839
12840
/************************************************************************
12841
 *                  *
12842
 *    Front ends when parsing an Entity     *
12843
 *                  *
12844
 ************************************************************************/
12845
12846
/**
12847
 * xmlParseCtxtExternalEntity:
12848
 * @ctx:  the existing parsing context
12849
 * @URL:  the URL for the entity to load
12850
 * @ID:  the System ID for the entity to load
12851
 * @lst:  the return value for the set of parsed nodes
12852
 *
12853
 * Parse an external general entity within an existing parsing context
12854
 * An external general parsed entity is well-formed if it matches the
12855
 * production labeled extParsedEnt.
12856
 *
12857
 * [78] extParsedEnt ::= TextDecl? content
12858
 *
12859
 * Returns 0 if the entity is well formed, -1 in case of args problem and
12860
 *    the parser error code otherwise
12861
 */
12862
12863
int
12864
xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
12865
0
                 const xmlChar *ID, xmlNodePtr *lst) {
12866
0
    xmlParserCtxtPtr ctxt;
12867
0
    xmlDocPtr newDoc;
12868
0
    xmlNodePtr newRoot;
12869
0
    xmlSAXHandlerPtr oldsax = NULL;
12870
0
    int ret = 0;
12871
0
    xmlChar start[4];
12872
0
    xmlCharEncoding enc;
12873
12874
0
    if (ctx == NULL) return(-1);
12875
12876
0
    if (((ctx->depth > 40) && ((ctx->options & XML_PARSE_HUGE) == 0)) ||
12877
0
        (ctx->depth > 1024)) {
12878
0
  return(XML_ERR_ENTITY_LOOP);
12879
0
    }
12880
12881
0
    if (lst != NULL)
12882
0
        *lst = NULL;
12883
0
    if ((URL == NULL) && (ID == NULL))
12884
0
  return(-1);
12885
0
    if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
12886
0
  return(-1);
12887
12888
0
    ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, ctx);
12889
0
    if (ctxt == NULL) {
12890
0
  return(-1);
12891
0
    }
12892
12893
0
    oldsax = ctxt->sax;
12894
0
    ctxt->sax = ctx->sax;
12895
0
    xmlDetectSAX2(ctxt);
12896
0
    newDoc = xmlNewDoc(BAD_CAST "1.0");
12897
0
    if (newDoc == NULL) {
12898
0
  xmlFreeParserCtxt(ctxt);
12899
0
  return(-1);
12900
0
    }
12901
0
    newDoc->properties = XML_DOC_INTERNAL;
12902
0
    if (ctx->myDoc->dict) {
12903
0
  newDoc->dict = ctx->myDoc->dict;
12904
0
  xmlDictReference(newDoc->dict);
12905
0
    }
12906
0
    if (ctx->myDoc != NULL) {
12907
0
  newDoc->intSubset = ctx->myDoc->intSubset;
12908
0
  newDoc->extSubset = ctx->myDoc->extSubset;
12909
0
    }
12910
0
    if (ctx->myDoc->URL != NULL) {
12911
0
  newDoc->URL = xmlStrdup(ctx->myDoc->URL);
12912
0
    }
12913
0
    newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12914
0
    if (newRoot == NULL) {
12915
0
  ctxt->sax = oldsax;
12916
0
  xmlFreeParserCtxt(ctxt);
12917
0
  newDoc->intSubset = NULL;
12918
0
  newDoc->extSubset = NULL;
12919
0
        xmlFreeDoc(newDoc);
12920
0
  return(-1);
12921
0
    }
12922
0
    xmlAddChild((xmlNodePtr) newDoc, newRoot);
12923
0
    nodePush(ctxt, newDoc->children);
12924
0
    if (ctx->myDoc == NULL) {
12925
0
  ctxt->myDoc = newDoc;
12926
0
    } else {
12927
0
  ctxt->myDoc = ctx->myDoc;
12928
0
  newDoc->children->doc = ctx->myDoc;
12929
0
    }
12930
12931
    /*
12932
     * Get the 4 first bytes and decode the charset
12933
     * if enc != XML_CHAR_ENCODING_NONE
12934
     * plug some encoding conversion routines.
12935
     */
12936
0
    GROW
12937
0
    if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12938
0
  start[0] = RAW;
12939
0
  start[1] = NXT(1);
12940
0
  start[2] = NXT(2);
12941
0
  start[3] = NXT(3);
12942
0
  enc = xmlDetectCharEncoding(start, 4);
12943
0
  if (enc != XML_CHAR_ENCODING_NONE) {
12944
0
      xmlSwitchEncoding(ctxt, enc);
12945
0
  }
12946
0
    }
12947
12948
    /*
12949
     * Parse a possible text declaration first
12950
     */
12951
0
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
12952
0
  xmlParseTextDecl(ctxt);
12953
  /*
12954
   * An XML-1.0 document can't reference an entity not XML-1.0
12955
   */
12956
0
  if ((xmlStrEqual(ctx->version, BAD_CAST "1.0")) &&
12957
0
      (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
12958
0
      xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
12959
0
                     "Version mismatch between document and entity\n");
12960
0
  }
12961
0
    }
12962
12963
    /*
12964
     * If the user provided its own SAX callbacks then reuse the
12965
     * useData callback field, otherwise the expected setup in a
12966
     * DOM builder is to have userData == ctxt
12967
     */
12968
0
    if (ctx->userData == ctx)
12969
0
        ctxt->userData = ctxt;
12970
0
    else
12971
0
        ctxt->userData = ctx->userData;
12972
12973
    /*
12974
     * Doing validity checking on chunk doesn't make sense
12975
     */
12976
0
    ctxt->instate = XML_PARSER_CONTENT;
12977
0
    ctxt->validate = ctx->validate;
12978
0
    ctxt->valid = ctx->valid;
12979
0
    ctxt->loadsubset = ctx->loadsubset;
12980
0
    ctxt->depth = ctx->depth + 1;
12981
0
    ctxt->replaceEntities = ctx->replaceEntities;
12982
0
    if (ctxt->validate) {
12983
0
  ctxt->vctxt.error = ctx->vctxt.error;
12984
0
  ctxt->vctxt.warning = ctx->vctxt.warning;
12985
0
    } else {
12986
0
  ctxt->vctxt.error = NULL;
12987
0
  ctxt->vctxt.warning = NULL;
12988
0
    }
12989
0
    ctxt->vctxt.nodeTab = NULL;
12990
0
    ctxt->vctxt.nodeNr = 0;
12991
0
    ctxt->vctxt.nodeMax = 0;
12992
0
    ctxt->vctxt.node = NULL;
12993
0
    if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
12994
0
    ctxt->dict = ctx->dict;
12995
0
    ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12996
0
    ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12997
0
    ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
12998
0
    ctxt->dictNames = ctx->dictNames;
12999
0
    ctxt->attsDefault = ctx->attsDefault;
13000
0
    ctxt->attsSpecial = ctx->attsSpecial;
13001
0
    ctxt->linenumbers = ctx->linenumbers;
13002
13003
0
    xmlParseContent(ctxt);
13004
13005
0
    ctx->validate = ctxt->validate;
13006
0
    ctx->valid = ctxt->valid;
13007
0
    if ((RAW == '<') && (NXT(1) == '/')) {
13008
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13009
0
    } else if (RAW != 0) {
13010
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13011
0
    }
13012
0
    if (ctxt->node != newDoc->children) {
13013
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13014
0
    }
13015
13016
0
    if (!ctxt->wellFormed) {
13017
0
        if (ctxt->errNo == 0)
13018
0
      ret = 1;
13019
0
  else
13020
0
      ret = ctxt->errNo;
13021
0
    } else {
13022
0
  if (lst != NULL) {
13023
0
      xmlNodePtr cur;
13024
13025
      /*
13026
       * Return the newly created nodeset after unlinking it from
13027
       * they pseudo parent.
13028
       */
13029
0
      cur = newDoc->children->children;
13030
0
      *lst = cur;
13031
0
      while (cur != NULL) {
13032
0
    cur->parent = NULL;
13033
0
    cur = cur->next;
13034
0
      }
13035
0
            newDoc->children->children = NULL;
13036
0
  }
13037
0
  ret = 0;
13038
0
    }
13039
0
    ctxt->sax = oldsax;
13040
0
    ctxt->dict = NULL;
13041
0
    ctxt->attsDefault = NULL;
13042
0
    ctxt->attsSpecial = NULL;
13043
0
    xmlFreeParserCtxt(ctxt);
13044
0
    newDoc->intSubset = NULL;
13045
0
    newDoc->extSubset = NULL;
13046
0
    xmlFreeDoc(newDoc);
13047
13048
0
    return(ret);
13049
0
}
13050
13051
/**
13052
 * xmlParseExternalEntityPrivate:
13053
 * @doc:  the document the chunk pertains to
13054
 * @oldctxt:  the previous parser context if available
13055
 * @sax:  the SAX handler bloc (possibly NULL)
13056
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
13057
 * @depth:  Used for loop detection, use 0
13058
 * @URL:  the URL for the entity to load
13059
 * @ID:  the System ID for the entity to load
13060
 * @list:  the return value for the set of parsed nodes
13061
 *
13062
 * Private version of xmlParseExternalEntity()
13063
 *
13064
 * Returns 0 if the entity is well formed, -1 in case of args problem and
13065
 *    the parser error code otherwise
13066
 */
13067
13068
static xmlParserErrors
13069
xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
13070
                xmlSAXHandlerPtr sax,
13071
          void *user_data, int depth, const xmlChar *URL,
13072
0
          const xmlChar *ID, xmlNodePtr *list) {
13073
0
    xmlParserCtxtPtr ctxt;
13074
0
    xmlDocPtr newDoc;
13075
0
    xmlNodePtr newRoot;
13076
0
    xmlSAXHandlerPtr oldsax = NULL;
13077
0
    xmlParserErrors ret = XML_ERR_OK;
13078
0
    xmlChar start[4];
13079
0
    xmlCharEncoding enc;
13080
13081
0
    if (((depth > 40) &&
13082
0
  ((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13083
0
  (depth > 1024)) {
13084
0
  return(XML_ERR_ENTITY_LOOP);
13085
0
    }
13086
13087
0
    if (list != NULL)
13088
0
        *list = NULL;
13089
0
    if ((URL == NULL) && (ID == NULL))
13090
0
  return(XML_ERR_INTERNAL_ERROR);
13091
0
    if (doc == NULL)
13092
0
  return(XML_ERR_INTERNAL_ERROR);
13093
13094
13095
0
    ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, oldctxt);
13096
0
    if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
13097
0
    ctxt->userData = ctxt;
13098
0
    if (oldctxt != NULL) {
13099
0
  ctxt->_private = oldctxt->_private;
13100
0
  ctxt->loadsubset = oldctxt->loadsubset;
13101
0
  ctxt->validate = oldctxt->validate;
13102
0
  ctxt->external = oldctxt->external;
13103
0
  ctxt->record_info = oldctxt->record_info;
13104
0
  ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
13105
0
  ctxt->node_seq.length = oldctxt->node_seq.length;
13106
0
  ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
13107
0
    } else {
13108
  /*
13109
   * Doing validity checking on chunk without context
13110
   * doesn't make sense
13111
   */
13112
0
  ctxt->_private = NULL;
13113
0
  ctxt->validate = 0;
13114
0
  ctxt->external = 2;
13115
0
  ctxt->loadsubset = 0;
13116
0
    }
13117
0
    if (sax != NULL) {
13118
0
  oldsax = ctxt->sax;
13119
0
        ctxt->sax = sax;
13120
0
  if (user_data != NULL)
13121
0
      ctxt->userData = user_data;
13122
0
    }
13123
0
    xmlDetectSAX2(ctxt);
13124
0
    newDoc = xmlNewDoc(BAD_CAST "1.0");
13125
0
    if (newDoc == NULL) {
13126
0
  ctxt->node_seq.maximum = 0;
13127
0
  ctxt->node_seq.length = 0;
13128
0
  ctxt->node_seq.buffer = NULL;
13129
0
  xmlFreeParserCtxt(ctxt);
13130
0
  return(XML_ERR_INTERNAL_ERROR);
13131
0
    }
13132
0
    newDoc->properties = XML_DOC_INTERNAL;
13133
0
    newDoc->intSubset = doc->intSubset;
13134
0
    newDoc->extSubset = doc->extSubset;
13135
0
    newDoc->dict = doc->dict;
13136
0
    xmlDictReference(newDoc->dict);
13137
13138
0
    if (doc->URL != NULL) {
13139
0
  newDoc->URL = xmlStrdup(doc->URL);
13140
0
    }
13141
0
    newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13142
0
    if (newRoot == NULL) {
13143
0
  if (sax != NULL)
13144
0
      ctxt->sax = oldsax;
13145
0
  ctxt->node_seq.maximum = 0;
13146
0
  ctxt->node_seq.length = 0;
13147
0
  ctxt->node_seq.buffer = NULL;
13148
0
  xmlFreeParserCtxt(ctxt);
13149
0
  newDoc->intSubset = NULL;
13150
0
  newDoc->extSubset = NULL;
13151
0
        xmlFreeDoc(newDoc);
13152
0
  return(XML_ERR_INTERNAL_ERROR);
13153
0
    }
13154
0
    xmlAddChild((xmlNodePtr) newDoc, newRoot);
13155
0
    nodePush(ctxt, newDoc->children);
13156
0
    ctxt->myDoc = doc;
13157
0
    newRoot->doc = doc;
13158
13159
    /*
13160
     * Get the 4 first bytes and decode the charset
13161
     * if enc != XML_CHAR_ENCODING_NONE
13162
     * plug some encoding conversion routines.
13163
     */
13164
0
    GROW;
13165
0
    if ((ctxt->input->end - ctxt->input->cur) >= 4) {
13166
0
  start[0] = RAW;
13167
0
  start[1] = NXT(1);
13168
0
  start[2] = NXT(2);
13169
0
  start[3] = NXT(3);
13170
0
  enc = xmlDetectCharEncoding(start, 4);
13171
0
  if (enc != XML_CHAR_ENCODING_NONE) {
13172
0
      xmlSwitchEncoding(ctxt, enc);
13173
0
  }
13174
0
    }
13175
13176
    /*
13177
     * Parse a possible text declaration first
13178
     */
13179
0
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
13180
0
  xmlParseTextDecl(ctxt);
13181
0
    }
13182
13183
0
    ctxt->instate = XML_PARSER_CONTENT;
13184
0
    ctxt->depth = depth;
13185
13186
0
    xmlParseContent(ctxt);
13187
13188
0
    if ((RAW == '<') && (NXT(1) == '/')) {
13189
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13190
0
    } else if (RAW != 0) {
13191
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13192
0
    }
13193
0
    if (ctxt->node != newDoc->children) {
13194
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13195
0
    }
13196
13197
0
    if (!ctxt->wellFormed) {
13198
0
        if (ctxt->errNo == 0)
13199
0
      ret = XML_ERR_INTERNAL_ERROR;
13200
0
  else
13201
0
      ret = (xmlParserErrors)ctxt->errNo;
13202
0
    } else {
13203
0
  if (list != NULL) {
13204
0
      xmlNodePtr cur;
13205
13206
      /*
13207
       * Return the newly created nodeset after unlinking it from
13208
       * they pseudo parent.
13209
       */
13210
0
      cur = newDoc->children->children;
13211
0
      *list = cur;
13212
0
      while (cur != NULL) {
13213
0
    cur->parent = NULL;
13214
0
    cur = cur->next;
13215
0
      }
13216
0
            newDoc->children->children = NULL;
13217
0
  }
13218
0
  ret = XML_ERR_OK;
13219
0
    }
13220
13221
    /*
13222
     * Record in the parent context the number of entities replacement
13223
     * done when parsing that reference.
13224
     */
13225
0
    if (oldctxt != NULL)
13226
0
        oldctxt->nbentities += ctxt->nbentities;
13227
13228
    /*
13229
     * Also record the size of the entity parsed
13230
     */
13231
0
    if (ctxt->input != NULL && oldctxt != NULL) {
13232
0
  oldctxt->sizeentities += ctxt->input->consumed;
13233
0
  oldctxt->sizeentities += (ctxt->input->cur - ctxt->input->base);
13234
0
    }
13235
    /*
13236
     * And record the last error if any
13237
     */
13238
0
    if ((oldctxt != NULL) && (ctxt->lastError.code != XML_ERR_OK))
13239
0
        xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13240
13241
0
    if (sax != NULL)
13242
0
  ctxt->sax = oldsax;
13243
0
    if (oldctxt != NULL) {
13244
0
        oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
13245
0
        oldctxt->node_seq.length = ctxt->node_seq.length;
13246
0
        oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
13247
0
    }
13248
0
    ctxt->node_seq.maximum = 0;
13249
0
    ctxt->node_seq.length = 0;
13250
0
    ctxt->node_seq.buffer = NULL;
13251
0
    xmlFreeParserCtxt(ctxt);
13252
0
    newDoc->intSubset = NULL;
13253
0
    newDoc->extSubset = NULL;
13254
0
    xmlFreeDoc(newDoc);
13255
13256
0
    return(ret);
13257
0
}
13258
13259
#ifdef LIBXML_SAX1_ENABLED
13260
/**
13261
 * xmlParseExternalEntity:
13262
 * @doc:  the document the chunk pertains to
13263
 * @sax:  the SAX handler bloc (possibly NULL)
13264
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
13265
 * @depth:  Used for loop detection, use 0
13266
 * @URL:  the URL for the entity to load
13267
 * @ID:  the System ID for the entity to load
13268
 * @lst:  the return value for the set of parsed nodes
13269
 *
13270
 * Parse an external general entity
13271
 * An external general parsed entity is well-formed if it matches the
13272
 * production labeled extParsedEnt.
13273
 *
13274
 * [78] extParsedEnt ::= TextDecl? content
13275
 *
13276
 * Returns 0 if the entity is well formed, -1 in case of args problem and
13277
 *    the parser error code otherwise
13278
 */
13279
13280
int
13281
xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
13282
0
    int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
13283
0
    return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
13284
0
                           ID, lst));
13285
0
}
13286
13287
/**
13288
 * xmlParseBalancedChunkMemory:
13289
 * @doc:  the document the chunk pertains to
13290
 * @sax:  the SAX handler bloc (possibly NULL)
13291
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
13292
 * @depth:  Used for loop detection, use 0
13293
 * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
13294
 * @lst:  the return value for the set of parsed nodes
13295
 *
13296
 * Parse a well-balanced chunk of an XML document
13297
 * called by the parser
13298
 * The allowed sequence for the Well Balanced Chunk is the one defined by
13299
 * the content production in the XML grammar:
13300
 *
13301
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13302
 *
13303
 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13304
 *    the parser error code otherwise
13305
 */
13306
13307
int
13308
xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13309
0
     void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
13310
0
    return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
13311
0
                                                depth, string, lst, 0 );
13312
0
}
13313
#endif /* LIBXML_SAX1_ENABLED */
13314
13315
/**
13316
 * xmlParseBalancedChunkMemoryInternal:
13317
 * @oldctxt:  the existing parsing context
13318
 * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
13319
 * @user_data:  the user data field for the parser context
13320
 * @lst:  the return value for the set of parsed nodes
13321
 *
13322
 *
13323
 * Parse a well-balanced chunk of an XML document
13324
 * called by the parser
13325
 * The allowed sequence for the Well Balanced Chunk is the one defined by
13326
 * the content production in the XML grammar:
13327
 *
13328
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13329
 *
13330
 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13331
 * error code otherwise
13332
 *
13333
 * In case recover is set to 1, the nodelist will not be empty even if
13334
 * the parsed chunk is not well balanced.
13335
 */
13336
static xmlParserErrors
13337
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
13338
214k
  const xmlChar *string, void *user_data, xmlNodePtr *lst) {
13339
214k
    xmlParserCtxtPtr ctxt;
13340
214k
    xmlDocPtr newDoc = NULL;
13341
214k
    xmlNodePtr newRoot;
13342
214k
    xmlSAXHandlerPtr oldsax = NULL;
13343
214k
    xmlNodePtr content = NULL;
13344
214k
    xmlNodePtr last = NULL;
13345
214k
    int size;
13346
214k
    xmlParserErrors ret = XML_ERR_OK;
13347
214k
#ifdef SAX2
13348
214k
    int i;
13349
214k
#endif
13350
13351
214k
    if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13352
205k
        (oldctxt->depth >  1024)) {
13353
9.01k
  return(XML_ERR_ENTITY_LOOP);
13354
9.01k
    }
13355
13356
13357
205k
    if (lst != NULL)
13358
189k
        *lst = NULL;
13359
205k
    if (string == NULL)
13360
0
        return(XML_ERR_INTERNAL_ERROR);
13361
13362
205k
    size = xmlStrlen(string);
13363
13364
205k
    ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13365
205k
    if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
13366
200k
    if (user_data != NULL)
13367
0
  ctxt->userData = user_data;
13368
200k
    else
13369
200k
  ctxt->userData = ctxt;
13370
200k
    if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
13371
200k
    ctxt->dict = oldctxt->dict;
13372
200k
    ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13373
200k
    ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13374
200k
    ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13375
13376
200k
#ifdef SAX2
13377
    /* propagate namespaces down the entity */
13378
1.33M
    for (i = 0;i < oldctxt->nsNr;i += 2) {
13379
1.13M
        nsPush(ctxt, oldctxt->nsTab[i], oldctxt->nsTab[i+1]);
13380
1.13M
    }
13381
200k
#endif
13382
13383
200k
    oldsax = ctxt->sax;
13384
200k
    ctxt->sax = oldctxt->sax;
13385
200k
    xmlDetectSAX2(ctxt);
13386
200k
    ctxt->replaceEntities = oldctxt->replaceEntities;
13387
200k
    ctxt->options = oldctxt->options;
13388
13389
200k
    ctxt->_private = oldctxt->_private;
13390
200k
    if (oldctxt->myDoc == NULL) {
13391
0
  newDoc = xmlNewDoc(BAD_CAST "1.0");
13392
0
  if (newDoc == NULL) {
13393
0
      ctxt->sax = oldsax;
13394
0
      ctxt->dict = NULL;
13395
0
      xmlFreeParserCtxt(ctxt);
13396
0
      return(XML_ERR_INTERNAL_ERROR);
13397
0
  }
13398
0
  newDoc->properties = XML_DOC_INTERNAL;
13399
0
  newDoc->dict = ctxt->dict;
13400
0
  xmlDictReference(newDoc->dict);
13401
0
  ctxt->myDoc = newDoc;
13402
200k
    } else {
13403
200k
  ctxt->myDoc = oldctxt->myDoc;
13404
200k
        content = ctxt->myDoc->children;
13405
200k
  last = ctxt->myDoc->last;
13406
200k
    }
13407
200k
    newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
13408
200k
    if (newRoot == NULL) {
13409
0
  ctxt->sax = oldsax;
13410
0
  ctxt->dict = NULL;
13411
0
  xmlFreeParserCtxt(ctxt);
13412
0
  if (newDoc != NULL) {
13413
0
      xmlFreeDoc(newDoc);
13414
0
  }
13415
0
  return(XML_ERR_INTERNAL_ERROR);
13416
0
    }
13417
200k
    ctxt->myDoc->children = NULL;
13418
200k
    ctxt->myDoc->last = NULL;
13419
200k
    xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
13420
200k
    nodePush(ctxt, ctxt->myDoc->children);
13421
200k
    ctxt->instate = XML_PARSER_CONTENT;
13422
200k
    ctxt->depth = oldctxt->depth + 1;
13423
13424
200k
    ctxt->validate = 0;
13425
200k
    ctxt->loadsubset = oldctxt->loadsubset;
13426
200k
    if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
13427
  /*
13428
   * ID/IDREF registration will be done in xmlValidateElement below
13429
   */
13430
0
  ctxt->loadsubset |= XML_SKIP_IDS;
13431
0
    }
13432
200k
    ctxt->dictNames = oldctxt->dictNames;
13433
200k
    ctxt->attsDefault = oldctxt->attsDefault;
13434
200k
    ctxt->attsSpecial = oldctxt->attsSpecial;
13435
13436
200k
    xmlParseContent(ctxt);
13437
200k
    if ((RAW == '<') && (NXT(1) == '/')) {
13438
3.00k
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13439
197k
    } else if (RAW != 0) {
13440
370
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13441
370
    }
13442
200k
    if (ctxt->node != ctxt->myDoc->children) {
13443
121k
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13444
121k
    }
13445
13446
200k
    if (!ctxt->wellFormed) {
13447
187k
        if (ctxt->errNo == 0)
13448
0
      ret = XML_ERR_INTERNAL_ERROR;
13449
187k
  else
13450
187k
      ret = (xmlParserErrors)ctxt->errNo;
13451
187k
    } else {
13452
12.8k
      ret = XML_ERR_OK;
13453
12.8k
    }
13454
13455
200k
    if ((lst != NULL) && (ret == XML_ERR_OK)) {
13456
767
  xmlNodePtr cur;
13457
13458
  /*
13459
   * Return the newly created nodeset after unlinking it from
13460
   * they pseudo parent.
13461
   */
13462
767
  cur = ctxt->myDoc->children->children;
13463
767
  *lst = cur;
13464
3.85k
  while (cur != NULL) {
13465
3.08k
#ifdef LIBXML_VALID_ENABLED
13466
3.08k
      if ((oldctxt->validate) && (oldctxt->wellFormed) &&
13467
0
    (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
13468
0
    (cur->type == XML_ELEMENT_NODE)) {
13469
0
    oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
13470
0
      oldctxt->myDoc, cur);
13471
0
      }
13472
3.08k
#endif /* LIBXML_VALID_ENABLED */
13473
3.08k
      cur->parent = NULL;
13474
3.08k
      cur = cur->next;
13475
3.08k
  }
13476
767
  ctxt->myDoc->children->children = NULL;
13477
767
    }
13478
200k
    if (ctxt->myDoc != NULL) {
13479
200k
  xmlFreeNode(ctxt->myDoc->children);
13480
200k
        ctxt->myDoc->children = content;
13481
200k
        ctxt->myDoc->last = last;
13482
200k
    }
13483
13484
    /*
13485
     * Record in the parent context the number of entities replacement
13486
     * done when parsing that reference.
13487
     */
13488
200k
    if (oldctxt != NULL)
13489
200k
        oldctxt->nbentities += ctxt->nbentities;
13490
13491
    /*
13492
     * Also record the last error if any
13493
     */
13494
200k
    if (ctxt->lastError.code != XML_ERR_OK)
13495
187k
        xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13496
13497
200k
    ctxt->sax = oldsax;
13498
200k
    ctxt->dict = NULL;
13499
200k
    ctxt->attsDefault = NULL;
13500
200k
    ctxt->attsSpecial = NULL;
13501
200k
    xmlFreeParserCtxt(ctxt);
13502
200k
    if (newDoc != NULL) {
13503
0
  xmlFreeDoc(newDoc);
13504
0
    }
13505
13506
200k
    return(ret);
13507
200k
}
13508
13509
/**
13510
 * xmlParseInNodeContext:
13511
 * @node:  the context node
13512
 * @data:  the input string
13513
 * @datalen:  the input string length in bytes
13514
 * @options:  a combination of xmlParserOption
13515
 * @lst:  the return value for the set of parsed nodes
13516
 *
13517
 * Parse a well-balanced chunk of an XML document
13518
 * within the context (DTD, namespaces, etc ...) of the given node.
13519
 *
13520
 * The allowed sequence for the data is a Well Balanced Chunk defined by
13521
 * the content production in the XML grammar:
13522
 *
13523
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13524
 *
13525
 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13526
 * error code otherwise
13527
 */
13528
xmlParserErrors
13529
xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
13530
0
                      int options, xmlNodePtr *lst) {
13531
0
#ifdef SAX2
13532
0
    xmlParserCtxtPtr ctxt;
13533
0
    xmlDocPtr doc = NULL;
13534
0
    xmlNodePtr fake, cur;
13535
0
    int nsnr = 0;
13536
13537
0
    xmlParserErrors ret = XML_ERR_OK;
13538
13539
    /*
13540
     * check all input parameters, grab the document
13541
     */
13542
0
    if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
13543
0
        return(XML_ERR_INTERNAL_ERROR);
13544
0
    switch (node->type) {
13545
0
        case XML_ELEMENT_NODE:
13546
0
        case XML_ATTRIBUTE_NODE:
13547
0
        case XML_TEXT_NODE:
13548
0
        case XML_CDATA_SECTION_NODE:
13549
0
        case XML_ENTITY_REF_NODE:
13550
0
        case XML_PI_NODE:
13551
0
        case XML_COMMENT_NODE:
13552
0
        case XML_DOCUMENT_NODE:
13553
0
        case XML_HTML_DOCUMENT_NODE:
13554
0
      break;
13555
0
  default:
13556
0
      return(XML_ERR_INTERNAL_ERROR);
13557
13558
0
    }
13559
0
    while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
13560
0
           (node->type != XML_DOCUMENT_NODE) &&
13561
0
     (node->type != XML_HTML_DOCUMENT_NODE))
13562
0
  node = node->parent;
13563
0
    if (node == NULL)
13564
0
  return(XML_ERR_INTERNAL_ERROR);
13565
0
    if (node->type == XML_ELEMENT_NODE)
13566
0
  doc = node->doc;
13567
0
    else
13568
0
        doc = (xmlDocPtr) node;
13569
0
    if (doc == NULL)
13570
0
  return(XML_ERR_INTERNAL_ERROR);
13571
13572
    /*
13573
     * allocate a context and set-up everything not related to the
13574
     * node position in the tree
13575
     */
13576
0
    if (doc->type == XML_DOCUMENT_NODE)
13577
0
  ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
13578
0
#ifdef LIBXML_HTML_ENABLED
13579
0
    else if (doc->type == XML_HTML_DOCUMENT_NODE) {
13580
0
  ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
13581
        /*
13582
         * When parsing in context, it makes no sense to add implied
13583
         * elements like html/body/etc...
13584
         */
13585
0
        options |= HTML_PARSE_NOIMPLIED;
13586
0
    }
13587
0
#endif
13588
0
    else
13589
0
        return(XML_ERR_INTERNAL_ERROR);
13590
13591
0
    if (ctxt == NULL)
13592
0
        return(XML_ERR_NO_MEMORY);
13593
13594
    /*
13595
     * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
13596
     * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
13597
     * we must wait until the last moment to free the original one.
13598
     */
13599
0
    if (doc->dict != NULL) {
13600
0
        if (ctxt->dict != NULL)
13601
0
      xmlDictFree(ctxt->dict);
13602
0
  ctxt->dict = doc->dict;
13603
0
    } else
13604
0
        options |= XML_PARSE_NODICT;
13605
13606
0
    if (doc->encoding != NULL) {
13607
0
        xmlCharEncodingHandlerPtr hdlr;
13608
13609
0
        if (ctxt->encoding != NULL)
13610
0
      xmlFree((xmlChar *) ctxt->encoding);
13611
0
        ctxt->encoding = xmlStrdup((const xmlChar *) doc->encoding);
13612
13613
0
        hdlr = xmlFindCharEncodingHandler((const char *) doc->encoding);
13614
0
        if (hdlr != NULL) {
13615
0
            xmlSwitchToEncoding(ctxt, hdlr);
13616
0
  } else {
13617
0
            return(XML_ERR_UNSUPPORTED_ENCODING);
13618
0
        }
13619
0
    }
13620
13621
0
    xmlCtxtUseOptionsInternal(ctxt, options, NULL);
13622
0
    xmlDetectSAX2(ctxt);
13623
0
    ctxt->myDoc = doc;
13624
    /* parsing in context, i.e. as within existing content */
13625
0
    ctxt->instate = XML_PARSER_CONTENT;
13626
13627
0
    fake = xmlNewComment(NULL);
13628
0
    if (fake == NULL) {
13629
0
        xmlFreeParserCtxt(ctxt);
13630
0
  return(XML_ERR_NO_MEMORY);
13631
0
    }
13632
0
    xmlAddChild(node, fake);
13633
13634
0
    if (node->type == XML_ELEMENT_NODE) {
13635
0
  nodePush(ctxt, node);
13636
  /*
13637
   * initialize the SAX2 namespaces stack
13638
   */
13639
0
  cur = node;
13640
0
  while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
13641
0
      xmlNsPtr ns = cur->nsDef;
13642
0
      const xmlChar *iprefix, *ihref;
13643
13644
0
      while (ns != NULL) {
13645
0
    if (ctxt->dict) {
13646
0
        iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
13647
0
        ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
13648
0
    } else {
13649
0
        iprefix = ns->prefix;
13650
0
        ihref = ns->href;
13651
0
    }
13652
13653
0
          if (xmlGetNamespace(ctxt, iprefix) == NULL) {
13654
0
        nsPush(ctxt, iprefix, ihref);
13655
0
        nsnr++;
13656
0
    }
13657
0
    ns = ns->next;
13658
0
      }
13659
0
      cur = cur->parent;
13660
0
  }
13661
0
    }
13662
13663
0
    if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
13664
  /*
13665
   * ID/IDREF registration will be done in xmlValidateElement below
13666
   */
13667
0
  ctxt->loadsubset |= XML_SKIP_IDS;
13668
0
    }
13669
13670
0
#ifdef LIBXML_HTML_ENABLED
13671
0
    if (doc->type == XML_HTML_DOCUMENT_NODE)
13672
0
        __htmlParseContent(ctxt);
13673
0
    else
13674
0
#endif
13675
0
  xmlParseContent(ctxt);
13676
13677
0
    nsPop(ctxt, nsnr);
13678
0
    if ((RAW == '<') && (NXT(1) == '/')) {
13679
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13680
0
    } else if (RAW != 0) {
13681
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13682
0
    }
13683
0
    if ((ctxt->node != NULL) && (ctxt->node != node)) {
13684
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13685
0
  ctxt->wellFormed = 0;
13686
0
    }
13687
13688
0
    if (!ctxt->wellFormed) {
13689
0
        if (ctxt->errNo == 0)
13690
0
      ret = XML_ERR_INTERNAL_ERROR;
13691
0
  else
13692
0
      ret = (xmlParserErrors)ctxt->errNo;
13693
0
    } else {
13694
0
        ret = XML_ERR_OK;
13695
0
    }
13696
13697
    /*
13698
     * Return the newly created nodeset after unlinking it from
13699
     * the pseudo sibling.
13700
     */
13701
13702
0
    cur = fake->next;
13703
0
    fake->next = NULL;
13704
0
    node->last = fake;
13705
13706
0
    if (cur != NULL) {
13707
0
  cur->prev = NULL;
13708
0
    }
13709
13710
0
    *lst = cur;
13711
13712
0
    while (cur != NULL) {
13713
0
  cur->parent = NULL;
13714
0
  cur = cur->next;
13715
0
    }
13716
13717
0
    xmlUnlinkNode(fake);
13718
0
    xmlFreeNode(fake);
13719
13720
13721
0
    if (ret != XML_ERR_OK) {
13722
0
        xmlFreeNodeList(*lst);
13723
0
  *lst = NULL;
13724
0
    }
13725
13726
0
    if (doc->dict != NULL)
13727
0
        ctxt->dict = NULL;
13728
0
    xmlFreeParserCtxt(ctxt);
13729
13730
0
    return(ret);
13731
#else /* !SAX2 */
13732
    return(XML_ERR_INTERNAL_ERROR);
13733
#endif
13734
0
}
13735
13736
#ifdef LIBXML_SAX1_ENABLED
13737
/**
13738
 * xmlParseBalancedChunkMemoryRecover:
13739
 * @doc:  the document the chunk pertains to
13740
 * @sax:  the SAX handler bloc (possibly NULL)
13741
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
13742
 * @depth:  Used for loop detection, use 0
13743
 * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
13744
 * @lst:  the return value for the set of parsed nodes
13745
 * @recover: return nodes even if the data is broken (use 0)
13746
 *
13747
 *
13748
 * Parse a well-balanced chunk of an XML document
13749
 * called by the parser
13750
 * The allowed sequence for the Well Balanced Chunk is the one defined by
13751
 * the content production in the XML grammar:
13752
 *
13753
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13754
 *
13755
 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13756
 *    the parser error code otherwise
13757
 *
13758
 * In case recover is set to 1, the nodelist will not be empty even if
13759
 * the parsed chunk is not well balanced, assuming the parsing succeeded to
13760
 * some extent.
13761
 */
13762
int
13763
xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13764
     void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
13765
0
     int recover) {
13766
0
    xmlParserCtxtPtr ctxt;
13767
0
    xmlDocPtr newDoc;
13768
0
    xmlSAXHandlerPtr oldsax = NULL;
13769
0
    xmlNodePtr content, newRoot;
13770
0
    int size;
13771
0
    int ret = 0;
13772
13773
0
    if (depth > 40) {
13774
0
  return(XML_ERR_ENTITY_LOOP);
13775
0
    }
13776
13777
13778
0
    if (lst != NULL)
13779
0
        *lst = NULL;
13780
0
    if (string == NULL)
13781
0
        return(-1);
13782
13783
0
    size = xmlStrlen(string);
13784
13785
0
    ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13786
0
    if (ctxt == NULL) return(-1);
13787
0
    ctxt->userData = ctxt;
13788
0
    if (sax != NULL) {
13789
0
  oldsax = ctxt->sax;
13790
0
        ctxt->sax = sax;
13791
0
  if (user_data != NULL)
13792
0
      ctxt->userData = user_data;
13793
0
    }
13794
0
    newDoc = xmlNewDoc(BAD_CAST "1.0");
13795
0
    if (newDoc == NULL) {
13796
0
  xmlFreeParserCtxt(ctxt);
13797
0
  return(-1);
13798
0
    }
13799
0
    newDoc->properties = XML_DOC_INTERNAL;
13800
0
    if ((doc != NULL) && (doc->dict != NULL)) {
13801
0
        xmlDictFree(ctxt->dict);
13802
0
  ctxt->dict = doc->dict;
13803
0
  xmlDictReference(ctxt->dict);
13804
0
  ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13805
0
  ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13806
0
  ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13807
0
  ctxt->dictNames = 1;
13808
0
    } else {
13809
0
  xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL);
13810
0
    }
13811
0
    if (doc != NULL) {
13812
0
  newDoc->intSubset = doc->intSubset;
13813
0
  newDoc->extSubset = doc->extSubset;
13814
0
    }
13815
0
    newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13816
0
    if (newRoot == NULL) {
13817
0
  if (sax != NULL)
13818
0
      ctxt->sax = oldsax;
13819
0
  xmlFreeParserCtxt(ctxt);
13820
0
  newDoc->intSubset = NULL;
13821
0
  newDoc->extSubset = NULL;
13822
0
        xmlFreeDoc(newDoc);
13823
0
  return(-1);
13824
0
    }
13825
0
    xmlAddChild((xmlNodePtr) newDoc, newRoot);
13826
0
    nodePush(ctxt, newRoot);
13827
0
    if (doc == NULL) {
13828
0
  ctxt->myDoc = newDoc;
13829
0
    } else {
13830
0
  ctxt->myDoc = newDoc;
13831
0
  newDoc->children->doc = doc;
13832
  /* Ensure that doc has XML spec namespace */
13833
0
  xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
13834
0
  newDoc->oldNs = doc->oldNs;
13835
0
    }
13836
0
    ctxt->instate = XML_PARSER_CONTENT;
13837
0
    ctxt->depth = depth;
13838
13839
    /*
13840
     * Doing validity checking on chunk doesn't make sense
13841
     */
13842
0
    ctxt->validate = 0;
13843
0
    ctxt->loadsubset = 0;
13844
0
    xmlDetectSAX2(ctxt);
13845
13846
0
    if ( doc != NULL ){
13847
0
        content = doc->children;
13848
0
        doc->children = NULL;
13849
0
        xmlParseContent(ctxt);
13850
0
        doc->children = content;
13851
0
    }
13852
0
    else {
13853
0
        xmlParseContent(ctxt);
13854
0
    }
13855
0
    if ((RAW == '<') && (NXT(1) == '/')) {
13856
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13857
0
    } else if (RAW != 0) {
13858
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13859
0
    }
13860
0
    if (ctxt->node != newDoc->children) {
13861
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13862
0
    }
13863
13864
0
    if (!ctxt->wellFormed) {
13865
0
        if (ctxt->errNo == 0)
13866
0
      ret = 1;
13867
0
  else
13868
0
      ret = ctxt->errNo;
13869
0
    } else {
13870
0
      ret = 0;
13871
0
    }
13872
13873
0
    if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
13874
0
  xmlNodePtr cur;
13875
13876
  /*
13877
   * Return the newly created nodeset after unlinking it from
13878
   * they pseudo parent.
13879
   */
13880
0
  cur = newDoc->children->children;
13881
0
  *lst = cur;
13882
0
  while (cur != NULL) {
13883
0
      xmlSetTreeDoc(cur, doc);
13884
0
      cur->parent = NULL;
13885
0
      cur = cur->next;
13886
0
  }
13887
0
  newDoc->children->children = NULL;
13888
0
    }
13889
13890
0
    if (sax != NULL)
13891
0
  ctxt->sax = oldsax;
13892
0
    xmlFreeParserCtxt(ctxt);
13893
0
    newDoc->intSubset = NULL;
13894
0
    newDoc->extSubset = NULL;
13895
0
    newDoc->oldNs = NULL;
13896
0
    xmlFreeDoc(newDoc);
13897
13898
0
    return(ret);
13899
0
}
13900
13901
/**
13902
 * xmlSAXParseEntity:
13903
 * @sax:  the SAX handler block
13904
 * @filename:  the filename
13905
 *
13906
 * parse an XML external entity out of context and build a tree.
13907
 * It use the given SAX function block to handle the parsing callback.
13908
 * If sax is NULL, fallback to the default DOM tree building routines.
13909
 *
13910
 * [78] extParsedEnt ::= TextDecl? content
13911
 *
13912
 * This correspond to a "Well Balanced" chunk
13913
 *
13914
 * Returns the resulting document tree
13915
 */
13916
13917
xmlDocPtr
13918
0
xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
13919
0
    xmlDocPtr ret;
13920
0
    xmlParserCtxtPtr ctxt;
13921
13922
0
    ctxt = xmlCreateFileParserCtxt(filename);
13923
0
    if (ctxt == NULL) {
13924
0
  return(NULL);
13925
0
    }
13926
0
    if (sax != NULL) {
13927
0
  if (ctxt->sax != NULL)
13928
0
      xmlFree(ctxt->sax);
13929
0
        ctxt->sax = sax;
13930
0
        ctxt->userData = NULL;
13931
0
    }
13932
13933
0
    xmlParseExtParsedEnt(ctxt);
13934
13935
0
    if (ctxt->wellFormed)
13936
0
  ret = ctxt->myDoc;
13937
0
    else {
13938
0
        ret = NULL;
13939
0
        xmlFreeDoc(ctxt->myDoc);
13940
0
        ctxt->myDoc = NULL;
13941
0
    }
13942
0
    if (sax != NULL)
13943
0
        ctxt->sax = NULL;
13944
0
    xmlFreeParserCtxt(ctxt);
13945
13946
0
    return(ret);
13947
0
}
13948
13949
/**
13950
 * xmlParseEntity:
13951
 * @filename:  the filename
13952
 *
13953
 * parse an XML external entity out of context and build a tree.
13954
 *
13955
 * [78] extParsedEnt ::= TextDecl? content
13956
 *
13957
 * This correspond to a "Well Balanced" chunk
13958
 *
13959
 * Returns the resulting document tree
13960
 */
13961
13962
xmlDocPtr
13963
0
xmlParseEntity(const char *filename) {
13964
0
    return(xmlSAXParseEntity(NULL, filename));
13965
0
}
13966
#endif /* LIBXML_SAX1_ENABLED */
13967
13968
/**
13969
 * xmlCreateEntityParserCtxtInternal:
13970
 * @URL:  the entity URL
13971
 * @ID:  the entity PUBLIC ID
13972
 * @base:  a possible base for the target URI
13973
 * @pctx:  parser context used to set options on new context
13974
 *
13975
 * Create a parser context for an external entity
13976
 * Automatic support for ZLIB/Compress compressed document is provided
13977
 * by default if found at compile-time.
13978
 *
13979
 * Returns the new parser context or NULL
13980
 */
13981
static xmlParserCtxtPtr
13982
xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
13983
0
                    const xmlChar *base, xmlParserCtxtPtr pctx) {
13984
0
    xmlParserCtxtPtr ctxt;
13985
0
    xmlParserInputPtr inputStream;
13986
0
    char *directory = NULL;
13987
0
    xmlChar *uri;
13988
13989
0
    ctxt = xmlNewParserCtxt();
13990
0
    if (ctxt == NULL) {
13991
0
  return(NULL);
13992
0
    }
13993
13994
0
    if (pctx != NULL) {
13995
0
        ctxt->options = pctx->options;
13996
0
        ctxt->_private = pctx->_private;
13997
0
    }
13998
13999
0
    uri = xmlBuildURI(URL, base);
14000
14001
0
    if (uri == NULL) {
14002
0
  inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
14003
0
  if (inputStream == NULL) {
14004
0
      xmlFreeParserCtxt(ctxt);
14005
0
      return(NULL);
14006
0
  }
14007
14008
0
  inputPush(ctxt, inputStream);
14009
14010
0
  if ((ctxt->directory == NULL) && (directory == NULL))
14011
0
      directory = xmlParserGetDirectory((char *)URL);
14012
0
  if ((ctxt->directory == NULL) && (directory != NULL))
14013
0
      ctxt->directory = directory;
14014
0
    } else {
14015
0
  inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
14016
0
  if (inputStream == NULL) {
14017
0
      xmlFree(uri);
14018
0
      xmlFreeParserCtxt(ctxt);
14019
0
      return(NULL);
14020
0
  }
14021
14022
0
  inputPush(ctxt, inputStream);
14023
14024
0
  if ((ctxt->directory == NULL) && (directory == NULL))
14025
0
      directory = xmlParserGetDirectory((char *)uri);
14026
0
  if ((ctxt->directory == NULL) && (directory != NULL))
14027
0
      ctxt->directory = directory;
14028
0
  xmlFree(uri);
14029
0
    }
14030
0
    return(ctxt);
14031
0
}
14032
14033
/**
14034
 * xmlCreateEntityParserCtxt:
14035
 * @URL:  the entity URL
14036
 * @ID:  the entity PUBLIC ID
14037
 * @base:  a possible base for the target URI
14038
 *
14039
 * Create a parser context for an external entity
14040
 * Automatic support for ZLIB/Compress compressed document is provided
14041
 * by default if found at compile-time.
14042
 *
14043
 * Returns the new parser context or NULL
14044
 */
14045
xmlParserCtxtPtr
14046
xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
14047
0
                    const xmlChar *base) {
14048
0
    return xmlCreateEntityParserCtxtInternal(URL, ID, base, NULL);
14049
14050
0
}
14051
14052
/************************************************************************
14053
 *                  *
14054
 *    Front ends when parsing from a file     *
14055
 *                  *
14056
 ************************************************************************/
14057
14058
/**
14059
 * xmlCreateURLParserCtxt:
14060
 * @filename:  the filename or URL
14061
 * @options:  a combination of xmlParserOption
14062
 *
14063
 * Create a parser context for a file or URL content.
14064
 * Automatic support for ZLIB/Compress compressed document is provided
14065
 * by default if found at compile-time and for file accesses
14066
 *
14067
 * Returns the new parser context or NULL
14068
 */
14069
xmlParserCtxtPtr
14070
xmlCreateURLParserCtxt(const char *filename, int options)
14071
0
{
14072
0
    xmlParserCtxtPtr ctxt;
14073
0
    xmlParserInputPtr inputStream;
14074
0
    char *directory = NULL;
14075
14076
0
    ctxt = xmlNewParserCtxt();
14077
0
    if (ctxt == NULL) {
14078
0
  xmlErrMemory(NULL, "cannot allocate parser context");
14079
0
  return(NULL);
14080
0
    }
14081
14082
0
    if (options)
14083
0
  xmlCtxtUseOptionsInternal(ctxt, options, NULL);
14084
0
    ctxt->linenumbers = 1;
14085
14086
0
    inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
14087
0
    if (inputStream == NULL) {
14088
0
  xmlFreeParserCtxt(ctxt);
14089
0
  return(NULL);
14090
0
    }
14091
14092
0
    inputPush(ctxt, inputStream);
14093
0
    if ((ctxt->directory == NULL) && (directory == NULL))
14094
0
        directory = xmlParserGetDirectory(filename);
14095
0
    if ((ctxt->directory == NULL) && (directory != NULL))
14096
0
        ctxt->directory = directory;
14097
14098
0
    return(ctxt);
14099
0
}
14100
14101
/**
14102
 * xmlCreateFileParserCtxt:
14103
 * @filename:  the filename
14104
 *
14105
 * Create a parser context for a file content.
14106
 * Automatic support for ZLIB/Compress compressed document is provided
14107
 * by default if found at compile-time.
14108
 *
14109
 * Returns the new parser context or NULL
14110
 */
14111
xmlParserCtxtPtr
14112
xmlCreateFileParserCtxt(const char *filename)
14113
0
{
14114
0
    return(xmlCreateURLParserCtxt(filename, 0));
14115
0
}
14116
14117
#ifdef LIBXML_SAX1_ENABLED
14118
/**
14119
 * xmlSAXParseFileWithData:
14120
 * @sax:  the SAX handler block
14121
 * @filename:  the filename
14122
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
14123
 *             documents
14124
 * @data:  the userdata
14125
 *
14126
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14127
 * compressed document is provided by default if found at compile-time.
14128
 * It use the given SAX function block to handle the parsing callback.
14129
 * If sax is NULL, fallback to the default DOM tree building routines.
14130
 *
14131
 * User data (void *) is stored within the parser context in the
14132
 * context's _private member, so it is available nearly everywhere in libxml
14133
 *
14134
 * Returns the resulting document tree
14135
 */
14136
14137
xmlDocPtr
14138
xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
14139
0
                        int recovery, void *data) {
14140
0
    xmlDocPtr ret;
14141
0
    xmlParserCtxtPtr ctxt;
14142
14143
0
    xmlInitParser();
14144
14145
0
    ctxt = xmlCreateFileParserCtxt(filename);
14146
0
    if (ctxt == NULL) {
14147
0
  return(NULL);
14148
0
    }
14149
0
    if (sax != NULL) {
14150
0
  if (ctxt->sax != NULL)
14151
0
      xmlFree(ctxt->sax);
14152
0
        ctxt->sax = sax;
14153
0
    }
14154
0
    xmlDetectSAX2(ctxt);
14155
0
    if (data!=NULL) {
14156
0
  ctxt->_private = data;
14157
0
    }
14158
14159
0
    if (ctxt->directory == NULL)
14160
0
        ctxt->directory = xmlParserGetDirectory(filename);
14161
14162
0
    ctxt->recovery = recovery;
14163
14164
0
    xmlParseDocument(ctxt);
14165
14166
0
    if ((ctxt->wellFormed) || recovery) {
14167
0
        ret = ctxt->myDoc;
14168
0
  if (ret != NULL) {
14169
0
      if (ctxt->input->buf->compressed > 0)
14170
0
    ret->compression = 9;
14171
0
      else
14172
0
    ret->compression = ctxt->input->buf->compressed;
14173
0
  }
14174
0
    }
14175
0
    else {
14176
0
       ret = NULL;
14177
0
       xmlFreeDoc(ctxt->myDoc);
14178
0
       ctxt->myDoc = NULL;
14179
0
    }
14180
0
    if (sax != NULL)
14181
0
        ctxt->sax = NULL;
14182
0
    xmlFreeParserCtxt(ctxt);
14183
14184
0
    return(ret);
14185
0
}
14186
14187
/**
14188
 * xmlSAXParseFile:
14189
 * @sax:  the SAX handler block
14190
 * @filename:  the filename
14191
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
14192
 *             documents
14193
 *
14194
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14195
 * compressed document is provided by default if found at compile-time.
14196
 * It use the given SAX function block to handle the parsing callback.
14197
 * If sax is NULL, fallback to the default DOM tree building routines.
14198
 *
14199
 * Returns the resulting document tree
14200
 */
14201
14202
xmlDocPtr
14203
xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
14204
0
                          int recovery) {
14205
0
    return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
14206
0
}
14207
14208
/**
14209
 * xmlRecoverDoc:
14210
 * @cur:  a pointer to an array of xmlChar
14211
 *
14212
 * parse an XML in-memory document and build a tree.
14213
 * In the case the document is not Well Formed, a attempt to build a
14214
 * tree is tried anyway
14215
 *
14216
 * Returns the resulting document tree or NULL in case of failure
14217
 */
14218
14219
xmlDocPtr
14220
0
xmlRecoverDoc(const xmlChar *cur) {
14221
0
    return(xmlSAXParseDoc(NULL, cur, 1));
14222
0
}
14223
14224
/**
14225
 * xmlParseFile:
14226
 * @filename:  the filename
14227
 *
14228
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14229
 * compressed document is provided by default if found at compile-time.
14230
 *
14231
 * Returns the resulting document tree if the file was wellformed,
14232
 * NULL otherwise.
14233
 */
14234
14235
xmlDocPtr
14236
0
xmlParseFile(const char *filename) {
14237
0
    return(xmlSAXParseFile(NULL, filename, 0));
14238
0
}
14239
14240
/**
14241
 * xmlRecoverFile:
14242
 * @filename:  the filename
14243
 *
14244
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14245
 * compressed document is provided by default if found at compile-time.
14246
 * In the case the document is not Well Formed, it attempts to build
14247
 * a tree anyway
14248
 *
14249
 * Returns the resulting document tree or NULL in case of failure
14250
 */
14251
14252
xmlDocPtr
14253
0
xmlRecoverFile(const char *filename) {
14254
0
    return(xmlSAXParseFile(NULL, filename, 1));
14255
0
}
14256
14257
14258
/**
14259
 * xmlSetupParserForBuffer:
14260
 * @ctxt:  an XML parser context
14261
 * @buffer:  a xmlChar * buffer
14262
 * @filename:  a file name
14263
 *
14264
 * Setup the parser context to parse a new buffer; Clears any prior
14265
 * contents from the parser context. The buffer parameter must not be
14266
 * NULL, but the filename parameter can be
14267
 */
14268
void
14269
xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
14270
                             const char* filename)
14271
0
{
14272
0
    xmlParserInputPtr input;
14273
14274
0
    if ((ctxt == NULL) || (buffer == NULL))
14275
0
        return;
14276
14277
0
    input = xmlNewInputStream(ctxt);
14278
0
    if (input == NULL) {
14279
0
        xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
14280
0
        xmlClearParserCtxt(ctxt);
14281
0
        return;
14282
0
    }
14283
14284
0
    xmlClearParserCtxt(ctxt);
14285
0
    if (filename != NULL)
14286
0
        input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
14287
0
    input->base = buffer;
14288
0
    input->cur = buffer;
14289
0
    input->end = &buffer[xmlStrlen(buffer)];
14290
0
    inputPush(ctxt, input);
14291
0
}
14292
14293
/**
14294
 * xmlSAXUserParseFile:
14295
 * @sax:  a SAX handler
14296
 * @user_data:  The user data returned on SAX callbacks
14297
 * @filename:  a file name
14298
 *
14299
 * parse an XML file and call the given SAX handler routines.
14300
 * Automatic support for ZLIB/Compress compressed document is provided
14301
 *
14302
 * Returns 0 in case of success or a error number otherwise
14303
 */
14304
int
14305
xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
14306
0
                    const char *filename) {
14307
0
    int ret = 0;
14308
0
    xmlParserCtxtPtr ctxt;
14309
14310
0
    ctxt = xmlCreateFileParserCtxt(filename);
14311
0
    if (ctxt == NULL) return -1;
14312
0
    if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14313
0
  xmlFree(ctxt->sax);
14314
0
    ctxt->sax = sax;
14315
0
    xmlDetectSAX2(ctxt);
14316
14317
0
    if (user_data != NULL)
14318
0
  ctxt->userData = user_data;
14319
14320
0
    xmlParseDocument(ctxt);
14321
14322
0
    if (ctxt->wellFormed)
14323
0
  ret = 0;
14324
0
    else {
14325
0
        if (ctxt->errNo != 0)
14326
0
      ret = ctxt->errNo;
14327
0
  else
14328
0
      ret = -1;
14329
0
    }
14330
0
    if (sax != NULL)
14331
0
  ctxt->sax = NULL;
14332
0
    if (ctxt->myDoc != NULL) {
14333
0
        xmlFreeDoc(ctxt->myDoc);
14334
0
  ctxt->myDoc = NULL;
14335
0
    }
14336
0
    xmlFreeParserCtxt(ctxt);
14337
14338
0
    return ret;
14339
0
}
14340
#endif /* LIBXML_SAX1_ENABLED */
14341
14342
/************************************************************************
14343
 *                  *
14344
 *    Front ends when parsing from memory     *
14345
 *                  *
14346
 ************************************************************************/
14347
14348
/**
14349
 * xmlCreateMemoryParserCtxt:
14350
 * @buffer:  a pointer to a char array
14351
 * @size:  the size of the array
14352
 *
14353
 * Create a parser context for an XML in-memory document.
14354
 *
14355
 * Returns the new parser context or NULL
14356
 */
14357
xmlParserCtxtPtr
14358
205k
xmlCreateMemoryParserCtxt(const char *buffer, int size) {
14359
205k
    xmlParserCtxtPtr ctxt;
14360
205k
    xmlParserInputPtr input;
14361
205k
    xmlParserInputBufferPtr buf;
14362
14363
205k
    if (buffer == NULL)
14364
0
  return(NULL);
14365
205k
    if (size <= 0)
14366
4.48k
  return(NULL);
14367
14368
200k
    ctxt = xmlNewParserCtxt();
14369
200k
    if (ctxt == NULL)
14370
0
  return(NULL);
14371
14372
    /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
14373
200k
    buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
14374
200k
    if (buf == NULL) {
14375
0
  xmlFreeParserCtxt(ctxt);
14376
0
  return(NULL);
14377
0
    }
14378
14379
200k
    input = xmlNewInputStream(ctxt);
14380
200k
    if (input == NULL) {
14381
0
  xmlFreeParserInputBuffer(buf);
14382
0
  xmlFreeParserCtxt(ctxt);
14383
0
  return(NULL);
14384
0
    }
14385
14386
200k
    input->filename = NULL;
14387
200k
    input->buf = buf;
14388
200k
    xmlBufResetInput(input->buf->buffer, input);
14389
14390
200k
    inputPush(ctxt, input);
14391
200k
    return(ctxt);
14392
200k
}
14393
14394
#ifdef LIBXML_SAX1_ENABLED
14395
/**
14396
 * xmlSAXParseMemoryWithData:
14397
 * @sax:  the SAX handler block
14398
 * @buffer:  an pointer to a char array
14399
 * @size:  the size of the array
14400
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
14401
 *             documents
14402
 * @data:  the userdata
14403
 *
14404
 * parse an XML in-memory block and use the given SAX function block
14405
 * to handle the parsing callback. If sax is NULL, fallback to the default
14406
 * DOM tree building routines.
14407
 *
14408
 * User data (void *) is stored within the parser context in the
14409
 * context's _private member, so it is available nearly everywhere in libxml
14410
 *
14411
 * Returns the resulting document tree
14412
 */
14413
14414
xmlDocPtr
14415
xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
14416
0
            int size, int recovery, void *data) {
14417
0
    xmlDocPtr ret;
14418
0
    xmlParserCtxtPtr ctxt;
14419
14420
0
    xmlInitParser();
14421
14422
0
    ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14423
0
    if (ctxt == NULL) return(NULL);
14424
0
    if (sax != NULL) {
14425
0
  if (ctxt->sax != NULL)
14426
0
      xmlFree(ctxt->sax);
14427
0
        ctxt->sax = sax;
14428
0
    }
14429
0
    xmlDetectSAX2(ctxt);
14430
0
    if (data!=NULL) {
14431
0
  ctxt->_private=data;
14432
0
    }
14433
14434
0
    ctxt->recovery = recovery;
14435
14436
0
    xmlParseDocument(ctxt);
14437
14438
0
    if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14439
0
    else {
14440
0
       ret = NULL;
14441
0
       xmlFreeDoc(ctxt->myDoc);
14442
0
       ctxt->myDoc = NULL;
14443
0
    }
14444
0
    if (sax != NULL)
14445
0
  ctxt->sax = NULL;
14446
0
    xmlFreeParserCtxt(ctxt);
14447
14448
0
    return(ret);
14449
0
}
14450
14451
/**
14452
 * xmlSAXParseMemory:
14453
 * @sax:  the SAX handler block
14454
 * @buffer:  an pointer to a char array
14455
 * @size:  the size of the array
14456
 * @recovery:  work in recovery mode, i.e. tries to read not Well Formed
14457
 *             documents
14458
 *
14459
 * parse an XML in-memory block and use the given SAX function block
14460
 * to handle the parsing callback. If sax is NULL, fallback to the default
14461
 * DOM tree building routines.
14462
 *
14463
 * Returns the resulting document tree
14464
 */
14465
xmlDocPtr
14466
xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
14467
0
            int size, int recovery) {
14468
0
    return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
14469
0
}
14470
14471
/**
14472
 * xmlParseMemory:
14473
 * @buffer:  an pointer to a char array
14474
 * @size:  the size of the array
14475
 *
14476
 * parse an XML in-memory block and build a tree.
14477
 *
14478
 * Returns the resulting document tree
14479
 */
14480
14481
0
xmlDocPtr xmlParseMemory(const char *buffer, int size) {
14482
0
   return(xmlSAXParseMemory(NULL, buffer, size, 0));
14483
0
}
14484
14485
/**
14486
 * xmlRecoverMemory:
14487
 * @buffer:  an pointer to a char array
14488
 * @size:  the size of the array
14489
 *
14490
 * parse an XML in-memory block and build a tree.
14491
 * In the case the document is not Well Formed, an attempt to
14492
 * build a tree is tried anyway
14493
 *
14494
 * Returns the resulting document tree or NULL in case of error
14495
 */
14496
14497
0
xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
14498
0
   return(xmlSAXParseMemory(NULL, buffer, size, 1));
14499
0
}
14500
14501
/**
14502
 * xmlSAXUserParseMemory:
14503
 * @sax:  a SAX handler
14504
 * @user_data:  The user data returned on SAX callbacks
14505
 * @buffer:  an in-memory XML document input
14506
 * @size:  the length of the XML document in bytes
14507
 *
14508
 * A better SAX parsing routine.
14509
 * parse an XML in-memory buffer and call the given SAX handler routines.
14510
 *
14511
 * Returns 0 in case of success or a error number otherwise
14512
 */
14513
int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
14514
0
        const char *buffer, int size) {
14515
0
    int ret = 0;
14516
0
    xmlParserCtxtPtr ctxt;
14517
14518
0
    xmlInitParser();
14519
14520
0
    ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14521
0
    if (ctxt == NULL) return -1;
14522
0
    if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14523
0
        xmlFree(ctxt->sax);
14524
0
    ctxt->sax = sax;
14525
0
    xmlDetectSAX2(ctxt);
14526
14527
0
    if (user_data != NULL)
14528
0
  ctxt->userData = user_data;
14529
14530
0
    xmlParseDocument(ctxt);
14531
14532
0
    if (ctxt->wellFormed)
14533
0
  ret = 0;
14534
0
    else {
14535
0
        if (ctxt->errNo != 0)
14536
0
      ret = ctxt->errNo;
14537
0
  else
14538
0
      ret = -1;
14539
0
    }
14540
0
    if (sax != NULL)
14541
0
        ctxt->sax = NULL;
14542
0
    if (ctxt->myDoc != NULL) {
14543
0
        xmlFreeDoc(ctxt->myDoc);
14544
0
  ctxt->myDoc = NULL;
14545
0
    }
14546
0
    xmlFreeParserCtxt(ctxt);
14547
14548
0
    return ret;
14549
0
}
14550
#endif /* LIBXML_SAX1_ENABLED */
14551
14552
/**
14553
 * xmlCreateDocParserCtxt:
14554
 * @cur:  a pointer to an array of xmlChar
14555
 *
14556
 * Creates a parser context for an XML in-memory document.
14557
 *
14558
 * Returns the new parser context or NULL
14559
 */
14560
xmlParserCtxtPtr
14561
0
xmlCreateDocParserCtxt(const xmlChar *cur) {
14562
0
    int len;
14563
14564
0
    if (cur == NULL)
14565
0
  return(NULL);
14566
0
    len = xmlStrlen(cur);
14567
0
    return(xmlCreateMemoryParserCtxt((const char *)cur, len));
14568
0
}
14569
14570
#ifdef LIBXML_SAX1_ENABLED
14571
/**
14572
 * xmlSAXParseDoc:
14573
 * @sax:  the SAX handler block
14574
 * @cur:  a pointer to an array of xmlChar
14575
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
14576
 *             documents
14577
 *
14578
 * parse an XML in-memory document and build a tree.
14579
 * It use the given SAX function block to handle the parsing callback.
14580
 * If sax is NULL, fallback to the default DOM tree building routines.
14581
 *
14582
 * Returns the resulting document tree
14583
 */
14584
14585
xmlDocPtr
14586
0
xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
14587
0
    xmlDocPtr ret;
14588
0
    xmlParserCtxtPtr ctxt;
14589
0
    xmlSAXHandlerPtr oldsax = NULL;
14590
14591
0
    if (cur == NULL) return(NULL);
14592
14593
14594
0
    ctxt = xmlCreateDocParserCtxt(cur);
14595
0
    if (ctxt == NULL) return(NULL);
14596
0
    if (sax != NULL) {
14597
0
        oldsax = ctxt->sax;
14598
0
        ctxt->sax = sax;
14599
0
        ctxt->userData = NULL;
14600
0
    }
14601
0
    xmlDetectSAX2(ctxt);
14602
14603
0
    xmlParseDocument(ctxt);
14604
0
    if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14605
0
    else {
14606
0
       ret = NULL;
14607
0
       xmlFreeDoc(ctxt->myDoc);
14608
0
       ctxt->myDoc = NULL;
14609
0
    }
14610
0
    if (sax != NULL)
14611
0
  ctxt->sax = oldsax;
14612
0
    xmlFreeParserCtxt(ctxt);
14613
14614
0
    return(ret);
14615
0
}
14616
14617
/**
14618
 * xmlParseDoc:
14619
 * @cur:  a pointer to an array of xmlChar
14620
 *
14621
 * parse an XML in-memory document and build a tree.
14622
 *
14623
 * Returns the resulting document tree
14624
 */
14625
14626
xmlDocPtr
14627
0
xmlParseDoc(const xmlChar *cur) {
14628
0
    return(xmlSAXParseDoc(NULL, cur, 0));
14629
0
}
14630
#endif /* LIBXML_SAX1_ENABLED */
14631
14632
#ifdef LIBXML_LEGACY_ENABLED
14633
/************************************************************************
14634
 *                  *
14635
 *  Specific function to keep track of entities references    *
14636
 *  and used by the XSLT debugger         *
14637
 *                  *
14638
 ************************************************************************/
14639
14640
static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
14641
14642
/**
14643
 * xmlAddEntityReference:
14644
 * @ent : A valid entity
14645
 * @firstNode : A valid first node for children of entity
14646
 * @lastNode : A valid last node of children entity
14647
 *
14648
 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
14649
 */
14650
static void
14651
xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
14652
                      xmlNodePtr lastNode)
14653
0
{
14654
0
    if (xmlEntityRefFunc != NULL) {
14655
0
        (*xmlEntityRefFunc) (ent, firstNode, lastNode);
14656
0
    }
14657
0
}
14658
14659
14660
/**
14661
 * xmlSetEntityReferenceFunc:
14662
 * @func: A valid function
14663
 *
14664
 * Set the function to call call back when a xml reference has been made
14665
 */
14666
void
14667
xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
14668
0
{
14669
0
    xmlEntityRefFunc = func;
14670
0
}
14671
#endif /* LIBXML_LEGACY_ENABLED */
14672
14673
/************************************************************************
14674
 *                  *
14675
 *        Miscellaneous       *
14676
 *                  *
14677
 ************************************************************************/
14678
14679
#ifdef LIBXML_XPATH_ENABLED
14680
#include <libxml/xpath.h>
14681
#endif
14682
14683
extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
14684
static int xmlParserInitialized = 0;
14685
14686
/**
14687
 * xmlInitParser:
14688
 *
14689
 * Initialization function for the XML parser.
14690
 * This is not reentrant. Call once before processing in case of
14691
 * use in multithreaded programs.
14692
 */
14693
14694
void
14695
0
xmlInitParser(void) {
14696
0
    if (xmlParserInitialized != 0)
14697
0
  return;
14698
14699
0
#ifdef LIBXML_THREAD_ENABLED
14700
0
    __xmlGlobalInitMutexLock();
14701
0
    if (xmlParserInitialized == 0) {
14702
0
#endif
14703
0
  xmlInitThreads();
14704
0
  xmlInitGlobals();
14705
0
  if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
14706
0
      (xmlGenericError == NULL))
14707
0
      initGenericErrorDefaultFunc(NULL);
14708
0
  xmlInitMemory();
14709
0
        xmlInitializeDict();
14710
0
  xmlInitCharEncodingHandlers();
14711
0
  xmlDefaultSAXHandlerInit();
14712
0
  xmlRegisterDefaultInputCallbacks();
14713
0
#ifdef LIBXML_OUTPUT_ENABLED
14714
0
  xmlRegisterDefaultOutputCallbacks();
14715
0
#endif /* LIBXML_OUTPUT_ENABLED */
14716
0
#ifdef LIBXML_HTML_ENABLED
14717
0
  htmlInitAutoClose();
14718
0
  htmlDefaultSAXHandlerInit();
14719
0
#endif
14720
0
#ifdef LIBXML_XPATH_ENABLED
14721
0
  xmlXPathInit();
14722
0
#endif
14723
0
  xmlParserInitialized = 1;
14724
0
#ifdef LIBXML_THREAD_ENABLED
14725
0
    }
14726
0
    __xmlGlobalInitMutexUnlock();
14727
0
#endif
14728
0
}
14729
14730
/**
14731
 * xmlCleanupParser:
14732
 *
14733
 * This function name is somewhat misleading. It does not clean up
14734
 * parser state, it cleans up memory allocated by the library itself.
14735
 * It is a cleanup function for the XML library. It tries to reclaim all
14736
 * related global memory allocated for the library processing.
14737
 * It doesn't deallocate any document related memory. One should
14738
 * call xmlCleanupParser() only when the process has finished using
14739
 * the library and all XML/HTML documents built with it.
14740
 * See also xmlInitParser() which has the opposite function of preparing
14741
 * the library for operations.
14742
 *
14743
 * WARNING: if your application is multithreaded or has plugin support
14744
 *          calling this may crash the application if another thread or
14745
 *          a plugin is still using libxml2. It's sometimes very hard to
14746
 *          guess if libxml2 is in use in the application, some libraries
14747
 *          or plugins may use it without notice. In case of doubt abstain
14748
 *          from calling this function or do it just before calling exit()
14749
 *          to avoid leak reports from valgrind !
14750
 */
14751
14752
void
14753
0
xmlCleanupParser(void) {
14754
0
    if (!xmlParserInitialized)
14755
0
  return;
14756
14757
0
    xmlCleanupCharEncodingHandlers();
14758
0
#ifdef LIBXML_CATALOG_ENABLED
14759
0
    xmlCatalogCleanup();
14760
0
#endif
14761
0
    xmlDictCleanup();
14762
0
    xmlCleanupInputCallbacks();
14763
0
#ifdef LIBXML_OUTPUT_ENABLED
14764
0
    xmlCleanupOutputCallbacks();
14765
0
#endif
14766
0
#ifdef LIBXML_SCHEMAS_ENABLED
14767
0
    xmlSchemaCleanupTypes();
14768
0
    xmlRelaxNGCleanupTypes();
14769
0
#endif
14770
0
    xmlResetLastError();
14771
0
    xmlCleanupGlobals();
14772
0
    xmlCleanupThreads(); /* must be last if called not from the main thread */
14773
0
    xmlCleanupMemory();
14774
0
    xmlParserInitialized = 0;
14775
0
}
14776
14777
/************************************************************************
14778
 *                  *
14779
 *  New set (2.6.0) of simpler and more flexible APIs   *
14780
 *                  *
14781
 ************************************************************************/
14782
14783
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A variable named dict must be visible where the macro
 * is used; a NULL dict means the string is always freed.
 *
 * Wrapped in do { } while (0) so that the macro behaves as a single
 * statement (safe in an unbraced if/else); the caller supplies the
 * terminating semicolon.
 */
#define DICT_FREE(str)						\
    do {							\
	if ((str) && ((!dict) ||				\
	    (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))	\
	    xmlFree((char *)(str));				\
    } while (0)
14794
14795
/**
14796
 * xmlCtxtReset:
14797
 * @ctxt: an XML parser context
14798
 *
14799
 * Reset a parser context
14800
 */
14801
void
14802
xmlCtxtReset(xmlParserCtxtPtr ctxt)
14803
0
{
14804
0
    xmlParserInputPtr input;
14805
0
    xmlDictPtr dict;
14806
14807
0
    if (ctxt == NULL)
14808
0
        return;
14809
14810
0
    dict = ctxt->dict;
14811
14812
0
    while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
14813
0
        xmlFreeInputStream(input);
14814
0
    }
14815
0
    ctxt->inputNr = 0;
14816
0
    ctxt->input = NULL;
14817
14818
0
    ctxt->spaceNr = 0;
14819
0
    if (ctxt->spaceTab != NULL) {
14820
0
  ctxt->spaceTab[0] = -1;
14821
0
  ctxt->space = &ctxt->spaceTab[0];
14822
0
    } else {
14823
0
        ctxt->space = NULL;
14824
0
    }
14825
14826
14827
0
    ctxt->nodeNr = 0;
14828
0
    ctxt->node = NULL;
14829
14830
0
    ctxt->nameNr = 0;
14831
0
    ctxt->name = NULL;
14832
14833
0
    DICT_FREE(ctxt->version);
14834
0
    ctxt->version = NULL;
14835
0
    DICT_FREE(ctxt->encoding);
14836
0
    ctxt->encoding = NULL;
14837
0
    DICT_FREE(ctxt->directory);
14838
0
    ctxt->directory = NULL;
14839
0
    DICT_FREE(ctxt->extSubURI);
14840
0
    ctxt->extSubURI = NULL;
14841
0
    DICT_FREE(ctxt->extSubSystem);
14842
0
    ctxt->extSubSystem = NULL;
14843
0
    if (ctxt->myDoc != NULL)
14844
0
        xmlFreeDoc(ctxt->myDoc);
14845
0
    ctxt->myDoc = NULL;
14846
14847
0
    ctxt->standalone = -1;
14848
0
    ctxt->hasExternalSubset = 0;
14849
0
    ctxt->hasPErefs = 0;
14850
0
    ctxt->html = 0;
14851
0
    ctxt->external = 0;
14852
0
    ctxt->instate = XML_PARSER_START;
14853
0
    ctxt->token = 0;
14854
14855
0
    ctxt->wellFormed = 1;
14856
0
    ctxt->nsWellFormed = 1;
14857
0
    ctxt->disableSAX = 0;
14858
0
    ctxt->valid = 1;
14859
#if 0
14860
    ctxt->vctxt.userData = ctxt;
14861
    ctxt->vctxt.error = xmlParserValidityError;
14862
    ctxt->vctxt.warning = xmlParserValidityWarning;
14863
#endif
14864
0
    ctxt->record_info = 0;
14865
0
    ctxt->nbChars = 0;
14866
0
    ctxt->checkIndex = 0;
14867
0
    ctxt->inSubset = 0;
14868
0
    ctxt->errNo = XML_ERR_OK;
14869
0
    ctxt->depth = 0;
14870
0
    ctxt->charset = XML_CHAR_ENCODING_UTF8;
14871
0
    ctxt->catalogs = NULL;
14872
0
    ctxt->nbentities = 0;
14873
0
    ctxt->sizeentities = 0;
14874
0
    ctxt->sizeentcopy = 0;
14875
0
    xmlInitNodeInfoSeq(&ctxt->node_seq);
14876
14877
0
    if (ctxt->attsDefault != NULL) {
14878
0
        xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
14879
0
        ctxt->attsDefault = NULL;
14880
0
    }
14881
0
    if (ctxt->attsSpecial != NULL) {
14882
0
        xmlHashFree(ctxt->attsSpecial, NULL);
14883
0
        ctxt->attsSpecial = NULL;
14884
0
    }
14885
14886
0
#ifdef LIBXML_CATALOG_ENABLED
14887
0
    if (ctxt->catalogs != NULL)
14888
0
  xmlCatalogFreeLocal(ctxt->catalogs);
14889
0
#endif
14890
0
    if (ctxt->lastError.code != XML_ERR_OK)
14891
0
        xmlResetError(&ctxt->lastError);
14892
0
}
14893
14894
/**
14895
 * xmlCtxtResetPush:
14896
 * @ctxt: an XML parser context
14897
 * @chunk:  a pointer to an array of chars
14898
 * @size:  number of chars in the array
14899
 * @filename:  an optional file name or URI
14900
 * @encoding:  the document encoding, or NULL
14901
 *
14902
 * Reset a push parser context
14903
 *
14904
 * Returns 0 in case of success and 1 in case of error
14905
 */
14906
int
14907
xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
14908
                 int size, const char *filename, const char *encoding)
14909
0
{
14910
0
    xmlParserInputPtr inputStream;
14911
0
    xmlParserInputBufferPtr buf;
14912
0
    xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
14913
14914
0
    if (ctxt == NULL)
14915
0
        return(1);
14916
14917
0
    if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
14918
0
        enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
14919
14920
0
    buf = xmlAllocParserInputBuffer(enc);
14921
0
    if (buf == NULL)
14922
0
        return(1);
14923
14924
0
    if (ctxt == NULL) {
14925
0
        xmlFreeParserInputBuffer(buf);
14926
0
        return(1);
14927
0
    }
14928
14929
0
    xmlCtxtReset(ctxt);
14930
14931
0
    if (ctxt->pushTab == NULL) {
14932
0
        ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 *
14933
0
                                      sizeof(xmlChar *));
14934
0
        if (ctxt->pushTab == NULL) {
14935
0
      xmlErrMemory(ctxt, NULL);
14936
0
            xmlFreeParserInputBuffer(buf);
14937
0
            return(1);
14938
0
        }
14939
0
    }
14940
14941
0
    if (filename == NULL) {
14942
0
        ctxt->directory = NULL;
14943
0
    } else {
14944
0
        ctxt->directory = xmlParserGetDirectory(filename);
14945
0
    }
14946
14947
0
    inputStream = xmlNewInputStream(ctxt);
14948
0
    if (inputStream == NULL) {
14949
0
        xmlFreeParserInputBuffer(buf);
14950
0
        return(1);
14951
0
    }
14952
14953
0
    if (filename == NULL)
14954
0
        inputStream->filename = NULL;
14955
0
    else
14956
0
        inputStream->filename = (char *)
14957
0
            xmlCanonicPath((const xmlChar *) filename);
14958
0
    inputStream->buf = buf;
14959
0
    xmlBufResetInput(buf->buffer, inputStream);
14960
14961
0
    inputPush(ctxt, inputStream);
14962
14963
0
    if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
14964
0
        (ctxt->input->buf != NULL)) {
14965
0
  size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
14966
0
        size_t cur = ctxt->input->cur - ctxt->input->base;
14967
14968
0
        xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
14969
14970
0
        xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
14971
#ifdef DEBUG_PUSH
14972
        xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
14973
#endif
14974
0
    }
14975
14976
0
    if (encoding != NULL) {
14977
0
        xmlCharEncodingHandlerPtr hdlr;
14978
14979
0
        if (ctxt->encoding != NULL)
14980
0
      xmlFree((xmlChar *) ctxt->encoding);
14981
0
        ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14982
14983
0
        hdlr = xmlFindCharEncodingHandler(encoding);
14984
0
        if (hdlr != NULL) {
14985
0
            xmlSwitchToEncoding(ctxt, hdlr);
14986
0
  } else {
14987
0
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
14988
0
            "Unsupported encoding %s\n", BAD_CAST encoding);
14989
0
        }
14990
0
    } else if (enc != XML_CHAR_ENCODING_NONE) {
14991
0
        xmlSwitchEncoding(ctxt, enc);
14992
0
    }
14993
14994
0
    return(0);
14995
0
}
14996
14997
14998
/**
14999
 * xmlCtxtUseOptionsInternal:
15000
 * @ctxt: an XML parser context
15001
 * @options:  a combination of xmlParserOption
15002
 * @encoding:  the user provided encoding to use
15003
 *
15004
 * Applies the options to the parser context
15005
 *
15006
 * Returns 0 in case of success, the set of unknown or unimplemented options
15007
 *         in case of error.
15008
 */
15009
static int
15010
xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding)
15011
234k
{
15012
234k
    if (ctxt == NULL)
15013
0
        return(-1);
15014
234k
    if (encoding != NULL) {
15015
0
        if (ctxt->encoding != NULL)
15016
0
      xmlFree((xmlChar *) ctxt->encoding);
15017
0
        ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
15018
0
    }
15019
234k
    if (options & XML_PARSE_RECOVER) {
15020
224k
        ctxt->recovery = 1;
15021
224k
        options -= XML_PARSE_RECOVER;
15022
224k
  ctxt->options |= XML_PARSE_RECOVER;
15023
224k
    } else
15024
10.2k
        ctxt->recovery = 0;
15025
234k
    if (options & XML_PARSE_DTDLOAD) {
15026
0
        ctxt->loadsubset = XML_DETECT_IDS;
15027
0
        options -= XML_PARSE_DTDLOAD;
15028
0
  ctxt->options |= XML_PARSE_DTDLOAD;
15029
0
    } else
15030
234k
        ctxt->loadsubset = 0;
15031
234k
    if (options & XML_PARSE_DTDATTR) {
15032
0
        ctxt->loadsubset |= XML_COMPLETE_ATTRS;
15033
0
        options -= XML_PARSE_DTDATTR;
15034
0
  ctxt->options |= XML_PARSE_DTDATTR;
15035
0
    }
15036
234k
    if (options & XML_PARSE_NOENT) {
15037
0
        ctxt->replaceEntities = 1;
15038
        /* ctxt->loadsubset |= XML_DETECT_IDS; */
15039
0
        options -= XML_PARSE_NOENT;
15040
0
  ctxt->options |= XML_PARSE_NOENT;
15041
0
    } else
15042
234k
        ctxt->replaceEntities = 0;
15043
234k
    if (options & XML_PARSE_PEDANTIC) {
15044
0
        ctxt->pedantic = 1;
15045
0
        options -= XML_PARSE_PEDANTIC;
15046
0
  ctxt->options |= XML_PARSE_PEDANTIC;
15047
0
    } else
15048
234k
        ctxt->pedantic = 0;
15049
234k
    if (options & XML_PARSE_NOBLANKS) {
15050
234k
        ctxt->keepBlanks = 0;
15051
234k
        ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
15052
234k
        options -= XML_PARSE_NOBLANKS;
15053
234k
  ctxt->options |= XML_PARSE_NOBLANKS;
15054
234k
    } else
15055
0
        ctxt->keepBlanks = 1;
15056
234k
    if (options & XML_PARSE_DTDVALID) {
15057
0
        ctxt->validate = 1;
15058
0
        if (options & XML_PARSE_NOWARNING)
15059
0
            ctxt->vctxt.warning = NULL;
15060
0
        if (options & XML_PARSE_NOERROR)
15061
0
            ctxt->vctxt.error = NULL;
15062
0
        options -= XML_PARSE_DTDVALID;
15063
0
  ctxt->options |= XML_PARSE_DTDVALID;
15064
0
    } else
15065
234k
        ctxt->validate = 0;
15066
234k
    if (options & XML_PARSE_NOWARNING) {
15067
0
        ctxt->sax->warning = NULL;
15068
0
        options -= XML_PARSE_NOWARNING;
15069
0
    }
15070
234k
    if (options & XML_PARSE_NOERROR) {
15071
0
        ctxt->sax->error = NULL;
15072
0
        ctxt->sax->fatalError = NULL;
15073
0
        options -= XML_PARSE_NOERROR;
15074
0
    }
15075
234k
#ifdef LIBXML_SAX1_ENABLED
15076
234k
    if (options & XML_PARSE_SAX1) {
15077
0
        ctxt->sax->startElement = xmlSAX2StartElement;
15078
0
        ctxt->sax->endElement = xmlSAX2EndElement;
15079
0
        ctxt->sax->startElementNs = NULL;
15080
0
        ctxt->sax->endElementNs = NULL;
15081
0
        ctxt->sax->initialized = 1;
15082
0
        options -= XML_PARSE_SAX1;
15083
0
  ctxt->options |= XML_PARSE_SAX1;
15084
0
    }
15085
234k
#endif /* LIBXML_SAX1_ENABLED */
15086
234k
    if (options & XML_PARSE_NODICT) {
15087
0
        ctxt->dictNames = 0;
15088
0
        options -= XML_PARSE_NODICT;
15089
0
  ctxt->options |= XML_PARSE_NODICT;
15090
234k
    } else {
15091
234k
        ctxt->dictNames = 1;
15092
234k
    }
15093
234k
    if (options & XML_PARSE_NOCDATA) {
15094
0
        ctxt->sax->cdataBlock = NULL;
15095
0
        options -= XML_PARSE_NOCDATA;
15096
0
  ctxt->options |= XML_PARSE_NOCDATA;
15097
0
    }
15098
234k
    if (options & XML_PARSE_NSCLEAN) {
15099
0
  ctxt->options |= XML_PARSE_NSCLEAN;
15100
0
        options -= XML_PARSE_NSCLEAN;
15101
0
    }
15102
234k
    if (options & XML_PARSE_NONET) {
15103
234k
  ctxt->options |= XML_PARSE_NONET;
15104
234k
        options -= XML_PARSE_NONET;
15105
234k
    }
15106
234k
    if (options & XML_PARSE_COMPACT) {
15107
234k
  ctxt->options |= XML_PARSE_COMPACT;
15108
234k
        options -= XML_PARSE_COMPACT;
15109
234k
    }
15110
234k
    if (options & XML_PARSE_OLD10) {
15111
0
  ctxt->options |= XML_PARSE_OLD10;
15112
0
        options -= XML_PARSE_OLD10;
15113
0
    }
15114
234k
    if (options & XML_PARSE_NOBASEFIX) {
15115
0
  ctxt->options |= XML_PARSE_NOBASEFIX;
15116
0
        options -= XML_PARSE_NOBASEFIX;
15117
0
    }
15118
234k
    if (options & XML_PARSE_HUGE) {
15119
0
  ctxt->options |= XML_PARSE_HUGE;
15120
0
        options -= XML_PARSE_HUGE;
15121
0
        if (ctxt->dict != NULL)
15122
0
            xmlDictSetLimit(ctxt->dict, 0);
15123
0
    }
15124
234k
    if (options & XML_PARSE_OLDSAX) {
15125
0
  ctxt->options |= XML_PARSE_OLDSAX;
15126
0
        options -= XML_PARSE_OLDSAX;
15127
0
    }
15128
234k
    if (options & XML_PARSE_IGNORE_ENC) {
15129
0
  ctxt->options |= XML_PARSE_IGNORE_ENC;
15130
0
        options -= XML_PARSE_IGNORE_ENC;
15131
0
    }
15132
234k
    if (options & XML_PARSE_BIG_LINES) {
15133
0
  ctxt->options |= XML_PARSE_BIG_LINES;
15134
0
        options -= XML_PARSE_BIG_LINES;
15135
0
    }
15136
234k
    ctxt->linenumbers = 1;
15137
234k
    return (options);
15138
234k
}
15139
15140
/**
15141
 * xmlCtxtUseOptions:
15142
 * @ctxt: an XML parser context
15143
 * @options:  a combination of xmlParserOption
15144
 *
15145
 * Applies the options to the parser context
15146
 *
15147
 * Returns 0 in case of success, the set of unknown or unimplemented options
15148
 *         in case of error.
15149
 */
15150
int
15151
xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
15152
234k
{
15153
234k
   return(xmlCtxtUseOptionsInternal(ctxt, options, NULL));
15154
234k
}
15155
15156
/**
15157
 * xmlDoRead:
15158
 * @ctxt:  an XML parser context
15159
 * @URL:  the base URL to use for the document
15160
 * @encoding:  the document encoding, or NULL
15161
 * @options:  a combination of xmlParserOption
15162
 * @reuse:  keep the context for reuse
15163
 *
15164
 * Common front-end for the xmlRead functions
15165
 *
15166
 * Returns the resulting document tree or NULL
15167
 */
15168
static xmlDocPtr
15169
xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
15170
          int options, int reuse)
15171
0
{
15172
0
    xmlDocPtr ret;
15173
15174
0
    xmlCtxtUseOptionsInternal(ctxt, options, encoding);
15175
0
    if (encoding != NULL) {
15176
0
        xmlCharEncodingHandlerPtr hdlr;
15177
15178
0
  hdlr = xmlFindCharEncodingHandler(encoding);
15179
0
  if (hdlr != NULL)
15180
0
      xmlSwitchToEncoding(ctxt, hdlr);
15181
0
    }
15182
0
    if ((URL != NULL) && (ctxt->input != NULL) &&
15183
0
        (ctxt->input->filename == NULL))
15184
0
        ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
15185
0
    xmlParseDocument(ctxt);
15186
0
    if ((ctxt->wellFormed) || ctxt->recovery)
15187
0
        ret = ctxt->myDoc;
15188
0
    else {
15189
0
        ret = NULL;
15190
0
  if (ctxt->myDoc != NULL) {
15191
0
      xmlFreeDoc(ctxt->myDoc);
15192
0
  }
15193
0
    }
15194
0
    ctxt->myDoc = NULL;
15195
0
    if (!reuse) {
15196
0
  xmlFreeParserCtxt(ctxt);
15197
0
    }
15198
15199
0
    return (ret);
15200
0
}
15201
15202
/**
15203
 * xmlReadDoc:
15204
 * @cur:  a pointer to a zero terminated string
15205
 * @URL:  the base URL to use for the document
15206
 * @encoding:  the document encoding, or NULL
15207
 * @options:  a combination of xmlParserOption
15208
 *
15209
 * parse an XML in-memory document and build a tree.
15210
 *
15211
 * Returns the resulting document tree
15212
 */
15213
xmlDocPtr
15214
xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
15215
0
{
15216
0
    xmlParserCtxtPtr ctxt;
15217
15218
0
    if (cur == NULL)
15219
0
        return (NULL);
15220
0
    xmlInitParser();
15221
15222
0
    ctxt = xmlCreateDocParserCtxt(cur);
15223
0
    if (ctxt == NULL)
15224
0
        return (NULL);
15225
0
    return (xmlDoRead(ctxt, URL, encoding, options, 0));
15226
0
}
15227
15228
/**
15229
 * xmlReadFile:
15230
 * @filename:  a file or URL
15231
 * @encoding:  the document encoding, or NULL
15232
 * @options:  a combination of xmlParserOption
15233
 *
15234
 * parse an XML file from the filesystem or the network.
15235
 *
15236
 * Returns the resulting document tree
15237
 */
15238
xmlDocPtr
15239
xmlReadFile(const char *filename, const char *encoding, int options)
15240
0
{
15241
0
    xmlParserCtxtPtr ctxt;
15242
15243
0
    xmlInitParser();
15244
0
    ctxt = xmlCreateURLParserCtxt(filename, options);
15245
0
    if (ctxt == NULL)
15246
0
        return (NULL);
15247
0
    return (xmlDoRead(ctxt, NULL, encoding, options, 0));
15248
0
}
15249
15250
/**
15251
 * xmlReadMemory:
15252
 * @buffer:  a pointer to a char array
15253
 * @size:  the size of the array
15254
 * @URL:  the base URL to use for the document
15255
 * @encoding:  the document encoding, or NULL
15256
 * @options:  a combination of xmlParserOption
15257
 *
15258
 * parse an XML in-memory document and build a tree.
15259
 *
15260
 * Returns the resulting document tree
15261
 */
15262
xmlDocPtr
15263
xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
15264
0
{
15265
0
    xmlParserCtxtPtr ctxt;
15266
15267
0
    xmlInitParser();
15268
0
    ctxt = xmlCreateMemoryParserCtxt(buffer, size);
15269
0
    if (ctxt == NULL)
15270
0
        return (NULL);
15271
0
    return (xmlDoRead(ctxt, URL, encoding, options, 0));
15272
0
}
15273
15274
/**
15275
 * xmlReadFd:
15276
 * @fd:  an open file descriptor
15277
 * @URL:  the base URL to use for the document
15278
 * @encoding:  the document encoding, or NULL
15279
 * @options:  a combination of xmlParserOption
15280
 *
15281
 * parse an XML from a file descriptor and build a tree.
15282
 * NOTE that the file descriptor will not be closed when the
15283
 *      reader is closed or reset.
15284
 *
15285
 * Returns the resulting document tree
15286
 */
15287
xmlDocPtr
15288
xmlReadFd(int fd, const char *URL, const char *encoding, int options)
15289
0
{
15290
0
    xmlParserCtxtPtr ctxt;
15291
0
    xmlParserInputBufferPtr input;
15292
0
    xmlParserInputPtr stream;
15293
15294
0
    if (fd < 0)
15295
0
        return (NULL);
15296
0
    xmlInitParser();
15297
15298
0
    input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15299
0
    if (input == NULL)
15300
0
        return (NULL);
15301
0
    input->closecallback = NULL;
15302
0
    ctxt = xmlNewParserCtxt();
15303
0
    if (ctxt == NULL) {
15304
0
        xmlFreeParserInputBuffer(input);
15305
0
        return (NULL);
15306
0
    }
15307
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15308
0
    if (stream == NULL) {
15309
0
        xmlFreeParserInputBuffer(input);
15310
0
  xmlFreeParserCtxt(ctxt);
15311
0
        return (NULL);
15312
0
    }
15313
0
    inputPush(ctxt, stream);
15314
0
    return (xmlDoRead(ctxt, URL, encoding, options, 0));
15315
0
}
15316
15317
/**
15318
 * xmlReadIO:
15319
 * @ioread:  an I/O read function
15320
 * @ioclose:  an I/O close function
15321
 * @ioctx:  an I/O handler
15322
 * @URL:  the base URL to use for the document
15323
 * @encoding:  the document encoding, or NULL
15324
 * @options:  a combination of xmlParserOption
15325
 *
15326
 * parse an XML document from I/O functions and source and build a tree.
15327
 *
15328
 * Returns the resulting document tree
15329
 */
15330
xmlDocPtr
15331
xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
15332
          void *ioctx, const char *URL, const char *encoding, int options)
15333
0
{
15334
0
    xmlParserCtxtPtr ctxt;
15335
0
    xmlParserInputBufferPtr input;
15336
0
    xmlParserInputPtr stream;
15337
15338
0
    if (ioread == NULL)
15339
0
        return (NULL);
15340
0
    xmlInitParser();
15341
15342
0
    input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15343
0
                                         XML_CHAR_ENCODING_NONE);
15344
0
    if (input == NULL) {
15345
0
        if (ioclose != NULL)
15346
0
            ioclose(ioctx);
15347
0
        return (NULL);
15348
0
    }
15349
0
    ctxt = xmlNewParserCtxt();
15350
0
    if (ctxt == NULL) {
15351
0
        xmlFreeParserInputBuffer(input);
15352
0
        return (NULL);
15353
0
    }
15354
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15355
0
    if (stream == NULL) {
15356
0
        xmlFreeParserInputBuffer(input);
15357
0
  xmlFreeParserCtxt(ctxt);
15358
0
        return (NULL);
15359
0
    }
15360
0
    inputPush(ctxt, stream);
15361
0
    return (xmlDoRead(ctxt, URL, encoding, options, 0));
15362
0
}
15363
15364
/**
15365
 * xmlCtxtReadDoc:
15366
 * @ctxt:  an XML parser context
15367
 * @cur:  a pointer to a zero terminated string
15368
 * @URL:  the base URL to use for the document
15369
 * @encoding:  the document encoding, or NULL
15370
 * @options:  a combination of xmlParserOption
15371
 *
15372
 * parse an XML in-memory document and build a tree.
15373
 * This reuses the existing @ctxt parser context
15374
 *
15375
 * Returns the resulting document tree
15376
 */
15377
xmlDocPtr
15378
xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
15379
               const char *URL, const char *encoding, int options)
15380
0
{
15381
0
    xmlParserInputPtr stream;
15382
15383
0
    if (cur == NULL)
15384
0
        return (NULL);
15385
0
    if (ctxt == NULL)
15386
0
        return (NULL);
15387
0
    xmlInitParser();
15388
15389
0
    xmlCtxtReset(ctxt);
15390
15391
0
    stream = xmlNewStringInputStream(ctxt, cur);
15392
0
    if (stream == NULL) {
15393
0
        return (NULL);
15394
0
    }
15395
0
    inputPush(ctxt, stream);
15396
0
    return (xmlDoRead(ctxt, URL, encoding, options, 1));
15397
0
}
15398
15399
/**
15400
 * xmlCtxtReadFile:
15401
 * @ctxt:  an XML parser context
15402
 * @filename:  a file or URL
15403
 * @encoding:  the document encoding, or NULL
15404
 * @options:  a combination of xmlParserOption
15405
 *
15406
 * parse an XML file from the filesystem or the network.
15407
 * This reuses the existing @ctxt parser context
15408
 *
15409
 * Returns the resulting document tree
15410
 */
15411
xmlDocPtr
15412
xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
15413
                const char *encoding, int options)
15414
0
{
15415
0
    xmlParserInputPtr stream;
15416
15417
0
    if (filename == NULL)
15418
0
        return (NULL);
15419
0
    if (ctxt == NULL)
15420
0
        return (NULL);
15421
0
    xmlInitParser();
15422
15423
0
    xmlCtxtReset(ctxt);
15424
15425
0
    stream = xmlLoadExternalEntity(filename, NULL, ctxt);
15426
0
    if (stream == NULL) {
15427
0
        return (NULL);
15428
0
    }
15429
0
    inputPush(ctxt, stream);
15430
0
    return (xmlDoRead(ctxt, NULL, encoding, options, 1));
15431
0
}
15432
15433
/**
15434
 * xmlCtxtReadMemory:
15435
 * @ctxt:  an XML parser context
15436
 * @buffer:  a pointer to a char array
15437
 * @size:  the size of the array
15438
 * @URL:  the base URL to use for the document
15439
 * @encoding:  the document encoding, or NULL
15440
 * @options:  a combination of xmlParserOption
15441
 *
15442
 * parse an XML in-memory document and build a tree.
15443
 * This reuses the existing @ctxt parser context
15444
 *
15445
 * Returns the resulting document tree
15446
 */
15447
xmlDocPtr
15448
xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
15449
                  const char *URL, const char *encoding, int options)
15450
0
{
15451
0
    xmlParserInputBufferPtr input;
15452
0
    xmlParserInputPtr stream;
15453
15454
0
    if (ctxt == NULL)
15455
0
        return (NULL);
15456
0
    if (buffer == NULL)
15457
0
        return (NULL);
15458
0
    xmlInitParser();
15459
15460
0
    xmlCtxtReset(ctxt);
15461
15462
0
    input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
15463
0
    if (input == NULL) {
15464
0
  return(NULL);
15465
0
    }
15466
15467
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15468
0
    if (stream == NULL) {
15469
0
  xmlFreeParserInputBuffer(input);
15470
0
  return(NULL);
15471
0
    }
15472
15473
0
    inputPush(ctxt, stream);
15474
0
    return (xmlDoRead(ctxt, URL, encoding, options, 1));
15475
0
}
15476
15477
/**
15478
 * xmlCtxtReadFd:
15479
 * @ctxt:  an XML parser context
15480
 * @fd:  an open file descriptor
15481
 * @URL:  the base URL to use for the document
15482
 * @encoding:  the document encoding, or NULL
15483
 * @options:  a combination of xmlParserOption
15484
 *
15485
 * parse an XML from a file descriptor and build a tree.
15486
 * This reuses the existing @ctxt parser context
15487
 * NOTE that the file descriptor will not be closed when the
15488
 *      reader is closed or reset.
15489
 *
15490
 * Returns the resulting document tree
15491
 */
15492
xmlDocPtr
15493
xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
15494
              const char *URL, const char *encoding, int options)
15495
0
{
15496
0
    xmlParserInputBufferPtr input;
15497
0
    xmlParserInputPtr stream;
15498
15499
0
    if (fd < 0)
15500
0
        return (NULL);
15501
0
    if (ctxt == NULL)
15502
0
        return (NULL);
15503
0
    xmlInitParser();
15504
15505
0
    xmlCtxtReset(ctxt);
15506
15507
15508
0
    input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15509
0
    if (input == NULL)
15510
0
        return (NULL);
15511
0
    input->closecallback = NULL;
15512
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15513
0
    if (stream == NULL) {
15514
0
        xmlFreeParserInputBuffer(input);
15515
0
        return (NULL);
15516
0
    }
15517
0
    inputPush(ctxt, stream);
15518
0
    return (xmlDoRead(ctxt, URL, encoding, options, 1));
15519
0
}
15520
15521
/**
15522
 * xmlCtxtReadIO:
15523
 * @ctxt:  an XML parser context
15524
 * @ioread:  an I/O read function
15525
 * @ioclose:  an I/O close function
15526
 * @ioctx:  an I/O handler
15527
 * @URL:  the base URL to use for the document
15528
 * @encoding:  the document encoding, or NULL
15529
 * @options:  a combination of xmlParserOption
15530
 *
15531
 * parse an XML document from I/O functions and source and build a tree.
15532
 * This reuses the existing @ctxt parser context
15533
 *
15534
 * Returns the resulting document tree
15535
 */
15536
xmlDocPtr
15537
xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
15538
              xmlInputCloseCallback ioclose, void *ioctx,
15539
        const char *URL,
15540
              const char *encoding, int options)
15541
0
{
15542
0
    xmlParserInputBufferPtr input;
15543
0
    xmlParserInputPtr stream;
15544
15545
0
    if (ioread == NULL)
15546
0
        return (NULL);
15547
0
    if (ctxt == NULL)
15548
0
        return (NULL);
15549
0
    xmlInitParser();
15550
15551
0
    xmlCtxtReset(ctxt);
15552
15553
0
    input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15554
0
                                         XML_CHAR_ENCODING_NONE);
15555
0
    if (input == NULL) {
15556
0
        if (ioclose != NULL)
15557
0
            ioclose(ioctx);
15558
0
        return (NULL);
15559
0
    }
15560
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15561
0
    if (stream == NULL) {
15562
0
        xmlFreeParserInputBuffer(input);
15563
0
        return (NULL);
15564
0
    }
15565
0
    inputPush(ctxt, stream);
15566
0
    return (xmlDoRead(ctxt, URL, encoding, options, 1));
15567
0
}
15568
15569
#define bottom_parser
15570
#include "elfgcchack.h"