Coverage Report

Created: 2024-05-08 16:10

/src/libxml2/parser.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3
 *            implemented on top of the SAX interfaces
4
 *
5
 * References:
6
 *   The XML specification:
7
 *     http://www.w3.org/TR/REC-xml
8
 *   Original 1.0 version:
9
 *     http://www.w3.org/TR/1998/REC-xml-19980210
10
 *   XML second edition working draft
11
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
12
 *
13
 * Okay this is a big file, the parser core is around 7000 lines, then it
14
 * is followed by the progressive parser top routines, then the various
15
 * high level APIs to call the parser and a few miscellaneous functions.
16
 * A number of helper functions and deprecated ones have been moved to
17
 * parserInternals.c to reduce this file size.
18
 * As much as possible the functions are associated with their relative
19
 * production in the XML specification. A few productions defining the
20
 * different ranges of characters are actually implemented either in
21
 * parserInternals.h or parserInternals.c
22
 * The DOM tree build is realized from the default SAX callbacks in
23
 * the module SAX.c.
24
 * The routines doing the validation checks are in valid.c and called either
25
 * from the SAX callbacks or as standalone functions using a preparsed
26
 * document.
27
 *
28
 * See Copyright for the status of this software.
29
 *
30
 * daniel@veillard.com
31
 */
32
33
/* To avoid EBCDIC trouble when parsing on zOS */
34
#if defined(__MVS__)
35
#pragma convert("ISO8859-1")
36
#endif
37
38
#define IN_LIBXML
39
#include "libxml.h"
40
41
#if defined(_WIN32)
42
#define XML_DIR_SEP '\\'
43
#else
44
#define XML_DIR_SEP '/'
45
#endif
46
47
#include <stdlib.h>
48
#include <limits.h>
49
#include <string.h>
50
#include <stdarg.h>
51
#include <stddef.h>
52
#include <ctype.h>
53
#include <stdlib.h>
54
#include <libxml/xmlmemory.h>
55
#include <libxml/threads.h>
56
#include <libxml/globals.h>
57
#include <libxml/tree.h>
58
#include <libxml/parser.h>
59
#include <libxml/parserInternals.h>
60
#include <libxml/HTMLparser.h>
61
#include <libxml/valid.h>
62
#include <libxml/entities.h>
63
#include <libxml/xmlerror.h>
64
#include <libxml/encoding.h>
65
#include <libxml/xmlIO.h>
66
#include <libxml/uri.h>
67
#ifdef LIBXML_CATALOG_ENABLED
68
#include <libxml/catalog.h>
69
#endif
70
#ifdef LIBXML_SCHEMAS_ENABLED
71
#include <libxml/xmlschemastypes.h>
72
#include <libxml/relaxng.h>
73
#endif
74
#if defined(LIBXML_XPATH_ENABLED) || defined(LIBXML_SCHEMAS_ENABLED)
75
#include <libxml/xpath.h>
76
#endif
77
78
#include "private/buf.h"
79
#include "private/dict.h"
80
#include "private/enc.h"
81
#include "private/entities.h"
82
#include "private/error.h"
83
#include "private/globals.h"
84
#include "private/html.h"
85
#include "private/io.h"
86
#include "private/memory.h"
87
#include "private/parser.h"
88
#include "private/threads.h"
89
#include "private/xpath.h"
90
91
struct _xmlStartTag {
92
    const xmlChar *prefix;
93
    const xmlChar *URI;
94
    int line;
95
    int nsNr;
96
};
97
98
static xmlParserCtxtPtr
99
xmlCreateEntityParserCtxtInternal(xmlSAXHandlerPtr sax, void *userData,
100
        const xmlChar *URL, const xmlChar *ID, const xmlChar *base,
101
        xmlParserCtxtPtr pctx);
102
103
static void xmlHaltParser(xmlParserCtxtPtr ctxt);
104
105
static int
106
xmlParseElementStart(xmlParserCtxtPtr ctxt);
107
108
static void
109
xmlParseElementEnd(xmlParserCtxtPtr ctxt);
110
111
/************************************************************************
112
 *                  *
113
 *  Arbitrary limits set in the parser. See XML_PARSE_HUGE    *
114
 *                  *
115
 ************************************************************************/
116
117
16.4M
#define XML_MAX_HUGE_LENGTH 1000000000
118
119
#define XML_PARSER_BIG_ENTITY 1000
120
#define XML_PARSER_LOT_ENTITY 5000
121
122
/*
123
 * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity
124
 *    replacement over the size in byte of the input indicates that you have
125
 *    an exponential behaviour. A value of 10 corresponds to at least 3 entity
126
 *    replacement per byte of input.
127
 */
128
768
#define XML_PARSER_NON_LINEAR 10
129
130
38.3M
#define XML_ENT_FIXED_COST 50
131
132
/**
133
 * xmlParserMaxDepth:
134
 *
135
 * arbitrary depth limit for the XML documents that we allow to
136
 * process. This is not a limitation of the parser but a safety
137
 * boundary feature. It can be disabled with the XML_PARSE_HUGE
138
 * parser option.
139
 */
140
unsigned int xmlParserMaxDepth = 256;
141
142
143
144
#define SAX2 1
145
212M
#define XML_PARSER_BIG_BUFFER_SIZE 300
146
2.94G
#define XML_PARSER_BUFFER_SIZE 100
147
3.24M
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
148
149
/**
150
 * XML_PARSER_CHUNK_SIZE
151
 *
152
 * When calling GROW that's the minimal amount of data
153
 * the parser expected to have received. It is not a hard
154
 * limit but an optimization when reading strings like Names
155
 * It is not strictly needed as long as the input's available characters
156
 * are followed by 0, which should be provided by the I/O level
157
 */
158
147M
#define XML_PARSER_CHUNK_SIZE 100
159
160
/*
 * Processing instruction targets carrying the "xml" prefix that are
 * allowed by W3C specifications. The array is terminated by NULL.
 */
static const char *const xmlW3CPIs[] = {
    "xml-stylesheet", "xml-model", NULL
};
169
170
171
/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
172
static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
173
                                              const xmlChar **str);
174
175
static xmlParserErrors
176
xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
177
                xmlSAXHandlerPtr sax,
178
          void *user_data, int depth, const xmlChar *URL,
179
          const xmlChar *ID, xmlNodePtr *list);
180
181
static int
182
xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
183
                          const char *encoding);
184
#ifdef LIBXML_LEGACY_ENABLED
185
static void
186
xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
187
                      xmlNodePtr lastNode);
188
#endif /* LIBXML_LEGACY_ENABLED */
189
190
static xmlParserErrors
191
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
192
          const xmlChar *string, void *user_data, xmlNodePtr *lst);
193
194
static int
195
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
196
197
/************************************************************************
198
 *                  *
199
 *    Some factorized error routines        *
200
 *                  *
201
 ************************************************************************/
202
203
/**
204
 * xmlErrAttributeDup:
205
 * @ctxt:  an XML parser context
206
 * @prefix:  the attribute prefix
207
 * @localname:  the attribute localname
208
 *
209
 * Handle a redefinition of attribute error
210
 */
211
static void
212
xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
213
                   const xmlChar * localname)
214
38.6k
{
215
38.6k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
216
38.6k
        (ctxt->instate == XML_PARSER_EOF))
217
0
  return;
218
38.6k
    if (ctxt != NULL)
219
38.6k
  ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
220
221
38.6k
    if (prefix == NULL)
222
28.7k
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
223
28.7k
                        XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
224
28.7k
                        (const char *) localname, NULL, NULL, 0, 0,
225
28.7k
                        "Attribute %s redefined\n", localname);
226
9.91k
    else
227
9.91k
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
228
9.91k
                        XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
229
9.91k
                        (const char *) prefix, (const char *) localname,
230
9.91k
                        NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
231
9.91k
                        localname);
232
38.6k
    if (ctxt != NULL) {
233
38.6k
  ctxt->wellFormed = 0;
234
38.6k
  if (ctxt->recovery == 0)
235
23.4k
      ctxt->disableSAX = 1;
236
38.6k
    }
237
38.6k
}
238
239
/**
240
 * xmlFatalErr:
241
 * @ctxt:  an XML parser context
242
 * @error:  the error number
243
 * @extra:  extra information string
244
 *
245
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
246
 */
247
static void
248
xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
249
5.34M
{
250
5.34M
    const char *errmsg;
251
252
5.34M
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
253
5.34M
        (ctxt->instate == XML_PARSER_EOF))
254
78.1k
  return;
255
5.27M
    switch (error) {
256
38.8k
        case XML_ERR_INVALID_HEX_CHARREF:
257
38.8k
            errmsg = "CharRef: invalid hexadecimal value";
258
38.8k
            break;
259
98.1k
        case XML_ERR_INVALID_DEC_CHARREF:
260
98.1k
            errmsg = "CharRef: invalid decimal value";
261
98.1k
            break;
262
0
        case XML_ERR_INVALID_CHARREF:
263
0
            errmsg = "CharRef: invalid value";
264
0
            break;
265
2.15M
        case XML_ERR_INTERNAL_ERROR:
266
2.15M
            errmsg = "internal error";
267
2.15M
            break;
268
0
        case XML_ERR_PEREF_AT_EOF:
269
0
            errmsg = "PEReference at end of document";
270
0
            break;
271
0
        case XML_ERR_PEREF_IN_PROLOG:
272
0
            errmsg = "PEReference in prolog";
273
0
            break;
274
0
        case XML_ERR_PEREF_IN_EPILOG:
275
0
            errmsg = "PEReference in epilog";
276
0
            break;
277
0
        case XML_ERR_PEREF_NO_NAME:
278
0
            errmsg = "PEReference: no name";
279
0
            break;
280
17.1k
        case XML_ERR_PEREF_SEMICOL_MISSING:
281
17.1k
            errmsg = "PEReference: expecting ';'";
282
17.1k
            break;
283
2.99k
        case XML_ERR_ENTITY_LOOP:
284
2.99k
            errmsg = "Detected an entity reference loop";
285
2.99k
            break;
286
0
        case XML_ERR_ENTITY_NOT_STARTED:
287
0
            errmsg = "EntityValue: \" or ' expected";
288
0
            break;
289
17.2k
        case XML_ERR_ENTITY_PE_INTERNAL:
290
17.2k
            errmsg = "PEReferences forbidden in internal subset";
291
17.2k
            break;
292
18.3k
        case XML_ERR_ENTITY_NOT_FINISHED:
293
18.3k
            errmsg = "EntityValue: \" or ' expected";
294
18.3k
            break;
295
91.8k
        case XML_ERR_ATTRIBUTE_NOT_STARTED:
296
91.8k
            errmsg = "AttValue: \" or ' expected";
297
91.8k
            break;
298
186k
        case XML_ERR_LT_IN_ATTRIBUTE:
299
186k
            errmsg = "Unescaped '<' not allowed in attributes values";
300
186k
            break;
301
37.2k
        case XML_ERR_LITERAL_NOT_STARTED:
302
37.2k
            errmsg = "SystemLiteral \" or ' expected";
303
37.2k
            break;
304
42.2k
        case XML_ERR_LITERAL_NOT_FINISHED:
305
42.2k
            errmsg = "Unfinished System or Public ID \" or ' expected";
306
42.2k
            break;
307
50.2k
        case XML_ERR_MISPLACED_CDATA_END:
308
50.2k
            errmsg = "Sequence ']]>' not allowed in content";
309
50.2k
            break;
310
31.5k
        case XML_ERR_URI_REQUIRED:
311
31.5k
            errmsg = "SYSTEM or PUBLIC, the URI is missing";
312
31.5k
            break;
313
5.72k
        case XML_ERR_PUBID_REQUIRED:
314
5.72k
            errmsg = "PUBLIC, the Public Identifier is missing";
315
5.72k
            break;
316
96.5k
        case XML_ERR_HYPHEN_IN_COMMENT:
317
96.5k
            errmsg = "Comment must not contain '--' (double-hyphen)";
318
96.5k
            break;
319
24.6k
        case XML_ERR_PI_NOT_STARTED:
320
24.6k
            errmsg = "xmlParsePI : no target name";
321
24.6k
            break;
322
5.06k
        case XML_ERR_RESERVED_XML_NAME:
323
5.06k
            errmsg = "Invalid PI name";
324
5.06k
            break;
325
1.97k
        case XML_ERR_NOTATION_NOT_STARTED:
326
1.97k
            errmsg = "NOTATION: Name expected here";
327
1.97k
            break;
328
3.48k
        case XML_ERR_NOTATION_NOT_FINISHED:
329
3.48k
            errmsg = "'>' required to close NOTATION declaration";
330
3.48k
            break;
331
33.4k
        case XML_ERR_VALUE_REQUIRED:
332
33.4k
            errmsg = "Entity value required";
333
33.4k
            break;
334
2.77k
        case XML_ERR_URI_FRAGMENT:
335
2.77k
            errmsg = "Fragment not allowed";
336
2.77k
            break;
337
35.0k
        case XML_ERR_ATTLIST_NOT_STARTED:
338
35.0k
            errmsg = "'(' required to start ATTLIST enumeration";
339
35.0k
            break;
340
2.96k
        case XML_ERR_NMTOKEN_REQUIRED:
341
2.96k
            errmsg = "NmToken expected in ATTLIST enumeration";
342
2.96k
            break;
343
5.33k
        case XML_ERR_ATTLIST_NOT_FINISHED:
344
5.33k
            errmsg = "')' required to finish ATTLIST enumeration";
345
5.33k
            break;
346
8.77k
        case XML_ERR_MIXED_NOT_STARTED:
347
8.77k
            errmsg = "MixedContentDecl : '|' or ')*' expected";
348
8.77k
            break;
349
0
        case XML_ERR_PCDATA_REQUIRED:
350
0
            errmsg = "MixedContentDecl : '#PCDATA' expected";
351
0
            break;
352
25.6k
        case XML_ERR_ELEMCONTENT_NOT_STARTED:
353
25.6k
            errmsg = "ContentDecl : Name or '(' expected";
354
25.6k
            break;
355
29.2k
        case XML_ERR_ELEMCONTENT_NOT_FINISHED:
356
29.2k
            errmsg = "ContentDecl : ',' '|' or ')' expected";
357
29.2k
            break;
358
0
        case XML_ERR_PEREF_IN_INT_SUBSET:
359
0
            errmsg =
360
0
                "PEReference: forbidden within markup decl in internal subset";
361
0
            break;
362
361k
        case XML_ERR_GT_REQUIRED:
363
361k
            errmsg = "expected '>'";
364
361k
            break;
365
1.00k
        case XML_ERR_CONDSEC_INVALID:
366
1.00k
            errmsg = "XML conditional section '[' expected";
367
1.00k
            break;
368
38.1k
        case XML_ERR_EXT_SUBSET_NOT_FINISHED:
369
38.1k
            errmsg = "Content error in the external subset";
370
38.1k
            break;
371
8.24k
        case XML_ERR_CONDSEC_INVALID_KEYWORD:
372
8.24k
            errmsg =
373
8.24k
                "conditional section INCLUDE or IGNORE keyword expected";
374
8.24k
            break;
375
3.26k
        case XML_ERR_CONDSEC_NOT_FINISHED:
376
3.26k
            errmsg = "XML conditional section not closed";
377
3.26k
            break;
378
1.48k
        case XML_ERR_XMLDECL_NOT_STARTED:
379
1.48k
            errmsg = "Text declaration '<?xml' required";
380
1.48k
            break;
381
315k
        case XML_ERR_XMLDECL_NOT_FINISHED:
382
315k
            errmsg = "parsing XML declaration: '?>' expected";
383
315k
            break;
384
0
        case XML_ERR_EXT_ENTITY_STANDALONE:
385
0
            errmsg = "external parsed entities cannot be standalone";
386
0
            break;
387
149k
        case XML_ERR_ENTITYREF_SEMICOL_MISSING:
388
149k
            errmsg = "EntityRef: expecting ';'";
389
149k
            break;
390
89.7k
        case XML_ERR_DOCTYPE_NOT_FINISHED:
391
89.7k
            errmsg = "DOCTYPE improperly terminated";
392
89.7k
            break;
393
0
        case XML_ERR_LTSLASH_REQUIRED:
394
0
            errmsg = "EndTag: '</' not found";
395
0
            break;
396
22.6k
        case XML_ERR_EQUAL_REQUIRED:
397
22.6k
            errmsg = "expected '='";
398
22.6k
            break;
399
92.1k
        case XML_ERR_STRING_NOT_CLOSED:
400
92.1k
            errmsg = "String not closed expecting \" or '";
401
92.1k
            break;
402
20.0k
        case XML_ERR_STRING_NOT_STARTED:
403
20.0k
            errmsg = "String not started expecting ' or \"";
404
20.0k
            break;
405
5.09k
        case XML_ERR_ENCODING_NAME:
406
5.09k
            errmsg = "Invalid XML encoding name";
407
5.09k
            break;
408
9.51k
        case XML_ERR_STANDALONE_VALUE:
409
9.51k
            errmsg = "standalone accepts only 'yes' or 'no'";
410
9.51k
            break;
411
244k
        case XML_ERR_DOCUMENT_EMPTY:
412
244k
            errmsg = "Document is empty";
413
244k
            break;
414
663k
        case XML_ERR_DOCUMENT_END:
415
663k
            errmsg = "Extra content at the end of the document";
416
663k
            break;
417
22.3k
        case XML_ERR_NOT_WELL_BALANCED:
418
22.3k
            errmsg = "chunk is not well balanced";
419
22.3k
            break;
420
0
        case XML_ERR_EXTRA_CONTENT:
421
0
            errmsg = "extra content at the end of well balanced chunk";
422
0
            break;
423
153k
        case XML_ERR_VERSION_MISSING:
424
153k
            errmsg = "Malformed declaration expecting version";
425
153k
            break;
426
22
        case XML_ERR_NAME_TOO_LONG:
427
22
            errmsg = "Name too long";
428
22
            break;
429
#if 0
430
        case:
431
            errmsg = "";
432
            break;
433
#endif
434
2.41k
        default:
435
2.41k
            errmsg = "Unregistered error message";
436
5.27M
    }
437
5.27M
    if (ctxt != NULL)
438
5.27M
  ctxt->errNo = error;
439
5.27M
    if (info == NULL) {
440
3.11M
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
441
3.11M
                        XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s\n",
442
3.11M
                        errmsg);
443
3.11M
    } else {
444
2.15M
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
445
2.15M
                        XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s: %s\n",
446
2.15M
                        errmsg, info);
447
2.15M
    }
448
5.27M
    if (ctxt != NULL) {
449
5.27M
  ctxt->wellFormed = 0;
450
5.27M
  if (ctxt->recovery == 0)
451
1.56M
      ctxt->disableSAX = 1;
452
5.27M
    }
453
5.27M
}
454
455
/**
456
 * xmlFatalErrMsg:
457
 * @ctxt:  an XML parser context
458
 * @error:  the error number
459
 * @msg:  the error message
460
 *
461
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
462
 */
463
static void LIBXML_ATTR_FORMAT(3,0)
464
xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
465
               const char *msg)
466
5.05M
{
467
5.05M
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
468
5.05M
        (ctxt->instate == XML_PARSER_EOF))
469
0
  return;
470
5.05M
    if (ctxt != NULL)
471
5.05M
  ctxt->errNo = error;
472
5.05M
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
473
5.05M
                    XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
474
5.05M
    if (ctxt != NULL) {
475
5.05M
  ctxt->wellFormed = 0;
476
5.05M
  if (ctxt->recovery == 0)
477
1.79M
      ctxt->disableSAX = 1;
478
5.05M
    }
479
5.05M
}
480
481
/**
482
 * xmlWarningMsg:
483
 * @ctxt:  an XML parser context
484
 * @error:  the error number
485
 * @msg:  the error message
486
 * @str1:  extra data
487
 * @str2:  extra data
488
 *
489
 * Handle a warning.
490
 */
491
static void LIBXML_ATTR_FORMAT(3,0)
492
xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
493
              const char *msg, const xmlChar *str1, const xmlChar *str2)
494
1.24M
{
495
1.24M
    xmlStructuredErrorFunc schannel = NULL;
496
497
1.24M
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
498
1.24M
        (ctxt->instate == XML_PARSER_EOF))
499
0
  return;
500
1.24M
    if ((ctxt != NULL) && (ctxt->sax != NULL) &&
501
1.24M
        (ctxt->sax->initialized == XML_SAX2_MAGIC))
502
1.14M
        schannel = ctxt->sax->serror;
503
1.24M
    if (ctxt != NULL) {
504
1.24M
        __xmlRaiseError(schannel,
505
1.24M
                    (ctxt->sax) ? ctxt->sax->warning : NULL,
506
1.24M
                    ctxt->userData,
507
1.24M
                    ctxt, NULL, XML_FROM_PARSER, error,
508
1.24M
                    XML_ERR_WARNING, NULL, 0,
509
1.24M
        (const char *) str1, (const char *) str2, NULL, 0, 0,
510
1.24M
        msg, (const char *) str1, (const char *) str2);
511
1.24M
    } else {
512
0
        __xmlRaiseError(schannel, NULL, NULL,
513
0
                    ctxt, NULL, XML_FROM_PARSER, error,
514
0
                    XML_ERR_WARNING, NULL, 0,
515
0
        (const char *) str1, (const char *) str2, NULL, 0, 0,
516
0
        msg, (const char *) str1, (const char *) str2);
517
0
    }
518
1.24M
}
519
520
/**
521
 * xmlValidityError:
522
 * @ctxt:  an XML parser context
523
 * @error:  the error number
524
 * @msg:  the error message
525
 * @str1:  extra data
526
 *
527
 * Handle a validity error.
528
 */
529
static void LIBXML_ATTR_FORMAT(3,0)
530
xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
531
              const char *msg, const xmlChar *str1, const xmlChar *str2)
532
40.8k
{
533
40.8k
    xmlStructuredErrorFunc schannel = NULL;
534
535
40.8k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
536
40.8k
        (ctxt->instate == XML_PARSER_EOF))
537
0
  return;
538
40.8k
    if (ctxt != NULL) {
539
40.8k
  ctxt->errNo = error;
540
40.8k
  if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
541
23.7k
      schannel = ctxt->sax->serror;
542
40.8k
    }
543
40.8k
    if (ctxt != NULL) {
544
40.8k
        __xmlRaiseError(schannel,
545
40.8k
                    ctxt->vctxt.error, ctxt->vctxt.userData,
546
40.8k
                    ctxt, NULL, XML_FROM_DTD, error,
547
40.8k
                    XML_ERR_ERROR, NULL, 0, (const char *) str1,
548
40.8k
        (const char *) str2, NULL, 0, 0,
549
40.8k
        msg, (const char *) str1, (const char *) str2);
550
40.8k
  ctxt->valid = 0;
551
40.8k
    } else {
552
0
        __xmlRaiseError(schannel, NULL, NULL,
553
0
                    ctxt, NULL, XML_FROM_DTD, error,
554
0
                    XML_ERR_ERROR, NULL, 0, (const char *) str1,
555
0
        (const char *) str2, NULL, 0, 0,
556
0
        msg, (const char *) str1, (const char *) str2);
557
0
    }
558
40.8k
}
559
560
/**
561
 * xmlFatalErrMsgInt:
562
 * @ctxt:  an XML parser context
563
 * @error:  the error number
564
 * @msg:  the error message
565
 * @val:  an integer value
566
 *
567
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
568
 */
569
static void LIBXML_ATTR_FORMAT(3,0)
570
xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
571
                  const char *msg, int val)
572
2.93M
{
573
2.93M
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
574
2.93M
        (ctxt->instate == XML_PARSER_EOF))
575
0
  return;
576
2.93M
    if (ctxt != NULL)
577
2.93M
  ctxt->errNo = error;
578
2.93M
    __xmlRaiseError(NULL, NULL, NULL,
579
2.93M
                    ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
580
2.93M
                    NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
581
2.93M
    if (ctxt != NULL) {
582
2.93M
  ctxt->wellFormed = 0;
583
2.93M
  if (ctxt->recovery == 0)
584
399k
      ctxt->disableSAX = 1;
585
2.93M
    }
586
2.93M
}
587
588
/**
589
 * xmlFatalErrMsgStrIntStr:
590
 * @ctxt:  an XML parser context
591
 * @error:  the error number
592
 * @msg:  the error message
593
 * @str1:  an string info
594
 * @val:  an integer value
595
 * @str2:  an string info
596
 *
597
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
598
 */
599
static void LIBXML_ATTR_FORMAT(3,0)
600
xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
601
                  const char *msg, const xmlChar *str1, int val,
602
      const xmlChar *str2)
603
1.63M
{
604
1.63M
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
605
1.63M
        (ctxt->instate == XML_PARSER_EOF))
606
0
  return;
607
1.63M
    if (ctxt != NULL)
608
1.63M
  ctxt->errNo = error;
609
1.63M
    __xmlRaiseError(NULL, NULL, NULL,
610
1.63M
                    ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
611
1.63M
                    NULL, 0, (const char *) str1, (const char *) str2,
612
1.63M
        NULL, val, 0, msg, str1, val, str2);
613
1.63M
    if (ctxt != NULL) {
614
1.63M
  ctxt->wellFormed = 0;
615
1.63M
  if (ctxt->recovery == 0)
616
529k
      ctxt->disableSAX = 1;
617
1.63M
    }
618
1.63M
}
619
620
/**
621
 * xmlFatalErrMsgStr:
622
 * @ctxt:  an XML parser context
623
 * @error:  the error number
624
 * @msg:  the error message
625
 * @val:  a string value
626
 *
627
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
628
 */
629
static void LIBXML_ATTR_FORMAT(3,0)
630
xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
631
                  const char *msg, const xmlChar * val)
632
4.06M
{
633
4.06M
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
634
4.06M
        (ctxt->instate == XML_PARSER_EOF))
635
0
  return;
636
4.06M
    if (ctxt != NULL)
637
4.06M
  ctxt->errNo = error;
638
4.06M
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
639
4.06M
                    XML_FROM_PARSER, error, XML_ERR_FATAL,
640
4.06M
                    NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
641
4.06M
                    val);
642
4.06M
    if (ctxt != NULL) {
643
4.06M
  ctxt->wellFormed = 0;
644
4.06M
  if (ctxt->recovery == 0)
645
1.43M
      ctxt->disableSAX = 1;
646
4.06M
    }
647
4.06M
}
648
649
/**
650
 * xmlErrMsgStr:
651
 * @ctxt:  an XML parser context
652
 * @error:  the error number
653
 * @msg:  the error message
654
 * @val:  a string value
655
 *
656
 * Handle a non fatal parser error
657
 */
658
static void LIBXML_ATTR_FORMAT(3,0)
659
xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
660
                  const char *msg, const xmlChar * val)
661
900k
{
662
900k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
663
900k
        (ctxt->instate == XML_PARSER_EOF))
664
0
  return;
665
900k
    if (ctxt != NULL)
666
900k
  ctxt->errNo = error;
667
900k
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
668
900k
                    XML_FROM_PARSER, error, XML_ERR_ERROR,
669
900k
                    NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
670
900k
                    val);
671
900k
}
672
673
/**
674
 * xmlNsErr:
675
 * @ctxt:  an XML parser context
676
 * @error:  the error number
677
 * @msg:  the message
678
 * @info1:  extra information string
679
 * @info2:  extra information string
680
 *
681
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
682
 */
683
static void LIBXML_ATTR_FORMAT(3,0)
684
xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
685
         const char *msg,
686
         const xmlChar * info1, const xmlChar * info2,
687
         const xmlChar * info3)
688
902k
{
689
902k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
690
902k
        (ctxt->instate == XML_PARSER_EOF))
691
0
  return;
692
902k
    if (ctxt != NULL)
693
902k
  ctxt->errNo = error;
694
902k
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
695
902k
                    XML_ERR_ERROR, NULL, 0, (const char *) info1,
696
902k
                    (const char *) info2, (const char *) info3, 0, 0, msg,
697
902k
                    info1, info2, info3);
698
902k
    if (ctxt != NULL)
699
902k
  ctxt->nsWellFormed = 0;
700
902k
}
701
702
/**
703
 * xmlNsWarn
704
 * @ctxt:  an XML parser context
705
 * @error:  the error number
706
 * @msg:  the message
707
 * @info1:  extra information string
708
 * @info2:  extra information string
709
 *
710
 * Handle a namespace warning error
711
 */
712
static void LIBXML_ATTR_FORMAT(3,0)
713
xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
714
         const char *msg,
715
         const xmlChar * info1, const xmlChar * info2,
716
         const xmlChar * info3)
717
12.7k
{
718
12.7k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
719
12.7k
        (ctxt->instate == XML_PARSER_EOF))
720
0
  return;
721
12.7k
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
722
12.7k
                    XML_ERR_WARNING, NULL, 0, (const char *) info1,
723
12.7k
                    (const char *) info2, (const char *) info3, 0, 0, msg,
724
12.7k
                    info1, info2, info3);
725
12.7k
}
726
727
/*
 * Add @val to *@dst without wrapping around: when the sum would not
 * fit in an unsigned long, *@dst is set to ULONG_MAX instead.
 */
static void
xmlSaturatedAdd(unsigned long *dst, unsigned long val) {
    unsigned long room = ULONG_MAX - *dst;

    *dst = (val > room) ? ULONG_MAX : (*dst + val);
}
734
735
/*
 * Add the size_t value @val to *@dst without wrapping around: when the
 * sum would not fit in an unsigned long, *@dst is set to ULONG_MAX.
 *
 * @val is taken as size_t so that, on platforms where size_t is wider
 * than unsigned long, a large value saturates the counter instead of
 * being truncated before the overflow test.
 */
static void
xmlSaturatedAddSizeT(unsigned long *dst, size_t val) {
    /* The comparison is carried out in the wider of the two types. */
    if (val > ULONG_MAX - *dst)
        *dst = ULONG_MAX;
    else
        *dst += (unsigned long) val;
}
742
743
/**
744
 * xmlParserEntityCheck:
745
 * @ctxt:  parser context
746
 * @extra:  sum of unexpanded entity sizes
747
 *
748
 * Check for non-linear entity expansion behaviour.
749
 *
750
 * In some cases like xmlStringDecodeEntities, this function is called
751
 * for each, possibly nested entity and its unexpanded content length.
752
 *
753
 * In other cases like xmlParseReference, it's only called for each
754
 * top-level entity with its unexpanded content length plus the sum of
755
 * the unexpanded content lengths (plus fixed cost) of all nested
756
 * entities.
757
 *
758
 * Summing the unexpanded lengths also adds the length of the reference.
759
 * This is by design. Taking the length of the entity name into account
760
 * discourages attacks that try to waste CPU time with abusively long
761
 * entity names. See test/recurse/lol6.xml for example. Each call also
762
 * adds some fixed cost XML_ENT_FIXED_COST to discourage attacks with
763
 * short entities.
764
 *
765
 * Returns 1 on error, 0 on success.
766
 */
767
static int
768
xmlParserEntityCheck(xmlParserCtxtPtr ctxt, unsigned long extra)
769
38.3M
{
770
38.3M
    unsigned long consumed;
771
38.3M
    xmlParserInputPtr input = ctxt->input;
772
38.3M
    xmlEntityPtr entity = input->entity;
773
774
    /*
775
     * Compute total consumed bytes so far, including input streams of
776
     * external entities.
777
     */
778
38.3M
    consumed = input->parentConsumed;
779
38.3M
    if ((entity == NULL) ||
780
38.3M
        ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
781
22.7M
         ((entity->flags & XML_ENT_PARSED) == 0))) {
782
22.7M
        xmlSaturatedAdd(&consumed, input->consumed);
783
22.7M
        xmlSaturatedAddSizeT(&consumed, input->cur - input->base);
784
22.7M
    }
785
38.3M
    xmlSaturatedAdd(&consumed, ctxt->sizeentities);
786
787
    /*
788
     * Add extra cost and some fixed cost.
789
     */
790
38.3M
    xmlSaturatedAdd(&ctxt->sizeentcopy, extra);
791
38.3M
    xmlSaturatedAdd(&ctxt->sizeentcopy, XML_ENT_FIXED_COST);
792
793
    /*
794
     * It's important to always use saturation arithmetic when tracking
795
     * entity sizes to make the size checks reliable. If "sizeentcopy"
796
     * overflows, we have to abort.
797
     */
798
38.3M
    if ((ctxt->sizeentcopy > XML_MAX_TEXT_LENGTH) &&
799
38.3M
        ((ctxt->sizeentcopy >= ULONG_MAX) ||
800
768
         (ctxt->sizeentcopy / XML_PARSER_NON_LINEAR > consumed))) {
801
768
        xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_LOOP,
802
768
                       "Maximum entity amplification factor exceeded");
803
768
        xmlHaltParser(ctxt);
804
768
        return(1);
805
768
    }
806
807
38.3M
    return(0);
808
38.3M
}
809
810
/************************************************************************
811
 *                  *
812
 *    Library wide options          *
813
 *                  *
814
 ************************************************************************/
815
816
/**
817
  * xmlHasFeature:
818
  * @feature: the feature to be examined
819
  *
820
  * Examines if the library has been compiled with a given feature.
821
  *
822
  * Returns a non-zero value if the feature exists, otherwise zero.
823
  * Returns zero (0) if the feature does not exist or an unknown
824
  * feature is requested, non-zero otherwise.
825
  */
826
int
827
xmlHasFeature(xmlFeature feature)
828
0
{
829
0
    switch (feature) {
830
0
  case XML_WITH_THREAD:
831
0
#ifdef LIBXML_THREAD_ENABLED
832
0
      return(1);
833
#else
834
      return(0);
835
#endif
836
0
        case XML_WITH_TREE:
837
0
#ifdef LIBXML_TREE_ENABLED
838
0
            return(1);
839
#else
840
            return(0);
841
#endif
842
0
        case XML_WITH_OUTPUT:
843
0
#ifdef LIBXML_OUTPUT_ENABLED
844
0
            return(1);
845
#else
846
            return(0);
847
#endif
848
0
        case XML_WITH_PUSH:
849
0
#ifdef LIBXML_PUSH_ENABLED
850
0
            return(1);
851
#else
852
            return(0);
853
#endif
854
0
        case XML_WITH_READER:
855
0
#ifdef LIBXML_READER_ENABLED
856
0
            return(1);
857
#else
858
            return(0);
859
#endif
860
0
        case XML_WITH_PATTERN:
861
0
#ifdef LIBXML_PATTERN_ENABLED
862
0
            return(1);
863
#else
864
            return(0);
865
#endif
866
0
        case XML_WITH_WRITER:
867
0
#ifdef LIBXML_WRITER_ENABLED
868
0
            return(1);
869
#else
870
            return(0);
871
#endif
872
0
        case XML_WITH_SAX1:
873
0
#ifdef LIBXML_SAX1_ENABLED
874
0
            return(1);
875
#else
876
            return(0);
877
#endif
878
0
        case XML_WITH_FTP:
879
#ifdef LIBXML_FTP_ENABLED
880
            return(1);
881
#else
882
0
            return(0);
883
0
#endif
884
0
        case XML_WITH_HTTP:
885
#ifdef LIBXML_HTTP_ENABLED
886
            return(1);
887
#else
888
0
            return(0);
889
0
#endif
890
0
        case XML_WITH_VALID:
891
0
#ifdef LIBXML_VALID_ENABLED
892
0
            return(1);
893
#else
894
            return(0);
895
#endif
896
0
        case XML_WITH_HTML:
897
0
#ifdef LIBXML_HTML_ENABLED
898
0
            return(1);
899
#else
900
            return(0);
901
#endif
902
0
        case XML_WITH_LEGACY:
903
#ifdef LIBXML_LEGACY_ENABLED
904
            return(1);
905
#else
906
0
            return(0);
907
0
#endif
908
0
        case XML_WITH_C14N:
909
0
#ifdef LIBXML_C14N_ENABLED
910
0
            return(1);
911
#else
912
            return(0);
913
#endif
914
0
        case XML_WITH_CATALOG:
915
0
#ifdef LIBXML_CATALOG_ENABLED
916
0
            return(1);
917
#else
918
            return(0);
919
#endif
920
0
        case XML_WITH_XPATH:
921
0
#ifdef LIBXML_XPATH_ENABLED
922
0
            return(1);
923
#else
924
            return(0);
925
#endif
926
0
        case XML_WITH_XPTR:
927
0
#ifdef LIBXML_XPTR_ENABLED
928
0
            return(1);
929
#else
930
            return(0);
931
#endif
932
0
        case XML_WITH_XINCLUDE:
933
0
#ifdef LIBXML_XINCLUDE_ENABLED
934
0
            return(1);
935
#else
936
            return(0);
937
#endif
938
0
        case XML_WITH_ICONV:
939
0
#ifdef LIBXML_ICONV_ENABLED
940
0
            return(1);
941
#else
942
            return(0);
943
#endif
944
0
        case XML_WITH_ISO8859X:
945
0
#ifdef LIBXML_ISO8859X_ENABLED
946
0
            return(1);
947
#else
948
            return(0);
949
#endif
950
0
        case XML_WITH_UNICODE:
951
0
#ifdef LIBXML_UNICODE_ENABLED
952
0
            return(1);
953
#else
954
            return(0);
955
#endif
956
0
        case XML_WITH_REGEXP:
957
0
#ifdef LIBXML_REGEXP_ENABLED
958
0
            return(1);
959
#else
960
            return(0);
961
#endif
962
0
        case XML_WITH_AUTOMATA:
963
0
#ifdef LIBXML_AUTOMATA_ENABLED
964
0
            return(1);
965
#else
966
            return(0);
967
#endif
968
0
        case XML_WITH_EXPR:
969
#ifdef LIBXML_EXPR_ENABLED
970
            return(1);
971
#else
972
0
            return(0);
973
0
#endif
974
0
        case XML_WITH_SCHEMAS:
975
0
#ifdef LIBXML_SCHEMAS_ENABLED
976
0
            return(1);
977
#else
978
            return(0);
979
#endif
980
0
        case XML_WITH_SCHEMATRON:
981
0
#ifdef LIBXML_SCHEMATRON_ENABLED
982
0
            return(1);
983
#else
984
            return(0);
985
#endif
986
0
        case XML_WITH_MODULES:
987
0
#ifdef LIBXML_MODULES_ENABLED
988
0
            return(1);
989
#else
990
            return(0);
991
#endif
992
0
        case XML_WITH_DEBUG:
993
#ifdef LIBXML_DEBUG_ENABLED
994
            return(1);
995
#else
996
0
            return(0);
997
0
#endif
998
0
        case XML_WITH_DEBUG_MEM:
999
#ifdef DEBUG_MEMORY_LOCATION
1000
            return(1);
1001
#else
1002
0
            return(0);
1003
0
#endif
1004
0
        case XML_WITH_DEBUG_RUN:
1005
0
            return(0);
1006
0
        case XML_WITH_ZLIB:
1007
0
#ifdef LIBXML_ZLIB_ENABLED
1008
0
            return(1);
1009
#else
1010
            return(0);
1011
#endif
1012
0
        case XML_WITH_LZMA:
1013
0
#ifdef LIBXML_LZMA_ENABLED
1014
0
            return(1);
1015
#else
1016
            return(0);
1017
#endif
1018
0
        case XML_WITH_ICU:
1019
#ifdef LIBXML_ICU_ENABLED
1020
            return(1);
1021
#else
1022
0
            return(0);
1023
0
#endif
1024
0
        default:
1025
0
      break;
1026
0
     }
1027
0
     return(0);
1028
0
}
1029
1030
/************************************************************************
1031
 *                  *
1032
 *    SAX2 defaulted attributes handling      *
1033
 *                  *
1034
 ************************************************************************/
1035
1036
/**
1037
 * xmlDetectSAX2:
1038
 * @ctxt:  an XML parser context
1039
 *
1040
 * Do the SAX2 detection and specific initialization
1041
 */
1042
static void
1043
4.78M
xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
1044
4.78M
    xmlSAXHandlerPtr sax;
1045
1046
    /* Avoid unused variable warning if features are disabled. */
1047
4.78M
    (void) sax;
1048
1049
4.78M
    if (ctxt == NULL) return;
1050
4.78M
    sax = ctxt->sax;
1051
4.78M
#ifdef LIBXML_SAX1_ENABLED
1052
4.78M
    if ((sax) &&  (sax->initialized == XML_SAX2_MAGIC) &&
1053
4.78M
        ((sax->startElementNs != NULL) ||
1054
3.08M
         (sax->endElementNs != NULL) ||
1055
3.08M
         ((sax->startElement == NULL) && (sax->endElement == NULL))))
1056
3.08M
        ctxt->sax2 = 1;
1057
#else
1058
    ctxt->sax2 = 1;
1059
#endif /* LIBXML_SAX1_ENABLED */
1060
1061
4.78M
    ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
1062
4.78M
    ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
1063
4.78M
    ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
1064
4.78M
    if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
1065
4.78M
    (ctxt->str_xml_ns == NULL)) {
1066
0
        xmlErrMemory(ctxt, NULL);
1067
0
    }
1068
4.78M
}
1069
1070
typedef struct _xmlDefAttrs xmlDefAttrs;
1071
typedef xmlDefAttrs *xmlDefAttrsPtr;
1072
struct _xmlDefAttrs {
1073
    int nbAttrs;  /* number of defaulted attributes on that element */
1074
    int maxAttrs;       /* the size of the array */
1075
#if __STDC_VERSION__ >= 199901L
1076
    /* Using a C99 flexible array member avoids UBSan errors. */
1077
    const xmlChar *values[]; /* array of localname/prefix/values/external */
1078
#else
1079
    const xmlChar *values[5];
1080
#endif
1081
};
1082
1083
/**
1084
 * xmlAttrNormalizeSpace:
1085
 * @src: the source string
1086
 * @dst: the target string
1087
 *
1088
 * Normalize the space in non CDATA attribute values:
1089
 * If the attribute type is not CDATA, then the XML processor MUST further
1090
 * process the normalized attribute value by discarding any leading and
1091
 * trailing space (#x20) characters, and by replacing sequences of space
1092
 * (#x20) characters by a single space (#x20) character.
1093
 * Note that the size of dst need to be at least src, and if one doesn't need
1094
 * to preserve dst (and it doesn't come from a dictionary or read-only) then
1095
 * passing src as dst is just fine.
1096
 *
1097
 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1098
 *         is needed.
1099
 */
1100
static xmlChar *
1101
xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
1102
325k
{
1103
325k
    if ((src == NULL) || (dst == NULL))
1104
0
        return(NULL);
1105
1106
374k
    while (*src == 0x20) src++;
1107
3.61M
    while (*src != 0) {
1108
3.28M
  if (*src == 0x20) {
1109
772k
      while (*src == 0x20) src++;
1110
220k
      if (*src != 0)
1111
199k
    *dst++ = 0x20;
1112
3.06M
  } else {
1113
3.06M
      *dst++ = *src++;
1114
3.06M
  }
1115
3.28M
    }
1116
325k
    *dst = 0;
1117
325k
    if (dst == src)
1118
293k
       return(NULL);
1119
32.1k
    return(dst);
1120
325k
}
1121
1122
/**
1123
 * xmlAttrNormalizeSpace2:
1124
 * @src: the source string
1125
 *
1126
 * Normalize the space in non CDATA attribute values, a slightly more complex
1127
 * front end to avoid allocation problems when running on attribute values
1128
 * coming from the input.
1129
 *
1130
 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1131
 *         is needed.
1132
 */
1133
static const xmlChar *
1134
xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
1135
73.6k
{
1136
73.6k
    int i;
1137
73.6k
    int remove_head = 0;
1138
73.6k
    int need_realloc = 0;
1139
73.6k
    const xmlChar *cur;
1140
1141
73.6k
    if ((ctxt == NULL) || (src == NULL) || (len == NULL))
1142
0
        return(NULL);
1143
73.6k
    i = *len;
1144
73.6k
    if (i <= 0)
1145
3.28k
        return(NULL);
1146
1147
70.3k
    cur = src;
1148
86.0k
    while (*cur == 0x20) {
1149
15.7k
        cur++;
1150
15.7k
  remove_head++;
1151
15.7k
    }
1152
2.38M
    while (*cur != 0) {
1153
2.33M
  if (*cur == 0x20) {
1154
189k
      cur++;
1155
189k
      if ((*cur == 0x20) || (*cur == 0)) {
1156
13.3k
          need_realloc = 1;
1157
13.3k
    break;
1158
13.3k
      }
1159
189k
  } else
1160
2.14M
      cur++;
1161
2.33M
    }
1162
70.3k
    if (need_realloc) {
1163
13.3k
        xmlChar *ret;
1164
1165
13.3k
  ret = xmlStrndup(src + remove_head, i - remove_head + 1);
1166
13.3k
  if (ret == NULL) {
1167
0
      xmlErrMemory(ctxt, NULL);
1168
0
      return(NULL);
1169
0
  }
1170
13.3k
  xmlAttrNormalizeSpace(ret, ret);
1171
13.3k
  *len = strlen((const char *)ret);
1172
13.3k
        return(ret);
1173
57.0k
    } else if (remove_head) {
1174
1.40k
        *len -= remove_head;
1175
1.40k
        memmove(src, src + remove_head, 1 + *len);
1176
1.40k
  return(src);
1177
1.40k
    }
1178
55.6k
    return(NULL);
1179
70.3k
}
1180
1181
/**
1182
 * xmlAddDefAttrs:
1183
 * @ctxt:  an XML parser context
1184
 * @fullname:  the element fullname
1185
 * @fullattr:  the attribute fullname
1186
 * @value:  the attribute value
1187
 *
1188
 * Add a defaulted attribute for an element
1189
 */
1190
static void
1191
xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1192
               const xmlChar *fullname,
1193
               const xmlChar *fullattr,
1194
517k
               const xmlChar *value) {
1195
517k
    xmlDefAttrsPtr defaults;
1196
517k
    int len;
1197
517k
    const xmlChar *name;
1198
517k
    const xmlChar *prefix;
1199
1200
    /*
1201
     * Allows to detect attribute redefinitions
1202
     */
1203
517k
    if (ctxt->attsSpecial != NULL) {
1204
461k
        if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1205
2.00k
      return;
1206
461k
    }
1207
1208
515k
    if (ctxt->attsDefault == NULL) {
1209
88.7k
        ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
1210
88.7k
  if (ctxt->attsDefault == NULL)
1211
0
      goto mem_error;
1212
88.7k
    }
1213
1214
    /*
1215
     * split the element name into prefix:localname , the string found
1216
     * are within the DTD and then not associated to namespace names.
1217
     */
1218
515k
    name = xmlSplitQName3(fullname, &len);
1219
515k
    if (name == NULL) {
1220
492k
        name = xmlDictLookup(ctxt->dict, fullname, -1);
1221
492k
  prefix = NULL;
1222
492k
    } else {
1223
22.5k
        name = xmlDictLookup(ctxt->dict, name, -1);
1224
22.5k
  prefix = xmlDictLookup(ctxt->dict, fullname, len);
1225
22.5k
    }
1226
1227
    /*
1228
     * make sure there is some storage
1229
     */
1230
515k
    defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1231
515k
    if (defaults == NULL) {
1232
307k
        defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
1233
307k
                     (4 * 5) * sizeof(const xmlChar *));
1234
307k
  if (defaults == NULL)
1235
0
      goto mem_error;
1236
307k
  defaults->nbAttrs = 0;
1237
307k
  defaults->maxAttrs = 4;
1238
307k
  if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1239
307k
                          defaults, NULL) < 0) {
1240
0
      xmlFree(defaults);
1241
0
      goto mem_error;
1242
0
  }
1243
307k
    } else if (defaults->nbAttrs >= defaults->maxAttrs) {
1244
3.40k
        xmlDefAttrsPtr temp;
1245
1246
3.40k
        temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
1247
3.40k
           (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
1248
3.40k
  if (temp == NULL)
1249
0
      goto mem_error;
1250
3.40k
  defaults = temp;
1251
3.40k
  defaults->maxAttrs *= 2;
1252
3.40k
  if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1253
3.40k
                          defaults, NULL) < 0) {
1254
0
      xmlFree(defaults);
1255
0
      goto mem_error;
1256
0
  }
1257
3.40k
    }
1258
1259
    /*
1260
     * Split the element name into prefix:localname , the string found
1261
     * are within the DTD and hen not associated to namespace names.
1262
     */
1263
515k
    name = xmlSplitQName3(fullattr, &len);
1264
515k
    if (name == NULL) {
1265
452k
        name = xmlDictLookup(ctxt->dict, fullattr, -1);
1266
452k
  prefix = NULL;
1267
452k
    } else {
1268
62.6k
        name = xmlDictLookup(ctxt->dict, name, -1);
1269
62.6k
  prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1270
62.6k
    }
1271
1272
515k
    defaults->values[5 * defaults->nbAttrs] = name;
1273
515k
    defaults->values[5 * defaults->nbAttrs + 1] = prefix;
1274
    /* intern the string and precompute the end */
1275
515k
    len = xmlStrlen(value);
1276
515k
    value = xmlDictLookup(ctxt->dict, value, len);
1277
515k
    if (value == NULL)
1278
0
        goto mem_error;
1279
515k
    defaults->values[5 * defaults->nbAttrs + 2] = value;
1280
515k
    defaults->values[5 * defaults->nbAttrs + 3] = value + len;
1281
515k
    if (ctxt->external)
1282
149k
        defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
1283
365k
    else
1284
365k
        defaults->values[5 * defaults->nbAttrs + 4] = NULL;
1285
515k
    defaults->nbAttrs++;
1286
1287
515k
    return;
1288
1289
0
mem_error:
1290
0
    xmlErrMemory(ctxt, NULL);
1291
0
    return;
1292
515k
}
1293
1294
/**
1295
 * xmlAddSpecialAttr:
1296
 * @ctxt:  an XML parser context
1297
 * @fullname:  the element fullname
1298
 * @fullattr:  the attribute fullname
1299
 * @type:  the attribute type
1300
 *
1301
 * Register this attribute type
1302
 */
1303
static void
1304
xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1305
      const xmlChar *fullname,
1306
      const xmlChar *fullattr,
1307
      int type)
1308
8.32M
{
1309
8.32M
    if (ctxt->attsSpecial == NULL) {
1310
183k
        ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
1311
183k
  if (ctxt->attsSpecial == NULL)
1312
0
      goto mem_error;
1313
183k
    }
1314
1315
8.32M
    if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1316
17.4k
        return;
1317
1318
8.30M
    xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1319
8.30M
                     (void *) (ptrdiff_t) type);
1320
8.30M
    return;
1321
1322
0
mem_error:
1323
0
    xmlErrMemory(ctxt, NULL);
1324
0
    return;
1325
8.32M
}
1326
1327
/**
1328
 * xmlCleanSpecialAttrCallback:
1329
 *
1330
 * Removes CDATA attributes from the special attribute table
1331
 */
1332
static void
1333
xmlCleanSpecialAttrCallback(void *payload, void *data,
1334
                            const xmlChar *fullname, const xmlChar *fullattr,
1335
5.65M
                            const xmlChar *unused ATTRIBUTE_UNUSED) {
1336
5.65M
    xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1337
1338
5.65M
    if (((ptrdiff_t) payload) == XML_ATTRIBUTE_CDATA) {
1339
2.28M
        xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1340
2.28M
    }
1341
5.65M
}
1342
1343
/**
1344
 * xmlCleanSpecialAttr:
1345
 * @ctxt:  an XML parser context
1346
 *
1347
 * Trim the list of attributes defined to remove all those of type
1348
 * CDATA as they are not special. This call should be done when finishing
1349
 * to parse the DTD and before starting to parse the document root.
1350
 */
1351
static void
1352
xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1353
825k
{
1354
825k
    if (ctxt->attsSpecial == NULL)
1355
705k
        return;
1356
1357
119k
    xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1358
1359
119k
    if (xmlHashSize(ctxt->attsSpecial) == 0) {
1360
22.0k
        xmlHashFree(ctxt->attsSpecial, NULL);
1361
22.0k
        ctxt->attsSpecial = NULL;
1362
22.0k
    }
1363
119k
    return;
1364
825k
}
1365
1366
/**
1367
 * xmlCheckLanguageID:
1368
 * @lang:  pointer to the string value
1369
 *
1370
 * Checks that the value conforms to the LanguageID production:
1371
 *
1372
 * NOTE: this is somewhat deprecated, those productions were removed from
1373
 *       the XML Second edition.
1374
 *
1375
 * [33] LanguageID ::= Langcode ('-' Subcode)*
1376
 * [34] Langcode ::= ISO639Code |  IanaCode |  UserCode
1377
 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1378
 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1379
 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1380
 * [38] Subcode ::= ([a-z] | [A-Z])+
1381
 *
1382
 * The current REC reference the successors of RFC 1766, currently 5646
1383
 *
1384
 * http://www.rfc-editor.org/rfc/rfc5646.txt
1385
 * langtag       = language
1386
 *                 ["-" script]
1387
 *                 ["-" region]
1388
 *                 *("-" variant)
1389
 *                 *("-" extension)
1390
 *                 ["-" privateuse]
1391
 * language      = 2*3ALPHA            ; shortest ISO 639 code
1392
 *                 ["-" extlang]       ; sometimes followed by
1393
 *                                     ; extended language subtags
1394
 *               / 4ALPHA              ; or reserved for future use
1395
 *               / 5*8ALPHA            ; or registered language subtag
1396
 *
1397
 * extlang       = 3ALPHA              ; selected ISO 639 codes
1398
 *                 *2("-" 3ALPHA)      ; permanently reserved
1399
 *
1400
 * script        = 4ALPHA              ; ISO 15924 code
1401
 *
1402
 * region        = 2ALPHA              ; ISO 3166-1 code
1403
 *               / 3DIGIT              ; UN M.49 code
1404
 *
1405
 * variant       = 5*8alphanum         ; registered variants
1406
 *               / (DIGIT 3alphanum)
1407
 *
1408
 * extension     = singleton 1*("-" (2*8alphanum))
1409
 *
1410
 *                                     ; Single alphanumerics
1411
 *                                     ; "x" reserved for private use
1412
 * singleton     = DIGIT               ; 0 - 9
1413
 *               / %x41-57             ; A - W
1414
 *               / %x59-5A             ; Y - Z
1415
 *               / %x61-77             ; a - w
1416
 *               / %x79-7A             ; y - z
1417
 *
1418
 * it sounds right to still allow Irregular i-xxx IANA and user codes too
1419
 * The parser below doesn't try to cope with extension or privateuse
1420
 * that could be added but that's not interoperable anyway
1421
 *
1422
 * Returns 1 if correct 0 otherwise
1423
 **/
1424
int
1425
xmlCheckLanguageID(const xmlChar * lang)
1426
12.3k
{
1427
12.3k
    const xmlChar *cur = lang, *nxt;
1428
1429
12.3k
    if (cur == NULL)
1430
546
        return (0);
1431
11.7k
    if (((cur[0] == 'i') && (cur[1] == '-')) ||
1432
11.7k
        ((cur[0] == 'I') && (cur[1] == '-')) ||
1433
11.7k
        ((cur[0] == 'x') && (cur[1] == '-')) ||
1434
11.7k
        ((cur[0] == 'X') && (cur[1] == '-'))) {
1435
        /*
1436
         * Still allow IANA code and user code which were coming
1437
         * from the previous version of the XML-1.0 specification
1438
         * it's deprecated but we should not fail
1439
         */
1440
2
        cur += 2;
1441
4
        while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1442
4
               ((cur[0] >= 'a') && (cur[0] <= 'z')))
1443
2
            cur++;
1444
2
        return(cur[0] == 0);
1445
2
    }
1446
11.7k
    nxt = cur;
1447
44.6k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1448
44.6k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1449
32.9k
           nxt++;
1450
11.7k
    if (nxt - cur >= 4) {
1451
        /*
1452
         * Reserved
1453
         */
1454
848
        if ((nxt - cur > 8) || (nxt[0] != 0))
1455
693
            return(0);
1456
155
        return(1);
1457
848
    }
1458
10.9k
    if (nxt - cur < 2)
1459
1.21k
        return(0);
1460
    /* we got an ISO 639 code */
1461
9.73k
    if (nxt[0] == 0)
1462
7.83k
        return(1);
1463
1.89k
    if (nxt[0] != '-')
1464
855
        return(0);
1465
1466
1.03k
    nxt++;
1467
1.03k
    cur = nxt;
1468
    /* now we can have extlang or script or region or variant */
1469
1.03k
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1470
128
        goto region_m49;
1471
1472
3.10k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1473
3.10k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1474
2.19k
           nxt++;
1475
909
    if (nxt - cur == 4)
1476
41
        goto script;
1477
868
    if (nxt - cur == 2)
1478
626
        goto region;
1479
242
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1480
47
        goto variant;
1481
195
    if (nxt - cur != 3)
1482
148
        return(0);
1483
    /* we parsed an extlang */
1484
47
    if (nxt[0] == 0)
1485
11
        return(1);
1486
36
    if (nxt[0] != '-')
1487
33
        return(0);
1488
1489
3
    nxt++;
1490
3
    cur = nxt;
1491
    /* now we can have script or region or variant */
1492
3
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1493
3
        goto region_m49;
1494
1495
0
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1496
0
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1497
0
           nxt++;
1498
0
    if (nxt - cur == 2)
1499
0
        goto region;
1500
0
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1501
0
        goto variant;
1502
0
    if (nxt - cur != 4)
1503
0
        return(0);
1504
    /* we parsed a script */
1505
41
script:
1506
41
    if (nxt[0] == 0)
1507
10
        return(1);
1508
31
    if (nxt[0] != '-')
1509
17
        return(0);
1510
1511
14
    nxt++;
1512
14
    cur = nxt;
1513
    /* now we can have region or variant */
1514
14
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1515
8
        goto region_m49;
1516
1517
24
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1518
24
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1519
18
           nxt++;
1520
1521
6
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1522
3
        goto variant;
1523
3
    if (nxt - cur != 2)
1524
3
        return(0);
1525
    /* we parsed a region */
1526
691
region:
1527
691
    if (nxt[0] == 0)
1528
595
        return(1);
1529
96
    if (nxt[0] != '-')
1530
79
        return(0);
1531
1532
17
    nxt++;
1533
17
    cur = nxt;
1534
    /* now we can just have a variant */
1535
44
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1536
44
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1537
27
           nxt++;
1538
1539
17
    if ((nxt - cur < 5) || (nxt - cur > 8))
1540
14
        return(0);
1541
1542
    /* we parsed a variant */
1543
53
variant:
1544
53
    if (nxt[0] == 0)
1545
0
        return(1);
1546
53
    if (nxt[0] != '-')
1547
12
        return(0);
1548
    /* extensions and private use subtags not checked */
1549
41
    return (1);
1550
1551
139
region_m49:
1552
139
    if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
1553
139
        ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
1554
65
        nxt += 3;
1555
65
        goto region;
1556
65
    }
1557
74
    return(0);
1558
139
}
1559
1560
/************************************************************************
1561
 *                  *
1562
 *    Parser stacks related functions and macros    *
1563
 *                  *
1564
 ************************************************************************/
1565
1566
static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1567
                                            const xmlChar ** str);
1568
1569
#ifdef SAX2
1570
/**
1571
 * nsPush:
1572
 * @ctxt:  an XML parser context
1573
 * @prefix:  the namespace prefix or NULL
1574
 * @URL:  the namespace name
1575
 *
1576
 * Pushes a new parser namespace on top of the ns stack
1577
 *
1578
 * Returns -1 in case of error, -2 if the namespace should be discarded
1579
 *     and the index in the stack otherwise.
1580
 */
1581
static int
1582
nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1583
390k
{
1584
390k
    if (ctxt->options & XML_PARSE_NSCLEAN) {
1585
154k
        int i;
1586
222k
  for (i = ctxt->nsNr - 2;i >= 0;i -= 2) {
1587
78.0k
      if (ctxt->nsTab[i] == prefix) {
1588
    /* in scope */
1589
9.98k
          if (ctxt->nsTab[i + 1] == URL)
1590
3.34k
        return(-2);
1591
    /* out of scope keep it */
1592
6.63k
    break;
1593
9.98k
      }
1594
78.0k
  }
1595
154k
    }
1596
386k
    if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1597
252k
  ctxt->nsMax = 10;
1598
252k
  ctxt->nsNr = 0;
1599
252k
  ctxt->nsTab = (const xmlChar **)
1600
252k
                xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1601
252k
  if (ctxt->nsTab == NULL) {
1602
0
      xmlErrMemory(ctxt, NULL);
1603
0
      ctxt->nsMax = 0;
1604
0
            return (-1);
1605
0
  }
1606
252k
    } else if (ctxt->nsNr >= ctxt->nsMax) {
1607
972
        const xmlChar ** tmp;
1608
972
        ctxt->nsMax *= 2;
1609
972
        tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1610
972
            ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1611
972
        if (tmp == NULL) {
1612
0
            xmlErrMemory(ctxt, NULL);
1613
0
      ctxt->nsMax /= 2;
1614
0
            return (-1);
1615
0
        }
1616
972
  ctxt->nsTab = tmp;
1617
972
    }
1618
386k
    ctxt->nsTab[ctxt->nsNr++] = prefix;
1619
386k
    ctxt->nsTab[ctxt->nsNr++] = URL;
1620
386k
    return (ctxt->nsNr);
1621
386k
}
1622
/**
1623
 * nsPop:
1624
 * @ctxt: an XML parser context
1625
 * @nr:  the number to pop
1626
 *
1627
 * Pops the top @nr parser prefix/namespace from the ns stack
1628
 *
1629
 * Returns the number of namespaces removed
1630
 */
1631
static int
1632
nsPop(xmlParserCtxtPtr ctxt, int nr)
1633
118k
{
1634
118k
    int i;
1635
1636
118k
    if (ctxt->nsTab == NULL) return(0);
1637
118k
    if (ctxt->nsNr < nr) {
1638
0
        xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1639
0
        nr = ctxt->nsNr;
1640
0
    }
1641
118k
    if (ctxt->nsNr <= 0)
1642
0
        return (0);
1643
1644
426k
    for (i = 0;i < nr;i++) {
1645
307k
         ctxt->nsNr--;
1646
307k
   ctxt->nsTab[ctxt->nsNr] = NULL;
1647
307k
    }
1648
118k
    return(nr);
1649
118k
}
1650
#endif
1651
1652
static int
1653
489k
xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1654
489k
    const xmlChar **atts;
1655
489k
    int *attallocs;
1656
489k
    int maxatts;
1657
1658
489k
    if (nr + 5 > ctxt->maxatts) {
1659
489k
  maxatts = ctxt->maxatts == 0 ? 55 : (nr + 5) * 2;
1660
489k
  atts = (const xmlChar **) xmlMalloc(
1661
489k
             maxatts * sizeof(const xmlChar *));
1662
489k
  if (atts == NULL) goto mem_error;
1663
489k
  attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1664
489k
                               (maxatts / 5) * sizeof(int));
1665
489k
  if (attallocs == NULL) {
1666
0
            xmlFree(atts);
1667
0
            goto mem_error;
1668
0
        }
1669
489k
        if (ctxt->maxatts > 0)
1670
212
            memcpy(atts, ctxt->atts, ctxt->maxatts * sizeof(const xmlChar *));
1671
489k
        xmlFree(ctxt->atts);
1672
489k
  ctxt->atts = atts;
1673
489k
  ctxt->attallocs = attallocs;
1674
489k
  ctxt->maxatts = maxatts;
1675
489k
    }
1676
489k
    return(ctxt->maxatts);
1677
0
mem_error:
1678
0
    xmlErrMemory(ctxt, NULL);
1679
0
    return(-1);
1680
489k
}
1681
1682
/**
1683
 * inputPush:
1684
 * @ctxt:  an XML parser context
1685
 * @value:  the parser input
1686
 *
1687
 * Pushes a new parser input on top of the input stack
1688
 *
1689
 * Returns -1 in case of error, the index in the stack otherwise
1690
 */
1691
int
1692
inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1693
19.1M
{
1694
19.1M
    if ((ctxt == NULL) || (value == NULL))
1695
0
        return(-1);
1696
19.1M
    if (ctxt->inputNr >= ctxt->inputMax) {
1697
261
        size_t newSize = ctxt->inputMax * 2;
1698
261
        xmlParserInputPtr *tmp;
1699
1700
261
        tmp = (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1701
261
                                               newSize * sizeof(*tmp));
1702
261
        if (tmp == NULL) {
1703
0
            xmlErrMemory(ctxt, NULL);
1704
0
            return (-1);
1705
0
        }
1706
261
        ctxt->inputTab = tmp;
1707
261
        ctxt->inputMax = newSize;
1708
261
    }
1709
19.1M
    ctxt->inputTab[ctxt->inputNr] = value;
1710
19.1M
    ctxt->input = value;
1711
19.1M
    return (ctxt->inputNr++);
1712
19.1M
}
1713
/**
1714
 * inputPop:
1715
 * @ctxt: an XML parser context
1716
 *
1717
 * Pops the top parser input from the input stack
1718
 *
1719
 * Returns the input just removed
1720
 */
1721
xmlParserInputPtr
1722
inputPop(xmlParserCtxtPtr ctxt)
1723
26.4M
{
1724
26.4M
    xmlParserInputPtr ret;
1725
1726
26.4M
    if (ctxt == NULL)
1727
0
        return(NULL);
1728
26.4M
    if (ctxt->inputNr <= 0)
1729
7.44M
        return (NULL);
1730
19.0M
    ctxt->inputNr--;
1731
19.0M
    if (ctxt->inputNr > 0)
1732
15.7M
        ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1733
3.33M
    else
1734
3.33M
        ctxt->input = NULL;
1735
19.0M
    ret = ctxt->inputTab[ctxt->inputNr];
1736
19.0M
    ctxt->inputTab[ctxt->inputNr] = NULL;
1737
19.0M
    return (ret);
1738
26.4M
}
1739
/**
1740
 * nodePush:
1741
 * @ctxt:  an XML parser context
1742
 * @value:  the element node
1743
 *
1744
 * Pushes a new element node on top of the node stack
1745
 *
1746
 * Returns -1 in case of error, the index in the stack otherwise
1747
 */
1748
int
1749
nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1750
39.1M
{
1751
39.1M
    if (ctxt == NULL) return(0);
1752
39.1M
    if (ctxt->nodeNr >= ctxt->nodeMax) {
1753
19.3k
        xmlNodePtr *tmp;
1754
1755
19.3k
  tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1756
19.3k
                                      ctxt->nodeMax * 2 *
1757
19.3k
                                      sizeof(ctxt->nodeTab[0]));
1758
19.3k
        if (tmp == NULL) {
1759
0
            xmlErrMemory(ctxt, NULL);
1760
0
            return (-1);
1761
0
        }
1762
19.3k
        ctxt->nodeTab = tmp;
1763
19.3k
  ctxt->nodeMax *= 2;
1764
19.3k
    }
1765
39.1M
    if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1766
39.1M
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
1767
0
  xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
1768
0
     "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
1769
0
        xmlParserMaxDepth);
1770
0
  xmlHaltParser(ctxt);
1771
0
  return(-1);
1772
0
    }
1773
39.1M
    ctxt->nodeTab[ctxt->nodeNr] = value;
1774
39.1M
    ctxt->node = value;
1775
39.1M
    return (ctxt->nodeNr++);
1776
39.1M
}
1777
1778
/**
1779
 * nodePop:
1780
 * @ctxt: an XML parser context
1781
 *
1782
 * Pops the top element node from the node stack
1783
 *
1784
 * Returns the node just removed
1785
 */
1786
xmlNodePtr
1787
nodePop(xmlParserCtxtPtr ctxt)
1788
37.3M
{
1789
37.3M
    xmlNodePtr ret;
1790
1791
37.3M
    if (ctxt == NULL) return(NULL);
1792
37.3M
    if (ctxt->nodeNr <= 0)
1793
385k
        return (NULL);
1794
36.9M
    ctxt->nodeNr--;
1795
36.9M
    if (ctxt->nodeNr > 0)
1796
35.4M
        ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1797
1.58M
    else
1798
1.58M
        ctxt->node = NULL;
1799
36.9M
    ret = ctxt->nodeTab[ctxt->nodeNr];
1800
36.9M
    ctxt->nodeTab[ctxt->nodeNr] = NULL;
1801
36.9M
    return (ret);
1802
37.3M
}
1803
1804
/**
1805
 * nameNsPush:
1806
 * @ctxt:  an XML parser context
1807
 * @value:  the element name
1808
 * @prefix:  the element prefix
1809
 * @URI:  the element namespace name
1810
 * @line:  the current line number for error messages
1811
 * @nsNr:  the number of namespaces pushed on the namespace table
1812
 *
1813
 * Pushes a new element name/prefix/URL on top of the name stack
1814
 *
1815
 * Returns -1 in case of error, the index in the stack otherwise
1816
 */
1817
static int
1818
nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1819
           const xmlChar *prefix, const xmlChar *URI, int line, int nsNr)
1820
36.5M
{
1821
36.5M
    xmlStartTag *tag;
1822
1823
36.5M
    if (ctxt->nameNr >= ctxt->nameMax) {
1824
31.0k
        const xmlChar * *tmp;
1825
31.0k
        xmlStartTag *tmp2;
1826
31.0k
        ctxt->nameMax *= 2;
1827
31.0k
        tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1828
31.0k
                                    ctxt->nameMax *
1829
31.0k
                                    sizeof(ctxt->nameTab[0]));
1830
31.0k
        if (tmp == NULL) {
1831
0
      ctxt->nameMax /= 2;
1832
0
      goto mem_error;
1833
0
        }
1834
31.0k
  ctxt->nameTab = tmp;
1835
31.0k
        tmp2 = (xmlStartTag *) xmlRealloc((void * *)ctxt->pushTab,
1836
31.0k
                                    ctxt->nameMax *
1837
31.0k
                                    sizeof(ctxt->pushTab[0]));
1838
31.0k
        if (tmp2 == NULL) {
1839
0
      ctxt->nameMax /= 2;
1840
0
      goto mem_error;
1841
0
        }
1842
31.0k
  ctxt->pushTab = tmp2;
1843
36.4M
    } else if (ctxt->pushTab == NULL) {
1844
1.72M
        ctxt->pushTab = (xmlStartTag *) xmlMalloc(ctxt->nameMax *
1845
1.72M
                                            sizeof(ctxt->pushTab[0]));
1846
1.72M
        if (ctxt->pushTab == NULL)
1847
0
            goto mem_error;
1848
1.72M
    }
1849
36.5M
    ctxt->nameTab[ctxt->nameNr] = value;
1850
36.5M
    ctxt->name = value;
1851
36.5M
    tag = &ctxt->pushTab[ctxt->nameNr];
1852
36.5M
    tag->prefix = prefix;
1853
36.5M
    tag->URI = URI;
1854
36.5M
    tag->line = line;
1855
36.5M
    tag->nsNr = nsNr;
1856
36.5M
    return (ctxt->nameNr++);
1857
0
mem_error:
1858
0
    xmlErrMemory(ctxt, NULL);
1859
0
    return (-1);
1860
36.5M
}
1861
#ifdef LIBXML_PUSH_ENABLED
1862
/**
1863
 * nameNsPop:
1864
 * @ctxt: an XML parser context
1865
 *
1866
 * Pops the top element/prefix/URI name from the name stack
1867
 *
1868
 * Returns the name just removed
1869
 */
1870
static const xmlChar *
1871
nameNsPop(xmlParserCtxtPtr ctxt)
1872
9.15M
{
1873
9.15M
    const xmlChar *ret;
1874
1875
9.15M
    if (ctxt->nameNr <= 0)
1876
0
        return (NULL);
1877
9.15M
    ctxt->nameNr--;
1878
9.15M
    if (ctxt->nameNr > 0)
1879
9.03M
        ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1880
119k
    else
1881
119k
        ctxt->name = NULL;
1882
9.15M
    ret = ctxt->nameTab[ctxt->nameNr];
1883
9.15M
    ctxt->nameTab[ctxt->nameNr] = NULL;
1884
9.15M
    return (ret);
1885
9.15M
}
1886
#endif /* LIBXML_PUSH_ENABLED */
1887
1888
/**
1889
 * namePush:
1890
 * @ctxt:  an XML parser context
1891
 * @value:  the element name
1892
 *
1893
 * Pushes a new element name on top of the name stack
1894
 *
1895
 * Returns -1 in case of error, the index in the stack otherwise
1896
 */
1897
int
1898
namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
1899
0
{
1900
0
    if (ctxt == NULL) return (-1);
1901
1902
0
    if (ctxt->nameNr >= ctxt->nameMax) {
1903
0
        const xmlChar * *tmp;
1904
0
        tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1905
0
                                    ctxt->nameMax * 2 *
1906
0
                                    sizeof(ctxt->nameTab[0]));
1907
0
        if (tmp == NULL) {
1908
0
      goto mem_error;
1909
0
        }
1910
0
  ctxt->nameTab = tmp;
1911
0
        ctxt->nameMax *= 2;
1912
0
    }
1913
0
    ctxt->nameTab[ctxt->nameNr] = value;
1914
0
    ctxt->name = value;
1915
0
    return (ctxt->nameNr++);
1916
0
mem_error:
1917
0
    xmlErrMemory(ctxt, NULL);
1918
0
    return (-1);
1919
0
}
1920
/**
1921
 * namePop:
1922
 * @ctxt: an XML parser context
1923
 *
1924
 * Pops the top element name from the name stack
1925
 *
1926
 * Returns the name just removed
1927
 */
1928
const xmlChar *
1929
namePop(xmlParserCtxtPtr ctxt)
1930
24.5M
{
1931
24.5M
    const xmlChar *ret;
1932
1933
24.5M
    if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1934
0
        return (NULL);
1935
24.5M
    ctxt->nameNr--;
1936
24.5M
    if (ctxt->nameNr > 0)
1937
23.8M
        ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1938
697k
    else
1939
697k
        ctxt->name = NULL;
1940
24.5M
    ret = ctxt->nameTab[ctxt->nameNr];
1941
24.5M
    ctxt->nameTab[ctxt->nameNr] = NULL;
1942
24.5M
    return (ret);
1943
24.5M
}
1944
1945
46.1M
/*
 * spacePush:
 * Pushes @val on top of the ctxt->spaceTab stack, doubling the table
 * when it is full, and points ctxt->space at the new top entry.
 *
 * Returns the index of the pushed entry, or -1 on allocation failure.
 */
static int spacePush(xmlParserCtxtPtr ctxt, int val) {
    int slot;

    if (ctxt->spaceNr >= ctxt->spaceMax) {
        int *grown;

        ctxt->spaceMax *= 2;
        grown = (int *) xmlRealloc(ctxt->spaceTab,
                           ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
        if (grown == NULL) {
            xmlErrMemory(ctxt, NULL);
            /* Undo the capacity bump, the old table is still in use. */
            ctxt->spaceMax /=2;
            return(-1);
        }
        ctxt->spaceTab = grown;
    }

    slot = ctxt->spaceNr++;
    ctxt->spaceTab[slot] = val;
    ctxt->space = &ctxt->spaceTab[slot];
    return(slot);
}
1963
1964
44.3M
/*
 * spacePop:
 * Pops the top entry of the ctxt->spaceTab stack. ctxt->space is moved
 * to the new top entry, or to slot 0 when the stack becomes empty, and
 * the vacated slot is overwritten with -1.
 *
 * Returns the popped value, or 0 if the stack was already empty.
 */
static int spacePop(xmlParserCtxtPtr ctxt) {
    int top;
    int popped;

    if (ctxt->spaceNr <= 0)
        return(0);

    top = --ctxt->spaceNr;
    ctxt->space = &ctxt->spaceTab[(top > 0) ? (top - 1) : 0];

    popped = ctxt->spaceTab[top];
    ctxt->spaceTab[top] = -1;
    return(popped);
}
1976
1977
/*
1978
 * Macros for accessing the content. Those should be used only by the parser,
1979
 * and not exported.
1980
 *
1981
 * Dirty macros, i.e. one often needs to make assumptions on the context to
1982
 * use them
1983
 *
1984
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
1985
 *           To be used with extreme caution since operations consuming
1986
 *           characters may move the input buffer to a different location !
1987
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
1988
 *           This should be used internally by the parser
1989
 *           only to compare to ASCII values otherwise it would break when
1990
 *           running with UTF-8 encoding.
1991
 *   RAW     same as CUR but in the input buffer, bypass any token
1992
 *           extraction that may have been done
1993
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR it should be used only
1994
 *           to compare on ASCII based substring.
1995
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
1996
 *           strings without newlines within the parser.
1997
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII
1998
 *           defined char within the parser.
1999
 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
2000
 *
2001
 *   NEXT    Skip to the next character, this does the proper decoding
2002
 *           in UTF-8 mode. It also pop-up unfinished entities on the fly.
2003
 *   NEXTL(l) Skip the current unicode character of l xmlChars long.
2004
 *   CUR_CHAR(l) returns the current unicode character (int), set l
2005
 *           to the number of xmlChars used for the encoding [0-5].
2006
 *   CUR_SCHAR  same but operate on a string instead of the context
2007
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
2008
 *            the index
2009
 *   GROW, SHRINK  handling of input buffers
2010
 */
2011
2012
829M
/*
 * Raw accessors on the current input. All of them expect a variable
 * named `ctxt` to be in scope at the point of use.
 */
#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur
#define BASE_PTR ctxt->input->base

/*
 * CMPn(s, c1 .. cn): true when the first n bytes at s equal c1 .. cn.
 * The comparison short-circuits on the first mismatch. Every macro
 * parameter is parenthesized so that expression arguments are safe.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((unsigned char *) (s))[ 0 ] == (c1) && \
    ((unsigned char *) (s))[ 1 ] == (c2) && \
    ((unsigned char *) (s))[ 2 ] == (c3) && \
    ((unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((unsigned char *) (s))[ 9 ] == (c10) )

/*
 * SKIP(val): advance the cursor and the column counter by val bytes,
 * without any newline accounting, then refill the input if the cursor
 * landed on a 0 byte.
 */
#define SKIP(val) do {							\
    ctxt->input->cur += (val),ctxt->input->col+=(val);			\
    if (*ctxt->input->cur == 0)						\
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);			\
  } while (0)

/*
 * SKIPL(val): advance the cursor by val bytes one at a time, keeping
 * line/col up to date across '\n', then refill the input if the cursor
 * landed on a 0 byte.
 */
#define SKIPL(val) do {							\
    int skipl;								\
    for(skipl=0; skipl<(val); skipl++) {				\
	if (*(ctxt->input->cur) == '\n') {				\
	ctxt->input->line++; ctxt->input->col = 1;			\
	} else ctxt->input->col++;					\
	ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == 0)						\
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);			\
  } while (0)
2053
2054
256M
#define SHRINK if ((ctxt->progressive == 0) &&       \
2055
256M
       (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
2056
256M
       (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
2057
256M
  xmlSHRINK (ctxt);
2058
2059
4.88M
static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
2060
    /* Don't shrink memory buffers. */
2061
4.88M
    if ((ctxt->input->buf) &&
2062
4.88M
        ((ctxt->input->buf->encoder) || (ctxt->input->buf->readcallback)))
2063
39.7k
        xmlParserInputShrink(ctxt->input);
2064
4.88M
    if (*ctxt->input->cur == 0)
2065
219k
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2066
4.88M
}
2067
2068
1.02G
#define GROW if ((ctxt->progressive == 0) &&       \
2069
1.02G
     (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
2070
1.02G
  xmlGROW (ctxt);
2071
2072
89.0M
static void xmlGROW (xmlParserCtxtPtr ctxt) {
2073
89.0M
    ptrdiff_t curEnd = ctxt->input->end - ctxt->input->cur;
2074
89.0M
    ptrdiff_t curBase = ctxt->input->cur - ctxt->input->base;
2075
2076
89.0M
    if (((curEnd > XML_MAX_LOOKUP_LIMIT) ||
2077
89.0M
         (curBase > XML_MAX_LOOKUP_LIMIT)) &&
2078
89.0M
         ((ctxt->input->buf) &&
2079
0
          (ctxt->input->buf->readcallback != NULL)) &&
2080
89.0M
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
2081
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
2082
0
        xmlHaltParser(ctxt);
2083
0
  return;
2084
0
    }
2085
89.0M
    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2086
89.0M
    if ((ctxt->input->cur > ctxt->input->end) ||
2087
89.0M
        (ctxt->input->cur < ctxt->input->base)) {
2088
0
        xmlHaltParser(ctxt);
2089
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "cur index out of bound");
2090
0
  return;
2091
0
    }
2092
89.0M
    if ((ctxt->input->cur != NULL) && (*ctxt->input->cur == 0))
2093
2.89M
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2094
89.0M
}
2095
2096
326M
/* Skip blanks at the current position, see xmlSkipBlankChars(). */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

/* Advance to the next character through xmlNextChar(). */
#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: advance by exactly one byte and one column (no newline
 * accounting), refilling the input if the cursor lands on a 0 byte.
 */
#define NEXT1 {								\
	ctxt->input->col++;						\
	ctxt->input->cur++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }

/*
 * NEXTL(l): step over the current character, which is l bytes long,
 * updating line/col depending on whether it is a newline.
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += (l);						\
  } while (0)

/* Current character of the input / of string s; its length goes to l. */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * COPY_BUF(l,b,i,v): append character v to buffer b at index i and
 * advance i. A length l of 1 stores a single byte; any other length
 * goes through xmlCopyCharMultiByte(). This expands to a bare if/else
 * statement, so it must be followed by ';'. All parameters are
 * parenthesized so that expression arguments are safe.
 */
#define COPY_BUF(l,b,i,v)						\
    if ((l) == 1) (b)[(i)++] = (v);					\
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v))
2120
2121
/**
2122
 * xmlSkipBlankChars:
2123
 * @ctxt:  the XML parser context
2124
 *
2125
 * skip all blanks character found at that point in the input streams.
2126
 * It pops up finished entities in the process if allowable at that point.
2127
 *
2128
 * Returns the number of space chars skipped
2129
 */
2130
2131
int
2132
326M
xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
2133
326M
    int res = 0;
2134
2135
    /*
2136
     * It's Okay to use CUR/NEXT here since all the blanks are on
2137
     * the ASCII range.
2138
     */
2139
326M
    if (((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) ||
2140
326M
        (ctxt->instate == XML_PARSER_START)) {
2141
187M
  const xmlChar *cur;
2142
  /*
2143
   * if we are in the document content, go really fast
2144
   */
2145
187M
  cur = ctxt->input->cur;
2146
187M
  while (IS_BLANK_CH(*cur)) {
2147
80.2M
      if (*cur == '\n') {
2148
4.16M
    ctxt->input->line++; ctxt->input->col = 1;
2149
76.0M
      } else {
2150
76.0M
    ctxt->input->col++;
2151
76.0M
      }
2152
80.2M
      cur++;
2153
80.2M
      if (res < INT_MAX)
2154
80.2M
    res++;
2155
80.2M
      if (*cur == 0) {
2156
284k
    ctxt->input->cur = cur;
2157
284k
    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2158
284k
    cur = ctxt->input->cur;
2159
284k
      }
2160
80.2M
  }
2161
187M
  ctxt->input->cur = cur;
2162
187M
    } else {
2163
139M
        int expandPE = ((ctxt->external != 0) || (ctxt->inputNr != 1));
2164
2165
497M
  while (ctxt->instate != XML_PARSER_EOF) {
2166
497M
            if (IS_BLANK_CH(CUR)) { /* CHECKED tstblanks.xml */
2167
325M
    NEXT;
2168
325M
      } else if (CUR == '%') {
2169
                /*
2170
                 * Need to handle support of entities branching here
2171
                 */
2172
18.4M
          if ((expandPE == 0) || (IS_BLANK_CH(NXT(1))) || (NXT(1) == 0))
2173
1.62M
                    break;
2174
16.7M
          xmlParsePEReference(ctxt);
2175
153M
            } else if (CUR == 0) {
2176
15.8M
                unsigned long consumed;
2177
15.8M
                xmlEntityPtr ent;
2178
2179
15.8M
                if (ctxt->inputNr <= 1)
2180
161k
                    break;
2181
2182
15.6M
                consumed = ctxt->input->consumed;
2183
15.6M
                xmlSaturatedAddSizeT(&consumed,
2184
15.6M
                                     ctxt->input->cur - ctxt->input->base);
2185
2186
                /*
2187
                 * Add to sizeentities when parsing an external entity
2188
                 * for the first time.
2189
                 */
2190
15.6M
                ent = ctxt->input->entity;
2191
15.6M
                if ((ent->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
2192
15.6M
                    ((ent->flags & XML_ENT_PARSED) == 0)) {
2193
23.4k
                    ent->flags |= XML_ENT_PARSED;
2194
2195
23.4k
                    xmlSaturatedAdd(&ctxt->sizeentities, consumed);
2196
23.4k
                }
2197
2198
15.6M
                xmlParserEntityCheck(ctxt, consumed);
2199
2200
15.6M
                xmlPopInput(ctxt);
2201
137M
            } else {
2202
137M
                break;
2203
137M
            }
2204
2205
            /*
2206
             * Also increase the counter when entering or exiting a PERef.
2207
             * The spec says: "When a parameter-entity reference is recognized
2208
             * in the DTD and included, its replacement text MUST be enlarged
2209
             * by the attachment of one leading and one following space (#x20)
2210
             * character."
2211
             */
2212
358M
      if (res < INT_MAX)
2213
358M
    res++;
2214
358M
        }
2215
139M
    }
2216
326M
    return(res);
2217
326M
}
2218
2219
/************************************************************************
2220
 *                  *
2221
 *    Commodity functions to handle entities      *
2222
 *                  *
2223
 ************************************************************************/
2224
2225
/**
2226
 * xmlPopInput:
2227
 * @ctxt:  an XML parser context
2228
 *
2229
 * xmlPopInput: the current input pointed by ctxt->input came to an end
2230
 *          pop it and return the next char.
2231
 *
2232
 * Returns the current xmlChar in the parser context
2233
 */
2234
xmlChar
2235
15.6M
xmlPopInput(xmlParserCtxtPtr ctxt) {
2236
15.6M
    xmlParserInputPtr input;
2237
2238
15.6M
    if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
2239
15.6M
    if (xmlParserDebugEntities)
2240
0
  xmlGenericError(xmlGenericErrorContext,
2241
0
    "Popping input %d\n", ctxt->inputNr);
2242
15.6M
    if ((ctxt->inputNr > 1) && (ctxt->inSubset == 0) &&
2243
15.6M
        (ctxt->instate != XML_PARSER_EOF))
2244
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2245
0
                    "Unfinished entity outside the DTD");
2246
15.6M
    input = inputPop(ctxt);
2247
15.6M
    if (input->entity != NULL)
2248
15.6M
        input->entity->flags &= ~XML_ENT_EXPANDING;
2249
15.6M
    xmlFreeInputStream(input);
2250
15.6M
    if (*ctxt->input->cur == 0)
2251
5.63M
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2252
15.6M
    return(CUR);
2253
15.6M
}
2254
2255
/**
2256
 * xmlPushInput:
2257
 * @ctxt:  an XML parser context
2258
 * @input:  an XML parser input fragment (entity, XML fragment ...).
2259
 *
2260
 * xmlPushInput: switch to a new input stream which is stacked on top
2261
 *               of the previous one(s).
2262
 * Returns -1 in case of error or the index in the input stack
2263
 */
2264
int
2265
15.8M
xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
2266
15.8M
    int ret;
2267
15.8M
    if (input == NULL) return(-1);
2268
2269
15.8M
    if (xmlParserDebugEntities) {
2270
0
  if ((ctxt->input != NULL) && (ctxt->input->filename))
2271
0
      xmlGenericError(xmlGenericErrorContext,
2272
0
        "%s(%d): ", ctxt->input->filename,
2273
0
        ctxt->input->line);
2274
0
  xmlGenericError(xmlGenericErrorContext,
2275
0
    "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
2276
0
    }
2277
15.8M
    if (((ctxt->inputNr > 40) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2278
15.8M
        (ctxt->inputNr > 100)) {
2279
0
        xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2280
0
        while (ctxt->inputNr > 1)
2281
0
            xmlFreeInputStream(inputPop(ctxt));
2282
0
  return(-1);
2283
0
    }
2284
15.8M
    ret = inputPush(ctxt, input);
2285
15.8M
    if (ctxt->instate == XML_PARSER_EOF)
2286
0
        return(-1);
2287
15.8M
    GROW;
2288
15.8M
    return(ret);
2289
15.8M
}
2290
2291
/**
2292
 * xmlParseCharRef:
2293
 * @ctxt:  an XML parser context
2294
 *
2295
 * DEPRECATED: Internal function, don't use.
2296
 *
2297
 * Parse a numeric character reference. Always consumes '&'.
2298
 *
2299
 * [66] CharRef ::= '&#' [0-9]+ ';' |
2300
 *                  '&#x' [0-9a-fA-F]+ ';'
2301
 *
2302
 * [ WFC: Legal Character ]
2303
 * Characters referred to using character references must match the
2304
 * production for Char.
2305
 *
2306
 * Returns the value parsed (as an int), 0 in case of error
2307
 */
2308
int
2309
1.19M
xmlParseCharRef(xmlParserCtxtPtr ctxt) {
2310
1.19M
    int val = 0;
2311
1.19M
    int count = 0;
2312
2313
    /*
2314
     * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2315
     */
2316
1.19M
    if ((RAW == '&') && (NXT(1) == '#') &&
2317
1.19M
        (NXT(2) == 'x')) {
2318
276k
  SKIP(3);
2319
276k
  GROW;
2320
1.42M
  while (RAW != ';') { /* loop blocked by count */
2321
1.18M
      if (count++ > 20) {
2322
71.1k
    count = 0;
2323
71.1k
    GROW;
2324
71.1k
                if (ctxt->instate == XML_PARSER_EOF)
2325
0
                    return(0);
2326
71.1k
      }
2327
1.18M
      if ((RAW >= '0') && (RAW <= '9'))
2328
946k
          val = val * 16 + (CUR - '0');
2329
238k
      else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2330
177k
          val = val * 16 + (CUR - 'a') + 10;
2331
61.0k
      else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2332
28.6k
          val = val * 16 + (CUR - 'A') + 10;
2333
32.4k
      else {
2334
32.4k
    xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2335
32.4k
    val = 0;
2336
32.4k
    break;
2337
32.4k
      }
2338
1.15M
      if (val > 0x110000)
2339
782k
          val = 0x110000;
2340
2341
1.15M
      NEXT;
2342
1.15M
      count++;
2343
1.15M
  }
2344
276k
  if (RAW == ';') {
2345
      /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2346
244k
      ctxt->input->col++;
2347
244k
      ctxt->input->cur++;
2348
244k
  }
2349
920k
    } else if  ((RAW == '&') && (NXT(1) == '#')) {
2350
920k
  SKIP(2);
2351
920k
  GROW;
2352
5.63M
  while (RAW != ';') { /* loop blocked by count */
2353
4.80M
      if (count++ > 20) {
2354
202k
    count = 0;
2355
202k
    GROW;
2356
202k
                if (ctxt->instate == XML_PARSER_EOF)
2357
0
                    return(0);
2358
202k
      }
2359
4.80M
      if ((RAW >= '0') && (RAW <= '9'))
2360
4.71M
          val = val * 10 + (CUR - '0');
2361
91.9k
      else {
2362
91.9k
    xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2363
91.9k
    val = 0;
2364
91.9k
    break;
2365
91.9k
      }
2366
4.71M
      if (val > 0x110000)
2367
2.19M
          val = 0x110000;
2368
2369
4.71M
      NEXT;
2370
4.71M
      count++;
2371
4.71M
  }
2372
920k
  if (RAW == ';') {
2373
      /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2374
828k
      ctxt->input->col++;
2375
828k
      ctxt->input->cur++;
2376
828k
  }
2377
920k
    } else {
2378
0
        if (RAW == '&')
2379
0
            SKIP(1);
2380
0
        xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2381
0
    }
2382
2383
    /*
2384
     * [ WFC: Legal Character ]
2385
     * Characters referred to using character references must match the
2386
     * production for Char.
2387
     */
2388
1.19M
    if (val >= 0x110000) {
2389
2.65k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2390
2.65k
                "xmlParseCharRef: character reference out of bounds\n",
2391
2.65k
          val);
2392
1.19M
    } else if (IS_CHAR(val)) {
2393
1.06M
        return(val);
2394
1.06M
    } else {
2395
132k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2396
132k
                          "xmlParseCharRef: invalid xmlChar value %d\n",
2397
132k
                    val);
2398
132k
    }
2399
134k
    return(0);
2400
1.19M
}
2401
2402
/**
2403
 * xmlParseStringCharRef:
2404
 * @ctxt:  an XML parser context
2405
 * @str:  a pointer to an index in the string
2406
 *
2407
 * parse Reference declarations, variant parsing from a string rather
2408
 * than an an input flow.
2409
 *
2410
 * [66] CharRef ::= '&#' [0-9]+ ';' |
2411
 *                  '&#x' [0-9a-fA-F]+ ';'
2412
 *
2413
 * [ WFC: Legal Character ]
2414
 * Characters referred to using character references must match the
2415
 * production for Char.
2416
 *
2417
 * Returns the value parsed (as an int), 0 in case of error, str will be
2418
 *         updated to the current value of the index
2419
 */
2420
static int
2421
656k
xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2422
656k
    const xmlChar *ptr;
2423
656k
    xmlChar cur;
2424
656k
    int val = 0;
2425
2426
656k
    if ((str == NULL) || (*str == NULL)) return(0);
2427
656k
    ptr = *str;
2428
656k
    cur = *ptr;
2429
656k
    if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2430
109k
  ptr += 3;
2431
109k
  cur = *ptr;
2432
341k
  while (cur != ';') { /* Non input consuming loop */
2433
238k
      if ((cur >= '0') && (cur <= '9'))
2434
139k
          val = val * 16 + (cur - '0');
2435
98.4k
      else if ((cur >= 'a') && (cur <= 'f'))
2436
8.05k
          val = val * 16 + (cur - 'a') + 10;
2437
90.4k
      else if ((cur >= 'A') && (cur <= 'F'))
2438
83.9k
          val = val * 16 + (cur - 'A') + 10;
2439
6.45k
      else {
2440
6.45k
    xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2441
6.45k
    val = 0;
2442
6.45k
    break;
2443
6.45k
      }
2444
231k
      if (val > 0x110000)
2445
93.1k
          val = 0x110000;
2446
2447
231k
      ptr++;
2448
231k
      cur = *ptr;
2449
231k
  }
2450
109k
  if (cur == ';')
2451
103k
      ptr++;
2452
546k
    } else if  ((cur == '&') && (ptr[1] == '#')){
2453
546k
  ptr += 2;
2454
546k
  cur = *ptr;
2455
1.95M
  while (cur != ';') { /* Non input consuming loops */
2456
1.41M
      if ((cur >= '0') && (cur <= '9'))
2457
1.40M
          val = val * 10 + (cur - '0');
2458
6.16k
      else {
2459
6.16k
    xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2460
6.16k
    val = 0;
2461
6.16k
    break;
2462
6.16k
      }
2463
1.40M
      if (val > 0x110000)
2464
9.26k
          val = 0x110000;
2465
2466
1.40M
      ptr++;
2467
1.40M
      cur = *ptr;
2468
1.40M
  }
2469
546k
  if (cur == ';')
2470
540k
      ptr++;
2471
546k
    } else {
2472
0
  xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2473
0
  return(0);
2474
0
    }
2475
656k
    *str = ptr;
2476
2477
    /*
2478
     * [ WFC: Legal Character ]
2479
     * Characters referred to using character references must match the
2480
     * production for Char.
2481
     */
2482
656k
    if (val >= 0x110000) {
2483
1.02k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2484
1.02k
                "xmlParseStringCharRef: character reference out of bounds\n",
2485
1.02k
                val);
2486
655k
    } else if (IS_CHAR(val)) {
2487
640k
        return(val);
2488
640k
    } else {
2489
14.6k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2490
14.6k
        "xmlParseStringCharRef: invalid xmlChar value %d\n",
2491
14.6k
        val);
2492
14.6k
    }
2493
15.7k
    return(0);
2494
656k
}
2495
2496
/**
2497
 * xmlParserHandlePEReference:
2498
 * @ctxt:  the parser context
2499
 *
2500
 * [69] PEReference ::= '%' Name ';'
2501
 *
2502
 * [ WFC: No Recursion ]
2503
 * A parsed entity must not contain a recursive
2504
 * reference to itself, either directly or indirectly.
2505
 *
2506
 * [ WFC: Entity Declared ]
2507
 * In a document without any DTD, a document with only an internal DTD
2508
 * subset which contains no parameter entity references, or a document
2509
 * with "standalone='yes'", ...  ... The declaration of a parameter
2510
 * entity must precede any reference to it...
2511
 *
2512
 * [ VC: Entity Declared ]
2513
 * In a document with an external subset or external parameter entities
2514
 * with "standalone='no'", ...  ... The declaration of a parameter entity
2515
 * must precede any reference to it...
2516
 *
2517
 * [ WFC: In DTD ]
2518
 * Parameter-entity references may only appear in the DTD.
2519
 * NOTE: misleading but this is handled.
2520
 *
2521
 * A PEReference may have been detected in the current input stream
2522
 * the handling is done accordingly to
2523
 *      http://www.w3.org/TR/REC-xml#entproc
2524
 * i.e.
2525
 *   - Included in literal in entity values
2526
 *   - Included as Parameter Entity reference within DTDs
2527
 */
2528
void
2529
0
xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
2530
0
    switch(ctxt->instate) {
2531
0
  case XML_PARSER_CDATA_SECTION:
2532
0
      return;
2533
0
        case XML_PARSER_COMMENT:
2534
0
      return;
2535
0
  case XML_PARSER_START_TAG:
2536
0
      return;
2537
0
  case XML_PARSER_END_TAG:
2538
0
      return;
2539
0
        case XML_PARSER_EOF:
2540
0
      xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
2541
0
      return;
2542
0
        case XML_PARSER_PROLOG:
2543
0
  case XML_PARSER_START:
2544
0
  case XML_PARSER_MISC:
2545
0
      xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
2546
0
      return;
2547
0
  case XML_PARSER_ENTITY_DECL:
2548
0
        case XML_PARSER_CONTENT:
2549
0
        case XML_PARSER_ATTRIBUTE_VALUE:
2550
0
        case XML_PARSER_PI:
2551
0
  case XML_PARSER_SYSTEM_LITERAL:
2552
0
  case XML_PARSER_PUBLIC_LITERAL:
2553
      /* we just ignore it there */
2554
0
      return;
2555
0
        case XML_PARSER_EPILOG:
2556
0
      xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
2557
0
      return;
2558
0
  case XML_PARSER_ENTITY_VALUE:
2559
      /*
2560
       * NOTE: in the case of entity values, we don't do the
2561
       *       substitution here since we need the literal
2562
       *       entity value to be able to save the internal
2563
       *       subset of the document.
2564
       *       This will be handled by xmlStringDecodeEntities
2565
       */
2566
0
      return;
2567
0
        case XML_PARSER_DTD:
2568
      /*
2569
       * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2570
       * In the internal DTD subset, parameter-entity references
2571
       * can occur only where markup declarations can occur, not
2572
       * within markup declarations.
2573
       * In that case this is handled in xmlParseMarkupDecl
2574
       */
2575
0
      if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2576
0
    return;
2577
0
      if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
2578
0
    return;
2579
0
            break;
2580
0
        case XML_PARSER_IGNORE:
2581
0
            return;
2582
0
    }
2583
2584
0
    xmlParsePEReference(ctxt);
2585
0
}
2586
2587
/*
 * Macro used to grow the current buffer.
 * buffer##_size is expected to be a size_t
 * mem_error: is expected to handle memory allocation failures
 *
 * The new size is twice the current size plus n. The macro jumps to
 * mem_error if that computation wraps around or if the reallocation
 * fails; buffer and buffer##_size are only updated on success.
 * n is parenthesized so that an expression argument is added as a whole.
 */
#define growBuffer(buffer, n) {						\
    xmlChar *tmp;							\
    size_t new_size = buffer##_size * 2 + (n);                          \
    if (new_size < buffer##_size) goto mem_error;                       \
    tmp = (xmlChar *) xmlRealloc(buffer, new_size);                     \
    if (tmp == NULL) goto mem_error;					\
    buffer = tmp;							\
    buffer##_size = new_size;                                           \
}
2601
2602
/**
2603
 * xmlStringDecodeEntitiesInt:
2604
 * @ctxt:  the parser context
2605
 * @str:  the input string
2606
 * @len: the string length
2607
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2608
 * @end:  an end marker xmlChar, 0 if none
2609
 * @end2:  an end marker xmlChar, 0 if none
2610
 * @end3:  an end marker xmlChar, 0 if none
2611
 * @check:  whether to perform entity checks
2612
 */
2613
static xmlChar *
2614
xmlStringDecodeEntitiesInt(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2615
               int what, xmlChar end, xmlChar  end2, xmlChar end3,
2616
22.1M
                           int check) {
2617
22.1M
    xmlChar *buffer = NULL;
2618
22.1M
    size_t buffer_size = 0;
2619
22.1M
    size_t nbchars = 0;
2620
2621
22.1M
    xmlChar *current = NULL;
2622
22.1M
    xmlChar *rep = NULL;
2623
22.1M
    const xmlChar *last;
2624
22.1M
    xmlEntityPtr ent;
2625
22.1M
    int c,l;
2626
2627
22.1M
    if (str == NULL)
2628
23.0k
        return(NULL);
2629
22.1M
    last = str + len;
2630
2631
22.1M
    if (((ctxt->depth > 40) &&
2632
22.1M
         ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2633
22.1M
  (ctxt->depth > 100)) {
2634
0
  xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_LOOP,
2635
0
                       "Maximum entity nesting depth exceeded");
2636
0
  return(NULL);
2637
0
    }
2638
2639
    /*
2640
     * allocate a translation buffer.
2641
     */
2642
22.1M
    buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
2643
22.1M
    buffer = (xmlChar *) xmlMallocAtomic(buffer_size);
2644
22.1M
    if (buffer == NULL) goto mem_error;
2645
2646
    /*
2647
     * OK loop until we reach one of the ending char or a size limit.
2648
     * we are operating on already parsed values.
2649
     */
2650
22.1M
    if (str < last)
2651
21.7M
  c = CUR_SCHAR(str, l);
2652
333k
    else
2653
333k
        c = 0;
2654
862M
    while ((c != 0) && (c != end) && /* non input consuming loop */
2655
862M
           (c != end2) && (c != end3) &&
2656
862M
           (ctxt->instate != XML_PARSER_EOF)) {
2657
2658
840M
  if (c == 0) break;
2659
840M
        if ((c == '&') && (str[1] == '#')) {
2660
656k
      int val = xmlParseStringCharRef(ctxt, &str);
2661
656k
      if (val == 0)
2662
15.7k
                goto int_error;
2663
640k
      COPY_BUF(0,buffer,nbchars,val);
2664
640k
      if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2665
112
          growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2666
112
      }
2667
840M
  } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2668
16.1M
      if (xmlParserDebugEntities)
2669
0
    xmlGenericError(xmlGenericErrorContext,
2670
0
      "String decoding Entity Reference: %.30s\n",
2671
0
      str);
2672
16.1M
      ent = xmlParseStringEntityRef(ctxt, &str);
2673
16.1M
      if ((ent != NULL) &&
2674
16.1M
    (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2675
11.4k
    if (ent->content != NULL) {
2676
11.4k
        COPY_BUF(0,buffer,nbchars,ent->content[0]);
2677
11.4k
        if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2678
0
      growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2679
0
        }
2680
11.4k
    } else {
2681
0
        xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2682
0
          "predefined entity has no content\n");
2683
0
                    goto int_error;
2684
0
    }
2685
16.1M
      } else if ((ent != NULL) && (ent->content != NULL)) {
2686
16.0M
          if ((check) && (xmlParserEntityCheck(ctxt, ent->length)))
2687
99
                    goto int_error;
2688
2689
16.0M
                if (ent->flags & XML_ENT_EXPANDING) {
2690
1.33k
              xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2691
1.33k
                    xmlHaltParser(ctxt);
2692
1.33k
                    ent->content[0] = 0;
2693
1.33k
                    goto int_error;
2694
1.33k
                }
2695
2696
16.0M
                ent->flags |= XML_ENT_EXPANDING;
2697
16.0M
    ctxt->depth++;
2698
16.0M
    rep = xmlStringDecodeEntitiesInt(ctxt, ent->content,
2699
16.0M
                        ent->length, what, 0, 0, 0, check);
2700
16.0M
    ctxt->depth--;
2701
16.0M
                ent->flags &= ~XML_ENT_EXPANDING;
2702
2703
16.0M
    if (rep == NULL) {
2704
3.95k
                    ent->content[0] = 0;
2705
3.95k
                    goto int_error;
2706
3.95k
                }
2707
2708
16.0M
                current = rep;
2709
1.76G
                while (*current != 0) { /* non input consuming loop */
2710
1.74G
                    buffer[nbchars++] = *current++;
2711
1.74G
                    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2712
1.76M
                        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2713
1.76M
                    }
2714
1.74G
                }
2715
16.0M
                xmlFree(rep);
2716
16.0M
                rep = NULL;
2717
16.0M
      } else if (ent != NULL) {
2718
46.2k
    int i = xmlStrlen(ent->name);
2719
46.2k
    const xmlChar *cur = ent->name;
2720
2721
46.2k
    buffer[nbchars++] = '&';
2722
46.2k
    if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) {
2723
16
        growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE);
2724
16
    }
2725
93.4k
    for (;i > 0;i--)
2726
47.1k
        buffer[nbchars++] = *cur++;
2727
46.2k
    buffer[nbchars++] = ';';
2728
46.2k
      }
2729
824M
  } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2730
933k
      if (xmlParserDebugEntities)
2731
0
    xmlGenericError(xmlGenericErrorContext,
2732
0
      "String decoding PE Reference: %.30s\n", str);
2733
933k
      ent = xmlParseStringPEReference(ctxt, &str);
2734
933k
      if (ent != NULL) {
2735
826k
                if (ent->content == NULL) {
2736
        /*
2737
         * Note: external parsed entities will not be loaded,
2738
         * it is not required for a non-validating parser to
2739
         * complete external PEReferences coming from the
2740
         * internal subset
2741
         */
2742
6.55k
        if (((ctxt->options & XML_PARSE_NOENT) != 0) ||
2743
6.55k
      ((ctxt->options & XML_PARSE_DTDVALID) != 0) ||
2744
6.55k
      (ctxt->validate != 0)) {
2745
5.92k
      xmlLoadEntityContent(ctxt, ent);
2746
5.92k
        } else {
2747
628
      xmlWarningMsg(ctxt, XML_ERR_ENTITY_PROCESSING,
2748
628
      "not validating will not read content for PE entity %s\n",
2749
628
                          ent->name, NULL);
2750
628
        }
2751
6.55k
    }
2752
2753
826k
          if ((check) && (xmlParserEntityCheck(ctxt, ent->length)))
2754
36
                    goto int_error;
2755
2756
826k
                if (ent->flags & XML_ENT_EXPANDING) {
2757
81
              xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2758
81
                    xmlHaltParser(ctxt);
2759
81
                    if (ent->content != NULL)
2760
72
                        ent->content[0] = 0;
2761
81
                    goto int_error;
2762
81
                }
2763
2764
826k
                ent->flags |= XML_ENT_EXPANDING;
2765
826k
    ctxt->depth++;
2766
826k
    rep = xmlStringDecodeEntitiesInt(ctxt, ent->content,
2767
826k
                        ent->length, what, 0, 0, 0, check);
2768
826k
    ctxt->depth--;
2769
826k
                ent->flags &= ~XML_ENT_EXPANDING;
2770
2771
826k
    if (rep == NULL) {
2772
3.92k
                    if (ent->content != NULL)
2773
165
                        ent->content[0] = 0;
2774
3.92k
                    goto int_error;
2775
3.92k
                }
2776
822k
                current = rep;
2777
349M
                while (*current != 0) { /* non input consuming loop */
2778
348M
                    buffer[nbchars++] = *current++;
2779
348M
                    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2780
153k
                        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2781
153k
                    }
2782
348M
                }
2783
822k
                xmlFree(rep);
2784
822k
                rep = NULL;
2785
822k
      }
2786
823M
  } else {
2787
823M
      COPY_BUF(l,buffer,nbchars,c);
2788
823M
      str += l;
2789
823M
      if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2790
260k
          growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2791
260k
      }
2792
823M
  }
2793
840M
  if (str < last)
2794
819M
      c = CUR_SCHAR(str, l);
2795
21.7M
  else
2796
21.7M
      c = 0;
2797
840M
    }
2798
22.0M
    buffer[nbchars] = 0;
2799
22.0M
    return(buffer);
2800
2801
0
mem_error:
2802
0
    xmlErrMemory(ctxt, NULL);
2803
25.1k
int_error:
2804
25.1k
    if (rep != NULL)
2805
0
        xmlFree(rep);
2806
25.1k
    if (buffer != NULL)
2807
25.1k
        xmlFree(buffer);
2808
25.1k
    return(NULL);
2809
0
}
2810
2811
/**
2812
 * xmlStringLenDecodeEntities:
2813
 * @ctxt:  the parser context
2814
 * @str:  the input string
2815
 * @len: the string length
2816
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2817
 * @end:  an end marker xmlChar, 0 if none
2818
 * @end2:  an end marker xmlChar, 0 if none
2819
 * @end3:  an end marker xmlChar, 0 if none
2820
 *
2821
 * DEPRECATED: Internal function, don't use.
2822
 *
2823
 * Takes a entity string content and process to do the adequate substitutions.
2824
 *
2825
 * [67] Reference ::= EntityRef | CharRef
2826
 *
2827
 * [69] PEReference ::= '%' Name ';'
2828
 *
2829
 * Returns A newly allocated string with the substitution done. The caller
2830
 *      must deallocate it !
2831
 */
2832
xmlChar *
2833
xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2834
                           int what, xmlChar end, xmlChar  end2,
2835
39.5k
                           xmlChar end3) {
2836
39.5k
    if ((ctxt == NULL) || (str == NULL) || (len < 0))
2837
0
        return(NULL);
2838
39.5k
    return(xmlStringDecodeEntitiesInt(ctxt, str, len, what,
2839
39.5k
                                      end, end2, end3, 0));
2840
39.5k
}
2841
2842
/**
2843
 * xmlStringDecodeEntities:
2844
 * @ctxt:  the parser context
2845
 * @str:  the input string
2846
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2847
 * @end:  an end marker xmlChar, 0 if none
2848
 * @end2:  an end marker xmlChar, 0 if none
2849
 * @end3:  an end marker xmlChar, 0 if none
2850
 *
2851
 * DEPRECATED: Internal function, don't use.
2852
 *
2853
 * Takes a entity string content and process to do the adequate substitutions.
2854
 *
2855
 * [67] Reference ::= EntityRef | CharRef
2856
 *
2857
 * [69] PEReference ::= '%' Name ';'
2858
 *
2859
 * Returns A newly allocated string with the substitution done. The caller
2860
 *      must deallocate it !
2861
 */
2862
xmlChar *
2863
xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2864
538k
            xmlChar end, xmlChar  end2, xmlChar end3) {
2865
538k
    if ((ctxt == NULL) || (str == NULL)) return(NULL);
2866
538k
    return(xmlStringDecodeEntitiesInt(ctxt, str, xmlStrlen(str), what,
2867
538k
                                      end, end2, end3, 0));
2868
538k
}
2869
2870
/************************************************************************
2871
 *                  *
2872
 *    Commodity functions, cleanup needed ?     *
2873
 *                  *
2874
 ************************************************************************/
2875
2876
/**
2877
 * areBlanks:
2878
 * @ctxt:  an XML parser context
2879
 * @str:  a xmlChar *
2880
 * @len:  the size of @str
2881
 * @blank_chars: we know the chars are blanks
2882
 *
2883
 * Is this a sequence of blank chars that one can ignore ?
2884
 *
2885
 * Returns 1 if ignorable 0 otherwise.
2886
 */
2887
2888
static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2889
21.6M
                     int blank_chars) {
2890
21.6M
    int i, ret;
2891
21.6M
    xmlNodePtr lastChild;
2892
2893
    /*
2894
     * Don't spend time trying to differentiate them, the same callback is
2895
     * used !
2896
     */
2897
21.6M
    if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
2898
919k
  return(0);
2899
2900
    /*
2901
     * Check for xml:space value.
2902
     */
2903
20.7M
    if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2904
20.7M
        (*(ctxt->space) == -2))
2905
6.88M
  return(0);
2906
2907
    /*
2908
     * Check that the string is made of blanks
2909
     */
2910
13.8M
    if (blank_chars == 0) {
2911
28.2M
  for (i = 0;i < len;i++)
2912
24.1M
      if (!(IS_BLANK_CH(str[i]))) return(0);
2913
5.75M
    }
2914
2915
    /*
2916
     * Look if the element is mixed content in the DTD if available
2917
     */
2918
12.2M
    if (ctxt->node == NULL) return(0);
2919
11.9M
    if (ctxt->myDoc != NULL) {
2920
11.9M
  ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2921
11.9M
        if (ret == 0) return(1);
2922
11.7M
        if (ret == 1) return(0);
2923
11.7M
    }
2924
2925
    /*
2926
     * Otherwise, heuristic :-\
2927
     */
2928
11.7M
    if ((RAW != '<') && (RAW != 0xD)) return(0);
2929
11.6M
    if ((ctxt->node->children == NULL) &&
2930
11.6M
  (RAW == '<') && (NXT(1) == '/')) return(0);
2931
2932
11.6M
    lastChild = xmlGetLastChild(ctxt->node);
2933
11.6M
    if (lastChild == NULL) {
2934
2.36M
        if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2935
2.36M
            (ctxt->node->content != NULL)) return(0);
2936
9.32M
    } else if (xmlNodeIsText(lastChild))
2937
157k
        return(0);
2938
9.16M
    else if ((ctxt->node->children != NULL) &&
2939
9.16M
             (xmlNodeIsText(ctxt->node->children)))
2940
92.3k
        return(0);
2941
11.4M
    return(1);
2942
11.6M
}
2943
2944
/************************************************************************
2945
 *                  *
2946
 *    Extra stuff for namespace support     *
2947
 *  Relates to http://www.w3.org/TR/WD-xml-names      *
2948
 *                  *
2949
 ************************************************************************/
2950
2951
/**
2952
 * xmlSplitQName:
2953
 * @ctxt:  an XML parser context
2954
 * @name:  an XML parser context
2955
 * @prefix:  a xmlChar **
2956
 *
2957
 * parse an UTF8 encoded XML qualified name string
2958
 *
2959
 * [NS 5] QName ::= (Prefix ':')? LocalPart
2960
 *
2961
 * [NS 6] Prefix ::= NCName
2962
 *
2963
 * [NS 7] LocalPart ::= NCName
2964
 *
2965
 * Returns the local part, and prefix is updated
2966
 *   to get the Prefix if any.
2967
 */
2968
2969
xmlChar *
2970
37.4M
xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2971
37.4M
    xmlChar buf[XML_MAX_NAMELEN + 5];
2972
37.4M
    xmlChar *buffer = NULL;
2973
37.4M
    int len = 0;
2974
37.4M
    int max = XML_MAX_NAMELEN;
2975
37.4M
    xmlChar *ret = NULL;
2976
37.4M
    const xmlChar *cur = name;
2977
37.4M
    int c;
2978
2979
37.4M
    if (prefix == NULL) return(NULL);
2980
37.4M
    *prefix = NULL;
2981
2982
37.4M
    if (cur == NULL) return(NULL);
2983
2984
#ifndef XML_XML_NAMESPACE
2985
    /* xml: prefix is not really a namespace */
2986
    if ((cur[0] == 'x') && (cur[1] == 'm') &&
2987
        (cur[2] == 'l') && (cur[3] == ':'))
2988
  return(xmlStrdup(name));
2989
#endif
2990
2991
    /* nasty but well=formed */
2992
37.4M
    if (cur[0] == ':')
2993
10.2k
  return(xmlStrdup(name));
2994
2995
37.4M
    c = *cur++;
2996
182M
    while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2997
145M
  buf[len++] = c;
2998
145M
  c = *cur++;
2999
145M
    }
3000
37.4M
    if (len >= max) {
3001
  /*
3002
   * Okay someone managed to make a huge name, so he's ready to pay
3003
   * for the processing speed.
3004
   */
3005
13.2k
  max = len * 2;
3006
3007
13.2k
  buffer = (xmlChar *) xmlMallocAtomic(max);
3008
13.2k
  if (buffer == NULL) {
3009
0
      xmlErrMemory(ctxt, NULL);
3010
0
      return(NULL);
3011
0
  }
3012
13.2k
  memcpy(buffer, buf, len);
3013
38.0M
  while ((c != 0) && (c != ':')) { /* tested bigname.xml */
3014
38.0M
      if (len + 10 > max) {
3015
35.2k
          xmlChar *tmp;
3016
3017
35.2k
    max *= 2;
3018
35.2k
    tmp = (xmlChar *) xmlRealloc(buffer, max);
3019
35.2k
    if (tmp == NULL) {
3020
0
        xmlFree(buffer);
3021
0
        xmlErrMemory(ctxt, NULL);
3022
0
        return(NULL);
3023
0
    }
3024
35.2k
    buffer = tmp;
3025
35.2k
      }
3026
38.0M
      buffer[len++] = c;
3027
38.0M
      c = *cur++;
3028
38.0M
  }
3029
13.2k
  buffer[len] = 0;
3030
13.2k
    }
3031
3032
37.4M
    if ((c == ':') && (*cur == 0)) {
3033
16.8k
        if (buffer != NULL)
3034
692
      xmlFree(buffer);
3035
16.8k
  *prefix = NULL;
3036
16.8k
  return(xmlStrdup(name));
3037
16.8k
    }
3038
3039
37.4M
    if (buffer == NULL)
3040
37.4M
  ret = xmlStrndup(buf, len);
3041
12.5k
    else {
3042
12.5k
  ret = buffer;
3043
12.5k
  buffer = NULL;
3044
12.5k
  max = XML_MAX_NAMELEN;
3045
12.5k
    }
3046
3047
3048
37.4M
    if (c == ':') {
3049
1.49M
  c = *cur;
3050
1.49M
        *prefix = ret;
3051
1.49M
  if (c == 0) {
3052
0
      return(xmlStrndup(BAD_CAST "", 0));
3053
0
  }
3054
1.49M
  len = 0;
3055
3056
  /*
3057
   * Check that the first character is proper to start
3058
   * a new name
3059
   */
3060
1.49M
  if (!(((c >= 0x61) && (c <= 0x7A)) ||
3061
1.49M
        ((c >= 0x41) && (c <= 0x5A)) ||
3062
1.49M
        (c == '_') || (c == ':'))) {
3063
12.1k
      int l;
3064
12.1k
      int first = CUR_SCHAR(cur, l);
3065
3066
12.1k
      if (!IS_LETTER(first) && (first != '_')) {
3067
6.29k
    xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
3068
6.29k
          "Name %s is not XML Namespace compliant\n",
3069
6.29k
          name);
3070
6.29k
      }
3071
12.1k
  }
3072
1.49M
  cur++;
3073
3074
9.84M
  while ((c != 0) && (len < max)) { /* tested bigname2.xml */
3075
8.35M
      buf[len++] = c;
3076
8.35M
      c = *cur++;
3077
8.35M
  }
3078
1.49M
  if (len >= max) {
3079
      /*
3080
       * Okay someone managed to make a huge name, so he's ready to pay
3081
       * for the processing speed.
3082
       */
3083
5.11k
      max = len * 2;
3084
3085
5.11k
      buffer = (xmlChar *) xmlMallocAtomic(max);
3086
5.11k
      if (buffer == NULL) {
3087
0
          xmlErrMemory(ctxt, NULL);
3088
0
    return(NULL);
3089
0
      }
3090
5.11k
      memcpy(buffer, buf, len);
3091
11.5M
      while (c != 0) { /* tested bigname2.xml */
3092
11.5M
    if (len + 10 > max) {
3093
11.3k
        xmlChar *tmp;
3094
3095
11.3k
        max *= 2;
3096
11.3k
        tmp = (xmlChar *) xmlRealloc(buffer, max);
3097
11.3k
        if (tmp == NULL) {
3098
0
      xmlErrMemory(ctxt, NULL);
3099
0
      xmlFree(buffer);
3100
0
      return(NULL);
3101
0
        }
3102
11.3k
        buffer = tmp;
3103
11.3k
    }
3104
11.5M
    buffer[len++] = c;
3105
11.5M
    c = *cur++;
3106
11.5M
      }
3107
5.11k
      buffer[len] = 0;
3108
5.11k
  }
3109
3110
1.49M
  if (buffer == NULL)
3111
1.48M
      ret = xmlStrndup(buf, len);
3112
5.11k
  else {
3113
5.11k
      ret = buffer;
3114
5.11k
  }
3115
1.49M
    }
3116
3117
37.4M
    return(ret);
3118
37.4M
}
3119
3120
/************************************************************************
3121
 *                  *
3122
 *      The parser itself       *
3123
 *  Relates to http://www.w3.org/TR/REC-xml       *
3124
 *                  *
3125
 ************************************************************************/
3126
3127
/************************************************************************
3128
 *                  *
3129
 *  Routines to parse Name, NCName and NmToken      *
3130
 *                  *
3131
 ************************************************************************/
3132
#ifdef DEBUG
/*
 * Call counters for the name parsing routines, only compiled in when
 * DEBUG is defined. Each routine increments its own counter on entry.
 */
/* Name */
static unsigned long nbParseName = 0;
static unsigned long nbParseNameComplex = 0;
/* NCName */
static unsigned long nbParseNCName = 0;
static unsigned long nbParseNCNameComplex = 0;
/* presumably NmToken and string name parsing, TODO confirm the users */
static unsigned long nbParseNmToken = 0;
static unsigned long nbParseStringName = 0;
#endif
3140
3141
/*
3142
 * The two following functions are related to the change of accepted
3143
 * characters for Name and NmToken in the Revision 5 of XML-1.0
3144
 * They correspond to the modified production [4] and the new production [4a]
3145
 * changes in that revision. Also note that the macros used for the
3146
 * productions Letter, Digit, CombiningChar and Extender are not needed
3147
 * anymore.
3148
 * We still keep compatibility to pre-revision5 parsing semantic if the
3149
 * new XML_PARSE_OLD10 option is given to the parser.
3150
 */
3151
static int
3152
20.3M
xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
3153
20.3M
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3154
        /*
3155
   * Use the new checks of production [4] [4a] amd [5] of the
3156
   * Update 5 of XML-1.0
3157
   */
3158
17.8M
  if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3159
17.8M
      (((c >= 'a') && (c <= 'z')) ||
3160
17.8M
       ((c >= 'A') && (c <= 'Z')) ||
3161
17.8M
       (c == '_') || (c == ':') ||
3162
17.8M
       ((c >= 0xC0) && (c <= 0xD6)) ||
3163
17.8M
       ((c >= 0xD8) && (c <= 0xF6)) ||
3164
17.8M
       ((c >= 0xF8) && (c <= 0x2FF)) ||
3165
17.8M
       ((c >= 0x370) && (c <= 0x37D)) ||
3166
17.8M
       ((c >= 0x37F) && (c <= 0x1FFF)) ||
3167
17.8M
       ((c >= 0x200C) && (c <= 0x200D)) ||
3168
17.8M
       ((c >= 0x2070) && (c <= 0x218F)) ||
3169
17.8M
       ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3170
17.8M
       ((c >= 0x3001) && (c <= 0xD7FF)) ||
3171
17.8M
       ((c >= 0xF900) && (c <= 0xFDCF)) ||
3172
17.8M
       ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3173
17.8M
       ((c >= 0x10000) && (c <= 0xEFFFF))))
3174
17.0M
      return(1);
3175
17.8M
    } else {
3176
2.46M
        if (IS_LETTER(c) || (c == '_') || (c == ':'))
3177
2.11M
      return(1);
3178
2.46M
    }
3179
1.15M
    return(0);
3180
20.3M
}
3181
3182
static int
3183
327M
xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
3184
327M
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3185
        /*
3186
   * Use the new checks of production [4] [4a] amd [5] of the
3187
   * Update 5 of XML-1.0
3188
   */
3189
298M
  if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3190
298M
      (((c >= 'a') && (c <= 'z')) ||
3191
296M
       ((c >= 'A') && (c <= 'Z')) ||
3192
296M
       ((c >= '0') && (c <= '9')) || /* !start */
3193
296M
       (c == '_') || (c == ':') ||
3194
296M
       (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3195
296M
       ((c >= 0xC0) && (c <= 0xD6)) ||
3196
296M
       ((c >= 0xD8) && (c <= 0xF6)) ||
3197
296M
       ((c >= 0xF8) && (c <= 0x2FF)) ||
3198
296M
       ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3199
296M
       ((c >= 0x370) && (c <= 0x37D)) ||
3200
296M
       ((c >= 0x37F) && (c <= 0x1FFF)) ||
3201
296M
       ((c >= 0x200C) && (c <= 0x200D)) ||
3202
296M
       ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3203
296M
       ((c >= 0x2070) && (c <= 0x218F)) ||
3204
296M
       ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3205
296M
       ((c >= 0x3001) && (c <= 0xD7FF)) ||
3206
296M
       ((c >= 0xF900) && (c <= 0xFDCF)) ||
3207
296M
       ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3208
296M
       ((c >= 0x10000) && (c <= 0xEFFFF))))
3209
278M
       return(1);
3210
298M
    } else {
3211
29.7M
        if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3212
29.7M
            (c == '.') || (c == '-') ||
3213
29.7M
      (c == '_') || (c == ':') ||
3214
29.7M
      (IS_COMBINING(c)) ||
3215
29.7M
      (IS_EXTENDER(c)))
3216
26.3M
      return(1);
3217
29.7M
    }
3218
23.3M
    return(0);
3219
327M
}
3220
3221
static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
3222
                                          int *len, int *alloc, int normalize);
3223
3224
static const xmlChar *
3225
2.27M
xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3226
2.27M
    int len = 0, l;
3227
2.27M
    int c;
3228
2.27M
    int count = 0;
3229
2.27M
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3230
963k
                    XML_MAX_TEXT_LENGTH :
3231
2.27M
                    XML_MAX_NAME_LENGTH;
3232
3233
#ifdef DEBUG
3234
    nbParseNameComplex++;
3235
#endif
3236
3237
    /*
3238
     * Handler for more complex cases
3239
     */
3240
2.27M
    GROW;
3241
2.27M
    if (ctxt->instate == XML_PARSER_EOF)
3242
0
        return(NULL);
3243
2.27M
    c = CUR_CHAR(l);
3244
2.27M
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3245
        /*
3246
   * Use the new checks of production [4] [4a] amd [5] of the
3247
   * Update 5 of XML-1.0
3248
   */
3249
1.23M
  if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3250
1.23M
      (!(((c >= 'a') && (c <= 'z')) ||
3251
1.17M
         ((c >= 'A') && (c <= 'Z')) ||
3252
1.17M
         (c == '_') || (c == ':') ||
3253
1.17M
         ((c >= 0xC0) && (c <= 0xD6)) ||
3254
1.17M
         ((c >= 0xD8) && (c <= 0xF6)) ||
3255
1.17M
         ((c >= 0xF8) && (c <= 0x2FF)) ||
3256
1.17M
         ((c >= 0x370) && (c <= 0x37D)) ||
3257
1.17M
         ((c >= 0x37F) && (c <= 0x1FFF)) ||
3258
1.17M
         ((c >= 0x200C) && (c <= 0x200D)) ||
3259
1.17M
         ((c >= 0x2070) && (c <= 0x218F)) ||
3260
1.17M
         ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3261
1.17M
         ((c >= 0x3001) && (c <= 0xD7FF)) ||
3262
1.17M
         ((c >= 0xF900) && (c <= 0xFDCF)) ||
3263
1.17M
         ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3264
1.17M
         ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3265
718k
      return(NULL);
3266
718k
  }
3267
512k
  len += l;
3268
512k
  NEXTL(l);
3269
512k
  c = CUR_CHAR(l);
3270
36.9M
  while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3271
36.9M
         (((c >= 'a') && (c <= 'z')) ||
3272
36.8M
          ((c >= 'A') && (c <= 'Z')) ||
3273
36.8M
          ((c >= '0') && (c <= '9')) || /* !start */
3274
36.8M
          (c == '_') || (c == ':') ||
3275
36.8M
          (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3276
36.8M
          ((c >= 0xC0) && (c <= 0xD6)) ||
3277
36.8M
          ((c >= 0xD8) && (c <= 0xF6)) ||
3278
36.8M
          ((c >= 0xF8) && (c <= 0x2FF)) ||
3279
36.8M
          ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3280
36.8M
          ((c >= 0x370) && (c <= 0x37D)) ||
3281
36.8M
          ((c >= 0x37F) && (c <= 0x1FFF)) ||
3282
36.8M
          ((c >= 0x200C) && (c <= 0x200D)) ||
3283
36.8M
          ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3284
36.8M
          ((c >= 0x2070) && (c <= 0x218F)) ||
3285
36.8M
          ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3286
36.8M
          ((c >= 0x3001) && (c <= 0xD7FF)) ||
3287
36.8M
          ((c >= 0xF900) && (c <= 0xFDCF)) ||
3288
36.8M
          ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3289
36.8M
          ((c >= 0x10000) && (c <= 0xEFFFF))
3290
36.8M
    )) {
3291
36.4M
      if (count++ > XML_PARSER_CHUNK_SIZE) {
3292
326k
    count = 0;
3293
326k
    GROW;
3294
326k
                if (ctxt->instate == XML_PARSER_EOF)
3295
0
                    return(NULL);
3296
326k
      }
3297
36.4M
            if (len <= INT_MAX - l)
3298
36.4M
          len += l;
3299
36.4M
      NEXTL(l);
3300
36.4M
      c = CUR_CHAR(l);
3301
36.4M
  }
3302
1.04M
    } else {
3303
1.04M
  if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3304
1.04M
      (!IS_LETTER(c) && (c != '_') &&
3305
999k
       (c != ':'))) {
3306
666k
      return(NULL);
3307
666k
  }
3308
380k
  len += l;
3309
380k
  NEXTL(l);
3310
380k
  c = CUR_CHAR(l);
3311
3312
35.4M
  while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3313
35.4M
         ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3314
35.3M
    (c == '.') || (c == '-') ||
3315
35.3M
    (c == '_') || (c == ':') ||
3316
35.3M
    (IS_COMBINING(c)) ||
3317
35.3M
    (IS_EXTENDER(c)))) {
3318
35.0M
      if (count++ > XML_PARSER_CHUNK_SIZE) {
3319
317k
    count = 0;
3320
317k
    GROW;
3321
317k
                if (ctxt->instate == XML_PARSER_EOF)
3322
0
                    return(NULL);
3323
317k
      }
3324
35.0M
            if (len <= INT_MAX - l)
3325
35.0M
          len += l;
3326
35.0M
      NEXTL(l);
3327
35.0M
      c = CUR_CHAR(l);
3328
35.0M
  }
3329
380k
    }
3330
893k
    if (len > maxLength) {
3331
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3332
0
        return(NULL);
3333
0
    }
3334
893k
    if (ctxt->input->cur - ctxt->input->base < len) {
3335
        /*
3336
         * There were a couple of bugs where PERefs lead to to a change
3337
         * of the buffer. Check the buffer size to avoid passing an invalid
3338
         * pointer to xmlDictLookup.
3339
         */
3340
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
3341
0
                    "unexpected change of input buffer");
3342
0
        return (NULL);
3343
0
    }
3344
893k
    if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3345
9.55k
        return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
3346
884k
    return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3347
893k
}
3348
3349
/**
3350
 * xmlParseName:
3351
 * @ctxt:  an XML parser context
3352
 *
3353
 * DEPRECATED: Internal function, don't use.
3354
 *
3355
 * parse an XML name.
3356
 *
3357
 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3358
 *                  CombiningChar | Extender
3359
 *
3360
 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3361
 *
3362
 * [6] Names ::= Name (#x20 Name)*
3363
 *
3364
 * Returns the Name parsed or NULL
3365
 */
3366
3367
const xmlChar *
3368
101M
xmlParseName(xmlParserCtxtPtr ctxt) {
3369
101M
    const xmlChar *in;
3370
101M
    const xmlChar *ret;
3371
101M
    size_t count = 0;
3372
101M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3373
31.3M
                       XML_MAX_TEXT_LENGTH :
3374
101M
                       XML_MAX_NAME_LENGTH;
3375
3376
101M
    GROW;
3377
3378
#ifdef DEBUG
3379
    nbParseName++;
3380
#endif
3381
3382
    /*
3383
     * Accelerator for simple ASCII names
3384
     */
3385
101M
    in = ctxt->input->cur;
3386
101M
    if (((*in >= 0x61) && (*in <= 0x7A)) ||
3387
101M
  ((*in >= 0x41) && (*in <= 0x5A)) ||
3388
101M
  (*in == '_') || (*in == ':')) {
3389
99.5M
  in++;
3390
577M
  while (((*in >= 0x61) && (*in <= 0x7A)) ||
3391
577M
         ((*in >= 0x41) && (*in <= 0x5A)) ||
3392
577M
         ((*in >= 0x30) && (*in <= 0x39)) ||
3393
577M
         (*in == '_') || (*in == '-') ||
3394
577M
         (*in == ':') || (*in == '.'))
3395
478M
      in++;
3396
99.5M
  if ((*in > 0) && (*in < 0x80)) {
3397
98.7M
      count = in - ctxt->input->cur;
3398
98.7M
            if (count > maxLength) {
3399
0
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3400
0
                return(NULL);
3401
0
            }
3402
98.7M
      ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3403
98.7M
      ctxt->input->cur = in;
3404
98.7M
      ctxt->input->col += count;
3405
98.7M
      if (ret == NULL)
3406
0
          xmlErrMemory(ctxt, NULL);
3407
98.7M
      return(ret);
3408
98.7M
  }
3409
99.5M
    }
3410
    /* accelerator for special cases */
3411
2.27M
    return(xmlParseNameComplex(ctxt));
3412
101M
}
3413
3414
static const xmlChar *
3415
1.63M
xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3416
1.63M
    int len = 0, l;
3417
1.63M
    int c;
3418
1.63M
    int count = 0;
3419
1.63M
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3420
565k
                    XML_MAX_TEXT_LENGTH :
3421
1.63M
                    XML_MAX_NAME_LENGTH;
3422
1.63M
    size_t startPosition = 0;
3423
3424
#ifdef DEBUG
3425
    nbParseNCNameComplex++;
3426
#endif
3427
3428
    /*
3429
     * Handler for more complex cases
3430
     */
3431
1.63M
    GROW;
3432
1.63M
    startPosition = CUR_PTR - BASE_PTR;
3433
1.63M
    c = CUR_CHAR(l);
3434
1.63M
    if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3435
1.63M
  (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3436
1.24M
  return(NULL);
3437
1.24M
    }
3438
3439
36.8M
    while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3440
36.8M
     (xmlIsNameChar(ctxt, c) && (c != ':'))) {
3441
36.5M
  if (count++ > XML_PARSER_CHUNK_SIZE) {
3442
328k
      count = 0;
3443
328k
      GROW;
3444
328k
            if (ctxt->instate == XML_PARSER_EOF)
3445
0
                return(NULL);
3446
328k
  }
3447
36.5M
        if (len <= INT_MAX - l)
3448
36.5M
      len += l;
3449
36.5M
  NEXTL(l);
3450
36.5M
  c = CUR_CHAR(l);
3451
36.5M
  if (c == 0) {
3452
59.9k
      count = 0;
3453
      /*
3454
       * when shrinking to extend the buffer we really need to preserve
3455
       * the part of the name we already parsed. Hence rolling back
3456
       * by current length.
3457
       */
3458
59.9k
      ctxt->input->cur -= l;
3459
59.9k
      GROW;
3460
59.9k
            if (ctxt->instate == XML_PARSER_EOF)
3461
0
                return(NULL);
3462
59.9k
      ctxt->input->cur += l;
3463
59.9k
      c = CUR_CHAR(l);
3464
59.9k
  }
3465
36.5M
    }
3466
388k
    if (len > maxLength) {
3467
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3468
0
        return(NULL);
3469
0
    }
3470
388k
    return(xmlDictLookup(ctxt->dict, (BASE_PTR + startPosition), len));
3471
388k
}
3472
3473
/**
3474
 * xmlParseNCName:
3475
 * @ctxt:  an XML parser context
3476
 * @len:  length of the string parsed
3477
 *
3478
 * parse an XML name.
3479
 *
3480
 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3481
 *                      CombiningChar | Extender
3482
 *
3483
 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3484
 *
3485
 * Returns the Name parsed or NULL
3486
 */
3487
3488
static const xmlChar *
3489
56.7M
xmlParseNCName(xmlParserCtxtPtr ctxt) {
3490
56.7M
    const xmlChar *in, *e;
3491
56.7M
    const xmlChar *ret;
3492
56.7M
    size_t count = 0;
3493
56.7M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3494
16.7M
                       XML_MAX_TEXT_LENGTH :
3495
56.7M
                       XML_MAX_NAME_LENGTH;
3496
3497
#ifdef DEBUG
3498
    nbParseNCName++;
3499
#endif
3500
3501
    /*
3502
     * Accelerator for simple ASCII names
3503
     */
3504
56.7M
    in = ctxt->input->cur;
3505
56.7M
    e = ctxt->input->end;
3506
56.7M
    if ((((*in >= 0x61) && (*in <= 0x7A)) ||
3507
56.7M
   ((*in >= 0x41) && (*in <= 0x5A)) ||
3508
56.7M
   (*in == '_')) && (in < e)) {
3509
55.4M
  in++;
3510
252M
  while ((((*in >= 0x61) && (*in <= 0x7A)) ||
3511
252M
          ((*in >= 0x41) && (*in <= 0x5A)) ||
3512
252M
          ((*in >= 0x30) && (*in <= 0x39)) ||
3513
252M
          (*in == '_') || (*in == '-') ||
3514
252M
          (*in == '.')) && (in < e))
3515
197M
      in++;
3516
55.4M
  if (in >= e)
3517
15.9k
      goto complex;
3518
55.4M
  if ((*in > 0) && (*in < 0x80)) {
3519
55.1M
      count = in - ctxt->input->cur;
3520
55.1M
            if (count > maxLength) {
3521
0
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3522
0
                return(NULL);
3523
0
            }
3524
55.1M
      ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3525
55.1M
      ctxt->input->cur = in;
3526
55.1M
      ctxt->input->col += count;
3527
55.1M
      if (ret == NULL) {
3528
0
          xmlErrMemory(ctxt, NULL);
3529
0
      }
3530
55.1M
      return(ret);
3531
55.1M
  }
3532
55.4M
    }
3533
1.63M
complex:
3534
1.63M
    return(xmlParseNCNameComplex(ctxt));
3535
56.7M
}
3536
3537
/**
3538
 * xmlParseNameAndCompare:
3539
 * @ctxt:  an XML parser context
3540
 *
3541
 * parse an XML name and compares for match
3542
 * (specialized for endtag parsing)
3543
 *
3544
 * Returns NULL for an illegal name, (xmlChar*) 1 for success
3545
 * and the name for mismatch
3546
 */
3547
3548
static const xmlChar *
3549
26.8M
xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
3550
26.8M
    register const xmlChar *cmp = other;
3551
26.8M
    register const xmlChar *in;
3552
26.8M
    const xmlChar *ret;
3553
3554
26.8M
    GROW;
3555
26.8M
    if (ctxt->instate == XML_PARSER_EOF)
3556
0
        return(NULL);
3557
3558
26.8M
    in = ctxt->input->cur;
3559
142M
    while (*in != 0 && *in == *cmp) {
3560
115M
  ++in;
3561
115M
  ++cmp;
3562
115M
    }
3563
26.8M
    if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
3564
  /* success */
3565
26.0M
  ctxt->input->col += in - ctxt->input->cur;
3566
26.0M
  ctxt->input->cur = in;
3567
26.0M
  return (const xmlChar*) 1;
3568
26.0M
    }
3569
    /* failure (or end of input buffer), check with full function */
3570
782k
    ret = xmlParseName (ctxt);
3571
    /* strings coming from the dictionary direct compare possible */
3572
782k
    if (ret == other) {
3573
61.5k
  return (const xmlChar*) 1;
3574
61.5k
    }
3575
720k
    return ret;
3576
782k
}
3577
3578
/**
3579
 * xmlParseStringName:
3580
 * @ctxt:  an XML parser context
3581
 * @str:  a pointer to the string pointer (IN/OUT)
3582
 *
3583
 * parse an XML name.
3584
 *
3585
 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3586
 *                  CombiningChar | Extender
3587
 *
3588
 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3589
 *
3590
 * [6] Names ::= Name (#x20 Name)*
3591
 *
3592
 * Returns the Name parsed or NULL. The @str pointer
3593
 * is updated to the current location in the string.
3594
 */
3595
3596
static xmlChar *
3597
18.8M
xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3598
18.8M
    xmlChar buf[XML_MAX_NAMELEN + 5];
3599
18.8M
    const xmlChar *cur = *str;
3600
18.8M
    int len = 0, l;
3601
18.8M
    int c;
3602
18.8M
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3603
1.89M
                    XML_MAX_TEXT_LENGTH :
3604
18.8M
                    XML_MAX_NAME_LENGTH;
3605
3606
#ifdef DEBUG
3607
    nbParseStringName++;
3608
#endif
3609
3610
18.8M
    c = CUR_SCHAR(cur, l);
3611
18.8M
    if (!xmlIsNameStartChar(ctxt, c)) {
3612
11.9k
  return(NULL);
3613
11.9k
    }
3614
3615
18.7M
    COPY_BUF(l,buf,len,c);
3616
18.7M
    cur += l;
3617
18.7M
    c = CUR_SCHAR(cur, l);
3618
143M
    while (xmlIsNameChar(ctxt, c)) {
3619
124M
  COPY_BUF(l,buf,len,c);
3620
124M
  cur += l;
3621
124M
  c = CUR_SCHAR(cur, l);
3622
124M
  if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3623
      /*
3624
       * Okay someone managed to make a huge name, so he's ready to pay
3625
       * for the processing speed.
3626
       */
3627
477k
      xmlChar *buffer;
3628
477k
      int max = len * 2;
3629
3630
477k
      buffer = (xmlChar *) xmlMallocAtomic(max);
3631
477k
      if (buffer == NULL) {
3632
0
          xmlErrMemory(ctxt, NULL);
3633
0
    return(NULL);
3634
0
      }
3635
477k
      memcpy(buffer, buf, len);
3636
112M
      while (xmlIsNameChar(ctxt, c)) {
3637
111M
    if (len + 10 > max) {
3638
481k
        xmlChar *tmp;
3639
3640
481k
        max *= 2;
3641
481k
        tmp = (xmlChar *) xmlRealloc(buffer, max);
3642
481k
        if (tmp == NULL) {
3643
0
      xmlErrMemory(ctxt, NULL);
3644
0
      xmlFree(buffer);
3645
0
      return(NULL);
3646
0
        }
3647
481k
        buffer = tmp;
3648
481k
    }
3649
111M
    COPY_BUF(l,buffer,len,c);
3650
111M
    cur += l;
3651
111M
    c = CUR_SCHAR(cur, l);
3652
111M
                if (len > maxLength) {
3653
0
                    xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3654
0
                    xmlFree(buffer);
3655
0
                    return(NULL);
3656
0
                }
3657
111M
      }
3658
477k
      buffer[len] = 0;
3659
477k
      *str = cur;
3660
477k
      return(buffer);
3661
477k
  }
3662
124M
    }
3663
18.3M
    if (len > maxLength) {
3664
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3665
0
        return(NULL);
3666
0
    }
3667
18.3M
    *str = cur;
3668
18.3M
    return(xmlStrndup(buf, len));
3669
18.3M
}
3670
3671
/**
3672
 * xmlParseNmtoken:
3673
 * @ctxt:  an XML parser context
3674
 *
3675
 * DEPRECATED: Internal function, don't use.
3676
 *
3677
 * parse an XML Nmtoken.
3678
 *
3679
 * [7] Nmtoken ::= (NameChar)+
3680
 *
3681
 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
3682
 *
3683
 * Returns the Nmtoken parsed or NULL
3684
 */
3685
3686
xmlChar *
3687
4.34M
xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3688
4.34M
    xmlChar buf[XML_MAX_NAMELEN + 5];
3689
4.34M
    int len = 0, l;
3690
4.34M
    int c;
3691
4.34M
    int count = 0;
3692
4.34M
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3693
1.47M
                    XML_MAX_TEXT_LENGTH :
3694
4.34M
                    XML_MAX_NAME_LENGTH;
3695
3696
#ifdef DEBUG
3697
    nbParseNmToken++;
3698
#endif
3699
3700
4.34M
    GROW;
3701
4.34M
    if (ctxt->instate == XML_PARSER_EOF)
3702
0
        return(NULL);
3703
4.34M
    c = CUR_CHAR(l);
3704
3705
25.9M
    while (xmlIsNameChar(ctxt, c)) {
3706
21.6M
  if (count++ > XML_PARSER_CHUNK_SIZE) {
3707
0
      count = 0;
3708
0
      GROW;
3709
0
  }
3710
21.6M
  COPY_BUF(l,buf,len,c);
3711
21.6M
  NEXTL(l);
3712
21.6M
  c = CUR_CHAR(l);
3713
21.6M
  if (c == 0) {
3714
3.79k
      count = 0;
3715
3.79k
      GROW;
3716
3.79k
      if (ctxt->instate == XML_PARSER_EOF)
3717
0
    return(NULL);
3718
3.79k
            c = CUR_CHAR(l);
3719
3.79k
  }
3720
21.6M
  if (len >= XML_MAX_NAMELEN) {
3721
      /*
3722
       * Okay someone managed to make a huge token, so he's ready to pay
3723
       * for the processing speed.
3724
       */
3725
6.25k
      xmlChar *buffer;
3726
6.25k
      int max = len * 2;
3727
3728
6.25k
      buffer = (xmlChar *) xmlMallocAtomic(max);
3729
6.25k
      if (buffer == NULL) {
3730
0
          xmlErrMemory(ctxt, NULL);
3731
0
    return(NULL);
3732
0
      }
3733
6.25k
      memcpy(buffer, buf, len);
3734
9.71M
      while (xmlIsNameChar(ctxt, c)) {
3735
9.70M
    if (count++ > XML_PARSER_CHUNK_SIZE) {
3736
98.4k
        count = 0;
3737
98.4k
        GROW;
3738
98.4k
                    if (ctxt->instate == XML_PARSER_EOF) {
3739
0
                        xmlFree(buffer);
3740
0
                        return(NULL);
3741
0
                    }
3742
98.4k
    }
3743
9.70M
    if (len + 10 > max) {
3744
13.3k
        xmlChar *tmp;
3745
3746
13.3k
        max *= 2;
3747
13.3k
        tmp = (xmlChar *) xmlRealloc(buffer, max);
3748
13.3k
        if (tmp == NULL) {
3749
0
      xmlErrMemory(ctxt, NULL);
3750
0
      xmlFree(buffer);
3751
0
      return(NULL);
3752
0
        }
3753
13.3k
        buffer = tmp;
3754
13.3k
    }
3755
9.70M
    COPY_BUF(l,buffer,len,c);
3756
9.70M
    NEXTL(l);
3757
9.70M
    c = CUR_CHAR(l);
3758
9.70M
                if (len > maxLength) {
3759
0
                    xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3760
0
                    xmlFree(buffer);
3761
0
                    return(NULL);
3762
0
                }
3763
9.70M
      }
3764
6.25k
      buffer[len] = 0;
3765
6.25k
      return(buffer);
3766
6.25k
  }
3767
21.6M
    }
3768
4.34M
    if (len == 0)
3769
44.7k
        return(NULL);
3770
4.29M
    if (len > maxLength) {
3771
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3772
0
        return(NULL);
3773
0
    }
3774
4.29M
    return(xmlStrndup(buf, len));
3775
4.29M
}
3776
3777
/**
3778
 * xmlParseEntityValue:
3779
 * @ctxt:  an XML parser context
3780
 * @orig:  if non-NULL store a copy of the original entity value
3781
 *
3782
 * DEPRECATED: Internal function, don't use.
3783
 *
3784
 * parse a value for ENTITY declarations
3785
 *
3786
 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3787
 *                 "'" ([^%&'] | PEReference | Reference)* "'"
3788
 *
3789
 * Returns the EntityValue parsed with reference substituted or NULL
3790
 */
3791
3792
xmlChar *
3793
3.61M
xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3794
3.61M
    xmlChar *buf = NULL;
3795
3.61M
    int len = 0;
3796
3.61M
    int size = XML_PARSER_BUFFER_SIZE;
3797
3.61M
    int c, l;
3798
3.61M
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3799
1.09M
                    XML_MAX_HUGE_LENGTH :
3800
3.61M
                    XML_MAX_TEXT_LENGTH;
3801
3.61M
    xmlChar stop;
3802
3.61M
    xmlChar *ret = NULL;
3803
3.61M
    const xmlChar *cur = NULL;
3804
3.61M
    xmlParserInputPtr input;
3805
3806
3.61M
    if (RAW == '"') stop = '"';
3807
879k
    else if (RAW == '\'') stop = '\'';
3808
0
    else {
3809
0
  xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
3810
0
  return(NULL);
3811
0
    }
3812
3.61M
    buf = (xmlChar *) xmlMallocAtomic(size);
3813
3.61M
    if (buf == NULL) {
3814
0
  xmlErrMemory(ctxt, NULL);
3815
0
  return(NULL);
3816
0
    }
3817
3818
    /*
3819
     * The content of the entity definition is copied in a buffer.
3820
     */
3821
3822
3.61M
    ctxt->instate = XML_PARSER_ENTITY_VALUE;
3823
3.61M
    input = ctxt->input;
3824
3.61M
    GROW;
3825
3.61M
    if (ctxt->instate == XML_PARSER_EOF)
3826
0
        goto error;
3827
3.61M
    NEXT;
3828
3.61M
    c = CUR_CHAR(l);
3829
    /*
3830
     * NOTE: 4.4.5 Included in Literal
3831
     * When a parameter entity reference appears in a literal entity
3832
     * value, ... a single or double quote character in the replacement
3833
     * text is always treated as a normal data character and will not
3834
     * terminate the literal.
3835
     * In practice it means we stop the loop only when back at parsing
3836
     * the initial entity and the quote is found
3837
     */
3838
138M
    while (((IS_CHAR(c)) && ((c != stop) || /* checked */
3839
138M
      (ctxt->input != input))) && (ctxt->instate != XML_PARSER_EOF)) {
3840
134M
  if (len + 5 >= size) {
3841
378k
      xmlChar *tmp;
3842
3843
378k
      size *= 2;
3844
378k
      tmp = (xmlChar *) xmlRealloc(buf, size);
3845
378k
      if (tmp == NULL) {
3846
0
    xmlErrMemory(ctxt, NULL);
3847
0
                goto error;
3848
0
      }
3849
378k
      buf = tmp;
3850
378k
  }
3851
134M
  COPY_BUF(l,buf,len,c);
3852
134M
  NEXTL(l);
3853
3854
134M
  GROW;
3855
134M
  c = CUR_CHAR(l);
3856
134M
  if (c == 0) {
3857
12.3k
      GROW;
3858
12.3k
      c = CUR_CHAR(l);
3859
12.3k
  }
3860
3861
134M
        if (len > maxLength) {
3862
0
            xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
3863
0
                           "entity value too long\n");
3864
0
            goto error;
3865
0
        }
3866
134M
    }
3867
3.61M
    buf[len] = 0;
3868
3.61M
    if (ctxt->instate == XML_PARSER_EOF)
3869
0
        goto error;
3870
3.61M
    if (c != stop) {
3871
18.3k
        xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
3872
18.3k
        goto error;
3873
18.3k
    }
3874
3.59M
    NEXT;
3875
3876
    /*
3877
     * Raise problem w.r.t. '&' and '%' being used in non-entities
3878
     * reference constructs. Note Charref will be handled in
3879
     * xmlStringDecodeEntities()
3880
     */
3881
3.59M
    cur = buf;
3882
108M
    while (*cur != 0) { /* non input consuming */
3883
104M
  if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3884
1.67M
      xmlChar *name;
3885
1.67M
      xmlChar tmp = *cur;
3886
1.67M
            int nameOk = 0;
3887
3888
1.67M
      cur++;
3889
1.67M
      name = xmlParseStringName(ctxt, &cur);
3890
1.67M
            if (name != NULL) {
3891
1.66M
                nameOk = 1;
3892
1.66M
                xmlFree(name);
3893
1.66M
            }
3894
1.67M
            if ((nameOk == 0) || (*cur != ';')) {
3895
24.0k
    xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
3896
24.0k
      "EntityValue: '%c' forbidden except for entities references\n",
3897
24.0k
                            tmp);
3898
24.0k
                goto error;
3899
24.0k
      }
3900
1.65M
      if ((tmp == '%') && (ctxt->inSubset == 1) &&
3901
1.65M
    (ctxt->inputNr == 1)) {
3902
17.2k
    xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
3903
17.2k
                goto error;
3904
17.2k
      }
3905
1.63M
      if (*cur == 0)
3906
0
          break;
3907
1.63M
  }
3908
104M
  cur++;
3909
104M
    }
3910
3911
    /*
3912
     * Then PEReference entities are substituted.
3913
     *
3914
     * NOTE: 4.4.7 Bypassed
3915
     * When a general entity reference appears in the EntityValue in
3916
     * an entity declaration, it is bypassed and left as is.
3917
     * so XML_SUBSTITUTE_REF is not set here.
3918
     */
3919
3.55M
    ++ctxt->depth;
3920
3.55M
    ret = xmlStringDecodeEntitiesInt(ctxt, buf, len, XML_SUBSTITUTE_PEREF,
3921
3.55M
                                     0, 0, 0, /* check */ 1);
3922
3.55M
    --ctxt->depth;
3923
3924
3.55M
    if (orig != NULL) {
3925
3.55M
        *orig = buf;
3926
3.55M
        buf = NULL;
3927
3.55M
    }
3928
3929
3.61M
error:
3930
3.61M
    if (buf != NULL)
3931
59.6k
        xmlFree(buf);
3932
3.61M
    return(ret);
3933
3.55M
}
3934
3935
/**
3936
 * xmlParseAttValueComplex:
3937
 * @ctxt:  an XML parser context
3938
 * @len:   the resulting attribute len
3939
 * @normalize:  whether to apply the inner normalization
3940
 *
3941
 * parse a value for an attribute, this is the fallback function
3942
 * of xmlParseAttValue() when the attribute parsing requires handling
3943
 * of non-ASCII characters, or normalization compaction.
3944
 *
3945
 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3946
 */
3947
static xmlChar *
3948
1.45M
xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
3949
1.45M
    xmlChar limit = 0;
3950
1.45M
    xmlChar *buf = NULL;
3951
1.45M
    xmlChar *rep = NULL;
3952
1.45M
    size_t len = 0;
3953
1.45M
    size_t buf_size = 0;
3954
1.45M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3955
516k
                       XML_MAX_HUGE_LENGTH :
3956
1.45M
                       XML_MAX_TEXT_LENGTH;
3957
1.45M
    int c, l, in_space = 0;
3958
1.45M
    xmlChar *current = NULL;
3959
1.45M
    xmlEntityPtr ent;
3960
3961
1.45M
    if (NXT(0) == '"') {
3962
870k
  ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3963
870k
  limit = '"';
3964
870k
        NEXT;
3965
870k
    } else if (NXT(0) == '\'') {
3966
586k
  limit = '\'';
3967
586k
  ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3968
586k
        NEXT;
3969
586k
    } else {
3970
0
  xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
3971
0
  return(NULL);
3972
0
    }
3973
3974
    /*
3975
     * allocate a translation buffer.
3976
     */
3977
1.45M
    buf_size = XML_PARSER_BUFFER_SIZE;
3978
1.45M
    buf = (xmlChar *) xmlMallocAtomic(buf_size);
3979
1.45M
    if (buf == NULL) goto mem_error;
3980
3981
    /*
3982
     * OK loop until we reach one of the ending char or a size limit.
3983
     */
3984
1.45M
    c = CUR_CHAR(l);
3985
105M
    while (((NXT(0) != limit) && /* checked */
3986
105M
            (IS_CHAR(c)) && (c != '<')) &&
3987
105M
            (ctxt->instate != XML_PARSER_EOF)) {
3988
103M
  if (c == '&') {
3989
3.37M
      in_space = 0;
3990
3.37M
      if (NXT(1) == '#') {
3991
634k
    int val = xmlParseCharRef(ctxt);
3992
3993
634k
    if (val == '&') {
3994
8.08k
        if (ctxt->replaceEntities) {
3995
4.34k
      if (len + 10 > buf_size) {
3996
126
          growBuffer(buf, 10);
3997
126
      }
3998
4.34k
      buf[len++] = '&';
3999
4.34k
        } else {
4000
      /*
4001
       * The reparsing will be done in xmlStringGetNodeList()
4002
       * called by the attribute() function in SAX.c
4003
       */
4004
3.74k
      if (len + 10 > buf_size) {
4005
126
          growBuffer(buf, 10);
4006
126
      }
4007
3.74k
      buf[len++] = '&';
4008
3.74k
      buf[len++] = '#';
4009
3.74k
      buf[len++] = '3';
4010
3.74k
      buf[len++] = '8';
4011
3.74k
      buf[len++] = ';';
4012
3.74k
        }
4013
626k
    } else if (val != 0) {
4014
571k
        if (len + 10 > buf_size) {
4015
16.1k
      growBuffer(buf, 10);
4016
16.1k
        }
4017
571k
        len += xmlCopyChar(0, &buf[len], val);
4018
571k
    }
4019
2.73M
      } else {
4020
2.73M
    ent = xmlParseEntityRef(ctxt);
4021
2.73M
    if ((ent != NULL) &&
4022
2.73M
        (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
4023
229k
        if (len + 10 > buf_size) {
4024
222
      growBuffer(buf, 10);
4025
222
        }
4026
229k
        if ((ctxt->replaceEntities == 0) &&
4027
229k
            (ent->content[0] == '&')) {
4028
66.5k
      buf[len++] = '&';
4029
66.5k
      buf[len++] = '#';
4030
66.5k
      buf[len++] = '3';
4031
66.5k
      buf[len++] = '8';
4032
66.5k
      buf[len++] = ';';
4033
163k
        } else {
4034
163k
      buf[len++] = ent->content[0];
4035
163k
        }
4036
2.50M
    } else if ((ent != NULL) &&
4037
2.50M
               (ctxt->replaceEntities != 0)) {
4038
1.09M
        if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
4039
1.09M
                        if (xmlParserEntityCheck(ctxt, ent->length))
4040
0
                            goto error;
4041
4042
1.09M
      ++ctxt->depth;
4043
1.09M
      rep = xmlStringDecodeEntitiesInt(ctxt, ent->content,
4044
1.09M
                                ent->length, XML_SUBSTITUTE_REF, 0, 0, 0,
4045
1.09M
                                /* check */ 1);
4046
1.09M
      --ctxt->depth;
4047
1.09M
      if (rep != NULL) {
4048
1.07M
          current = rep;
4049
166M
          while (*current != 0) { /* non input consuming */
4050
165M
                                if ((*current == 0xD) || (*current == 0xA) ||
4051
165M
                                    (*current == 0x9)) {
4052
214k
                                    buf[len++] = 0x20;
4053
214k
                                    current++;
4054
214k
                                } else
4055
165M
                                    buf[len++] = *current++;
4056
165M
        if (len + 10 > buf_size) {
4057
21.1k
            growBuffer(buf, 10);
4058
21.1k
        }
4059
165M
          }
4060
1.07M
          xmlFree(rep);
4061
1.07M
          rep = NULL;
4062
1.07M
      }
4063
1.09M
        } else {
4064
0
      if (len + 10 > buf_size) {
4065
0
          growBuffer(buf, 10);
4066
0
      }
4067
0
      if (ent->content != NULL)
4068
0
          buf[len++] = ent->content[0];
4069
0
        }
4070
1.41M
    } else if (ent != NULL) {
4071
738k
        int i = xmlStrlen(ent->name);
4072
738k
        const xmlChar *cur = ent->name;
4073
4074
        /*
4075
                     * We also check for recursion and amplification
4076
                     * when entities are not substituted. They're
4077
                     * often expanded later.
4078
         */
4079
738k
        if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
4080
738k
      (ent->content != NULL)) {
4081
718k
                        if ((ent->flags & XML_ENT_CHECKED) == 0) {
4082
40.0k
                            unsigned long oldCopy = ctxt->sizeentcopy;
4083
4084
40.0k
                            ctxt->sizeentcopy = ent->length;
4085
4086
40.0k
                            ++ctxt->depth;
4087
40.0k
                            rep = xmlStringDecodeEntitiesInt(ctxt,
4088
40.0k
                                    ent->content, ent->length,
4089
40.0k
                                    XML_SUBSTITUTE_REF, 0, 0, 0,
4090
40.0k
                                    /* check */ 1);
4091
40.0k
                            --ctxt->depth;
4092
4093
                            /*
4094
                             * If we're parsing DTD content, the entity
4095
                             * might reference other entities which
4096
                             * weren't defined yet, so the check isn't
4097
                             * reliable.
4098
                             */
4099
40.0k
                            if (ctxt->inSubset == 0) {
4100
37.8k
                                ent->flags |= XML_ENT_CHECKED;
4101
37.8k
                                ent->expandedSize = ctxt->sizeentcopy;
4102
37.8k
                            }
4103
4104
40.0k
                            if (rep != NULL) {
4105
39.5k
                                xmlFree(rep);
4106
39.5k
                                rep = NULL;
4107
39.5k
                            } else {
4108
545
                                ent->content[0] = 0;
4109
545
                            }
4110
4111
40.0k
                            if (xmlParserEntityCheck(ctxt, oldCopy))
4112
0
                                goto error;
4113
678k
                        } else {
4114
678k
                            if (xmlParserEntityCheck(ctxt, ent->expandedSize))
4115
0
                                goto error;
4116
678k
                        }
4117
718k
        }
4118
4119
        /*
4120
         * Just output the reference
4121
         */
4122
738k
        buf[len++] = '&';
4123
742k
        while (len + i + 10 > buf_size) {
4124
9.15k
      growBuffer(buf, i + 10);
4125
9.15k
        }
4126
3.11M
        for (;i > 0;i--)
4127
2.38M
      buf[len++] = *cur++;
4128
738k
        buf[len++] = ';';
4129
738k
    }
4130
2.73M
      }
4131
100M
  } else {
4132
100M
      if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
4133
15.8M
          if ((len != 0) || (!normalize)) {
4134
15.6M
        if ((!normalize) || (!in_space)) {
4135
15.2M
      COPY_BUF(l,buf,len,0x20);
4136
15.2M
      while (len + 10 > buf_size) {
4137
76.4k
          growBuffer(buf, 10);
4138
76.4k
      }
4139
15.2M
        }
4140
15.6M
        in_space = 1;
4141
15.6M
    }
4142
84.6M
      } else {
4143
84.6M
          in_space = 0;
4144
84.6M
    COPY_BUF(l,buf,len,c);
4145
84.6M
    if (len + 10 > buf_size) {
4146
433k
        growBuffer(buf, 10);
4147
433k
    }
4148
84.6M
      }
4149
100M
      NEXTL(l);
4150
100M
  }
4151
103M
  GROW;
4152
103M
  c = CUR_CHAR(l);
4153
103M
        if (len > maxLength) {
4154
0
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4155
0
                           "AttValue length too long\n");
4156
0
            goto mem_error;
4157
0
        }
4158
103M
    }
4159
1.45M
    if (ctxt->instate == XML_PARSER_EOF)
4160
1.43k
        goto error;
4161
4162
1.45M
    if ((in_space) && (normalize)) {
4163
70.3k
        while ((len > 0) && (buf[len - 1] == 0x20)) len--;
4164
34.0k
    }
4165
1.45M
    buf[len] = 0;
4166
1.45M
    if (RAW == '<') {
4167
186k
  xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
4168
1.26M
    } else if (RAW != limit) {
4169
248k
  if ((c != 0) && (!IS_CHAR(c))) {
4170
105k
      xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
4171
105k
         "invalid character in attribute value\n");
4172
142k
  } else {
4173
142k
      xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4174
142k
         "AttValue: ' expected\n");
4175
142k
        }
4176
248k
    } else
4177
1.02M
  NEXT;
4178
4179
1.45M
    if (attlen != NULL) *attlen = len;
4180
1.45M
    return(buf);
4181
4182
0
mem_error:
4183
0
    xmlErrMemory(ctxt, NULL);
4184
1.43k
error:
4185
1.43k
    if (buf != NULL)
4186
1.43k
        xmlFree(buf);
4187
1.43k
    if (rep != NULL)
4188
0
        xmlFree(rep);
4189
1.43k
    return(NULL);
4190
0
}
4191
4192
/**
4193
 * xmlParseAttValue:
4194
 * @ctxt:  an XML parser context
4195
 *
4196
 * DEPRECATED: Internal function, don't use.
4197
 *
4198
 * parse a value for an attribute
4199
 * Note: the parser won't do substitution of entities here, this
4200
 * will be handled later in xmlStringGetNodeList
4201
 *
4202
 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
4203
 *                   "'" ([^<&'] | Reference)* "'"
4204
 *
4205
 * 3.3.3 Attribute-Value Normalization:
4206
 * Before the value of an attribute is passed to the application or
4207
 * checked for validity, the XML processor must normalize it as follows:
4208
 * - a character reference is processed by appending the referenced
4209
 *   character to the attribute value
4210
 * - an entity reference is processed by recursively processing the
4211
 *   replacement text of the entity
4212
 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4213
 *   appending #x20 to the normalized value, except that only a single
4214
 *   #x20 is appended for a "#xD#xA" sequence that is part of an external
4215
 *   parsed entity or the literal entity value of an internal parsed entity
4216
 * - other characters are processed by appending them to the normalized value
4217
 * If the declared value is not CDATA, then the XML processor must further
4218
 * process the normalized attribute value by discarding any leading and
4219
 * trailing space (#x20) characters, and by replacing sequences of space
4220
 * (#x20) characters by a single space (#x20) character.
4221
 * All attributes for which no declaration has been read should be treated
4222
 * by a non-validating parser as if declared CDATA.
4223
 *
4224
 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
4225
 */
4226
4227
4228
xmlChar *
4229
13.8M
xmlParseAttValue(xmlParserCtxtPtr ctxt) {
4230
13.8M
    if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
4231
13.8M
    return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
4232
13.8M
}
4233
4234
/**
4235
 * xmlParseSystemLiteral:
4236
 * @ctxt:  an XML parser context
4237
 *
4238
 * DEPRECATED: Internal function, don't use.
4239
 *
4240
 * parse an XML Literal
4241
 *
4242
 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4243
 *
4244
 * Returns the SystemLiteral parsed or NULL
4245
 */
4246
4247
xmlChar *
4248
920k
xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
4249
920k
    xmlChar *buf = NULL;
4250
920k
    int len = 0;
4251
920k
    int size = XML_PARSER_BUFFER_SIZE;
4252
920k
    int cur, l;
4253
920k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4254
313k
                    XML_MAX_TEXT_LENGTH :
4255
920k
                    XML_MAX_NAME_LENGTH;
4256
920k
    xmlChar stop;
4257
920k
    int state = ctxt->instate;
4258
920k
    int count = 0;
4259
4260
920k
    SHRINK;
4261
920k
    if (RAW == '"') {
4262
861k
        NEXT;
4263
861k
  stop = '"';
4264
861k
    } else if (RAW == '\'') {
4265
27.3k
        NEXT;
4266
27.3k
  stop = '\'';
4267
31.4k
    } else {
4268
31.4k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4269
31.4k
  return(NULL);
4270
31.4k
    }
4271
4272
889k
    buf = (xmlChar *) xmlMallocAtomic(size);
4273
889k
    if (buf == NULL) {
4274
0
        xmlErrMemory(ctxt, NULL);
4275
0
  return(NULL);
4276
0
    }
4277
889k
    ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
4278
889k
    cur = CUR_CHAR(l);
4279
47.0M
    while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
4280
46.1M
  if (len + 5 >= size) {
4281
60.5k
      xmlChar *tmp;
4282
4283
60.5k
      size *= 2;
4284
60.5k
      tmp = (xmlChar *) xmlRealloc(buf, size);
4285
60.5k
      if (tmp == NULL) {
4286
0
          xmlFree(buf);
4287
0
    xmlErrMemory(ctxt, NULL);
4288
0
    ctxt->instate = (xmlParserInputState) state;
4289
0
    return(NULL);
4290
0
      }
4291
60.5k
      buf = tmp;
4292
60.5k
  }
4293
46.1M
  count++;
4294
46.1M
  if (count > 50) {
4295
594k
      SHRINK;
4296
594k
      GROW;
4297
594k
      count = 0;
4298
594k
            if (ctxt->instate == XML_PARSER_EOF) {
4299
0
          xmlFree(buf);
4300
0
    return(NULL);
4301
0
            }
4302
594k
  }
4303
46.1M
  COPY_BUF(l,buf,len,cur);
4304
46.1M
  NEXTL(l);
4305
46.1M
  cur = CUR_CHAR(l);
4306
46.1M
  if (cur == 0) {
4307
16.0k
      GROW;
4308
16.0k
      SHRINK;
4309
16.0k
      cur = CUR_CHAR(l);
4310
16.0k
  }
4311
46.1M
        if (len > maxLength) {
4312
19
            xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
4313
19
            xmlFree(buf);
4314
19
            ctxt->instate = (xmlParserInputState) state;
4315
19
            return(NULL);
4316
19
        }
4317
46.1M
    }
4318
889k
    buf[len] = 0;
4319
889k
    ctxt->instate = (xmlParserInputState) state;
4320
889k
    if (!IS_CHAR(cur)) {
4321
24.6k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4322
864k
    } else {
4323
864k
  NEXT;
4324
864k
    }
4325
889k
    return(buf);
4326
889k
}
4327
4328
/**
4329
 * xmlParsePubidLiteral:
4330
 * @ctxt:  an XML parser context
4331
 *
4332
 * DEPRECATED: Internal function, don't use.
4333
 *
4334
 * parse an XML public literal
4335
 *
4336
 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4337
 *
4338
 * Returns the PubidLiteral parsed or NULL.
4339
 */
4340
4341
xmlChar *
4342
233k
xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
4343
233k
    xmlChar *buf = NULL;
4344
233k
    int len = 0;
4345
233k
    int size = XML_PARSER_BUFFER_SIZE;
4346
233k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4347
73.1k
                    XML_MAX_TEXT_LENGTH :
4348
233k
                    XML_MAX_NAME_LENGTH;
4349
233k
    xmlChar cur;
4350
233k
    xmlChar stop;
4351
233k
    int count = 0;
4352
233k
    xmlParserInputState oldstate = ctxt->instate;
4353
4354
233k
    SHRINK;
4355
233k
    if (RAW == '"') {
4356
216k
        NEXT;
4357
216k
  stop = '"';
4358
216k
    } else if (RAW == '\'') {
4359
11.9k
        NEXT;
4360
11.9k
  stop = '\'';
4361
11.9k
    } else {
4362
5.71k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4363
5.71k
  return(NULL);
4364
5.71k
    }
4365
228k
    buf = (xmlChar *) xmlMallocAtomic(size);
4366
228k
    if (buf == NULL) {
4367
0
  xmlErrMemory(ctxt, NULL);
4368
0
  return(NULL);
4369
0
    }
4370
228k
    ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
4371
228k
    cur = CUR;
4372
17.9M
    while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
4373
17.7M
  if (len + 1 >= size) {
4374
25.6k
      xmlChar *tmp;
4375
4376
25.6k
      size *= 2;
4377
25.6k
      tmp = (xmlChar *) xmlRealloc(buf, size);
4378
25.6k
      if (tmp == NULL) {
4379
0
    xmlErrMemory(ctxt, NULL);
4380
0
    xmlFree(buf);
4381
0
    return(NULL);
4382
0
      }
4383
25.6k
      buf = tmp;
4384
25.6k
  }
4385
17.7M
  buf[len++] = cur;
4386
17.7M
  count++;
4387
17.7M
  if (count > 50) {
4388
223k
      SHRINK;
4389
223k
      GROW;
4390
223k
      count = 0;
4391
223k
            if (ctxt->instate == XML_PARSER_EOF) {
4392
0
    xmlFree(buf);
4393
0
    return(NULL);
4394
0
            }
4395
223k
  }
4396
17.7M
  NEXT;
4397
17.7M
  cur = CUR;
4398
17.7M
  if (cur == 0) {
4399
4.93k
      GROW;
4400
4.93k
      SHRINK;
4401
4.93k
      cur = CUR;
4402
4.93k
  }
4403
17.7M
        if (len > maxLength) {
4404
3
            xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
4405
3
            xmlFree(buf);
4406
3
            return(NULL);
4407
3
        }
4408
17.7M
    }
4409
228k
    buf[len] = 0;
4410
228k
    if (cur != stop) {
4411
17.5k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4412
210k
    } else {
4413
210k
  NEXT;
4414
210k
    }
4415
228k
    ctxt->instate = oldstate;
4416
228k
    return(buf);
4417
228k
}
4418
4419
static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt);
4420
4421
/*
 * Lookup table driving the inner scanning loop of xmlParseCharData().
 *
 * A byte value maps to a non-zero entry when the fast loop may step over
 * it without further inspection, and to zero when the loop has to stop.
 * The zero entries are: all control characters except TAB (0x09), the
 * three markup-sensitive characters '&', '<' and ']', and every byte
 * >= 0x80 (non-ASCII).
 */
static const unsigned char test_char_data[256] = {
    /* 0x00 - 0x1F: control characters, only TAB (0x09) is accepted */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x00 */
    0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x08: LF and CR stop */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x10 */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x18 */
    /* 0x20 - 0x7F: printable ASCII, minus '&', '<' and ']' */
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* 0x20: '&' stops */
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F, /* 0x28 */
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, /* 0x30 */
    0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* 0x38: '<' stops */
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, /* 0x40 */
    0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, /* 0x48 */
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, /* 0x50 */
    0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* 0x58: ']' stops */
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, /* 0x60 */
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, /* 0x68 */
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, /* 0x70 */
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F, /* 0x78 */
    /* 0x80 - 0xFF: non-ASCII bytes always stop the fast loop */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x80 */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x88 */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x90 */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x98 */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xA0 */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xA8 */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xB0 */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xB8 */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xC0 */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xC8 */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xD0 */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xD8 */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xE0 */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xE8 */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xF0 */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  /* 0xF8 */
};
4458
4459
/**
4460
 * xmlParseCharData:
4461
 * @ctxt:  an XML parser context
4462
 * @cdata:  unused
4463
 *
4464
 * DEPRECATED: Internal function, don't use.
4465
 *
4466
 * Parse character data. Always makes progress if the first char isn't
4467
 * '<' or '&'.
4468
 *
4469
 * if we are within a CDATA section ']]>' marks an end of section.
4470
 *
4471
 * The right angle bracket (>) may be represented using the string "&gt;",
4472
 * and must, for compatibility, be escaped using "&gt;" or a character
4473
 * reference when it appears in the string "]]>" in content, when that
4474
 * string is not marking the end of a CDATA section.
4475
 *
4476
 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4477
 */
4478
4479
void
4480
74.5M
xmlParseCharData(xmlParserCtxtPtr ctxt, ATTRIBUTE_UNUSED int cdata) {
4481
74.5M
    const xmlChar *in;
4482
74.5M
    int nbchar = 0;
4483
74.5M
    int line = ctxt->input->line;
4484
74.5M
    int col = ctxt->input->col;
4485
74.5M
    int ccol;
4486
4487
74.5M
    SHRINK;
4488
74.5M
    GROW;
4489
    /*
4490
     * Accelerated common case where input don't need to be
4491
     * modified before passing it to the handler.
4492
     */
4493
74.5M
    in = ctxt->input->cur;
4494
91.0M
    do {
4495
119M
get_more_space:
4496
169M
        while (*in == 0x20) { in++; ctxt->input->col++; }
4497
119M
        if (*in == 0xA) {
4498
29.9M
            do {
4499
29.9M
                ctxt->input->line++; ctxt->input->col = 1;
4500
29.9M
                in++;
4501
29.9M
            } while (*in == 0xA);
4502
28.8M
            goto get_more_space;
4503
28.8M
        }
4504
91.0M
        if (*in == '<') {
4505
24.0M
            nbchar = in - ctxt->input->cur;
4506
24.0M
            if (nbchar > 0) {
4507
24.0M
                const xmlChar *tmp = ctxt->input->cur;
4508
24.0M
                ctxt->input->cur = in;
4509
4510
24.0M
                if ((ctxt->sax != NULL) &&
4511
24.0M
                    (ctxt->sax->ignorableWhitespace !=
4512
24.0M
                     ctxt->sax->characters)) {
4513
9.80M
                    if (areBlanks(ctxt, tmp, nbchar, 1)) {
4514
7.67M
                        if (ctxt->sax->ignorableWhitespace != NULL)
4515
7.67M
                            ctxt->sax->ignorableWhitespace(ctxt->userData,
4516
7.67M
                                                   tmp, nbchar);
4517
7.67M
                    } else {
4518
2.13M
                        if (ctxt->sax->characters != NULL)
4519
2.13M
                            ctxt->sax->characters(ctxt->userData,
4520
2.13M
                                                  tmp, nbchar);
4521
2.13M
                        if (*ctxt->space == -1)
4522
436k
                            *ctxt->space = -2;
4523
2.13M
                    }
4524
14.2M
                } else if ((ctxt->sax != NULL) &&
4525
14.2M
                           (ctxt->sax->characters != NULL)) {
4526
14.2M
                    ctxt->sax->characters(ctxt->userData,
4527
14.2M
                                          tmp, nbchar);
4528
14.2M
                }
4529
24.0M
            }
4530
24.0M
            return;
4531
24.0M
        }
4532
4533
91.6M
get_more:
4534
91.6M
        ccol = ctxt->input->col;
4535
1.61G
        while (test_char_data[*in]) {
4536
1.52G
            in++;
4537
1.52G
            ccol++;
4538
1.52G
        }
4539
91.6M
        ctxt->input->col = ccol;
4540
91.6M
        if (*in == 0xA) {
4541
23.7M
            do {
4542
23.7M
                ctxt->input->line++; ctxt->input->col = 1;
4543
23.7M
                in++;
4544
23.7M
            } while (*in == 0xA);
4545
23.2M
            goto get_more;
4546
23.2M
        }
4547
68.3M
        if (*in == ']') {
4548
1.42M
            if ((in[1] == ']') && (in[2] == '>')) {
4549
28.5k
                xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4550
28.5k
                ctxt->input->cur = in + 1;
4551
28.5k
                return;
4552
28.5k
            }
4553
1.39M
            in++;
4554
1.39M
            ctxt->input->col++;
4555
1.39M
            goto get_more;
4556
1.42M
        }
4557
66.9M
        nbchar = in - ctxt->input->cur;
4558
66.9M
        if (nbchar > 0) {
4559
52.7M
            if ((ctxt->sax != NULL) &&
4560
52.7M
                (ctxt->sax->ignorableWhitespace !=
4561
52.7M
                 ctxt->sax->characters) &&
4562
52.7M
                (IS_BLANK_CH(*ctxt->input->cur))) {
4563
9.98M
                const xmlChar *tmp = ctxt->input->cur;
4564
9.98M
                ctxt->input->cur = in;
4565
4566
9.98M
                if (areBlanks(ctxt, tmp, nbchar, 0)) {
4567
3.92M
                    if (ctxt->sax->ignorableWhitespace != NULL)
4568
3.92M
                        ctxt->sax->ignorableWhitespace(ctxt->userData,
4569
3.92M
                                                       tmp, nbchar);
4570
6.06M
                } else {
4571
6.06M
                    if (ctxt->sax->characters != NULL)
4572
6.06M
                        ctxt->sax->characters(ctxt->userData,
4573
6.06M
                                              tmp, nbchar);
4574
6.06M
                    if (*ctxt->space == -1)
4575
1.47M
                        *ctxt->space = -2;
4576
6.06M
                }
4577
9.98M
                line = ctxt->input->line;
4578
9.98M
                col = ctxt->input->col;
4579
42.8M
            } else if (ctxt->sax != NULL) {
4580
42.8M
                if (ctxt->sax->characters != NULL)
4581
42.8M
                    ctxt->sax->characters(ctxt->userData,
4582
42.8M
                                          ctxt->input->cur, nbchar);
4583
42.8M
                line = ctxt->input->line;
4584
42.8M
                col = ctxt->input->col;
4585
42.8M
            }
4586
52.7M
        }
4587
66.9M
        ctxt->input->cur = in;
4588
66.9M
        if (*in == 0xD) {
4589
16.6M
            in++;
4590
16.6M
            if (*in == 0xA) {
4591
16.5M
                ctxt->input->cur = in;
4592
16.5M
                in++;
4593
16.5M
                ctxt->input->line++; ctxt->input->col = 1;
4594
16.5M
                continue; /* while */
4595
16.5M
            }
4596
79.5k
            in--;
4597
79.5k
        }
4598
50.3M
        if (*in == '<') {
4599
42.4M
            return;
4600
42.4M
        }
4601
7.94M
        if (*in == '&') {
4602
3.28M
            return;
4603
3.28M
        }
4604
4.66M
        SHRINK;
4605
4.66M
        GROW;
4606
4.66M
        if (ctxt->instate == XML_PARSER_EOF)
4607
0
            return;
4608
4.66M
        in = ctxt->input->cur;
4609
21.2M
    } while (((*in >= 0x20) && (*in <= 0x7F)) ||
4610
21.2M
             (*in == 0x09) || (*in == 0x0a));
4611
4.78M
    ctxt->input->line = line;
4612
4.78M
    ctxt->input->col = col;
4613
4.78M
    xmlParseCharDataComplex(ctxt);
4614
4.78M
}
4615
4616
/**
4617
 * xmlParseCharDataComplex:
4618
 * @ctxt:  an XML parser context
4619
 * @cdata:  int indicating whether we are within a CDATA section
4620
 *
4621
 * Always makes progress if the first char isn't '<' or '&'.
4622
 *
4623
 * parse a CharData section.this is the fallback function
4624
 * of xmlParseCharData() when the parsing requires handling
4625
 * of non-ASCII characters.
4626
 */
4627
static void
4628
4.78M
xmlParseCharDataComplex(xmlParserCtxtPtr ctxt) {
4629
4.78M
    xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4630
4.78M
    int nbchar = 0;
4631
4.78M
    int cur, l;
4632
4.78M
    int count = 0;
4633
4634
4.78M
    SHRINK;
4635
4.78M
    GROW;
4636
4.78M
    cur = CUR_CHAR(l);
4637
147M
    while ((cur != '<') && /* checked */
4638
147M
           (cur != '&') &&
4639
147M
     (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
4640
142M
  if ((cur == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
4641
21.7k
      xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4642
21.7k
  }
4643
142M
  COPY_BUF(l,buf,nbchar,cur);
4644
  /* move current position before possible calling of ctxt->sax->characters */
4645
142M
  NEXTL(l);
4646
142M
  cur = CUR_CHAR(l);
4647
142M
  if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
4648
372k
      buf[nbchar] = 0;
4649
4650
      /*
4651
       * OK the segment is to be consumed as chars.
4652
       */
4653
372k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4654
294k
    if (areBlanks(ctxt, buf, nbchar, 0)) {
4655
1
        if (ctxt->sax->ignorableWhitespace != NULL)
4656
1
      ctxt->sax->ignorableWhitespace(ctxt->userData,
4657
1
                                     buf, nbchar);
4658
294k
    } else {
4659
294k
        if (ctxt->sax->characters != NULL)
4660
294k
      ctxt->sax->characters(ctxt->userData, buf, nbchar);
4661
294k
        if ((ctxt->sax->characters !=
4662
294k
             ctxt->sax->ignorableWhitespace) &&
4663
294k
      (*ctxt->space == -1))
4664
12.7k
      *ctxt->space = -2;
4665
294k
    }
4666
294k
      }
4667
372k
      nbchar = 0;
4668
            /* something really bad happened in the SAX callback */
4669
372k
            if (ctxt->instate != XML_PARSER_CONTENT)
4670
0
                return;
4671
372k
  }
4672
142M
  count++;
4673
142M
  if (count > 50) {
4674
2.31M
      SHRINK;
4675
2.31M
      GROW;
4676
2.31M
      count = 0;
4677
2.31M
            if (ctxt->instate == XML_PARSER_EOF)
4678
0
    return;
4679
2.31M
  }
4680
142M
    }
4681
4.78M
    if (nbchar != 0) {
4682
1.84M
        buf[nbchar] = 0;
4683
  /*
4684
   * OK the segment is to be consumed as chars.
4685
   */
4686
1.84M
  if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4687
1.58M
      if (areBlanks(ctxt, buf, nbchar, 0)) {
4688
11.4k
    if (ctxt->sax->ignorableWhitespace != NULL)
4689
11.4k
        ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4690
1.57M
      } else {
4691
1.57M
    if (ctxt->sax->characters != NULL)
4692
1.57M
        ctxt->sax->characters(ctxt->userData, buf, nbchar);
4693
1.57M
    if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4694
1.57M
        (*ctxt->space == -1))
4695
333k
        *ctxt->space = -2;
4696
1.57M
      }
4697
1.58M
  }
4698
1.84M
    }
4699
4.78M
    if ((ctxt->input->cur < ctxt->input->end) && (!IS_CHAR(cur))) {
4700
  /* Generate the error and skip the offending character */
4701
2.70M
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4702
2.70M
                          "PCDATA invalid Char value %d\n",
4703
2.70M
                    cur ? cur : CUR);
4704
2.70M
  NEXT;
4705
2.70M
    }
4706
4.78M
}
4707
4708
/**
4709
 * xmlParseExternalID:
4710
 * @ctxt:  an XML parser context
4711
 * @publicID:  a xmlChar** receiving PubidLiteral
4712
 * @strict: indicate whether we should restrict parsing to only
4713
 *          production [75], see NOTE below
4714
 *
4715
 * DEPRECATED: Internal function, don't use.
4716
 *
4717
 * Parse an External ID or a Public ID
4718
 *
4719
 * NOTE: Productions [75] and [83] interact badly since [75] can generate
4720
 *       'PUBLIC' S PubidLiteral S SystemLiteral
4721
 *
4722
 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4723
 *                   | 'PUBLIC' S PubidLiteral S SystemLiteral
4724
 *
4725
 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4726
 *
4727
 * Returns the function returns SystemLiteral and in the second
4728
 *                case publicID receives PubidLiteral, is strict is off
4729
 *                it is possible to return NULL and have publicID set.
4730
 */
4731
4732
xmlChar *
4733
1.59M
xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4734
1.59M
    xmlChar *URI = NULL;
4735
4736
1.59M
    SHRINK;
4737
4738
1.59M
    *publicID = NULL;
4739
1.59M
    if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
4740
691k
        SKIP(6);
4741
691k
  if (SKIP_BLANKS == 0) {
4742
2.91k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4743
2.91k
                     "Space required after 'SYSTEM'\n");
4744
2.91k
  }
4745
691k
  URI = xmlParseSystemLiteral(ctxt);
4746
691k
  if (URI == NULL) {
4747
4.31k
      xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4748
4.31k
        }
4749
904k
    } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
4750
233k
        SKIP(6);
4751
233k
  if (SKIP_BLANKS == 0) {
4752
3.81k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4753
3.81k
        "Space required after 'PUBLIC'\n");
4754
3.81k
  }
4755
233k
  *publicID = xmlParsePubidLiteral(ctxt);
4756
233k
  if (*publicID == NULL) {
4757
5.72k
      xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
4758
5.72k
  }
4759
233k
  if (strict) {
4760
      /*
4761
       * We don't handle [83] so "S SystemLiteral" is required.
4762
       */
4763
229k
      if (SKIP_BLANKS == 0) {
4764
26.2k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4765
26.2k
      "Space required after the Public Identifier\n");
4766
26.2k
      }
4767
229k
  } else {
4768
      /*
4769
       * We handle [83] so we return immediately, if
4770
       * "S SystemLiteral" is not detected. We skip blanks if no
4771
             * system literal was found, but this is harmless since we must
4772
             * be at the end of a NotationDecl.
4773
       */
4774
4.62k
      if (SKIP_BLANKS == 0) return(NULL);
4775
372
      if ((CUR != '\'') && (CUR != '"')) return(NULL);
4776
372
  }
4777
229k
  URI = xmlParseSystemLiteral(ctxt);
4778
229k
  if (URI == NULL) {
4779
27.1k
      xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4780
27.1k
        }
4781
229k
    }
4782
1.59M
    return(URI);
4783
1.59M
}
4784
4785
/**
4786
 * xmlParseCommentComplex:
4787
 * @ctxt:  an XML parser context
4788
 * @buf:  the already parsed part of the buffer
4789
 * @len:  number of bytes in the buffer
4790
 * @size:  allocated size of the buffer
4791
 *
4792
 * Skip an XML (SGML) comment <!-- .... -->
4793
 *  The spec says that "For compatibility, the string "--" (double-hyphen)
4794
 *  must not occur within comments. "
4795
 * This is the slow routine in case the accelerator for ascii didn't work
4796
 *
4797
 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4798
 */
4799
static void
4800
xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf,
4801
430k
                       size_t len, size_t size) {
4802
430k
    int q, ql;
4803
430k
    int r, rl;
4804
430k
    int cur, l;
4805
430k
    size_t count = 0;
4806
430k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4807
119k
                       XML_MAX_HUGE_LENGTH :
4808
430k
                       XML_MAX_TEXT_LENGTH;
4809
430k
    int inputid;
4810
4811
430k
    inputid = ctxt->input->id;
4812
4813
430k
    if (buf == NULL) {
4814
10.7k
        len = 0;
4815
10.7k
  size = XML_PARSER_BUFFER_SIZE;
4816
10.7k
  buf = (xmlChar *) xmlMallocAtomic(size);
4817
10.7k
  if (buf == NULL) {
4818
0
      xmlErrMemory(ctxt, NULL);
4819
0
      return;
4820
0
  }
4821
10.7k
    }
4822
430k
    GROW; /* Assure there's enough input data */
4823
430k
    q = CUR_CHAR(ql);
4824
430k
    if (q == 0)
4825
166k
        goto not_terminated;
4826
264k
    if (!IS_CHAR(q)) {
4827
30.4k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4828
30.4k
                          "xmlParseComment: invalid xmlChar value %d\n",
4829
30.4k
                    q);
4830
30.4k
  xmlFree (buf);
4831
30.4k
  return;
4832
30.4k
    }
4833
234k
    NEXTL(ql);
4834
234k
    r = CUR_CHAR(rl);
4835
234k
    if (r == 0)
4836
4.43k
        goto not_terminated;
4837
229k
    if (!IS_CHAR(r)) {
4838
3.80k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4839
3.80k
                          "xmlParseComment: invalid xmlChar value %d\n",
4840
3.80k
                    r);
4841
3.80k
  xmlFree (buf);
4842
3.80k
  return;
4843
3.80k
    }
4844
225k
    NEXTL(rl);
4845
225k
    cur = CUR_CHAR(l);
4846
225k
    if (cur == 0)
4847
2.76k
        goto not_terminated;
4848
79.8M
    while (IS_CHAR(cur) && /* checked */
4849
79.8M
           ((cur != '>') ||
4850
79.7M
      (r != '-') || (q != '-'))) {
4851
79.6M
  if ((r == '-') && (q == '-')) {
4852
96.5k
      xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
4853
96.5k
  }
4854
79.6M
  if (len + 5 >= size) {
4855
136k
      xmlChar *new_buf;
4856
136k
            size_t new_size;
4857
4858
136k
      new_size = size * 2;
4859
136k
      new_buf = (xmlChar *) xmlRealloc(buf, new_size);
4860
136k
      if (new_buf == NULL) {
4861
0
    xmlFree (buf);
4862
0
    xmlErrMemory(ctxt, NULL);
4863
0
    return;
4864
0
      }
4865
136k
      buf = new_buf;
4866
136k
            size = new_size;
4867
136k
  }
4868
79.6M
  COPY_BUF(ql,buf,len,q);
4869
79.6M
  q = r;
4870
79.6M
  ql = rl;
4871
79.6M
  r = cur;
4872
79.6M
  rl = l;
4873
4874
79.6M
  count++;
4875
79.6M
  if (count > 50) {
4876
1.48M
      SHRINK;
4877
1.48M
      GROW;
4878
1.48M
      count = 0;
4879
1.48M
            if (ctxt->instate == XML_PARSER_EOF) {
4880
0
    xmlFree(buf);
4881
0
    return;
4882
0
            }
4883
1.48M
  }
4884
79.6M
  NEXTL(l);
4885
79.6M
  cur = CUR_CHAR(l);
4886
79.6M
  if (cur == 0) {
4887
40.9k
      SHRINK;
4888
40.9k
      GROW;
4889
40.9k
      cur = CUR_CHAR(l);
4890
40.9k
  }
4891
4892
79.6M
        if (len > maxLength) {
4893
0
            xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4894
0
                         "Comment too big found", NULL);
4895
0
            xmlFree (buf);
4896
0
            return;
4897
0
        }
4898
79.6M
    }
4899
223k
    buf[len] = 0;
4900
223k
    if (cur == 0) {
4901
40.9k
  xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4902
40.9k
                       "Comment not terminated \n<!--%.50s\n", buf);
4903
182k
    } else if (!IS_CHAR(cur)) {
4904
24.0k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4905
24.0k
                          "xmlParseComment: invalid xmlChar value %d\n",
4906
24.0k
                    cur);
4907
158k
    } else {
4908
158k
  if (inputid != ctxt->input->id) {
4909
0
      xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4910
0
               "Comment doesn't start and stop in the same"
4911
0
                           " entity\n");
4912
0
  }
4913
158k
        NEXT;
4914
158k
  if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4915
158k
      (!ctxt->disableSAX))
4916
117k
      ctxt->sax->comment(ctxt->userData, buf);
4917
158k
    }
4918
223k
    xmlFree(buf);
4919
223k
    return;
4920
173k
not_terminated:
4921
173k
    xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4922
173k
       "Comment not terminated\n", NULL);
4923
173k
    xmlFree(buf);
4924
173k
    return;
4925
223k
}
4926
4927
/**
4928
 * xmlParseComment:
4929
 * @ctxt:  an XML parser context
4930
 *
4931
 * DEPRECATED: Internal function, don't use.
4932
 *
4933
 * Parse an XML (SGML) comment. Always consumes '<!'.
4934
 *
4935
 *  The spec says that "For compatibility, the string "--" (double-hyphen)
4936
 *  must not occur within comments. "
4937
 *
4938
 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4939
 */
4940
void
4941
15.2M
xmlParseComment(xmlParserCtxtPtr ctxt) {
4942
15.2M
    xmlChar *buf = NULL;
4943
15.2M
    size_t size = XML_PARSER_BUFFER_SIZE;
4944
15.2M
    size_t len = 0;
4945
15.2M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4946
1.12M
                       XML_MAX_HUGE_LENGTH :
4947
15.2M
                       XML_MAX_TEXT_LENGTH;
4948
15.2M
    xmlParserInputState state;
4949
15.2M
    const xmlChar *in;
4950
15.2M
    size_t nbchar = 0;
4951
15.2M
    int ccol;
4952
15.2M
    int inputid;
4953
4954
    /*
4955
     * Check that there is a comment right here.
4956
     */
4957
15.2M
    if ((RAW != '<') || (NXT(1) != '!'))
4958
0
        return;
4959
15.2M
    SKIP(2);
4960
15.2M
    if ((RAW != '-') || (NXT(1) != '-'))
4961
1.90k
        return;
4962
15.2M
    state = ctxt->instate;
4963
15.2M
    ctxt->instate = XML_PARSER_COMMENT;
4964
15.2M
    inputid = ctxt->input->id;
4965
15.2M
    SKIP(2);
4966
15.2M
    SHRINK;
4967
15.2M
    GROW;
4968
4969
    /*
4970
     * Accelerated common case where input don't need to be
4971
     * modified before passing it to the handler.
4972
     */
4973
15.2M
    in = ctxt->input->cur;
4974
15.2M
    do {
4975
15.2M
  if (*in == 0xA) {
4976
453k
      do {
4977
453k
    ctxt->input->line++; ctxt->input->col = 1;
4978
453k
    in++;
4979
453k
      } while (*in == 0xA);
4980
428k
  }
4981
32.4M
get_more:
4982
32.4M
        ccol = ctxt->input->col;
4983
760M
  while (((*in > '-') && (*in <= 0x7F)) ||
4984
760M
         ((*in >= 0x20) && (*in < '-')) ||
4985
760M
         (*in == 0x09)) {
4986
727M
        in++;
4987
727M
        ccol++;
4988
727M
  }
4989
32.4M
  ctxt->input->col = ccol;
4990
32.4M
  if (*in == 0xA) {
4991
5.02M
      do {
4992
5.02M
    ctxt->input->line++; ctxt->input->col = 1;
4993
5.02M
    in++;
4994
5.02M
      } while (*in == 0xA);
4995
4.80M
      goto get_more;
4996
4.80M
  }
4997
27.6M
  nbchar = in - ctxt->input->cur;
4998
  /*
4999
   * save current set of data
5000
   */
5001
27.6M
  if (nbchar > 0) {
5002
16.1M
      if ((ctxt->sax != NULL) &&
5003
16.1M
    (ctxt->sax->comment != NULL)) {
5004
16.1M
    if (buf == NULL) {
5005
3.93M
        if ((*in == '-') && (in[1] == '-'))
5006
1.96M
            size = nbchar + 1;
5007
1.97M
        else
5008
1.97M
            size = XML_PARSER_BUFFER_SIZE + nbchar;
5009
3.93M
        buf = (xmlChar *) xmlMallocAtomic(size);
5010
3.93M
        if (buf == NULL) {
5011
0
            xmlErrMemory(ctxt, NULL);
5012
0
      ctxt->instate = state;
5013
0
      return;
5014
0
        }
5015
3.93M
        len = 0;
5016
12.2M
    } else if (len + nbchar + 1 >= size) {
5017
1.62M
        xmlChar *new_buf;
5018
1.62M
        size  += len + nbchar + XML_PARSER_BUFFER_SIZE;
5019
1.62M
        new_buf = (xmlChar *) xmlRealloc(buf, size);
5020
1.62M
        if (new_buf == NULL) {
5021
0
            xmlFree (buf);
5022
0
      xmlErrMemory(ctxt, NULL);
5023
0
      ctxt->instate = state;
5024
0
      return;
5025
0
        }
5026
1.62M
        buf = new_buf;
5027
1.62M
    }
5028
16.1M
    memcpy(&buf[len], ctxt->input->cur, nbchar);
5029
16.1M
    len += nbchar;
5030
16.1M
    buf[len] = 0;
5031
16.1M
      }
5032
16.1M
  }
5033
27.6M
        if (len > maxLength) {
5034
0
            xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
5035
0
                         "Comment too big found", NULL);
5036
0
            xmlFree (buf);
5037
0
            return;
5038
0
        }
5039
27.6M
  ctxt->input->cur = in;
5040
27.6M
  if (*in == 0xA) {
5041
0
      in++;
5042
0
      ctxt->input->line++; ctxt->input->col = 1;
5043
0
  }
5044
27.6M
  if (*in == 0xD) {
5045
7.74M
      in++;
5046
7.74M
      if (*in == 0xA) {
5047
7.73M
    ctxt->input->cur = in;
5048
7.73M
    in++;
5049
7.73M
    ctxt->input->line++; ctxt->input->col = 1;
5050
7.73M
    goto get_more;
5051
7.73M
      }
5052
15.8k
      in--;
5053
15.8k
  }
5054
19.9M
  SHRINK;
5055
19.9M
  GROW;
5056
19.9M
        if (ctxt->instate == XML_PARSER_EOF) {
5057
0
            xmlFree(buf);
5058
0
            return;
5059
0
        }
5060
19.9M
  in = ctxt->input->cur;
5061
19.9M
  if (*in == '-') {
5062
19.5M
      if (in[1] == '-') {
5063
14.9M
          if (in[2] == '>') {
5064
14.8M
        if (ctxt->input->id != inputid) {
5065
0
      xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5066
0
                     "comment doesn't start and stop in the"
5067
0
                                       " same entity\n");
5068
0
        }
5069
14.8M
        SKIP(3);
5070
14.8M
        if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
5071
14.8M
            (!ctxt->disableSAX)) {
5072
13.1M
      if (buf != NULL)
5073
3.10M
          ctxt->sax->comment(ctxt->userData, buf);
5074
10.0M
      else
5075
10.0M
          ctxt->sax->comment(ctxt->userData, BAD_CAST "");
5076
13.1M
        }
5077
14.8M
        if (buf != NULL)
5078
3.51M
            xmlFree(buf);
5079
14.8M
        if (ctxt->instate != XML_PARSER_EOF)
5080
14.8M
      ctxt->instate = state;
5081
14.8M
        return;
5082
14.8M
    }
5083
195k
    if (buf != NULL) {
5084
72.3k
        xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5085
72.3k
                          "Double hyphen within comment: "
5086
72.3k
                                      "<!--%.50s\n",
5087
72.3k
              buf);
5088
72.3k
    } else
5089
123k
        xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5090
123k
                          "Double hyphen within comment\n", NULL);
5091
195k
                if (ctxt->instate == XML_PARSER_EOF) {
5092
0
                    xmlFree(buf);
5093
0
                    return;
5094
0
                }
5095
195k
    in++;
5096
195k
    ctxt->input->col++;
5097
195k
      }
5098
4.71M
      in++;
5099
4.71M
      ctxt->input->col++;
5100
4.71M
      goto get_more;
5101
19.5M
  }
5102
19.9M
    } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09) || (*in == 0x0a));
5103
430k
    xmlParseCommentComplex(ctxt, buf, len, size);
5104
430k
    ctxt->instate = state;
5105
430k
    return;
5106
15.2M
}
5107
5108
5109
/**
5110
 * xmlParsePITarget:
5111
 * @ctxt:  an XML parser context
5112
 *
5113
 * DEPRECATED: Internal function, don't use.
5114
 *
5115
 * parse the name of a PI
5116
 *
5117
 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
5118
 *
5119
 * Returns the PITarget name or NULL
5120
 */
5121
5122
const xmlChar *
5123
513k
xmlParsePITarget(xmlParserCtxtPtr ctxt) {
5124
513k
    const xmlChar *name;
5125
5126
513k
    name = xmlParseName(ctxt);
5127
513k
    if ((name != NULL) &&
5128
513k
        ((name[0] == 'x') || (name[0] == 'X')) &&
5129
513k
        ((name[1] == 'm') || (name[1] == 'M')) &&
5130
513k
        ((name[2] == 'l') || (name[2] == 'L'))) {
5131
109k
  int i;
5132
109k
  if ((name[0] == 'x') && (name[1] == 'm') &&
5133
109k
      (name[2] == 'l') && (name[3] == 0)) {
5134
28.8k
      xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5135
28.8k
     "XML declaration allowed only at the start of the document\n");
5136
28.8k
      return(name);
5137
80.4k
  } else if (name[3] == 0) {
5138
5.06k
      xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
5139
5.06k
      return(name);
5140
5.06k
  }
5141
126k
  for (i = 0;;i++) {
5142
126k
      if (xmlW3CPIs[i] == NULL) break;
5143
101k
      if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
5144
49.8k
          return(name);
5145
101k
  }
5146
25.5k
  xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5147
25.5k
          "xmlParsePITarget: invalid name prefix 'xml'\n",
5148
25.5k
          NULL, NULL);
5149
25.5k
    }
5150
430k
    if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
5151
8.37k
  xmlNsErr(ctxt, XML_NS_ERR_COLON,
5152
8.37k
     "colons are forbidden from PI names '%s'\n", name, NULL, NULL);
5153
8.37k
    }
5154
430k
    return(name);
5155
513k
}
5156
5157
#ifdef LIBXML_CATALOG_ENABLED
5158
/**
5159
 * xmlParseCatalogPI:
5160
 * @ctxt:  an XML parser context
5161
 * @catalog:  the PI value string
5162
 *
5163
 * parse an XML Catalog Processing Instruction.
5164
 *
5165
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
5166
 *
5167
 * Occurs only if allowed by the user and if happening in the Misc
5168
 * part of the document before any doctype information
5169
 * This will add the given catalog to the parsing context in order
5170
 * to be used if there is a resolution need further down in the document
5171
 */
5172
5173
static void
5174
21
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
5175
21
    xmlChar *URL = NULL;
5176
21
    const xmlChar *tmp, *base;
5177
21
    xmlChar marker;
5178
5179
21
    tmp = catalog;
5180
21
    while (IS_BLANK_CH(*tmp)) tmp++;
5181
21
    if (xmlStrncmp(tmp, BAD_CAST"catalog", 7))
5182
18
  goto error;
5183
3
    tmp += 7;
5184
3
    while (IS_BLANK_CH(*tmp)) tmp++;
5185
3
    if (*tmp != '=') {
5186
3
  return;
5187
3
    }
5188
0
    tmp++;
5189
0
    while (IS_BLANK_CH(*tmp)) tmp++;
5190
0
    marker = *tmp;
5191
0
    if ((marker != '\'') && (marker != '"'))
5192
0
  goto error;
5193
0
    tmp++;
5194
0
    base = tmp;
5195
0
    while ((*tmp != 0) && (*tmp != marker)) tmp++;
5196
0
    if (*tmp == 0)
5197
0
  goto error;
5198
0
    URL = xmlStrndup(base, tmp - base);
5199
0
    tmp++;
5200
0
    while (IS_BLANK_CH(*tmp)) tmp++;
5201
0
    if (*tmp != 0)
5202
0
  goto error;
5203
5204
0
    if (URL != NULL) {
5205
0
  ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
5206
0
  xmlFree(URL);
5207
0
    }
5208
0
    return;
5209
5210
18
error:
5211
18
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
5212
18
            "Catalog PI syntax error: %s\n",
5213
18
      catalog, NULL);
5214
18
    if (URL != NULL)
5215
0
  xmlFree(URL);
5216
18
}
5217
#endif
5218
5219
/**
5220
 * xmlParsePI:
5221
 * @ctxt:  an XML parser context
5222
 *
5223
 * DEPRECATED: Internal function, don't use.
5224
 *
5225
 * parse an XML Processing Instruction.
5226
 *
5227
 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
5228
 *
5229
 * The processing is transferred to SAX once parsed.
5230
 */
5231
5232
void
5233
513k
xmlParsePI(xmlParserCtxtPtr ctxt) {
5234
513k
    xmlChar *buf = NULL;
5235
513k
    size_t len = 0;
5236
513k
    size_t size = XML_PARSER_BUFFER_SIZE;
5237
513k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
5238
192k
                       XML_MAX_HUGE_LENGTH :
5239
513k
                       XML_MAX_TEXT_LENGTH;
5240
513k
    int cur, l;
5241
513k
    const xmlChar *target;
5242
513k
    xmlParserInputState state;
5243
513k
    int count = 0;
5244
5245
513k
    if ((RAW == '<') && (NXT(1) == '?')) {
5246
513k
  int inputid = ctxt->input->id;
5247
513k
  state = ctxt->instate;
5248
513k
        ctxt->instate = XML_PARSER_PI;
5249
  /*
5250
   * this is a Processing Instruction.
5251
   */
5252
513k
  SKIP(2);
5253
513k
  SHRINK;
5254
5255
  /*
5256
   * Parse the target name and check for special support like
5257
   * namespace.
5258
   */
5259
513k
        target = xmlParsePITarget(ctxt);
5260
513k
  if (target != NULL) {
5261
489k
      if ((RAW == '?') && (NXT(1) == '>')) {
5262
18.7k
    if (inputid != ctxt->input->id) {
5263
0
        xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5264
0
                             "PI declaration doesn't start and stop in"
5265
0
                                   " the same entity\n");
5266
0
    }
5267
18.7k
    SKIP(2);
5268
5269
    /*
5270
     * SAX: PI detected.
5271
     */
5272
18.7k
    if ((ctxt->sax) && (!ctxt->disableSAX) &&
5273
18.7k
        (ctxt->sax->processingInstruction != NULL))
5274
16.5k
        ctxt->sax->processingInstruction(ctxt->userData,
5275
16.5k
                                         target, NULL);
5276
18.7k
    if (ctxt->instate != XML_PARSER_EOF)
5277
18.7k
        ctxt->instate = state;
5278
18.7k
    return;
5279
18.7k
      }
5280
470k
      buf = (xmlChar *) xmlMallocAtomic(size);
5281
470k
      if (buf == NULL) {
5282
0
    xmlErrMemory(ctxt, NULL);
5283
0
    ctxt->instate = state;
5284
0
    return;
5285
0
      }
5286
470k
      if (SKIP_BLANKS == 0) {
5287
92.4k
    xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
5288
92.4k
        "ParsePI: PI %s space expected\n", target);
5289
92.4k
      }
5290
470k
      cur = CUR_CHAR(l);
5291
124M
      while (IS_CHAR(cur) && /* checked */
5292
124M
       ((cur != '?') || (NXT(1) != '>'))) {
5293
123M
    if (len + 5 >= size) {
5294
124k
        xmlChar *tmp;
5295
124k
                    size_t new_size = size * 2;
5296
124k
        tmp = (xmlChar *) xmlRealloc(buf, new_size);
5297
124k
        if (tmp == NULL) {
5298
0
      xmlErrMemory(ctxt, NULL);
5299
0
      xmlFree(buf);
5300
0
      ctxt->instate = state;
5301
0
      return;
5302
0
        }
5303
124k
        buf = tmp;
5304
124k
                    size = new_size;
5305
124k
    }
5306
123M
    count++;
5307
123M
    if (count > 50) {
5308
2.29M
        SHRINK;
5309
2.29M
        GROW;
5310
2.29M
                    if (ctxt->instate == XML_PARSER_EOF) {
5311
0
                        xmlFree(buf);
5312
0
                        return;
5313
0
                    }
5314
2.29M
        count = 0;
5315
2.29M
    }
5316
123M
    COPY_BUF(l,buf,len,cur);
5317
123M
    NEXTL(l);
5318
123M
    cur = CUR_CHAR(l);
5319
123M
    if (cur == 0) {
5320
52.5k
        SHRINK;
5321
52.5k
        GROW;
5322
52.5k
        cur = CUR_CHAR(l);
5323
52.5k
    }
5324
123M
                if (len > maxLength) {
5325
0
                    xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5326
0
                                      "PI %s too big found", target);
5327
0
                    xmlFree(buf);
5328
0
                    ctxt->instate = state;
5329
0
                    return;
5330
0
                }
5331
123M
      }
5332
470k
      buf[len] = 0;
5333
470k
      if (cur != '?') {
5334
96.8k
    xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5335
96.8k
          "ParsePI: PI %s never end ...\n", target);
5336
373k
      } else {
5337
373k
    if (inputid != ctxt->input->id) {
5338
0
        xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5339
0
                             "PI declaration doesn't start and stop in"
5340
0
                                   " the same entity\n");
5341
0
    }
5342
373k
    SKIP(2);
5343
5344
373k
#ifdef LIBXML_CATALOG_ENABLED
5345
373k
    if (((state == XML_PARSER_MISC) ||
5346
373k
               (state == XML_PARSER_START)) &&
5347
373k
        (xmlStrEqual(target, XML_CATALOG_PI))) {
5348
21
        xmlCatalogAllow allow = xmlCatalogGetDefaults();
5349
21
        if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5350
21
      (allow == XML_CATA_ALLOW_ALL))
5351
21
      xmlParseCatalogPI(ctxt, buf);
5352
21
    }
5353
373k
#endif
5354
5355
5356
    /*
5357
     * SAX: PI detected.
5358
     */
5359
373k
    if ((ctxt->sax) && (!ctxt->disableSAX) &&
5360
373k
        (ctxt->sax->processingInstruction != NULL))
5361
332k
        ctxt->sax->processingInstruction(ctxt->userData,
5362
332k
                                         target, buf);
5363
373k
      }
5364
470k
      xmlFree(buf);
5365
470k
  } else {
5366
24.6k
      xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
5367
24.6k
  }
5368
495k
  if (ctxt->instate != XML_PARSER_EOF)
5369
495k
      ctxt->instate = state;
5370
495k
    }
5371
513k
}
5372
5373
/**
5374
 * xmlParseNotationDecl:
5375
 * @ctxt:  an XML parser context
5376
 *
5377
 * DEPRECATED: Internal function, don't use.
5378
 *
5379
 * Parse a notation declaration. Always consumes '<!'.
5380
 *
5381
 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID |  PublicID) S? '>'
5382
 *
5383
 * Hence there is actually 3 choices:
5384
 *     'PUBLIC' S PubidLiteral
5385
 *     'PUBLIC' S PubidLiteral S SystemLiteral
5386
 * and 'SYSTEM' S SystemLiteral
5387
 *
5388
 * See the NOTE on xmlParseExternalID().
5389
 */
5390
5391
void
5392
20.0k
xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
5393
20.0k
    const xmlChar *name;
5394
20.0k
    xmlChar *Pubid;
5395
20.0k
    xmlChar *Systemid;
5396
5397
20.0k
    if ((CUR != '<') || (NXT(1) != '!'))
5398
0
        return;
5399
20.0k
    SKIP(2);
5400
5401
20.0k
    if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5402
13.7k
  int inputid = ctxt->input->id;
5403
13.7k
  SHRINK;
5404
13.7k
  SKIP(8);
5405
13.7k
  if (SKIP_BLANKS == 0) {
5406
1.35k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5407
1.35k
         "Space required after '<!NOTATION'\n");
5408
1.35k
      return;
5409
1.35k
  }
5410
5411
12.3k
        name = xmlParseName(ctxt);
5412
12.3k
  if (name == NULL) {
5413
886
      xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5414
886
      return;
5415
886
  }
5416
11.4k
  if (xmlStrchr(name, ':') != NULL) {
5417
426
      xmlNsErr(ctxt, XML_NS_ERR_COLON,
5418
426
         "colons are forbidden from notation names '%s'\n",
5419
426
         name, NULL, NULL);
5420
426
  }
5421
11.4k
  if (SKIP_BLANKS == 0) {
5422
1.10k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5423
1.10k
         "Space required after the NOTATION name'\n");
5424
1.10k
      return;
5425
1.10k
  }
5426
5427
  /*
5428
   * Parse the IDs.
5429
   */
5430
10.3k
  Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
5431
10.3k
  SKIP_BLANKS;
5432
5433
10.3k
  if (RAW == '>') {
5434
8.00k
      if (inputid != ctxt->input->id) {
5435
0
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5436
0
                         "Notation declaration doesn't start and stop"
5437
0
                               " in the same entity\n");
5438
0
      }
5439
8.00k
      NEXT;
5440
8.00k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5441
8.00k
    (ctxt->sax->notationDecl != NULL))
5442
5.92k
    ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5443
8.00k
  } else {
5444
2.35k
      xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5445
2.35k
  }
5446
10.3k
  if (Systemid != NULL) xmlFree(Systemid);
5447
10.3k
  if (Pubid != NULL) xmlFree(Pubid);
5448
10.3k
    }
5449
20.0k
}
5450
5451
/**
5452
 * xmlParseEntityDecl:
5453
 * @ctxt:  an XML parser context
5454
 *
5455
 * DEPRECATED: Internal function, don't use.
5456
 *
5457
 * Parse an entity declaration. Always consumes '<!'.
5458
 *
5459
 * [70] EntityDecl ::= GEDecl | PEDecl
5460
 *
5461
 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5462
 *
5463
 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5464
 *
5465
 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5466
 *
5467
 * [74] PEDef ::= EntityValue | ExternalID
5468
 *
5469
 * [76] NDataDecl ::= S 'NDATA' S Name
5470
 *
5471
 * [ VC: Notation Declared ]
5472
 * The Name must match the declared name of a notation.
5473
 */
5474
5475
void
5476
3.98M
xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
5477
3.98M
    const xmlChar *name = NULL;
5478
3.98M
    xmlChar *value = NULL;
5479
3.98M
    xmlChar *URI = NULL, *literal = NULL;
5480
3.98M
    const xmlChar *ndata = NULL;
5481
3.98M
    int isParameter = 0;
5482
3.98M
    xmlChar *orig = NULL;
5483
5484
3.98M
    if ((CUR != '<') || (NXT(1) != '!'))
5485
0
        return;
5486
3.98M
    SKIP(2);
5487
5488
    /* GROW; done in the caller */
5489
3.98M
    if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
5490
3.97M
  int inputid = ctxt->input->id;
5491
3.97M
  SHRINK;
5492
3.97M
  SKIP(6);
5493
3.97M
  if (SKIP_BLANKS == 0) {
5494
10.9k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5495
10.9k
         "Space required after '<!ENTITY'\n");
5496
10.9k
  }
5497
5498
3.97M
  if (RAW == '%') {
5499
1.42M
      NEXT;
5500
1.42M
      if (SKIP_BLANKS == 0) {
5501
2.53k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5502
2.53k
             "Space required after '%%'\n");
5503
2.53k
      }
5504
1.42M
      isParameter = 1;
5505
1.42M
  }
5506
5507
3.97M
        name = xmlParseName(ctxt);
5508
3.97M
  if (name == NULL) {
5509
12.2k
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5510
12.2k
                     "xmlParseEntityDecl: no name\n");
5511
12.2k
            return;
5512
12.2k
  }
5513
3.96M
  if (xmlStrchr(name, ':') != NULL) {
5514
3.92k
      xmlNsErr(ctxt, XML_NS_ERR_COLON,
5515
3.92k
         "colons are forbidden from entities names '%s'\n",
5516
3.92k
         name, NULL, NULL);
5517
3.92k
  }
5518
3.96M
  if (SKIP_BLANKS == 0) {
5519
21.1k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5520
21.1k
         "Space required after the entity name\n");
5521
21.1k
  }
5522
5523
3.96M
  ctxt->instate = XML_PARSER_ENTITY_DECL;
5524
  /*
5525
   * handle the various case of definitions...
5526
   */
5527
3.96M
  if (isParameter) {
5528
1.42M
      if ((RAW == '"') || (RAW == '\'')) {
5529
1.27M
          value = xmlParseEntityValue(ctxt, &orig);
5530
1.27M
    if (value) {
5531
1.24M
        if ((ctxt->sax != NULL) &&
5532
1.24M
      (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5533
1.11M
      ctxt->sax->entityDecl(ctxt->userData, name,
5534
1.11M
                        XML_INTERNAL_PARAMETER_ENTITY,
5535
1.11M
            NULL, NULL, value);
5536
1.24M
    }
5537
1.27M
      } else {
5538
146k
          URI = xmlParseExternalID(ctxt, &literal, 1);
5539
146k
    if ((URI == NULL) && (literal == NULL)) {
5540
6.06k
        xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5541
6.06k
    }
5542
146k
    if (URI) {
5543
140k
        xmlURIPtr uri;
5544
5545
140k
        uri = xmlParseURI((const char *) URI);
5546
140k
        if (uri == NULL) {
5547
5.67k
            xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5548
5.67k
             "Invalid URI: %s\n", URI);
5549
      /*
5550
       * This really ought to be a well formedness error
5551
       * but the XML Core WG decided otherwise c.f. issue
5552
       * E26 of the XML erratas.
5553
       */
5554
134k
        } else {
5555
134k
      if (uri->fragment != NULL) {
5556
          /*
5557
           * Okay this is foolish to block those but not
5558
           * invalid URIs.
5559
           */
5560
567
          xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5561
133k
      } else {
5562
133k
          if ((ctxt->sax != NULL) &&
5563
133k
        (!ctxt->disableSAX) &&
5564
133k
        (ctxt->sax->entityDecl != NULL))
5565
131k
        ctxt->sax->entityDecl(ctxt->userData, name,
5566
131k
              XML_EXTERNAL_PARAMETER_ENTITY,
5567
131k
              literal, URI, NULL);
5568
133k
      }
5569
134k
      xmlFreeURI(uri);
5570
134k
        }
5571
140k
    }
5572
146k
      }
5573
2.53M
  } else {
5574
2.53M
      if ((RAW == '"') || (RAW == '\'')) {
5575
2.33M
          value = xmlParseEntityValue(ctxt, &orig);
5576
2.33M
    if ((ctxt->sax != NULL) &&
5577
2.33M
        (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5578
2.11M
        ctxt->sax->entityDecl(ctxt->userData, name,
5579
2.11M
        XML_INTERNAL_GENERAL_ENTITY,
5580
2.11M
        NULL, NULL, value);
5581
    /*
5582
     * For expat compatibility in SAX mode.
5583
     */
5584
2.33M
    if ((ctxt->myDoc == NULL) ||
5585
2.33M
        (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5586
23.2k
        if (ctxt->myDoc == NULL) {
5587
3.72k
      ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5588
3.72k
      if (ctxt->myDoc == NULL) {
5589
0
          xmlErrMemory(ctxt, "New Doc failed");
5590
0
          return;
5591
0
      }
5592
3.72k
      ctxt->myDoc->properties = XML_DOC_INTERNAL;
5593
3.72k
        }
5594
23.2k
        if (ctxt->myDoc->intSubset == NULL)
5595
3.72k
      ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5596
3.72k
              BAD_CAST "fake", NULL, NULL);
5597
5598
23.2k
        xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5599
23.2k
                    NULL, NULL, value);
5600
23.2k
    }
5601
2.33M
      } else {
5602
201k
          URI = xmlParseExternalID(ctxt, &literal, 1);
5603
201k
    if ((URI == NULL) && (literal == NULL)) {
5604
27.3k
        xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5605
27.3k
    }
5606
201k
    if (URI) {
5607
170k
        xmlURIPtr uri;
5608
5609
170k
        uri = xmlParseURI((const char *)URI);
5610
170k
        if (uri == NULL) {
5611
10.5k
            xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5612
10.5k
             "Invalid URI: %s\n", URI);
5613
      /*
5614
       * This really ought to be a well formedness error
5615
       * but the XML Core WG decided otherwise c.f. issue
5616
       * E26 of the XML erratas.
5617
       */
5618
160k
        } else {
5619
160k
      if (uri->fragment != NULL) {
5620
          /*
5621
           * Okay this is foolish to block those but not
5622
           * invalid URIs.
5623
           */
5624
2.20k
          xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5625
2.20k
      }
5626
160k
      xmlFreeURI(uri);
5627
160k
        }
5628
170k
    }
5629
201k
    if ((RAW != '>') && (SKIP_BLANKS == 0)) {
5630
32.5k
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5631
32.5k
           "Space required before 'NDATA'\n");
5632
32.5k
    }
5633
201k
    if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
5634
17.9k
        SKIP(5);
5635
17.9k
        if (SKIP_BLANKS == 0) {
5636
1.26k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5637
1.26k
               "Space required after 'NDATA'\n");
5638
1.26k
        }
5639
17.9k
        ndata = xmlParseName(ctxt);
5640
17.9k
        if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5641
17.9k
            (ctxt->sax->unparsedEntityDecl != NULL))
5642
15.6k
      ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5643
15.6k
            literal, URI, ndata);
5644
183k
    } else {
5645
183k
        if ((ctxt->sax != NULL) &&
5646
183k
            (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5647
154k
      ctxt->sax->entityDecl(ctxt->userData, name,
5648
154k
            XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5649
154k
            literal, URI, NULL);
5650
        /*
5651
         * For expat compatibility in SAX mode.
5652
         * assuming the entity replacement was asked for
5653
         */
5654
183k
        if ((ctxt->replaceEntities != 0) &&
5655
183k
      ((ctxt->myDoc == NULL) ||
5656
112k
      (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5657
2.21k
      if (ctxt->myDoc == NULL) {
5658
951
          ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5659
951
          if (ctxt->myDoc == NULL) {
5660
0
              xmlErrMemory(ctxt, "New Doc failed");
5661
0
        return;
5662
0
          }
5663
951
          ctxt->myDoc->properties = XML_DOC_INTERNAL;
5664
951
      }
5665
5666
2.21k
      if (ctxt->myDoc->intSubset == NULL)
5667
951
          ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5668
951
            BAD_CAST "fake", NULL, NULL);
5669
2.21k
      xmlSAX2EntityDecl(ctxt, name,
5670
2.21k
                  XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5671
2.21k
                  literal, URI, NULL);
5672
2.21k
        }
5673
183k
    }
5674
201k
      }
5675
2.53M
  }
5676
3.96M
  if (ctxt->instate == XML_PARSER_EOF)
5677
117
      goto done;
5678
3.96M
  SKIP_BLANKS;
5679
3.96M
  if (RAW != '>') {
5680
86.9k
      xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
5681
86.9k
              "xmlParseEntityDecl: entity %s not terminated\n", name);
5682
86.9k
      xmlHaltParser(ctxt);
5683
3.87M
  } else {
5684
3.87M
      if (inputid != ctxt->input->id) {
5685
20
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5686
20
                         "Entity declaration doesn't start and stop in"
5687
20
                               " the same entity\n");
5688
20
      }
5689
3.87M
      NEXT;
5690
3.87M
  }
5691
3.96M
  if (orig != NULL) {
5692
      /*
5693
       * Ugly mechanism to save the raw entity value.
5694
       */
5695
3.55M
      xmlEntityPtr cur = NULL;
5696
5697
3.55M
      if (isParameter) {
5698
1.24M
          if ((ctxt->sax != NULL) &&
5699
1.24M
        (ctxt->sax->getParameterEntity != NULL))
5700
1.24M
        cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5701
2.30M
      } else {
5702
2.30M
          if ((ctxt->sax != NULL) &&
5703
2.30M
        (ctxt->sax->getEntity != NULL))
5704
2.30M
        cur = ctxt->sax->getEntity(ctxt->userData, name);
5705
2.30M
    if ((cur == NULL) && (ctxt->userData==ctxt)) {
5706
167k
        cur = xmlSAX2GetEntity(ctxt, name);
5707
167k
    }
5708
2.30M
      }
5709
3.55M
            if ((cur != NULL) && (cur->orig == NULL)) {
5710
3.19M
    cur->orig = orig;
5711
3.19M
                orig = NULL;
5712
3.19M
      }
5713
3.55M
  }
5714
5715
3.96M
done:
5716
3.96M
  if (value != NULL) xmlFree(value);
5717
3.96M
  if (URI != NULL) xmlFree(URI);
5718
3.96M
  if (literal != NULL) xmlFree(literal);
5719
3.96M
        if (orig != NULL) xmlFree(orig);
5720
3.96M
    }
5721
3.98M
}
5722
5723
/**
5724
 * xmlParseDefaultDecl:
5725
 * @ctxt:  an XML parser context
5726
 * @value:  Receive a possible fixed default value for the attribute
5727
 *
5728
 * DEPRECATED: Internal function, don't use.
5729
 *
5730
 * Parse an attribute default declaration
5731
 *
5732
 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5733
 *
5734
 * [ VC: Required Attribute ]
5735
 * if the default declaration is the keyword #REQUIRED, then the
5736
 * attribute must be specified for all elements of the type in the
5737
 * attribute-list declaration.
5738
 *
5739
 * [ VC: Attribute Default Legal ]
5740
 * The declared default value must meet the lexical constraints of
5741
 * the declared attribute type c.f. xmlValidateAttributeDecl()
5742
 *
5743
 * [ VC: Fixed Attribute Default ]
5744
 * if an attribute has a default value declared with the #FIXED
5745
 * keyword, instances of that attribute must match the default value.
5746
 *
5747
 * [ WFC: No < in Attribute Values ]
5748
 * handled in xmlParseAttValue()
5749
 *
5750
 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5751
 *          or XML_ATTRIBUTE_FIXED.
5752
 */
5753
5754
int
5755
12.0M
xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5756
12.0M
    int val;
5757
12.0M
    xmlChar *ret;
5758
5759
12.0M
    *value = NULL;
5760
12.0M
    if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
5761
748k
  SKIP(9);
5762
748k
  return(XML_ATTRIBUTE_REQUIRED);
5763
748k
    }
5764
11.3M
    if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
5765
10.5M
  SKIP(8);
5766
10.5M
  return(XML_ATTRIBUTE_IMPLIED);
5767
10.5M
    }
5768
768k
    val = XML_ATTRIBUTE_NONE;
5769
768k
    if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
5770
314k
  SKIP(6);
5771
314k
  val = XML_ATTRIBUTE_FIXED;
5772
314k
  if (SKIP_BLANKS == 0) {
5773
1.78k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5774
1.78k
         "Space required after '#FIXED'\n");
5775
1.78k
  }
5776
314k
    }
5777
768k
    ret = xmlParseAttValue(ctxt);
5778
768k
    ctxt->instate = XML_PARSER_DTD;
5779
768k
    if (ret == NULL) {
5780
27.6k
  xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
5781
27.6k
           "Attribute default value declaration error\n");
5782
27.6k
    } else
5783
740k
        *value = ret;
5784
768k
    return(val);
5785
11.3M
}
5786
5787
/**
5788
 * xmlParseNotationType:
5789
 * @ctxt:  an XML parser context
5790
 *
5791
 * DEPRECATED: Internal function, don't use.
5792
 *
5793
 * parse an Notation attribute type.
5794
 *
5795
 * Note: the leading 'NOTATION' S part has already being parsed...
5796
 *
5797
 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5798
 *
5799
 * [ VC: Notation Attributes ]
5800
 * Values of this type must match one of the notation names included
5801
 * in the declaration; all notation names in the declaration must be declared.
5802
 *
5803
 * Returns: the notation attribute tree built while parsing
5804
 */
5805
5806
xmlEnumerationPtr
5807
5.33k
xmlParseNotationType(xmlParserCtxtPtr ctxt) {
5808
5.33k
    const xmlChar *name;
5809
5.33k
    xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5810
5811
5.33k
    if (RAW != '(') {
5812
1.09k
  xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5813
1.09k
  return(NULL);
5814
1.09k
    }
5815
4.24k
    SHRINK;
5816
4.39k
    do {
5817
4.39k
        NEXT;
5818
4.39k
  SKIP_BLANKS;
5819
4.39k
        name = xmlParseName(ctxt);
5820
4.39k
  if (name == NULL) {
5821
474
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5822
474
         "Name expected in NOTATION declaration\n");
5823
474
            xmlFreeEnumeration(ret);
5824
474
      return(NULL);
5825
474
  }
5826
3.92k
  tmp = ret;
5827
4.02k
  while (tmp != NULL) {
5828
102
      if (xmlStrEqual(name, tmp->name)) {
5829
6
    xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5830
6
    "standalone: attribute notation value token %s duplicated\n",
5831
6
         name, NULL);
5832
6
    if (!xmlDictOwns(ctxt->dict, name))
5833
0
        xmlFree((xmlChar *) name);
5834
6
    break;
5835
6
      }
5836
96
      tmp = tmp->next;
5837
96
  }
5838
3.92k
  if (tmp == NULL) {
5839
3.91k
      cur = xmlCreateEnumeration(name);
5840
3.91k
      if (cur == NULL) {
5841
0
                xmlFreeEnumeration(ret);
5842
0
                return(NULL);
5843
0
            }
5844
3.91k
      if (last == NULL) ret = last = cur;
5845
72
      else {
5846
72
    last->next = cur;
5847
72
    last = cur;
5848
72
      }
5849
3.91k
  }
5850
3.92k
  SKIP_BLANKS;
5851
3.92k
    } while (RAW == '|');
5852
3.77k
    if (RAW != ')') {
5853
1.13k
  xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5854
1.13k
        xmlFreeEnumeration(ret);
5855
1.13k
  return(NULL);
5856
1.13k
    }
5857
2.64k
    NEXT;
5858
2.64k
    return(ret);
5859
3.77k
}
5860
5861
/**
5862
 * xmlParseEnumerationType:
5863
 * @ctxt:  an XML parser context
5864
 *
5865
 * DEPRECATED: Internal function, don't use.
5866
 *
5867
 * parse an Enumeration attribute type.
5868
 *
5869
 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5870
 *
5871
 * [ VC: Enumeration ]
5872
 * Values of this type must match one of the Nmtoken tokens in
5873
 * the declaration
5874
 *
5875
 * Returns: the enumeration attribute tree built while parsing
5876
 */
5877
5878
xmlEnumerationPtr
5879
1.39M
xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
5880
1.39M
    xmlChar *name;
5881
1.39M
    xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5882
5883
1.39M
    if (RAW != '(') {
5884
35.0k
  xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
5885
35.0k
  return(NULL);
5886
35.0k
    }
5887
1.36M
    SHRINK;
5888
4.28M
    do {
5889
4.28M
        NEXT;
5890
4.28M
  SKIP_BLANKS;
5891
4.28M
        name = xmlParseNmtoken(ctxt);
5892
4.28M
  if (name == NULL) {
5893
2.96k
      xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
5894
2.96k
      return(ret);
5895
2.96k
  }
5896
4.28M
  tmp = ret;
5897
11.7M
  while (tmp != NULL) {
5898
7.44M
      if (xmlStrEqual(name, tmp->name)) {
5899
719
    xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5900
719
    "standalone: attribute enumeration value token %s duplicated\n",
5901
719
         name, NULL);
5902
719
    if (!xmlDictOwns(ctxt->dict, name))
5903
719
        xmlFree(name);
5904
719
    break;
5905
719
      }
5906
7.43M
      tmp = tmp->next;
5907
7.43M
  }
5908
4.28M
  if (tmp == NULL) {
5909
4.28M
      cur = xmlCreateEnumeration(name);
5910
4.28M
      if (!xmlDictOwns(ctxt->dict, name))
5911
4.28M
    xmlFree(name);
5912
4.28M
      if (cur == NULL) {
5913
0
                xmlFreeEnumeration(ret);
5914
0
                return(NULL);
5915
0
            }
5916
4.28M
      if (last == NULL) ret = last = cur;
5917
2.92M
      else {
5918
2.92M
    last->next = cur;
5919
2.92M
    last = cur;
5920
2.92M
      }
5921
4.28M
  }
5922
4.28M
  SKIP_BLANKS;
5923
4.28M
    } while (RAW == '|');
5924
1.36M
    if (RAW != ')') {
5925
5.33k
  xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
5926
5.33k
  return(ret);
5927
5.33k
    }
5928
1.35M
    NEXT;
5929
1.35M
    return(ret);
5930
1.36M
}
5931
5932
/**
5933
 * xmlParseEnumeratedType:
5934
 * @ctxt:  an XML parser context
5935
 * @tree:  the enumeration tree built while parsing
5936
 *
5937
 * DEPRECATED: Internal function, don't use.
5938
 *
5939
 * parse an Enumerated attribute type.
5940
 *
5941
 * [57] EnumeratedType ::= NotationType | Enumeration
5942
 *
5943
 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5944
 *
5945
 *
5946
 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5947
 */
5948
5949
int
5950
1.40M
xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5951
1.40M
    if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5952
6.13k
  SKIP(8);
5953
6.13k
  if (SKIP_BLANKS == 0) {
5954
792
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5955
792
         "Space required after 'NOTATION'\n");
5956
792
      return(0);
5957
792
  }
5958
5.33k
  *tree = xmlParseNotationType(ctxt);
5959
5.33k
  if (*tree == NULL) return(0);
5960
2.64k
  return(XML_ATTRIBUTE_NOTATION);
5961
5.33k
    }
5962
1.39M
    *tree = xmlParseEnumerationType(ctxt);
5963
1.39M
    if (*tree == NULL) return(0);
5964
1.36M
    return(XML_ATTRIBUTE_ENUMERATION);
5965
1.39M
}
5966
5967
/**
5968
 * xmlParseAttributeType:
5969
 * @ctxt:  an XML parser context
5970
 * @tree:  the enumeration tree built while parsing
5971
 *
5972
 * DEPRECATED: Internal function, don't use.
5973
 *
5974
 * parse the Attribute list def for an element
5975
 *
5976
 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5977
 *
5978
 * [55] StringType ::= 'CDATA'
5979
 *
5980
 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5981
 *                        'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5982
 *
5983
 * Validity constraints for attribute values syntax are checked in
5984
 * xmlValidateAttributeValue()
5985
 *
5986
 * [ VC: ID ]
5987
 * Values of type ID must match the Name production. A name must not
5988
 * appear more than once in an XML document as a value of this type;
5989
 * i.e., ID values must uniquely identify the elements which bear them.
5990
 *
5991
 * [ VC: One ID per Element Type ]
5992
 * No element type may have more than one ID attribute specified.
5993
 *
5994
 * [ VC: ID Attribute Default ]
5995
 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
5996
 *
5997
 * [ VC: IDREF ]
5998
 * Values of type IDREF must match the Name production, and values
5999
 * of type IDREFS must match Names; each IDREF Name must match the value
6000
 * of an ID attribute on some element in the XML document; i.e. IDREF
6001
 * values must match the value of some ID attribute.
6002
 *
6003
 * [ VC: Entity Name ]
6004
 * Values of type ENTITY must match the Name production, values
6005
 * of type ENTITIES must match Names; each Entity Name must match the
6006
 * name of an unparsed entity declared in the DTD.
6007
 *
6008
 * [ VC: Name Token ]
6009
 * Values of type NMTOKEN must match the Nmtoken production; values
6010
 * of type NMTOKENS must match Nmtokens.
6011
 *
6012
 * Returns the attribute type
6013
 */
6014
int
6015
12.1M
xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
6016
12.1M
    SHRINK;
6017
12.1M
    if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
6018
5.10M
  SKIP(5);
6019
5.10M
  return(XML_ATTRIBUTE_CDATA);
6020
7.04M
     } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
6021
13.4k
  SKIP(6);
6022
13.4k
  return(XML_ATTRIBUTE_IDREFS);
6023
7.03M
     } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
6024
84.3k
  SKIP(5);
6025
84.3k
  return(XML_ATTRIBUTE_IDREF);
6026
6.94M
     } else if ((RAW == 'I') && (NXT(1) == 'D')) {
6027
3.00M
        SKIP(2);
6028
3.00M
  return(XML_ATTRIBUTE_ID);
6029
3.94M
     } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
6030
47.6k
  SKIP(6);
6031
47.6k
  return(XML_ATTRIBUTE_ENTITY);
6032
3.90M
     } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
6033
2.05k
  SKIP(8);
6034
2.05k
  return(XML_ATTRIBUTE_ENTITIES);
6035
3.89M
     } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
6036
729k
  SKIP(8);
6037
729k
  return(XML_ATTRIBUTE_NMTOKENS);
6038
3.16M
     } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
6039
1.76M
  SKIP(7);
6040
1.76M
  return(XML_ATTRIBUTE_NMTOKEN);
6041
1.76M
     }
6042
1.40M
     return(xmlParseEnumeratedType(ctxt, tree));
6043
12.1M
}
6044
6045
/**
6046
 * xmlParseAttributeListDecl:
6047
 * @ctxt:  an XML parser context
6048
 *
6049
 * DEPRECATED: Internal function, don't use.
6050
 *
6051
 * Parse an attribute list declaration for an element. Always consumes '<!'.
6052
 *
6053
 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
6054
 *
6055
 * [53] AttDef ::= S Name S AttType S DefaultDecl
6056
 *
6057
 */
6058
void
6059
3.71M
xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
6060
3.71M
    const xmlChar *elemName;
6061
3.71M
    const xmlChar *attrName;
6062
3.71M
    xmlEnumerationPtr tree;
6063
6064
3.71M
    if ((CUR != '<') || (NXT(1) != '!'))
6065
0
        return;
6066
3.71M
    SKIP(2);
6067
6068
3.71M
    if (CMP7(CUR_PTR, 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
6069
3.70M
  int inputid = ctxt->input->id;
6070
6071
3.70M
  SKIP(7);
6072
3.70M
  if (SKIP_BLANKS == 0) {
6073
7.90k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6074
7.90k
                     "Space required after '<!ATTLIST'\n");
6075
7.90k
  }
6076
3.70M
        elemName = xmlParseName(ctxt);
6077
3.70M
  if (elemName == NULL) {
6078
5.41k
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6079
5.41k
         "ATTLIST: no name for Element\n");
6080
5.41k
      return;
6081
5.41k
  }
6082
3.70M
  SKIP_BLANKS;
6083
3.70M
  GROW;
6084
15.7M
  while ((RAW != '>') && (ctxt->instate != XML_PARSER_EOF)) {
6085
12.1M
      int type;
6086
12.1M
      int def;
6087
12.1M
      xmlChar *defaultValue = NULL;
6088
6089
12.1M
      GROW;
6090
12.1M
            tree = NULL;
6091
12.1M
      attrName = xmlParseName(ctxt);
6092
12.1M
      if (attrName == NULL) {
6093
22.7k
    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6094
22.7k
             "ATTLIST: no name for Attribute\n");
6095
22.7k
    break;
6096
22.7k
      }
6097
12.1M
      GROW;
6098
12.1M
      if (SKIP_BLANKS == 0) {
6099
10.6k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6100
10.6k
            "Space required after the attribute name\n");
6101
10.6k
    break;
6102
10.6k
      }
6103
6104
12.1M
      type = xmlParseAttributeType(ctxt, &tree);
6105
12.1M
      if (type <= 0) {
6106
40.4k
          break;
6107
40.4k
      }
6108
6109
12.1M
      GROW;
6110
12.1M
      if (SKIP_BLANKS == 0) {
6111
15.2k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6112
15.2k
             "Space required after the attribute type\n");
6113
15.2k
          if (tree != NULL)
6114
6.74k
        xmlFreeEnumeration(tree);
6115
15.2k
    break;
6116
15.2k
      }
6117
6118
12.0M
      def = xmlParseDefaultDecl(ctxt, &defaultValue);
6119
12.0M
      if (def <= 0) {
6120
0
                if (defaultValue != NULL)
6121
0
        xmlFree(defaultValue);
6122
0
          if (tree != NULL)
6123
0
        xmlFreeEnumeration(tree);
6124
0
          break;
6125
0
      }
6126
12.0M
      if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
6127
312k
          xmlAttrNormalizeSpace(defaultValue, defaultValue);
6128
6129
12.0M
      GROW;
6130
12.0M
            if (RAW != '>') {
6131
11.6M
    if (SKIP_BLANKS == 0) {
6132
44.1k
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6133
44.1k
      "Space required after the attribute default value\n");
6134
44.1k
        if (defaultValue != NULL)
6135
15.4k
      xmlFree(defaultValue);
6136
44.1k
        if (tree != NULL)
6137
3.81k
      xmlFreeEnumeration(tree);
6138
44.1k
        break;
6139
44.1k
    }
6140
11.6M
      }
6141
12.0M
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6142
12.0M
    (ctxt->sax->attributeDecl != NULL))
6143
11.1M
    ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
6144
11.1M
                          type, def, defaultValue, tree);
6145
871k
      else if (tree != NULL)
6146
88.4k
    xmlFreeEnumeration(tree);
6147
6148
12.0M
      if ((ctxt->sax2) && (defaultValue != NULL) &&
6149
12.0M
          (def != XML_ATTRIBUTE_IMPLIED) &&
6150
12.0M
    (def != XML_ATTRIBUTE_REQUIRED)) {
6151
517k
    xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
6152
517k
      }
6153
12.0M
      if (ctxt->sax2) {
6154
8.32M
    xmlAddSpecialAttr(ctxt, elemName, attrName, type);
6155
8.32M
      }
6156
12.0M
      if (defaultValue != NULL)
6157
725k
          xmlFree(defaultValue);
6158
12.0M
      GROW;
6159
12.0M
  }
6160
3.70M
  if (RAW == '>') {
6161
3.57M
      if (inputid != ctxt->input->id) {
6162
95
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6163
95
                               "Attribute list declaration doesn't start and"
6164
95
                               " stop in the same entity\n");
6165
95
      }
6166
3.57M
      NEXT;
6167
3.57M
  }
6168
3.70M
    }
6169
3.71M
}
6170
6171
/**
6172
 * xmlParseElementMixedContentDecl:
6173
 * @ctxt:  an XML parser context
6174
 * @inputchk:  the input used for the current entity, needed for boundary checks
6175
 *
6176
 * DEPRECATED: Internal function, don't use.
6177
 *
6178
 * parse the declaration for a Mixed Element content
6179
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6180
 *
6181
 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
6182
 *                '(' S? '#PCDATA' S? ')'
6183
 *
6184
 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
6185
 *
6186
 * [ VC: No Duplicate Types ]
6187
 * The same name must not appear more than once in a single
6188
 * mixed-content declaration.
6189
 *
6190
 * returns: the list of the xmlElementContentPtr describing the element choices
6191
 */
6192
xmlElementContentPtr
6193
1.71M
xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6194
1.71M
    xmlElementContentPtr ret = NULL, cur = NULL, n;
6195
1.71M
    const xmlChar *elem = NULL;
6196
6197
1.71M
    GROW;
6198
1.71M
    if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6199
1.71M
  SKIP(7);
6200
1.71M
  SKIP_BLANKS;
6201
1.71M
  SHRINK;
6202
1.71M
  if (RAW == ')') {
6203
977k
      if (ctxt->input->id != inputchk) {
6204
45
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6205
45
                               "Element content declaration doesn't start and"
6206
45
                               " stop in the same entity\n");
6207
45
      }
6208
977k
      NEXT;
6209
977k
      ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6210
977k
      if (ret == NULL)
6211
0
          return(NULL);
6212
977k
      if (RAW == '*') {
6213
1.01k
    ret->ocur = XML_ELEMENT_CONTENT_MULT;
6214
1.01k
    NEXT;
6215
1.01k
      }
6216
977k
      return(ret);
6217
977k
  }
6218
738k
  if ((RAW == '(') || (RAW == '|')) {
6219
734k
      ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6220
734k
      if (ret == NULL) return(NULL);
6221
734k
  }
6222
7.99M
  while ((RAW == '|') && (ctxt->instate != XML_PARSER_EOF)) {
6223
7.25M
      NEXT;
6224
7.25M
      if (elem == NULL) {
6225
733k
          ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6226
733k
    if (ret == NULL) {
6227
0
        xmlFreeDocElementContent(ctxt->myDoc, cur);
6228
0
                    return(NULL);
6229
0
                }
6230
733k
    ret->c1 = cur;
6231
733k
    if (cur != NULL)
6232
733k
        cur->parent = ret;
6233
733k
    cur = ret;
6234
6.52M
      } else {
6235
6.52M
          n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6236
6.52M
    if (n == NULL) {
6237
0
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6238
0
                    return(NULL);
6239
0
                }
6240
6.52M
    n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6241
6.52M
    if (n->c1 != NULL)
6242
6.52M
        n->c1->parent = n;
6243
6.52M
          cur->c2 = n;
6244
6.52M
    if (n != NULL)
6245
6.52M
        n->parent = cur;
6246
6.52M
    cur = n;
6247
6.52M
      }
6248
7.25M
      SKIP_BLANKS;
6249
7.25M
      elem = xmlParseName(ctxt);
6250
7.25M
      if (elem == NULL) {
6251
2.24k
    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6252
2.24k
      "xmlParseElementMixedContentDecl : Name expected\n");
6253
2.24k
    xmlFreeDocElementContent(ctxt->myDoc, ret);
6254
2.24k
    return(NULL);
6255
2.24k
      }
6256
7.25M
      SKIP_BLANKS;
6257
7.25M
      GROW;
6258
7.25M
  }
6259
736k
  if ((RAW == ')') && (NXT(1) == '*')) {
6260
727k
      if (elem != NULL) {
6261
727k
    cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
6262
727k
                                   XML_ELEMENT_CONTENT_ELEMENT);
6263
727k
    if (cur->c2 != NULL)
6264
727k
        cur->c2->parent = cur;
6265
727k
            }
6266
727k
            if (ret != NULL)
6267
727k
                ret->ocur = XML_ELEMENT_CONTENT_MULT;
6268
727k
      if (ctxt->input->id != inputchk) {
6269
0
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6270
0
                               "Element content declaration doesn't start and"
6271
0
                               " stop in the same entity\n");
6272
0
      }
6273
727k
      SKIP(2);
6274
727k
  } else {
6275
8.77k
      xmlFreeDocElementContent(ctxt->myDoc, ret);
6276
8.77k
      xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
6277
8.77k
      return(NULL);
6278
8.77k
  }
6279
6280
736k
    } else {
6281
0
  xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
6282
0
    }
6283
727k
    return(ret);
6284
1.71M
}
6285
6286
/**
6287
 * xmlParseElementChildrenContentDeclPriv:
6288
 * @ctxt:  an XML parser context
6289
 * @inputchk:  the input used for the current entity, needed for boundary checks
6290
 * @depth: the level of recursion
6291
 *
6292
 * parse the declaration for a Mixed Element content
6293
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6294
 *
6295
 *
6296
 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6297
 *
6298
 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6299
 *
6300
 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6301
 *
6302
 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6303
 *
6304
 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6305
 * TODO Parameter-entity replacement text must be properly nested
6306
 *  with parenthesized groups. That is to say, if either of the
6307
 *  opening or closing parentheses in a choice, seq, or Mixed
6308
 *  construct is contained in the replacement text for a parameter
6309
 *  entity, both must be contained in the same replacement text. For
6310
 *  interoperability, if a parameter-entity reference appears in a
6311
 *  choice, seq, or Mixed construct, its replacement text should not
6312
 *  be empty, and neither the first nor last non-blank character of
6313
 *  the replacement text should be a connector (| or ,).
6314
 *
6315
 * Returns the tree of xmlElementContentPtr describing the element
6316
 *          hierarchy.
6317
 */
6318
static xmlElementContentPtr
6319
xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
6320
1.74M
                                       int depth) {
6321
1.74M
    xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
6322
1.74M
    const xmlChar *elem;
6323
1.74M
    xmlChar type = 0;
6324
6325
1.74M
    if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
6326
1.74M
        (depth >  2048)) {
6327
0
        xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED,
6328
0
"xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n",
6329
0
                          depth);
6330
0
  return(NULL);
6331
0
    }
6332
1.74M
    SKIP_BLANKS;
6333
1.74M
    GROW;
6334
1.74M
    if (RAW == '(') {
6335
82.7k
  int inputid = ctxt->input->id;
6336
6337
        /* Recurse on first child */
6338
82.7k
  NEXT;
6339
82.7k
  SKIP_BLANKS;
6340
82.7k
        cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6341
82.7k
                                                           depth + 1);
6342
82.7k
        if (cur == NULL)
6343
16.6k
            return(NULL);
6344
66.1k
  SKIP_BLANKS;
6345
66.1k
  GROW;
6346
1.66M
    } else {
6347
1.66M
  elem = xmlParseName(ctxt);
6348
1.66M
  if (elem == NULL) {
6349
19.5k
      xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6350
19.5k
      return(NULL);
6351
19.5k
  }
6352
1.64M
        cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6353
1.64M
  if (cur == NULL) {
6354
0
      xmlErrMemory(ctxt, NULL);
6355
0
      return(NULL);
6356
0
  }
6357
1.64M
  GROW;
6358
1.64M
  if (RAW == '?') {
6359
205k
      cur->ocur = XML_ELEMENT_CONTENT_OPT;
6360
205k
      NEXT;
6361
1.43M
  } else if (RAW == '*') {
6362
214k
      cur->ocur = XML_ELEMENT_CONTENT_MULT;
6363
214k
      NEXT;
6364
1.22M
  } else if (RAW == '+') {
6365
213k
      cur->ocur = XML_ELEMENT_CONTENT_PLUS;
6366
213k
      NEXT;
6367
1.01M
  } else {
6368
1.01M
      cur->ocur = XML_ELEMENT_CONTENT_ONCE;
6369
1.01M
  }
6370
1.64M
  GROW;
6371
1.64M
    }
6372
1.70M
    SKIP_BLANKS;
6373
1.70M
    SHRINK;
6374
6.65M
    while ((RAW != ')') && (ctxt->instate != XML_PARSER_EOF)) {
6375
        /*
6376
   * Each loop we parse one separator and one element.
6377
   */
6378
4.98M
        if (RAW == ',') {
6379
1.66M
      if (type == 0) type = CUR;
6380
6381
      /*
6382
       * Detect "Name | Name , Name" error
6383
       */
6384
1.02M
      else if (type != CUR) {
6385
987
    xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6386
987
        "xmlParseElementChildrenContentDecl : '%c' expected\n",
6387
987
                      type);
6388
987
    if ((last != NULL) && (last != ret))
6389
987
        xmlFreeDocElementContent(ctxt->myDoc, last);
6390
987
    if (ret != NULL)
6391
987
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6392
987
    return(NULL);
6393
987
      }
6394
1.66M
      NEXT;
6395
6396
1.66M
      op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
6397
1.66M
      if (op == NULL) {
6398
0
    if ((last != NULL) && (last != ret))
6399
0
        xmlFreeDocElementContent(ctxt->myDoc, last);
6400
0
          xmlFreeDocElementContent(ctxt->myDoc, ret);
6401
0
    return(NULL);
6402
0
      }
6403
1.66M
      if (last == NULL) {
6404
640k
    op->c1 = ret;
6405
640k
    if (ret != NULL)
6406
640k
        ret->parent = op;
6407
640k
    ret = cur = op;
6408
1.02M
      } else {
6409
1.02M
          cur->c2 = op;
6410
1.02M
    if (op != NULL)
6411
1.02M
        op->parent = cur;
6412
1.02M
    op->c1 = last;
6413
1.02M
    if (last != NULL)
6414
1.02M
        last->parent = op;
6415
1.02M
    cur =op;
6416
1.02M
    last = NULL;
6417
1.02M
      }
6418
3.32M
  } else if (RAW == '|') {
6419
3.29M
      if (type == 0) type = CUR;
6420
6421
      /*
6422
       * Detect "Name , Name | Name" error
6423
       */
6424
2.84M
      else if (type != CUR) {
6425
871
    xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6426
871
        "xmlParseElementChildrenContentDecl : '%c' expected\n",
6427
871
          type);
6428
871
    if ((last != NULL) && (last != ret))
6429
871
        xmlFreeDocElementContent(ctxt->myDoc, last);
6430
871
    if (ret != NULL)
6431
871
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6432
871
    return(NULL);
6433
871
      }
6434
3.29M
      NEXT;
6435
6436
3.29M
      op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6437
3.29M
      if (op == NULL) {
6438
0
    if ((last != NULL) && (last != ret))
6439
0
        xmlFreeDocElementContent(ctxt->myDoc, last);
6440
0
    if (ret != NULL)
6441
0
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6442
0
    return(NULL);
6443
0
      }
6444
3.29M
      if (last == NULL) {
6445
451k
    op->c1 = ret;
6446
451k
    if (ret != NULL)
6447
451k
        ret->parent = op;
6448
451k
    ret = cur = op;
6449
2.83M
      } else {
6450
2.83M
          cur->c2 = op;
6451
2.83M
    if (op != NULL)
6452
2.83M
        op->parent = cur;
6453
2.83M
    op->c1 = last;
6454
2.83M
    if (last != NULL)
6455
2.83M
        last->parent = op;
6456
2.83M
    cur =op;
6457
2.83M
    last = NULL;
6458
2.83M
      }
6459
3.29M
  } else {
6460
29.2k
      xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
6461
29.2k
      if ((last != NULL) && (last != ret))
6462
10.9k
          xmlFreeDocElementContent(ctxt->myDoc, last);
6463
29.2k
      if (ret != NULL)
6464
29.2k
    xmlFreeDocElementContent(ctxt->myDoc, ret);
6465
29.2k
      return(NULL);
6466
29.2k
  }
6467
4.95M
  GROW;
6468
4.95M
  SKIP_BLANKS;
6469
4.95M
  GROW;
6470
4.95M
  if (RAW == '(') {
6471
201k
      int inputid = ctxt->input->id;
6472
      /* Recurse on second child */
6473
201k
      NEXT;
6474
201k
      SKIP_BLANKS;
6475
201k
      last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6476
201k
                                                          depth + 1);
6477
201k
            if (last == NULL) {
6478
4.00k
    if (ret != NULL)
6479
4.00k
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6480
4.00k
    return(NULL);
6481
4.00k
            }
6482
197k
      SKIP_BLANKS;
6483
4.75M
  } else {
6484
4.75M
      elem = xmlParseName(ctxt);
6485
4.75M
      if (elem == NULL) {
6486
6.11k
    xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6487
6.11k
    if (ret != NULL)
6488
6.11k
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6489
6.11k
    return(NULL);
6490
6.11k
      }
6491
4.74M
      last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6492
4.74M
      if (last == NULL) {
6493
0
    if (ret != NULL)
6494
0
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6495
0
    return(NULL);
6496
0
      }
6497
4.74M
      if (RAW == '?') {
6498
757k
    last->ocur = XML_ELEMENT_CONTENT_OPT;
6499
757k
    NEXT;
6500
3.99M
      } else if (RAW == '*') {
6501
482k
    last->ocur = XML_ELEMENT_CONTENT_MULT;
6502
482k
    NEXT;
6503
3.50M
      } else if (RAW == '+') {
6504
79.3k
    last->ocur = XML_ELEMENT_CONTENT_PLUS;
6505
79.3k
    NEXT;
6506
3.42M
      } else {
6507
3.42M
    last->ocur = XML_ELEMENT_CONTENT_ONCE;
6508
3.42M
      }
6509
4.74M
  }
6510
4.94M
  SKIP_BLANKS;
6511
4.94M
  GROW;
6512
4.94M
    }
6513
1.66M
    if ((cur != NULL) && (last != NULL)) {
6514
1.06M
        cur->c2 = last;
6515
1.06M
  if (last != NULL)
6516
1.06M
      last->parent = cur;
6517
1.06M
    }
6518
1.66M
    if (ctxt->input->id != inputchk) {
6519
93
  xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6520
93
                       "Element content declaration doesn't start and stop in"
6521
93
                       " the same entity\n");
6522
93
    }
6523
1.66M
    NEXT;
6524
1.66M
    if (RAW == '?') {
6525
59.8k
  if (ret != NULL) {
6526
59.8k
      if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6527
59.8k
          (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6528
87
          ret->ocur = XML_ELEMENT_CONTENT_MULT;
6529
59.7k
      else
6530
59.7k
          ret->ocur = XML_ELEMENT_CONTENT_OPT;
6531
59.8k
  }
6532
59.8k
  NEXT;
6533
1.60M
    } else if (RAW == '*') {
6534
438k
  if (ret != NULL) {
6535
438k
      ret->ocur = XML_ELEMENT_CONTENT_MULT;
6536
438k
      cur = ret;
6537
      /*
6538
       * Some normalization:
6539
       * (a | b* | c?)* == (a | b | c)*
6540
       */
6541
2.61M
      while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6542
2.18M
    if ((cur->c1 != NULL) &&
6543
2.18M
              ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6544
2.18M
         (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6545
93.3k
        cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6546
2.18M
    if ((cur->c2 != NULL) &&
6547
2.18M
              ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6548
2.18M
         (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6549
15.0k
        cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6550
2.18M
    cur = cur->c2;
6551
2.18M
      }
6552
438k
  }
6553
438k
  NEXT;
6554
1.16M
    } else if (RAW == '+') {
6555
219k
  if (ret != NULL) {
6556
219k
      int found = 0;
6557
6558
219k
      if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6559
219k
          (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6560
27
          ret->ocur = XML_ELEMENT_CONTENT_MULT;
6561
219k
      else
6562
219k
          ret->ocur = XML_ELEMENT_CONTENT_PLUS;
6563
      /*
6564
       * Some normalization:
6565
       * (a | b*)+ == (a | b)*
6566
       * (a | b?)+ == (a | b)*
6567
       */
6568
344k
      while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6569
124k
    if ((cur->c1 != NULL) &&
6570
124k
              ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6571
124k
         (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6572
173
        cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6573
173
        found = 1;
6574
173
    }
6575
124k
    if ((cur->c2 != NULL) &&
6576
124k
              ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6577
124k
         (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6578
189
        cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6579
189
        found = 1;
6580
189
    }
6581
124k
    cur = cur->c2;
6582
124k
      }
6583
219k
      if (found)
6584
236
    ret->ocur = XML_ELEMENT_CONTENT_MULT;
6585
219k
  }
6586
219k
  NEXT;
6587
219k
    }
6588
1.66M
    return(ret);
6589
1.70M
}
6590
6591
/**
6592
 * xmlParseElementChildrenContentDecl:
6593
 * @ctxt:  an XML parser context
6594
 * @inputchk:  the input used for the current entity, needed for boundary checks
6595
 *
6596
 * DEPRECATED: Internal function, don't use.
6597
 *
6598
 * parse the declaration for a Mixed Element content
6599
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6600
 *
6601
 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6602
 *
6603
 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6604
 *
6605
 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6606
 *
6607
 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6608
 *
6609
 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6610
 * TODO Parameter-entity replacement text must be properly nested
6611
 *  with parenthesized groups. That is to say, if either of the
6612
 *  opening or closing parentheses in a choice, seq, or Mixed
6613
 *  construct is contained in the replacement text for a parameter
6614
 *  entity, both must be contained in the same replacement text. For
6615
 *  interoperability, if a parameter-entity reference appears in a
6616
 *  choice, seq, or Mixed construct, its replacement text should not
6617
 *  be empty, and neither the first nor last non-blank character of
6618
 *  the replacement text should be a connector (| or ,).
6619
 *
6620
 * Returns the tree of xmlElementContentPtr describing the element
6621
 *          hierarchy.
6622
 */
6623
xmlElementContentPtr
6624
0
xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6625
    /* stub left for API/ABI compat */
6626
0
    return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6627
0
}
6628
6629
/**
6630
 * xmlParseElementContentDecl:
6631
 * @ctxt:  an XML parser context
6632
 * @name:  the name of the element being defined.
6633
 * @result:  the Element Content pointer will be stored here if any
6634
 *
6635
 * DEPRECATED: Internal function, don't use.
6636
 *
6637
 * parse the declaration for an Element content either Mixed or Children,
6638
 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
6639
 *
6640
 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6641
 *
6642
 * returns: the type of element content XML_ELEMENT_TYPE_xxx
6643
 */
6644
6645
int
6646
xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
6647
3.17M
                           xmlElementContentPtr *result) {
6648
6649
3.17M
    xmlElementContentPtr tree = NULL;
6650
3.17M
    int inputid = ctxt->input->id;
6651
3.17M
    int res;
6652
6653
3.17M
    *result = NULL;
6654
6655
3.17M
    if (RAW != '(') {
6656
0
  xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6657
0
    "xmlParseElementContentDecl : %s '(' expected\n", name);
6658
0
  return(-1);
6659
0
    }
6660
3.17M
    NEXT;
6661
3.17M
    GROW;
6662
3.17M
    if (ctxt->instate == XML_PARSER_EOF)
6663
0
        return(-1);
6664
3.17M
    SKIP_BLANKS;
6665
3.17M
    if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6666
1.71M
        tree = xmlParseElementMixedContentDecl(ctxt, inputid);
6667
1.71M
  res = XML_ELEMENT_TYPE_MIXED;
6668
1.71M
    } else {
6669
1.46M
        tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
6670
1.46M
  res = XML_ELEMENT_TYPE_ELEMENT;
6671
1.46M
    }
6672
3.17M
    SKIP_BLANKS;
6673
3.17M
    *result = tree;
6674
3.17M
    return(res);
6675
3.17M
}
6676
6677
/**
6678
 * xmlParseElementDecl:
6679
 * @ctxt:  an XML parser context
6680
 *
6681
 * DEPRECATED: Internal function, don't use.
6682
 *
6683
 * Parse an element declaration. Always consumes '<!'.
6684
 *
6685
 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6686
 *
6687
 * [ VC: Unique Element Type Declaration ]
6688
 * No element type may be declared more than once
6689
 *
6690
 * Returns the type of the element, or -1 in case of error
6691
 */
6692
int
6693
4.36M
xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
6694
4.36M
    const xmlChar *name;
6695
4.36M
    int ret = -1;
6696
4.36M
    xmlElementContentPtr content  = NULL;
6697
6698
4.36M
    if ((CUR != '<') || (NXT(1) != '!'))
6699
0
        return(ret);
6700
4.36M
    SKIP(2);
6701
6702
    /* GROW; done in the caller */
6703
4.36M
    if (CMP7(CUR_PTR, 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
6704
4.34M
  int inputid = ctxt->input->id;
6705
6706
4.34M
  SKIP(7);
6707
4.34M
  if (SKIP_BLANKS == 0) {
6708
6.64k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6709
6.64k
               "Space required after 'ELEMENT'\n");
6710
6.64k
      return(-1);
6711
6.64k
  }
6712
4.34M
        name = xmlParseName(ctxt);
6713
4.34M
  if (name == NULL) {
6714
5.30k
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6715
5.30k
         "xmlParseElementDecl: no name for Element\n");
6716
5.30k
      return(-1);
6717
5.30k
  }
6718
4.33M
  if (SKIP_BLANKS == 0) {
6719
18.5k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6720
18.5k
         "Space required after the element name\n");
6721
18.5k
  }
6722
4.33M
  if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
6723
1.10M
      SKIP(5);
6724
      /*
6725
       * Element must always be empty.
6726
       */
6727
1.10M
      ret = XML_ELEMENT_TYPE_EMPTY;
6728
3.22M
  } else if ((RAW == 'A') && (NXT(1) == 'N') &&
6729
3.22M
             (NXT(2) == 'Y')) {
6730
20.6k
      SKIP(3);
6731
      /*
6732
       * Element is a generic container.
6733
       */
6734
20.6k
      ret = XML_ELEMENT_TYPE_ANY;
6735
3.20M
  } else if (RAW == '(') {
6736
3.17M
      ret = xmlParseElementContentDecl(ctxt, name, &content);
6737
3.17M
  } else {
6738
      /*
6739
       * [ WFC: PEs in Internal Subset ] error handling.
6740
       */
6741
31.4k
      if ((RAW == '%') && (ctxt->external == 0) &&
6742
31.4k
          (ctxt->inputNr == 1)) {
6743
2.10k
    xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
6744
2.10k
    "PEReference: forbidden within markup decl in internal subset\n");
6745
29.3k
      } else {
6746
29.3k
    xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6747
29.3k
          "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6748
29.3k
            }
6749
31.4k
      return(-1);
6750
31.4k
  }
6751
6752
4.30M
  SKIP_BLANKS;
6753
6754
4.30M
  if (RAW != '>') {
6755
69.4k
      xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
6756
69.4k
      if (content != NULL) {
6757
7.18k
    xmlFreeDocElementContent(ctxt->myDoc, content);
6758
7.18k
      }
6759
4.23M
  } else {
6760
4.23M
      if (inputid != ctxt->input->id) {
6761
66
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6762
66
                               "Element declaration doesn't start and stop in"
6763
66
                               " the same entity\n");
6764
66
      }
6765
6766
4.23M
      NEXT;
6767
4.23M
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6768
4.23M
    (ctxt->sax->elementDecl != NULL)) {
6769
3.87M
    if (content != NULL)
6770
2.85M
        content->parent = NULL;
6771
3.87M
          ctxt->sax->elementDecl(ctxt->userData, name, ret,
6772
3.87M
                           content);
6773
3.87M
    if ((content != NULL) && (content->parent == NULL)) {
6774
        /*
6775
         * this is a trick: if xmlAddElementDecl is called,
6776
         * instead of copying the full tree it is plugged directly
6777
         * if called from the parser. Avoid duplicating the
6778
         * interfaces or change the API/ABI
6779
         */
6780
8.79k
        xmlFreeDocElementContent(ctxt->myDoc, content);
6781
8.79k
    }
6782
3.87M
      } else if (content != NULL) {
6783
248k
    xmlFreeDocElementContent(ctxt->myDoc, content);
6784
248k
      }
6785
4.23M
  }
6786
4.30M
    }
6787
4.32M
    return(ret);
6788
4.36M
}
6789
6790
/**
6791
 * xmlParseConditionalSections
6792
 * @ctxt:  an XML parser context
6793
 *
6794
 * Parse a conditional section. Always consumes '<!['.
6795
 *
6796
 * [61] conditionalSect ::= includeSect | ignoreSect
6797
 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6798
 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6799
 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6800
 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6801
 */
6802
6803
static void
6804
25.3k
xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
6805
25.3k
    int *inputIds = NULL;
6806
25.3k
    size_t inputIdsSize = 0;
6807
25.3k
    size_t depth = 0;
6808
6809
132k
    while (ctxt->instate != XML_PARSER_EOF) {
6810
132k
        if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6811
80.0k
            int id = ctxt->input->id;
6812
6813
80.0k
            SKIP(3);
6814
80.0k
            SKIP_BLANKS;
6815
6816
80.0k
            if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
6817
66.8k
                SKIP(7);
6818
66.8k
                SKIP_BLANKS;
6819
66.8k
                if (RAW != '[') {
6820
768
                    xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6821
768
                    xmlHaltParser(ctxt);
6822
768
                    goto error;
6823
768
                }
6824
66.0k
                if (ctxt->input->id != id) {
6825
3
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6826
3
                                   "All markup of the conditional section is"
6827
3
                                   " not in the same entity\n");
6828
3
                }
6829
66.0k
                NEXT;
6830
6831
66.0k
                if (inputIdsSize <= depth) {
6832
20.2k
                    int *tmp;
6833
6834
20.2k
                    inputIdsSize = (inputIdsSize == 0 ? 4 : inputIdsSize * 2);
6835
20.2k
                    tmp = (int *) xmlRealloc(inputIds,
6836
20.2k
                            inputIdsSize * sizeof(int));
6837
20.2k
                    if (tmp == NULL) {
6838
0
                        xmlErrMemory(ctxt, NULL);
6839
0
                        goto error;
6840
0
                    }
6841
20.2k
                    inputIds = tmp;
6842
20.2k
                }
6843
66.0k
                inputIds[depth] = id;
6844
66.0k
                depth++;
6845
66.0k
            } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
6846
4.97k
                size_t ignoreDepth = 0;
6847
6848
4.97k
                SKIP(6);
6849
4.97k
                SKIP_BLANKS;
6850
4.97k
                if (RAW != '[') {
6851
240
                    xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6852
240
                    xmlHaltParser(ctxt);
6853
240
                    goto error;
6854
240
                }
6855
4.73k
                if (ctxt->input->id != id) {
6856
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6857
0
                                   "All markup of the conditional section is"
6858
0
                                   " not in the same entity\n");
6859
0
                }
6860
4.73k
                NEXT;
6861
6862
8.22M
                while (RAW != 0) {
6863
8.22M
                    if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6864
10.7k
                        SKIP(3);
6865
10.7k
                        ignoreDepth++;
6866
                        /* Check for integer overflow */
6867
10.7k
                        if (ignoreDepth == 0) {
6868
0
                            xmlErrMemory(ctxt, NULL);
6869
0
                            goto error;
6870
0
                        }
6871
8.21M
                    } else if ((RAW == ']') && (NXT(1) == ']') &&
6872
8.21M
                               (NXT(2) == '>')) {
6873
6.34k
                        if (ignoreDepth == 0)
6874
1.47k
                            break;
6875
4.87k
                        SKIP(3);
6876
4.87k
                        ignoreDepth--;
6877
8.20M
                    } else {
6878
8.20M
                        NEXT;
6879
8.20M
                    }
6880
8.22M
                }
6881
6882
4.73k
    if (RAW == 0) {
6883
3.26k
        xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
6884
3.26k
                    goto error;
6885
3.26k
    }
6886
1.47k
                if (ctxt->input->id != id) {
6887
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6888
0
                                   "All markup of the conditional section is"
6889
0
                                   " not in the same entity\n");
6890
0
                }
6891
1.47k
                SKIP(3);
6892
8.24k
            } else {
6893
8.24k
                xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
6894
8.24k
                xmlHaltParser(ctxt);
6895
8.24k
                goto error;
6896
8.24k
            }
6897
80.0k
        } else if ((depth > 0) &&
6898
52.4k
                   (RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6899
28.8k
            depth--;
6900
28.8k
            if (ctxt->input->id != inputIds[depth]) {
6901
597
                xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6902
597
                               "All markup of the conditional section is not"
6903
597
                               " in the same entity\n");
6904
597
            }
6905
28.8k
            SKIP(3);
6906
28.8k
        } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
6907
17.4k
            xmlParseMarkupDecl(ctxt);
6908
17.4k
        } else {
6909
6.18k
            xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6910
6.18k
            xmlHaltParser(ctxt);
6911
6.18k
            goto error;
6912
6.18k
        }
6913
6914
113k
        if (depth == 0)
6915
6.29k
            break;
6916
6917
107k
        SKIP_BLANKS;
6918
107k
        GROW;
6919
107k
    }
6920
6921
25.3k
error:
6922
25.3k
    xmlFree(inputIds);
6923
25.3k
}
6924
6925
/**
6926
 * xmlParseMarkupDecl:
6927
 * @ctxt:  an XML parser context
6928
 *
6929
 * DEPRECATED: Internal function, don't use.
6930
 *
6931
 * Parse markup declarations. Always consumes '<!' or '<?'.
6932
 *
6933
 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6934
 *                     NotationDecl | PI | Comment
6935
 *
6936
 * [ VC: Proper Declaration/PE Nesting ]
6937
 * Parameter-entity replacement text must be properly nested with
6938
 * markup declarations. That is to say, if either the first character
6939
 * or the last character of a markup declaration (markupdecl above) is
6940
 * contained in the replacement text for a parameter-entity reference,
6941
 * both must be contained in the same replacement text.
6942
 *
6943
 * [ WFC: PEs in Internal Subset ]
6944
 * In the internal DTD subset, parameter-entity references can occur
6945
 * only where markup declarations can occur, not within markup declarations.
6946
 * (This does not apply to references that occur in external parameter
6947
 * entities or to the external subset.)
6948
 */
6949
void
6950
25.5M
xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
6951
25.5M
    GROW;
6952
25.5M
    if (CUR == '<') {
6953
25.5M
        if (NXT(1) == '!') {
6954
25.5M
      switch (NXT(2)) {
6955
8.35M
          case 'E':
6956
8.35M
        if (NXT(3) == 'L')
6957
4.36M
      xmlParseElementDecl(ctxt);
6958
3.99M
        else if (NXT(3) == 'N')
6959
3.98M
      xmlParseEntityDecl(ctxt);
6960
4.91k
                    else
6961
4.91k
                        SKIP(2);
6962
8.35M
        break;
6963
3.71M
          case 'A':
6964
3.71M
        xmlParseAttributeListDecl(ctxt);
6965
3.71M
        break;
6966
20.0k
          case 'N':
6967
20.0k
        xmlParseNotationDecl(ctxt);
6968
20.0k
        break;
6969
13.4M
          case '-':
6970
13.4M
        xmlParseComment(ctxt);
6971
13.4M
        break;
6972
18.2k
    default:
6973
        /* there is an error but it will be detected later */
6974
18.2k
                    SKIP(2);
6975
18.2k
        break;
6976
25.5M
      }
6977
25.5M
  } else if (NXT(1) == '?') {
6978
15.3k
      xmlParsePI(ctxt);
6979
15.3k
  }
6980
25.5M
    }
6981
6982
    /*
6983
     * detect requirement to exit there and act accordingly
6984
     * and avoid having instate overridden later on
6985
     */
6986
25.5M
    if (ctxt->instate == XML_PARSER_EOF)
6987
87.1k
        return;
6988
6989
25.4M
    ctxt->instate = XML_PARSER_DTD;
6990
25.4M
}
6991
6992
/**
6993
 * xmlParseTextDecl:
6994
 * @ctxt:  an XML parser context
6995
 *
6996
 * DEPRECATED: Internal function, don't use.
6997
 *
6998
 * parse an XML declaration header for external entities
6999
 *
7000
 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
7001
 */
7002
7003
void
7004
34.9k
xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
7005
34.9k
    xmlChar *version;
7006
34.9k
    const xmlChar *encoding;
7007
34.9k
    int oldstate;
7008
7009
    /*
7010
     * We know that '<?xml' is here.
7011
     */
7012
34.9k
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
7013
33.5k
  SKIP(5);
7014
33.5k
    } else {
7015
1.48k
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
7016
1.48k
  return;
7017
1.48k
    }
7018
7019
    /* Avoid expansion of parameter entities when skipping blanks. */
7020
33.5k
    oldstate = ctxt->instate;
7021
33.5k
    ctxt->instate = XML_PARSER_START;
7022
7023
33.5k
    if (SKIP_BLANKS == 0) {
7024
0
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7025
0
           "Space needed after '<?xml'\n");
7026
0
    }
7027
7028
    /*
7029
     * We may have the VersionInfo here.
7030
     */
7031
33.5k
    version = xmlParseVersionInfo(ctxt);
7032
33.5k
    if (version == NULL)
7033
3.48k
  version = xmlCharStrdup(XML_DEFAULT_VERSION);
7034
30.0k
    else {
7035
30.0k
  if (SKIP_BLANKS == 0) {
7036
1.47k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7037
1.47k
               "Space needed here\n");
7038
1.47k
  }
7039
30.0k
    }
7040
33.5k
    ctxt->input->version = version;
7041
7042
    /*
7043
     * We must have the encoding declaration
7044
     */
7045
33.5k
    encoding = xmlParseEncodingDecl(ctxt);
7046
33.5k
    if (ctxt->instate == XML_PARSER_EOF)
7047
0
        return;
7048
33.5k
    if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7049
  /*
7050
   * The XML REC instructs us to stop parsing right here
7051
   */
7052
558
        ctxt->instate = oldstate;
7053
558
        return;
7054
558
    }
7055
32.9k
    if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
7056
12.4k
  xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
7057
12.4k
           "Missing encoding in text declaration\n");
7058
12.4k
    }
7059
7060
32.9k
    SKIP_BLANKS;
7061
32.9k
    if ((RAW == '?') && (NXT(1) == '>')) {
7062
23.3k
        SKIP(2);
7063
23.3k
    } else if (RAW == '>') {
7064
        /* Deprecated old WD ... */
7065
494
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
7066
494
  NEXT;
7067
9.09k
    } else {
7068
9.09k
        int c;
7069
7070
9.09k
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
7071
764k
        while ((c = CUR) != 0) {
7072
760k
            NEXT;
7073
760k
            if (c == '>')
7074
5.79k
                break;
7075
760k
        }
7076
9.09k
    }
7077
7078
32.9k
    ctxt->instate = oldstate;
7079
32.9k
}
7080
7081
/**
7082
 * xmlParseExternalSubset:
7083
 * @ctxt:  an XML parser context
7084
 * @ExternalID: the external identifier
7085
 * @SystemID: the system identifier (or URL)
7086
 *
7087
 * parse Markup declarations from an external subset
7088
 *
7089
 * [30] extSubset ::= textDecl? extSubsetDecl
7090
 *
7091
 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
7092
 */
7093
void
7094
xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
7095
119k
                       const xmlChar *SystemID) {
7096
119k
    xmlDetectSAX2(ctxt);
7097
119k
    GROW;
7098
7099
119k
    if ((ctxt->encoding == NULL) &&
7100
119k
        (ctxt->input->end - ctxt->input->cur >= 4)) {
7101
118k
        xmlChar start[4];
7102
118k
  xmlCharEncoding enc;
7103
7104
118k
  start[0] = RAW;
7105
118k
  start[1] = NXT(1);
7106
118k
  start[2] = NXT(2);
7107
118k
  start[3] = NXT(3);
7108
118k
  enc = xmlDetectCharEncoding(start, 4);
7109
118k
  if (enc != XML_CHAR_ENCODING_NONE)
7110
32.1k
      xmlSwitchEncoding(ctxt, enc);
7111
118k
    }
7112
7113
119k
    if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
7114
29.9k
  xmlParseTextDecl(ctxt);
7115
29.9k
  if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7116
      /*
7117
       * The XML REC instructs us to stop parsing right here
7118
       */
7119
429
      xmlHaltParser(ctxt);
7120
429
      return;
7121
429
  }
7122
29.9k
    }
7123
119k
    if (ctxt->myDoc == NULL) {
7124
0
        ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
7125
0
  if (ctxt->myDoc == NULL) {
7126
0
      xmlErrMemory(ctxt, "New Doc failed");
7127
0
      return;
7128
0
  }
7129
0
  ctxt->myDoc->properties = XML_DOC_INTERNAL;
7130
0
    }
7131
119k
    if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
7132
0
        xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
7133
7134
119k
    ctxt->instate = XML_PARSER_DTD;
7135
119k
    ctxt->external = 1;
7136
119k
    SKIP_BLANKS;
7137
4.13M
    while ((ctxt->instate != XML_PARSER_EOF) && (RAW != 0)) {
7138
4.05M
  GROW;
7139
4.05M
        if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7140
25.3k
            xmlParseConditionalSections(ctxt);
7141
4.02M
        } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
7142
3.99M
            xmlParseMarkupDecl(ctxt);
7143
3.99M
        } else {
7144
31.9k
            xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7145
31.9k
            xmlHaltParser(ctxt);
7146
31.9k
            return;
7147
31.9k
        }
7148
4.01M
        SKIP_BLANKS;
7149
4.01M
    }
7150
7151
87.0k
    if (RAW != 0) {
7152
0
  xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7153
0
    }
7154
7155
87.0k
}
7156
7157
/**
7158
 * xmlParseReference:
7159
 * @ctxt:  an XML parser context
7160
 *
7161
 * DEPRECATED: Internal function, don't use.
7162
 *
7163
 * parse and handle entity references in content, depending on the SAX
7164
 * interface, this may end-up in a call to character() if this is a
7165
 * CharRef, a predefined entity, if there is no reference() callback.
7166
 * or if the parser was asked to switch to that mode.
7167
 *
7168
 * Always consumes '&'.
7169
 *
7170
 * [67] Reference ::= EntityRef | CharRef
7171
 */
7172
void
7173
10.4M
xmlParseReference(xmlParserCtxtPtr ctxt) {
7174
10.4M
    xmlEntityPtr ent;
7175
10.4M
    xmlChar *val;
7176
10.4M
    int was_checked;
7177
10.4M
    xmlNodePtr list = NULL;
7178
10.4M
    xmlParserErrors ret = XML_ERR_OK;
7179
7180
7181
10.4M
    if (RAW != '&')
7182
0
        return;
7183
7184
    /*
7185
     * Simple case of a CharRef
7186
     */
7187
10.4M
    if (NXT(1) == '#') {
7188
562k
  int i = 0;
7189
562k
  xmlChar out[16];
7190
562k
  int hex = NXT(2);
7191
562k
  int value = xmlParseCharRef(ctxt);
7192
7193
562k
  if (value == 0)
7194
79.4k
      return;
7195
483k
  if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
7196
      /*
7197
       * So we are using non-UTF-8 buffers
7198
       * Check that the char fit on 8bits, if not
7199
       * generate a CharRef.
7200
       */
7201
278k
      if (value <= 0xFF) {
7202
273k
    out[0] = value;
7203
273k
    out[1] = 0;
7204
273k
    if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7205
273k
        (!ctxt->disableSAX))
7206
229k
        ctxt->sax->characters(ctxt->userData, out, 1);
7207
273k
      } else {
7208
5.57k
    if ((hex == 'x') || (hex == 'X'))
7209
1.15k
        snprintf((char *)out, sizeof(out), "#x%X", value);
7210
4.42k
    else
7211
4.42k
        snprintf((char *)out, sizeof(out), "#%d", value);
7212
5.57k
    if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7213
5.57k
        (!ctxt->disableSAX))
7214
4.39k
        ctxt->sax->reference(ctxt->userData, out);
7215
5.57k
      }
7216
278k
  } else {
7217
      /*
7218
       * Just encode the value in UTF-8
7219
       */
7220
204k
      COPY_BUF(0 ,out, i, value);
7221
204k
      out[i] = 0;
7222
204k
      if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7223
204k
    (!ctxt->disableSAX))
7224
170k
    ctxt->sax->characters(ctxt->userData, out, i);
7225
204k
  }
7226
483k
  return;
7227
562k
    }
7228
7229
    /*
7230
     * We are seeing an entity reference
7231
     */
7232
9.91M
    ent = xmlParseEntityRef(ctxt);
7233
9.91M
    if (ent == NULL) return;
7234
7.90M
    if (!ctxt->wellFormed)
7235
2.92M
  return;
7236
4.97M
    was_checked = ent->flags & XML_ENT_PARSED;
7237
7238
    /* special case of predefined entities */
7239
4.97M
    if ((ent->name == NULL) ||
7240
4.97M
        (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
7241
748k
  val = ent->content;
7242
748k
  if (val == NULL) return;
7243
  /*
7244
   * inline the entity.
7245
   */
7246
748k
  if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7247
748k
      (!ctxt->disableSAX))
7248
748k
      ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
7249
748k
  return;
7250
748k
    }
7251
7252
    /*
7253
     * The first reference to the entity trigger a parsing phase
7254
     * where the ent->children is filled with the result from
7255
     * the parsing.
7256
     * Note: external parsed entities will not be loaded, it is not
7257
     * required for a non-validating parser, unless the parsing option
7258
     * of validating, or substituting entities were given. Doing so is
7259
     * far more secure as the parser will only process data coming from
7260
     * the document entity by default.
7261
     */
7262
4.23M
    if (((ent->flags & XML_ENT_PARSED) == 0) &&
7263
4.23M
        ((ent->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY) ||
7264
360k
         (ctxt->options & (XML_PARSE_NOENT | XML_PARSE_DTDVALID)))) {
7265
349k
  unsigned long oldsizeentcopy = ctxt->sizeentcopy;
7266
7267
  /*
7268
   * This is a bit hackish but this seems the best
7269
   * way to make sure both SAX and DOM entity support
7270
   * behaves okay.
7271
   */
7272
349k
  void *user_data;
7273
349k
  if (ctxt->userData == ctxt)
7274
349k
      user_data = NULL;
7275
0
  else
7276
0
      user_data = ctxt->userData;
7277
7278
        /* Avoid overflow as much as possible */
7279
349k
        ctxt->sizeentcopy = 0;
7280
7281
349k
        if (ent->flags & XML_ENT_EXPANDING) {
7282
1.14k
            xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7283
1.14k
            xmlHaltParser(ctxt);
7284
1.14k
            return;
7285
1.14k
        }
7286
7287
348k
        ent->flags |= XML_ENT_EXPANDING;
7288
7289
  /*
7290
   * Check that this entity is well formed
7291
   * 4.3.2: An internal general parsed entity is well-formed
7292
   * if its replacement text matches the production labeled
7293
   * content.
7294
   */
7295
348k
  if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7296
274k
      ctxt->depth++;
7297
274k
      ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content,
7298
274k
                                                user_data, &list);
7299
274k
      ctxt->depth--;
7300
7301
274k
  } else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7302
74.2k
      ctxt->depth++;
7303
74.2k
      ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax,
7304
74.2k
                                     user_data, ctxt->depth, ent->URI,
7305
74.2k
             ent->ExternalID, &list);
7306
74.2k
      ctxt->depth--;
7307
74.2k
  } else {
7308
0
      ret = XML_ERR_ENTITY_PE_INTERNAL;
7309
0
      xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7310
0
       "invalid entity type found\n", NULL);
7311
0
  }
7312
7313
348k
        ent->flags &= ~XML_ENT_EXPANDING;
7314
348k
        ent->flags |= XML_ENT_PARSED | XML_ENT_CHECKED;
7315
348k
        ent->expandedSize = ctxt->sizeentcopy;
7316
348k
  if (ret == XML_ERR_ENTITY_LOOP) {
7317
17.2k
            xmlHaltParser(ctxt);
7318
17.2k
      xmlFreeNodeList(list);
7319
17.2k
      return;
7320
17.2k
  }
7321
331k
  if (xmlParserEntityCheck(ctxt, oldsizeentcopy)) {
7322
0
      xmlFreeNodeList(list);
7323
0
      return;
7324
0
  }
7325
7326
331k
  if ((ret == XML_ERR_OK) && (list != NULL)) {
7327
249k
            ent->children = list;
7328
            /*
7329
             * Prune it directly in the generated document
7330
             * except for single text nodes.
7331
             */
7332
249k
            if ((ctxt->replaceEntities == 0) ||
7333
249k
                (ctxt->parseMode == XML_PARSE_READER) ||
7334
249k
                ((list->type == XML_TEXT_NODE) &&
7335
238k
                 (list->next == NULL))) {
7336
238k
                ent->owner = 1;
7337
605k
                while (list != NULL) {
7338
367k
                    list->parent = (xmlNodePtr) ent;
7339
367k
                    if (list->doc != ent->doc)
7340
0
                        xmlSetTreeDoc(list, ent->doc);
7341
367k
                    if (list->next == NULL)
7342
238k
                        ent->last = list;
7343
367k
                    list = list->next;
7344
367k
                }
7345
238k
                list = NULL;
7346
238k
            } else {
7347
10.6k
                ent->owner = 0;
7348
66.0k
                while (list != NULL) {
7349
55.4k
                    list->parent = (xmlNodePtr) ctxt->node;
7350
55.4k
                    list->doc = ctxt->myDoc;
7351
55.4k
                    if (list->next == NULL)
7352
10.6k
                        ent->last = list;
7353
55.4k
                    list = list->next;
7354
55.4k
                }
7355
10.6k
                list = ent->children;
7356
#ifdef LIBXML_LEGACY_ENABLED
7357
                if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7358
                    xmlAddEntityReference(ent, list, NULL);
7359
#endif /* LIBXML_LEGACY_ENABLED */
7360
10.6k
            }
7361
249k
  } else if ((ret != XML_ERR_OK) &&
7362
82.0k
       (ret != XML_WAR_UNDECLARED_ENTITY)) {
7363
46.1k
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7364
46.1k
         "Entity '%s' failed to parse\n", ent->name);
7365
46.1k
            if (ent->content != NULL)
7366
13.7k
                ent->content[0] = 0;
7367
46.1k
  } else if (list != NULL) {
7368
0
      xmlFreeNodeList(list);
7369
0
      list = NULL;
7370
0
  }
7371
7372
        /* Prevent entity from being parsed and expanded twice (Bug 760367). */
7373
331k
        was_checked = 0;
7374
331k
    }
7375
7376
    /*
7377
     * Now that the entity content has been gathered
7378
     * provide it to the application, this can take different forms based
7379
     * on the parsing modes.
7380
     */
7381
4.21M
    if (ent->children == NULL) {
7382
  /*
7383
   * Probably running in SAX mode and the callbacks don't
7384
   * build the entity content. So unless we already went
7385
   * though parsing for first checking go though the entity
7386
   * content to generate callbacks associated to the entity
7387
   */
7388
459k
  if (was_checked != 0) {
7389
367k
      void *user_data;
7390
      /*
7391
       * This is a bit hackish but this seems the best
7392
       * way to make sure both SAX and DOM entity support
7393
       * behaves okay.
7394
       */
7395
367k
      if (ctxt->userData == ctxt)
7396
367k
    user_data = NULL;
7397
0
      else
7398
0
    user_data = ctxt->userData;
7399
7400
367k
      if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7401
12
    ctxt->depth++;
7402
12
    ret = xmlParseBalancedChunkMemoryInternal(ctxt,
7403
12
           ent->content, user_data, NULL);
7404
12
    ctxt->depth--;
7405
366k
      } else if (ent->etype ==
7406
366k
           XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7407
366k
          unsigned long oldsizeentities = ctxt->sizeentities;
7408
7409
366k
    ctxt->depth++;
7410
366k
    ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
7411
366k
         ctxt->sax, user_data, ctxt->depth,
7412
366k
         ent->URI, ent->ExternalID, NULL);
7413
366k
    ctxt->depth--;
7414
7415
                /* Undo the change to sizeentities */
7416
366k
                ctxt->sizeentities = oldsizeentities;
7417
366k
      } else {
7418
0
    ret = XML_ERR_ENTITY_PE_INTERNAL;
7419
0
    xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7420
0
           "invalid entity type found\n", NULL);
7421
0
      }
7422
367k
      if (ret == XML_ERR_ENTITY_LOOP) {
7423
0
    xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7424
0
    return;
7425
0
      }
7426
367k
            if (xmlParserEntityCheck(ctxt, 0))
7427
0
                return;
7428
367k
  }
7429
459k
  if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7430
459k
      (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7431
      /*
7432
       * Entity reference callback comes second, it's somewhat
7433
       * superfluous but a compatibility to historical behaviour
7434
       */
7435
125k
      ctxt->sax->reference(ctxt->userData, ent->name);
7436
125k
  }
7437
459k
  return;
7438
459k
    }
7439
7440
    /*
7441
     * We also check for amplification if entities aren't substituted.
7442
     * They might be expanded later.
7443
     */
7444
3.75M
    if ((was_checked != 0) &&
7445
3.75M
        (xmlParserEntityCheck(ctxt, ent->expandedSize)))
7446
564
        return;
7447
7448
    /*
7449
     * If we didn't get any children for the entity being built
7450
     */
7451
3.75M
    if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7452
3.75M
  (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7453
  /*
7454
   * Create a node.
7455
   */
7456
1.32M
  ctxt->sax->reference(ctxt->userData, ent->name);
7457
1.32M
  return;
7458
1.32M
    }
7459
7460
2.42M
    if (ctxt->replaceEntities)  {
7461
  /*
7462
   * There is a problem on the handling of _private for entities
7463
   * (bug 155816): Should we copy the content of the field from
7464
   * the entity (possibly overwriting some value set by the user
7465
   * when a copy is created), should we leave it alone, or should
7466
   * we try to take care of different situations?  The problem
7467
   * is exacerbated by the usage of this field by the xmlReader.
7468
   * To fix this bug, we look at _private on the created node
7469
   * and, if it's NULL, we copy in whatever was in the entity.
7470
   * If it's not NULL we leave it alone.  This is somewhat of a
7471
   * hack - maybe we should have further tests to determine
7472
   * what to do.
7473
   */
7474
2.42M
  if (ctxt->node != NULL) {
7475
      /*
7476
       * Seems we are generating the DOM content, do
7477
       * a simple tree copy for all references except the first
7478
       * In the first occurrence list contains the replacement.
7479
       */
7480
2.42M
      if (((list == NULL) && (ent->owner == 0)) ||
7481
2.42M
    (ctxt->parseMode == XML_PARSE_READER)) {
7482
784k
    xmlNodePtr nw = NULL, cur, firstChild = NULL;
7483
7484
    /*
7485
     * when operating on a reader, the entities definitions
7486
     * are always owning the entities subtree.
7487
    if (ctxt->parseMode == XML_PARSE_READER)
7488
        ent->owner = 1;
7489
     */
7490
7491
784k
    cur = ent->children;
7492
836k
    while (cur != NULL) {
7493
836k
        nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7494
836k
        if (nw != NULL) {
7495
836k
      if (nw->_private == NULL)
7496
836k
          nw->_private = cur->_private;
7497
836k
      if (firstChild == NULL){
7498
784k
          firstChild = nw;
7499
784k
      }
7500
836k
      nw = xmlAddChild(ctxt->node, nw);
7501
836k
        }
7502
836k
        if (cur == ent->last) {
7503
      /*
7504
       * needed to detect some strange empty
7505
       * node cases in the reader tests
7506
       */
7507
784k
      if ((ctxt->parseMode == XML_PARSE_READER) &&
7508
784k
          (nw != NULL) &&
7509
784k
          (nw->type == XML_ELEMENT_NODE) &&
7510
784k
          (nw->children == NULL))
7511
3.04k
          nw->extra = 1;
7512
7513
784k
      break;
7514
784k
        }
7515
52.7k
        cur = cur->next;
7516
52.7k
    }
7517
#ifdef LIBXML_LEGACY_ENABLED
7518
    if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7519
      xmlAddEntityReference(ent, firstChild, nw);
7520
#endif /* LIBXML_LEGACY_ENABLED */
7521
1.63M
      } else if ((list == NULL) || (ctxt->inputNr > 0)) {
7522
1.63M
    xmlNodePtr nw = NULL, cur, next, last,
7523
1.63M
         firstChild = NULL;
7524
7525
    /*
7526
     * Copy the entity child list and make it the new
7527
     * entity child list. The goal is to make sure any
7528
     * ID or REF referenced will be the one from the
7529
     * document content and not the entity copy.
7530
     */
7531
1.63M
    cur = ent->children;
7532
1.63M
    ent->children = NULL;
7533
1.63M
    last = ent->last;
7534
1.63M
    ent->last = NULL;
7535
1.73M
    while (cur != NULL) {
7536
1.73M
        next = cur->next;
7537
1.73M
        cur->next = NULL;
7538
1.73M
        cur->parent = NULL;
7539
1.73M
        nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7540
1.73M
        if (nw != NULL) {
7541
1.73M
      if (nw->_private == NULL)
7542
1.73M
          nw->_private = cur->_private;
7543
1.73M
      if (firstChild == NULL){
7544
1.63M
          firstChild = cur;
7545
1.63M
      }
7546
1.73M
      xmlAddChild((xmlNodePtr) ent, nw);
7547
1.73M
        }
7548
1.73M
        xmlAddChild(ctxt->node, cur);
7549
1.73M
        if (cur == last)
7550
1.63M
      break;
7551
98.0k
        cur = next;
7552
98.0k
    }
7553
1.63M
    if (ent->owner == 0)
7554
10.6k
        ent->owner = 1;
7555
#ifdef LIBXML_LEGACY_ENABLED
7556
    if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7557
      xmlAddEntityReference(ent, firstChild, nw);
7558
#endif /* LIBXML_LEGACY_ENABLED */
7559
1.63M
      } else {
7560
0
    const xmlChar *nbktext;
7561
7562
    /*
7563
     * the name change is to avoid coalescing of the
7564
     * node with a possible previous text one which
7565
     * would make ent->children a dangling pointer
7566
     */
7567
0
    nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
7568
0
          -1);
7569
0
    if (ent->children->type == XML_TEXT_NODE)
7570
0
        ent->children->name = nbktext;
7571
0
    if ((ent->last != ent->children) &&
7572
0
        (ent->last->type == XML_TEXT_NODE))
7573
0
        ent->last->name = nbktext;
7574
0
    xmlAddChildList(ctxt->node, ent->children);
7575
0
      }
7576
7577
      /*
7578
       * This is to avoid a nasty side effect, see
7579
       * characters() in SAX.c
7580
       */
7581
2.42M
      ctxt->nodemem = 0;
7582
2.42M
      ctxt->nodelen = 0;
7583
2.42M
      return;
7584
2.42M
  }
7585
2.42M
    }
7586
2.42M
}
7587
7588
/**
7589
 * xmlParseEntityRef:
7590
 * @ctxt:  an XML parser context
7591
 *
7592
 * DEPRECATED: Internal function, don't use.
7593
 *
7594
 * Parse an entitiy reference. Always consumes '&'.
7595
 *
7596
 * [68] EntityRef ::= '&' Name ';'
7597
 *
7598
 * [ WFC: Entity Declared ]
7599
 * In a document without any DTD, a document with only an internal DTD
7600
 * subset which contains no parameter entity references, or a document
7601
 * with "standalone='yes'", the Name given in the entity reference
7602
 * must match that in an entity declaration, except that well-formed
7603
 * documents need not declare any of the following entities: amp, lt,
7604
 * gt, apos, quot.  The declaration of a parameter entity must precede
7605
 * any reference to it.  Similarly, the declaration of a general entity
7606
 * must precede any reference to it which appears in a default value in an
7607
 * attribute-list declaration. Note that if entities are declared in the
7608
 * external subset or in external parameter entities, a non-validating
7609
 * processor is not obligated to read and process their declarations;
7610
 * for such documents, the rule that an entity must be declared is a
7611
 * well-formedness constraint only if standalone='yes'.
7612
 *
7613
 * [ WFC: Parsed Entity ]
7614
 * An entity reference must not contain the name of an unparsed entity
7615
 *
7616
 * Returns the xmlEntityPtr if found, or NULL otherwise.
7617
 */
7618
xmlEntityPtr
7619
12.6M
xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
7620
12.6M
    const xmlChar *name;
7621
12.6M
    xmlEntityPtr ent = NULL;
7622
7623
12.6M
    GROW;
7624
12.6M
    if (ctxt->instate == XML_PARSER_EOF)
7625
0
        return(NULL);
7626
7627
12.6M
    if (RAW != '&')
7628
0
        return(NULL);
7629
12.6M
    NEXT;
7630
12.6M
    name = xmlParseName(ctxt);
7631
12.6M
    if (name == NULL) {
7632
179k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7633
179k
           "xmlParseEntityRef: no name\n");
7634
179k
        return(NULL);
7635
179k
    }
7636
12.4M
    if (RAW != ';') {
7637
148k
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7638
148k
  return(NULL);
7639
148k
    }
7640
12.3M
    NEXT;
7641
7642
    /*
7643
     * Predefined entities override any extra definition
7644
     */
7645
12.3M
    if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7646
8.60M
        ent = xmlGetPredefinedEntity(name);
7647
8.60M
        if (ent != NULL)
7648
1.20M
            return(ent);
7649
8.60M
    }
7650
7651
    /*
7652
     * Ask first SAX for entity resolution, otherwise try the
7653
     * entities which may have stored in the parser context.
7654
     */
7655
11.1M
    if (ctxt->sax != NULL) {
7656
11.1M
  if (ctxt->sax->getEntity != NULL)
7657
11.1M
      ent = ctxt->sax->getEntity(ctxt->userData, name);
7658
11.1M
  if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7659
11.1M
      (ctxt->options & XML_PARSE_OLDSAX))
7660
113k
      ent = xmlGetPredefinedEntity(name);
7661
11.1M
  if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7662
11.1M
      (ctxt->userData==ctxt)) {
7663
268k
      ent = xmlSAX2GetEntity(ctxt, name);
7664
268k
  }
7665
11.1M
    }
7666
11.1M
    if (ctxt->instate == XML_PARSER_EOF)
7667
0
  return(NULL);
7668
    /*
7669
     * [ WFC: Entity Declared ]
7670
     * In a document without any DTD, a document with only an
7671
     * internal DTD subset which contains no parameter entity
7672
     * references, or a document with "standalone='yes'", the
7673
     * Name given in the entity reference must match that in an
7674
     * entity declaration, except that well-formed documents
7675
     * need not declare any of the following entities: amp, lt,
7676
     * gt, apos, quot.
7677
     * The declaration of a parameter entity must precede any
7678
     * reference to it.
7679
     * Similarly, the declaration of a general entity must
7680
     * precede any reference to it which appears in a default
7681
     * value in an attribute-list declaration. Note that if
7682
     * entities are declared in the external subset or in
7683
     * external parameter entities, a non-validating processor
7684
     * is not obligated to read and process their declarations;
7685
     * for such documents, the rule that an entity must be
7686
     * declared is a well-formedness constraint only if
7687
     * standalone='yes'.
7688
     */
7689
11.1M
    if (ent == NULL) {
7690
2.36M
  if ((ctxt->standalone == 1) ||
7691
2.36M
      ((ctxt->hasExternalSubset == 0) &&
7692
2.21M
       (ctxt->hasPErefs == 0))) {
7693
1.48M
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7694
1.48M
         "Entity '%s' not defined\n", name);
7695
1.48M
  } else {
7696
877k
      xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7697
877k
         "Entity '%s' not defined\n", name);
7698
877k
      if ((ctxt->inSubset == 0) &&
7699
877k
    (ctxt->sax != NULL) &&
7700
877k
    (ctxt->sax->reference != NULL)) {
7701
864k
    ctxt->sax->reference(ctxt->userData, name);
7702
864k
      }
7703
877k
  }
7704
2.36M
  ctxt->valid = 0;
7705
2.36M
    }
7706
7707
    /*
7708
     * [ WFC: Parsed Entity ]
7709
     * An entity reference must not contain the name of an
7710
     * unparsed entity
7711
     */
7712
8.75M
    else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7713
252
  xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7714
252
     "Entity reference to unparsed entity %s\n", name);
7715
252
    }
7716
7717
    /*
7718
     * [ WFC: No External Entity References ]
7719
     * Attribute values cannot contain direct or indirect
7720
     * entity references to external entities.
7721
     */
7722
8.75M
    else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7723
8.75M
       (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7724
11.6k
  xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7725
11.6k
       "Attribute references external entity '%s'\n", name);
7726
11.6k
    }
7727
    /*
7728
     * [ WFC: No < in Attribute Values ]
7729
     * The replacement text of any entity referred to directly or
7730
     * indirectly in an attribute value (other than "&lt;") must
7731
     * not contain a <.
7732
     */
7733
8.74M
    else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7734
8.74M
       (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
7735
1.82M
  if ((ent->flags & XML_ENT_CHECKED_LT) == 0) {
7736
109k
            if ((ent->content != NULL) && (xmlStrchr(ent->content, '<')))
7737
890
                ent->flags |= XML_ENT_CONTAINS_LT;
7738
109k
            ent->flags |= XML_ENT_CHECKED_LT;
7739
109k
        }
7740
1.82M
        if (ent->flags & XML_ENT_CONTAINS_LT)
7741
3.13k
            xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7742
3.13k
                    "'<' in entity '%s' is not allowed in attributes "
7743
3.13k
                    "values\n", name);
7744
1.82M
    }
7745
7746
    /*
7747
     * Internal check, no parameter entities here ...
7748
     */
7749
6.92M
    else {
7750
6.92M
  switch (ent->etype) {
7751
0
      case XML_INTERNAL_PARAMETER_ENTITY:
7752
0
      case XML_EXTERNAL_PARAMETER_ENTITY:
7753
0
      xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7754
0
       "Attempt to reference the parameter entity '%s'\n",
7755
0
            name);
7756
0
      break;
7757
6.92M
      default:
7758
6.92M
      break;
7759
6.92M
  }
7760
6.92M
    }
7761
7762
    /*
7763
     * [ WFC: No Recursion ]
7764
     * A parsed entity must not contain a recursive reference
7765
     * to itself, either directly or indirectly.
7766
     * Done somewhere else
7767
     */
7768
11.1M
    return(ent);
7769
11.1M
}
7770
7771
/**
7772
 * xmlParseStringEntityRef:
7773
 * @ctxt:  an XML parser context
7774
 * @str:  a pointer to an index in the string
7775
 *
7776
 * parse ENTITY references declarations, but this version parses it from
7777
 * a string value.
7778
 *
7779
 * [68] EntityRef ::= '&' Name ';'
7780
 *
7781
 * [ WFC: Entity Declared ]
7782
 * In a document without any DTD, a document with only an internal DTD
7783
 * subset which contains no parameter entity references, or a document
7784
 * with "standalone='yes'", the Name given in the entity reference
7785
 * must match that in an entity declaration, except that well-formed
7786
 * documents need not declare any of the following entities: amp, lt,
7787
 * gt, apos, quot.  The declaration of a parameter entity must precede
7788
 * any reference to it.  Similarly, the declaration of a general entity
7789
 * must precede any reference to it which appears in a default value in an
7790
 * attribute-list declaration. Note that if entities are declared in the
7791
 * external subset or in external parameter entities, a non-validating
7792
 * processor is not obligated to read and process their declarations;
7793
 * for such documents, the rule that an entity must be declared is a
7794
 * well-formedness constraint only if standalone='yes'.
7795
 *
7796
 * [ WFC: Parsed Entity ]
7797
 * An entity reference must not contain the name of an unparsed entity
7798
 *
7799
 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7800
 * is updated to the current location in the string.
7801
 */
7802
static xmlEntityPtr
7803
16.1M
xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7804
16.1M
    xmlChar *name;
7805
16.1M
    const xmlChar *ptr;
7806
16.1M
    xmlChar cur;
7807
16.1M
    xmlEntityPtr ent = NULL;
7808
7809
16.1M
    if ((str == NULL) || (*str == NULL))
7810
0
        return(NULL);
7811
16.1M
    ptr = *str;
7812
16.1M
    cur = *ptr;
7813
16.1M
    if (cur != '&')
7814
0
  return(NULL);
7815
7816
16.1M
    ptr++;
7817
16.1M
    name = xmlParseStringName(ctxt, &ptr);
7818
16.1M
    if (name == NULL) {
7819
516
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7820
516
           "xmlParseStringEntityRef: no name\n");
7821
516
  *str = ptr;
7822
516
  return(NULL);
7823
516
    }
7824
16.1M
    if (*ptr != ';') {
7825
696
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7826
696
        xmlFree(name);
7827
696
  *str = ptr;
7828
696
  return(NULL);
7829
696
    }
7830
16.1M
    ptr++;
7831
7832
7833
    /*
7834
     * Predefined entities override any extra definition
7835
     */
7836
16.1M
    if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7837
15.3M
        ent = xmlGetPredefinedEntity(name);
7838
15.3M
        if (ent != NULL) {
7839
6.18k
            xmlFree(name);
7840
6.18k
            *str = ptr;
7841
6.18k
            return(ent);
7842
6.18k
        }
7843
15.3M
    }
7844
7845
    /*
7846
     * Ask first SAX for entity resolution, otherwise try the
7847
     * entities which may have stored in the parser context.
7848
     */
7849
16.1M
    if (ctxt->sax != NULL) {
7850
16.1M
  if (ctxt->sax->getEntity != NULL)
7851
16.1M
      ent = ctxt->sax->getEntity(ctxt->userData, name);
7852
16.1M
  if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX))
7853
55.8k
      ent = xmlGetPredefinedEntity(name);
7854
16.1M
  if ((ent == NULL) && (ctxt->userData==ctxt)) {
7855
93.5k
      ent = xmlSAX2GetEntity(ctxt, name);
7856
93.5k
  }
7857
16.1M
    }
7858
16.1M
    if (ctxt->instate == XML_PARSER_EOF) {
7859
0
  xmlFree(name);
7860
0
  return(NULL);
7861
0
    }
7862
7863
    /*
7864
     * [ WFC: Entity Declared ]
7865
     * In a document without any DTD, a document with only an
7866
     * internal DTD subset which contains no parameter entity
7867
     * references, or a document with "standalone='yes'", the
7868
     * Name given in the entity reference must match that in an
7869
     * entity declaration, except that well-formed documents
7870
     * need not declare any of the following entities: amp, lt,
7871
     * gt, apos, quot.
7872
     * The declaration of a parameter entity must precede any
7873
     * reference to it.
7874
     * Similarly, the declaration of a general entity must
7875
     * precede any reference to it which appears in a default
7876
     * value in an attribute-list declaration. Note that if
7877
     * entities are declared in the external subset or in
7878
     * external parameter entities, a non-validating processor
7879
     * is not obligated to read and process their declarations;
7880
     * for such documents, the rule that an entity must be
7881
     * declared is a well-formedness constraint only if
7882
     * standalone='yes'.
7883
     */
7884
16.1M
    if (ent == NULL) {
7885
93.5k
  if ((ctxt->standalone == 1) ||
7886
93.5k
      ((ctxt->hasExternalSubset == 0) &&
7887
92.0k
       (ctxt->hasPErefs == 0))) {
7888
87.5k
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7889
87.5k
         "Entity '%s' not defined\n", name);
7890
87.5k
  } else {
7891
6.03k
      xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7892
6.03k
        "Entity '%s' not defined\n",
7893
6.03k
        name);
7894
6.03k
  }
7895
  /* TODO ? check regressions ctxt->valid = 0; */
7896
93.5k
    }
7897
7898
    /*
7899
     * [ WFC: Parsed Entity ]
7900
     * An entity reference must not contain the name of an
7901
     * unparsed entity
7902
     */
7903
16.0M
    else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7904
3
  xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7905
3
     "Entity reference to unparsed entity %s\n", name);
7906
3
    }
7907
7908
    /*
7909
     * [ WFC: No External Entity References ]
7910
     * Attribute values cannot contain direct or indirect
7911
     * entity references to external entities.
7912
     */
7913
16.0M
    else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7914
16.0M
       (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7915
388
  xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7916
388
   "Attribute references external entity '%s'\n", name);
7917
388
    }
7918
    /*
7919
     * [ WFC: No < in Attribute Values ]
7920
     * The replacement text of any entity referred to directly or
7921
     * indirectly in an attribute value (other than "&lt;") must
7922
     * not contain a <.
7923
     */
7924
16.0M
    else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7925
16.0M
       (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
7926
15.9M
  if ((ent->flags & XML_ENT_CHECKED_LT) == 0) {
7927
12.8k
            if ((ent->content != NULL) && (xmlStrchr(ent->content, '<')))
7928
454
                ent->flags |= XML_ENT_CONTAINS_LT;
7929
12.8k
            ent->flags |= XML_ENT_CHECKED_LT;
7930
12.8k
        }
7931
15.9M
        if (ent->flags & XML_ENT_CONTAINS_LT)
7932
62.1k
            xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7933
62.1k
                    "'<' in entity '%s' is not allowed in attributes "
7934
62.1k
                    "values\n", name);
7935
15.9M
    }
7936
7937
    /*
7938
     * Internal check, no parameter entities here ...
7939
     */
7940
165k
    else {
7941
165k
  switch (ent->etype) {
7942
0
      case XML_INTERNAL_PARAMETER_ENTITY:
7943
0
      case XML_EXTERNAL_PARAMETER_ENTITY:
7944
0
    xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7945
0
       "Attempt to reference the parameter entity '%s'\n",
7946
0
          name);
7947
0
      break;
7948
165k
      default:
7949
165k
      break;
7950
165k
  }
7951
165k
    }
7952
7953
    /*
7954
     * [ WFC: No Recursion ]
7955
     * A parsed entity must not contain a recursive reference
7956
     * to itself, either directly or indirectly.
7957
     * Done somewhere else
7958
     */
7959
7960
16.1M
    xmlFree(name);
7961
16.1M
    *str = ptr;
7962
16.1M
    return(ent);
7963
16.1M
}
7964
7965
/**
7966
 * xmlParsePEReference:
7967
 * @ctxt:  an XML parser context
7968
 *
7969
 * DEPRECATED: Internal function, don't use.
7970
 *
7971
 * Parse a parameter entity reference. Always consumes '%'.
7972
 *
7973
 * The entity content is handled directly by pushing it's content as
7974
 * a new input stream.
7975
 *
7976
 * [69] PEReference ::= '%' Name ';'
7977
 *
7978
 * [ WFC: No Recursion ]
7979
 * A parsed entity must not contain a recursive
7980
 * reference to itself, either directly or indirectly.
7981
 *
7982
 * [ WFC: Entity Declared ]
7983
 * In a document without any DTD, a document with only an internal DTD
7984
 * subset which contains no parameter entity references, or a document
7985
 * with "standalone='yes'", ...  ... The declaration of a parameter
7986
 * entity must precede any reference to it...
7987
 *
7988
 * [ VC: Entity Declared ]
7989
 * In a document with an external subset or external parameter entities
7990
 * with "standalone='no'", ...  ... The declaration of a parameter entity
7991
 * must precede any reference to it...
7992
 *
7993
 * [ WFC: In DTD ]
7994
 * Parameter-entity references may only appear in the DTD.
7995
 * NOTE: misleading but this is handled.
7996
 */
7997
void
7998
xmlParsePEReference(xmlParserCtxtPtr ctxt)
7999
16.9M
{
8000
16.9M
    const xmlChar *name;
8001
16.9M
    xmlEntityPtr entity = NULL;
8002
16.9M
    xmlParserInputPtr input;
8003
8004
16.9M
    if (RAW != '%')
8005
0
        return;
8006
16.9M
    NEXT;
8007
16.9M
    name = xmlParseName(ctxt);
8008
16.9M
    if (name == NULL) {
8009
31.8k
  xmlFatalErrMsg(ctxt, XML_ERR_PEREF_NO_NAME, "PEReference: no name\n");
8010
31.8k
  return;
8011
31.8k
    }
8012
16.9M
    if (xmlParserDebugEntities)
8013
0
  xmlGenericError(xmlGenericErrorContext,
8014
0
    "PEReference: %s\n", name);
8015
16.9M
    if (RAW != ';') {
8016
17.1k
  xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
8017
17.1k
        return;
8018
17.1k
    }
8019
8020
16.9M
    NEXT;
8021
8022
    /*
8023
     * Request the entity from SAX
8024
     */
8025
16.9M
    if ((ctxt->sax != NULL) &&
8026
16.9M
  (ctxt->sax->getParameterEntity != NULL))
8027
16.9M
  entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8028
16.9M
    if (ctxt->instate == XML_PARSER_EOF)
8029
0
  return;
8030
16.9M
    if (entity == NULL) {
8031
  /*
8032
   * [ WFC: Entity Declared ]
8033
   * In a document without any DTD, a document with only an
8034
   * internal DTD subset which contains no parameter entity
8035
   * references, or a document with "standalone='yes'", ...
8036
   * ... The declaration of a parameter entity must precede
8037
   * any reference to it...
8038
   */
8039
1.14M
  if ((ctxt->standalone == 1) ||
8040
1.14M
      ((ctxt->hasExternalSubset == 0) &&
8041
1.14M
       (ctxt->hasPErefs == 0))) {
8042
8.57k
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8043
8.57k
            "PEReference: %%%s; not found\n",
8044
8.57k
            name);
8045
1.13M
  } else {
8046
      /*
8047
       * [ VC: Entity Declared ]
8048
       * In a document with an external subset or external
8049
       * parameter entities with "standalone='no'", ...
8050
       * ... The declaration of a parameter entity must
8051
       * precede any reference to it...
8052
       */
8053
1.13M
            if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
8054
40.1k
                xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
8055
40.1k
                                 "PEReference: %%%s; not found\n",
8056
40.1k
                                 name, NULL);
8057
40.1k
            } else
8058
1.09M
                xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8059
1.09M
                              "PEReference: %%%s; not found\n",
8060
1.09M
                              name, NULL);
8061
1.13M
            ctxt->valid = 0;
8062
1.13M
  }
8063
15.7M
    } else {
8064
  /*
8065
   * Internal checking in case the entity quest barfed
8066
   */
8067
15.7M
  if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8068
15.7M
      (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8069
0
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8070
0
      "Internal: %%%s; is not a parameter entity\n",
8071
0
        name, NULL);
8072
15.7M
  } else {
8073
15.7M
            xmlChar start[4];
8074
15.7M
            xmlCharEncoding enc;
8075
15.7M
            unsigned long parentConsumed;
8076
15.7M
            xmlEntityPtr oldEnt;
8077
8078
15.7M
      if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
8079
15.7M
          ((ctxt->options & XML_PARSE_NOENT) == 0) &&
8080
15.7M
    ((ctxt->options & XML_PARSE_DTDVALID) == 0) &&
8081
15.7M
    ((ctxt->options & XML_PARSE_DTDLOAD) == 0) &&
8082
15.7M
    ((ctxt->options & XML_PARSE_DTDATTR) == 0) &&
8083
15.7M
    (ctxt->replaceEntities == 0) &&
8084
15.7M
    (ctxt->validate == 0))
8085
6.99k
    return;
8086
8087
15.7M
            if (entity->flags & XML_ENT_EXPANDING) {
8088
429
                xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
8089
429
                xmlHaltParser(ctxt);
8090
429
                return;
8091
429
            }
8092
8093
            /* Must be computed from old input before pushing new input. */
8094
15.7M
            parentConsumed = ctxt->input->parentConsumed;
8095
15.7M
            oldEnt = ctxt->input->entity;
8096
15.7M
            if ((oldEnt == NULL) ||
8097
15.7M
                ((oldEnt->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
8098
14.3M
                 ((oldEnt->flags & XML_ENT_PARSED) == 0))) {
8099
4.36M
                xmlSaturatedAdd(&parentConsumed, ctxt->input->consumed);
8100
4.36M
                xmlSaturatedAddSizeT(&parentConsumed,
8101
4.36M
                                     ctxt->input->cur - ctxt->input->base);
8102
4.36M
            }
8103
8104
15.7M
      input = xmlNewEntityInputStream(ctxt, entity);
8105
15.7M
      if (xmlPushInput(ctxt, input) < 0) {
8106
58.0k
                xmlFreeInputStream(input);
8107
58.0k
    return;
8108
58.0k
            }
8109
8110
15.7M
            entity->flags |= XML_ENT_EXPANDING;
8111
8112
15.7M
            input->parentConsumed = parentConsumed;
8113
8114
15.7M
      if (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) {
8115
                /*
8116
                 * Get the 4 first bytes and decode the charset
8117
                 * if enc != XML_CHAR_ENCODING_NONE
8118
                 * plug some encoding conversion routines.
8119
                 * Note that, since we may have some non-UTF8
8120
                 * encoding (like UTF16, bug 135229), the 'length'
8121
                 * is not known, but we can calculate based upon
8122
                 * the amount of data in the buffer.
8123
                 */
8124
53.7k
                GROW
8125
53.7k
                if (ctxt->instate == XML_PARSER_EOF)
8126
0
                    return;
8127
53.7k
                if ((ctxt->input->end - ctxt->input->cur)>=4) {
8128
53.6k
                    start[0] = RAW;
8129
53.6k
                    start[1] = NXT(1);
8130
53.6k
                    start[2] = NXT(2);
8131
53.6k
                    start[3] = NXT(3);
8132
53.6k
                    enc = xmlDetectCharEncoding(start, 4);
8133
53.6k
                    if (enc != XML_CHAR_ENCODING_NONE) {
8134
9
                        xmlSwitchEncoding(ctxt, enc);
8135
9
                    }
8136
53.6k
                }
8137
8138
53.7k
                if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
8139
53.7k
                    (IS_BLANK_CH(NXT(5)))) {
8140
9
                    xmlParseTextDecl(ctxt);
8141
9
                }
8142
53.7k
            }
8143
15.7M
  }
8144
15.7M
    }
8145
16.8M
    ctxt->hasPErefs = 1;
8146
16.8M
}
8147
8148
/**
8149
 * xmlLoadEntityContent:
8150
 * @ctxt:  an XML parser context
8151
 * @entity: an unloaded system entity
8152
 *
8153
 * Load the original content of the given system entity from the
8154
 * ExternalID/SystemID given. This is to be used for Included in Literal
8155
 * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
8156
 *
8157
 * Returns 0 in case of success and -1 in case of failure
8158
 */
8159
static int
8160
5.92k
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
8161
5.92k
    xmlParserInputPtr input;
8162
5.92k
    xmlBufferPtr buf;
8163
5.92k
    int l, c;
8164
5.92k
    int count = 0;
8165
8166
5.92k
    if ((ctxt == NULL) || (entity == NULL) ||
8167
5.92k
        ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
8168
5.92k
   (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
8169
5.92k
  (entity->content != NULL)) {
8170
0
  xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8171
0
              "xmlLoadEntityContent parameter error");
8172
0
        return(-1);
8173
0
    }
8174
8175
5.92k
    if (xmlParserDebugEntities)
8176
0
  xmlGenericError(xmlGenericErrorContext,
8177
0
    "Reading %s entity content input\n", entity->name);
8178
8179
5.92k
    buf = xmlBufferCreate();
8180
5.92k
    if (buf == NULL) {
8181
0
  xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8182
0
              "xmlLoadEntityContent parameter error");
8183
0
        return(-1);
8184
0
    }
8185
5.92k
    xmlBufferSetAllocationScheme(buf, XML_BUFFER_ALLOC_DOUBLEIT);
8186
8187
5.92k
    input = xmlNewEntityInputStream(ctxt, entity);
8188
5.92k
    if (input == NULL) {
8189
1.59k
  xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8190
1.59k
              "xmlLoadEntityContent input error");
8191
1.59k
  xmlBufferFree(buf);
8192
1.59k
        return(-1);
8193
1.59k
    }
8194
8195
    /*
8196
     * Push the entity as the current input, read char by char
8197
     * saving to the buffer until the end of the entity or an error
8198
     */
8199
4.33k
    if (xmlPushInput(ctxt, input) < 0) {
8200
0
        xmlBufferFree(buf);
8201
0
  xmlFreeInputStream(input);
8202
0
  return(-1);
8203
0
    }
8204
8205
4.33k
    GROW;
8206
4.33k
    c = CUR_CHAR(l);
8207
7.85M
    while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
8208
7.85M
           (IS_CHAR(c))) {
8209
7.85M
        xmlBufferAdd(buf, ctxt->input->cur, l);
8210
7.85M
  if (count++ > XML_PARSER_CHUNK_SIZE) {
8211
75.1k
      count = 0;
8212
75.1k
      GROW;
8213
75.1k
            if (ctxt->instate == XML_PARSER_EOF) {
8214
0
                xmlBufferFree(buf);
8215
0
                return(-1);
8216
0
            }
8217
75.1k
  }
8218
7.85M
  NEXTL(l);
8219
7.85M
  c = CUR_CHAR(l);
8220
7.85M
  if (c == 0) {
8221
3.57k
      count = 0;
8222
3.57k
      GROW;
8223
3.57k
            if (ctxt->instate == XML_PARSER_EOF) {
8224
0
                xmlBufferFree(buf);
8225
0
                return(-1);
8226
0
            }
8227
3.57k
      c = CUR_CHAR(l);
8228
3.57k
  }
8229
7.85M
    }
8230
8231
4.33k
    if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
8232
2.78k
        xmlSaturatedAdd(&ctxt->sizeentities, ctxt->input->consumed);
8233
2.78k
        xmlPopInput(ctxt);
8234
2.78k
    } else if (!IS_CHAR(c)) {
8235
1.54k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
8236
1.54k
                          "xmlLoadEntityContent: invalid char value %d\n",
8237
1.54k
                    c);
8238
1.54k
  xmlBufferFree(buf);
8239
1.54k
  return(-1);
8240
1.54k
    }
8241
2.78k
    entity->content = buf->content;
8242
2.78k
    entity->length = buf->use;
8243
2.78k
    buf->content = NULL;
8244
2.78k
    xmlBufferFree(buf);
8245
8246
2.78k
    return(0);
8247
4.33k
}
8248
8249
/**
8250
 * xmlParseStringPEReference:
8251
 * @ctxt:  an XML parser context
8252
 * @str:  a pointer to an index in the string
8253
 *
8254
 * parse PEReference declarations
8255
 *
8256
 * [69] PEReference ::= '%' Name ';'
8257
 *
8258
 * [ WFC: No Recursion ]
8259
 * A parsed entity must not contain a recursive
8260
 * reference to itself, either directly or indirectly.
8261
 *
8262
 * [ WFC: Entity Declared ]
8263
 * In a document without any DTD, a document with only an internal DTD
8264
 * subset which contains no parameter entity references, or a document
8265
 * with "standalone='yes'", ...  ... The declaration of a parameter
8266
 * entity must precede any reference to it...
8267
 *
8268
 * [ VC: Entity Declared ]
8269
 * In a document with an external subset or external parameter entities
8270
 * with "standalone='no'", ...  ... The declaration of a parameter entity
8271
 * must precede any reference to it...
8272
 *
8273
 * [ WFC: In DTD ]
8274
 * Parameter-entity references may only appear in the DTD.
8275
 * NOTE: misleading but this is handled.
8276
 *
8277
 * Returns the xmlEntityPtr of the referenced parameter entity, or NULL.
8278
 *         str is updated to the current value of the index
8279
 */
8280
static xmlEntityPtr
8281
933k
xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
8282
933k
    const xmlChar *ptr;
8283
933k
    xmlChar cur;
8284
933k
    xmlChar *name;
8285
933k
    xmlEntityPtr entity = NULL;
8286
8287
933k
    if ((str == NULL) || (*str == NULL)) return(NULL);
8288
933k
    ptr = *str;
8289
933k
    cur = *ptr;
8290
933k
    if (cur != '%')
8291
0
        return(NULL);
8292
933k
    ptr++;
8293
933k
    name = xmlParseStringName(ctxt, &ptr);
8294
933k
    if (name == NULL) {
8295
2.13k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8296
2.13k
           "xmlParseStringPEReference: no name\n");
8297
2.13k
  *str = ptr;
8298
2.13k
  return(NULL);
8299
2.13k
    }
8300
931k
    cur = *ptr;
8301
931k
    if (cur != ';') {
8302
768
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
8303
768
  xmlFree(name);
8304
768
  *str = ptr;
8305
768
  return(NULL);
8306
768
    }
8307
930k
    ptr++;
8308
8309
    /*
8310
     * Request the entity from SAX
8311
     */
8312
930k
    if ((ctxt->sax != NULL) &&
8313
930k
  (ctxt->sax->getParameterEntity != NULL))
8314
930k
  entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8315
930k
    if (ctxt->instate == XML_PARSER_EOF) {
8316
0
  xmlFree(name);
8317
0
  *str = ptr;
8318
0
  return(NULL);
8319
0
    }
8320
930k
    if (entity == NULL) {
8321
  /*
8322
   * [ WFC: Entity Declared ]
8323
   * In a document without any DTD, a document with only an
8324
   * internal DTD subset which contains no parameter entity
8325
   * references, or a document with "standalone='yes'", ...
8326
   * ... The declaration of a parameter entity must precede
8327
   * any reference to it...
8328
   */
8329
104k
  if ((ctxt->standalone == 1) ||
8330
104k
      ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) {
8331
0
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8332
0
     "PEReference: %%%s; not found\n", name);
8333
104k
  } else {
8334
      /*
8335
       * [ VC: Entity Declared ]
8336
       * In a document with an external subset or external
8337
       * parameter entities with "standalone='no'", ...
8338
       * ... The declaration of a parameter entity must
8339
       * precede any reference to it...
8340
       */
8341
104k
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8342
104k
        "PEReference: %%%s; not found\n",
8343
104k
        name, NULL);
8344
104k
      ctxt->valid = 0;
8345
104k
  }
8346
826k
    } else {
8347
  /*
8348
   * Internal check in case the lookup returned something that is not a parameter entity
8349
   */
8350
826k
  if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8351
826k
      (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8352
0
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8353
0
        "%%%s; is not a parameter entity\n",
8354
0
        name, NULL);
8355
0
  }
8356
826k
    }
8357
930k
    ctxt->hasPErefs = 1;
8358
930k
    xmlFree(name);
8359
930k
    *str = ptr;
8360
930k
    return(entity);
8361
930k
}
8362
8363
/**
8364
 * xmlParseDocTypeDecl:
8365
 * @ctxt:  an XML parser context
8366
 *
8367
 * DEPRECATED: Internal function, don't use.
8368
 *
8369
 * parse a DOCTYPE declaration
8370
 *
8371
 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
8372
 *                      ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8373
 *
8374
 * [ VC: Root Element Type ]
8375
 * The Name in the document type declaration must match the element
8376
 * type of the root element.
8377
 */
8378
8379
void
8380
1.23M
xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
8381
1.23M
    const xmlChar *name = NULL;
8382
1.23M
    xmlChar *ExternalID = NULL;
8383
1.23M
    xmlChar *URI = NULL;
8384
8385
    /*
8386
     * We know that '<!DOCTYPE' has been detected.
8387
     */
8388
1.23M
    SKIP(9);
8389
8390
1.23M
    SKIP_BLANKS;
8391
8392
    /*
8393
     * Parse the DOCTYPE name.
8394
     */
8395
1.23M
    name = xmlParseName(ctxt);
8396
1.23M
    if (name == NULL) {
8397
7.99k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8398
7.99k
           "xmlParseDocTypeDecl : no DOCTYPE name !\n");
8399
7.99k
    }
8400
1.23M
    ctxt->intSubName = name;
8401
8402
1.23M
    SKIP_BLANKS;
8403
8404
    /*
8405
     * Check for SystemID and ExternalID
8406
     */
8407
1.23M
    URI = xmlParseExternalID(ctxt, &ExternalID, 1);
8408
8409
1.23M
    if ((URI != NULL) || (ExternalID != NULL)) {
8410
592k
        ctxt->hasExternalSubset = 1;
8411
592k
    }
8412
1.23M
    ctxt->extSubURI = URI;
8413
1.23M
    ctxt->extSubSystem = ExternalID;
8414
8415
1.23M
    SKIP_BLANKS;
8416
8417
    /*
8418
     * Create and update the internal subset.
8419
     */
8420
1.23M
    if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
8421
1.23M
  (!ctxt->disableSAX))
8422
1.18M
  ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
8423
1.23M
    if (ctxt->instate == XML_PARSER_EOF)
8424
0
  return;
8425
8426
    /*
8427
     * Are there any internal subset declarations?
8428
     * they are handled separately in xmlParseInternalSubset()
8429
     */
8430
1.23M
    if (RAW == '[')
8431
924k
  return;
8432
8433
    /*
8434
     * We should be at the end of the DOCTYPE declaration.
8435
     */
8436
313k
    if (RAW != '>') {
8437
82.5k
  xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8438
82.5k
    }
8439
313k
    NEXT;
8440
313k
}
8441
8442
/**
8443
 * xmlParseInternalSubset:
8444
 * @ctxt:  an XML parser context
8445
 *
8446
 * parse the internal subset declaration
8447
 *
8448
 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8449
 */
8450
8451
static void
8452
921k
xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
8453
    /*
8454
     * Is there any DTD definition ?
8455
     */
8456
921k
    if (RAW == '[') {
8457
921k
        int baseInputNr = ctxt->inputNr;
8458
921k
        ctxt->instate = XML_PARSER_DTD;
8459
921k
        NEXT;
8460
  /*
8461
   * Parse the succession of Markup declarations and
8462
   * PEReferences.
8463
   * Subsequence (markupdecl | PEReference | S)*
8464
   */
8465
921k
  SKIP_BLANKS;
8466
22.6M
  while (((RAW != ']') || (ctxt->inputNr > baseInputNr)) &&
8467
22.6M
               (ctxt->instate != XML_PARSER_EOF)) {
8468
8469
            /*
8470
             * Conditional sections are allowed from external entities included
8471
             * by PE References in the internal subset.
8472
             */
8473
22.0M
            if ((ctxt->inputNr > 1) && (ctxt->input->filename != NULL) &&
8474
22.0M
                (RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
8475
0
                xmlParseConditionalSections(ctxt);
8476
22.0M
            } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
8477
21.5M
          xmlParseMarkupDecl(ctxt);
8478
21.5M
            } else if (RAW == '%') {
8479
180k
          xmlParsePEReference(ctxt);
8480
308k
            } else {
8481
308k
    xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8482
308k
                        "xmlParseInternalSubset: error detected in"
8483
308k
                        " Markup declaration\n");
8484
308k
                xmlHaltParser(ctxt);
8485
308k
                return;
8486
308k
            }
8487
21.7M
      SKIP_BLANKS;
8488
21.7M
  }
8489
612k
  if (RAW == ']') {
8490
537k
      NEXT;
8491
537k
      SKIP_BLANKS;
8492
537k
  }
8493
612k
    }
8494
8495
    /*
8496
     * We should be at the end of the DOCTYPE declaration.
8497
     */
8498
612k
    if (RAW != '>') {
8499
82.1k
  xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8500
82.1k
  return;
8501
82.1k
    }
8502
530k
    NEXT;
8503
530k
}
8504
8505
#ifdef LIBXML_SAX1_ENABLED
8506
/**
8507
 * xmlParseAttribute:
8508
 * @ctxt:  an XML parser context
8509
 * @value:  a xmlChar ** used to store the value of the attribute
8510
 *
8511
 * DEPRECATED: Internal function, don't use.
8512
 *
8513
 * parse an attribute
8514
 *
8515
 * [41] Attribute ::= Name Eq AttValue
8516
 *
8517
 * [ WFC: No External Entity References ]
8518
 * Attribute values cannot contain direct or indirect entity references
8519
 * to external entities.
8520
 *
8521
 * [ WFC: No < in Attribute Values ]
8522
 * The replacement text of any entity referred to directly or indirectly in
8523
 * an attribute value (other than "&lt;") must not contain a <.
8524
 *
8525
 * [ VC: Attribute Value Type ]
8526
 * The attribute must have been declared; the value must be of the type
8527
 * declared for it.
8528
 *
8529
 * [25] Eq ::= S? '=' S?
8530
 *
8531
 * With namespace:
8532
 *
8533
 * [NS 11] Attribute ::= QName Eq AttValue
8534
 *
8535
 * Also the case QName == xmlns:??? is handled independently as a namespace
8536
 * definition.
8537
 *
8538
 * Returns the attribute name, and the value in *value.
8539
 */
8540
8541
const xmlChar *
8542
13.9M
xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
8543
13.9M
    const xmlChar *name;
8544
13.9M
    xmlChar *val;
8545
8546
13.9M
    *value = NULL;
8547
13.9M
    GROW;
8548
13.9M
    name = xmlParseName(ctxt);
8549
13.9M
    if (name == NULL) {
8550
473k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8551
473k
                 "error parsing attribute name\n");
8552
473k
        return(NULL);
8553
473k
    }
8554
8555
    /*
8556
     * read the value
8557
     */
8558
13.4M
    SKIP_BLANKS;
8559
13.4M
    if (RAW == '=') {
8560
13.0M
        NEXT;
8561
13.0M
  SKIP_BLANKS;
8562
13.0M
  val = xmlParseAttValue(ctxt);
8563
13.0M
  ctxt->instate = XML_PARSER_CONTENT;
8564
13.0M
    } else {
8565
430k
  xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8566
430k
         "Specification mandates value for attribute %s\n", name);
8567
430k
  return(name);
8568
430k
    }
8569
8570
    /*
8571
     * Check that xml:lang conforms to the specification
8572
     * No longer reported as an error, just generate a warning now
8573
     * since this was deprecated in XML second edition
8574
     */
8575
13.0M
    if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8576
6.89k
  if (!xmlCheckLanguageID(val)) {
8577
2.26k
      xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8578
2.26k
              "Malformed value for xml:lang : %s\n",
8579
2.26k
        val, NULL);
8580
2.26k
  }
8581
6.89k
    }
8582
8583
    /*
8584
     * Check that xml:space conforms to the specification
8585
     */
8586
13.0M
    if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8587
2.08k
  if (xmlStrEqual(val, BAD_CAST "default"))
8588
19
      *(ctxt->space) = 0;
8589
2.06k
  else if (xmlStrEqual(val, BAD_CAST "preserve"))
8590
727
      *(ctxt->space) = 1;
8591
1.33k
  else {
8592
1.33k
    xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8593
1.33k
"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8594
1.33k
                                 val, NULL);
8595
1.33k
  }
8596
2.08k
    }
8597
8598
13.0M
    *value = val;
8599
13.0M
    return(name);
8600
13.4M
}
8601
8602
/**
8603
 * xmlParseStartTag:
8604
 * @ctxt:  an XML parser context
8605
 *
8606
 * DEPRECATED: Internal function, don't use.
8607
 *
8608
 * Parse a start tag. Always consumes '<'.
8609
 *
8610
 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8611
 *
8612
 * [ WFC: Unique Att Spec ]
8613
 * No attribute name may appear more than once in the same start-tag or
8614
 * empty-element tag.
8615
 *
8616
 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8617
 *
8618
 * [ WFC: Unique Att Spec ]
8619
 * No attribute name may appear more than once in the same start-tag or
8620
 * empty-element tag.
8621
 *
8622
 * With namespace:
8623
 *
8624
 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8625
 *
8626
 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8627
 *
8628
 * Returns the element name parsed
8629
 */
8630
8631
const xmlChar *
8632
16.9M
xmlParseStartTag(xmlParserCtxtPtr ctxt) {
8633
16.9M
    const xmlChar *name;
8634
16.9M
    const xmlChar *attname;
8635
16.9M
    xmlChar *attvalue;
8636
16.9M
    const xmlChar **atts = ctxt->atts;
8637
16.9M
    int nbatts = 0;
8638
16.9M
    int maxatts = ctxt->maxatts;
8639
16.9M
    int i;
8640
8641
16.9M
    if (RAW != '<') return(NULL);
8642
16.9M
    NEXT1;
8643
8644
16.9M
    name = xmlParseName(ctxt);
8645
16.9M
    if (name == NULL) {
8646
553k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8647
553k
       "xmlParseStartTag: invalid element name\n");
8648
553k
        return(NULL);
8649
553k
    }
8650
8651
    /*
8652
     * Now parse the attributes, up to the end of the tag:
8653
     *
8654
     * (S Attribute)* S?
8655
     */
8656
16.3M
    SKIP_BLANKS;
8657
16.3M
    GROW;
8658
8659
21.4M
    while (((RAW != '>') &&
8660
21.4M
     ((RAW != '/') || (NXT(1) != '>')) &&
8661
21.4M
     (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
8662
13.9M
  attname = xmlParseAttribute(ctxt, &attvalue);
8663
13.9M
        if (attname == NULL) {
8664
473k
      xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
8665
473k
         "xmlParseStartTag: problem parsing attributes\n");
8666
473k
      break;
8667
473k
  }
8668
13.4M
        if (attvalue != NULL) {
8669
      /*
8670
       * [ WFC: Unique Att Spec ]
8671
       * No attribute name may appear more than once in the same
8672
       * start-tag or empty-element tag.
8673
       */
8674
17.4M
      for (i = 0; i < nbatts;i += 2) {
8675
4.46M
          if (xmlStrEqual(atts[i], attname)) {
8676
10.9k
        xmlErrAttributeDup(ctxt, NULL, attname);
8677
10.9k
        xmlFree(attvalue);
8678
10.9k
        goto failed;
8679
10.9k
    }
8680
4.46M
      }
8681
      /*
8682
       * Add the pair to atts
8683
       */
8684
13.0M
      if (atts == NULL) {
8685
371k
          maxatts = 22; /* allow for 10 attrs by default */
8686
371k
          atts = (const xmlChar **)
8687
371k
           xmlMalloc(maxatts * sizeof(xmlChar *));
8688
371k
    if (atts == NULL) {
8689
0
        xmlErrMemory(ctxt, NULL);
8690
0
        if (attvalue != NULL)
8691
0
      xmlFree(attvalue);
8692
0
        goto failed;
8693
0
    }
8694
371k
    ctxt->atts = atts;
8695
371k
    ctxt->maxatts = maxatts;
8696
12.6M
      } else if (nbatts + 4 > maxatts) {
8697
322
          const xmlChar **n;
8698
8699
322
          maxatts *= 2;
8700
322
          n = (const xmlChar **) xmlRealloc((void *) atts,
8701
322
               maxatts * sizeof(const xmlChar *));
8702
322
    if (n == NULL) {
8703
0
        xmlErrMemory(ctxt, NULL);
8704
0
        if (attvalue != NULL)
8705
0
      xmlFree(attvalue);
8706
0
        goto failed;
8707
0
    }
8708
322
    atts = n;
8709
322
    ctxt->atts = atts;
8710
322
    ctxt->maxatts = maxatts;
8711
322
      }
8712
13.0M
      atts[nbatts++] = attname;
8713
13.0M
      atts[nbatts++] = attvalue;
8714
13.0M
      atts[nbatts] = NULL;
8715
13.0M
      atts[nbatts + 1] = NULL;
8716
13.0M
  } else {
8717
460k
      if (attvalue != NULL)
8718
0
    xmlFree(attvalue);
8719
460k
  }
8720
8721
13.4M
failed:
8722
8723
13.4M
  GROW
8724
13.4M
  if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8725
8.46M
      break;
8726
5.01M
  if (SKIP_BLANKS == 0) {
8727
679k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8728
679k
         "attributes construct error\n");
8729
679k
  }
8730
5.01M
  SHRINK;
8731
5.01M
        GROW;
8732
5.01M
    }
8733
8734
    /*
8735
     * SAX: Start of Element !
8736
     */
8737
16.3M
    if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
8738
16.3M
  (!ctxt->disableSAX)) {
8739
14.6M
  if (nbatts > 0)
8740
7.83M
      ctxt->sax->startElement(ctxt->userData, name, atts);
8741
6.84M
  else
8742
6.84M
      ctxt->sax->startElement(ctxt->userData, name, NULL);
8743
14.6M
    }
8744
8745
16.3M
    if (atts != NULL) {
8746
        /* Free only the content strings */
8747
28.1M
        for (i = 1;i < nbatts;i+=2)
8748
13.0M
      if (atts[i] != NULL)
8749
13.0M
         xmlFree((xmlChar *) atts[i]);
8750
15.1M
    }
8751
16.3M
    return(name);
8752
16.3M
}
8753
8754
/**
8755
 * xmlParseEndTag1:
8756
 * @ctxt:  an XML parser context
8757
 * @line:  line of the start tag
8758
 * @nsNr:  number of namespaces on the start tag
8759
 *
8760
 * Parse an end tag. Always consumes '</'.
8761
 *
8762
 * [42] ETag ::= '</' Name S? '>'
8763
 *
8764
 * With namespace
8765
 *
8766
 * [NS 9] ETag ::= '</' QName S? '>'
8767
 */
8768
8769
static void
8770
10.3M
xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
8771
10.3M
    const xmlChar *name;
8772
8773
10.3M
    GROW;
8774
10.3M
    if ((RAW != '<') || (NXT(1) != '/')) {
8775
0
  xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
8776
0
           "xmlParseEndTag: '</' not found\n");
8777
0
  return;
8778
0
    }
8779
10.3M
    SKIP(2);
8780
8781
10.3M
    name = xmlParseNameAndCompare(ctxt,ctxt->name);
8782
8783
    /*
8784
     * We should definitely be at the ending "S? '>'" part
8785
     */
8786
10.3M
    GROW;
8787
10.3M
    SKIP_BLANKS;
8788
10.3M
    if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
8789
129k
  xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
8790
129k
    } else
8791
10.1M
  NEXT1;
8792
8793
    /*
8794
     * [ WFC: Element Type Match ]
8795
     * The Name in an element's end-tag must match the element type in the
8796
     * start-tag.
8797
     *
8798
     */
8799
10.3M
    if (name != (xmlChar*)1) {
8800
364k
        if (name == NULL) name = BAD_CAST "unparsable";
8801
364k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
8802
364k
         "Opening and ending tag mismatch: %s line %d and %s\n",
8803
364k
                    ctxt->name, line, name);
8804
364k
    }
8805
8806
    /*
8807
     * SAX: End of Tag
8808
     */
8809
10.3M
    if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8810
10.3M
  (!ctxt->disableSAX))
8811
9.20M
        ctxt->sax->endElement(ctxt->userData, ctxt->name);
8812
8813
10.3M
    namePop(ctxt);
8814
10.3M
    spacePop(ctxt);
8815
10.3M
    return;
8816
10.3M
}
8817
8818
/**
8819
 * xmlParseEndTag:
8820
 * @ctxt:  an XML parser context
8821
 *
8822
 * DEPRECATED: Internal function, don't use.
8823
 *
8824
 * parse an end of tag
8825
 *
8826
 * [42] ETag ::= '</' Name S? '>'
8827
 *
8828
 * With namespace
8829
 *
8830
 * [NS 9] ETag ::= '</' QName S? '>'
8831
 */
8832
8833
void
8834
0
xmlParseEndTag(xmlParserCtxtPtr ctxt) {
8835
0
    xmlParseEndTag1(ctxt, 0);
8836
0
}
8837
#endif /* LIBXML_SAX1_ENABLED */
8838
8839
/************************************************************************
8840
 *                  *
8841
 *          SAX 2 specific operations       *
8842
 *                  *
8843
 ************************************************************************/
8844
8845
/*
8846
 * xmlGetNamespace:
8847
 * @ctxt:  an XML parser context
8848
 * @prefix:  the prefix to lookup
8849
 *
8850
 * Lookup the namespace name for the @prefix (which can be NULL)
8851
 * The prefix must come from the @ctxt->dict dictionary
8852
 *
8853
 * Returns the namespace name or NULL if not bound
8854
 */
8855
static const xmlChar *
8856
28.9M
xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
8857
28.9M
    int i;
8858
8859
28.9M
    if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
8860
29.7M
    for (i = ctxt->nsNr - 2;i >= 0;i-=2)
8861
2.74M
        if (ctxt->nsTab[i] == prefix) {
8862
1.69M
      if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
8863
34.5k
          return(NULL);
8864
1.65M
      return(ctxt->nsTab[i + 1]);
8865
1.69M
  }
8866
27.0M
    return(NULL);
8867
28.7M
}
8868
8869
/**
8870
 * xmlParseQName:
8871
 * @ctxt:  an XML parser context
8872
 * @prefix:  pointer to store the prefix part
8873
 *
8874
 * parse an XML Namespace QName
8875
 *
8876
 * [6]  QName  ::= (Prefix ':')? LocalPart
8877
 * [7]  Prefix  ::= NCName
8878
 * [8]  LocalPart  ::= NCName
8879
 *
8880
 * Returns the Name parsed or NULL
8881
 */
8882
8883
static const xmlChar *
8884
54.5M
xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8885
54.5M
    const xmlChar *l, *p;
8886
8887
54.5M
    GROW;
8888
8889
54.5M
    l = xmlParseNCName(ctxt);
8890
54.5M
    if (l == NULL) {
8891
1.18M
        if (CUR == ':') {
8892
15.1k
      l = xmlParseName(ctxt);
8893
15.1k
      if (l != NULL) {
8894
15.1k
          xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8895
15.1k
             "Failed to parse QName '%s'\n", l, NULL, NULL);
8896
15.1k
    *prefix = NULL;
8897
15.1k
    return(l);
8898
15.1k
      }
8899
15.1k
  }
8900
1.16M
        return(NULL);
8901
1.18M
    }
8902
53.4M
    if (CUR == ':') {
8903
2.19M
        NEXT;
8904
2.19M
  p = l;
8905
2.19M
  l = xmlParseNCName(ctxt);
8906
2.19M
  if (l == NULL) {
8907
60.7k
      xmlChar *tmp;
8908
8909
60.7k
            if (ctxt->instate == XML_PARSER_EOF)
8910
0
                return(NULL);
8911
60.7k
            xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8912
60.7k
               "Failed to parse QName '%s:'\n", p, NULL, NULL);
8913
60.7k
      l = xmlParseNmtoken(ctxt);
8914
60.7k
      if (l == NULL) {
8915
41.8k
                if (ctxt->instate == XML_PARSER_EOF)
8916
0
                    return(NULL);
8917
41.8k
    tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
8918
41.8k
            } else {
8919
18.9k
    tmp = xmlBuildQName(l, p, NULL, 0);
8920
18.9k
    xmlFree((char *)l);
8921
18.9k
      }
8922
60.7k
      p = xmlDictLookup(ctxt->dict, tmp, -1);
8923
60.7k
      if (tmp != NULL) xmlFree(tmp);
8924
60.7k
      *prefix = NULL;
8925
60.7k
      return(p);
8926
60.7k
  }
8927
2.13M
  if (CUR == ':') {
8928
34.1k
      xmlChar *tmp;
8929
8930
34.1k
            xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8931
34.1k
               "Failed to parse QName '%s:%s:'\n", p, l, NULL);
8932
34.1k
      NEXT;
8933
34.1k
      tmp = (xmlChar *) xmlParseName(ctxt);
8934
34.1k
      if (tmp != NULL) {
8935
21.7k
          tmp = xmlBuildQName(tmp, l, NULL, 0);
8936
21.7k
    l = xmlDictLookup(ctxt->dict, tmp, -1);
8937
21.7k
    if (tmp != NULL) xmlFree(tmp);
8938
21.7k
    *prefix = p;
8939
21.7k
    return(l);
8940
21.7k
      }
8941
12.4k
            if (ctxt->instate == XML_PARSER_EOF)
8942
0
                return(NULL);
8943
12.4k
      tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
8944
12.4k
      l = xmlDictLookup(ctxt->dict, tmp, -1);
8945
12.4k
      if (tmp != NULL) xmlFree(tmp);
8946
12.4k
      *prefix = p;
8947
12.4k
      return(l);
8948
12.4k
  }
8949
2.09M
  *prefix = p;
8950
2.09M
    } else
8951
51.2M
        *prefix = NULL;
8952
53.3M
    return(l);
8953
53.4M
}
8954
8955
/**
8956
 * xmlParseQNameAndCompare:
8957
 * @ctxt:  an XML parser context
8958
 * @name:  the localname
8959
 * @prefix:  the prefix, if any.
8960
 *
8961
 * parse an XML name and compares for match
8962
 * (specialized for endtag parsing)
8963
 *
8964
 * Returns NULL for an illegal name, (xmlChar*) 1 for success
8965
 * and the name for mismatch
8966
 */
8967
8968
static const xmlChar *
8969
xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8970
715k
                        xmlChar const *prefix) {
8971
715k
    const xmlChar *cmp;
8972
715k
    const xmlChar *in;
8973
715k
    const xmlChar *ret;
8974
715k
    const xmlChar *prefix2;
8975
8976
715k
    if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8977
8978
715k
    GROW;
8979
715k
    in = ctxt->input->cur;
8980
8981
715k
    cmp = prefix;
8982
2.42M
    while (*in != 0 && *in == *cmp) {
8983
1.70M
  ++in;
8984
1.70M
  ++cmp;
8985
1.70M
    }
8986
715k
    if ((*cmp == 0) && (*in == ':')) {
8987
655k
        in++;
8988
655k
  cmp = name;
8989
5.11M
  while (*in != 0 && *in == *cmp) {
8990
4.45M
      ++in;
8991
4.45M
      ++cmp;
8992
4.45M
  }
8993
655k
  if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
8994
      /* success */
8995
593k
            ctxt->input->col += in - ctxt->input->cur;
8996
593k
      ctxt->input->cur = in;
8997
593k
      return((const xmlChar*) 1);
8998
593k
  }
8999
655k
    }
9000
    /*
9001
     * all strings come from the dictionary, equality can be done directly
9002
     */
9003
122k
    ret = xmlParseQName (ctxt, &prefix2);
9004
122k
    if ((ret == name) && (prefix == prefix2))
9005
1.97k
  return((const xmlChar*) 1);
9006
120k
    return ret;
9007
122k
}
9008
9009
/**
9010
 * xmlParseAttValueInternal:
9011
 * @ctxt:  an XML parser context
9012
 * @len:  attribute len result
9013
 * @alloc:  whether the attribute was reallocated as a new string
9014
 * @normalize:  if 1 then further non-CDATA normalization must be done
9015
 *
9016
 * parse a value for an attribute.
9017
 * NOTE: if no normalization is needed, the routine will return pointers
9018
 *       directly from the data buffer.
9019
 *
9020
 * 3.3.3 Attribute-Value Normalization:
9021
 * Before the value of an attribute is passed to the application or
9022
 * checked for validity, the XML processor must normalize it as follows:
9023
 * - a character reference is processed by appending the referenced
9024
 *   character to the attribute value
9025
 * - an entity reference is processed by recursively processing the
9026
 *   replacement text of the entity
9027
 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
9028
 *   appending #x20 to the normalized value, except that only a single
9029
 *   #x20 is appended for a "#xD#xA" sequence that is part of an external
9030
 *   parsed entity or the literal entity value of an internal parsed entity
9031
 * - other characters are processed by appending them to the normalized value
9032
 * If the declared value is not CDATA, then the XML processor must further
9033
 * process the normalized attribute value by discarding any leading and
9034
 * trailing space (#x20) characters, and by replacing sequences of space
9035
 * (#x20) characters by a single space (#x20) character.
9036
 * All attributes for which no declaration has been read should be treated
9037
 * by a non-validating parser as if declared CDATA.
9038
 *
9039
 * Returns the AttValue parsed or NULL. The value has to be freed by the
9040
 *     caller if it was copied, this can be detected by val[*len] == 0.
9041
 */
9042
9043
#define GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end) \
9044
26.2k
    const xmlChar *oldbase = ctxt->input->base;\
9045
26.2k
    GROW;\
9046
26.2k
    if (ctxt->instate == XML_PARSER_EOF)\
9047
26.2k
        return(NULL);\
9048
26.2k
    if (oldbase != ctxt->input->base) {\
9049
0
        ptrdiff_t delta = ctxt->input->base - oldbase;\
9050
0
        start = start + delta;\
9051
0
        in = in + delta;\
9052
0
    }\
9053
26.2k
    end = ctxt->input->end;
9054
9055
static xmlChar *
9056
xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
9057
                         int normalize)
9058
38.5M
{
9059
38.5M
    xmlChar limit = 0;
9060
38.5M
    const xmlChar *in = NULL, *start, *end, *last;
9061
38.5M
    xmlChar *ret = NULL;
9062
38.5M
    int line, col;
9063
38.5M
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
9064
13.3M
                    XML_MAX_HUGE_LENGTH :
9065
38.5M
                    XML_MAX_TEXT_LENGTH;
9066
9067
38.5M
    GROW;
9068
38.5M
    in = (xmlChar *) CUR_PTR;
9069
38.5M
    line = ctxt->input->line;
9070
38.5M
    col = ctxt->input->col;
9071
38.5M
    if (*in != '"' && *in != '\'') {
9072
91.8k
        xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
9073
91.8k
        return (NULL);
9074
91.8k
    }
9075
38.5M
    ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
9076
9077
    /*
9078
     * try to handle in this routine the most common case where no
9079
     * allocation of a new string is required and where content is
9080
     * pure ASCII.
9081
     */
9082
38.5M
    limit = *in++;
9083
38.5M
    col++;
9084
38.5M
    end = ctxt->input->end;
9085
38.5M
    start = in;
9086
38.5M
    if (in >= end) {
9087
3.83k
        GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9088
3.83k
    }
9089
38.5M
    if (normalize) {
9090
        /*
9091
   * Skip any leading spaces
9092
   */
9093
1.59M
  while ((in < end) && (*in != limit) &&
9094
1.59M
         ((*in == 0x20) || (*in == 0x9) ||
9095
1.58M
          (*in == 0xA) || (*in == 0xD))) {
9096
224k
      if (*in == 0xA) {
9097
58.8k
          line++; col = 1;
9098
165k
      } else {
9099
165k
          col++;
9100
165k
      }
9101
224k
      in++;
9102
224k
      start = in;
9103
224k
      if (in >= end) {
9104
446
                GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9105
446
                if ((in - start) > maxLength) {
9106
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9107
0
                                   "AttValue length too long\n");
9108
0
                    return(NULL);
9109
0
                }
9110
446
      }
9111
224k
  }
9112
13.5M
  while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9113
13.5M
         (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
9114
12.2M
      col++;
9115
12.2M
      if ((*in++ == 0x20) && (*in == 0x20)) break;
9116
12.2M
      if (in >= end) {
9117
1.10k
                GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9118
1.10k
                if ((in - start) > maxLength) {
9119
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9120
0
                                   "AttValue length too long\n");
9121
0
                    return(NULL);
9122
0
                }
9123
1.10k
      }
9124
12.2M
  }
9125
1.37M
  last = in;
9126
  /*
9127
   * skip the trailing blanks
9128
   */
9129
1.38M
  while ((last[-1] == 0x20) && (last > start)) last--;
9130
1.56M
  while ((in < end) && (*in != limit) &&
9131
1.56M
         ((*in == 0x20) || (*in == 0x9) ||
9132
265k
          (*in == 0xA) || (*in == 0xD))) {
9133
194k
      if (*in == 0xA) {
9134
42.1k
          line++, col = 1;
9135
152k
      } else {
9136
152k
          col++;
9137
152k
      }
9138
194k
      in++;
9139
194k
      if (in >= end) {
9140
668
    const xmlChar *oldbase = ctxt->input->base;
9141
668
    GROW;
9142
668
                if (ctxt->instate == XML_PARSER_EOF)
9143
0
                    return(NULL);
9144
668
    if (oldbase != ctxt->input->base) {
9145
0
        ptrdiff_t delta = ctxt->input->base - oldbase;
9146
0
        start = start + delta;
9147
0
        in = in + delta;
9148
0
        last = last + delta;
9149
0
    }
9150
668
    end = ctxt->input->end;
9151
668
                if ((in - start) > maxLength) {
9152
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9153
0
                                   "AttValue length too long\n");
9154
0
                    return(NULL);
9155
0
                }
9156
668
      }
9157
194k
  }
9158
1.37M
        if ((in - start) > maxLength) {
9159
0
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9160
0
                           "AttValue length too long\n");
9161
0
            return(NULL);
9162
0
        }
9163
1.37M
  if (*in != limit) goto need_complex;
9164
37.1M
    } else {
9165
688M
  while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9166
688M
         (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
9167
651M
      in++;
9168
651M
      col++;
9169
651M
      if (in >= end) {
9170
20.8k
                GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9171
20.8k
                if ((in - start) > maxLength) {
9172
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9173
0
                                   "AttValue length too long\n");
9174
0
                    return(NULL);
9175
0
                }
9176
20.8k
      }
9177
651M
  }
9178
37.1M
  last = in;
9179
37.1M
        if ((in - start) > maxLength) {
9180
0
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9181
0
                           "AttValue length too long\n");
9182
0
            return(NULL);
9183
0
        }
9184
37.1M
  if (*in != limit) goto need_complex;
9185
37.1M
    }
9186
37.0M
    in++;
9187
37.0M
    col++;
9188
37.0M
    if (len != NULL) {
9189
23.8M
        if (alloc) *alloc = 0;
9190
23.8M
        *len = last - start;
9191
23.8M
        ret = (xmlChar *) start;
9192
23.8M
    } else {
9193
13.1M
        if (alloc) *alloc = 1;
9194
13.1M
        ret = xmlStrndup(start, last - start);
9195
13.1M
    }
9196
37.0M
    CUR_PTR = in;
9197
37.0M
    ctxt->input->line = line;
9198
37.0M
    ctxt->input->col = col;
9199
37.0M
    return ret;
9200
1.45M
need_complex:
9201
1.45M
    if (alloc) *alloc = 1;
9202
1.45M
    return xmlParseAttValueComplex(ctxt, len, normalize);
9203
38.5M
}
9204
9205
/**
9206
 * xmlParseAttribute2:
9207
 * @ctxt:  an XML parser context
9208
 * @pref:  the element prefix
9209
 * @elem:  the element name
9210
 * @prefix:  a xmlChar ** used to store the value of the attribute prefix
9211
 * @value:  a xmlChar ** used to store the value of the attribute
9212
 * @len:  an int * to save the length of the attribute
9213
 * @alloc:  an int * to indicate if the attribute was allocated
9214
 *
9215
 * parse an attribute in the new SAX2 framework.
9216
 *
9217
 * Returns the attribute name, and the value in *value.
9218
 */
9219
9220
static const xmlChar *
9221
xmlParseAttribute2(xmlParserCtxtPtr ctxt,
9222
                   const xmlChar * pref, const xmlChar * elem,
9223
                   const xmlChar ** prefix, xmlChar ** value,
9224
                   int *len, int *alloc)
9225
25.2M
{
9226
25.2M
    const xmlChar *name;
9227
25.2M
    xmlChar *val, *internal_val = NULL;
9228
25.2M
    int normalize = 0;
9229
9230
25.2M
    *value = NULL;
9231
25.2M
    GROW;
9232
25.2M
    name = xmlParseQName(ctxt, prefix);
9233
25.2M
    if (name == NULL) {
9234
306k
        xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9235
306k
                       "error parsing attribute name\n");
9236
306k
        return (NULL);
9237
306k
    }
9238
9239
    /*
9240
     * get the type if needed
9241
     */
9242
24.9M
    if (ctxt->attsSpecial != NULL) {
9243
2.66M
        int type;
9244
9245
2.66M
        type = (int) (ptrdiff_t) xmlHashQLookup2(ctxt->attsSpecial,
9246
2.66M
                                                 pref, elem, *prefix, name);
9247
2.66M
        if (type != 0)
9248
1.37M
            normalize = 1;
9249
2.66M
    }
9250
9251
    /*
9252
     * read the value
9253
     */
9254
24.9M
    SKIP_BLANKS;
9255
24.9M
    if (RAW == '=') {
9256
24.7M
        NEXT;
9257
24.7M
        SKIP_BLANKS;
9258
24.7M
        val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
9259
24.7M
        if (val == NULL)
9260
35.2k
            return (NULL);
9261
24.7M
  if (normalize) {
9262
      /*
9263
       * Sometimes a second normalisation pass for spaces is needed
9264
       * but that only happens if charrefs or entities references
9265
       * have been used in the attribute value, i.e. the attribute
9266
       * value have been extracted in an allocated string already.
9267
       */
9268
1.37M
      if (*alloc) {
9269
73.6k
          const xmlChar *val2;
9270
9271
73.6k
          val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
9272
73.6k
    if ((val2 != NULL) && (val2 != val)) {
9273
13.3k
        xmlFree(val);
9274
13.3k
        val = (xmlChar *) val2;
9275
13.3k
    }
9276
73.6k
      }
9277
1.37M
  }
9278
24.7M
        ctxt->instate = XML_PARSER_CONTENT;
9279
24.7M
    } else {
9280
173k
        xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
9281
173k
                          "Specification mandates value for attribute %s\n",
9282
173k
                          name);
9283
173k
        return (name);
9284
173k
    }
9285
9286
24.7M
    if (*prefix == ctxt->str_xml) {
9287
        /*
9288
         * Check that xml:lang conforms to the specification
9289
         * No more registered as an error, just generate a warning now
9290
         * since this was deprecated in XML second edition
9291
         */
9292
70.5k
        if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
9293
5.44k
            internal_val = xmlStrndup(val, *len);
9294
5.44k
            if (!xmlCheckLanguageID(internal_val)) {
9295
1.41k
                xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
9296
1.41k
                              "Malformed value for xml:lang : %s\n",
9297
1.41k
                              internal_val, NULL);
9298
1.41k
            }
9299
5.44k
        }
9300
9301
        /*
9302
         * Check that xml:space conforms to the specification
9303
         */
9304
70.5k
        if (xmlStrEqual(name, BAD_CAST "space")) {
9305
2.40k
            internal_val = xmlStrndup(val, *len);
9306
2.40k
            if (xmlStrEqual(internal_val, BAD_CAST "default"))
9307
24
                *(ctxt->space) = 0;
9308
2.37k
            else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
9309
926
                *(ctxt->space) = 1;
9310
1.45k
            else {
9311
1.45k
                xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
9312
1.45k
                              "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
9313
1.45k
                              internal_val, NULL);
9314
1.45k
            }
9315
2.40k
        }
9316
70.5k
        if (internal_val) {
9317
7.84k
            xmlFree(internal_val);
9318
7.84k
        }
9319
70.5k
    }
9320
9321
24.7M
    *value = val;
9322
24.7M
    return (name);
9323
24.9M
}
9324
/**
9325
 * xmlParseStartTag2:
9326
 * @ctxt:  an XML parser context
9327
 *
9328
 * Parse a start tag. Always consumes '<'.
9329
 *
9330
 * This routine is called when running SAX2 parsing
9331
 *
9332
 * [40] STag ::= '<' Name (S Attribute)* S? '>'
9333
 *
9334
 * [ WFC: Unique Att Spec ]
9335
 * No attribute name may appear more than once in the same start-tag or
9336
 * empty-element tag.
9337
 *
9338
 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
9339
 *
9340
 * [ WFC: Unique Att Spec ]
9341
 * No attribute name may appear more than once in the same start-tag or
9342
 * empty-element tag.
9343
 *
9344
 * With namespace:
9345
 *
9346
 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
9347
 *
9348
 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
9349
 *
9350
 * Returns the element name parsed
9351
 */
9352
9353
static const xmlChar *
9354
xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
9355
29.2M
                  const xmlChar **URI, int *tlen) {
9356
29.2M
    const xmlChar *localname;
9357
29.2M
    const xmlChar *prefix;
9358
29.2M
    const xmlChar *attname;
9359
29.2M
    const xmlChar *aprefix;
9360
29.2M
    const xmlChar *nsname;
9361
29.2M
    xmlChar *attvalue;
9362
29.2M
    const xmlChar **atts = ctxt->atts;
9363
29.2M
    int maxatts = ctxt->maxatts;
9364
29.2M
    int nratts, nbatts, nbdef, inputid;
9365
29.2M
    int i, j, nbNs, attval;
9366
29.2M
    unsigned long cur;
9367
29.2M
    int nsNr = ctxt->nsNr;
9368
9369
29.2M
    if (RAW != '<') return(NULL);
9370
29.2M
    NEXT1;
9371
9372
    /*
9373
     * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
9374
     *       point since the attribute values may be stored as pointers to
9375
     *       the buffer and calling SHRINK would destroy them !
9376
     *       The Shrinking is only possible once the full set of attribute
9377
     *       callbacks have been done.
9378
     */
9379
29.2M
    SHRINK;
9380
29.2M
    cur = ctxt->input->cur - ctxt->input->base;
9381
29.2M
    inputid = ctxt->input->id;
9382
29.2M
    nbatts = 0;
9383
29.2M
    nratts = 0;
9384
29.2M
    nbdef = 0;
9385
29.2M
    nbNs = 0;
9386
29.2M
    attval = 0;
9387
    /* Forget any namespaces added during an earlier parse of this element. */
9388
29.2M
    ctxt->nsNr = nsNr;
9389
9390
29.2M
    localname = xmlParseQName(ctxt, &prefix);
9391
29.2M
    if (localname == NULL) {
9392
855k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9393
855k
           "StartTag: invalid element name\n");
9394
855k
        return(NULL);
9395
855k
    }
9396
28.3M
    *tlen = ctxt->input->cur - ctxt->input->base - cur;
9397
9398
    /*
9399
     * Now parse the attributes, it ends up with the ending
9400
     *
9401
     * (S Attribute)* S?
9402
     */
9403
28.3M
    SKIP_BLANKS;
9404
28.3M
    GROW;
9405
9406
37.1M
    while (((RAW != '>') &&
9407
37.1M
     ((RAW != '/') || (NXT(1) != '>')) &&
9408
37.1M
     (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
9409
25.2M
  int len = -1, alloc = 0;
9410
9411
25.2M
  attname = xmlParseAttribute2(ctxt, prefix, localname,
9412
25.2M
                               &aprefix, &attvalue, &len, &alloc);
9413
25.2M
        if (attname == NULL) {
9414
341k
      xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9415
341k
           "xmlParseStartTag: problem parsing attributes\n");
9416
341k
      break;
9417
341k
  }
9418
24.9M
        if (attvalue == NULL)
9419
173k
            goto next_attr;
9420
24.7M
  if (len < 0) len = xmlStrlen(attvalue);
9421
9422
24.7M
        if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9423
121k
            const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9424
121k
            xmlURIPtr uri;
9425
9426
121k
            if (URL == NULL) {
9427
0
                xmlErrMemory(ctxt, "dictionary allocation failure");
9428
0
                if ((attvalue != NULL) && (alloc != 0))
9429
0
                    xmlFree(attvalue);
9430
0
                localname = NULL;
9431
0
                goto done;
9432
0
            }
9433
121k
            if (*URL != 0) {
9434
117k
                uri = xmlParseURI((const char *) URL);
9435
117k
                if (uri == NULL) {
9436
38.9k
                    xmlNsErr(ctxt, XML_WAR_NS_URI,
9437
38.9k
                             "xmlns: '%s' is not a valid URI\n",
9438
38.9k
                                       URL, NULL, NULL);
9439
78.7k
                } else {
9440
78.7k
                    if (uri->scheme == NULL) {
9441
8.72k
                        xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9442
8.72k
                                  "xmlns: URI %s is not absolute\n",
9443
8.72k
                                  URL, NULL, NULL);
9444
8.72k
                    }
9445
78.7k
                    xmlFreeURI(uri);
9446
78.7k
                }
9447
117k
                if (URL == ctxt->str_xml_ns) {
9448
0
                    if (attname != ctxt->str_xml) {
9449
0
                        xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9450
0
                     "xml namespace URI cannot be the default namespace\n",
9451
0
                                 NULL, NULL, NULL);
9452
0
                    }
9453
0
                    goto next_attr;
9454
0
                }
9455
117k
                if ((len == 29) &&
9456
117k
                    (xmlStrEqual(URL,
9457
2.15k
                             BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9458
27
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9459
27
                         "reuse of the xmlns namespace name is forbidden\n",
9460
27
                             NULL, NULL, NULL);
9461
27
                    goto next_attr;
9462
27
                }
9463
117k
            }
9464
            /*
9465
             * check that it's not a defined namespace
9466
             */
9467
153k
            for (j = 1;j <= nbNs;j++)
9468
43.5k
                if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9469
11.4k
                    break;
9470
121k
            if (j <= nbNs)
9471
11.4k
                xmlErrAttributeDup(ctxt, NULL, attname);
9472
110k
            else
9473
110k
                if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
9474
9475
24.6M
        } else if (aprefix == ctxt->str_xmlns) {
9476
285k
            const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9477
285k
            xmlURIPtr uri;
9478
9479
285k
            if (attname == ctxt->str_xml) {
9480
2.14k
                if (URL != ctxt->str_xml_ns) {
9481
2.14k
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9482
2.14k
                             "xml namespace prefix mapped to wrong URI\n",
9483
2.14k
                             NULL, NULL, NULL);
9484
2.14k
                }
9485
                /*
9486
                 * Do not keep a namespace definition node
9487
                 */
9488
2.14k
                goto next_attr;
9489
2.14k
            }
9490
283k
            if (URL == ctxt->str_xml_ns) {
9491
0
                if (attname != ctxt->str_xml) {
9492
0
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9493
0
                             "xml namespace URI mapped to wrong prefix\n",
9494
0
                             NULL, NULL, NULL);
9495
0
                }
9496
0
                goto next_attr;
9497
0
            }
9498
283k
            if (attname == ctxt->str_xmlns) {
9499
50
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9500
50
                         "redefinition of the xmlns prefix is forbidden\n",
9501
50
                         NULL, NULL, NULL);
9502
50
                goto next_attr;
9503
50
            }
9504
283k
            if ((len == 29) &&
9505
283k
                (xmlStrEqual(URL,
9506
7.47k
                             BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9507
55
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9508
55
                         "reuse of the xmlns namespace name is forbidden\n",
9509
55
                         NULL, NULL, NULL);
9510
55
                goto next_attr;
9511
55
            }
9512
283k
            if ((URL == NULL) || (URL[0] == 0)) {
9513
2.35k
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9514
2.35k
                         "xmlns:%s: Empty XML namespace is not allowed\n",
9515
2.35k
                              attname, NULL, NULL);
9516
2.35k
                goto next_attr;
9517
281k
            } else {
9518
281k
                uri = xmlParseURI((const char *) URL);
9519
281k
                if (uri == NULL) {
9520
66.5k
                    xmlNsErr(ctxt, XML_WAR_NS_URI,
9521
66.5k
                         "xmlns:%s: '%s' is not a valid URI\n",
9522
66.5k
                                       attname, URL, NULL);
9523
214k
                } else {
9524
214k
                    if ((ctxt->pedantic) && (uri->scheme == NULL)) {
9525
3.99k
                        xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9526
3.99k
                                  "xmlns:%s: URI %s is not absolute\n",
9527
3.99k
                                  attname, URL, NULL);
9528
3.99k
                    }
9529
214k
                    xmlFreeURI(uri);
9530
214k
                }
9531
281k
            }
9532
9533
            /*
9534
             * check that it's not a defined namespace
9535
             */
9536
389k
            for (j = 1;j <= nbNs;j++)
9537
115k
                if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9538
7.34k
                    break;
9539
281k
            if (j <= nbNs)
9540
7.34k
                xmlErrAttributeDup(ctxt, aprefix, attname);
9541
274k
            else
9542
274k
                if (nsPush(ctxt, attname, URL) > 0) nbNs++;
9543
9544
24.3M
        } else {
9545
            /*
9546
             * Add the pair to atts
9547
             */
9548
24.3M
            if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9549
483k
                if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9550
0
                    goto next_attr;
9551
0
                }
9552
483k
                maxatts = ctxt->maxatts;
9553
483k
                atts = ctxt->atts;
9554
483k
            }
9555
24.3M
            ctxt->attallocs[nratts++] = alloc;
9556
24.3M
            atts[nbatts++] = attname;
9557
24.3M
            atts[nbatts++] = aprefix;
9558
            /*
9559
             * The namespace URI field is used temporarily to point at the
9560
             * base of the current input buffer for non-alloced attributes.
9561
             * When the input buffer is reallocated, all the pointers become
9562
             * invalid, but they can be reconstructed later.
9563
             */
9564
24.3M
            if (alloc)
9565
752k
                atts[nbatts++] = NULL;
9566
23.5M
            else
9567
23.5M
                atts[nbatts++] = ctxt->input->base;
9568
24.3M
            atts[nbatts++] = attvalue;
9569
24.3M
            attvalue += len;
9570
24.3M
            atts[nbatts++] = attvalue;
9571
            /*
9572
             * tag if some deallocation is needed
9573
             */
9574
24.3M
            if (alloc != 0) attval = 1;
9575
24.3M
            attvalue = NULL; /* moved into atts */
9576
24.3M
        }
9577
9578
24.9M
next_attr:
9579
24.9M
        if ((attvalue != NULL) && (alloc != 0)) {
9580
114k
            xmlFree(attvalue);
9581
114k
            attvalue = NULL;
9582
114k
        }
9583
9584
24.9M
  GROW
9585
24.9M
        if (ctxt->instate == XML_PARSER_EOF)
9586
0
            break;
9587
24.9M
  if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9588
15.6M
      break;
9589
9.29M
  if (SKIP_BLANKS == 0) {
9590
487k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9591
487k
         "attributes construct error\n");
9592
487k
      break;
9593
487k
  }
9594
8.80M
        GROW;
9595
8.80M
    }
9596
9597
28.3M
    if (ctxt->input->id != inputid) {
9598
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9599
0
                    "Unexpected change of input\n");
9600
0
        localname = NULL;
9601
0
        goto done;
9602
0
    }
9603
9604
    /* Reconstruct attribute value pointers. */
9605
52.6M
    for (i = 0, j = 0; j < nratts; i += 5, j++) {
9606
24.3M
        if (atts[i+2] != NULL) {
9607
            /*
9608
             * Arithmetic on dangling pointers is technically undefined
9609
             * behavior, but well...
9610
             */
9611
23.5M
            const xmlChar *old = atts[i+2];
9612
23.5M
            atts[i+2]  = NULL;    /* Reset repurposed namespace URI */
9613
23.5M
            atts[i+3] = ctxt->input->base + (atts[i+3] - old);  /* value */
9614
23.5M
            atts[i+4] = ctxt->input->base + (atts[i+4] - old);  /* valuend */
9615
23.5M
        }
9616
24.3M
    }
9617
9618
    /*
9619
     * The attributes defaulting
9620
     */
9621
28.3M
    if (ctxt->attsDefault != NULL) {
9622
4.05M
        xmlDefAttrsPtr defaults;
9623
9624
4.05M
  defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9625
4.05M
  if (defaults != NULL) {
9626
822k
      for (i = 0;i < defaults->nbAttrs;i++) {
9627
574k
          attname = defaults->values[5 * i];
9628
574k
    aprefix = defaults->values[5 * i + 1];
9629
9630
                /*
9631
     * special work for namespaces defaulted defs
9632
     */
9633
574k
    if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9634
        /*
9635
         * check that it's not a defined namespace
9636
         */
9637
4.06k
        for (j = 1;j <= nbNs;j++)
9638
2.91k
            if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9639
1.58k
          break;
9640
2.73k
              if (j <= nbNs) continue;
9641
9642
1.15k
        nsname = xmlGetNamespace(ctxt, NULL);
9643
1.15k
        if (nsname != defaults->values[5 * i + 2]) {
9644
1.09k
      if (nsPush(ctxt, NULL,
9645
1.09k
                 defaults->values[5 * i + 2]) > 0)
9646
1.09k
          nbNs++;
9647
1.09k
        }
9648
571k
    } else if (aprefix == ctxt->str_xmlns) {
9649
        /*
9650
         * check that it's not a defined namespace
9651
         */
9652
12.3k
        for (j = 1;j <= nbNs;j++)
9653
6.60k
            if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9654
5.93k
          break;
9655
11.6k
              if (j <= nbNs) continue;
9656
9657
5.72k
        nsname = xmlGetNamespace(ctxt, attname);
9658
5.72k
        if (nsname != defaults->values[5 * i + 2]) {
9659
4.73k
      if (nsPush(ctxt, attname,
9660
4.73k
                 defaults->values[5 * i + 2]) > 0)
9661
4.73k
          nbNs++;
9662
4.73k
        }
9663
560k
    } else {
9664
        /*
9665
         * check that it's not a defined attribute
9666
         */
9667
1.56M
        for (j = 0;j < nbatts;j+=5) {
9668
1.01M
      if ((attname == atts[j]) && (aprefix == atts[j+1]))
9669
9.74k
          break;
9670
1.01M
        }
9671
560k
        if (j < nbatts) continue;
9672
9673
550k
        if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9674
6.15k
      if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9675
0
                            localname = NULL;
9676
0
                            goto done;
9677
0
      }
9678
6.15k
      maxatts = ctxt->maxatts;
9679
6.15k
      atts = ctxt->atts;
9680
6.15k
        }
9681
550k
        atts[nbatts++] = attname;
9682
550k
        atts[nbatts++] = aprefix;
9683
550k
        if (aprefix == NULL)
9684
446k
      atts[nbatts++] = NULL;
9685
103k
        else
9686
103k
            atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
9687
550k
        atts[nbatts++] = defaults->values[5 * i + 2];
9688
550k
        atts[nbatts++] = defaults->values[5 * i + 3];
9689
550k
        if ((ctxt->standalone == 1) &&
9690
550k
            (defaults->values[5 * i + 4] != NULL)) {
9691
0
      xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
9692
0
    "standalone: attribute %s on %s defaulted from external subset\n",
9693
0
                                   attname, localname);
9694
0
        }
9695
550k
        nbdef++;
9696
550k
    }
9697
574k
      }
9698
247k
  }
9699
4.05M
    }
9700
9701
    /*
9702
     * The attributes checkings
9703
     */
9704
53.2M
    for (i = 0; i < nbatts;i += 5) {
9705
        /*
9706
  * The default namespace does not apply to attribute names.
9707
  */
9708
24.8M
  if (atts[i + 1] != NULL) {
9709
537k
      nsname = xmlGetNamespace(ctxt, atts[i + 1]);
9710
537k
      if (nsname == NULL) {
9711
175k
    xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9712
175k
        "Namespace prefix %s for %s on %s is not defined\n",
9713
175k
        atts[i + 1], atts[i], localname);
9714
175k
      }
9715
537k
      atts[i + 2] = nsname;
9716
537k
  } else
9717
24.3M
      nsname = NULL;
9718
  /*
9719
   * [ WFC: Unique Att Spec ]
9720
   * No attribute name may appear more than once in the same
9721
   * start-tag or empty-element tag.
9722
   * As extended by the Namespace in XML REC.
9723
   */
9724
34.6M
        for (j = 0; j < i;j += 5) {
9725
9.79M
      if (atts[i] == atts[j]) {
9726
30.7k
          if (atts[i+1] == atts[j+1]) {
9727
8.91k
        xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
9728
8.91k
        break;
9729
8.91k
    }
9730
21.7k
    if ((nsname != NULL) && (atts[j + 2] == nsname)) {
9731
1.11k
        xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9732
1.11k
           "Namespaced Attribute %s in '%s' redefined\n",
9733
1.11k
           atts[i], nsname, NULL);
9734
1.11k
        break;
9735
1.11k
    }
9736
21.7k
      }
9737
9.79M
  }
9738
24.8M
    }
9739
9740
28.3M
    nsname = xmlGetNamespace(ctxt, prefix);
9741
28.3M
    if ((prefix != NULL) && (nsname == NULL)) {
9742
493k
  xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9743
493k
           "Namespace prefix %s on %s is not defined\n",
9744
493k
     prefix, localname, NULL);
9745
493k
    }
9746
28.3M
    *pref = prefix;
9747
28.3M
    *URI = nsname;
9748
9749
    /*
9750
     * SAX: Start of Element !
9751
     */
9752
28.3M
    if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9753
28.3M
  (!ctxt->disableSAX)) {
9754
24.1M
  if (nbNs > 0)
9755
204k
      ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9756
204k
        nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
9757
204k
        nbatts / 5, nbdef, atts);
9758
23.9M
  else
9759
23.9M
      ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9760
23.9M
                    nsname, 0, NULL, nbatts / 5, nbdef, atts);
9761
24.1M
    }
9762
9763
28.3M
done:
9764
    /*
9765
     * Free up attribute allocated strings if needed
9766
     */
9767
28.3M
    if (attval != 0) {
9768
1.60M
  for (i = 3,j = 0; j < nratts;i += 5,j++)
9769
877k
      if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9770
752k
          xmlFree((xmlChar *) atts[i]);
9771
725k
    }
9772
9773
28.3M
    return(localname);
9774
28.3M
}
9775
9776
/**
9777
 * xmlParseEndTag2:
9778
 * @ctxt:  an XML parser context
9779
 * @line:  line of the start tag
9780
 * @nsNr:  number of namespaces on the start tag
9781
 *
9782
 * Parse an end tag. Always consumes '</'.
9783
 *
9784
 * [42] ETag ::= '</' Name S? '>'
9785
 *
9786
 * With namespace
9787
 *
9788
 * [NS 9] ETag ::= '</' QName S? '>'
9789
 */
9790
9791
static void
9792
17.1M
xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlStartTag *tag) {
9793
17.1M
    const xmlChar *name;
9794
9795
17.1M
    GROW;
9796
17.1M
    if ((RAW != '<') || (NXT(1) != '/')) {
9797
0
  xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
9798
0
  return;
9799
0
    }
9800
17.1M
    SKIP(2);
9801
9802
17.1M
    if (tag->prefix == NULL)
9803
16.4M
        name = xmlParseNameAndCompare(ctxt, ctxt->name);
9804
715k
    else
9805
715k
        name = xmlParseQNameAndCompare(ctxt, ctxt->name, tag->prefix);
9806
9807
    /*
9808
     * We should definitely be at the ending "S? '>'" part
9809
     */
9810
17.1M
    GROW;
9811
17.1M
    if (ctxt->instate == XML_PARSER_EOF)
9812
0
        return;
9813
17.1M
    SKIP_BLANKS;
9814
17.1M
    if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
9815
163k
  xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
9816
163k
    } else
9817
17.0M
  NEXT1;
9818
9819
    /*
9820
     * [ WFC: Element Type Match ]
9821
     * The Name in an element's end-tag must match the element type in the
9822
     * start-tag.
9823
     *
9824
     */
9825
17.1M
    if (name != (xmlChar*)1) {
9826
476k
        if (name == NULL) name = BAD_CAST "unparsable";
9827
476k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
9828
476k
         "Opening and ending tag mismatch: %s line %d and %s\n",
9829
476k
                    ctxt->name, tag->line, name);
9830
476k
    }
9831
9832
    /*
9833
     * SAX: End of Tag
9834
     */
9835
17.1M
    if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9836
17.1M
  (!ctxt->disableSAX))
9837
14.2M
  ctxt->sax->endElementNs(ctxt->userData, ctxt->name, tag->prefix,
9838
14.2M
                                tag->URI);
9839
9840
17.1M
    spacePop(ctxt);
9841
17.1M
    if (tag->nsNr != 0)
9842
52.8k
  nsPop(ctxt, tag->nsNr);
9843
17.1M
}
9844
9845
/**
9846
 * xmlParseCDSect:
9847
 * @ctxt:  an XML parser context
9848
 *
9849
 * DEPRECATED: Internal function, don't use.
9850
 *
9851
 * Parse escaped pure raw content. Always consumes '<!['.
9852
 *
9853
 * [18] CDSect ::= CDStart CData CDEnd
9854
 *
9855
 * [19] CDStart ::= '<![CDATA['
9856
 *
9857
 * [20] Data ::= (Char* - (Char* ']]>' Char*))
9858
 *
9859
 * [21] CDEnd ::= ']]>'
9860
 */
9861
void
9862
93.2k
xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9863
93.2k
    xmlChar *buf = NULL;
9864
93.2k
    int len = 0;
9865
93.2k
    int size = XML_PARSER_BUFFER_SIZE;
9866
93.2k
    int r, rl;
9867
93.2k
    int s, sl;
9868
93.2k
    int cur, l;
9869
93.2k
    int count = 0;
9870
93.2k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
9871
27.5k
                    XML_MAX_HUGE_LENGTH :
9872
93.2k
                    XML_MAX_TEXT_LENGTH;
9873
9874
93.2k
    if ((CUR != '<') || (NXT(1) != '!') || (NXT(2) != '['))
9875
0
        return;
9876
93.2k
    SKIP(3);
9877
9878
93.2k
    if (!CMP6(CUR_PTR, 'C', 'D', 'A', 'T', 'A', '['))
9879
0
        return;
9880
93.2k
    SKIP(6);
9881
9882
93.2k
    ctxt->instate = XML_PARSER_CDATA_SECTION;
9883
93.2k
    r = CUR_CHAR(rl);
9884
93.2k
    if (!IS_CHAR(r)) {
9885
1.54k
  xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9886
1.54k
        goto out;
9887
1.54k
    }
9888
91.6k
    NEXTL(rl);
9889
91.6k
    s = CUR_CHAR(sl);
9890
91.6k
    if (!IS_CHAR(s)) {
9891
865
  xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9892
865
        goto out;
9893
865
    }
9894
90.7k
    NEXTL(sl);
9895
90.7k
    cur = CUR_CHAR(l);
9896
90.7k
    buf = (xmlChar *) xmlMallocAtomic(size);
9897
90.7k
    if (buf == NULL) {
9898
0
  xmlErrMemory(ctxt, NULL);
9899
0
        goto out;
9900
0
    }
9901
61.1M
    while (IS_CHAR(cur) &&
9902
61.1M
           ((r != ']') || (s != ']') || (cur != '>'))) {
9903
61.0M
  if (len + 5 >= size) {
9904
138k
      xmlChar *tmp;
9905
9906
138k
      tmp = (xmlChar *) xmlRealloc(buf, size * 2);
9907
138k
      if (tmp == NULL) {
9908
0
    xmlErrMemory(ctxt, NULL);
9909
0
                goto out;
9910
0
      }
9911
138k
      buf = tmp;
9912
138k
      size *= 2;
9913
138k
  }
9914
61.0M
  COPY_BUF(rl,buf,len,r);
9915
61.0M
  r = s;
9916
61.0M
  rl = sl;
9917
61.0M
  s = cur;
9918
61.0M
  sl = l;
9919
61.0M
  count++;
9920
61.0M
  if (count > 50) {
9921
1.15M
      SHRINK;
9922
1.15M
      GROW;
9923
1.15M
            if (ctxt->instate == XML_PARSER_EOF) {
9924
0
                goto out;
9925
0
            }
9926
1.15M
      count = 0;
9927
1.15M
  }
9928
61.0M
  NEXTL(l);
9929
61.0M
  cur = CUR_CHAR(l);
9930
61.0M
        if (len > maxLength) {
9931
0
            xmlFatalErrMsg(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9932
0
                           "CData section too big found\n");
9933
0
            goto out;
9934
0
        }
9935
61.0M
    }
9936
90.7k
    buf[len] = 0;
9937
90.7k
    if (cur != '>') {
9938
22.8k
  xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9939
22.8k
                       "CData section not finished\n%.50s\n", buf);
9940
22.8k
        goto out;
9941
22.8k
    }
9942
67.9k
    NEXTL(l);
9943
9944
    /*
9945
     * OK the buffer is to be consumed as cdata.
9946
     */
9947
67.9k
    if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9948
45.4k
  if (ctxt->sax->cdataBlock != NULL)
9949
26.9k
      ctxt->sax->cdataBlock(ctxt->userData, buf, len);
9950
18.5k
  else if (ctxt->sax->characters != NULL)
9951
18.5k
      ctxt->sax->characters(ctxt->userData, buf, len);
9952
45.4k
    }
9953
9954
93.2k
out:
9955
93.2k
    if (ctxt->instate != XML_PARSER_EOF)
9956
93.2k
        ctxt->instate = XML_PARSER_CONTENT;
9957
93.2k
    xmlFree(buf);
9958
93.2k
}
9959
9960
/**
9961
 * xmlParseContentInternal:
9962
 * @ctxt:  an XML parser context
9963
 *
9964
 * Parse a content sequence. Stops at EOF or '</'. Leaves checking of
9965
 * unexpected EOF to the caller.
9966
 */
9967
9968
static void
9969
716k
xmlParseContentInternal(xmlParserCtxtPtr ctxt) {
9970
716k
    int nameNr = ctxt->nameNr;
9971
9972
716k
    GROW;
9973
71.4M
    while ((RAW != 0) &&
9974
71.4M
     (ctxt->instate != XML_PARSER_EOF)) {
9975
70.8M
  const xmlChar *cur = ctxt->input->cur;
9976
9977
  /*
9978
   * First case : a Processing Instruction.
9979
   */
9980
70.8M
  if ((*cur == '<') && (cur[1] == '?')) {
9981
52.5k
      xmlParsePI(ctxt);
9982
52.5k
  }
9983
9984
  /*
9985
   * Second case : a CDSection
9986
   */
9987
  /* 2.6.0 test was *cur not RAW */
9988
70.8M
  else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9989
93.2k
      xmlParseCDSect(ctxt);
9990
93.2k
  }
9991
9992
  /*
9993
   * Third case :  a comment
9994
   */
9995
70.7M
  else if ((*cur == '<') && (NXT(1) == '!') &&
9996
70.7M
     (NXT(2) == '-') && (NXT(3) == '-')) {
9997
633k
      xmlParseComment(ctxt);
9998
633k
      ctxt->instate = XML_PARSER_CONTENT;
9999
633k
  }
10000
10001
  /*
10002
   * Fourth case :  a sub-element.
10003
   */
10004
70.1M
  else if (*cur == '<') {
10005
32.7M
            if (NXT(1) == '/') {
10006
12.6M
                if (ctxt->nameNr <= nameNr)
10007
146k
                    break;
10008
12.5M
          xmlParseElementEnd(ctxt);
10009
20.1M
            } else {
10010
20.1M
          xmlParseElementStart(ctxt);
10011
20.1M
            }
10012
32.7M
  }
10013
10014
  /*
10015
   * Fifth case : a reference. If if has not been resolved,
10016
   *    parsing returns it's Name, create the node
10017
   */
10018
10019
37.3M
  else if (*cur == '&') {
10020
4.91M
      xmlParseReference(ctxt);
10021
4.91M
  }
10022
10023
  /*
10024
   * Last case, text. Note that References are handled directly.
10025
   */
10026
32.4M
  else {
10027
32.4M
      xmlParseCharData(ctxt, 0);
10028
32.4M
  }
10029
10030
70.7M
  GROW;
10031
70.7M
  SHRINK;
10032
70.7M
    }
10033
716k
}
10034
10035
/**
10036
 * xmlParseContent:
10037
 * @ctxt:  an XML parser context
10038
 *
10039
 * Parse a content sequence. Stops at EOF or '</'.
10040
 *
10041
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10042
 */
10043
10044
void
10045
324k
xmlParseContent(xmlParserCtxtPtr ctxt) {
10046
324k
    int nameNr = ctxt->nameNr;
10047
10048
324k
    xmlParseContentInternal(ctxt);
10049
10050
324k
    if ((ctxt->instate != XML_PARSER_EOF) && (ctxt->nameNr > nameNr)) {
10051
6.71k
        const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
10052
6.71k
        int line = ctxt->pushTab[ctxt->nameNr - 1].line;
10053
6.71k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
10054
6.71k
                "Premature end of data in tag %s line %d\n",
10055
6.71k
    name, line, NULL);
10056
6.71k
    }
10057
324k
}
10058
10059
/**
10060
 * xmlParseElement:
10061
 * @ctxt:  an XML parser context
10062
 *
10063
 * DEPRECATED: Internal function, don't use.
10064
 *
10065
 * parse an XML element
10066
 *
10067
 * [39] element ::= EmptyElemTag | STag content ETag
10068
 *
10069
 * [ WFC: Element Type Match ]
10070
 * The Name in an element's end-tag must match the element type in the
10071
 * start-tag.
10072
 *
10073
 */
10074
10075
void
10076
644k
xmlParseElement(xmlParserCtxtPtr ctxt) {
10077
644k
    if (xmlParseElementStart(ctxt) != 0)
10078
253k
        return;
10079
10080
391k
    xmlParseContentInternal(ctxt);
10081
391k
    if (ctxt->instate == XML_PARSER_EOF)
10082
693
  return;
10083
10084
390k
    if (CUR == 0) {
10085
249k
        const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
10086
249k
        int line = ctxt->pushTab[ctxt->nameNr - 1].line;
10087
249k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
10088
249k
                "Premature end of data in tag %s line %d\n",
10089
249k
    name, line, NULL);
10090
249k
        return;
10091
249k
    }
10092
10093
141k
    xmlParseElementEnd(ctxt);
10094
141k
}
10095
10096
/**
10097
 * xmlParseElementStart:
10098
 * @ctxt:  an XML parser context
10099
 *
10100
 * Parse the start of an XML element. Returns -1 in case of error, 0 if an
10101
 * opening tag was parsed, 1 if an empty element was parsed.
10102
 *
10103
 * Always consumes '<'.
10104
 */
10105
static int
10106
20.7M
xmlParseElementStart(xmlParserCtxtPtr ctxt) {
10107
20.7M
    const xmlChar *name;
10108
20.7M
    const xmlChar *prefix = NULL;
10109
20.7M
    const xmlChar *URI = NULL;
10110
20.7M
    xmlParserNodeInfo node_info;
10111
20.7M
    int line, tlen = 0;
10112
20.7M
    xmlNodePtr ret;
10113
20.7M
    int nsNr = ctxt->nsNr;
10114
10115
20.7M
    if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) &&
10116
20.7M
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
10117
0
  xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
10118
0
     "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
10119
0
        xmlParserMaxDepth);
10120
0
  xmlHaltParser(ctxt);
10121
0
  return(-1);
10122
0
    }
10123
10124
    /* Capture start position */
10125
20.7M
    if (ctxt->record_info) {
10126
0
        node_info.begin_pos = ctxt->input->consumed +
10127
0
                          (CUR_PTR - ctxt->input->base);
10128
0
  node_info.begin_line = ctxt->input->line;
10129
0
    }
10130
10131
20.7M
    if (ctxt->spaceNr == 0)
10132
0
  spacePush(ctxt, -1);
10133
20.7M
    else if (*ctxt->space == -2)
10134
2.96M
  spacePush(ctxt, -1);
10135
17.7M
    else
10136
17.7M
  spacePush(ctxt, *ctxt->space);
10137
10138
20.7M
    line = ctxt->input->line;
10139
20.7M
#ifdef LIBXML_SAX1_ENABLED
10140
20.7M
    if (ctxt->sax2)
10141
13.2M
#endif /* LIBXML_SAX1_ENABLED */
10142
13.2M
        name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
10143
7.51M
#ifdef LIBXML_SAX1_ENABLED
10144
7.51M
    else
10145
7.51M
  name = xmlParseStartTag(ctxt);
10146
20.7M
#endif /* LIBXML_SAX1_ENABLED */
10147
20.7M
    if (ctxt->instate == XML_PARSER_EOF)
10148
363
  return(-1);
10149
20.7M
    if (name == NULL) {
10150
1.28M
  spacePop(ctxt);
10151
1.28M
        return(-1);
10152
1.28M
    }
10153
19.4M
    nameNsPush(ctxt, name, prefix, URI, line, ctxt->nsNr - nsNr);
10154
19.4M
    ret = ctxt->node;
10155
10156
19.4M
#ifdef LIBXML_VALID_ENABLED
10157
    /*
10158
     * [ VC: Root Element Type ]
10159
     * The Name in the document type declaration must match the element
10160
     * type of the root element.
10161
     */
10162
19.4M
    if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
10163
19.4M
        ctxt->node && (ctxt->node == ctxt->myDoc->children))
10164
0
        ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
10165
19.4M
#endif /* LIBXML_VALID_ENABLED */
10166
10167
    /*
10168
     * Check for an Empty Element.
10169
     */
10170
19.4M
    if ((RAW == '/') && (NXT(1) == '>')) {
10171
5.61M
        SKIP(2);
10172
5.61M
  if (ctxt->sax2) {
10173
3.71M
      if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
10174
3.71M
    (!ctxt->disableSAX))
10175
2.71M
    ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
10176
3.71M
#ifdef LIBXML_SAX1_ENABLED
10177
3.71M
  } else {
10178
1.90M
      if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
10179
1.90M
    (!ctxt->disableSAX))
10180
1.42M
    ctxt->sax->endElement(ctxt->userData, name);
10181
1.90M
#endif /* LIBXML_SAX1_ENABLED */
10182
1.90M
  }
10183
5.61M
  namePop(ctxt);
10184
5.61M
  spacePop(ctxt);
10185
5.61M
  if (nsNr != ctxt->nsNr)
10186
9.40k
      nsPop(ctxt, ctxt->nsNr - nsNr);
10187
5.61M
  if ( ret != NULL && ctxt->record_info ) {
10188
0
     node_info.end_pos = ctxt->input->consumed +
10189
0
            (CUR_PTR - ctxt->input->base);
10190
0
     node_info.end_line = ctxt->input->line;
10191
0
     node_info.node = ret;
10192
0
     xmlParserAddNodeInfo(ctxt, &node_info);
10193
0
  }
10194
5.61M
  return(1);
10195
5.61M
    }
10196
13.8M
    if (RAW == '>') {
10197
13.3M
        NEXT1;
10198
13.3M
    } else {
10199
540k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
10200
540k
         "Couldn't find end of Start Tag %s line %d\n",
10201
540k
                    name, line, NULL);
10202
10203
  /*
10204
   * end of parsing of this node.
10205
   */
10206
540k
  nodePop(ctxt);
10207
540k
  namePop(ctxt);
10208
540k
  spacePop(ctxt);
10209
540k
  if (nsNr != ctxt->nsNr)
10210
38.3k
      nsPop(ctxt, ctxt->nsNr - nsNr);
10211
10212
  /*
10213
   * Capture end position and add node
10214
   */
10215
540k
  if ( ret != NULL && ctxt->record_info ) {
10216
0
     node_info.end_pos = ctxt->input->consumed +
10217
0
            (CUR_PTR - ctxt->input->base);
10218
0
     node_info.end_line = ctxt->input->line;
10219
0
     node_info.node = ret;
10220
0
     xmlParserAddNodeInfo(ctxt, &node_info);
10221
0
  }
10222
540k
  return(-1);
10223
540k
    }
10224
10225
13.3M
    return(0);
10226
13.8M
}
10227
10228
/**
10229
 * xmlParseElementEnd:
10230
 * @ctxt:  an XML parser context
10231
 *
10232
 * Parse the end of an XML element. Always consumes '</'.
10233
 */
10234
static void
10235
12.6M
xmlParseElementEnd(xmlParserCtxtPtr ctxt) {
10236
12.6M
    xmlParserNodeInfo node_info;
10237
12.6M
    xmlNodePtr ret = ctxt->node;
10238
10239
12.6M
    if (ctxt->nameNr <= 0) {
10240
0
        if ((RAW == '<') && (NXT(1) == '/'))
10241
0
            SKIP(2);
10242
0
        return;
10243
0
    }
10244
10245
    /*
10246
     * parse the end of tag: '</' should be here.
10247
     */
10248
12.6M
    if (ctxt->sax2) {
10249
8.03M
  xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
10250
8.03M
  namePop(ctxt);
10251
8.03M
    }
10252
4.63M
#ifdef LIBXML_SAX1_ENABLED
10253
4.63M
    else
10254
4.63M
  xmlParseEndTag1(ctxt, 0);
10255
12.6M
#endif /* LIBXML_SAX1_ENABLED */
10256
10257
    /*
10258
     * Capture end position and add node
10259
     */
10260
12.6M
    if ( ret != NULL && ctxt->record_info ) {
10261
0
       node_info.end_pos = ctxt->input->consumed +
10262
0
                          (CUR_PTR - ctxt->input->base);
10263
0
       node_info.end_line = ctxt->input->line;
10264
0
       node_info.node = ret;
10265
0
       xmlParserAddNodeInfo(ctxt, &node_info);
10266
0
    }
10267
12.6M
}
10268
10269
/**
10270
 * xmlParseVersionNum:
10271
 * @ctxt:  an XML parser context
10272
 *
10273
 * DEPRECATED: Internal function, don't use.
10274
 *
10275
 * parse the XML version value.
10276
 *
10277
 * [26] VersionNum ::= '1.' [0-9]+
10278
 *
10279
 * In practice allow [0-9].[0-9]+ at that level
10280
 *
10281
 * Returns the string giving the XML version number, or NULL
10282
 */
10283
xmlChar *
10284
1.25M
xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
10285
1.25M
    xmlChar *buf = NULL;
10286
1.25M
    int len = 0;
10287
1.25M
    int size = 10;
10288
1.25M
    xmlChar cur;
10289
10290
1.25M
    buf = (xmlChar *) xmlMallocAtomic(size);
10291
1.25M
    if (buf == NULL) {
10292
0
  xmlErrMemory(ctxt, NULL);
10293
0
  return(NULL);
10294
0
    }
10295
1.25M
    cur = CUR;
10296
1.25M
    if (!((cur >= '0') && (cur <= '9'))) {
10297
9.76k
  xmlFree(buf);
10298
9.76k
  return(NULL);
10299
9.76k
    }
10300
1.24M
    buf[len++] = cur;
10301
1.24M
    NEXT;
10302
1.24M
    cur=CUR;
10303
1.24M
    if (cur != '.') {
10304
17.0k
  xmlFree(buf);
10305
17.0k
  return(NULL);
10306
17.0k
    }
10307
1.22M
    buf[len++] = cur;
10308
1.22M
    NEXT;
10309
1.22M
    cur=CUR;
10310
3.90M
    while ((cur >= '0') && (cur <= '9')) {
10311
2.67M
  if (len + 1 >= size) {
10312
12.1k
      xmlChar *tmp;
10313
10314
12.1k
      size *= 2;
10315
12.1k
      tmp = (xmlChar *) xmlRealloc(buf, size);
10316
12.1k
      if (tmp == NULL) {
10317
0
          xmlFree(buf);
10318
0
    xmlErrMemory(ctxt, NULL);
10319
0
    return(NULL);
10320
0
      }
10321
12.1k
      buf = tmp;
10322
12.1k
  }
10323
2.67M
  buf[len++] = cur;
10324
2.67M
  NEXT;
10325
2.67M
  cur=CUR;
10326
2.67M
    }
10327
1.22M
    buf[len] = 0;
10328
1.22M
    return(buf);
10329
1.22M
}
10330
10331
/**
10332
 * xmlParseVersionInfo:
10333
 * @ctxt:  an XML parser context
10334
 *
10335
 * DEPRECATED: Internal function, don't use.
10336
 *
10337
 * parse the XML version.
10338
 *
10339
 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
10340
 *
10341
 * [25] Eq ::= S? '=' S?
10342
 *
10343
 * Returns the version string, e.g. "1.0"
10344
 */
10345
10346
xmlChar *
10347
1.38M
xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
10348
1.38M
    xmlChar *version = NULL;
10349
10350
1.38M
    if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
10351
1.28M
  SKIP(7);
10352
1.28M
  SKIP_BLANKS;
10353
1.28M
  if (RAW != '=') {
10354
15.9k
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10355
15.9k
      return(NULL);
10356
15.9k
        }
10357
1.26M
  NEXT;
10358
1.26M
  SKIP_BLANKS;
10359
1.26M
  if (RAW == '"') {
10360
1.11M
      NEXT;
10361
1.11M
      version = xmlParseVersionNum(ctxt);
10362
1.11M
      if (RAW != '"') {
10363
46.8k
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10364
46.8k
      } else
10365
1.06M
          NEXT;
10366
1.11M
  } else if (RAW == '\''){
10367
139k
      NEXT;
10368
139k
      version = xmlParseVersionNum(ctxt);
10369
139k
      if (RAW != '\'') {
10370
6.08k
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10371
6.08k
      } else
10372
133k
          NEXT;
10373
139k
  } else {
10374
13.7k
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10375
13.7k
  }
10376
1.26M
    }
10377
1.36M
    return(version);
10378
1.38M
}
10379
10380
/**
10381
 * xmlParseEncName:
10382
 * @ctxt:  an XML parser context
10383
 *
10384
 * DEPRECATED: Internal function, don't use.
10385
 *
10386
 * parse the XML encoding name
10387
 *
10388
 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
10389
 *
10390
 * Returns the encoding name value or NULL
10391
 */
10392
xmlChar *
10393
585k
xmlParseEncName(xmlParserCtxtPtr ctxt) {
10394
585k
    xmlChar *buf = NULL;
10395
585k
    int len = 0;
10396
585k
    int size = 10;
10397
585k
    xmlChar cur;
10398
10399
585k
    cur = CUR;
10400
585k
    if (((cur >= 'a') && (cur <= 'z')) ||
10401
585k
        ((cur >= 'A') && (cur <= 'Z'))) {
10402
580k
  buf = (xmlChar *) xmlMallocAtomic(size);
10403
580k
  if (buf == NULL) {
10404
0
      xmlErrMemory(ctxt, NULL);
10405
0
      return(NULL);
10406
0
  }
10407
10408
580k
  buf[len++] = cur;
10409
580k
  NEXT;
10410
580k
  cur = CUR;
10411
6.41M
  while (((cur >= 'a') && (cur <= 'z')) ||
10412
6.41M
         ((cur >= 'A') && (cur <= 'Z')) ||
10413
6.41M
         ((cur >= '0') && (cur <= '9')) ||
10414
6.41M
         (cur == '.') || (cur == '_') ||
10415
6.41M
         (cur == '-')) {
10416
5.83M
      if (len + 1 >= size) {
10417
295k
          xmlChar *tmp;
10418
10419
295k
    size *= 2;
10420
295k
    tmp = (xmlChar *) xmlRealloc(buf, size);
10421
295k
    if (tmp == NULL) {
10422
0
        xmlErrMemory(ctxt, NULL);
10423
0
        xmlFree(buf);
10424
0
        return(NULL);
10425
0
    }
10426
295k
    buf = tmp;
10427
295k
      }
10428
5.83M
      buf[len++] = cur;
10429
5.83M
      NEXT;
10430
5.83M
      cur = CUR;
10431
5.83M
      if (cur == 0) {
10432
4.13k
          SHRINK;
10433
4.13k
    GROW;
10434
4.13k
    cur = CUR;
10435
4.13k
      }
10436
5.83M
        }
10437
580k
  buf[len] = 0;
10438
580k
    } else {
10439
5.09k
  xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
10440
5.09k
    }
10441
585k
    return(buf);
10442
585k
}
10443
10444
/**
10445
 * xmlParseEncodingDecl:
10446
 * @ctxt:  an XML parser context
10447
 *
10448
 * DEPRECATED: Internal function, don't use.
10449
 *
10450
 * parse the XML encoding declaration
10451
 *
10452
 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' |  "'" EncName "'")
10453
 *
10454
 * this setups the conversion filters.
10455
 *
10456
 * Returns the encoding value or NULL
10457
 */
10458
10459
const xmlChar *
10460
998k
xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
10461
998k
    xmlChar *encoding = NULL;
10462
10463
998k
    SKIP_BLANKS;
10464
998k
    if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
10465
593k
  SKIP(8);
10466
593k
  SKIP_BLANKS;
10467
593k
  if (RAW != '=') {
10468
3.69k
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10469
3.69k
      return(NULL);
10470
3.69k
        }
10471
589k
  NEXT;
10472
589k
  SKIP_BLANKS;
10473
589k
  if (RAW == '"') {
10474
495k
      NEXT;
10475
495k
      encoding = xmlParseEncName(ctxt);
10476
495k
      if (RAW != '"') {
10477
22.2k
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10478
22.2k
    xmlFree((xmlChar *) encoding);
10479
22.2k
    return(NULL);
10480
22.2k
      } else
10481
472k
          NEXT;
10482
495k
  } else if (RAW == '\''){
10483
90.2k
      NEXT;
10484
90.2k
      encoding = xmlParseEncName(ctxt);
10485
90.2k
      if (RAW != '\'') {
10486
3.57k
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10487
3.57k
    xmlFree((xmlChar *) encoding);
10488
3.57k
    return(NULL);
10489
3.57k
      } else
10490
86.7k
          NEXT;
10491
90.2k
  } else {
10492
4.10k
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10493
4.10k
  }
10494
10495
        /*
10496
         * Non standard parsing, allowing the user to ignore encoding
10497
         */
10498
563k
        if (ctxt->options & XML_PARSE_IGNORE_ENC) {
10499
268k
      xmlFree((xmlChar *) encoding);
10500
268k
            return(NULL);
10501
268k
  }
10502
10503
  /*
10504
   * UTF-16 encoding switch has already taken place at this stage,
10505
   * more over the little-endian/big-endian selection is already done
10506
   */
10507
295k
        if ((encoding != NULL) &&
10508
295k
      ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
10509
292k
       (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
10510
      /*
10511
       * If no encoding was passed to the parser, that we are
10512
       * using UTF-16 and no decoder is present i.e. the
10513
       * document is apparently UTF-8 compatible, then raise an
10514
       * encoding mismatch fatal error
10515
       */
10516
2.60k
      if ((ctxt->encoding == NULL) &&
10517
2.60k
          (ctxt->input->buf != NULL) &&
10518
2.60k
          (ctxt->input->buf->encoder == NULL)) {
10519
2.60k
    xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING,
10520
2.60k
      "Document labelled UTF-16 but has UTF-8 content\n");
10521
2.60k
      }
10522
2.60k
      if (ctxt->encoding != NULL)
10523
0
    xmlFree((xmlChar *) ctxt->encoding);
10524
2.60k
      ctxt->encoding = encoding;
10525
2.60k
  }
10526
  /*
10527
   * UTF-8 encoding is handled natively
10528
   */
10529
292k
        else if ((encoding != NULL) &&
10530
292k
      ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
10531
290k
       (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
10532
122k
      if (ctxt->encoding != NULL)
10533
2
    xmlFree((xmlChar *) ctxt->encoding);
10534
122k
      ctxt->encoding = encoding;
10535
122k
  }
10536
170k
  else if (encoding != NULL) {
10537
167k
      xmlCharEncodingHandlerPtr handler;
10538
10539
167k
      if (ctxt->input->encoding != NULL)
10540
0
    xmlFree((xmlChar *) ctxt->input->encoding);
10541
167k
      ctxt->input->encoding = encoding;
10542
10543
167k
            handler = xmlFindCharEncodingHandler((const char *) encoding);
10544
167k
      if (handler != NULL) {
10545
163k
    if (xmlSwitchToEncoding(ctxt, handler) < 0) {
10546
        /* failed to convert */
10547
600
        ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
10548
600
        return(NULL);
10549
600
    }
10550
163k
      } else {
10551
4.18k
    xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
10552
4.18k
      "Unsupported encoding %s\n", encoding);
10553
4.18k
    return(NULL);
10554
4.18k
      }
10555
167k
  }
10556
295k
    }
10557
695k
    return(encoding);
10558
998k
}
10559
10560
/**
10561
 * xmlParseSDDecl:
10562
 * @ctxt:  an XML parser context
10563
 *
10564
 * DEPRECATED: Internal function, don't use.
10565
 *
10566
 * parse the XML standalone declaration
10567
 *
10568
 * [32] SDDecl ::= S 'standalone' Eq
10569
 *                 (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
10570
 *
10571
 * [ VC: Standalone Document Declaration ]
10572
 * TODO The standalone document declaration must have the value "no"
10573
 * if any external markup declarations contain declarations of:
10574
 *  - attributes with default values, if elements to which these
10575
 *    attributes apply appear in the document without specifications
10576
 *    of values for these attributes, or
10577
 *  - entities (other than amp, lt, gt, apos, quot), if references
10578
 *    to those entities appear in the document, or
10579
 *  - attributes with values subject to normalization, where the
10580
 *    attribute appears in the document with a value which will change
10581
 *    as a result of normalization, or
10582
 *  - element types with element content, if white space occurs directly
10583
 *    within any instance of those types.
10584
 *
10585
 * Returns:
10586
 *   1 if standalone="yes"
10587
 *   0 if standalone="no"
10588
 *  -2 if standalone attribute is missing or invalid
10589
 *    (A standalone value of -2 means that the XML declaration was found,
10590
 *     but no value was specified for the standalone attribute).
10591
 */
10592
10593
int
10594
855k
xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
10595
855k
    int standalone = -2;
10596
10597
855k
    SKIP_BLANKS;
10598
855k
    if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
10599
218k
  SKIP(10);
10600
218k
        SKIP_BLANKS;
10601
218k
  if (RAW != '=') {
10602
2.94k
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10603
2.94k
      return(standalone);
10604
2.94k
        }
10605
215k
  NEXT;
10606
215k
  SKIP_BLANKS;
10607
215k
        if (RAW == '\''){
10608
87.2k
      NEXT;
10609
87.2k
      if ((RAW == 'n') && (NXT(1) == 'o')) {
10610
75.6k
          standalone = 0;
10611
75.6k
                SKIP(2);
10612
75.6k
      } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10613
11.6k
                 (NXT(2) == 's')) {
10614
7.95k
          standalone = 1;
10615
7.95k
    SKIP(3);
10616
7.95k
            } else {
10617
3.68k
    xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10618
3.68k
      }
10619
87.2k
      if (RAW != '\'') {
10620
5.07k
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10621
5.07k
      } else
10622
82.2k
          NEXT;
10623
128k
  } else if (RAW == '"'){
10624
126k
      NEXT;
10625
126k
      if ((RAW == 'n') && (NXT(1) == 'o')) {
10626
59.0k
          standalone = 0;
10627
59.0k
    SKIP(2);
10628
67.2k
      } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10629
67.2k
                 (NXT(2) == 's')) {
10630
61.3k
          standalone = 1;
10631
61.3k
                SKIP(3);
10632
61.3k
            } else {
10633
5.83k
    xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10634
5.83k
      }
10635
126k
      if (RAW != '"') {
10636
8.26k
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10637
8.26k
      } else
10638
118k
          NEXT;
10639
126k
  } else {
10640
2.25k
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10641
2.25k
        }
10642
215k
    }
10643
852k
    return(standalone);
10644
855k
}
10645
10646
/**
10647
 * xmlParseXMLDecl:
10648
 * @ctxt:  an XML parser context
10649
 *
10650
 * DEPRECATED: Internal function, don't use.
10651
 *
10652
 * parse an XML declaration header
10653
 *
10654
 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10655
 */
10656
10657
void
10658
1.34M
xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
10659
1.34M
    xmlChar *version;
10660
10661
    /*
10662
     * This value for standalone indicates that the document has an
10663
     * XML declaration but it does not have a standalone attribute.
10664
     * It will be overwritten later if a standalone attribute is found.
10665
     */
10666
1.34M
    ctxt->input->standalone = -2;
10667
10668
    /*
10669
     * We know that '<?xml' is here.
10670
     */
10671
1.34M
    SKIP(5);
10672
10673
1.34M
    if (!IS_BLANK_CH(RAW)) {
10674
0
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10675
0
                 "Blank needed after '<?xml'\n");
10676
0
    }
10677
1.34M
    SKIP_BLANKS;
10678
10679
    /*
10680
     * We must have the VersionInfo here.
10681
     */
10682
1.34M
    version = xmlParseVersionInfo(ctxt);
10683
1.34M
    if (version == NULL) {
10684
153k
  xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
10685
1.19M
    } else {
10686
1.19M
  if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10687
      /*
10688
       * Changed here for XML-1.0 5th edition
10689
       */
10690
19.9k
      if (ctxt->options & XML_PARSE_OLD10) {
10691
6.63k
    xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10692
6.63k
                "Unsupported version '%s'\n",
10693
6.63k
                version);
10694
13.3k
      } else {
10695
13.3k
          if ((version[0] == '1') && ((version[1] == '.'))) {
10696
11.0k
        xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10697
11.0k
                      "Unsupported version '%s'\n",
10698
11.0k
          version, NULL);
10699
11.0k
    } else {
10700
2.29k
        xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10701
2.29k
              "Unsupported version '%s'\n",
10702
2.29k
              version);
10703
2.29k
    }
10704
13.3k
      }
10705
19.9k
  }
10706
1.19M
  if (ctxt->version != NULL)
10707
0
      xmlFree((void *) ctxt->version);
10708
1.19M
  ctxt->version = version;
10709
1.19M
    }
10710
10711
    /*
10712
     * We may have the encoding declaration
10713
     */
10714
1.34M
    if (!IS_BLANK_CH(RAW)) {
10715
569k
        if ((RAW == '?') && (NXT(1) == '>')) {
10716
383k
      SKIP(2);
10717
383k
      return;
10718
383k
  }
10719
185k
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10720
185k
    }
10721
964k
    xmlParseEncodingDecl(ctxt);
10722
964k
    if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10723
964k
         (ctxt->instate == XML_PARSER_EOF)) {
10724
  /*
10725
   * The XML REC instructs us to stop parsing right here
10726
   */
10727
4.23k
        return;
10728
4.23k
    }
10729
10730
    /*
10731
     * We may have the standalone status.
10732
     */
10733
960k
    if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
10734
110k
        if ((RAW == '?') && (NXT(1) == '>')) {
10735
104k
      SKIP(2);
10736
104k
      return;
10737
104k
  }
10738
5.87k
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10739
5.87k
    }
10740
10741
    /*
10742
     * We can grow the input buffer freely at that point
10743
     */
10744
855k
    GROW;
10745
10746
855k
    SKIP_BLANKS;
10747
855k
    ctxt->input->standalone = xmlParseSDDecl(ctxt);
10748
10749
855k
    SKIP_BLANKS;
10750
855k
    if ((RAW == '?') && (NXT(1) == '>')) {
10751
549k
        SKIP(2);
10752
549k
    } else if (RAW == '>') {
10753
        /* Deprecated old WD ... */
10754
6.70k
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10755
6.70k
  NEXT;
10756
298k
    } else {
10757
298k
        int c;
10758
10759
298k
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10760
23.9M
        while ((c = CUR) != 0) {
10761
23.8M
            NEXT;
10762
23.8M
            if (c == '>')
10763
243k
                break;
10764
23.8M
        }
10765
298k
    }
10766
855k
}
10767
10768
/**
10769
 * xmlParseMisc:
10770
 * @ctxt:  an XML parser context
10771
 *
10772
 * DEPRECATED: Internal function, don't use.
10773
 *
10774
 * parse an XML Misc* optional field.
10775
 *
10776
 * [27] Misc ::= Comment | PI |  S
10777
 */
10778
10779
void
10780
1.91M
xmlParseMisc(xmlParserCtxtPtr ctxt) {
10781
2.14M
    while (ctxt->instate != XML_PARSER_EOF) {
10782
2.14M
        SKIP_BLANKS;
10783
2.14M
        GROW;
10784
2.14M
        if ((RAW == '<') && (NXT(1) == '?')) {
10785
130k
      xmlParsePI(ctxt);
10786
2.01M
        } else if (CMP4(CUR_PTR, '<', '!', '-', '-')) {
10787
102k
      xmlParseComment(ctxt);
10788
1.91M
        } else {
10789
1.91M
            break;
10790
1.91M
        }
10791
2.14M
    }
10792
1.91M
}
10793
10794
/**
10795
 * xmlParseDocument:
10796
 * @ctxt:  an XML parser context
10797
 *
10798
 * parse an XML document (and build a tree if using the standard SAX
10799
 * interface).
10800
 *
10801
 * [1] document ::= prolog element Misc*
10802
 *
10803
 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
10804
 *
10805
 * Returns 0, -1 in case of error. the parser context is augmented
10806
 *                as a result of the parsing.
10807
 */
10808
10809
int
10810
1.00M
xmlParseDocument(xmlParserCtxtPtr ctxt) {
10811
1.00M
    xmlChar start[4];
10812
1.00M
    xmlCharEncoding enc;
10813
10814
1.00M
    xmlInitParser();
10815
10816
1.00M
    if ((ctxt == NULL) || (ctxt->input == NULL))
10817
0
        return(-1);
10818
10819
1.00M
    GROW;
10820
10821
    /*
10822
     * SAX: detecting the level.
10823
     */
10824
1.00M
    xmlDetectSAX2(ctxt);
10825
10826
    /*
10827
     * SAX: beginning of the document processing.
10828
     */
10829
1.00M
    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10830
1.00M
        ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10831
1.00M
    if (ctxt->instate == XML_PARSER_EOF)
10832
0
  return(-1);
10833
10834
1.00M
    if ((ctxt->encoding == NULL) &&
10835
1.00M
        ((ctxt->input->end - ctxt->input->cur) >= 4)) {
10836
  /*
10837
   * Get the 4 first bytes and decode the charset
10838
   * if enc != XML_CHAR_ENCODING_NONE
10839
   * plug some encoding conversion routines.
10840
   */
10841
994k
  start[0] = RAW;
10842
994k
  start[1] = NXT(1);
10843
994k
  start[2] = NXT(2);
10844
994k
  start[3] = NXT(3);
10845
994k
  enc = xmlDetectCharEncoding(&start[0], 4);
10846
994k
  if (enc != XML_CHAR_ENCODING_NONE) {
10847
502k
      xmlSwitchEncoding(ctxt, enc);
10848
502k
  }
10849
994k
    }
10850
10851
10852
1.00M
    if (CUR == 0) {
10853
5.04k
  xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10854
5.04k
  return(-1);
10855
5.04k
    }
10856
10857
    /*
10858
     * Check for the XMLDecl in the Prolog.
10859
     * do not GROW here to avoid the detected encoder to decode more
10860
     * than just the first line, unless the amount of data is really
10861
     * too small to hold "<?xml version="1.0" encoding="foo"
10862
     */
10863
997k
    if ((ctxt->input->end - ctxt->input->cur) < 35) {
10864
95.3k
       GROW;
10865
95.3k
    }
10866
997k
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10867
10868
  /*
10869
   * Note that we will switch encoding on the fly.
10870
   */
10871
450k
  xmlParseXMLDecl(ctxt);
10872
450k
  if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10873
450k
      (ctxt->instate == XML_PARSER_EOF)) {
10874
      /*
10875
       * The XML REC instructs us to stop parsing right here
10876
       */
10877
1.41k
      return(-1);
10878
1.41k
  }
10879
449k
  ctxt->standalone = ctxt->input->standalone;
10880
449k
  SKIP_BLANKS;
10881
546k
    } else {
10882
546k
  ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10883
546k
    }
10884
995k
    if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10885
944k
        ctxt->sax->startDocument(ctxt->userData);
10886
995k
    if (ctxt->instate == XML_PARSER_EOF)
10887
0
  return(-1);
10888
995k
    if ((ctxt->myDoc != NULL) && (ctxt->input != NULL) &&
10889
995k
        (ctxt->input->buf != NULL) && (ctxt->input->buf->compressed >= 0)) {
10890
0
  ctxt->myDoc->compression = ctxt->input->buf->compressed;
10891
0
    }
10892
10893
    /*
10894
     * The Misc part of the Prolog
10895
     */
10896
995k
    xmlParseMisc(ctxt);
10897
10898
    /*
10899
     * Then possibly doc type declaration(s) and more Misc
10900
     * (doctypedecl Misc*)?
10901
     */
10902
995k
    GROW;
10903
995k
    if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
10904
10905
426k
  ctxt->inSubset = 1;
10906
426k
  xmlParseDocTypeDecl(ctxt);
10907
426k
  if (RAW == '[') {
10908
318k
      ctxt->instate = XML_PARSER_DTD;
10909
318k
      xmlParseInternalSubset(ctxt);
10910
318k
      if (ctxt->instate == XML_PARSER_EOF)
10911
135k
    return(-1);
10912
318k
  }
10913
10914
  /*
10915
   * Create and update the external subset.
10916
   */
10917
290k
  ctxt->inSubset = 2;
10918
290k
  if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10919
290k
      (!ctxt->disableSAX))
10920
252k
      ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10921
252k
                                ctxt->extSubSystem, ctxt->extSubURI);
10922
290k
  if (ctxt->instate == XML_PARSER_EOF)
10923
20.2k
      return(-1);
10924
270k
  ctxt->inSubset = 0;
10925
10926
270k
        xmlCleanSpecialAttr(ctxt);
10927
10928
270k
  ctxt->instate = XML_PARSER_PROLOG;
10929
270k
  xmlParseMisc(ctxt);
10930
270k
    }
10931
10932
    /*
10933
     * Time to start parsing the tree itself
10934
     */
10935
840k
    GROW;
10936
840k
    if (RAW != '<') {
10937
196k
  xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10938
196k
           "Start tag expected, '<' not found\n");
10939
644k
    } else {
10940
644k
  ctxt->instate = XML_PARSER_CONTENT;
10941
644k
  xmlParseElement(ctxt);
10942
644k
  ctxt->instate = XML_PARSER_EPILOG;
10943
10944
10945
  /*
10946
   * The Misc part at the end
10947
   */
10948
644k
  xmlParseMisc(ctxt);
10949
10950
644k
  if (RAW != 0) {
10951
240k
      xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
10952
240k
  }
10953
644k
  ctxt->instate = XML_PARSER_EOF;
10954
644k
    }
10955
10956
    /*
10957
     * SAX: end of the document processing.
10958
     */
10959
840k
    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10960
840k
        ctxt->sax->endDocument(ctxt->userData);
10961
10962
    /*
10963
     * Remove locally kept entity definitions if the tree was not built
10964
     */
10965
840k
    if ((ctxt->myDoc != NULL) &&
10966
840k
  (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
10967
2.33k
  xmlFreeDoc(ctxt->myDoc);
10968
2.33k
  ctxt->myDoc = NULL;
10969
2.33k
    }
10970
10971
840k
    if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) {
10972
56.0k
        ctxt->myDoc->properties |= XML_DOC_WELLFORMED;
10973
56.0k
  if (ctxt->valid)
10974
46.4k
      ctxt->myDoc->properties |= XML_DOC_DTDVALID;
10975
56.0k
  if (ctxt->nsWellFormed)
10976
50.2k
      ctxt->myDoc->properties |= XML_DOC_NSVALID;
10977
56.0k
  if (ctxt->options & XML_PARSE_OLD10)
10978
6.18k
      ctxt->myDoc->properties |= XML_DOC_OLD10;
10979
56.0k
    }
10980
840k
    if (! ctxt->wellFormed) {
10981
784k
  ctxt->valid = 0;
10982
784k
  return(-1);
10983
784k
    }
10984
56.0k
    return(0);
10985
840k
}
10986
10987
/**
10988
 * xmlParseExtParsedEnt:
10989
 * @ctxt:  an XML parser context
10990
 *
10991
 * parse a general parsed entity
10992
 * An external general parsed entity is well-formed if it matches the
10993
 * production labeled extParsedEnt.
10994
 *
10995
 * [78] extParsedEnt ::= TextDecl? content
10996
 *
10997
 * Returns 0, -1 in case of error. the parser context is augmented
10998
 *                as a result of the parsing.
10999
 */
11000
11001
int
11002
0
xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
11003
0
    xmlChar start[4];
11004
0
    xmlCharEncoding enc;
11005
11006
0
    if ((ctxt == NULL) || (ctxt->input == NULL))
11007
0
        return(-1);
11008
11009
0
    xmlDetectSAX2(ctxt);
11010
11011
0
    GROW;
11012
11013
    /*
11014
     * SAX: beginning of the document processing.
11015
     */
11016
0
    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11017
0
        ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
11018
11019
    /*
11020
     * Get the 4 first bytes and decode the charset
11021
     * if enc != XML_CHAR_ENCODING_NONE
11022
     * plug some encoding conversion routines.
11023
     */
11024
0
    if ((ctxt->input->end - ctxt->input->cur) >= 4) {
11025
0
  start[0] = RAW;
11026
0
  start[1] = NXT(1);
11027
0
  start[2] = NXT(2);
11028
0
  start[3] = NXT(3);
11029
0
  enc = xmlDetectCharEncoding(start, 4);
11030
0
  if (enc != XML_CHAR_ENCODING_NONE) {
11031
0
      xmlSwitchEncoding(ctxt, enc);
11032
0
  }
11033
0
    }
11034
11035
11036
0
    if (CUR == 0) {
11037
0
  xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11038
0
    }
11039
11040
    /*
11041
     * Check for the XMLDecl in the Prolog.
11042
     */
11043
0
    GROW;
11044
0
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
11045
11046
  /*
11047
   * Note that we will switch encoding on the fly.
11048
   */
11049
0
  xmlParseXMLDecl(ctxt);
11050
0
  if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
11051
      /*
11052
       * The XML REC instructs us to stop parsing right here
11053
       */
11054
0
      return(-1);
11055
0
  }
11056
0
  SKIP_BLANKS;
11057
0
    } else {
11058
0
  ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11059
0
    }
11060
0
    if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
11061
0
        ctxt->sax->startDocument(ctxt->userData);
11062
0
    if (ctxt->instate == XML_PARSER_EOF)
11063
0
  return(-1);
11064
11065
    /*
11066
     * Doing validity checking on chunk doesn't make sense
11067
     */
11068
0
    ctxt->instate = XML_PARSER_CONTENT;
11069
0
    ctxt->validate = 0;
11070
0
    ctxt->loadsubset = 0;
11071
0
    ctxt->depth = 0;
11072
11073
0
    xmlParseContent(ctxt);
11074
0
    if (ctxt->instate == XML_PARSER_EOF)
11075
0
  return(-1);
11076
11077
0
    if ((RAW == '<') && (NXT(1) == '/')) {
11078
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11079
0
    } else if (RAW != 0) {
11080
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
11081
0
    }
11082
11083
    /*
11084
     * SAX: end of the document processing.
11085
     */
11086
0
    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11087
0
        ctxt->sax->endDocument(ctxt->userData);
11088
11089
0
    if (! ctxt->wellFormed) return(-1);
11090
0
    return(0);
11091
0
}
11092
11093
#ifdef LIBXML_PUSH_ENABLED
11094
/************************************************************************
11095
 *                  *
11096
 *    Progressive parsing interfaces        *
11097
 *                  *
11098
 ************************************************************************/
11099
11100
/**
11101
 * xmlParseLookupChar:
11102
 * @ctxt:  an XML parser context
11103
 * @c:  character
11104
 *
11105
 * Check whether the input buffer contains a character.
11106
 */
11107
static int
11108
19.7M
xmlParseLookupChar(xmlParserCtxtPtr ctxt, int c) {
11109
19.7M
    const xmlChar *cur;
11110
11111
19.7M
    if (ctxt->checkIndex == 0) {
11112
19.0M
        cur = ctxt->input->cur + 1;
11113
19.0M
    } else {
11114
696k
        cur = ctxt->input->cur + ctxt->checkIndex;
11115
696k
    }
11116
11117
19.7M
    if (memchr(cur, c, ctxt->input->end - cur) == NULL) {
11118
752k
        ctxt->checkIndex = ctxt->input->end - ctxt->input->cur;
11119
752k
        return(0);
11120
18.9M
    } else {
11121
18.9M
        ctxt->checkIndex = 0;
11122
18.9M
        return(1);
11123
18.9M
    }
11124
19.7M
}
11125
11126
/**
11127
 * xmlParseLookupString:
11128
 * @ctxt:  an XML parser context
11129
 * @startDelta: delta to apply at the start
11130
 * @str:  string
11131
 * @strLen:  length of string
11132
 *
11133
 * Check whether the input buffer contains a string.
11134
 */
11135
static const xmlChar *
11136
xmlParseLookupString(xmlParserCtxtPtr ctxt, size_t startDelta,
11137
6.14M
                     const char *str, size_t strLen) {
11138
6.14M
    const xmlChar *cur, *term;
11139
11140
6.14M
    if (ctxt->checkIndex == 0) {
11141
3.25M
        cur = ctxt->input->cur + startDelta;
11142
3.25M
    } else {
11143
2.88M
        cur = ctxt->input->cur + ctxt->checkIndex;
11144
2.88M
    }
11145
11146
6.14M
    term = BAD_CAST strstr((const char *) cur, str);
11147
6.14M
    if (term == NULL) {
11148
3.70M
        const xmlChar *end = ctxt->input->end;
11149
11150
        /* Rescan (strLen - 1) characters. */
11151
3.70M
        if ((size_t) (end - cur) < strLen)
11152
99.2k
            end = cur;
11153
3.60M
        else
11154
3.60M
            end -= strLen - 1;
11155
3.70M
        ctxt->checkIndex = end - ctxt->input->cur;
11156
3.70M
    } else {
11157
2.43M
        ctxt->checkIndex = 0;
11158
2.43M
    }
11159
11160
6.14M
    return(term);
11161
6.14M
}
11162
11163
/**
11164
 * xmlParseLookupCharData:
11165
 * @ctxt:  an XML parser context
11166
 *
11167
 * Check whether the input buffer contains terminated char data.
11168
 */
11169
static int
11170
33.0M
xmlParseLookupCharData(xmlParserCtxtPtr ctxt) {
11171
33.0M
    const xmlChar *cur = ctxt->input->cur + ctxt->checkIndex;
11172
33.0M
    const xmlChar *end = ctxt->input->end;
11173
11174
647M
    while (cur < end) {
11175
643M
        if ((*cur == '<') || (*cur == '&')) {
11176
28.9M
            ctxt->checkIndex = 0;
11177
28.9M
            return(1);
11178
28.9M
        }
11179
614M
        cur++;
11180
614M
    }
11181
11182
4.09M
    ctxt->checkIndex = cur - ctxt->input->cur;
11183
4.09M
    return(0);
11184
33.0M
}
11185
11186
/**
11187
 * xmlParseLookupGt:
11188
 * @ctxt:  an XML parser context
11189
 *
11190
 * Check whether there's enough data in the input buffer to finish parsing
11191
 * a start tag. This has to take quotes into account.
11192
 */
11193
static int
11194
31.3M
xmlParseLookupGt(xmlParserCtxtPtr ctxt) {
11195
31.3M
    const xmlChar *cur;
11196
31.3M
    const xmlChar *end = ctxt->input->end;
11197
31.3M
    int state = ctxt->endCheckState;
11198
11199
31.3M
    if (ctxt->checkIndex == 0)
11200
24.1M
        cur = ctxt->input->cur + 1;
11201
7.21M
    else
11202
7.21M
        cur = ctxt->input->cur + ctxt->checkIndex;
11203
11204
1.49G
    while (cur < end) {
11205
1.49G
        if (state) {
11206
844M
            if (*cur == state)
11207
25.9M
                state = 0;
11208
844M
        } else if (*cur == '\'' || *cur == '"') {
11209
26.2M
            state = *cur;
11210
619M
        } else if (*cur == '>') {
11211
23.6M
            ctxt->checkIndex = 0;
11212
23.6M
            ctxt->endCheckState = 0;
11213
23.6M
            return(1);
11214
23.6M
        }
11215
1.46G
        cur++;
11216
1.46G
    }
11217
11218
7.68M
    ctxt->checkIndex = cur - ctxt->input->cur;
11219
7.68M
    ctxt->endCheckState = state;
11220
7.68M
    return(0);
11221
31.3M
}
11222
11223
/**
11224
 * xmlParseLookupInternalSubset:
11225
 * @ctxt:  an XML parser context
11226
 *
11227
 * Check whether there's enough data in the input buffer to finish parsing
11228
 * the internal subset.
11229
 */
11230
static int
11231
2.61M
xmlParseLookupInternalSubset(xmlParserCtxtPtr ctxt) {
11232
    /*
11233
     * Sorry, but progressive parsing of the internal subset is not
11234
     * supported. We first check that the full content of the internal
11235
     * subset is available and parsing is launched only at that point.
11236
     * Internal subset ends with "']' S? '>'" in an unescaped section and
11237
     * not in a ']]>' sequence which are conditional sections.
11238
     */
11239
2.61M
    const xmlChar *cur, *start;
11240
2.61M
    const xmlChar *end = ctxt->input->end;
11241
2.61M
    int state = ctxt->endCheckState;
11242
11243
2.61M
    if (ctxt->checkIndex == 0) {
11244
574k
        cur = ctxt->input->cur + 1;
11245
2.03M
    } else {
11246
2.03M
        cur = ctxt->input->cur + ctxt->checkIndex;
11247
2.03M
    }
11248
2.61M
    start = cur;
11249
11250
478M
    while (cur < end) {
11251
476M
        if (state == '-') {
11252
89.9M
            if ((*cur == '-') &&
11253
89.9M
                (cur[1] == '-') &&
11254
89.9M
                (cur[2] == '>')) {
11255
668k
                state = 0;
11256
668k
                cur += 3;
11257
668k
                start = cur;
11258
668k
                continue;
11259
668k
            }
11260
89.9M
        }
11261
386M
        else if (state == ']') {
11262
637k
            if (*cur == '>') {
11263
424k
                ctxt->checkIndex = 0;
11264
424k
                ctxt->endCheckState = 0;
11265
424k
                return(1);
11266
424k
            }
11267
213k
            if (IS_BLANK_CH(*cur)) {
11268
145k
                state = ' ';
11269
145k
            } else if (*cur != ']') {
11270
32.1k
                state = 0;
11271
32.1k
                start = cur;
11272
32.1k
                continue;
11273
32.1k
            }
11274
213k
        }
11275
385M
        else if (state == ' ') {
11276
386k
            if (*cur == '>') {
11277
4.16k
                ctxt->checkIndex = 0;
11278
4.16k
                ctxt->endCheckState = 0;
11279
4.16k
                return(1);
11280
4.16k
            }
11281
381k
            if (!IS_BLANK_CH(*cur)) {
11282
140k
                state = 0;
11283
140k
                start = cur;
11284
140k
                continue;
11285
140k
            }
11286
381k
        }
11287
385M
        else if (state != 0) {
11288
173M
            if (*cur == state) {
11289
4.17M
                state = 0;
11290
4.17M
                start = cur + 1;
11291
4.17M
            }
11292
173M
        }
11293
212M
        else if (*cur == '<') {
11294
6.04M
            if ((cur[1] == '!') &&
11295
6.04M
                (cur[2] == '-') &&
11296
6.04M
                (cur[3] == '-')) {
11297
682k
                state = '-';
11298
682k
                cur += 4;
11299
                /* Don't treat <!--> as comment */
11300
682k
                start = cur;
11301
682k
                continue;
11302
682k
            }
11303
6.04M
        }
11304
206M
        else if ((*cur == '"') || (*cur == '\'') || (*cur == ']')) {
11305
4.82M
            state = *cur;
11306
4.82M
        }
11307
11308
474M
        cur++;
11309
474M
    }
11310
11311
    /*
11312
     * Rescan the three last characters to detect "<!--" and "-->"
11313
     * split across chunks.
11314
     */
11315
2.18M
    if ((state == 0) || (state == '-')) {
11316
1.33M
        if (cur - start < 3)
11317
107k
            cur = start;
11318
1.23M
        else
11319
1.23M
            cur -= 3;
11320
1.33M
    }
11321
2.18M
    ctxt->checkIndex = cur - ctxt->input->cur;
11322
2.18M
    ctxt->endCheckState = state;
11323
2.18M
    return(0);
11324
2.61M
}
11325
11326
/**
11327
 * xmlCheckCdataPush:
11328
 * @cur: pointer to the block of characters
11329
 * @len: length of the block in bytes
11330
 * @complete: 1 if complete CDATA block is passed in, 0 if partial block
11331
 *
11332
 * Check that the block of characters is okay as SCdata content [20]
11333
 *
11334
 * Returns the number of bytes to pass if okay, a negative index where an
11335
 *         UTF-8 error occurred otherwise
11336
 */
11337
static int
11338
1.15M
xmlCheckCdataPush(const xmlChar *utf, int len, int complete) {
11339
1.15M
    int ix;
11340
1.15M
    unsigned char c;
11341
1.15M
    int codepoint;
11342
11343
1.15M
    if ((utf == NULL) || (len <= 0))
11344
2.84k
        return(0);
11345
11346
55.0M
    for (ix = 0; ix < len;) {      /* string is 0-terminated */
11347
54.8M
        c = utf[ix];
11348
54.8M
        if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
11349
47.4M
      if (c >= 0x20)
11350
44.5M
    ix++;
11351
2.92M
      else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
11352
2.82M
          ix++;
11353
104k
      else
11354
104k
          return(-ix);
11355
47.4M
  } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
11356
2.58M
      if (ix + 2 > len) return(complete ? -ix : ix);
11357
2.58M
      if ((utf[ix+1] & 0xc0 ) != 0x80)
11358
345k
          return(-ix);
11359
2.23M
      codepoint = (utf[ix] & 0x1f) << 6;
11360
2.23M
      codepoint |= utf[ix+1] & 0x3f;
11361
2.23M
      if (!xmlIsCharQ(codepoint))
11362
21.0k
          return(-ix);
11363
2.21M
      ix += 2;
11364
4.74M
  } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
11365
2.12M
      if (ix + 3 > len) return(complete ? -ix : ix);
11366
2.10M
      if (((utf[ix+1] & 0xc0) != 0x80) ||
11367
2.10M
          ((utf[ix+2] & 0xc0) != 0x80))
11368
110k
        return(-ix);
11369
1.99M
      codepoint = (utf[ix] & 0xf) << 12;
11370
1.99M
      codepoint |= (utf[ix+1] & 0x3f) << 6;
11371
1.99M
      codepoint |= utf[ix+2] & 0x3f;
11372
1.99M
      if (!xmlIsCharQ(codepoint))
11373
132
          return(-ix);
11374
1.99M
      ix += 3;
11375
2.62M
  } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
11376
2.50M
      if (ix + 4 > len) return(complete ? -ix : ix);
11377
2.49M
      if (((utf[ix+1] & 0xc0) != 0x80) ||
11378
2.49M
          ((utf[ix+2] & 0xc0) != 0x80) ||
11379
2.49M
    ((utf[ix+3] & 0xc0) != 0x80))
11380
163k
        return(-ix);
11381
2.33M
      codepoint = (utf[ix] & 0x7) << 18;
11382
2.33M
      codepoint |= (utf[ix+1] & 0x3f) << 12;
11383
2.33M
      codepoint |= (utf[ix+2] & 0x3f) << 6;
11384
2.33M
      codepoint |= utf[ix+3] & 0x3f;
11385
2.33M
      if (!xmlIsCharQ(codepoint))
11386
16.5k
          return(-ix);
11387
2.31M
      ix += 4;
11388
2.31M
  } else       /* unknown encoding */
11389
122k
      return(-ix);
11390
54.8M
      }
11391
238k
      return(ix);
11392
1.15M
}
11393
11394
/**
11395
 * xmlParseTryOrFinish:
11396
 * @ctxt:  an XML parser context
11397
 * @terminate:  last chunk indicator
11398
 *
11399
 * Try to progress on parsing
11400
 *
11401
 * Returns zero if no parsing was possible
11402
 */
11403
static int
11404
21.8M
xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
11405
21.8M
    int ret = 0;
11406
21.8M
    int avail, tlen;
11407
21.8M
    xmlChar cur, next;
11408
11409
21.8M
    if (ctxt->input == NULL)
11410
0
        return(0);
11411
11412
#ifdef DEBUG_PUSH
11413
    switch (ctxt->instate) {
11414
  case XML_PARSER_EOF:
11415
      xmlGenericError(xmlGenericErrorContext,
11416
        "PP: try EOF\n"); break;
11417
  case XML_PARSER_START:
11418
      xmlGenericError(xmlGenericErrorContext,
11419
        "PP: try START\n"); break;
11420
  case XML_PARSER_MISC:
11421
      xmlGenericError(xmlGenericErrorContext,
11422
        "PP: try MISC\n");break;
11423
  case XML_PARSER_COMMENT:
11424
      xmlGenericError(xmlGenericErrorContext,
11425
        "PP: try COMMENT\n");break;
11426
  case XML_PARSER_PROLOG:
11427
      xmlGenericError(xmlGenericErrorContext,
11428
        "PP: try PROLOG\n");break;
11429
  case XML_PARSER_START_TAG:
11430
      xmlGenericError(xmlGenericErrorContext,
11431
        "PP: try START_TAG\n");break;
11432
  case XML_PARSER_CONTENT:
11433
      xmlGenericError(xmlGenericErrorContext,
11434
        "PP: try CONTENT\n");break;
11435
  case XML_PARSER_CDATA_SECTION:
11436
      xmlGenericError(xmlGenericErrorContext,
11437
        "PP: try CDATA_SECTION\n");break;
11438
  case XML_PARSER_END_TAG:
11439
      xmlGenericError(xmlGenericErrorContext,
11440
        "PP: try END_TAG\n");break;
11441
  case XML_PARSER_ENTITY_DECL:
11442
      xmlGenericError(xmlGenericErrorContext,
11443
        "PP: try ENTITY_DECL\n");break;
11444
  case XML_PARSER_ENTITY_VALUE:
11445
      xmlGenericError(xmlGenericErrorContext,
11446
        "PP: try ENTITY_VALUE\n");break;
11447
  case XML_PARSER_ATTRIBUTE_VALUE:
11448
      xmlGenericError(xmlGenericErrorContext,
11449
        "PP: try ATTRIBUTE_VALUE\n");break;
11450
  case XML_PARSER_DTD:
11451
      xmlGenericError(xmlGenericErrorContext,
11452
        "PP: try DTD\n");break;
11453
  case XML_PARSER_EPILOG:
11454
      xmlGenericError(xmlGenericErrorContext,
11455
        "PP: try EPILOG\n");break;
11456
  case XML_PARSER_PI:
11457
      xmlGenericError(xmlGenericErrorContext,
11458
        "PP: try PI\n");break;
11459
        case XML_PARSER_IGNORE:
11460
            xmlGenericError(xmlGenericErrorContext,
11461
        "PP: try IGNORE\n");break;
11462
    }
11463
#endif
11464
11465
21.8M
    if ((ctxt->input != NULL) &&
11466
21.8M
        (ctxt->input->cur - ctxt->input->base > 4096)) {
11467
323k
        xmlParserInputShrink(ctxt->input);
11468
323k
    }
11469
11470
158M
    while (ctxt->instate != XML_PARSER_EOF) {
11471
158M
  if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
11472
615k
      return(0);
11473
11474
157M
  if (ctxt->input == NULL) break;
11475
157M
  if (ctxt->input->buf == NULL)
11476
0
      avail = ctxt->input->length -
11477
0
              (ctxt->input->cur - ctxt->input->base);
11478
157M
  else {
11479
      /*
11480
       * If we are operating on converted input, try to flush
11481
       * remaining chars to avoid them stalling in the non-converted
11482
       * buffer. But do not do this in document start where
11483
       * encoding="..." may not have been read and we work on a
11484
       * guessed encoding.
11485
       */
11486
157M
      if ((ctxt->instate != XML_PARSER_START) &&
11487
157M
          (ctxt->input->buf->raw != NULL) &&
11488
157M
    (xmlBufIsEmpty(ctxt->input->buf->raw) == 0)) {
11489
277k
                size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
11490
277k
                                                 ctxt->input);
11491
277k
    size_t current = ctxt->input->cur - ctxt->input->base;
11492
11493
277k
    xmlParserInputBufferPush(ctxt->input->buf, 0, "");
11494
277k
                xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
11495
277k
                                      base, current);
11496
277k
      }
11497
157M
      avail = xmlBufUse(ctxt->input->buf->buffer) -
11498
157M
        (ctxt->input->cur - ctxt->input->base);
11499
157M
  }
11500
157M
        if (avail < 1)
11501
1.14M
      goto done;
11502
156M
        switch (ctxt->instate) {
11503
0
            case XML_PARSER_EOF:
11504
          /*
11505
     * Document parsing is done !
11506
     */
11507
0
          goto done;
11508
4.89M
            case XML_PARSER_START:
11509
4.89M
    if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
11510
1.52M
        xmlChar start[4];
11511
1.52M
        xmlCharEncoding enc;
11512
11513
        /*
11514
         * Very first chars read from the document flow.
11515
         */
11516
1.52M
        if (avail < 4)
11517
37.0k
      goto done;
11518
11519
        /*
11520
         * Get the 4 first bytes and decode the charset
11521
         * if enc != XML_CHAR_ENCODING_NONE
11522
         * plug some encoding conversion routines,
11523
         * else xmlSwitchEncoding will set to (default)
11524
         * UTF8.
11525
         */
11526
1.48M
        start[0] = RAW;
11527
1.48M
        start[1] = NXT(1);
11528
1.48M
        start[2] = NXT(2);
11529
1.48M
        start[3] = NXT(3);
11530
1.48M
        enc = xmlDetectCharEncoding(start, 4);
11531
1.48M
        xmlSwitchEncoding(ctxt, enc);
11532
1.48M
        break;
11533
1.52M
    }
11534
11535
3.37M
    if (avail < 2)
11536
1.79k
        goto done;
11537
3.37M
    cur = ctxt->input->cur[0];
11538
3.37M
    next = ctxt->input->cur[1];
11539
3.37M
    if (cur == 0) {
11540
7.33k
        if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11541
7.33k
      ctxt->sax->setDocumentLocator(ctxt->userData,
11542
7.33k
                  &xmlDefaultSAXLocator);
11543
7.33k
        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11544
7.33k
        xmlHaltParser(ctxt);
11545
#ifdef DEBUG_PUSH
11546
        xmlGenericError(xmlGenericErrorContext,
11547
          "PP: entering EOF\n");
11548
#endif
11549
7.33k
        if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11550
7.33k
      ctxt->sax->endDocument(ctxt->userData);
11551
7.33k
        goto done;
11552
7.33k
    }
11553
3.36M
          if ((cur == '<') && (next == '?')) {
11554
        /* PI or XML decl */
11555
2.39M
        if (avail < 5) goto done;
11556
2.38M
        if ((!terminate) &&
11557
2.38M
                        (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11558
1.39M
      goto done;
11559
998k
        if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11560
998k
      ctxt->sax->setDocumentLocator(ctxt->userData,
11561
998k
                  &xmlDefaultSAXLocator);
11562
998k
        if ((ctxt->input->cur[2] == 'x') &&
11563
998k
      (ctxt->input->cur[3] == 'm') &&
11564
998k
      (ctxt->input->cur[4] == 'l') &&
11565
998k
      (IS_BLANK_CH(ctxt->input->cur[5]))) {
11566
897k
      ret += 5;
11567
#ifdef DEBUG_PUSH
11568
      xmlGenericError(xmlGenericErrorContext,
11569
        "PP: Parsing XML Decl\n");
11570
#endif
11571
897k
      xmlParseXMLDecl(ctxt);
11572
897k
      if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
11573
          /*
11574
           * The XML REC instructs us to stop parsing right
11575
           * here
11576
           */
11577
2.82k
          xmlHaltParser(ctxt);
11578
2.82k
          return(0);
11579
2.82k
      }
11580
895k
      ctxt->standalone = ctxt->input->standalone;
11581
895k
      if ((ctxt->encoding == NULL) &&
11582
895k
          (ctxt->input->encoding != NULL))
11583
108k
          ctxt->encoding = xmlStrdup(ctxt->input->encoding);
11584
895k
      if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11585
895k
          (!ctxt->disableSAX))
11586
794k
          ctxt->sax->startDocument(ctxt->userData);
11587
895k
      ctxt->instate = XML_PARSER_MISC;
11588
#ifdef DEBUG_PUSH
11589
      xmlGenericError(xmlGenericErrorContext,
11590
        "PP: entering MISC\n");
11591
#endif
11592
895k
        } else {
11593
100k
      ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11594
100k
      if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11595
100k
          (!ctxt->disableSAX))
11596
100k
          ctxt->sax->startDocument(ctxt->userData);
11597
100k
      ctxt->instate = XML_PARSER_MISC;
11598
#ifdef DEBUG_PUSH
11599
      xmlGenericError(xmlGenericErrorContext,
11600
        "PP: entering MISC\n");
11601
#endif
11602
100k
        }
11603
998k
    } else {
11604
974k
        if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11605
974k
      ctxt->sax->setDocumentLocator(ctxt->userData,
11606
974k
                  &xmlDefaultSAXLocator);
11607
974k
        ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11608
974k
        if (ctxt->version == NULL) {
11609
0
            xmlErrMemory(ctxt, NULL);
11610
0
      break;
11611
0
        }
11612
974k
        if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11613
974k
            (!ctxt->disableSAX))
11614
974k
      ctxt->sax->startDocument(ctxt->userData);
11615
974k
        ctxt->instate = XML_PARSER_MISC;
11616
#ifdef DEBUG_PUSH
11617
        xmlGenericError(xmlGenericErrorContext,
11618
          "PP: entering MISC\n");
11619
#endif
11620
974k
    }
11621
1.97M
    break;
11622
32.5M
            case XML_PARSER_START_TAG: {
11623
32.5M
          const xmlChar *name;
11624
32.5M
    const xmlChar *prefix = NULL;
11625
32.5M
    const xmlChar *URI = NULL;
11626
32.5M
                int line = ctxt->input->line;
11627
32.5M
    int nsNr = ctxt->nsNr;
11628
11629
32.5M
    if ((avail < 2) && (ctxt->inputNr == 1))
11630
0
        goto done;
11631
32.5M
    cur = ctxt->input->cur[0];
11632
32.5M
          if (cur != '<') {
11633
233k
        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11634
233k
        xmlHaltParser(ctxt);
11635
233k
        if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11636
233k
      ctxt->sax->endDocument(ctxt->userData);
11637
233k
        goto done;
11638
233k
    }
11639
32.2M
    if ((!terminate) && (!xmlParseLookupGt(ctxt)))
11640
6.89M
                    goto done;
11641
25.4M
    if (ctxt->spaceNr == 0)
11642
207k
        spacePush(ctxt, -1);
11643
25.1M
    else if (*ctxt->space == -2)
11644
2.23M
        spacePush(ctxt, -1);
11645
22.9M
    else
11646
22.9M
        spacePush(ctxt, *ctxt->space);
11647
25.4M
#ifdef LIBXML_SAX1_ENABLED
11648
25.4M
    if (ctxt->sax2)
11649
15.9M
#endif /* LIBXML_SAX1_ENABLED */
11650
15.9M
        name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
11651
9.42M
#ifdef LIBXML_SAX1_ENABLED
11652
9.42M
    else
11653
9.42M
        name = xmlParseStartTag(ctxt);
11654
25.4M
#endif /* LIBXML_SAX1_ENABLED */
11655
25.4M
    if (ctxt->instate == XML_PARSER_EOF)
11656
720
        goto done;
11657
25.4M
    if (name == NULL) {
11658
120k
        spacePop(ctxt);
11659
120k
        xmlHaltParser(ctxt);
11660
120k
        if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11661
120k
      ctxt->sax->endDocument(ctxt->userData);
11662
120k
        goto done;
11663
120k
    }
11664
25.2M
#ifdef LIBXML_VALID_ENABLED
11665
    /*
11666
     * [ VC: Root Element Type ]
11667
     * The Name in the document type declaration must match
11668
     * the element type of the root element.
11669
     */
11670
25.2M
    if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
11671
25.2M
        ctxt->node && (ctxt->node == ctxt->myDoc->children))
11672
0
        ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
11673
25.2M
#endif /* LIBXML_VALID_ENABLED */
11674
11675
    /*
11676
     * Check for an Empty Element.
11677
     */
11678
25.2M
    if ((RAW == '/') && (NXT(1) == '>')) {
11679
8.23M
        SKIP(2);
11680
11681
8.23M
        if (ctxt->sax2) {
11682
5.47M
      if ((ctxt->sax != NULL) &&
11683
5.47M
          (ctxt->sax->endElementNs != NULL) &&
11684
5.47M
          (!ctxt->disableSAX))
11685
5.46M
          ctxt->sax->endElementNs(ctxt->userData, name,
11686
5.46M
                                  prefix, URI);
11687
5.47M
      if (ctxt->nsNr - nsNr > 0)
11688
17.6k
          nsPop(ctxt, ctxt->nsNr - nsNr);
11689
5.47M
#ifdef LIBXML_SAX1_ENABLED
11690
5.47M
        } else {
11691
2.76M
      if ((ctxt->sax != NULL) &&
11692
2.76M
          (ctxt->sax->endElement != NULL) &&
11693
2.76M
          (!ctxt->disableSAX))
11694
2.76M
          ctxt->sax->endElement(ctxt->userData, name);
11695
2.76M
#endif /* LIBXML_SAX1_ENABLED */
11696
2.76M
        }
11697
8.23M
        if (ctxt->instate == XML_PARSER_EOF)
11698
0
      goto done;
11699
8.23M
        spacePop(ctxt);
11700
8.23M
        if (ctxt->nameNr == 0) {
11701
50.7k
      ctxt->instate = XML_PARSER_EPILOG;
11702
8.18M
        } else {
11703
8.18M
      ctxt->instate = XML_PARSER_CONTENT;
11704
8.18M
        }
11705
8.23M
        break;
11706
8.23M
    }
11707
17.0M
    if (RAW == '>') {
11708
16.0M
        NEXT;
11709
16.0M
    } else {
11710
1.02M
        xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
11711
1.02M
           "Couldn't find end of Start Tag %s\n",
11712
1.02M
           name);
11713
1.02M
        nodePop(ctxt);
11714
1.02M
        spacePop(ctxt);
11715
1.02M
    }
11716
17.0M
                nameNsPush(ctxt, name, prefix, URI, line, ctxt->nsNr - nsNr);
11717
11718
17.0M
    ctxt->instate = XML_PARSER_CONTENT;
11719
17.0M
                break;
11720
25.2M
      }
11721
95.2M
            case XML_PARSER_CONTENT: {
11722
95.2M
    if ((avail < 2) && (ctxt->inputNr == 1))
11723
412k
        goto done;
11724
94.8M
    cur = ctxt->input->cur[0];
11725
94.8M
    next = ctxt->input->cur[1];
11726
11727
94.8M
    if ((cur == '<') && (next == '/')) {
11728
14.8M
        ctxt->instate = XML_PARSER_END_TAG;
11729
14.8M
        break;
11730
79.9M
          } else if ((cur == '<') && (next == '?')) {
11731
240k
        if ((!terminate) &&
11732
240k
            (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11733
155k
      goto done;
11734
84.2k
        xmlParsePI(ctxt);
11735
84.2k
        ctxt->instate = XML_PARSER_CONTENT;
11736
79.7M
    } else if ((cur == '<') && (next != '!')) {
11737
24.2M
        ctxt->instate = XML_PARSER_START_TAG;
11738
24.2M
        break;
11739
55.4M
    } else if ((cur == '<') && (next == '!') &&
11740
55.4M
               (ctxt->input->cur[2] == '-') &&
11741
55.4M
         (ctxt->input->cur[3] == '-')) {
11742
1.64M
        if ((!terminate) &&
11743
1.64M
            (!xmlParseLookupString(ctxt, 4, "-->", 3)))
11744
748k
      goto done;
11745
894k
        xmlParseComment(ctxt);
11746
894k
        ctxt->instate = XML_PARSER_CONTENT;
11747
53.7M
    } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
11748
53.7M
        (ctxt->input->cur[2] == '[') &&
11749
53.7M
        (ctxt->input->cur[3] == 'C') &&
11750
53.7M
        (ctxt->input->cur[4] == 'D') &&
11751
53.7M
        (ctxt->input->cur[5] == 'A') &&
11752
53.7M
        (ctxt->input->cur[6] == 'T') &&
11753
53.7M
        (ctxt->input->cur[7] == 'A') &&
11754
53.7M
        (ctxt->input->cur[8] == '[')) {
11755
121k
        SKIP(9);
11756
121k
        ctxt->instate = XML_PARSER_CDATA_SECTION;
11757
121k
        break;
11758
53.6M
    } else if ((cur == '<') && (next == '!') &&
11759
53.6M
               (avail < 9)) {
11760
59.5k
        goto done;
11761
53.6M
    } else if (cur == '<') {
11762
1.50M
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
11763
1.50M
                    "detected an error in element content\n");
11764
1.50M
                    SKIP(1);
11765
52.1M
    } else if (cur == '&') {
11766
5.85M
        if ((!terminate) && (!xmlParseLookupChar(ctxt, ';')))
11767
289k
      goto done;
11768
5.56M
        xmlParseReference(ctxt);
11769
46.2M
    } else {
11770
        /* TODO Avoid the extra copy, handle directly !!! */
11771
        /*
11772
         * Goal of the following test is:
11773
         *  - minimize calls to the SAX 'character' callback
11774
         *    when they are mergeable
11775
         *  - handle a problem for isBlank when we only parse
11776
         *    a sequence of blank chars and the next one is
11777
         *    not available to check against '<' presence.
11778
         *  - tries to homogenize the differences in SAX
11779
         *    callbacks between the push and pull versions
11780
         *    of the parser.
11781
         */
11782
46.2M
        if ((ctxt->inputNr == 1) &&
11783
46.2M
            (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
11784
34.6M
      if ((!terminate) && (!xmlParseLookupCharData(ctxt)))
11785
4.09M
          goto done;
11786
34.6M
                    }
11787
42.1M
                    ctxt->checkIndex = 0;
11788
42.1M
        xmlParseCharData(ctxt, 0);
11789
42.1M
    }
11790
50.2M
    break;
11791
94.8M
      }
11792
50.2M
            case XML_PARSER_END_TAG:
11793
15.3M
    if (avail < 2)
11794
0
        goto done;
11795
15.3M
    if ((!terminate) && (!xmlParseLookupChar(ctxt, '>')))
11796
462k
        goto done;
11797
14.8M
    if (ctxt->sax2) {
11798
9.15M
              xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
11799
9.15M
        nameNsPop(ctxt);
11800
9.15M
    }
11801
5.69M
#ifdef LIBXML_SAX1_ENABLED
11802
5.69M
      else
11803
5.69M
        xmlParseEndTag1(ctxt, 0);
11804
14.8M
#endif /* LIBXML_SAX1_ENABLED */
11805
14.8M
    if (ctxt->instate == XML_PARSER_EOF) {
11806
        /* Nothing */
11807
14.8M
    } else if (ctxt->nameNr == 0) {
11808
170k
        ctxt->instate = XML_PARSER_EPILOG;
11809
14.6M
    } else {
11810
14.6M
        ctxt->instate = XML_PARSER_CONTENT;
11811
14.6M
    }
11812
14.8M
    break;
11813
1.53M
            case XML_PARSER_CDATA_SECTION: {
11814
          /*
11815
     * The Push mode needs to have the SAX callback for
11816
     * cdataBlock merge back contiguous callbacks.
11817
     */
11818
1.53M
    const xmlChar *term;
11819
11820
1.53M
                if (terminate) {
11821
                    /*
11822
                     * Don't call xmlParseLookupString. If 'terminate'
11823
                     * is set, checkIndex is invalid.
11824
                     */
11825
32.1k
                    term = BAD_CAST strstr((const char *) ctxt->input->cur,
11826
32.1k
                                           "]]>");
11827
1.50M
                } else {
11828
1.50M
        term = xmlParseLookupString(ctxt, 0, "]]>", 3);
11829
1.50M
                }
11830
11831
1.53M
    if (term == NULL) {
11832
1.00M
        int tmp, size;
11833
11834
1.00M
                    if (terminate) {
11835
                        /* Unfinished CDATA section */
11836
23.9k
                        size = ctxt->input->end - ctxt->input->cur;
11837
986k
                    } else {
11838
986k
                        if (avail < XML_PARSER_BIG_BUFFER_SIZE + 2)
11839
382k
                            goto done;
11840
603k
                        ctxt->checkIndex = 0;
11841
                        /* XXX: Why don't we pass the full buffer? */
11842
603k
                        size = XML_PARSER_BIG_BUFFER_SIZE;
11843
603k
                    }
11844
627k
                    tmp = xmlCheckCdataPush(ctxt->input->cur, size, 0);
11845
627k
                    if (tmp <= 0) {
11846
448k
                        tmp = -tmp;
11847
448k
                        ctxt->input->cur += tmp;
11848
448k
                        goto encoding_error;
11849
448k
                    }
11850
179k
                    if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
11851
179k
                        if (ctxt->sax->cdataBlock != NULL)
11852
107k
                            ctxt->sax->cdataBlock(ctxt->userData,
11853
107k
                                                  ctxt->input->cur, tmp);
11854
72.1k
                        else if (ctxt->sax->characters != NULL)
11855
72.1k
                            ctxt->sax->characters(ctxt->userData,
11856
72.1k
                                                  ctxt->input->cur, tmp);
11857
179k
                    }
11858
179k
                    if (ctxt->instate == XML_PARSER_EOF)
11859
0
                        goto done;
11860
179k
                    SKIPL(tmp);
11861
526k
    } else {
11862
526k
                    int base = term - CUR_PTR;
11863
526k
        int tmp;
11864
11865
526k
        tmp = xmlCheckCdataPush(ctxt->input->cur, base, 1);
11866
526k
        if ((tmp < 0) || (tmp != base)) {
11867
456k
      tmp = -tmp;
11868
456k
      ctxt->input->cur += tmp;
11869
456k
      goto encoding_error;
11870
456k
        }
11871
69.9k
        if ((ctxt->sax != NULL) && (base == 0) &&
11872
69.9k
            (ctxt->sax->cdataBlock != NULL) &&
11873
69.9k
            (!ctxt->disableSAX)) {
11874
      /*
11875
       * Special case to provide identical behaviour
11876
       * between pull and push parsers on enpty CDATA
11877
       * sections
11878
       */
11879
1.39k
       if ((ctxt->input->cur - ctxt->input->base >= 9) &&
11880
1.39k
           (!strncmp((const char *)&ctxt->input->cur[-9],
11881
1.39k
                     "<![CDATA[", 9)))
11882
1.38k
           ctxt->sax->cdataBlock(ctxt->userData,
11883
1.38k
                                 BAD_CAST "", 0);
11884
68.5k
        } else if ((ctxt->sax != NULL) && (base > 0) &&
11885
68.5k
      (!ctxt->disableSAX)) {
11886
67.0k
      if (ctxt->sax->cdataBlock != NULL)
11887
42.9k
          ctxt->sax->cdataBlock(ctxt->userData,
11888
42.9k
              ctxt->input->cur, base);
11889
24.1k
      else if (ctxt->sax->characters != NULL)
11890
24.1k
          ctxt->sax->characters(ctxt->userData,
11891
24.1k
              ctxt->input->cur, base);
11892
67.0k
        }
11893
69.9k
        if (ctxt->instate == XML_PARSER_EOF)
11894
0
      goto done;
11895
69.9k
        SKIPL(base + 3);
11896
69.9k
        ctxt->instate = XML_PARSER_CONTENT;
11897
#ifdef DEBUG_PUSH
11898
        xmlGenericError(xmlGenericErrorContext,
11899
          "PP: entering CONTENT\n");
11900
#endif
11901
69.9k
    }
11902
249k
    break;
11903
1.53M
      }
11904
3.09M
            case XML_PARSER_MISC:
11905
3.85M
            case XML_PARSER_PROLOG:
11906
4.09M
            case XML_PARSER_EPILOG:
11907
4.09M
    SKIP_BLANKS;
11908
4.09M
    if (ctxt->input->buf == NULL)
11909
0
        avail = ctxt->input->length -
11910
0
                (ctxt->input->cur - ctxt->input->base);
11911
4.09M
    else
11912
4.09M
        avail = xmlBufUse(ctxt->input->buf->buffer) -
11913
4.09M
                (ctxt->input->cur - ctxt->input->base);
11914
4.09M
    if (avail < 2)
11915
171k
        goto done;
11916
3.92M
    cur = ctxt->input->cur[0];
11917
3.92M
    next = ctxt->input->cur[1];
11918
3.92M
          if ((cur == '<') && (next == '?')) {
11919
373k
        if ((!terminate) &&
11920
373k
                        (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11921
142k
      goto done;
11922
#ifdef DEBUG_PUSH
11923
        xmlGenericError(xmlGenericErrorContext,
11924
          "PP: Parsing PI\n");
11925
#endif
11926
231k
        xmlParsePI(ctxt);
11927
231k
        if (ctxt->instate == XML_PARSER_EOF)
11928
0
      goto done;
11929
3.55M
    } else if ((cur == '<') && (next == '!') &&
11930
3.55M
        (ctxt->input->cur[2] == '-') &&
11931
3.55M
        (ctxt->input->cur[3] == '-')) {
11932
461k
        if ((!terminate) &&
11933
461k
                        (!xmlParseLookupString(ctxt, 4, "-->", 3)))
11934
285k
      goto done;
11935
#ifdef DEBUG_PUSH
11936
        xmlGenericError(xmlGenericErrorContext,
11937
          "PP: Parsing Comment\n");
11938
#endif
11939
175k
        xmlParseComment(ctxt);
11940
175k
        if (ctxt->instate == XML_PARSER_EOF)
11941
0
      goto done;
11942
3.09M
    } else if ((ctxt->instate == XML_PARSER_MISC) &&
11943
3.09M
                    (cur == '<') && (next == '!') &&
11944
3.09M
        (ctxt->input->cur[2] == 'D') &&
11945
3.09M
        (ctxt->input->cur[3] == 'O') &&
11946
3.09M
        (ctxt->input->cur[4] == 'C') &&
11947
3.09M
        (ctxt->input->cur[5] == 'T') &&
11948
3.09M
        (ctxt->input->cur[6] == 'Y') &&
11949
3.09M
        (ctxt->input->cur[7] == 'P') &&
11950
3.09M
        (ctxt->input->cur[8] == 'E')) {
11951
1.59M
        if ((!terminate) && (!xmlParseLookupGt(ctxt)))
11952
786k
                        goto done;
11953
#ifdef DEBUG_PUSH
11954
        xmlGenericError(xmlGenericErrorContext,
11955
          "PP: Parsing internal subset\n");
11956
#endif
11957
811k
        ctxt->inSubset = 1;
11958
811k
        xmlParseDocTypeDecl(ctxt);
11959
811k
        if (ctxt->instate == XML_PARSER_EOF)
11960
0
      goto done;
11961
811k
        if (RAW == '[') {
11962
611k
      ctxt->instate = XML_PARSER_DTD;
11963
#ifdef DEBUG_PUSH
11964
      xmlGenericError(xmlGenericErrorContext,
11965
        "PP: entering DTD\n");
11966
#endif
11967
611k
        } else {
11968
      /*
11969
       * Create and update the external subset.
11970
       */
11971
199k
      ctxt->inSubset = 2;
11972
199k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11973
199k
          (ctxt->sax->externalSubset != NULL))
11974
168k
          ctxt->sax->externalSubset(ctxt->userData,
11975
168k
            ctxt->intSubName, ctxt->extSubSystem,
11976
168k
            ctxt->extSubURI);
11977
199k
      ctxt->inSubset = 0;
11978
199k
      xmlCleanSpecialAttr(ctxt);
11979
199k
      ctxt->instate = XML_PARSER_PROLOG;
11980
#ifdef DEBUG_PUSH
11981
      xmlGenericError(xmlGenericErrorContext,
11982
        "PP: entering PROLOG\n");
11983
#endif
11984
199k
        }
11985
1.49M
    } else if ((cur == '<') && (next == '!') &&
11986
1.49M
               (avail <
11987
45.9k
                            (ctxt->instate == XML_PARSER_MISC ? 9 : 4))) {
11988
16.8k
        goto done;
11989
1.47M
    } else if (ctxt->instate == XML_PARSER_EPILOG) {
11990
61.8k
        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
11991
61.8k
        xmlHaltParser(ctxt);
11992
#ifdef DEBUG_PUSH
11993
        xmlGenericError(xmlGenericErrorContext,
11994
          "PP: entering EOF\n");
11995
#endif
11996
61.8k
        if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11997
61.8k
      ctxt->sax->endDocument(ctxt->userData);
11998
61.8k
        goto done;
11999
1.41M
                } else {
12000
1.41M
        ctxt->instate = XML_PARSER_START_TAG;
12001
#ifdef DEBUG_PUSH
12002
        xmlGenericError(xmlGenericErrorContext,
12003
          "PP: entering START_TAG\n");
12004
#endif
12005
1.41M
    }
12006
2.63M
    break;
12007
2.78M
            case XML_PARSER_DTD: {
12008
2.78M
                if ((!terminate) && (!xmlParseLookupInternalSubset(ctxt)))
12009
2.18M
                    goto done;
12010
603k
    xmlParseInternalSubset(ctxt);
12011
603k
    if (ctxt->instate == XML_PARSER_EOF)
12012
248k
        goto done;
12013
354k
    ctxt->inSubset = 2;
12014
354k
    if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
12015
354k
        (ctxt->sax->externalSubset != NULL))
12016
334k
        ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
12017
334k
          ctxt->extSubSystem, ctxt->extSubURI);
12018
354k
    ctxt->inSubset = 0;
12019
354k
    xmlCleanSpecialAttr(ctxt);
12020
354k
    if (ctxt->instate == XML_PARSER_EOF)
12021
14.6k
        goto done;
12022
340k
    ctxt->instate = XML_PARSER_PROLOG;
12023
#ifdef DEBUG_PUSH
12024
    xmlGenericError(xmlGenericErrorContext,
12025
      "PP: entering PROLOG\n");
12026
#endif
12027
340k
                break;
12028
354k
      }
12029
0
            case XML_PARSER_COMMENT:
12030
0
    xmlGenericError(xmlGenericErrorContext,
12031
0
      "PP: internal error, state == COMMENT\n");
12032
0
    ctxt->instate = XML_PARSER_CONTENT;
12033
#ifdef DEBUG_PUSH
12034
    xmlGenericError(xmlGenericErrorContext,
12035
      "PP: entering CONTENT\n");
12036
#endif
12037
0
    break;
12038
0
            case XML_PARSER_IGNORE:
12039
0
    xmlGenericError(xmlGenericErrorContext,
12040
0
      "PP: internal error, state == IGNORE");
12041
0
          ctxt->instate = XML_PARSER_DTD;
12042
#ifdef DEBUG_PUSH
12043
    xmlGenericError(xmlGenericErrorContext,
12044
      "PP: entering DTD\n");
12045
#endif
12046
0
          break;
12047
0
            case XML_PARSER_PI:
12048
0
    xmlGenericError(xmlGenericErrorContext,
12049
0
      "PP: internal error, state == PI\n");
12050
0
    ctxt->instate = XML_PARSER_CONTENT;
12051
#ifdef DEBUG_PUSH
12052
    xmlGenericError(xmlGenericErrorContext,
12053
      "PP: entering CONTENT\n");
12054
#endif
12055
0
    break;
12056
0
            case XML_PARSER_ENTITY_DECL:
12057
0
    xmlGenericError(xmlGenericErrorContext,
12058
0
      "PP: internal error, state == ENTITY_DECL\n");
12059
0
    ctxt->instate = XML_PARSER_DTD;
12060
#ifdef DEBUG_PUSH
12061
    xmlGenericError(xmlGenericErrorContext,
12062
      "PP: entering DTD\n");
12063
#endif
12064
0
    break;
12065
0
            case XML_PARSER_ENTITY_VALUE:
12066
0
    xmlGenericError(xmlGenericErrorContext,
12067
0
      "PP: internal error, state == ENTITY_VALUE\n");
12068
0
    ctxt->instate = XML_PARSER_CONTENT;
12069
#ifdef DEBUG_PUSH
12070
    xmlGenericError(xmlGenericErrorContext,
12071
      "PP: entering DTD\n");
12072
#endif
12073
0
    break;
12074
0
            case XML_PARSER_ATTRIBUTE_VALUE:
12075
0
    xmlGenericError(xmlGenericErrorContext,
12076
0
      "PP: internal error, state == ATTRIBUTE_VALUE\n");
12077
0
    ctxt->instate = XML_PARSER_START_TAG;
12078
#ifdef DEBUG_PUSH
12079
    xmlGenericError(xmlGenericErrorContext,
12080
      "PP: entering START_TAG\n");
12081
#endif
12082
0
    break;
12083
0
            case XML_PARSER_SYSTEM_LITERAL:
12084
0
    xmlGenericError(xmlGenericErrorContext,
12085
0
      "PP: internal error, state == SYSTEM_LITERAL\n");
12086
0
    ctxt->instate = XML_PARSER_START_TAG;
12087
#ifdef DEBUG_PUSH
12088
    xmlGenericError(xmlGenericErrorContext,
12089
      "PP: entering START_TAG\n");
12090
#endif
12091
0
    break;
12092
0
            case XML_PARSER_PUBLIC_LITERAL:
12093
0
    xmlGenericError(xmlGenericErrorContext,
12094
0
      "PP: internal error, state == PUBLIC_LITERAL\n");
12095
0
    ctxt->instate = XML_PARSER_START_TAG;
12096
#ifdef DEBUG_PUSH
12097
    xmlGenericError(xmlGenericErrorContext,
12098
      "PP: entering START_TAG\n");
12099
#endif
12100
0
    break;
12101
156M
  }
12102
156M
    }
12103
20.3M
done:
12104
#ifdef DEBUG_PUSH
12105
    xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
12106
#endif
12107
20.3M
    return(ret);
12108
904k
encoding_error:
12109
904k
    {
12110
904k
        char buffer[150];
12111
12112
904k
  snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
12113
904k
      ctxt->input->cur[0], ctxt->input->cur[1],
12114
904k
      ctxt->input->cur[2], ctxt->input->cur[3]);
12115
904k
  __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
12116
904k
         "Input is not proper UTF-8, indicate encoding !\n%s",
12117
904k
         BAD_CAST buffer, NULL);
12118
904k
    }
12119
904k
    return(0);
12120
21.8M
}
12121
12122
/**
12123
 * xmlParseChunk:
12124
 * @ctxt:  an XML parser context
12125
 * @chunk:  a char array
12126
 * @size:  the size in bytes of the chunk
12127
 * @terminate:  last chunk indicator
12128
 *
12129
 * Parse a Chunk of memory
12130
 *
12131
 * Returns zero if no error, the xmlParserErrors otherwise.
12132
 */
12133
int
12134
xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
12135
29.8M
              int terminate) {
12136
29.8M
    int end_in_lf = 0;
12137
29.8M
    int remain = 0;
12138
12139
29.8M
    if (ctxt == NULL)
12140
0
        return(XML_ERR_INTERNAL_ERROR);
12141
29.8M
    if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12142
8.03M
        return(ctxt->errNo);
12143
21.7M
    if (ctxt->instate == XML_PARSER_EOF)
12144
1.81k
        return(-1);
12145
21.7M
    if (ctxt->input == NULL)
12146
0
        return(-1);
12147
12148
21.7M
    ctxt->progressive = 1;
12149
21.7M
    if (ctxt->instate == XML_PARSER_START)
12150
3.33M
        xmlDetectSAX2(ctxt);
12151
21.7M
    if ((size > 0) && (chunk != NULL) && (!terminate) &&
12152
21.7M
        (chunk[size - 1] == '\r')) {
12153
118k
  end_in_lf = 1;
12154
118k
  size--;
12155
118k
    }
12156
12157
21.8M
xmldecl_done:
12158
12159
21.8M
    if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
12160
21.8M
        (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF))  {
12161
20.4M
  size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12162
20.4M
  size_t cur = ctxt->input->cur - ctxt->input->base;
12163
20.4M
  int res;
12164
12165
        /*
12166
         * Specific handling if we autodetected an encoding, we should not
12167
         * push more than the first line ... which depends on the encoding
12168
         * And only push the rest once the final encoding was detected
12169
         */
12170
20.4M
        if ((ctxt->instate == XML_PARSER_START) && (ctxt->input != NULL) &&
12171
20.4M
            (ctxt->input->buf != NULL) && (ctxt->input->buf->encoder != NULL)) {
12172
142k
            unsigned int len = 45;
12173
12174
142k
            if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12175
142k
                               BAD_CAST "UTF-16")) ||
12176
142k
                (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12177
37.7k
                               BAD_CAST "UTF16")))
12178
104k
                len = 90;
12179
37.7k
            else if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12180
37.7k
                                    BAD_CAST "UCS-4")) ||
12181
37.7k
                     (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12182
35.5k
                                    BAD_CAST "UCS4")))
12183
2.14k
                len = 180;
12184
12185
142k
            if (ctxt->input->buf->rawconsumed < len)
12186
21.8k
                len -= ctxt->input->buf->rawconsumed;
12187
12188
            /*
12189
             * Change size for reading the initial declaration only
12190
             * if size is greater than len. Otherwise, memmove in xmlBufferAdd
12191
             * will blindly copy extra bytes from memory.
12192
             */
12193
142k
            if ((unsigned int) size > len) {
12194
94.2k
                remain = size - len;
12195
94.2k
                size = len;
12196
94.2k
            } else {
12197
47.7k
                remain = 0;
12198
47.7k
            }
12199
142k
        }
12200
20.4M
  res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12201
20.4M
        xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
12202
20.4M
  if (res < 0) {
12203
5.39k
      ctxt->errNo = XML_PARSER_EOF;
12204
5.39k
      xmlHaltParser(ctxt);
12205
5.39k
      return (XML_PARSER_EOF);
12206
5.39k
  }
12207
#ifdef DEBUG_PUSH
12208
  xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12209
#endif
12210
12211
20.4M
    } else if (ctxt->instate != XML_PARSER_EOF) {
12212
1.44M
  if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
12213
1.44M
      xmlParserInputBufferPtr in = ctxt->input->buf;
12214
1.44M
      if ((in->encoder != NULL) && (in->buffer != NULL) &&
12215
1.44M
        (in->raw != NULL)) {
12216
85.7k
    int nbchars;
12217
85.7k
    size_t base = xmlBufGetInputBase(in->buffer, ctxt->input);
12218
85.7k
    size_t current = ctxt->input->cur - ctxt->input->base;
12219
12220
85.7k
    nbchars = xmlCharEncInput(in, terminate);
12221
85.7k
    xmlBufSetInputBaseCur(in->buffer, ctxt->input, base, current);
12222
85.7k
    if (nbchars < 0) {
12223
        /* TODO 2.6.0 */
12224
3.86k
        xmlGenericError(xmlGenericErrorContext,
12225
3.86k
            "xmlParseChunk: encoder error\n");
12226
3.86k
                    xmlHaltParser(ctxt);
12227
3.86k
        return(XML_ERR_INVALID_ENCODING);
12228
3.86k
    }
12229
85.7k
      }
12230
1.44M
  }
12231
1.44M
    }
12232
12233
21.8M
    if (remain != 0) {
12234
91.7k
        xmlParseTryOrFinish(ctxt, 0);
12235
21.7M
    } else {
12236
21.7M
        xmlParseTryOrFinish(ctxt, terminate);
12237
21.7M
    }
12238
21.8M
    if (ctxt->instate == XML_PARSER_EOF)
12239
691k
        return(ctxt->errNo);
12240
12241
21.1M
    if ((ctxt->input != NULL) &&
12242
21.1M
         (((ctxt->input->end - ctxt->input->cur) > XML_MAX_LOOKUP_LIMIT) ||
12243
21.1M
         ((ctxt->input->cur - ctxt->input->base) > XML_MAX_LOOKUP_LIMIT)) &&
12244
21.1M
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
12245
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
12246
0
        xmlHaltParser(ctxt);
12247
0
    }
12248
21.1M
    if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12249
630k
        return(ctxt->errNo);
12250
12251
20.5M
    if (remain != 0) {
12252
87.0k
        chunk += size;
12253
87.0k
        size = remain;
12254
87.0k
        remain = 0;
12255
87.0k
        goto xmldecl_done;
12256
87.0k
    }
12257
20.4M
    if ((end_in_lf == 1) && (ctxt->input != NULL) &&
12258
20.4M
        (ctxt->input->buf != NULL)) {
12259
115k
  size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
12260
115k
           ctxt->input);
12261
115k
  size_t current = ctxt->input->cur - ctxt->input->base;
12262
12263
115k
  xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
12264
12265
115k
  xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
12266
115k
            base, current);
12267
115k
    }
12268
20.4M
    if (terminate) {
12269
  /*
12270
   * Check for termination
12271
   */
12272
490k
  int cur_avail = 0;
12273
12274
490k
  if (ctxt->input != NULL) {
12275
490k
      if (ctxt->input->buf == NULL)
12276
0
    cur_avail = ctxt->input->length -
12277
0
          (ctxt->input->cur - ctxt->input->base);
12278
490k
      else
12279
490k
    cur_avail = xmlBufUse(ctxt->input->buf->buffer) -
12280
490k
                    (ctxt->input->cur - ctxt->input->base);
12281
490k
  }
12282
12283
490k
  if ((ctxt->instate != XML_PARSER_EOF) &&
12284
490k
      (ctxt->instate != XML_PARSER_EPILOG)) {
12285
359k
      xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
12286
359k
  }
12287
490k
  if ((ctxt->instate == XML_PARSER_EPILOG) && (cur_avail > 0)) {
12288
2.39k
      xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
12289
2.39k
  }
12290
490k
  if (ctxt->instate != XML_PARSER_EOF) {
12291
490k
      if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
12292
490k
    ctxt->sax->endDocument(ctxt->userData);
12293
490k
  }
12294
490k
  ctxt->instate = XML_PARSER_EOF;
12295
490k
    }
12296
20.4M
    if (ctxt->wellFormed == 0)
12297
6.20M
  return((xmlParserErrors) ctxt->errNo);
12298
14.2M
    else
12299
14.2M
        return(0);
12300
20.4M
}
12301
12302
/************************************************************************
12303
 *                  *
12304
 *    I/O front end functions to the parser     *
12305
 *                  *
12306
 ************************************************************************/
12307
12308
/**
12309
 * xmlCreatePushParserCtxt:
12310
 * @sax:  a SAX handler
12311
 * @user_data:  The user data returned on SAX callbacks
12312
 * @chunk:  a pointer to an array of chars
12313
 * @size:  number of chars in the array
12314
 * @filename:  an optional file name or URI
12315
 *
12316
 * Create a parser context for using the XML parser in push mode.
12317
 * If @chunk is non-NULL and @size is non-zero, the data is used to detect
12318
 * the encoding.  The remaining characters will be parsed so they
12319
 * don't need to be fed in again through xmlParseChunk.
12320
 * To allow content encoding detection, @size should be >= 4
12321
 * The value of @filename is used for fetching external entities
12322
 * and error/warning reports.
12323
 *
12324
 * Returns the new parser context or NULL
12325
 */
12326
12327
xmlParserCtxtPtr
12328
xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12329
2.00M
                        const char *chunk, int size, const char *filename) {
12330
2.00M
    xmlParserCtxtPtr ctxt;
12331
2.00M
    xmlParserInputPtr inputStream;
12332
2.00M
    xmlParserInputBufferPtr buf;
12333
2.00M
    xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
12334
12335
    /*
12336
     * plug some encoding conversion routines
12337
     */
12338
2.00M
    if ((chunk != NULL) && (size >= 4))
12339
994k
  enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
12340
12341
2.00M
    buf = xmlAllocParserInputBuffer(enc);
12342
2.00M
    if (buf == NULL) return(NULL);
12343
12344
2.00M
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
12345
2.00M
    if (ctxt == NULL) {
12346
0
        xmlErrMemory(NULL, "creating parser: out of memory\n");
12347
0
  xmlFreeParserInputBuffer(buf);
12348
0
  return(NULL);
12349
0
    }
12350
2.00M
    ctxt->dictNames = 1;
12351
2.00M
    if (filename == NULL) {
12352
1.00M
  ctxt->directory = NULL;
12353
1.00M
    } else {
12354
1.00M
        ctxt->directory = xmlParserGetDirectory(filename);
12355
1.00M
    }
12356
12357
2.00M
    inputStream = xmlNewInputStream(ctxt);
12358
2.00M
    if (inputStream == NULL) {
12359
0
  xmlFreeParserCtxt(ctxt);
12360
0
  xmlFreeParserInputBuffer(buf);
12361
0
  return(NULL);
12362
0
    }
12363
12364
2.00M
    if (filename == NULL)
12365
1.00M
  inputStream->filename = NULL;
12366
1.00M
    else {
12367
1.00M
  inputStream->filename = (char *)
12368
1.00M
      xmlCanonicPath((const xmlChar *) filename);
12369
1.00M
  if (inputStream->filename == NULL) {
12370
0
            xmlFreeInputStream(inputStream);
12371
0
      xmlFreeParserCtxt(ctxt);
12372
0
      xmlFreeParserInputBuffer(buf);
12373
0
      return(NULL);
12374
0
  }
12375
1.00M
    }
12376
2.00M
    inputStream->buf = buf;
12377
2.00M
    xmlBufResetInput(inputStream->buf->buffer, inputStream);
12378
2.00M
    inputPush(ctxt, inputStream);
12379
12380
    /*
12381
     * If the caller didn't provide an initial 'chunk' for determining
12382
     * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
12383
     * that it can be automatically determined later
12384
     */
12385
2.00M
    ctxt->charset = XML_CHAR_ENCODING_NONE;
12386
12387
2.00M
    if ((size != 0) && (chunk != NULL) &&
12388
2.00M
        (ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
12389
994k
  size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12390
994k
  size_t cur = ctxt->input->cur - ctxt->input->base;
12391
12392
994k
  xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12393
12394
994k
        xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
12395
#ifdef DEBUG_PUSH
12396
  xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12397
#endif
12398
994k
    }
12399
12400
2.00M
    if (enc != XML_CHAR_ENCODING_NONE) {
12401
502k
        xmlSwitchEncoding(ctxt, enc);
12402
502k
    }
12403
12404
2.00M
    return(ctxt);
12405
2.00M
}
12406
#endif /* LIBXML_PUSH_ENABLED */
12407
12408
/**
12409
 * xmlHaltParser:
12410
 * @ctxt:  an XML parser context
12411
 *
12412
 * Blocks further parser processing, doesn't override the error code;
12413
 * for internal use
12414
 */
12415
static void
12416
1.90M
xmlHaltParser(xmlParserCtxtPtr ctxt) {
12417
1.90M
    if (ctxt == NULL)
12418
0
        return;
12419
1.90M
    ctxt->instate = XML_PARSER_EOF;
12420
1.90M
    ctxt->disableSAX = 1;
12421
1.94M
    while (ctxt->inputNr > 1)
12422
42.8k
        xmlFreeInputStream(inputPop(ctxt));
12423
1.90M
    if (ctxt->input != NULL) {
12424
        /*
12425
   * in case there was a specific allocation deallocate before
12426
   * overriding base
12427
   */
12428
1.90M
        if (ctxt->input->free != NULL) {
12429
0
      ctxt->input->free((xmlChar *) ctxt->input->base);
12430
0
      ctxt->input->free = NULL;
12431
0
  }
12432
1.90M
        if (ctxt->input->buf != NULL) {
12433
1.57M
            xmlFreeParserInputBuffer(ctxt->input->buf);
12434
1.57M
            ctxt->input->buf = NULL;
12435
1.57M
        }
12436
1.90M
  ctxt->input->cur = BAD_CAST"";
12437
1.90M
        ctxt->input->length = 0;
12438
1.90M
  ctxt->input->base = ctxt->input->cur;
12439
1.90M
        ctxt->input->end = ctxt->input->cur;
12440
1.90M
    }
12441
1.90M
}
12442
12443
/**
12444
 * xmlStopParser:
12445
 * @ctxt:  an XML parser context
12446
 *
12447
 * Blocks further parser processing
12448
 */
12449
void
12450
1.00M
xmlStopParser(xmlParserCtxtPtr ctxt) {
12451
1.00M
    if (ctxt == NULL)
12452
0
        return;
12453
1.00M
    xmlHaltParser(ctxt);
12454
1.00M
    ctxt->errNo = XML_ERR_USER_STOP;
12455
1.00M
}
12456
12457
/**
12458
 * xmlCreateIOParserCtxt:
12459
 * @sax:  a SAX handler
12460
 * @user_data:  The user data returned on SAX callbacks
12461
 * @ioread:  an I/O read function
12462
 * @ioclose:  an I/O close function
12463
 * @ioctx:  an I/O handler
12464
 * @enc:  the charset encoding if known
12465
 *
12466
 * Create a parser context for using the XML parser with an existing
12467
 * I/O stream
12468
 *
12469
 * Returns the new parser context or NULL
12470
 */
12471
xmlParserCtxtPtr
12472
xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12473
  xmlInputReadCallback   ioread, xmlInputCloseCallback  ioclose,
12474
0
  void *ioctx, xmlCharEncoding enc) {
12475
0
    xmlParserCtxtPtr ctxt;
12476
0
    xmlParserInputPtr inputStream;
12477
0
    xmlParserInputBufferPtr buf;
12478
12479
0
    if (ioread == NULL) return(NULL);
12480
12481
0
    buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
12482
0
    if (buf == NULL) {
12483
0
        if (ioclose != NULL)
12484
0
            ioclose(ioctx);
12485
0
        return (NULL);
12486
0
    }
12487
12488
0
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
12489
0
    if (ctxt == NULL) {
12490
0
  xmlFreeParserInputBuffer(buf);
12491
0
  return(NULL);
12492
0
    }
12493
12494
0
    inputStream = xmlNewIOInputStream(ctxt, buf, enc);
12495
0
    if (inputStream == NULL) {
12496
0
  xmlFreeParserCtxt(ctxt);
12497
0
  return(NULL);
12498
0
    }
12499
0
    inputPush(ctxt, inputStream);
12500
12501
0
    return(ctxt);
12502
0
}
12503
12504
#ifdef LIBXML_VALID_ENABLED
12505
/************************************************************************
12506
 *                  *
12507
 *    Front ends when parsing a DTD       *
12508
 *                  *
12509
 ************************************************************************/
12510
12511
/**
12512
 * xmlIOParseDTD:
12513
 * @sax:  the SAX handler block or NULL
12514
 * @input:  an Input Buffer
12515
 * @enc:  the charset encoding if known
12516
 *
12517
 * Load and parse a DTD
12518
 *
12519
 * Returns the resulting xmlDtdPtr or NULL in case of error.
12520
 * @input will be freed by the function in any case.
12521
 */
12522
12523
xmlDtdPtr
12524
xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
12525
0
        xmlCharEncoding enc) {
12526
0
    xmlDtdPtr ret = NULL;
12527
0
    xmlParserCtxtPtr ctxt;
12528
0
    xmlParserInputPtr pinput = NULL;
12529
0
    xmlChar start[4];
12530
12531
0
    if (input == NULL)
12532
0
  return(NULL);
12533
12534
0
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
12535
0
    if (ctxt == NULL) {
12536
0
        xmlFreeParserInputBuffer(input);
12537
0
  return(NULL);
12538
0
    }
12539
12540
    /* We are loading a DTD */
12541
0
    ctxt->options |= XML_PARSE_DTDLOAD;
12542
12543
0
    xmlDetectSAX2(ctxt);
12544
12545
    /*
12546
     * generate a parser input from the I/O handler
12547
     */
12548
12549
0
    pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12550
0
    if (pinput == NULL) {
12551
0
        xmlFreeParserInputBuffer(input);
12552
0
  xmlFreeParserCtxt(ctxt);
12553
0
  return(NULL);
12554
0
    }
12555
12556
    /*
12557
     * plug some encoding conversion routines here.
12558
     */
12559
0
    if (xmlPushInput(ctxt, pinput) < 0) {
12560
0
  xmlFreeParserCtxt(ctxt);
12561
0
  return(NULL);
12562
0
    }
12563
0
    if (enc != XML_CHAR_ENCODING_NONE) {
12564
0
        xmlSwitchEncoding(ctxt, enc);
12565
0
    }
12566
12567
0
    pinput->filename = NULL;
12568
0
    pinput->line = 1;
12569
0
    pinput->col = 1;
12570
0
    pinput->base = ctxt->input->cur;
12571
0
    pinput->cur = ctxt->input->cur;
12572
0
    pinput->free = NULL;
12573
12574
    /*
12575
     * let's parse that entity knowing it's an external subset.
12576
     */
12577
0
    ctxt->inSubset = 2;
12578
0
    ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12579
0
    if (ctxt->myDoc == NULL) {
12580
0
  xmlErrMemory(ctxt, "New Doc failed");
12581
0
  return(NULL);
12582
0
    }
12583
0
    ctxt->myDoc->properties = XML_DOC_INTERNAL;
12584
0
    ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12585
0
                                 BAD_CAST "none", BAD_CAST "none");
12586
12587
0
    if ((enc == XML_CHAR_ENCODING_NONE) &&
12588
0
        ((ctxt->input->end - ctxt->input->cur) >= 4)) {
12589
  /*
12590
   * Get the first 4 bytes and decode the charset
12591
   * if enc != XML_CHAR_ENCODING_NONE
12592
   * plug some encoding conversion routines.
12593
   */
12594
0
  start[0] = RAW;
12595
0
  start[1] = NXT(1);
12596
0
  start[2] = NXT(2);
12597
0
  start[3] = NXT(3);
12598
0
  enc = xmlDetectCharEncoding(start, 4);
12599
0
  if (enc != XML_CHAR_ENCODING_NONE) {
12600
0
      xmlSwitchEncoding(ctxt, enc);
12601
0
  }
12602
0
    }
12603
12604
0
    xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
12605
12606
0
    if (ctxt->myDoc != NULL) {
12607
0
  if (ctxt->wellFormed) {
12608
0
      ret = ctxt->myDoc->extSubset;
12609
0
      ctxt->myDoc->extSubset = NULL;
12610
0
      if (ret != NULL) {
12611
0
    xmlNodePtr tmp;
12612
12613
0
    ret->doc = NULL;
12614
0
    tmp = ret->children;
12615
0
    while (tmp != NULL) {
12616
0
        tmp->doc = NULL;
12617
0
        tmp = tmp->next;
12618
0
    }
12619
0
      }
12620
0
  } else {
12621
0
      ret = NULL;
12622
0
  }
12623
0
        xmlFreeDoc(ctxt->myDoc);
12624
0
        ctxt->myDoc = NULL;
12625
0
    }
12626
0
    xmlFreeParserCtxt(ctxt);
12627
12628
0
    return(ret);
12629
0
}
12630
12631
/**
12632
 * xmlSAXParseDTD:
12633
 * @sax:  the SAX handler block
12634
 * @ExternalID:  a NAME* containing the External ID of the DTD
12635
 * @SystemID:  a NAME* containing the URL to the DTD
12636
 *
12637
 * DEPRECATED: Don't use.
12638
 *
12639
 * Load and parse an external subset.
12640
 *
12641
 * Returns the resulting xmlDtdPtr or NULL in case of error.
12642
 */
12643
12644
xmlDtdPtr
12645
xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
12646
0
                          const xmlChar *SystemID) {
12647
0
    xmlDtdPtr ret = NULL;
12648
0
    xmlParserCtxtPtr ctxt;
12649
0
    xmlParserInputPtr input = NULL;
12650
0
    xmlCharEncoding enc;
12651
0
    xmlChar* systemIdCanonic;
12652
12653
0
    if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
12654
12655
0
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
12656
0
    if (ctxt == NULL) {
12657
0
  return(NULL);
12658
0
    }
12659
12660
    /* We are loading a DTD */
12661
0
    ctxt->options |= XML_PARSE_DTDLOAD;
12662
12663
    /*
12664
     * Canonicalise the system ID
12665
     */
12666
0
    systemIdCanonic = xmlCanonicPath(SystemID);
12667
0
    if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
12668
0
  xmlFreeParserCtxt(ctxt);
12669
0
  return(NULL);
12670
0
    }
12671
12672
    /*
12673
     * Ask the entity resolver to load the DTD
12674
     */
12675
12676
0
    if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
12677
0
  input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
12678
0
                                   systemIdCanonic);
12679
0
    if (input == NULL) {
12680
0
  xmlFreeParserCtxt(ctxt);
12681
0
  if (systemIdCanonic != NULL)
12682
0
      xmlFree(systemIdCanonic);
12683
0
  return(NULL);
12684
0
    }
12685
12686
    /*
12687
     * plug some encoding conversion routines here.
12688
     */
12689
0
    if (xmlPushInput(ctxt, input) < 0) {
12690
0
  xmlFreeParserCtxt(ctxt);
12691
0
  if (systemIdCanonic != NULL)
12692
0
      xmlFree(systemIdCanonic);
12693
0
  return(NULL);
12694
0
    }
12695
0
    if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12696
0
  enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
12697
0
  xmlSwitchEncoding(ctxt, enc);
12698
0
    }
12699
12700
0
    if (input->filename == NULL)
12701
0
  input->filename = (char *) systemIdCanonic;
12702
0
    else
12703
0
  xmlFree(systemIdCanonic);
12704
0
    input->line = 1;
12705
0
    input->col = 1;
12706
0
    input->base = ctxt->input->cur;
12707
0
    input->cur = ctxt->input->cur;
12708
0
    input->free = NULL;
12709
12710
    /*
12711
     * let's parse that entity knowing it's an external subset.
12712
     */
12713
0
    ctxt->inSubset = 2;
12714
0
    ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12715
0
    if (ctxt->myDoc == NULL) {
12716
0
  xmlErrMemory(ctxt, "New Doc failed");
12717
0
  xmlFreeParserCtxt(ctxt);
12718
0
  return(NULL);
12719
0
    }
12720
0
    ctxt->myDoc->properties = XML_DOC_INTERNAL;
12721
0
    ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12722
0
                                 ExternalID, SystemID);
12723
0
    xmlParseExternalSubset(ctxt, ExternalID, SystemID);
12724
12725
0
    if (ctxt->myDoc != NULL) {
12726
0
  if (ctxt->wellFormed) {
12727
0
      ret = ctxt->myDoc->extSubset;
12728
0
      ctxt->myDoc->extSubset = NULL;
12729
0
      if (ret != NULL) {
12730
0
    xmlNodePtr tmp;
12731
12732
0
    ret->doc = NULL;
12733
0
    tmp = ret->children;
12734
0
    while (tmp != NULL) {
12735
0
        tmp->doc = NULL;
12736
0
        tmp = tmp->next;
12737
0
    }
12738
0
      }
12739
0
  } else {
12740
0
      ret = NULL;
12741
0
  }
12742
0
        xmlFreeDoc(ctxt->myDoc);
12743
0
        ctxt->myDoc = NULL;
12744
0
    }
12745
0
    xmlFreeParserCtxt(ctxt);
12746
12747
0
    return(ret);
12748
0
}
12749
12750
12751
/**
12752
 * xmlParseDTD:
12753
 * @ExternalID:  a NAME* containing the External ID of the DTD
12754
 * @SystemID:  a NAME* containing the URL to the DTD
12755
 *
12756
 * Load and parse an external subset.
12757
 *
12758
 * Returns the resulting xmlDtdPtr or NULL in case of error.
12759
 */
12760
12761
xmlDtdPtr
12762
0
xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
12763
0
    return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
12764
0
}
12765
#endif /* LIBXML_VALID_ENABLED */
12766
12767
/************************************************************************
12768
 *                  *
12769
 *    Front ends when parsing an Entity     *
12770
 *                  *
12771
 ************************************************************************/
12772
12773
/**
12774
 * xmlParseCtxtExternalEntity:
12775
 * @ctx:  the existing parsing context
12776
 * @URL:  the URL for the entity to load
12777
 * @ID:  the System ID for the entity to load
12778
 * @lst:  the return value for the set of parsed nodes
12779
 *
12780
 * Parse an external general entity within an existing parsing context
12781
 * An external general parsed entity is well-formed if it matches the
12782
 * production labeled extParsedEnt.
12783
 *
12784
 * [78] extParsedEnt ::= TextDecl? content
12785
 *
12786
 * Returns 0 if the entity is well formed, -1 in case of args problem and
12787
 *    the parser error code otherwise
12788
 */
12789
12790
int
12791
xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
12792
0
                 const xmlChar *ID, xmlNodePtr *lst) {
12793
0
    void *userData;
12794
12795
0
    if (ctx == NULL) return(-1);
12796
    /*
12797
     * If the user provided their own SAX callbacks, then reuse the
12798
     * userData callback field, otherwise the expected setup in a
12799
     * DOM builder is to have userData == ctxt
12800
     */
12801
0
    if (ctx->userData == ctx)
12802
0
        userData = NULL;
12803
0
    else
12804
0
        userData = ctx->userData;
12805
0
    return xmlParseExternalEntityPrivate(ctx->myDoc, ctx, ctx->sax,
12806
0
                                         userData, ctx->depth + 1,
12807
0
                                         URL, ID, lst);
12808
0
}
12809
12810
/**
12811
 * xmlParseExternalEntityPrivate:
12812
 * @doc:  the document the chunk pertains to
12813
 * @oldctxt:  the previous parser context if available
12814
 * @sax:  the SAX handler block (possibly NULL)
12815
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
12816
 * @depth:  Used for loop detection, use 0
12817
 * @URL:  the URL for the entity to load
12818
 * @ID:  the System ID for the entity to load
12819
 * @list:  the return value for the set of parsed nodes
12820
 *
12821
 * Private version of xmlParseExternalEntity()
12822
 *
12823
 * Returns 0 if the entity is well formed, -1 in case of args problem and
12824
 *    the parser error code otherwise
12825
 */
12826
12827
static xmlParserErrors
12828
xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
12829
                xmlSAXHandlerPtr sax,
12830
          void *user_data, int depth, const xmlChar *URL,
12831
441k
          const xmlChar *ID, xmlNodePtr *list) {
12832
441k
    xmlParserCtxtPtr ctxt;
12833
441k
    xmlDocPtr newDoc;
12834
441k
    xmlNodePtr newRoot;
12835
441k
    xmlParserErrors ret = XML_ERR_OK;
12836
441k
    xmlChar start[4];
12837
441k
    xmlCharEncoding enc;
12838
12839
441k
    if (((depth > 40) &&
12840
441k
  ((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) ||
12841
441k
  (depth > 100)) {
12842
0
  xmlFatalErrMsg(oldctxt, XML_ERR_ENTITY_LOOP,
12843
0
                       "Maximum entity nesting depth exceeded");
12844
0
        return(XML_ERR_ENTITY_LOOP);
12845
0
    }
12846
12847
441k
    if (list != NULL)
12848
74.2k
        *list = NULL;
12849
441k
    if ((URL == NULL) && (ID == NULL))
12850
1.09k
  return(XML_ERR_INTERNAL_ERROR);
12851
440k
    if (doc == NULL)
12852
0
  return(XML_ERR_INTERNAL_ERROR);
12853
12854
440k
    ctxt = xmlCreateEntityParserCtxtInternal(sax, user_data, URL, ID, NULL,
12855
440k
                                             oldctxt);
12856
440k
    if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
12857
52.1k
    if (oldctxt != NULL) {
12858
52.1k
        ctxt->nbErrors = oldctxt->nbErrors;
12859
52.1k
        ctxt->nbWarnings = oldctxt->nbWarnings;
12860
52.1k
    }
12861
52.1k
    xmlDetectSAX2(ctxt);
12862
12863
52.1k
    newDoc = xmlNewDoc(BAD_CAST "1.0");
12864
52.1k
    if (newDoc == NULL) {
12865
0
  xmlFreeParserCtxt(ctxt);
12866
0
  return(XML_ERR_INTERNAL_ERROR);
12867
0
    }
12868
52.1k
    newDoc->properties = XML_DOC_INTERNAL;
12869
52.1k
    if (doc) {
12870
52.1k
        newDoc->intSubset = doc->intSubset;
12871
52.1k
        newDoc->extSubset = doc->extSubset;
12872
52.1k
        if (doc->dict) {
12873
34.9k
            newDoc->dict = doc->dict;
12874
34.9k
            xmlDictReference(newDoc->dict);
12875
34.9k
        }
12876
52.1k
        if (doc->URL != NULL) {
12877
34.3k
            newDoc->URL = xmlStrdup(doc->URL);
12878
34.3k
        }
12879
52.1k
    }
12880
52.1k
    newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12881
52.1k
    if (newRoot == NULL) {
12882
0
  if (sax != NULL)
12883
0
  xmlFreeParserCtxt(ctxt);
12884
0
  newDoc->intSubset = NULL;
12885
0
  newDoc->extSubset = NULL;
12886
0
        xmlFreeDoc(newDoc);
12887
0
  return(XML_ERR_INTERNAL_ERROR);
12888
0
    }
12889
52.1k
    xmlAddChild((xmlNodePtr) newDoc, newRoot);
12890
52.1k
    nodePush(ctxt, newDoc->children);
12891
52.1k
    if (doc == NULL) {
12892
0
        ctxt->myDoc = newDoc;
12893
52.1k
    } else {
12894
52.1k
        ctxt->myDoc = doc;
12895
52.1k
        newRoot->doc = doc;
12896
52.1k
    }
12897
12898
    /*
12899
     * Get the 4 first bytes and decode the charset
12900
     * if enc != XML_CHAR_ENCODING_NONE
12901
     * plug some encoding conversion routines.
12902
     */
12903
52.1k
    GROW;
12904
52.1k
    if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12905
50.0k
  start[0] = RAW;
12906
50.0k
  start[1] = NXT(1);
12907
50.0k
  start[2] = NXT(2);
12908
50.0k
  start[3] = NXT(3);
12909
50.0k
  enc = xmlDetectCharEncoding(start, 4);
12910
50.0k
  if (enc != XML_CHAR_ENCODING_NONE) {
12911
7.50k
      xmlSwitchEncoding(ctxt, enc);
12912
7.50k
  }
12913
50.0k
    }
12914
12915
    /*
12916
     * Parse a possible text declaration first
12917
     */
12918
52.1k
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
12919
5.03k
  xmlParseTextDecl(ctxt);
12920
        /*
12921
         * An XML-1.0 document can't reference an entity not XML-1.0
12922
         */
12923
5.03k
        if ((xmlStrEqual(oldctxt->version, BAD_CAST "1.0")) &&
12924
5.03k
            (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
12925
275
            xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
12926
275
                           "Version mismatch between document and entity\n");
12927
275
        }
12928
5.03k
    }
12929
12930
52.1k
    ctxt->instate = XML_PARSER_CONTENT;
12931
52.1k
    ctxt->depth = depth;
12932
52.1k
    if (oldctxt != NULL) {
12933
52.1k
  ctxt->_private = oldctxt->_private;
12934
52.1k
  ctxt->loadsubset = oldctxt->loadsubset;
12935
52.1k
  ctxt->validate = oldctxt->validate;
12936
52.1k
  ctxt->valid = oldctxt->valid;
12937
52.1k
  ctxt->replaceEntities = oldctxt->replaceEntities;
12938
52.1k
        if (oldctxt->validate) {
12939
22.6k
            ctxt->vctxt.error = oldctxt->vctxt.error;
12940
22.6k
            ctxt->vctxt.warning = oldctxt->vctxt.warning;
12941
22.6k
            ctxt->vctxt.userData = oldctxt->vctxt.userData;
12942
22.6k
            ctxt->vctxt.flags = oldctxt->vctxt.flags;
12943
22.6k
        }
12944
52.1k
  ctxt->external = oldctxt->external;
12945
52.1k
        if (ctxt->dict) xmlDictFree(ctxt->dict);
12946
52.1k
        ctxt->dict = oldctxt->dict;
12947
52.1k
        ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12948
52.1k
        ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12949
52.1k
        ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
12950
52.1k
        ctxt->dictNames = oldctxt->dictNames;
12951
52.1k
        ctxt->attsDefault = oldctxt->attsDefault;
12952
52.1k
        ctxt->attsSpecial = oldctxt->attsSpecial;
12953
52.1k
        ctxt->linenumbers = oldctxt->linenumbers;
12954
52.1k
  ctxt->record_info = oldctxt->record_info;
12955
52.1k
  ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
12956
52.1k
  ctxt->node_seq.length = oldctxt->node_seq.length;
12957
52.1k
  ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
12958
52.1k
    } else {
12959
  /*
12960
   * Doing validity checking on chunk without context
12961
   * doesn't make sense
12962
   */
12963
0
  ctxt->_private = NULL;
12964
0
  ctxt->validate = 0;
12965
0
  ctxt->external = 2;
12966
0
  ctxt->loadsubset = 0;
12967
0
    }
12968
12969
52.1k
    xmlParseContent(ctxt);
12970
12971
52.1k
    if ((RAW == '<') && (NXT(1) == '/')) {
12972
4.20k
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12973
47.9k
    } else if (RAW != 0) {
12974
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
12975
0
    }
12976
52.1k
    if (ctxt->node != newDoc->children) {
12977
13.4k
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12978
13.4k
    }
12979
12980
52.1k
    if (!ctxt->wellFormed) {
12981
32.3k
  ret = (xmlParserErrors)ctxt->errNo;
12982
32.3k
        if (oldctxt != NULL) {
12983
32.3k
            oldctxt->errNo = ctxt->errNo;
12984
32.3k
            oldctxt->wellFormed = 0;
12985
32.3k
            xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
12986
32.3k
        }
12987
32.3k
    } else {
12988
19.7k
  if (list != NULL) {
12989
6.49k
      xmlNodePtr cur;
12990
12991
      /*
12992
       * Return the newly created nodeset after unlinking it from
12993
       * they pseudo parent.
12994
       */
12995
6.49k
      cur = newDoc->children->children;
12996
6.49k
      *list = cur;
12997
66.4k
      while (cur != NULL) {
12998
60.0k
    cur->parent = NULL;
12999
60.0k
    cur = cur->next;
13000
60.0k
      }
13001
6.49k
            newDoc->children->children = NULL;
13002
6.49k
  }
13003
19.7k
  ret = XML_ERR_OK;
13004
19.7k
    }
13005
13006
    /*
13007
     * Also record the size of the entity parsed
13008
     */
13009
52.1k
    if (ctxt->input != NULL && oldctxt != NULL) {
13010
52.1k
        unsigned long consumed = ctxt->input->consumed;
13011
13012
52.1k
        xmlSaturatedAddSizeT(&consumed, ctxt->input->cur - ctxt->input->base);
13013
13014
52.1k
        xmlSaturatedAdd(&oldctxt->sizeentities, consumed);
13015
52.1k
        xmlSaturatedAdd(&oldctxt->sizeentities, ctxt->sizeentities);
13016
13017
52.1k
        xmlSaturatedAdd(&oldctxt->sizeentcopy, consumed);
13018
52.1k
        xmlSaturatedAdd(&oldctxt->sizeentcopy, ctxt->sizeentcopy);
13019
52.1k
    }
13020
13021
52.1k
    if (oldctxt != NULL) {
13022
52.1k
        ctxt->dict = NULL;
13023
52.1k
        ctxt->attsDefault = NULL;
13024
52.1k
        ctxt->attsSpecial = NULL;
13025
52.1k
        oldctxt->nbErrors = ctxt->nbErrors;
13026
52.1k
        oldctxt->nbWarnings = ctxt->nbWarnings;
13027
52.1k
        oldctxt->validate = ctxt->validate;
13028
52.1k
        oldctxt->valid = ctxt->valid;
13029
52.1k
        oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
13030
52.1k
        oldctxt->node_seq.length = ctxt->node_seq.length;
13031
52.1k
        oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
13032
52.1k
    }
13033
52.1k
    ctxt->node_seq.maximum = 0;
13034
52.1k
    ctxt->node_seq.length = 0;
13035
52.1k
    ctxt->node_seq.buffer = NULL;
13036
52.1k
    xmlFreeParserCtxt(ctxt);
13037
52.1k
    newDoc->intSubset = NULL;
13038
52.1k
    newDoc->extSubset = NULL;
13039
52.1k
    xmlFreeDoc(newDoc);
13040
13041
52.1k
    return(ret);
13042
52.1k
}
13043
13044
#ifdef LIBXML_SAX1_ENABLED
13045
/**
13046
 * xmlParseExternalEntity:
13047
 * @doc:  the document the chunk pertains to
13048
 * @sax:  the SAX handler block (possibly NULL)
13049
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
13050
 * @depth:  Used for loop detection, use 0
13051
 * @URL:  the URL for the entity to load
13052
 * @ID:  the System ID for the entity to load
13053
 * @lst:  the return value for the set of parsed nodes
13054
 *
13055
 * Parse an external general entity
13056
 * An external general parsed entity is well-formed if it matches the
13057
 * production labeled extParsedEnt.
13058
 *
13059
 * [78] extParsedEnt ::= TextDecl? content
13060
 *
13061
 * Returns 0 if the entity is well formed, -1 in case of args problem and
13062
 *    the parser error code otherwise
13063
 */
13064
13065
int
13066
xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
13067
0
    int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
13068
0
    return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
13069
0
                           ID, lst));
13070
0
}
13071
13072
/**
13073
 * xmlParseBalancedChunkMemory:
13074
 * @doc:  the document the chunk pertains to (must not be NULL)
13075
 * @sax:  the SAX handler block (possibly NULL)
13076
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
13077
 * @depth:  Used for loop detection, use 0
13078
 * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
13079
 * @lst:  the return value for the set of parsed nodes
13080
 *
13081
 * Parse a well-balanced chunk of an XML document
13082
 * called by the parser
13083
 * The allowed sequence for the Well Balanced Chunk is the one defined by
13084
 * the content production in the XML grammar:
13085
 *
13086
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13087
 *
13088
 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13089
 *    the parser error code otherwise
13090
 */
13091
13092
int
13093
xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13094
0
     void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
13095
0
    return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
13096
0
                                                depth, string, lst, 0 );
13097
0
}
13098
#endif /* LIBXML_SAX1_ENABLED */
13099
13100
/**
13101
 * xmlParseBalancedChunkMemoryInternal:
13102
 * @oldctxt:  the existing parsing context
13103
 * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
13104
 * @user_data:  the user data field for the parser context
13105
 * @lst:  the return value for the set of parsed nodes
13106
 *
13107
 *
13108
 * Parse a well-balanced chunk of an XML document
13109
 * called by the parser
13110
 * The allowed sequence for the Well Balanced Chunk is the one defined by
13111
 * the content production in the XML grammar:
13112
 *
13113
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13114
 *
13115
 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13116
 * error code otherwise
13117
 *
13118
 * In case recover is set to 1, the nodelist will not be empty even if
13119
 * the parsed chunk is not well balanced.
13120
 */
13121
static xmlParserErrors
13122
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
13123
274k
  const xmlChar *string, void *user_data, xmlNodePtr *lst) {
13124
274k
    xmlParserCtxtPtr ctxt;
13125
274k
    xmlDocPtr newDoc = NULL;
13126
274k
    xmlNodePtr newRoot;
13127
274k
    xmlSAXHandlerPtr oldsax = NULL;
13128
274k
    xmlNodePtr content = NULL;
13129
274k
    xmlNodePtr last = NULL;
13130
274k
    int size;
13131
274k
    xmlParserErrors ret = XML_ERR_OK;
13132
274k
#ifdef SAX2
13133
274k
    int i;
13134
274k
#endif
13135
13136
274k
    if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13137
274k
        (oldctxt->depth >  100)) {
13138
357
  xmlFatalErrMsg(oldctxt, XML_ERR_ENTITY_LOOP,
13139
357
                       "Maximum entity nesting depth exceeded");
13140
357
  return(XML_ERR_ENTITY_LOOP);
13141
357
    }
13142
13143
13144
273k
    if (lst != NULL)
13145
273k
        *lst = NULL;
13146
273k
    if (string == NULL)
13147
464
        return(XML_ERR_INTERNAL_ERROR);
13148
13149
273k
    size = xmlStrlen(string);
13150
13151
273k
    ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13152
273k
    if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
13153
272k
    ctxt->nbErrors = oldctxt->nbErrors;
13154
272k
    ctxt->nbWarnings = oldctxt->nbWarnings;
13155
272k
    if (user_data != NULL)
13156
0
  ctxt->userData = user_data;
13157
272k
    else
13158
272k
  ctxt->userData = ctxt;
13159
272k
    if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
13160
272k
    ctxt->dict = oldctxt->dict;
13161
272k
    ctxt->input_id = oldctxt->input_id;
13162
272k
    ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13163
272k
    ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13164
272k
    ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13165
13166
272k
#ifdef SAX2
13167
    /* propagate namespaces down the entity */
13168
272k
    for (i = 0;i < oldctxt->nsNr;i += 2) {
13169
39
        nsPush(ctxt, oldctxt->nsTab[i], oldctxt->nsTab[i+1]);
13170
39
    }
13171
272k
#endif
13172
13173
272k
    oldsax = ctxt->sax;
13174
272k
    ctxt->sax = oldctxt->sax;
13175
272k
    xmlDetectSAX2(ctxt);
13176
272k
    ctxt->replaceEntities = oldctxt->replaceEntities;
13177
272k
    ctxt->options = oldctxt->options;
13178
13179
272k
    ctxt->_private = oldctxt->_private;
13180
272k
    if (oldctxt->myDoc == NULL) {
13181
0
  newDoc = xmlNewDoc(BAD_CAST "1.0");
13182
0
  if (newDoc == NULL) {
13183
0
      ctxt->sax = oldsax;
13184
0
      ctxt->dict = NULL;
13185
0
      xmlFreeParserCtxt(ctxt);
13186
0
      return(XML_ERR_INTERNAL_ERROR);
13187
0
  }
13188
0
  newDoc->properties = XML_DOC_INTERNAL;
13189
0
  newDoc->dict = ctxt->dict;
13190
0
  xmlDictReference(newDoc->dict);
13191
0
  ctxt->myDoc = newDoc;
13192
272k
    } else {
13193
272k
  ctxt->myDoc = oldctxt->myDoc;
13194
272k
        content = ctxt->myDoc->children;
13195
272k
  last = ctxt->myDoc->last;
13196
272k
    }
13197
272k
    newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
13198
272k
    if (newRoot == NULL) {
13199
0
  ctxt->sax = oldsax;
13200
0
  ctxt->dict = NULL;
13201
0
  xmlFreeParserCtxt(ctxt);
13202
0
  if (newDoc != NULL) {
13203
0
      xmlFreeDoc(newDoc);
13204
0
  }
13205
0
  return(XML_ERR_INTERNAL_ERROR);
13206
0
    }
13207
272k
    ctxt->myDoc->children = NULL;
13208
272k
    ctxt->myDoc->last = NULL;
13209
272k
    xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
13210
272k
    nodePush(ctxt, ctxt->myDoc->children);
13211
272k
    ctxt->instate = XML_PARSER_CONTENT;
13212
272k
    ctxt->depth = oldctxt->depth;
13213
13214
272k
    ctxt->validate = 0;
13215
272k
    ctxt->loadsubset = oldctxt->loadsubset;
13216
272k
    if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
13217
  /*
13218
   * ID/IDREF registration will be done in xmlValidateElement below
13219
   */
13220
207k
  ctxt->loadsubset |= XML_SKIP_IDS;
13221
207k
    }
13222
272k
    ctxt->dictNames = oldctxt->dictNames;
13223
272k
    ctxt->attsDefault = oldctxt->attsDefault;
13224
272k
    ctxt->attsSpecial = oldctxt->attsSpecial;
13225
13226
272k
    xmlParseContent(ctxt);
13227
272k
    if ((RAW == '<') && (NXT(1) == '/')) {
13228
858
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13229
271k
    } else if (RAW != 0) {
13230
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13231
0
    }
13232
272k
    if (ctxt->node != ctxt->myDoc->children) {
13233
4.78k
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13234
4.78k
    }
13235
13236
272k
    if (!ctxt->wellFormed) {
13237
29.1k
  ret = (xmlParserErrors)ctxt->errNo;
13238
29.1k
        oldctxt->errNo = ctxt->errNo;
13239
29.1k
        oldctxt->wellFormed = 0;
13240
29.1k
        xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13241
243k
    } else {
13242
243k
        ret = XML_ERR_OK;
13243
243k
    }
13244
13245
272k
    if ((lst != NULL) && (ret == XML_ERR_OK)) {
13246
243k
  xmlNodePtr cur;
13247
13248
  /*
13249
   * Return the newly created nodeset after unlinking it from
13250
   * they pseudo parent.
13251
   */
13252
243k
  cur = ctxt->myDoc->children->children;
13253
243k
  *lst = cur;
13254
606k
  while (cur != NULL) {
13255
362k
#ifdef LIBXML_VALID_ENABLED
13256
362k
      if ((oldctxt->validate) && (oldctxt->wellFormed) &&
13257
362k
    (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
13258
362k
    (cur->type == XML_ELEMENT_NODE)) {
13259
31.8k
    oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
13260
31.8k
      oldctxt->myDoc, cur);
13261
31.8k
      }
13262
362k
#endif /* LIBXML_VALID_ENABLED */
13263
362k
      cur->parent = NULL;
13264
362k
      cur = cur->next;
13265
362k
  }
13266
243k
  ctxt->myDoc->children->children = NULL;
13267
243k
    }
13268
272k
    if (ctxt->myDoc != NULL) {
13269
272k
  xmlFreeNode(ctxt->myDoc->children);
13270
272k
        ctxt->myDoc->children = content;
13271
272k
        ctxt->myDoc->last = last;
13272
272k
    }
13273
13274
    /*
13275
     * Also record the size of the entity parsed
13276
     */
13277
272k
    if (ctxt->input != NULL && oldctxt != NULL) {
13278
272k
        unsigned long consumed = ctxt->input->consumed;
13279
13280
272k
        xmlSaturatedAddSizeT(&consumed, ctxt->input->cur - ctxt->input->base);
13281
13282
272k
        xmlSaturatedAdd(&oldctxt->sizeentcopy, consumed);
13283
272k
        xmlSaturatedAdd(&oldctxt->sizeentcopy, ctxt->sizeentcopy);
13284
272k
    }
13285
13286
272k
    oldctxt->nbErrors = ctxt->nbErrors;
13287
272k
    oldctxt->nbWarnings = ctxt->nbWarnings;
13288
272k
    ctxt->sax = oldsax;
13289
272k
    ctxt->dict = NULL;
13290
272k
    ctxt->attsDefault = NULL;
13291
272k
    ctxt->attsSpecial = NULL;
13292
272k
    xmlFreeParserCtxt(ctxt);
13293
272k
    if (newDoc != NULL) {
13294
0
  xmlFreeDoc(newDoc);
13295
0
    }
13296
13297
272k
    return(ret);
13298
272k
}
13299
13300
/**
13301
 * xmlParseInNodeContext:
13302
 * @node:  the context node
13303
 * @data:  the input string
13304
 * @datalen:  the input string length in bytes
13305
 * @options:  a combination of xmlParserOption
13306
 * @lst:  the return value for the set of parsed nodes
13307
 *
13308
 * Parse a well-balanced chunk of an XML document
13309
 * within the context (DTD, namespaces, etc ...) of the given node.
13310
 *
13311
 * The allowed sequence for the data is a Well Balanced Chunk defined by
13312
 * the content production in the XML grammar:
13313
 *
13314
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13315
 *
13316
 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13317
 * error code otherwise
13318
 */
13319
xmlParserErrors
13320
xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
13321
0
                      int options, xmlNodePtr *lst) {
13322
0
#ifdef SAX2
13323
0
    xmlParserCtxtPtr ctxt;
13324
0
    xmlDocPtr doc = NULL;
13325
0
    xmlNodePtr fake, cur;
13326
0
    int nsnr = 0;
13327
13328
0
    xmlParserErrors ret = XML_ERR_OK;
13329
13330
    /*
13331
     * check all input parameters, grab the document
13332
     */
13333
0
    if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
13334
0
        return(XML_ERR_INTERNAL_ERROR);
13335
0
    switch (node->type) {
13336
0
        case XML_ELEMENT_NODE:
13337
0
        case XML_ATTRIBUTE_NODE:
13338
0
        case XML_TEXT_NODE:
13339
0
        case XML_CDATA_SECTION_NODE:
13340
0
        case XML_ENTITY_REF_NODE:
13341
0
        case XML_PI_NODE:
13342
0
        case XML_COMMENT_NODE:
13343
0
        case XML_DOCUMENT_NODE:
13344
0
        case XML_HTML_DOCUMENT_NODE:
13345
0
      break;
13346
0
  default:
13347
0
      return(XML_ERR_INTERNAL_ERROR);
13348
13349
0
    }
13350
0
    while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
13351
0
           (node->type != XML_DOCUMENT_NODE) &&
13352
0
     (node->type != XML_HTML_DOCUMENT_NODE))
13353
0
  node = node->parent;
13354
0
    if (node == NULL)
13355
0
  return(XML_ERR_INTERNAL_ERROR);
13356
0
    if (node->type == XML_ELEMENT_NODE)
13357
0
  doc = node->doc;
13358
0
    else
13359
0
        doc = (xmlDocPtr) node;
13360
0
    if (doc == NULL)
13361
0
  return(XML_ERR_INTERNAL_ERROR);
13362
13363
    /*
13364
     * allocate a context and set-up everything not related to the
13365
     * node position in the tree
13366
     */
13367
0
    if (doc->type == XML_DOCUMENT_NODE)
13368
0
  ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
13369
0
#ifdef LIBXML_HTML_ENABLED
13370
0
    else if (doc->type == XML_HTML_DOCUMENT_NODE) {
13371
0
  ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
13372
        /*
13373
         * When parsing in context, it makes no sense to add implied
13374
         * elements like html/body/etc...
13375
         */
13376
0
        options |= HTML_PARSE_NOIMPLIED;
13377
0
    }
13378
0
#endif
13379
0
    else
13380
0
        return(XML_ERR_INTERNAL_ERROR);
13381
13382
0
    if (ctxt == NULL)
13383
0
        return(XML_ERR_NO_MEMORY);
13384
13385
    /*
13386
     * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
13387
     * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
13388
     * we must wait until the last moment to free the original one.
13389
     */
13390
0
    if (doc->dict != NULL) {
13391
0
        if (ctxt->dict != NULL)
13392
0
      xmlDictFree(ctxt->dict);
13393
0
  ctxt->dict = doc->dict;
13394
0
    } else
13395
0
        options |= XML_PARSE_NODICT;
13396
13397
0
    if (doc->encoding != NULL) {
13398
0
        xmlCharEncodingHandlerPtr hdlr;
13399
13400
0
        if (ctxt->encoding != NULL)
13401
0
      xmlFree((xmlChar *) ctxt->encoding);
13402
0
        ctxt->encoding = xmlStrdup((const xmlChar *) doc->encoding);
13403
13404
0
        hdlr = xmlFindCharEncodingHandler((const char *) doc->encoding);
13405
0
        if (hdlr != NULL) {
13406
0
            xmlSwitchToEncoding(ctxt, hdlr);
13407
0
  } else {
13408
0
            return(XML_ERR_UNSUPPORTED_ENCODING);
13409
0
        }
13410
0
    }
13411
13412
0
    xmlCtxtUseOptionsInternal(ctxt, options, NULL);
13413
0
    xmlDetectSAX2(ctxt);
13414
0
    ctxt->myDoc = doc;
13415
    /* parsing in context, i.e. as within existing content */
13416
0
    ctxt->input_id = 2;
13417
0
    ctxt->instate = XML_PARSER_CONTENT;
13418
13419
0
    fake = xmlNewDocComment(node->doc, NULL);
13420
0
    if (fake == NULL) {
13421
0
        xmlFreeParserCtxt(ctxt);
13422
0
  return(XML_ERR_NO_MEMORY);
13423
0
    }
13424
0
    xmlAddChild(node, fake);
13425
13426
0
    if (node->type == XML_ELEMENT_NODE) {
13427
0
  nodePush(ctxt, node);
13428
  /*
13429
   * initialize the SAX2 namespaces stack
13430
   */
13431
0
  cur = node;
13432
0
  while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
13433
0
      xmlNsPtr ns = cur->nsDef;
13434
0
      const xmlChar *iprefix, *ihref;
13435
13436
0
      while (ns != NULL) {
13437
0
    if (ctxt->dict) {
13438
0
        iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
13439
0
        ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
13440
0
    } else {
13441
0
        iprefix = ns->prefix;
13442
0
        ihref = ns->href;
13443
0
    }
13444
13445
0
          if (xmlGetNamespace(ctxt, iprefix) == NULL) {
13446
0
        nsPush(ctxt, iprefix, ihref);
13447
0
        nsnr++;
13448
0
    }
13449
0
    ns = ns->next;
13450
0
      }
13451
0
      cur = cur->parent;
13452
0
  }
13453
0
    }
13454
13455
0
    if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
13456
  /*
13457
   * ID/IDREF registration will be done in xmlValidateElement below
13458
   */
13459
0
  ctxt->loadsubset |= XML_SKIP_IDS;
13460
0
    }
13461
13462
0
#ifdef LIBXML_HTML_ENABLED
13463
0
    if (doc->type == XML_HTML_DOCUMENT_NODE)
13464
0
        __htmlParseContent(ctxt);
13465
0
    else
13466
0
#endif
13467
0
  xmlParseContent(ctxt);
13468
13469
0
    nsPop(ctxt, nsnr);
13470
0
    if ((RAW == '<') && (NXT(1) == '/')) {
13471
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13472
0
    } else if (RAW != 0) {
13473
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13474
0
    }
13475
0
    if ((ctxt->node != NULL) && (ctxt->node != node)) {
13476
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13477
0
  ctxt->wellFormed = 0;
13478
0
    }
13479
13480
0
    if (!ctxt->wellFormed) {
13481
0
        if (ctxt->errNo == 0)
13482
0
      ret = XML_ERR_INTERNAL_ERROR;
13483
0
  else
13484
0
      ret = (xmlParserErrors)ctxt->errNo;
13485
0
    } else {
13486
0
        ret = XML_ERR_OK;
13487
0
    }
13488
13489
    /*
13490
     * Return the newly created nodeset after unlinking it from
13491
     * the pseudo sibling.
13492
     */
13493
13494
0
    cur = fake->next;
13495
0
    fake->next = NULL;
13496
0
    node->last = fake;
13497
13498
0
    if (cur != NULL) {
13499
0
  cur->prev = NULL;
13500
0
    }
13501
13502
0
    *lst = cur;
13503
13504
0
    while (cur != NULL) {
13505
0
  cur->parent = NULL;
13506
0
  cur = cur->next;
13507
0
    }
13508
13509
0
    xmlUnlinkNode(fake);
13510
0
    xmlFreeNode(fake);
13511
13512
13513
0
    if (ret != XML_ERR_OK) {
13514
0
        xmlFreeNodeList(*lst);
13515
0
  *lst = NULL;
13516
0
    }
13517
13518
0
    if (doc->dict != NULL)
13519
0
        ctxt->dict = NULL;
13520
0
    xmlFreeParserCtxt(ctxt);
13521
13522
0
    return(ret);
13523
#else /* !SAX2 */
13524
    return(XML_ERR_INTERNAL_ERROR);
13525
#endif
13526
0
}
13527
13528
#ifdef LIBXML_SAX1_ENABLED
13529
/**
13530
 * xmlParseBalancedChunkMemoryRecover:
13531
 * @doc:  the document the chunk pertains to (must not be NULL)
13532
 * @sax:  the SAX handler block (possibly NULL)
13533
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
13534
 * @depth:  Used for loop detection, use 0
13535
 * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
13536
 * @lst:  the return value for the set of parsed nodes
13537
 * @recover: return nodes even if the data is broken (use 0)
13538
 *
13539
 *
13540
 * Parse a well-balanced chunk of an XML document
13541
 * called by the parser
13542
 * The allowed sequence for the Well Balanced Chunk is the one defined by
13543
 * the content production in the XML grammar:
13544
 *
13545
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13546
 *
13547
 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13548
 *    the parser error code otherwise
13549
 *
13550
 * In case recover is set to 1, the nodelist will not be empty even if
13551
 * the parsed chunk is not well balanced, assuming the parsing succeeded to
13552
 * some extent.
13553
 */
13554
int
13555
xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13556
     void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
13557
0
     int recover) {
13558
0
    xmlParserCtxtPtr ctxt;
13559
0
    xmlDocPtr newDoc;
13560
0
    xmlSAXHandlerPtr oldsax = NULL;
13561
0
    xmlNodePtr content, newRoot;
13562
0
    int size;
13563
0
    int ret = 0;
13564
13565
0
    if (depth > 40) {
13566
0
  return(XML_ERR_ENTITY_LOOP);
13567
0
    }
13568
13569
13570
0
    if (lst != NULL)
13571
0
        *lst = NULL;
13572
0
    if (string == NULL)
13573
0
        return(-1);
13574
13575
0
    size = xmlStrlen(string);
13576
13577
0
    ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13578
0
    if (ctxt == NULL) return(-1);
13579
0
    ctxt->userData = ctxt;
13580
0
    if (sax != NULL) {
13581
0
  oldsax = ctxt->sax;
13582
0
        ctxt->sax = sax;
13583
0
  if (user_data != NULL)
13584
0
      ctxt->userData = user_data;
13585
0
    }
13586
0
    newDoc = xmlNewDoc(BAD_CAST "1.0");
13587
0
    if (newDoc == NULL) {
13588
0
  xmlFreeParserCtxt(ctxt);
13589
0
  return(-1);
13590
0
    }
13591
0
    newDoc->properties = XML_DOC_INTERNAL;
13592
0
    if ((doc != NULL) && (doc->dict != NULL)) {
13593
0
        xmlDictFree(ctxt->dict);
13594
0
  ctxt->dict = doc->dict;
13595
0
  xmlDictReference(ctxt->dict);
13596
0
  ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13597
0
  ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13598
0
  ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13599
0
  ctxt->dictNames = 1;
13600
0
    } else {
13601
0
  xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL);
13602
0
    }
13603
    /* doc == NULL is only supported for historic reasons */
13604
0
    if (doc != NULL) {
13605
0
  newDoc->intSubset = doc->intSubset;
13606
0
  newDoc->extSubset = doc->extSubset;
13607
0
    }
13608
0
    newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13609
0
    if (newRoot == NULL) {
13610
0
  if (sax != NULL)
13611
0
      ctxt->sax = oldsax;
13612
0
  xmlFreeParserCtxt(ctxt);
13613
0
  newDoc->intSubset = NULL;
13614
0
  newDoc->extSubset = NULL;
13615
0
        xmlFreeDoc(newDoc);
13616
0
  return(-1);
13617
0
    }
13618
0
    xmlAddChild((xmlNodePtr) newDoc, newRoot);
13619
0
    nodePush(ctxt, newRoot);
13620
    /* doc == NULL is only supported for historic reasons */
13621
0
    if (doc == NULL) {
13622
0
  ctxt->myDoc = newDoc;
13623
0
    } else {
13624
0
  ctxt->myDoc = newDoc;
13625
0
  newDoc->children->doc = doc;
13626
  /* Ensure that doc has XML spec namespace */
13627
0
  xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
13628
0
  newDoc->oldNs = doc->oldNs;
13629
0
    }
13630
0
    ctxt->instate = XML_PARSER_CONTENT;
13631
0
    ctxt->input_id = 2;
13632
0
    ctxt->depth = depth;
13633
13634
    /*
13635
     * Doing validity checking on chunk doesn't make sense
13636
     */
13637
0
    ctxt->validate = 0;
13638
0
    ctxt->loadsubset = 0;
13639
0
    xmlDetectSAX2(ctxt);
13640
13641
0
    if ( doc != NULL ){
13642
0
        content = doc->children;
13643
0
        doc->children = NULL;
13644
0
        xmlParseContent(ctxt);
13645
0
        doc->children = content;
13646
0
    }
13647
0
    else {
13648
0
        xmlParseContent(ctxt);
13649
0
    }
13650
0
    if ((RAW == '<') && (NXT(1) == '/')) {
13651
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13652
0
    } else if (RAW != 0) {
13653
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13654
0
    }
13655
0
    if (ctxt->node != newDoc->children) {
13656
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13657
0
    }
13658
13659
0
    if (!ctxt->wellFormed) {
13660
0
        if (ctxt->errNo == 0)
13661
0
      ret = 1;
13662
0
  else
13663
0
      ret = ctxt->errNo;
13664
0
    } else {
13665
0
      ret = 0;
13666
0
    }
13667
13668
0
    if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
13669
0
  xmlNodePtr cur;
13670
13671
  /*
13672
   * Return the newly created nodeset after unlinking it from
13673
   * they pseudo parent.
13674
   */
13675
0
  cur = newDoc->children->children;
13676
0
  *lst = cur;
13677
0
  while (cur != NULL) {
13678
0
      xmlSetTreeDoc(cur, doc);
13679
0
      cur->parent = NULL;
13680
0
      cur = cur->next;
13681
0
  }
13682
0
  newDoc->children->children = NULL;
13683
0
    }
13684
13685
0
    if (sax != NULL)
13686
0
  ctxt->sax = oldsax;
13687
0
    xmlFreeParserCtxt(ctxt);
13688
0
    newDoc->intSubset = NULL;
13689
0
    newDoc->extSubset = NULL;
13690
    /* This leaks the namespace list if doc == NULL */
13691
0
    newDoc->oldNs = NULL;
13692
0
    xmlFreeDoc(newDoc);
13693
13694
0
    return(ret);
13695
0
}
13696
13697
/**
13698
 * xmlSAXParseEntity:
13699
 * @sax:  the SAX handler block
13700
 * @filename:  the filename
13701
 *
13702
 * DEPRECATED: Don't use.
13703
 *
13704
 * parse an XML external entity out of context and build a tree.
13705
 * It use the given SAX function block to handle the parsing callback.
13706
 * If sax is NULL, fallback to the default DOM tree building routines.
13707
 *
13708
 * [78] extParsedEnt ::= TextDecl? content
13709
 *
13710
 * This correspond to a "Well Balanced" chunk
13711
 *
13712
 * Returns the resulting document tree
13713
 */
13714
13715
xmlDocPtr
13716
0
xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
13717
0
    xmlDocPtr ret;
13718
0
    xmlParserCtxtPtr ctxt;
13719
13720
0
    ctxt = xmlCreateFileParserCtxt(filename);
13721
0
    if (ctxt == NULL) {
13722
0
  return(NULL);
13723
0
    }
13724
0
    if (sax != NULL) {
13725
0
  if (ctxt->sax != NULL)
13726
0
      xmlFree(ctxt->sax);
13727
0
        ctxt->sax = sax;
13728
0
        ctxt->userData = NULL;
13729
0
    }
13730
13731
0
    xmlParseExtParsedEnt(ctxt);
13732
13733
0
    if (ctxt->wellFormed)
13734
0
  ret = ctxt->myDoc;
13735
0
    else {
13736
0
        ret = NULL;
13737
0
        xmlFreeDoc(ctxt->myDoc);
13738
0
        ctxt->myDoc = NULL;
13739
0
    }
13740
0
    if (sax != NULL)
13741
0
        ctxt->sax = NULL;
13742
0
    xmlFreeParserCtxt(ctxt);
13743
13744
0
    return(ret);
13745
0
}
13746
13747
/**
13748
 * xmlParseEntity:
13749
 * @filename:  the filename
13750
 *
13751
 * parse an XML external entity out of context and build a tree.
13752
 *
13753
 * [78] extParsedEnt ::= TextDecl? content
13754
 *
13755
 * This correspond to a "Well Balanced" chunk
13756
 *
13757
 * Returns the resulting document tree
13758
 */
13759
13760
xmlDocPtr
13761
0
xmlParseEntity(const char *filename) {
13762
0
    return(xmlSAXParseEntity(NULL, filename));
13763
0
}
13764
#endif /* LIBXML_SAX1_ENABLED */
13765
13766
/**
13767
 * xmlCreateEntityParserCtxtInternal:
13768
 * @URL:  the entity URL
13769
 * @ID:  the entity PUBLIC ID
13770
 * @base:  a possible base for the target URI
13771
 * @pctx:  parser context used to set options on new context
13772
 *
13773
 * Create a parser context for an external entity
13774
 * Automatic support for ZLIB/Compress compressed document is provided
13775
 * by default if found at compile-time.
13776
 *
13777
 * Returns the new parser context or NULL
13778
 */
13779
static xmlParserCtxtPtr
13780
xmlCreateEntityParserCtxtInternal(xmlSAXHandlerPtr sax, void *userData,
13781
        const xmlChar *URL, const xmlChar *ID, const xmlChar *base,
13782
440k
        xmlParserCtxtPtr pctx) {
13783
440k
    xmlParserCtxtPtr ctxt;
13784
440k
    xmlParserInputPtr inputStream;
13785
440k
    char *directory = NULL;
13786
440k
    xmlChar *uri;
13787
13788
440k
    ctxt = xmlNewSAXParserCtxt(sax, userData);
13789
440k
    if (ctxt == NULL) {
13790
0
  return(NULL);
13791
0
    }
13792
13793
440k
    if (pctx != NULL) {
13794
440k
        ctxt->options = pctx->options;
13795
440k
        ctxt->_private = pctx->_private;
13796
440k
  ctxt->input_id = pctx->input_id;
13797
440k
    }
13798
13799
    /* Don't read from stdin. */
13800
440k
    if (xmlStrcmp(URL, BAD_CAST "-") == 0)
13801
0
        URL = BAD_CAST "./-";
13802
13803
440k
    uri = xmlBuildURI(URL, base);
13804
13805
440k
    if (uri == NULL) {
13806
2.28k
  inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
13807
2.28k
  if (inputStream == NULL) {
13808
2.28k
      xmlFreeParserCtxt(ctxt);
13809
2.28k
      return(NULL);
13810
2.28k
  }
13811
13812
0
  inputPush(ctxt, inputStream);
13813
13814
0
  if ((ctxt->directory == NULL) && (directory == NULL))
13815
0
      directory = xmlParserGetDirectory((char *)URL);
13816
0
  if ((ctxt->directory == NULL) && (directory != NULL))
13817
0
      ctxt->directory = directory;
13818
437k
    } else {
13819
437k
  inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
13820
437k
  if (inputStream == NULL) {
13821
385k
      xmlFree(uri);
13822
385k
      xmlFreeParserCtxt(ctxt);
13823
385k
      return(NULL);
13824
385k
  }
13825
13826
52.1k
  inputPush(ctxt, inputStream);
13827
13828
52.1k
  if ((ctxt->directory == NULL) && (directory == NULL))
13829
52.1k
      directory = xmlParserGetDirectory((char *)uri);
13830
52.1k
  if ((ctxt->directory == NULL) && (directory != NULL))
13831
52.1k
      ctxt->directory = directory;
13832
52.1k
  xmlFree(uri);
13833
52.1k
    }
13834
52.1k
    return(ctxt);
13835
440k
}
13836
13837
/**
13838
 * xmlCreateEntityParserCtxt:
13839
 * @URL:  the entity URL
13840
 * @ID:  the entity PUBLIC ID
13841
 * @base:  a possible base for the target URI
13842
 *
13843
 * Create a parser context for an external entity
13844
 * Automatic support for ZLIB/Compress compressed document is provided
13845
 * by default if found at compile-time.
13846
 *
13847
 * Returns the new parser context or NULL
13848
 */
13849
xmlParserCtxtPtr
13850
xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
13851
0
                    const xmlChar *base) {
13852
0
    return xmlCreateEntityParserCtxtInternal(NULL, NULL, URL, ID, base, NULL);
13853
13854
0
}
13855
13856
/************************************************************************
13857
 *                  *
13858
 *    Front ends when parsing from a file     *
13859
 *                  *
13860
 ************************************************************************/
13861
13862
/**
13863
 * xmlCreateURLParserCtxt:
13864
 * @filename:  the filename or URL
13865
 * @options:  a combination of xmlParserOption
13866
 *
13867
 * Create a parser context for a file or URL content.
13868
 * Automatic support for ZLIB/Compress compressed document is provided
13869
 * by default if found at compile-time and for file accesses
13870
 *
13871
 * Returns the new parser context or NULL
13872
 */
13873
xmlParserCtxtPtr
13874
xmlCreateURLParserCtxt(const char *filename, int options)
13875
0
{
13876
0
    xmlParserCtxtPtr ctxt;
13877
0
    xmlParserInputPtr inputStream;
13878
0
    char *directory = NULL;
13879
13880
0
    ctxt = xmlNewParserCtxt();
13881
0
    if (ctxt == NULL) {
13882
0
  xmlErrMemory(NULL, "cannot allocate parser context");
13883
0
  return(NULL);
13884
0
    }
13885
13886
0
    if (options)
13887
0
  xmlCtxtUseOptionsInternal(ctxt, options, NULL);
13888
0
    ctxt->linenumbers = 1;
13889
13890
0
    inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
13891
0
    if (inputStream == NULL) {
13892
0
  xmlFreeParserCtxt(ctxt);
13893
0
  return(NULL);
13894
0
    }
13895
13896
0
    inputPush(ctxt, inputStream);
13897
0
    if ((ctxt->directory == NULL) && (directory == NULL))
13898
0
        directory = xmlParserGetDirectory(filename);
13899
0
    if ((ctxt->directory == NULL) && (directory != NULL))
13900
0
        ctxt->directory = directory;
13901
13902
0
    return(ctxt);
13903
0
}
13904
13905
/**
13906
 * xmlCreateFileParserCtxt:
13907
 * @filename:  the filename
13908
 *
13909
 * Create a parser context for a file content.
13910
 * Automatic support for ZLIB/Compress compressed document is provided
13911
 * by default if found at compile-time.
13912
 *
13913
 * Returns the new parser context or NULL
13914
 */
13915
xmlParserCtxtPtr
13916
xmlCreateFileParserCtxt(const char *filename)
13917
0
{
13918
0
    return(xmlCreateURLParserCtxt(filename, 0));
13919
0
}
13920
13921
#ifdef LIBXML_SAX1_ENABLED
13922
/**
13923
 * xmlSAXParseFileWithData:
13924
 * @sax:  the SAX handler block
13925
 * @filename:  the filename
13926
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
13927
 *             documents
13928
 * @data:  the userdata
13929
 *
13930
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
13931
 *
13932
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13933
 * compressed document is provided by default if found at compile-time.
13934
 * It use the given SAX function block to handle the parsing callback.
13935
 * If sax is NULL, fallback to the default DOM tree building routines.
13936
 *
13937
 * User data (void *) is stored within the parser context in the
13938
 * context's _private member, so it is available nearly everywhere in libxml
13939
 *
13940
 * Returns the resulting document tree
13941
 */
13942
13943
xmlDocPtr
13944
xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
13945
0
                        int recovery, void *data) {
13946
0
    xmlDocPtr ret;
13947
0
    xmlParserCtxtPtr ctxt;
13948
13949
0
    xmlInitParser();
13950
13951
0
    ctxt = xmlCreateFileParserCtxt(filename);
13952
0
    if (ctxt == NULL) {
13953
0
  return(NULL);
13954
0
    }
13955
0
    if (sax != NULL) {
13956
0
  if (ctxt->sax != NULL)
13957
0
      xmlFree(ctxt->sax);
13958
0
        ctxt->sax = sax;
13959
0
    }
13960
0
    xmlDetectSAX2(ctxt);
13961
0
    if (data!=NULL) {
13962
0
  ctxt->_private = data;
13963
0
    }
13964
13965
0
    if (ctxt->directory == NULL)
13966
0
        ctxt->directory = xmlParserGetDirectory(filename);
13967
13968
0
    ctxt->recovery = recovery;
13969
13970
0
    xmlParseDocument(ctxt);
13971
13972
0
    if ((ctxt->wellFormed) || recovery) {
13973
0
        ret = ctxt->myDoc;
13974
0
  if ((ret != NULL) && (ctxt->input->buf != NULL)) {
13975
0
      if (ctxt->input->buf->compressed > 0)
13976
0
    ret->compression = 9;
13977
0
      else
13978
0
    ret->compression = ctxt->input->buf->compressed;
13979
0
  }
13980
0
    }
13981
0
    else {
13982
0
       ret = NULL;
13983
0
       xmlFreeDoc(ctxt->myDoc);
13984
0
       ctxt->myDoc = NULL;
13985
0
    }
13986
0
    if (sax != NULL)
13987
0
        ctxt->sax = NULL;
13988
0
    xmlFreeParserCtxt(ctxt);
13989
13990
0
    return(ret);
13991
0
}
13992
13993
/**
13994
 * xmlSAXParseFile:
13995
 * @sax:  the SAX handler block
13996
 * @filename:  the filename
13997
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
13998
 *             documents
13999
 *
14000
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
14001
 *
14002
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14003
 * compressed document is provided by default if found at compile-time.
14004
 * It use the given SAX function block to handle the parsing callback.
14005
 * If sax is NULL, fallback to the default DOM tree building routines.
14006
 *
14007
 * Returns the resulting document tree
14008
 */
14009
14010
xmlDocPtr
14011
xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
14012
0
                          int recovery) {
14013
0
    return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
14014
0
}
14015
14016
/**
14017
 * xmlRecoverDoc:
14018
 * @cur:  a pointer to an array of xmlChar
14019
 *
14020
 * DEPRECATED: Use xmlReadDoc with XML_PARSE_RECOVER.
14021
 *
14022
 * parse an XML in-memory document and build a tree.
14023
 * In the case the document is not Well Formed, a attempt to build a
14024
 * tree is tried anyway
14025
 *
14026
 * Returns the resulting document tree or NULL in case of failure
14027
 */
14028
14029
xmlDocPtr
14030
0
xmlRecoverDoc(const xmlChar *cur) {
14031
0
    return(xmlSAXParseDoc(NULL, cur, 1));
14032
0
}
14033
14034
/**
14035
 * xmlParseFile:
14036
 * @filename:  the filename
14037
 *
14038
 * DEPRECATED: Use xmlReadFile.
14039
 *
14040
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14041
 * compressed document is provided by default if found at compile-time.
14042
 *
14043
 * Returns the resulting document tree if the file was wellformed,
14044
 * NULL otherwise.
14045
 */
14046
14047
xmlDocPtr
14048
0
xmlParseFile(const char *filename) {
14049
0
    return(xmlSAXParseFile(NULL, filename, 0));
14050
0
}
14051
14052
/**
14053
 * xmlRecoverFile:
14054
 * @filename:  the filename
14055
 *
14056
 * DEPRECATED: Use xmlReadFile with XML_PARSE_RECOVER.
14057
 *
14058
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14059
 * compressed document is provided by default if found at compile-time.
14060
 * In the case the document is not Well Formed, it attempts to build
14061
 * a tree anyway
14062
 *
14063
 * Returns the resulting document tree or NULL in case of failure
14064
 */
14065
14066
xmlDocPtr
14067
0
xmlRecoverFile(const char *filename) {
14068
0
    return(xmlSAXParseFile(NULL, filename, 1));
14069
0
}
14070
14071
14072
/**
14073
 * xmlSetupParserForBuffer:
14074
 * @ctxt:  an XML parser context
14075
 * @buffer:  a xmlChar * buffer
14076
 * @filename:  a file name
14077
 *
14078
 * DEPRECATED: Don't use.
14079
 *
14080
 * Setup the parser context to parse a new buffer; Clears any prior
14081
 * contents from the parser context. The buffer parameter must not be
14082
 * NULL, but the filename parameter can be
14083
 */
14084
void
14085
xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
14086
                             const char* filename)
14087
0
{
14088
0
    xmlParserInputPtr input;
14089
14090
0
    if ((ctxt == NULL) || (buffer == NULL))
14091
0
        return;
14092
14093
0
    input = xmlNewInputStream(ctxt);
14094
0
    if (input == NULL) {
14095
0
        xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
14096
0
        xmlClearParserCtxt(ctxt);
14097
0
        return;
14098
0
    }
14099
14100
0
    xmlClearParserCtxt(ctxt);
14101
0
    if (filename != NULL)
14102
0
        input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
14103
0
    input->base = buffer;
14104
0
    input->cur = buffer;
14105
0
    input->end = &buffer[xmlStrlen(buffer)];
14106
0
    inputPush(ctxt, input);
14107
0
}
14108
14109
/**
14110
 * xmlSAXUserParseFile:
14111
 * @sax:  a SAX handler
14112
 * @user_data:  The user data returned on SAX callbacks
14113
 * @filename:  a file name
14114
 *
14115
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
14116
 *
14117
 * parse an XML file and call the given SAX handler routines.
14118
 * Automatic support for ZLIB/Compress compressed document is provided
14119
 *
14120
 * Returns 0 in case of success or a error number otherwise
14121
 */
14122
int
14123
xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
14124
0
                    const char *filename) {
14125
0
    int ret = 0;
14126
0
    xmlParserCtxtPtr ctxt;
14127
14128
0
    ctxt = xmlCreateFileParserCtxt(filename);
14129
0
    if (ctxt == NULL) return -1;
14130
0
    if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14131
0
  xmlFree(ctxt->sax);
14132
0
    ctxt->sax = sax;
14133
0
    xmlDetectSAX2(ctxt);
14134
14135
0
    if (user_data != NULL)
14136
0
  ctxt->userData = user_data;
14137
14138
0
    xmlParseDocument(ctxt);
14139
14140
0
    if (ctxt->wellFormed)
14141
0
  ret = 0;
14142
0
    else {
14143
0
        if (ctxt->errNo != 0)
14144
0
      ret = ctxt->errNo;
14145
0
  else
14146
0
      ret = -1;
14147
0
    }
14148
0
    if (sax != NULL)
14149
0
  ctxt->sax = NULL;
14150
0
    if (ctxt->myDoc != NULL) {
14151
0
        xmlFreeDoc(ctxt->myDoc);
14152
0
  ctxt->myDoc = NULL;
14153
0
    }
14154
0
    xmlFreeParserCtxt(ctxt);
14155
14156
0
    return ret;
14157
0
}
14158
#endif /* LIBXML_SAX1_ENABLED */
14159
14160
/************************************************************************
14161
 *                  *
14162
 *    Front ends when parsing from memory     *
14163
 *                  *
14164
 ************************************************************************/
14165
14166
/**
14167
 * xmlCreateMemoryParserCtxt:
14168
 * @buffer:  a pointer to a char array
14169
 * @size:  the size of the array
14170
 *
14171
 * Create a parser context for an XML in-memory document.
14172
 *
14173
 * Returns the new parser context or NULL
14174
 */
14175
xmlParserCtxtPtr
14176
1.27M
xmlCreateMemoryParserCtxt(const char *buffer, int size) {
14177
1.27M
    xmlParserCtxtPtr ctxt;
14178
1.27M
    xmlParserInputPtr input;
14179
1.27M
    xmlParserInputBufferPtr buf;
14180
14181
1.27M
    if (buffer == NULL)
14182
0
  return(NULL);
14183
1.27M
    if (size <= 0)
14184
1.03k
  return(NULL);
14185
14186
1.27M
    ctxt = xmlNewParserCtxt();
14187
1.27M
    if (ctxt == NULL)
14188
0
  return(NULL);
14189
14190
1.27M
    buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
14191
1.27M
    if (buf == NULL) {
14192
0
  xmlFreeParserCtxt(ctxt);
14193
0
  return(NULL);
14194
0
    }
14195
14196
1.27M
    input = xmlNewInputStream(ctxt);
14197
1.27M
    if (input == NULL) {
14198
0
  xmlFreeParserInputBuffer(buf);
14199
0
  xmlFreeParserCtxt(ctxt);
14200
0
  return(NULL);
14201
0
    }
14202
14203
1.27M
    input->filename = NULL;
14204
1.27M
    input->buf = buf;
14205
1.27M
    xmlBufResetInput(input->buf->buffer, input);
14206
14207
1.27M
    inputPush(ctxt, input);
14208
1.27M
    return(ctxt);
14209
1.27M
}
14210
14211
#ifdef LIBXML_SAX1_ENABLED
14212
/**
14213
 * xmlSAXParseMemoryWithData:
14214
 * @sax:  the SAX handler block
14215
 * @buffer:  an pointer to a char array
14216
 * @size:  the size of the array
14217
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
14218
 *             documents
14219
 * @data:  the userdata
14220
 *
14221
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
14222
 *
14223
 * parse an XML in-memory block and use the given SAX function block
14224
 * to handle the parsing callback. If sax is NULL, fallback to the default
14225
 * DOM tree building routines.
14226
 *
14227
 * User data (void *) is stored within the parser context in the
14228
 * context's _private member, so it is available nearly everywhere in libxml
14229
 *
14230
 * Returns the resulting document tree
14231
 */
14232
14233
xmlDocPtr
14234
xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
14235
0
            int size, int recovery, void *data) {
14236
0
    xmlDocPtr ret;
14237
0
    xmlParserCtxtPtr ctxt;
14238
14239
0
    xmlInitParser();
14240
14241
0
    ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14242
0
    if (ctxt == NULL) return(NULL);
14243
0
    if (sax != NULL) {
14244
0
  if (ctxt->sax != NULL)
14245
0
      xmlFree(ctxt->sax);
14246
0
        ctxt->sax = sax;
14247
0
    }
14248
0
    xmlDetectSAX2(ctxt);
14249
0
    if (data!=NULL) {
14250
0
  ctxt->_private=data;
14251
0
    }
14252
14253
0
    ctxt->recovery = recovery;
14254
14255
0
    xmlParseDocument(ctxt);
14256
14257
0
    if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14258
0
    else {
14259
0
       ret = NULL;
14260
0
       xmlFreeDoc(ctxt->myDoc);
14261
0
       ctxt->myDoc = NULL;
14262
0
    }
14263
0
    if (sax != NULL)
14264
0
  ctxt->sax = NULL;
14265
0
    xmlFreeParserCtxt(ctxt);
14266
14267
0
    return(ret);
14268
0
}
14269
14270
/**
14271
 * xmlSAXParseMemory:
14272
 * @sax:  the SAX handler block
14273
 * @buffer:  an pointer to a char array
14274
 * @size:  the size of the array
14275
 * @recovery:  work in recovery mode, i.e. tries to read not Well Formed
14276
 *             documents
14277
 *
14278
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
14279
 *
14280
 * parse an XML in-memory block and use the given SAX function block
14281
 * to handle the parsing callback. If sax is NULL, fallback to the default
14282
 * DOM tree building routines.
14283
 *
14284
 * Returns the resulting document tree
14285
 */
14286
xmlDocPtr
14287
xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
14288
0
            int size, int recovery) {
14289
0
    return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
14290
0
}
14291
14292
/**
14293
 * xmlParseMemory:
14294
 * @buffer:  an pointer to a char array
14295
 * @size:  the size of the array
14296
 *
14297
 * DEPRECATED: Use xmlReadMemory.
14298
 *
14299
 * parse an XML in-memory block and build a tree.
14300
 *
14301
 * Returns the resulting document tree
14302
 */
14303
14304
0
xmlDocPtr xmlParseMemory(const char *buffer, int size) {
14305
0
   return(xmlSAXParseMemory(NULL, buffer, size, 0));
14306
0
}
14307
14308
/**
14309
 * xmlRecoverMemory:
14310
 * @buffer:  an pointer to a char array
14311
 * @size:  the size of the array
14312
 *
14313
 * DEPRECATED: Use xmlReadMemory with XML_PARSE_RECOVER.
14314
 *
14315
 * parse an XML in-memory block and build a tree.
14316
 * In the case the document is not Well Formed, an attempt to
14317
 * build a tree is tried anyway
14318
 *
14319
 * Returns the resulting document tree or NULL in case of error
14320
 */
14321
14322
0
xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
14323
0
   return(xmlSAXParseMemory(NULL, buffer, size, 1));
14324
0
}
14325
14326
/**
14327
 * xmlSAXUserParseMemory:
14328
 * @sax:  a SAX handler
14329
 * @user_data:  The user data returned on SAX callbacks
14330
 * @buffer:  an in-memory XML document input
14331
 * @size:  the length of the XML document in bytes
14332
 *
14333
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
14334
 *
14335
 * parse an XML in-memory buffer and call the given SAX handler routines.
14336
 *
14337
 * Returns 0 in case of success or a error number otherwise
14338
 */
14339
int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
14340
0
        const char *buffer, int size) {
14341
0
    int ret = 0;
14342
0
    xmlParserCtxtPtr ctxt;
14343
14344
0
    xmlInitParser();
14345
14346
0
    ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14347
0
    if (ctxt == NULL) return -1;
14348
0
    if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14349
0
        xmlFree(ctxt->sax);
14350
0
    ctxt->sax = sax;
14351
0
    xmlDetectSAX2(ctxt);
14352
14353
0
    if (user_data != NULL)
14354
0
  ctxt->userData = user_data;
14355
14356
0
    xmlParseDocument(ctxt);
14357
14358
0
    if (ctxt->wellFormed)
14359
0
  ret = 0;
14360
0
    else {
14361
0
        if (ctxt->errNo != 0)
14362
0
      ret = ctxt->errNo;
14363
0
  else
14364
0
      ret = -1;
14365
0
    }
14366
0
    if (sax != NULL)
14367
0
        ctxt->sax = NULL;
14368
0
    if (ctxt->myDoc != NULL) {
14369
0
        xmlFreeDoc(ctxt->myDoc);
14370
0
  ctxt->myDoc = NULL;
14371
0
    }
14372
0
    xmlFreeParserCtxt(ctxt);
14373
14374
0
    return ret;
14375
0
}
14376
#endif /* LIBXML_SAX1_ENABLED */
14377
14378
/**
14379
 * xmlCreateDocParserCtxt:
14380
 * @cur:  a pointer to an array of xmlChar
14381
 *
14382
 * Creates a parser context for an XML in-memory document.
14383
 *
14384
 * Returns the new parser context or NULL
14385
 */
14386
xmlParserCtxtPtr
14387
0
xmlCreateDocParserCtxt(const xmlChar *cur) {
14388
0
    int len;
14389
14390
0
    if (cur == NULL)
14391
0
  return(NULL);
14392
0
    len = xmlStrlen(cur);
14393
0
    return(xmlCreateMemoryParserCtxt((const char *)cur, len));
14394
0
}
14395
14396
#ifdef LIBXML_SAX1_ENABLED
14397
/**
14398
 * xmlSAXParseDoc:
14399
 * @sax:  the SAX handler block
14400
 * @cur:  a pointer to an array of xmlChar
14401
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
14402
 *             documents
14403
 *
14404
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadDoc.
14405
 *
14406
 * parse an XML in-memory document and build a tree.
14407
 * It use the given SAX function block to handle the parsing callback.
14408
 * If sax is NULL, fallback to the default DOM tree building routines.
14409
 *
14410
 * Returns the resulting document tree
14411
 */
14412
14413
xmlDocPtr
14414
0
xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
14415
0
    xmlDocPtr ret;
14416
0
    xmlParserCtxtPtr ctxt;
14417
0
    xmlSAXHandlerPtr oldsax = NULL;
14418
14419
0
    if (cur == NULL) return(NULL);
14420
14421
14422
0
    ctxt = xmlCreateDocParserCtxt(cur);
14423
0
    if (ctxt == NULL) return(NULL);
14424
0
    if (sax != NULL) {
14425
0
        oldsax = ctxt->sax;
14426
0
        ctxt->sax = sax;
14427
0
        ctxt->userData = NULL;
14428
0
    }
14429
0
    xmlDetectSAX2(ctxt);
14430
14431
0
    xmlParseDocument(ctxt);
14432
0
    if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14433
0
    else {
14434
0
       ret = NULL;
14435
0
       xmlFreeDoc(ctxt->myDoc);
14436
0
       ctxt->myDoc = NULL;
14437
0
    }
14438
0
    if (sax != NULL)
14439
0
  ctxt->sax = oldsax;
14440
0
    xmlFreeParserCtxt(ctxt);
14441
14442
0
    return(ret);
14443
0
}
14444
14445
/**
14446
 * xmlParseDoc:
14447
 * @cur:  a pointer to an array of xmlChar
14448
 *
14449
 * DEPRECATED: Use xmlReadDoc.
14450
 *
14451
 * parse an XML in-memory document and build a tree.
14452
 *
14453
 * Returns the resulting document tree
14454
 */
14455
14456
xmlDocPtr
14457
0
xmlParseDoc(const xmlChar *cur) {
14458
0
    return(xmlSAXParseDoc(NULL, cur, 0));
14459
0
}
14460
#endif /* LIBXML_SAX1_ENABLED */
14461
14462
#ifdef LIBXML_LEGACY_ENABLED
14463
/************************************************************************
14464
 *                  *
14465
 *  Specific function to keep track of entities references    *
14466
 *  and used by the XSLT debugger         *
14467
 *                  *
14468
 ************************************************************************/
14469
14470
static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
14471
14472
/**
14473
 * xmlAddEntityReference:
14474
 * @ent : A valid entity
14475
 * @firstNode : A valid first node for children of entity
14476
 * @lastNode : A valid last node of children entity
14477
 *
14478
 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
14479
 */
14480
static void
14481
xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
14482
                      xmlNodePtr lastNode)
14483
{
14484
    if (xmlEntityRefFunc != NULL) {
14485
        (*xmlEntityRefFunc) (ent, firstNode, lastNode);
14486
    }
14487
}
14488
14489
14490
/**
14491
 * xmlSetEntityReferenceFunc:
14492
 * @func: A valid function
14493
 *
14494
 * Set the function to call call back when a xml reference has been made
14495
 */
14496
void
14497
xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
14498
{
14499
    xmlEntityRefFunc = func;
14500
}
14501
#endif /* LIBXML_LEGACY_ENABLED */
14502
14503
/************************************************************************
14504
 *                  *
14505
 *        Miscellaneous       *
14506
 *                  *
14507
 ************************************************************************/
14508
14509
/* Non-zero once xmlInitParser() has completed; reset by xmlCleanupParser(). */
static int xmlParserInitialized = 0;

/**
 * xmlInitParser:
 *
 * Initialization function for the XML parser.
 * This is not reentrant. Call once before processing in case of
 * use in multithreaded programs.
 *
 * Subsequent calls return immediately once the initialized flag is set.
 */

void
xmlInitParser(void) {
    /*
     * Note that the initialization code must not make memory allocations.
     */

    /* Fast path: already initialized. */
    if (xmlParserInitialized != 0)
        return;

#ifdef LIBXML_THREAD_ENABLED
    __xmlGlobalInitMutexLock();
    /* Re-check under the lock: another caller may have finished first. */
    if (xmlParserInitialized != 0) {
        __xmlGlobalInitMutexUnlock();
        return;
    }
#endif

#if defined(_WIN32) && (!defined(LIBXML_STATIC) || defined(LIBXML_STATIC_FOR_DLL))
    /* Only register the cleanup when the default deallocator is in use. */
    if (xmlFree == free)
        atexit(xmlCleanupParser);
#endif

    /* The order of the sub-system initializers is kept as is. */
    xmlInitThreadsInternal();
    xmlInitGlobalsInternal();
    xmlInitMemoryInternal();
    __xmlInitializeDict();
    xmlInitEncodingInternal();
    xmlRegisterDefaultInputCallbacks();
#ifdef LIBXML_OUTPUT_ENABLED
    xmlRegisterDefaultOutputCallbacks();
#endif /* LIBXML_OUTPUT_ENABLED */
#if defined(LIBXML_XPATH_ENABLED) || defined(LIBXML_SCHEMAS_ENABLED)
    xmlInitXPathInternal();
#endif

    xmlParserInitialized = 1;

#ifdef LIBXML_THREAD_ENABLED
    __xmlGlobalInitMutexUnlock();
#endif
}
14554
14555
/**
14556
 * xmlCleanupParser:
14557
 *
14558
 * This function name is somewhat misleading. It does not clean up
14559
 * parser state, it cleans up memory allocated by the library itself.
14560
 * It is a cleanup function for the XML library. It tries to reclaim all
14561
 * related global memory allocated for the library processing.
14562
 * It doesn't deallocate any document related memory. One should
14563
 * call xmlCleanupParser() only when the process has finished using
14564
 * the library and all XML/HTML documents built with it.
14565
 * See also xmlInitParser() which has the opposite function of preparing
14566
 * the library for operations.
14567
 *
14568
 * WARNING: if your application is multithreaded or has plugin support
14569
 *          calling this may crash the application if another thread or
14570
 *          a plugin is still using libxml2. It's sometimes very hard to
14571
 *          guess if libxml2 is in use in the application, some libraries
14572
 *          or plugins may use it without notice. In case of doubt abstain
14573
 *          from calling this function or do it just before calling exit()
14574
 *          to avoid leak reports from valgrind !
14575
 */
14576
14577
void
14578
0
xmlCleanupParser(void) {
14579
0
    if (!xmlParserInitialized)
14580
0
  return;
14581
14582
0
    xmlCleanupCharEncodingHandlers();
14583
0
#ifdef LIBXML_CATALOG_ENABLED
14584
0
    xmlCatalogCleanup();
14585
0
#endif
14586
0
    xmlCleanupDictInternal();
14587
0
    xmlCleanupInputCallbacks();
14588
0
#ifdef LIBXML_OUTPUT_ENABLED
14589
0
    xmlCleanupOutputCallbacks();
14590
0
#endif
14591
0
#ifdef LIBXML_SCHEMAS_ENABLED
14592
0
    xmlSchemaCleanupTypes();
14593
0
    xmlRelaxNGCleanupTypes();
14594
0
#endif
14595
0
    xmlCleanupGlobalsInternal();
14596
0
    xmlCleanupThreadsInternal();
14597
0
    xmlCleanupMemoryInternal();
14598
0
    xmlParserInitialized = 0;
14599
0
}
14600
14601
#if defined(HAVE_ATTRIBUTE_DESTRUCTOR) && !defined(LIBXML_STATIC) && \
    !defined(_WIN32)
/*
 * xmlDestructor:
 *
 * Destructor hook that runs xmlCleanupParser(), but only while the
 * default deallocator is installed.
 */
static void
ATTRIBUTE_DESTRUCTOR
xmlDestructor(void) {
    /*
     * Calling custom deallocation functions in a destructor can cause
     * problems, for example with Nokogiri.
     */
    if (xmlFree != free)
        return;

    xmlCleanupParser();
}
#endif
14614
14615
/************************************************************************
14616
 *                  *
14617
 *  New set (2.6.0) of simpler and more flexible APIs   *
14618
 *                  *
14619
 ************************************************************************/
14620
14621
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A variable named "dict" must be visible at the point
 * of use; a NULL @str is ignored, and a NULL "dict" means the string is
 * always freed.
 *
 * The expansion is a bare "if" statement that already ends with ';'.
 */
#define DICT_FREE(str)                                          \
    if (((str) != NULL) &&                                      \
        ((dict == NULL) ||                                      \
         (!xmlDictOwns(dict, (const xmlChar *)(str)))))         \
        xmlFree((char *)(str));
14632
14633
/**
14634
 * xmlCtxtReset:
14635
 * @ctxt: an XML parser context
14636
 *
14637
 * Reset a parser context
14638
 */
14639
void
14640
xmlCtxtReset(xmlParserCtxtPtr ctxt)
14641
0
{
14642
0
    xmlParserInputPtr input;
14643
0
    xmlDictPtr dict;
14644
14645
0
    if (ctxt == NULL)
14646
0
        return;
14647
14648
0
    dict = ctxt->dict;
14649
14650
0
    while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
14651
0
        xmlFreeInputStream(input);
14652
0
    }
14653
0
    ctxt->inputNr = 0;
14654
0
    ctxt->input = NULL;
14655
14656
0
    ctxt->spaceNr = 0;
14657
0
    if (ctxt->spaceTab != NULL) {
14658
0
  ctxt->spaceTab[0] = -1;
14659
0
  ctxt->space = &ctxt->spaceTab[0];
14660
0
    } else {
14661
0
        ctxt->space = NULL;
14662
0
    }
14663
14664
14665
0
    ctxt->nodeNr = 0;
14666
0
    ctxt->node = NULL;
14667
14668
0
    ctxt->nameNr = 0;
14669
0
    ctxt->name = NULL;
14670
14671
0
    ctxt->nsNr = 0;
14672
14673
0
    DICT_FREE(ctxt->version);
14674
0
    ctxt->version = NULL;
14675
0
    DICT_FREE(ctxt->encoding);
14676
0
    ctxt->encoding = NULL;
14677
0
    DICT_FREE(ctxt->directory);
14678
0
    ctxt->directory = NULL;
14679
0
    DICT_FREE(ctxt->extSubURI);
14680
0
    ctxt->extSubURI = NULL;
14681
0
    DICT_FREE(ctxt->extSubSystem);
14682
0
    ctxt->extSubSystem = NULL;
14683
0
    if (ctxt->myDoc != NULL)
14684
0
        xmlFreeDoc(ctxt->myDoc);
14685
0
    ctxt->myDoc = NULL;
14686
14687
0
    ctxt->standalone = -1;
14688
0
    ctxt->hasExternalSubset = 0;
14689
0
    ctxt->hasPErefs = 0;
14690
0
    ctxt->html = 0;
14691
0
    ctxt->external = 0;
14692
0
    ctxt->instate = XML_PARSER_START;
14693
0
    ctxt->token = 0;
14694
14695
0
    ctxt->wellFormed = 1;
14696
0
    ctxt->nsWellFormed = 1;
14697
0
    ctxt->disableSAX = 0;
14698
0
    ctxt->valid = 1;
14699
#if 0
14700
    ctxt->vctxt.userData = ctxt;
14701
    ctxt->vctxt.error = xmlParserValidityError;
14702
    ctxt->vctxt.warning = xmlParserValidityWarning;
14703
#endif
14704
0
    ctxt->record_info = 0;
14705
0
    ctxt->checkIndex = 0;
14706
0
    ctxt->endCheckState = 0;
14707
0
    ctxt->inSubset = 0;
14708
0
    ctxt->errNo = XML_ERR_OK;
14709
0
    ctxt->depth = 0;
14710
0
    ctxt->charset = XML_CHAR_ENCODING_UTF8;
14711
0
    ctxt->catalogs = NULL;
14712
0
    ctxt->sizeentities = 0;
14713
0
    ctxt->sizeentcopy = 0;
14714
0
    xmlInitNodeInfoSeq(&ctxt->node_seq);
14715
14716
0
    if (ctxt->attsDefault != NULL) {
14717
0
        xmlHashFree(ctxt->attsDefault, xmlHashDefaultDeallocator);
14718
0
        ctxt->attsDefault = NULL;
14719
0
    }
14720
0
    if (ctxt->attsSpecial != NULL) {
14721
0
        xmlHashFree(ctxt->attsSpecial, NULL);
14722
0
        ctxt->attsSpecial = NULL;
14723
0
    }
14724
14725
0
#ifdef LIBXML_CATALOG_ENABLED
14726
0
    if (ctxt->catalogs != NULL)
14727
0
  xmlCatalogFreeLocal(ctxt->catalogs);
14728
0
#endif
14729
0
    ctxt->nbErrors = 0;
14730
0
    ctxt->nbWarnings = 0;
14731
0
    if (ctxt->lastError.code != XML_ERR_OK)
14732
0
        xmlResetError(&ctxt->lastError);
14733
0
}
14734
14735
/**
14736
 * xmlCtxtResetPush:
14737
 * @ctxt: an XML parser context
14738
 * @chunk:  a pointer to an array of chars
14739
 * @size:  number of chars in the array
14740
 * @filename:  an optional file name or URI
14741
 * @encoding:  the document encoding, or NULL
14742
 *
14743
 * Reset a push parser context
14744
 *
14745
 * Returns 0 in case of success and 1 in case of error
14746
 */
14747
int
14748
xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
14749
                 int size, const char *filename, const char *encoding)
14750
0
{
14751
0
    xmlParserInputPtr inputStream;
14752
0
    xmlParserInputBufferPtr buf;
14753
0
    xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
14754
14755
0
    if (ctxt == NULL)
14756
0
        return(1);
14757
14758
0
    if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
14759
0
        enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
14760
14761
0
    buf = xmlAllocParserInputBuffer(enc);
14762
0
    if (buf == NULL)
14763
0
        return(1);
14764
14765
0
    if (ctxt == NULL) {
14766
0
        xmlFreeParserInputBuffer(buf);
14767
0
        return(1);
14768
0
    }
14769
14770
0
    xmlCtxtReset(ctxt);
14771
14772
0
    if (filename == NULL) {
14773
0
        ctxt->directory = NULL;
14774
0
    } else {
14775
0
        ctxt->directory = xmlParserGetDirectory(filename);
14776
0
    }
14777
14778
0
    inputStream = xmlNewInputStream(ctxt);
14779
0
    if (inputStream == NULL) {
14780
0
        xmlFreeParserInputBuffer(buf);
14781
0
        return(1);
14782
0
    }
14783
14784
0
    if (filename == NULL)
14785
0
        inputStream->filename = NULL;
14786
0
    else
14787
0
        inputStream->filename = (char *)
14788
0
            xmlCanonicPath((const xmlChar *) filename);
14789
0
    inputStream->buf = buf;
14790
0
    xmlBufResetInput(buf->buffer, inputStream);
14791
14792
0
    inputPush(ctxt, inputStream);
14793
14794
0
    if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
14795
0
        (ctxt->input->buf != NULL)) {
14796
0
  size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
14797
0
        size_t cur = ctxt->input->cur - ctxt->input->base;
14798
14799
0
        xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
14800
14801
0
        xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
14802
#ifdef DEBUG_PUSH
14803
        xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
14804
#endif
14805
0
    }
14806
14807
0
    if (encoding != NULL) {
14808
0
        xmlCharEncodingHandlerPtr hdlr;
14809
14810
0
        if (ctxt->encoding != NULL)
14811
0
      xmlFree((xmlChar *) ctxt->encoding);
14812
0
        ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14813
14814
0
        hdlr = xmlFindCharEncodingHandler(encoding);
14815
0
        if (hdlr != NULL) {
14816
0
            xmlSwitchToEncoding(ctxt, hdlr);
14817
0
  } else {
14818
0
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
14819
0
            "Unsupported encoding %s\n", BAD_CAST encoding);
14820
0
        }
14821
0
    } else if (enc != XML_CHAR_ENCODING_NONE) {
14822
0
        xmlSwitchEncoding(ctxt, enc);
14823
0
    }
14824
14825
0
    return(0);
14826
0
}
14827
14828
14829
/**
14830
 * xmlCtxtUseOptionsInternal:
14831
 * @ctxt: an XML parser context
14832
 * @options:  a combination of xmlParserOption
14833
 * @encoding:  the user provided encoding to use
14834
 *
14835
 * Applies the options to the parser context
14836
 *
14837
 * Returns 0 in case of success, the set of unknown or unimplemented options
14838
 *         in case of error.
14839
 */
14840
static int
14841
xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding)
14842
3.00M
{
14843
3.00M
    if (ctxt == NULL)
14844
0
        return(-1);
14845
3.00M
    if (encoding != NULL) {
14846
0
        if (ctxt->encoding != NULL)
14847
0
      xmlFree((xmlChar *) ctxt->encoding);
14848
0
        ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14849
0
    }
14850
3.00M
    if (options & XML_PARSE_RECOVER) {
14851
1.35M
        ctxt->recovery = 1;
14852
1.35M
        options -= XML_PARSE_RECOVER;
14853
1.35M
  ctxt->options |= XML_PARSE_RECOVER;
14854
1.35M
    } else
14855
1.64M
        ctxt->recovery = 0;
14856
3.00M
    if (options & XML_PARSE_DTDLOAD) {
14857
1.92M
        ctxt->loadsubset = XML_DETECT_IDS;
14858
1.92M
        options -= XML_PARSE_DTDLOAD;
14859
1.92M
  ctxt->options |= XML_PARSE_DTDLOAD;
14860
1.92M
    } else
14861
1.08M
        ctxt->loadsubset = 0;
14862
3.00M
    if (options & XML_PARSE_DTDATTR) {
14863
1.03M
        ctxt->loadsubset |= XML_COMPLETE_ATTRS;
14864
1.03M
        options -= XML_PARSE_DTDATTR;
14865
1.03M
  ctxt->options |= XML_PARSE_DTDATTR;
14866
1.03M
    }
14867
3.00M
    if (options & XML_PARSE_NOENT) {
14868
1.74M
        ctxt->replaceEntities = 1;
14869
        /* ctxt->loadsubset |= XML_DETECT_IDS; */
14870
1.74M
        options -= XML_PARSE_NOENT;
14871
1.74M
  ctxt->options |= XML_PARSE_NOENT;
14872
1.74M
    } else
14873
1.26M
        ctxt->replaceEntities = 0;
14874
3.00M
    if (options & XML_PARSE_PEDANTIC) {
14875
332k
        ctxt->pedantic = 1;
14876
332k
        options -= XML_PARSE_PEDANTIC;
14877
332k
  ctxt->options |= XML_PARSE_PEDANTIC;
14878
332k
    } else
14879
2.67M
        ctxt->pedantic = 0;
14880
3.00M
    if (options & XML_PARSE_NOBLANKS) {
14881
1.07M
        ctxt->keepBlanks = 0;
14882
1.07M
        ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
14883
1.07M
        options -= XML_PARSE_NOBLANKS;
14884
1.07M
  ctxt->options |= XML_PARSE_NOBLANKS;
14885
1.07M
    } else
14886
1.93M
        ctxt->keepBlanks = 1;
14887
3.00M
    if (options & XML_PARSE_DTDVALID) {
14888
993k
        ctxt->validate = 1;
14889
993k
        if (options & XML_PARSE_NOWARNING)
14890
518k
            ctxt->vctxt.warning = NULL;
14891
993k
        if (options & XML_PARSE_NOERROR)
14892
751k
            ctxt->vctxt.error = NULL;
14893
993k
        options -= XML_PARSE_DTDVALID;
14894
993k
  ctxt->options |= XML_PARSE_DTDVALID;
14895
993k
    } else
14896
2.01M
        ctxt->validate = 0;
14897
3.00M
    if (options & XML_PARSE_NOWARNING) {
14898
1.22M
        ctxt->sax->warning = NULL;
14899
1.22M
        options -= XML_PARSE_NOWARNING;
14900
1.22M
    }
14901
3.00M
    if (options & XML_PARSE_NOERROR) {
14902
1.62M
        ctxt->sax->error = NULL;
14903
1.62M
        ctxt->sax->fatalError = NULL;
14904
1.62M
        options -= XML_PARSE_NOERROR;
14905
1.62M
    }
14906
3.00M
#ifdef LIBXML_SAX1_ENABLED
14907
3.00M
    if (options & XML_PARSE_SAX1) {
14908
1.04M
        ctxt->sax->startElement = xmlSAX2StartElement;
14909
1.04M
        ctxt->sax->endElement = xmlSAX2EndElement;
14910
1.04M
        ctxt->sax->startElementNs = NULL;
14911
1.04M
        ctxt->sax->endElementNs = NULL;
14912
1.04M
        ctxt->sax->initialized = 1;
14913
1.04M
        options -= XML_PARSE_SAX1;
14914
1.04M
  ctxt->options |= XML_PARSE_SAX1;
14915
1.04M
    }
14916
3.00M
#endif /* LIBXML_SAX1_ENABLED */
14917
3.00M
    if (options & XML_PARSE_NODICT) {
14918
941k
        ctxt->dictNames = 0;
14919
941k
        options -= XML_PARSE_NODICT;
14920
941k
  ctxt->options |= XML_PARSE_NODICT;
14921
2.06M
    } else {
14922
2.06M
        ctxt->dictNames = 1;
14923
2.06M
    }
14924
3.00M
    if (options & XML_PARSE_NOCDATA) {
14925
1.19M
        ctxt->sax->cdataBlock = NULL;
14926
1.19M
        options -= XML_PARSE_NOCDATA;
14927
1.19M
  ctxt->options |= XML_PARSE_NOCDATA;
14928
1.19M
    }
14929
3.00M
    if (options & XML_PARSE_NSCLEAN) {
14930
1.63M
  ctxt->options |= XML_PARSE_NSCLEAN;
14931
1.63M
        options -= XML_PARSE_NSCLEAN;
14932
1.63M
    }
14933
3.00M
    if (options & XML_PARSE_NONET) {
14934
1.03M
  ctxt->options |= XML_PARSE_NONET;
14935
1.03M
        options -= XML_PARSE_NONET;
14936
1.03M
    }
14937
3.00M
    if (options & XML_PARSE_COMPACT) {
14938
1.70M
  ctxt->options |= XML_PARSE_COMPACT;
14939
1.70M
        options -= XML_PARSE_COMPACT;
14940
1.70M
    }
14941
3.00M
    if (options & XML_PARSE_OLD10) {
14942
1.00M
  ctxt->options |= XML_PARSE_OLD10;
14943
1.00M
        options -= XML_PARSE_OLD10;
14944
1.00M
    }
14945
3.00M
    if (options & XML_PARSE_NOBASEFIX) {
14946
1.16M
  ctxt->options |= XML_PARSE_NOBASEFIX;
14947
1.16M
        options -= XML_PARSE_NOBASEFIX;
14948
1.16M
    }
14949
3.00M
    if (options & XML_PARSE_HUGE) {
14950
1.01M
  ctxt->options |= XML_PARSE_HUGE;
14951
1.01M
        options -= XML_PARSE_HUGE;
14952
1.01M
        if (ctxt->dict != NULL)
14953
1.01M
            xmlDictSetLimit(ctxt->dict, 0);
14954
1.01M
    }
14955
3.00M
    if (options & XML_PARSE_OLDSAX) {
14956
955k
  ctxt->options |= XML_PARSE_OLDSAX;
14957
955k
        options -= XML_PARSE_OLDSAX;
14958
955k
    }
14959
3.00M
    if (options & XML_PARSE_IGNORE_ENC) {
14960
1.61M
  ctxt->options |= XML_PARSE_IGNORE_ENC;
14961
1.61M
        options -= XML_PARSE_IGNORE_ENC;
14962
1.61M
    }
14963
3.00M
    if (options & XML_PARSE_BIG_LINES) {
14964
1.18M
  ctxt->options |= XML_PARSE_BIG_LINES;
14965
1.18M
        options -= XML_PARSE_BIG_LINES;
14966
1.18M
    }
14967
3.00M
    ctxt->linenumbers = 1;
14968
3.00M
    return (options);
14969
3.00M
}
14970
14971
/**
14972
 * xmlCtxtUseOptions:
14973
 * @ctxt: an XML parser context
14974
 * @options:  a combination of xmlParserOption
14975
 *
14976
 * Applies the options to the parser context
14977
 *
14978
 * Returns 0 in case of success, the set of unknown or unimplemented options
14979
 *         in case of error.
14980
 */
14981
int
14982
xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
14983
2.00M
{
14984
2.00M
   return(xmlCtxtUseOptionsInternal(ctxt, options, NULL));
14985
2.00M
}
14986
14987
/**
14988
 * xmlDoRead:
14989
 * @ctxt:  an XML parser context
14990
 * @URL:  the base URL to use for the document
14991
 * @encoding:  the document encoding, or NULL
14992
 * @options:  a combination of xmlParserOption
14993
 * @reuse:  keep the context for reuse
14994
 *
14995
 * Common front-end for the xmlRead functions
14996
 *
14997
 * Returns the resulting document tree or NULL
14998
 */
14999
static xmlDocPtr
15000
xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
15001
          int options, int reuse)
15002
1.00M
{
15003
1.00M
    xmlDocPtr ret;
15004
15005
1.00M
    xmlCtxtUseOptionsInternal(ctxt, options, encoding);
15006
1.00M
    if (encoding != NULL) {
15007
0
        xmlCharEncodingHandlerPtr hdlr;
15008
15009
0
  hdlr = xmlFindCharEncodingHandler(encoding);
15010
0
  if (hdlr != NULL)
15011
0
      xmlSwitchToEncoding(ctxt, hdlr);
15012
0
    }
15013
1.00M
    if ((URL != NULL) && (ctxt->input != NULL) &&
15014
1.00M
        (ctxt->input->filename == NULL))
15015
1.00M
        ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
15016
1.00M
    xmlParseDocument(ctxt);
15017
1.00M
    if ((ctxt->wellFormed) || ctxt->recovery)
15018
502k
        ret = ctxt->myDoc;
15019
499k
    else {
15020
499k
        ret = NULL;
15021
499k
  if (ctxt->myDoc != NULL) {
15022
446k
      xmlFreeDoc(ctxt->myDoc);
15023
446k
  }
15024
499k
    }
15025
1.00M
    ctxt->myDoc = NULL;
15026
1.00M
    if (!reuse) {
15027
1.00M
  xmlFreeParserCtxt(ctxt);
15028
1.00M
    }
15029
15030
1.00M
    return (ret);
15031
1.00M
}
15032
15033
/**
15034
 * xmlReadDoc:
15035
 * @cur:  a pointer to a zero terminated string
15036
 * @URL:  the base URL to use for the document
15037
 * @encoding:  the document encoding, or NULL
15038
 * @options:  a combination of xmlParserOption
15039
 *
15040
 * parse an XML in-memory document and build a tree.
15041
 *
15042
 * Returns the resulting document tree
15043
 */
15044
xmlDocPtr
15045
xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
15046
0
{
15047
0
    xmlParserCtxtPtr ctxt;
15048
15049
0
    if (cur == NULL)
15050
0
        return (NULL);
15051
0
    xmlInitParser();
15052
15053
0
    ctxt = xmlCreateDocParserCtxt(cur);
15054
0
    if (ctxt == NULL)
15055
0
        return (NULL);
15056
0
    return (xmlDoRead(ctxt, URL, encoding, options, 0));
15057
0
}
15058
15059
/**
15060
 * xmlReadFile:
15061
 * @filename:  a file or URL
15062
 * @encoding:  the document encoding, or NULL
15063
 * @options:  a combination of xmlParserOption
15064
 *
15065
 * parse an XML file from the filesystem or the network.
15066
 *
15067
 * Returns the resulting document tree
15068
 */
15069
xmlDocPtr
15070
xmlReadFile(const char *filename, const char *encoding, int options)
15071
0
{
15072
0
    xmlParserCtxtPtr ctxt;
15073
15074
0
    xmlInitParser();
15075
0
    ctxt = xmlCreateURLParserCtxt(filename, options);
15076
0
    if (ctxt == NULL)
15077
0
        return (NULL);
15078
0
    return (xmlDoRead(ctxt, NULL, encoding, options, 0));
15079
0
}
15080
15081
/**
15082
 * xmlReadMemory:
15083
 * @buffer:  a pointer to a char array
15084
 * @size:  the size of the array
15085
 * @URL:  the base URL to use for the document
15086
 * @encoding:  the document encoding, or NULL
15087
 * @options:  a combination of xmlParserOption
15088
 *
15089
 * parse an XML in-memory document and build a tree.
15090
 *
15091
 * Returns the resulting document tree
15092
 */
15093
xmlDocPtr
15094
xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
15095
1.00M
{
15096
1.00M
    xmlParserCtxtPtr ctxt;
15097
15098
1.00M
    xmlInitParser();
15099
1.00M
    ctxt = xmlCreateMemoryParserCtxt(buffer, size);
15100
1.00M
    if (ctxt == NULL)
15101
687
        return (NULL);
15102
1.00M
    return (xmlDoRead(ctxt, URL, encoding, options, 0));
15103
1.00M
}
15104
15105
/**
15106
 * xmlReadFd:
15107
 * @fd:  an open file descriptor
15108
 * @URL:  the base URL to use for the document
15109
 * @encoding:  the document encoding, or NULL
15110
 * @options:  a combination of xmlParserOption
15111
 *
15112
 * parse an XML from a file descriptor and build a tree.
15113
 * NOTE that the file descriptor will not be closed when the
15114
 *      reader is closed or reset.
15115
 *
15116
 * Returns the resulting document tree
15117
 */
15118
xmlDocPtr
15119
xmlReadFd(int fd, const char *URL, const char *encoding, int options)
15120
0
{
15121
0
    xmlParserCtxtPtr ctxt;
15122
0
    xmlParserInputBufferPtr input;
15123
0
    xmlParserInputPtr stream;
15124
15125
0
    if (fd < 0)
15126
0
        return (NULL);
15127
0
    xmlInitParser();
15128
15129
0
    input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15130
0
    if (input == NULL)
15131
0
        return (NULL);
15132
0
    input->closecallback = NULL;
15133
0
    ctxt = xmlNewParserCtxt();
15134
0
    if (ctxt == NULL) {
15135
0
        xmlFreeParserInputBuffer(input);
15136
0
        return (NULL);
15137
0
    }
15138
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15139
0
    if (stream == NULL) {
15140
0
        xmlFreeParserInputBuffer(input);
15141
0
  xmlFreeParserCtxt(ctxt);
15142
0
        return (NULL);
15143
0
    }
15144
0
    inputPush(ctxt, stream);
15145
0
    return (xmlDoRead(ctxt, URL, encoding, options, 0));
15146
0
}
15147
15148
/**
15149
 * xmlReadIO:
15150
 * @ioread:  an I/O read function
15151
 * @ioclose:  an I/O close function
15152
 * @ioctx:  an I/O handler
15153
 * @URL:  the base URL to use for the document
15154
 * @encoding:  the document encoding, or NULL
15155
 * @options:  a combination of xmlParserOption
15156
 *
15157
 * parse an XML document from I/O functions and source and build a tree.
15158
 *
15159
 * Returns the resulting document tree
15160
 */
15161
xmlDocPtr
15162
xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
15163
          void *ioctx, const char *URL, const char *encoding, int options)
15164
0
{
15165
0
    xmlParserCtxtPtr ctxt;
15166
0
    xmlParserInputBufferPtr input;
15167
0
    xmlParserInputPtr stream;
15168
15169
0
    if (ioread == NULL)
15170
0
        return (NULL);
15171
0
    xmlInitParser();
15172
15173
0
    input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15174
0
                                         XML_CHAR_ENCODING_NONE);
15175
0
    if (input == NULL) {
15176
0
        if (ioclose != NULL)
15177
0
            ioclose(ioctx);
15178
0
        return (NULL);
15179
0
    }
15180
0
    ctxt = xmlNewParserCtxt();
15181
0
    if (ctxt == NULL) {
15182
0
        xmlFreeParserInputBuffer(input);
15183
0
        return (NULL);
15184
0
    }
15185
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15186
0
    if (stream == NULL) {
15187
0
        xmlFreeParserInputBuffer(input);
15188
0
  xmlFreeParserCtxt(ctxt);
15189
0
        return (NULL);
15190
0
    }
15191
0
    inputPush(ctxt, stream);
15192
0
    return (xmlDoRead(ctxt, URL, encoding, options, 0));
15193
0
}
15194
15195
/**
15196
 * xmlCtxtReadDoc:
15197
 * @ctxt:  an XML parser context
15198
 * @cur:  a pointer to a zero terminated string
15199
 * @URL:  the base URL to use for the document
15200
 * @encoding:  the document encoding, or NULL
15201
 * @options:  a combination of xmlParserOption
15202
 *
15203
 * parse an XML in-memory document and build a tree.
15204
 * This reuses the existing @ctxt parser context
15205
 *
15206
 * Returns the resulting document tree
15207
 */
15208
xmlDocPtr
15209
xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
15210
               const char *URL, const char *encoding, int options)
15211
0
{
15212
0
    if (cur == NULL)
15213
0
        return (NULL);
15214
0
    return (xmlCtxtReadMemory(ctxt, (const char *) cur, xmlStrlen(cur), URL,
15215
0
                              encoding, options));
15216
0
}
15217
15218
/**
15219
 * xmlCtxtReadFile:
15220
 * @ctxt:  an XML parser context
15221
 * @filename:  a file or URL
15222
 * @encoding:  the document encoding, or NULL
15223
 * @options:  a combination of xmlParserOption
15224
 *
15225
 * parse an XML file from the filesystem or the network.
15226
 * This reuses the existing @ctxt parser context
15227
 *
15228
 * Returns the resulting document tree
15229
 */
15230
xmlDocPtr
15231
xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
15232
                const char *encoding, int options)
15233
0
{
15234
0
    xmlParserInputPtr stream;
15235
15236
0
    if (filename == NULL)
15237
0
        return (NULL);
15238
0
    if (ctxt == NULL)
15239
0
        return (NULL);
15240
0
    xmlInitParser();
15241
15242
0
    xmlCtxtReset(ctxt);
15243
15244
0
    stream = xmlLoadExternalEntity(filename, NULL, ctxt);
15245
0
    if (stream == NULL) {
15246
0
        return (NULL);
15247
0
    }
15248
0
    inputPush(ctxt, stream);
15249
0
    return (xmlDoRead(ctxt, NULL, encoding, options, 1));
15250
0
}
15251
15252
/**
15253
 * xmlCtxtReadMemory:
15254
 * @ctxt:  an XML parser context
15255
 * @buffer:  a pointer to a char array
15256
 * @size:  the size of the array
15257
 * @URL:  the base URL to use for the document
15258
 * @encoding:  the document encoding, or NULL
15259
 * @options:  a combination of xmlParserOption
15260
 *
15261
 * parse an XML in-memory document and build a tree.
15262
 * This reuses the existing @ctxt parser context
15263
 *
15264
 * Returns the resulting document tree
15265
 */
15266
xmlDocPtr
15267
xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
15268
                  const char *URL, const char *encoding, int options)
15269
0
{
15270
0
    xmlParserInputBufferPtr input;
15271
0
    xmlParserInputPtr stream;
15272
15273
0
    if (ctxt == NULL)
15274
0
        return (NULL);
15275
0
    if (buffer == NULL)
15276
0
        return (NULL);
15277
0
    xmlInitParser();
15278
15279
0
    xmlCtxtReset(ctxt);
15280
15281
0
    input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
15282
0
    if (input == NULL) {
15283
0
  return(NULL);
15284
0
    }
15285
15286
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15287
0
    if (stream == NULL) {
15288
0
  xmlFreeParserInputBuffer(input);
15289
0
  return(NULL);
15290
0
    }
15291
15292
0
    inputPush(ctxt, stream);
15293
0
    return (xmlDoRead(ctxt, URL, encoding, options, 1));
15294
0
}
15295
15296
/**
15297
 * xmlCtxtReadFd:
15298
 * @ctxt:  an XML parser context
15299
 * @fd:  an open file descriptor
15300
 * @URL:  the base URL to use for the document
15301
 * @encoding:  the document encoding, or NULL
15302
 * @options:  a combination of xmlParserOption
15303
 *
15304
 * parse an XML from a file descriptor and build a tree.
15305
 * This reuses the existing @ctxt parser context
15306
 * NOTE that the file descriptor will not be closed when the
15307
 *      reader is closed or reset.
15308
 *
15309
 * Returns the resulting document tree
15310
 */
15311
xmlDocPtr
15312
xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
15313
              const char *URL, const char *encoding, int options)
15314
0
{
15315
0
    xmlParserInputBufferPtr input;
15316
0
    xmlParserInputPtr stream;
15317
15318
0
    if (fd < 0)
15319
0
        return (NULL);
15320
0
    if (ctxt == NULL)
15321
0
        return (NULL);
15322
0
    xmlInitParser();
15323
15324
0
    xmlCtxtReset(ctxt);
15325
15326
15327
0
    input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15328
0
    if (input == NULL)
15329
0
        return (NULL);
15330
0
    input->closecallback = NULL;
15331
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15332
0
    if (stream == NULL) {
15333
0
        xmlFreeParserInputBuffer(input);
15334
0
        return (NULL);
15335
0
    }
15336
0
    inputPush(ctxt, stream);
15337
0
    return (xmlDoRead(ctxt, URL, encoding, options, 1));
15338
0
}
15339
15340
/**
15341
 * xmlCtxtReadIO:
15342
 * @ctxt:  an XML parser context
15343
 * @ioread:  an I/O read function
15344
 * @ioclose:  an I/O close function
15345
 * @ioctx:  an I/O handler
15346
 * @URL:  the base URL to use for the document
15347
 * @encoding:  the document encoding, or NULL
15348
 * @options:  a combination of xmlParserOption
15349
 *
15350
 * parse an XML document from I/O functions and source and build a tree.
15351
 * This reuses the existing @ctxt parser context
15352
 *
15353
 * Returns the resulting document tree
15354
 */
15355
xmlDocPtr
15356
xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
15357
              xmlInputCloseCallback ioclose, void *ioctx,
15358
        const char *URL,
15359
              const char *encoding, int options)
15360
0
{
15361
0
    xmlParserInputBufferPtr input;
15362
0
    xmlParserInputPtr stream;
15363
15364
0
    if (ioread == NULL)
15365
0
        return (NULL);
15366
0
    if (ctxt == NULL)
15367
0
        return (NULL);
15368
0
    xmlInitParser();
15369
15370
0
    xmlCtxtReset(ctxt);
15371
15372
0
    input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15373
0
                                         XML_CHAR_ENCODING_NONE);
15374
0
    if (input == NULL) {
15375
0
        if (ioclose != NULL)
15376
0
            ioclose(ioctx);
15377
0
        return (NULL);
15378
0
    }
15379
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15380
0
    if (stream == NULL) {
15381
0
        xmlFreeParserInputBuffer(input);
15382
0
        return (NULL);
15383
0
    }
15384
0
    inputPush(ctxt, stream);
15385
0
    return (xmlDoRead(ctxt, URL, encoding, options, 1));
15386
0
}
15387