Coverage Report

Created: 2024-08-27 12:12

/src/libxml2/parser.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3
 *            implemented on top of the SAX interfaces
4
 *
5
 * References:
6
 *   The XML specification:
7
 *     http://www.w3.org/TR/REC-xml
8
 *   Original 1.0 version:
9
 *     http://www.w3.org/TR/1998/REC-xml-19980210
10
 *   XML second edition working draft
11
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
12
 *
13
 * Okay this is a big file, the parser core is around 7000 lines, then it
14
 * is followed by the progressive parser top routines, then the various
15
 * high level APIs to call the parser and a few miscellaneous functions.
16
 * A number of helper functions and deprecated ones have been moved to
17
 * parserInternals.c to reduce this file size.
18
 * As much as possible the functions are associated with their relative
19
 * production in the XML specification. A few productions defining the
20
 * different ranges of character are actually implemented either in
21
 * parserInternals.h or parserInternals.c
22
 * The DOM tree build is realized from the default SAX callbacks in
23
 * the module SAX.c.
24
 * The routines doing the validation checks are in valid.c and called either
25
 * from the SAX callbacks or as standalone functions using a preparsed
26
 * document.
27
 *
28
 * See Copyright for the status of this software.
29
 *
30
 * daniel@veillard.com
31
 */
32
33
/* To avoid EBCDIC trouble when parsing on zOS */
34
#if defined(__MVS__)
35
#pragma convert("ISO8859-1")
36
#endif
37
38
#define IN_LIBXML
39
#include "libxml.h"
40
41
#if defined(_WIN32)
42
#define XML_DIR_SEP '\\'
43
#else
44
#define XML_DIR_SEP '/'
45
#endif
46
47
#include <stdlib.h>
48
#include <limits.h>
49
#include <string.h>
50
#include <stdarg.h>
51
#include <stddef.h>
52
#include <ctype.h>
53
#include <stdlib.h>
54
#include <libxml/xmlmemory.h>
55
#include <libxml/threads.h>
56
#include <libxml/globals.h>
57
#include <libxml/tree.h>
58
#include <libxml/parser.h>
59
#include <libxml/parserInternals.h>
60
#include <libxml/HTMLparser.h>
61
#include <libxml/valid.h>
62
#include <libxml/entities.h>
63
#include <libxml/xmlerror.h>
64
#include <libxml/encoding.h>
65
#include <libxml/xmlIO.h>
66
#include <libxml/uri.h>
67
#ifdef LIBXML_CATALOG_ENABLED
68
#include <libxml/catalog.h>
69
#endif
70
#ifdef LIBXML_SCHEMAS_ENABLED
71
#include <libxml/xmlschemastypes.h>
72
#include <libxml/relaxng.h>
73
#endif
74
#if defined(LIBXML_XPATH_ENABLED) || defined(LIBXML_SCHEMAS_ENABLED)
75
#include <libxml/xpath.h>
76
#endif
77
78
#include "private/buf.h"
79
#include "private/dict.h"
80
#include "private/enc.h"
81
#include "private/entities.h"
82
#include "private/error.h"
83
#include "private/globals.h"
84
#include "private/html.h"
85
#include "private/io.h"
86
#include "private/memory.h"
87
#include "private/parser.h"
88
#include "private/threads.h"
89
#include "private/xpath.h"
90
91
/*
 * Record describing one start tag.
 * NOTE(review): no user of this struct is visible in this part of the file;
 * the field notes below are inferred from the field names only - confirm
 * against the code that pushes and pops these records.
 */
struct _xmlStartTag {
92
    const xmlChar *prefix;  /* presumably the tag's namespace prefix - confirm */
93
    const xmlChar *URI;     /* presumably the tag's namespace URI - confirm */
94
    int line;               /* presumably the input line of the tag - confirm */
95
    int nsNr;               /* presumably a namespace count for the tag - confirm */
96
};
97
98
static xmlParserCtxtPtr
99
xmlCreateEntityParserCtxtInternal(xmlSAXHandlerPtr sax, void *userData,
100
        const xmlChar *URL, const xmlChar *ID, const xmlChar *base,
101
        xmlParserCtxtPtr pctx);
102
103
static void xmlHaltParser(xmlParserCtxtPtr ctxt);
104
105
static int
106
xmlParseElementStart(xmlParserCtxtPtr ctxt);
107
108
static void
109
xmlParseElementEnd(xmlParserCtxtPtr ctxt);
110
111
/************************************************************************
112
 *                  *
113
 *  Arbitrary limits set in the parser. See XML_PARSE_HUGE    *
114
 *                  *
115
 ************************************************************************/
116
117
10.1M
#define XML_MAX_HUGE_LENGTH 1000000000
118
119
#define XML_PARSER_BIG_ENTITY 1000
120
#define XML_PARSER_LOT_ENTITY 5000
121
122
/*
123
 * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity
124
 *    replacement over the size in byte of the input indicates that you have
125
 *    an exponential behaviour. A value of 10 corresponds to at least 3 entity
126
 *    replacement per byte of input.
127
 */
128
766
#define XML_PARSER_NON_LINEAR 10
129
130
61.1M
#define XML_ENT_FIXED_COST 50
131
132
/**
133
 * xmlParserMaxDepth:
134
 *
135
 * arbitrary depth limit for the XML documents that we allow to
136
 * process. This is not a limitation of the parser but a safety
137
 * boundary feature. It can be disabled with the XML_PARSE_HUGE
138
 * parser option.
139
 */
140
unsigned int xmlParserMaxDepth = 256;
141
142
143
144
#define SAX2 1
145
72.0M
#define XML_PARSER_BIG_BUFFER_SIZE 300
146
6.88G
#define XML_PARSER_BUFFER_SIZE 100
147
620k
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
148
149
/**
150
 * XML_PARSER_CHUNK_SIZE
151
 *
152
 * When calling GROW that's the minimal amount of data
153
 * the parser expected to have received. It is not a hard
154
 * limit but an optimization when reading strings like Names
155
 * It is not strictly needed as long as inputs available characters
156
 * are followed by 0, which should be provided by the I/O level
157
 */
158
35.6M
#define XML_PARSER_CHUNK_SIZE 100
159
160
/*
161
 * List of XML prefixed PI allowed by W3C specs
162
 */
163
164
static const char* const xmlW3CPIs[] = {
165
    "xml-stylesheet",
166
    "xml-model",
167
    NULL
168
};
169
170
171
/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
172
static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
173
                                              const xmlChar **str);
174
175
static xmlParserErrors
176
xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
177
                xmlSAXHandlerPtr sax,
178
          void *user_data, int depth, const xmlChar *URL,
179
          const xmlChar *ID, xmlNodePtr *list);
180
181
static int
182
xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
183
                          const char *encoding);
184
#ifdef LIBXML_LEGACY_ENABLED
185
static void
186
xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
187
                      xmlNodePtr lastNode);
188
#endif /* LIBXML_LEGACY_ENABLED */
189
190
static xmlParserErrors
191
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
192
          const xmlChar *string, void *user_data, xmlNodePtr *lst);
193
194
static int
195
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
196
197
/************************************************************************
198
 *                  *
199
 *    Some factorized error routines        *
200
 *                  *
201
 ************************************************************************/
202
203
/**
204
 * xmlErrAttributeDup:
205
 * @ctxt:  an XML parser context
206
 * @prefix:  the attribute prefix
207
 * @localname:  the attribute localname
208
 *
209
 * Handle a redefinition of attribute error
210
 */
211
static void
212
xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
213
                   const xmlChar * localname)
214
31.4k
{
215
31.4k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
216
31.4k
        (ctxt->instate == XML_PARSER_EOF))
217
114
  return;
218
31.3k
    if (ctxt != NULL)
219
31.3k
  ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
220
221
31.3k
    if (prefix == NULL)
222
22.4k
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
223
22.4k
                        XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
224
22.4k
                        (const char *) localname, NULL, NULL, 0, 0,
225
22.4k
                        "Attribute %s redefined\n", localname);
226
8.85k
    else
227
8.85k
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
228
8.85k
                        XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
229
8.85k
                        (const char *) prefix, (const char *) localname,
230
8.85k
                        NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
231
8.85k
                        localname);
232
31.3k
    if (ctxt != NULL) {
233
31.3k
  ctxt->wellFormed = 0;
234
31.3k
  if (ctxt->recovery == 0)
235
15.0k
      ctxt->disableSAX = 1;
236
31.3k
    }
237
31.3k
}
238
239
/**
240
 * xmlFatalErr:
241
 * @ctxt:  an XML parser context
242
 * @error:  the error number
243
 * @extra:  extra information string
244
 *
245
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
246
 */
247
static void
248
xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
249
2.05M
{
250
2.05M
    const char *errmsg;
251
252
2.05M
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
253
2.05M
        (ctxt->instate == XML_PARSER_EOF))
254
13.1k
  return;
255
2.04M
    switch (error) {
256
21.2k
        case XML_ERR_INVALID_HEX_CHARREF:
257
21.2k
            errmsg = "CharRef: invalid hexadecimal value";
258
21.2k
            break;
259
37.1k
        case XML_ERR_INVALID_DEC_CHARREF:
260
37.1k
            errmsg = "CharRef: invalid decimal value";
261
37.1k
            break;
262
0
        case XML_ERR_INVALID_CHARREF:
263
0
            errmsg = "CharRef: invalid value";
264
0
            break;
265
465k
        case XML_ERR_INTERNAL_ERROR:
266
465k
            errmsg = "internal error";
267
465k
            break;
268
0
        case XML_ERR_PEREF_AT_EOF:
269
0
            errmsg = "PEReference at end of document";
270
0
            break;
271
0
        case XML_ERR_PEREF_IN_PROLOG:
272
0
            errmsg = "PEReference in prolog";
273
0
            break;
274
0
        case XML_ERR_PEREF_IN_EPILOG:
275
0
            errmsg = "PEReference in epilog";
276
0
            break;
277
0
        case XML_ERR_PEREF_NO_NAME:
278
0
            errmsg = "PEReference: no name";
279
0
            break;
280
765k
        case XML_ERR_PEREF_SEMICOL_MISSING:
281
765k
            errmsg = "PEReference: expecting ';'";
282
765k
            break;
283
1.53k
        case XML_ERR_ENTITY_LOOP:
284
1.53k
            errmsg = "Detected an entity reference loop";
285
1.53k
            break;
286
0
        case XML_ERR_ENTITY_NOT_STARTED:
287
0
            errmsg = "EntityValue: \" or ' expected";
288
0
            break;
289
6.03k
        case XML_ERR_ENTITY_PE_INTERNAL:
290
6.03k
            errmsg = "PEReferences forbidden in internal subset";
291
6.03k
            break;
292
1.85k
        case XML_ERR_ENTITY_NOT_FINISHED:
293
1.85k
            errmsg = "EntityValue: \" or ' expected";
294
1.85k
            break;
295
32.2k
        case XML_ERR_ATTRIBUTE_NOT_STARTED:
296
32.2k
            errmsg = "AttValue: \" or ' expected";
297
32.2k
            break;
298
94.5k
        case XML_ERR_LT_IN_ATTRIBUTE:
299
94.5k
            errmsg = "Unescaped '<' not allowed in attributes values";
300
94.5k
            break;
301
12.1k
        case XML_ERR_LITERAL_NOT_STARTED:
302
12.1k
            errmsg = "SystemLiteral \" or ' expected";
303
12.1k
            break;
304
13.9k
        case XML_ERR_LITERAL_NOT_FINISHED:
305
13.9k
            errmsg = "Unfinished System or Public ID \" or ' expected";
306
13.9k
            break;
307
14.4k
        case XML_ERR_MISPLACED_CDATA_END:
308
14.4k
            errmsg = "Sequence ']]>' not allowed in content";
309
14.4k
            break;
310
10.5k
        case XML_ERR_URI_REQUIRED:
311
10.5k
            errmsg = "SYSTEM or PUBLIC, the URI is missing";
312
10.5k
            break;
313
1.84k
        case XML_ERR_PUBID_REQUIRED:
314
1.84k
            errmsg = "PUBLIC, the Public Identifier is missing";
315
1.84k
            break;
316
22.4k
        case XML_ERR_HYPHEN_IN_COMMENT:
317
22.4k
            errmsg = "Comment must not contain '--' (double-hyphen)";
318
22.4k
            break;
319
12.1k
        case XML_ERR_PI_NOT_STARTED:
320
12.1k
            errmsg = "xmlParsePI : no target name";
321
12.1k
            break;
322
3.16k
        case XML_ERR_RESERVED_XML_NAME:
323
3.16k
            errmsg = "Invalid PI name";
324
3.16k
            break;
325
2.08k
        case XML_ERR_NOTATION_NOT_STARTED:
326
2.08k
            errmsg = "NOTATION: Name expected here";
327
2.08k
            break;
328
10.4k
        case XML_ERR_NOTATION_NOT_FINISHED:
329
10.4k
            errmsg = "'>' required to close NOTATION declaration";
330
10.4k
            break;
331
8.21k
        case XML_ERR_VALUE_REQUIRED:
332
8.21k
            errmsg = "Entity value required";
333
8.21k
            break;
334
2.36k
        case XML_ERR_URI_FRAGMENT:
335
2.36k
            errmsg = "Fragment not allowed";
336
2.36k
            break;
337
8.67k
        case XML_ERR_ATTLIST_NOT_STARTED:
338
8.67k
            errmsg = "'(' required to start ATTLIST enumeration";
339
8.67k
            break;
340
1.66k
        case XML_ERR_NMTOKEN_REQUIRED:
341
1.66k
            errmsg = "NmToken expected in ATTLIST enumeration";
342
1.66k
            break;
343
3.41k
        case XML_ERR_ATTLIST_NOT_FINISHED:
344
3.41k
            errmsg = "')' required to finish ATTLIST enumeration";
345
3.41k
            break;
346
2.40k
        case XML_ERR_MIXED_NOT_STARTED:
347
2.40k
            errmsg = "MixedContentDecl : '|' or ')*' expected";
348
2.40k
            break;
349
0
        case XML_ERR_PCDATA_REQUIRED:
350
0
            errmsg = "MixedContentDecl : '#PCDATA' expected";
351
0
            break;
352
8.08k
        case XML_ERR_ELEMCONTENT_NOT_STARTED:
353
8.08k
            errmsg = "ContentDecl : Name or '(' expected";
354
8.08k
            break;
355
9.21k
        case XML_ERR_ELEMCONTENT_NOT_FINISHED:
356
9.21k
            errmsg = "ContentDecl : ',' '|' or ')' expected";
357
9.21k
            break;
358
0
        case XML_ERR_PEREF_IN_INT_SUBSET:
359
0
            errmsg =
360
0
                "PEReference: forbidden within markup decl in internal subset";
361
0
            break;
362
91.3k
        case XML_ERR_GT_REQUIRED:
363
91.3k
            errmsg = "expected '>'";
364
91.3k
            break;
365
126
        case XML_ERR_CONDSEC_INVALID:
366
126
            errmsg = "XML conditional section '[' expected";
367
126
            break;
368
8.99k
        case XML_ERR_EXT_SUBSET_NOT_FINISHED:
369
8.99k
            errmsg = "Content error in the external subset";
370
8.99k
            break;
371
828
        case XML_ERR_CONDSEC_INVALID_KEYWORD:
372
828
            errmsg =
373
828
                "conditional section INCLUDE or IGNORE keyword expected";
374
828
            break;
375
1.35k
        case XML_ERR_CONDSEC_NOT_FINISHED:
376
1.35k
            errmsg = "XML conditional section not closed";
377
1.35k
            break;
378
196
        case XML_ERR_XMLDECL_NOT_STARTED:
379
196
            errmsg = "Text declaration '<?xml' required";
380
196
            break;
381
76.5k
        case XML_ERR_XMLDECL_NOT_FINISHED:
382
76.5k
            errmsg = "parsing XML declaration: '?>' expected";
383
76.5k
            break;
384
0
        case XML_ERR_EXT_ENTITY_STANDALONE:
385
0
            errmsg = "external parsed entities cannot be standalone";
386
0
            break;
387
89.8k
        case XML_ERR_ENTITYREF_SEMICOL_MISSING:
388
89.8k
            errmsg = "EntityRef: expecting ';'";
389
89.8k
            break;
390
6.74k
        case XML_ERR_DOCTYPE_NOT_FINISHED:
391
6.74k
            errmsg = "DOCTYPE improperly terminated";
392
6.74k
            break;
393
0
        case XML_ERR_LTSLASH_REQUIRED:
394
0
            errmsg = "EndTag: '</' not found";
395
0
            break;
396
5.81k
        case XML_ERR_EQUAL_REQUIRED:
397
5.81k
            errmsg = "expected '='";
398
5.81k
            break;
399
18.2k
        case XML_ERR_STRING_NOT_CLOSED:
400
18.2k
            errmsg = "String not closed expecting \" or '";
401
18.2k
            break;
402
5.15k
        case XML_ERR_STRING_NOT_STARTED:
403
5.15k
            errmsg = "String not started expecting ' or \"";
404
5.15k
            break;
405
2.28k
        case XML_ERR_ENCODING_NAME:
406
2.28k
            errmsg = "Invalid XML encoding name";
407
2.28k
            break;
408
720
        case XML_ERR_STANDALONE_VALUE:
409
720
            errmsg = "standalone accepts only 'yes' or 'no'";
410
720
            break;
411
16.8k
        case XML_ERR_DOCUMENT_EMPTY:
412
16.8k
            errmsg = "Document is empty";
413
16.8k
            break;
414
104k
        case XML_ERR_DOCUMENT_END:
415
104k
            errmsg = "Extra content at the end of the document";
416
104k
            break;
417
17.4k
        case XML_ERR_NOT_WELL_BALANCED:
418
17.4k
            errmsg = "chunk is not well balanced";
419
17.4k
            break;
420
0
        case XML_ERR_EXTRA_CONTENT:
421
0
            errmsg = "extra content at the end of well balanced chunk";
422
0
            break;
423
18.8k
        case XML_ERR_VERSION_MISSING:
424
18.8k
            errmsg = "Malformed declaration expecting version";
425
18.8k
            break;
426
157
        case XML_ERR_NAME_TOO_LONG:
427
157
            errmsg = "Name too long";
428
157
            break;
429
#if 0
430
        case:
431
            errmsg = "";
432
            break;
433
#endif
434
2.22k
        default:
435
2.22k
            errmsg = "Unregistered error message";
436
2.04M
    }
437
2.04M
    if (ctxt != NULL)
438
2.04M
  ctxt->errNo = error;
439
2.04M
    if (info == NULL) {
440
1.57M
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
441
1.57M
                        XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s\n",
442
1.57M
                        errmsg);
443
1.57M
    } else {
444
465k
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
445
465k
                        XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s: %s\n",
446
465k
                        errmsg, info);
447
465k
    }
448
2.04M
    if (ctxt != NULL) {
449
2.04M
  ctxt->wellFormed = 0;
450
2.04M
  if (ctxt->recovery == 0)
451
604k
      ctxt->disableSAX = 1;
452
2.04M
    }
453
2.04M
}
454
455
/**
456
 * xmlFatalErrMsg:
457
 * @ctxt:  an XML parser context
458
 * @error:  the error number
459
 * @msg:  the error message
460
 *
461
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
462
 */
463
static void LIBXML_ATTR_FORMAT(3,0)
464
xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
465
               const char *msg)
466
1.62M
{
467
1.62M
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
468
1.62M
        (ctxt->instate == XML_PARSER_EOF))
469
485
  return;
470
1.62M
    if (ctxt != NULL)
471
1.62M
  ctxt->errNo = error;
472
1.62M
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
473
1.62M
                    XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
474
1.62M
    if (ctxt != NULL) {
475
1.62M
  ctxt->wellFormed = 0;
476
1.62M
  if (ctxt->recovery == 0)
477
473k
      ctxt->disableSAX = 1;
478
1.62M
    }
479
1.62M
}
480
481
/**
482
 * xmlWarningMsg:
483
 * @ctxt:  an XML parser context
484
 * @error:  the error number
485
 * @msg:  the error message
486
 * @str1:  extra data
487
 * @str2:  extra data
488
 *
489
 * Handle a warning.
490
 */
491
static void LIBXML_ATTR_FORMAT(3,0)
492
xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
493
              const char *msg, const xmlChar *str1, const xmlChar *str2)
494
5.75M
{
495
5.75M
    xmlStructuredErrorFunc schannel = NULL;
496
497
5.75M
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
498
5.75M
        (ctxt->instate == XML_PARSER_EOF))
499
0
  return;
500
5.75M
    if ((ctxt != NULL) && (ctxt->sax != NULL) &&
501
5.75M
        (ctxt->sax->initialized == XML_SAX2_MAGIC))
502
3.97M
        schannel = ctxt->sax->serror;
503
5.75M
    if (ctxt != NULL) {
504
5.75M
        __xmlRaiseError(schannel,
505
5.75M
                    (ctxt->sax) ? ctxt->sax->warning : NULL,
506
5.75M
                    ctxt->userData,
507
5.75M
                    ctxt, NULL, XML_FROM_PARSER, error,
508
5.75M
                    XML_ERR_WARNING, NULL, 0,
509
5.75M
        (const char *) str1, (const char *) str2, NULL, 0, 0,
510
5.75M
        msg, (const char *) str1, (const char *) str2);
511
5.75M
    } else {
512
0
        __xmlRaiseError(schannel, NULL, NULL,
513
0
                    ctxt, NULL, XML_FROM_PARSER, error,
514
0
                    XML_ERR_WARNING, NULL, 0,
515
0
        (const char *) str1, (const char *) str2, NULL, 0, 0,
516
0
        msg, (const char *) str1, (const char *) str2);
517
0
    }
518
5.75M
}
519
520
/**
521
 * xmlValidityError:
522
 * @ctxt:  an XML parser context
523
 * @error:  the error number
524
 * @msg:  the error message
525
 * @str1:  extra data
526
 *
527
 * Handle a validity error.
528
 */
529
static void LIBXML_ATTR_FORMAT(3,0)
530
xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
531
              const char *msg, const xmlChar *str1, const xmlChar *str2)
532
1.48M
{
533
1.48M
    xmlStructuredErrorFunc schannel = NULL;
534
535
1.48M
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
536
1.48M
        (ctxt->instate == XML_PARSER_EOF))
537
0
  return;
538
1.48M
    if (ctxt != NULL) {
539
1.48M
  ctxt->errNo = error;
540
1.48M
  if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
541
643k
      schannel = ctxt->sax->serror;
542
1.48M
    }
543
1.48M
    if (ctxt != NULL) {
544
1.48M
        __xmlRaiseError(schannel,
545
1.48M
                    ctxt->vctxt.error, ctxt->vctxt.userData,
546
1.48M
                    ctxt, NULL, XML_FROM_DTD, error,
547
1.48M
                    XML_ERR_ERROR, NULL, 0, (const char *) str1,
548
1.48M
        (const char *) str2, NULL, 0, 0,
549
1.48M
        msg, (const char *) str1, (const char *) str2);
550
1.48M
  ctxt->valid = 0;
551
1.48M
    } else {
552
0
        __xmlRaiseError(schannel, NULL, NULL,
553
0
                    ctxt, NULL, XML_FROM_DTD, error,
554
0
                    XML_ERR_ERROR, NULL, 0, (const char *) str1,
555
0
        (const char *) str2, NULL, 0, 0,
556
0
        msg, (const char *) str1, (const char *) str2);
557
0
    }
558
1.48M
}
559
560
/**
561
 * xmlFatalErrMsgInt:
562
 * @ctxt:  an XML parser context
563
 * @error:  the error number
564
 * @msg:  the error message
565
 * @val:  an integer value
566
 *
567
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
568
 */
569
static void LIBXML_ATTR_FORMAT(3,0)
570
xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
571
                  const char *msg, int val)
572
1.37M
{
573
1.37M
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
574
1.37M
        (ctxt->instate == XML_PARSER_EOF))
575
0
  return;
576
1.37M
    if (ctxt != NULL)
577
1.37M
  ctxt->errNo = error;
578
1.37M
    __xmlRaiseError(NULL, NULL, NULL,
579
1.37M
                    ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
580
1.37M
                    NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
581
1.37M
    if (ctxt != NULL) {
582
1.37M
  ctxt->wellFormed = 0;
583
1.37M
  if (ctxt->recovery == 0)
584
251k
      ctxt->disableSAX = 1;
585
1.37M
    }
586
1.37M
}
587
588
/**
589
 * xmlFatalErrMsgStrIntStr:
590
 * @ctxt:  an XML parser context
591
 * @error:  the error number
592
 * @msg:  the error message
593
 * @str1:  an string info
594
 * @val:  an integer value
595
 * @str2:  an string info
596
 *
597
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
598
 */
599
static void LIBXML_ATTR_FORMAT(3,0)
600
xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
601
                  const char *msg, const xmlChar *str1, int val,
602
      const xmlChar *str2)
603
512k
{
604
512k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
605
512k
        (ctxt->instate == XML_PARSER_EOF))
606
0
  return;
607
512k
    if (ctxt != NULL)
608
512k
  ctxt->errNo = error;
609
512k
    __xmlRaiseError(NULL, NULL, NULL,
610
512k
                    ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
611
512k
                    NULL, 0, (const char *) str1, (const char *) str2,
612
512k
        NULL, val, 0, msg, str1, val, str2);
613
512k
    if (ctxt != NULL) {
614
512k
  ctxt->wellFormed = 0;
615
512k
  if (ctxt->recovery == 0)
616
175k
      ctxt->disableSAX = 1;
617
512k
    }
618
512k
}
619
620
/**
621
 * xmlFatalErrMsgStr:
622
 * @ctxt:  an XML parser context
623
 * @error:  the error number
624
 * @msg:  the error message
625
 * @val:  a string value
626
 *
627
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
628
 */
629
static void LIBXML_ATTR_FORMAT(3,0)
630
xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
631
                  const char *msg, const xmlChar * val)
632
3.43M
{
633
3.43M
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
634
3.43M
        (ctxt->instate == XML_PARSER_EOF))
635
46
  return;
636
3.43M
    if (ctxt != NULL)
637
3.43M
  ctxt->errNo = error;
638
3.43M
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
639
3.43M
                    XML_FROM_PARSER, error, XML_ERR_FATAL,
640
3.43M
                    NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
641
3.43M
                    val);
642
3.43M
    if (ctxt != NULL) {
643
3.43M
  ctxt->wellFormed = 0;
644
3.43M
  if (ctxt->recovery == 0)
645
508k
      ctxt->disableSAX = 1;
646
3.43M
    }
647
3.43M
}
648
649
/**
650
 * xmlErrMsgStr:
651
 * @ctxt:  an XML parser context
652
 * @error:  the error number
653
 * @msg:  the error message
654
 * @val:  a string value
655
 *
656
 * Handle a non fatal parser error
657
 */
658
static void LIBXML_ATTR_FORMAT(3,0)
659
xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
660
                  const char *msg, const xmlChar * val)
661
443k
{
662
443k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
663
443k
        (ctxt->instate == XML_PARSER_EOF))
664
0
  return;
665
443k
    if (ctxt != NULL)
666
443k
  ctxt->errNo = error;
667
443k
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
668
443k
                    XML_FROM_PARSER, error, XML_ERR_ERROR,
669
443k
                    NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
670
443k
                    val);
671
443k
}
672
673
/**
674
 * xmlNsErr:
675
 * @ctxt:  an XML parser context
676
 * @error:  the error number
677
 * @msg:  the message
678
 * @info1:  extra information string
679
 * @info2:  extra information string
680
 *
681
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
682
 */
683
static void LIBXML_ATTR_FORMAT(3,0)
684
xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
685
         const char *msg,
686
         const xmlChar * info1, const xmlChar * info2,
687
         const xmlChar * info3)
688
392k
{
689
392k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
690
392k
        (ctxt->instate == XML_PARSER_EOF))
691
133
  return;
692
392k
    if (ctxt != NULL)
693
392k
  ctxt->errNo = error;
694
392k
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
695
392k
                    XML_ERR_ERROR, NULL, 0, (const char *) info1,
696
392k
                    (const char *) info2, (const char *) info3, 0, 0, msg,
697
392k
                    info1, info2, info3);
698
392k
    if (ctxt != NULL)
699
392k
  ctxt->nsWellFormed = 0;
700
392k
}
701
702
/**
703
 * xmlNsWarn
704
 * @ctxt:  an XML parser context
705
 * @error:  the error number
706
 * @msg:  the message
707
 * @info1:  extra information string
708
 * @info2:  extra information string
709
 *
710
 * Handle a namespace warning error
711
 */
712
static void LIBXML_ATTR_FORMAT(3,0)
713
xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
714
         const char *msg,
715
         const xmlChar * info1, const xmlChar * info2,
716
         const xmlChar * info3)
717
8.98k
{
718
8.98k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
719
8.98k
        (ctxt->instate == XML_PARSER_EOF))
720
0
  return;
721
8.98k
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
722
8.98k
                    XML_ERR_WARNING, NULL, 0, (const char *) info1,
723
8.98k
                    (const char *) info2, (const char *) info3, 0, 0, msg,
724
8.98k
                    info1, info2, info3);
725
8.98k
}
726
727
/*
 * Add val to *dst, clamping the result at ULONG_MAX instead of wrapping.
 */
static void
xmlSaturatedAdd(unsigned long *dst, unsigned long val) {
    /* Headroom left before *dst would overflow. */
    unsigned long room = ULONG_MAX - *dst;

    *dst = (val > room) ? ULONG_MAX : (*dst + val);
}
734
735
/*
 * Add a size_t amount to *dst, clamping the result at ULONG_MAX instead
 * of wrapping.
 *
 * val is taken as size_t rather than unsigned long so that, on platforms
 * where size_t is wider than unsigned long, a large value saturates
 * instead of being silently truncated by the implicit conversion at the
 * call site before the overflow check runs.
 */
static void
xmlSaturatedAddSizeT(unsigned long *dst, size_t val) {
    /* The comparison is done in the wider of the two unsigned types. */
    if (val > ULONG_MAX - *dst)
        *dst = ULONG_MAX;
    else
        *dst += (unsigned long) val;
}
742
743
/**
744
 * xmlParserEntityCheck:
745
 * @ctxt:  parser context
746
 * @extra:  sum of unexpanded entity sizes
747
 *
748
 * Check for non-linear entity expansion behaviour.
749
 *
750
 * In some cases like xmlStringDecodeEntities, this function is called
751
 * for each, possibly nested entity and its unexpanded content length.
752
 *
753
 * In other cases like xmlParseReference, it's only called for each
754
 * top-level entity with its unexpanded content length plus the sum of
755
 * the unexpanded content lengths (plus fixed cost) of all nested
756
 * entities.
757
 *
758
 * Summing the unexpanded lengths also adds the length of the reference.
759
 * This is by design. Taking the length of the entity name into account
760
 * discourages attacks that try to waste CPU time with abusively long
761
 * entity names. See test/recurse/lol6.xml for example. Each call also
762
 * adds some fixed cost XML_ENT_FIXED_COST to discourage attacks with
763
 * short entities.
764
 *
765
 * Returns 1 on error, 0 on success.
766
 */
767
static int
768
xmlParserEntityCheck(xmlParserCtxtPtr ctxt, unsigned long extra)
769
61.1M
{
770
61.1M
    unsigned long consumed;
771
61.1M
    xmlParserInputPtr input = ctxt->input;
772
61.1M
    xmlEntityPtr entity = input->entity;
773
774
    /*
775
     * Compute total consumed bytes so far, including input streams of
776
     * external entities.
777
     */
778
61.1M
    consumed = input->parentConsumed;
779
61.1M
    if ((entity == NULL) ||
780
61.1M
        ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
781
31.9M
         ((entity->flags & XML_ENT_PARSED) == 0))) {
782
31.9M
        xmlSaturatedAdd(&consumed, input->consumed);
783
31.9M
        xmlSaturatedAddSizeT(&consumed, input->cur - input->base);
784
31.9M
    }
785
61.1M
    xmlSaturatedAdd(&consumed, ctxt->sizeentities);
786
787
    /*
788
     * Add extra cost and some fixed cost.
789
     */
790
61.1M
    xmlSaturatedAdd(&ctxt->sizeentcopy, extra);
791
61.1M
    xmlSaturatedAdd(&ctxt->sizeentcopy, XML_ENT_FIXED_COST);
792
793
    /*
794
     * It's important to always use saturation arithmetic when tracking
795
     * entity sizes to make the size checks reliable. If "sizeentcopy"
796
     * overflows, we have to abort.
797
     */
798
61.1M
    if ((ctxt->sizeentcopy > XML_MAX_TEXT_LENGTH) &&
799
61.1M
        ((ctxt->sizeentcopy >= ULONG_MAX) ||
800
766
         (ctxt->sizeentcopy / XML_PARSER_NON_LINEAR > consumed))) {
801
766
        xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_LOOP,
802
766
                       "Maximum entity amplification factor exceeded");
803
766
        xmlHaltParser(ctxt);
804
766
        return(1);
805
766
    }
806
807
61.1M
    return(0);
808
61.1M
}
809
810
/************************************************************************
811
 *                  *
812
 *    Library wide options          *
813
 *                  *
814
 ************************************************************************/
815
816
/**
817
  * xmlHasFeature:
818
  * @feature: the feature to be examined
819
  *
820
  * Examines if the library has been compiled with a given feature.
821
  *
822
  * Returns a non-zero value if the feature exists, otherwise zero.
823
  * Returns zero (0) if the feature does not exist or an unknown
824
  * feature is requested, non-zero otherwise.
825
  */
826
int
827
xmlHasFeature(xmlFeature feature)
828
0
{
829
0
    switch (feature) {
830
0
  case XML_WITH_THREAD:
831
0
#ifdef LIBXML_THREAD_ENABLED
832
0
      return(1);
833
#else
834
      return(0);
835
#endif
836
0
        case XML_WITH_TREE:
837
0
#ifdef LIBXML_TREE_ENABLED
838
0
            return(1);
839
#else
840
            return(0);
841
#endif
842
0
        case XML_WITH_OUTPUT:
843
0
#ifdef LIBXML_OUTPUT_ENABLED
844
0
            return(1);
845
#else
846
            return(0);
847
#endif
848
0
        case XML_WITH_PUSH:
849
0
#ifdef LIBXML_PUSH_ENABLED
850
0
            return(1);
851
#else
852
            return(0);
853
#endif
854
0
        case XML_WITH_READER:
855
0
#ifdef LIBXML_READER_ENABLED
856
0
            return(1);
857
#else
858
            return(0);
859
#endif
860
0
        case XML_WITH_PATTERN:
861
0
#ifdef LIBXML_PATTERN_ENABLED
862
0
            return(1);
863
#else
864
            return(0);
865
#endif
866
0
        case XML_WITH_WRITER:
867
0
#ifdef LIBXML_WRITER_ENABLED
868
0
            return(1);
869
#else
870
            return(0);
871
#endif
872
0
        case XML_WITH_SAX1:
873
0
#ifdef LIBXML_SAX1_ENABLED
874
0
            return(1);
875
#else
876
            return(0);
877
#endif
878
0
        case XML_WITH_FTP:
879
#ifdef LIBXML_FTP_ENABLED
880
            return(1);
881
#else
882
0
            return(0);
883
0
#endif
884
0
        case XML_WITH_HTTP:
885
#ifdef LIBXML_HTTP_ENABLED
886
            return(1);
887
#else
888
0
            return(0);
889
0
#endif
890
0
        case XML_WITH_VALID:
891
0
#ifdef LIBXML_VALID_ENABLED
892
0
            return(1);
893
#else
894
            return(0);
895
#endif
896
0
        case XML_WITH_HTML:
897
0
#ifdef LIBXML_HTML_ENABLED
898
0
            return(1);
899
#else
900
            return(0);
901
#endif
902
0
        case XML_WITH_LEGACY:
903
#ifdef LIBXML_LEGACY_ENABLED
904
            return(1);
905
#else
906
0
            return(0);
907
0
#endif
908
0
        case XML_WITH_C14N:
909
0
#ifdef LIBXML_C14N_ENABLED
910
0
            return(1);
911
#else
912
            return(0);
913
#endif
914
0
        case XML_WITH_CATALOG:
915
0
#ifdef LIBXML_CATALOG_ENABLED
916
0
            return(1);
917
#else
918
            return(0);
919
#endif
920
0
        case XML_WITH_XPATH:
921
0
#ifdef LIBXML_XPATH_ENABLED
922
0
            return(1);
923
#else
924
            return(0);
925
#endif
926
0
        case XML_WITH_XPTR:
927
0
#ifdef LIBXML_XPTR_ENABLED
928
0
            return(1);
929
#else
930
            return(0);
931
#endif
932
0
        case XML_WITH_XINCLUDE:
933
0
#ifdef LIBXML_XINCLUDE_ENABLED
934
0
            return(1);
935
#else
936
            return(0);
937
#endif
938
0
        case XML_WITH_ICONV:
939
0
#ifdef LIBXML_ICONV_ENABLED
940
0
            return(1);
941
#else
942
            return(0);
943
#endif
944
0
        case XML_WITH_ISO8859X:
945
0
#ifdef LIBXML_ISO8859X_ENABLED
946
0
            return(1);
947
#else
948
            return(0);
949
#endif
950
0
        case XML_WITH_UNICODE:
951
0
#ifdef LIBXML_UNICODE_ENABLED
952
0
            return(1);
953
#else
954
            return(0);
955
#endif
956
0
        case XML_WITH_REGEXP:
957
0
#ifdef LIBXML_REGEXP_ENABLED
958
0
            return(1);
959
#else
960
            return(0);
961
#endif
962
0
        case XML_WITH_AUTOMATA:
963
0
#ifdef LIBXML_AUTOMATA_ENABLED
964
0
            return(1);
965
#else
966
            return(0);
967
#endif
968
0
        case XML_WITH_EXPR:
969
#ifdef LIBXML_EXPR_ENABLED
970
            return(1);
971
#else
972
0
            return(0);
973
0
#endif
974
0
        case XML_WITH_SCHEMAS:
975
0
#ifdef LIBXML_SCHEMAS_ENABLED
976
0
            return(1);
977
#else
978
            return(0);
979
#endif
980
0
        case XML_WITH_SCHEMATRON:
981
0
#ifdef LIBXML_SCHEMATRON_ENABLED
982
0
            return(1);
983
#else
984
            return(0);
985
#endif
986
0
        case XML_WITH_MODULES:
987
0
#ifdef LIBXML_MODULES_ENABLED
988
0
            return(1);
989
#else
990
            return(0);
991
#endif
992
0
        case XML_WITH_DEBUG:
993
#ifdef LIBXML_DEBUG_ENABLED
994
            return(1);
995
#else
996
0
            return(0);
997
0
#endif
998
0
        case XML_WITH_DEBUG_MEM:
999
#ifdef DEBUG_MEMORY_LOCATION
1000
            return(1);
1001
#else
1002
0
            return(0);
1003
0
#endif
1004
0
        case XML_WITH_DEBUG_RUN:
1005
0
            return(0);
1006
0
        case XML_WITH_ZLIB:
1007
0
#ifdef LIBXML_ZLIB_ENABLED
1008
0
            return(1);
1009
#else
1010
            return(0);
1011
#endif
1012
0
        case XML_WITH_LZMA:
1013
0
#ifdef LIBXML_LZMA_ENABLED
1014
0
            return(1);
1015
#else
1016
            return(0);
1017
#endif
1018
0
        case XML_WITH_ICU:
1019
#ifdef LIBXML_ICU_ENABLED
1020
            return(1);
1021
#else
1022
0
            return(0);
1023
0
#endif
1024
0
        default:
1025
0
      break;
1026
0
     }
1027
0
     return(0);
1028
0
}
1029
1030
/************************************************************************
1031
 *                  *
1032
 *    SAX2 defaulted attributes handling      *
1033
 *                  *
1034
 ************************************************************************/
1035
1036
/**
1037
 * xmlDetectSAX2:
1038
 * @ctxt:  an XML parser context
1039
 *
1040
 * Do the SAX2 detection and specific initialization
1041
 */
1042
static void
1043
886k
xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
1044
886k
    xmlSAXHandlerPtr sax;
1045
1046
    /* Avoid unused variable warning if features are disabled. */
1047
886k
    (void) sax;
1048
1049
886k
    if (ctxt == NULL) return;
1050
886k
    sax = ctxt->sax;
1051
886k
#ifdef LIBXML_SAX1_ENABLED
1052
886k
    if ((sax) &&  (sax->initialized == XML_SAX2_MAGIC) &&
1053
886k
        ((sax->startElementNs != NULL) ||
1054
544k
         (sax->endElementNs != NULL) ||
1055
544k
         ((sax->startElement == NULL) && (sax->endElement == NULL))))
1056
544k
        ctxt->sax2 = 1;
1057
#else
1058
    ctxt->sax2 = 1;
1059
#endif /* LIBXML_SAX1_ENABLED */
1060
1061
886k
    ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
1062
886k
    ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
1063
886k
    ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
1064
886k
    if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
1065
886k
    (ctxt->str_xml_ns == NULL)) {
1066
0
        xmlErrMemory(ctxt, NULL);
1067
0
    }
1068
886k
}
1069
1070
typedef struct _xmlDefAttrs xmlDefAttrs;
1071
typedef xmlDefAttrs *xmlDefAttrsPtr;
1072
struct _xmlDefAttrs {
1073
    int nbAttrs;  /* number of defaulted attributes on that element */
1074
    int maxAttrs;       /* the size of the array */
1075
#if __STDC_VERSION__ >= 199901L
1076
    /* Using a C99 flexible array member avoids UBSan errors. */
1077
    const xmlChar *values[]; /* array of localname/prefix/values/external */
1078
#else
1079
    const xmlChar *values[5];
1080
#endif
1081
};
1082
1083
/**
1084
 * xmlAttrNormalizeSpace:
1085
 * @src: the source string
1086
 * @dst: the target string
1087
 *
1088
 * Normalize the space in non CDATA attribute values:
1089
 * If the attribute type is not CDATA, then the XML processor MUST further
1090
 * process the normalized attribute value by discarding any leading and
1091
 * trailing space (#x20) characters, and by replacing sequences of space
1092
 * (#x20) characters by a single space (#x20) character.
1093
 * Note that the size of dst need to be at least src, and if one doesn't need
1094
 * to preserve dst (and it doesn't come from a dictionary or read-only) then
1095
 * passing src as dst is just fine.
1096
 *
1097
 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1098
 *         is needed.
1099
 */
1100
static xmlChar *
1101
xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
1102
155k
{
1103
155k
    if ((src == NULL) || (dst == NULL))
1104
0
        return(NULL);
1105
1106
181k
    while (*src == 0x20) src++;
1107
2.50M
    while (*src != 0) {
1108
2.35M
  if (*src == 0x20) {
1109
519k
      while (*src == 0x20) src++;
1110
179k
      if (*src != 0)
1111
164k
    *dst++ = 0x20;
1112
2.17M
  } else {
1113
2.17M
      *dst++ = *src++;
1114
2.17M
  }
1115
2.35M
    }
1116
155k
    *dst = 0;
1117
155k
    if (dst == src)
1118
133k
       return(NULL);
1119
22.1k
    return(dst);
1120
155k
}
1121
1122
/**
1123
 * xmlAttrNormalizeSpace2:
1124
 * @src: the source string
1125
 *
1126
 * Normalize the space in non CDATA attribute values, a slightly more complex
1127
 * front end to avoid allocation problems when running on attribute values
1128
 * coming from the input.
1129
 *
1130
 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1131
 *         is needed.
1132
 */
1133
static const xmlChar *
1134
xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
1135
26.5k
{
1136
26.5k
    int i;
1137
26.5k
    int remove_head = 0;
1138
26.5k
    int need_realloc = 0;
1139
26.5k
    const xmlChar *cur;
1140
1141
26.5k
    if ((ctxt == NULL) || (src == NULL) || (len == NULL))
1142
0
        return(NULL);
1143
26.5k
    i = *len;
1144
26.5k
    if (i <= 0)
1145
1.43k
        return(NULL);
1146
1147
25.0k
    cur = src;
1148
39.3k
    while (*cur == 0x20) {
1149
14.2k
        cur++;
1150
14.2k
  remove_head++;
1151
14.2k
    }
1152
790k
    while (*cur != 0) {
1153
771k
  if (*cur == 0x20) {
1154
48.4k
      cur++;
1155
48.4k
      if ((*cur == 0x20) || (*cur == 0)) {
1156
5.97k
          need_realloc = 1;
1157
5.97k
    break;
1158
5.97k
      }
1159
48.4k
  } else
1160
722k
      cur++;
1161
771k
    }
1162
25.0k
    if (need_realloc) {
1163
5.97k
        xmlChar *ret;
1164
1165
5.97k
  ret = xmlStrndup(src + remove_head, i - remove_head + 1);
1166
5.97k
  if (ret == NULL) {
1167
0
      xmlErrMemory(ctxt, NULL);
1168
0
      return(NULL);
1169
0
  }
1170
5.97k
  xmlAttrNormalizeSpace(ret, ret);
1171
5.97k
  *len = strlen((const char *)ret);
1172
5.97k
        return(ret);
1173
19.1k
    } else if (remove_head) {
1174
1.19k
        *len -= remove_head;
1175
1.19k
        memmove(src, src + remove_head, 1 + *len);
1176
1.19k
  return(src);
1177
1.19k
    }
1178
17.9k
    return(NULL);
1179
25.0k
}
1180
1181
/**
1182
 * xmlAddDefAttrs:
1183
 * @ctxt:  an XML parser context
1184
 * @fullname:  the element fullname
1185
 * @fullattr:  the attribute fullname
1186
 * @value:  the attribute value
1187
 *
1188
 * Add a defaulted attribute for an element
1189
 */
1190
static void
1191
xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1192
               const xmlChar *fullname,
1193
               const xmlChar *fullattr,
1194
187k
               const xmlChar *value) {
1195
187k
    xmlDefAttrsPtr defaults;
1196
187k
    int len;
1197
187k
    const xmlChar *name;
1198
187k
    const xmlChar *prefix;
1199
1200
    /*
1201
     * Allows to detect attribute redefinitions
1202
     */
1203
187k
    if (ctxt->attsSpecial != NULL) {
1204
165k
        if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1205
47.2k
      return;
1206
165k
    }
1207
1208
140k
    if (ctxt->attsDefault == NULL) {
1209
29.2k
        ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
1210
29.2k
  if (ctxt->attsDefault == NULL)
1211
0
      goto mem_error;
1212
29.2k
    }
1213
1214
    /*
1215
     * split the element name into prefix:localname , the string found
1216
     * are within the DTD and then not associated to namespace names.
1217
     */
1218
140k
    name = xmlSplitQName3(fullname, &len);
1219
140k
    if (name == NULL) {
1220
132k
        name = xmlDictLookup(ctxt->dict, fullname, -1);
1221
132k
  prefix = NULL;
1222
132k
    } else {
1223
8.69k
        name = xmlDictLookup(ctxt->dict, name, -1);
1224
8.69k
  prefix = xmlDictLookup(ctxt->dict, fullname, len);
1225
8.69k
    }
1226
1227
    /*
1228
     * make sure there is some storage
1229
     */
1230
140k
    defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1231
140k
    if (defaults == NULL) {
1232
78.1k
        defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
1233
78.1k
                     (4 * 5) * sizeof(const xmlChar *));
1234
78.1k
  if (defaults == NULL)
1235
0
      goto mem_error;
1236
78.1k
  defaults->nbAttrs = 0;
1237
78.1k
  defaults->maxAttrs = 4;
1238
78.1k
  if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1239
78.1k
                          defaults, NULL) < 0) {
1240
0
      xmlFree(defaults);
1241
0
      goto mem_error;
1242
0
  }
1243
78.1k
    } else if (defaults->nbAttrs >= defaults->maxAttrs) {
1244
3.83k
        xmlDefAttrsPtr temp;
1245
1246
3.83k
        temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
1247
3.83k
           (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
1248
3.83k
  if (temp == NULL)
1249
0
      goto mem_error;
1250
3.83k
  defaults = temp;
1251
3.83k
  defaults->maxAttrs *= 2;
1252
3.83k
  if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1253
3.83k
                          defaults, NULL) < 0) {
1254
0
      xmlFree(defaults);
1255
0
      goto mem_error;
1256
0
  }
1257
3.83k
    }
1258
1259
    /*
1260
     * Split the element name into prefix:localname , the string found
1261
     * are within the DTD and hen not associated to namespace names.
1262
     */
1263
140k
    name = xmlSplitQName3(fullattr, &len);
1264
140k
    if (name == NULL) {
1265
120k
        name = xmlDictLookup(ctxt->dict, fullattr, -1);
1266
120k
  prefix = NULL;
1267
120k
    } else {
1268
20.3k
        name = xmlDictLookup(ctxt->dict, name, -1);
1269
20.3k
  prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1270
20.3k
    }
1271
1272
140k
    defaults->values[5 * defaults->nbAttrs] = name;
1273
140k
    defaults->values[5 * defaults->nbAttrs + 1] = prefix;
1274
    /* intern the string and precompute the end */
1275
140k
    len = xmlStrlen(value);
1276
140k
    value = xmlDictLookup(ctxt->dict, value, len);
1277
140k
    if (value == NULL)
1278
0
        goto mem_error;
1279
140k
    defaults->values[5 * defaults->nbAttrs + 2] = value;
1280
140k
    defaults->values[5 * defaults->nbAttrs + 3] = value + len;
1281
140k
    if (ctxt->external)
1282
30.7k
        defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
1283
109k
    else
1284
109k
        defaults->values[5 * defaults->nbAttrs + 4] = NULL;
1285
140k
    defaults->nbAttrs++;
1286
1287
140k
    return;
1288
1289
0
mem_error:
1290
0
    xmlErrMemory(ctxt, NULL);
1291
0
    return;
1292
140k
}
1293
1294
/**
1295
 * xmlAddSpecialAttr:
1296
 * @ctxt:  an XML parser context
1297
 * @fullname:  the element fullname
1298
 * @fullattr:  the attribute fullname
1299
 * @type:  the attribute type
1300
 *
1301
 * Register this attribute type
1302
 */
1303
static void
1304
xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1305
      const xmlChar *fullname,
1306
      const xmlChar *fullattr,
1307
      int type)
1308
2.53M
{
1309
2.53M
    if (ctxt->attsSpecial == NULL) {
1310
51.6k
        ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
1311
51.6k
  if (ctxt->attsSpecial == NULL)
1312
0
      goto mem_error;
1313
51.6k
    }
1314
1315
2.53M
    if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1316
769k
        return;
1317
1318
1.76M
    xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1319
1.76M
                     (void *) (ptrdiff_t) type);
1320
1.76M
    return;
1321
1322
0
mem_error:
1323
0
    xmlErrMemory(ctxt, NULL);
1324
0
    return;
1325
2.53M
}
1326
1327
/**
1328
 * xmlCleanSpecialAttrCallback:
1329
 *
1330
 * Removes CDATA attributes from the special attribute table
1331
 */
1332
static void
1333
xmlCleanSpecialAttrCallback(void *payload, void *data,
1334
                            const xmlChar *fullname, const xmlChar *fullattr,
1335
1.25M
                            const xmlChar *unused ATTRIBUTE_UNUSED) {
1336
1.25M
    xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1337
1338
1.25M
    if (((ptrdiff_t) payload) == XML_ATTRIBUTE_CDATA) {
1339
517k
        xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1340
517k
    }
1341
1.25M
}
1342
1343
/**
1344
 * xmlCleanSpecialAttr:
1345
 * @ctxt:  an XML parser context
1346
 *
1347
 * Trim the list of attributes defined to remove all those of type
1348
 * CDATA as they are not special. This call should be done when finishing
1349
 * to parse the DTD and before starting to parse the document root.
1350
 */
1351
static void
1352
xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1353
228k
{
1354
228k
    if (ctxt->attsSpecial == NULL)
1355
187k
        return;
1356
1357
40.4k
    xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1358
1359
40.4k
    if (xmlHashSize(ctxt->attsSpecial) == 0) {
1360
11.0k
        xmlHashFree(ctxt->attsSpecial, NULL);
1361
11.0k
        ctxt->attsSpecial = NULL;
1362
11.0k
    }
1363
40.4k
    return;
1364
228k
}
1365
1366
/**
1367
 * xmlCheckLanguageID:
1368
 * @lang:  pointer to the string value
1369
 *
1370
 * Checks that the value conforms to the LanguageID production:
1371
 *
1372
 * NOTE: this is somewhat deprecated, those productions were removed from
1373
 *       the XML Second edition.
1374
 *
1375
 * [33] LanguageID ::= Langcode ('-' Subcode)*
1376
 * [34] Langcode ::= ISO639Code |  IanaCode |  UserCode
1377
 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1378
 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1379
 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1380
 * [38] Subcode ::= ([a-z] | [A-Z])+
1381
 *
1382
 * The current REC reference the successors of RFC 1766, currently 5646
1383
 *
1384
 * http://www.rfc-editor.org/rfc/rfc5646.txt
1385
 * langtag       = language
1386
 *                 ["-" script]
1387
 *                 ["-" region]
1388
 *                 *("-" variant)
1389
 *                 *("-" extension)
1390
 *                 ["-" privateuse]
1391
 * language      = 2*3ALPHA            ; shortest ISO 639 code
1392
 *                 ["-" extlang]       ; sometimes followed by
1393
 *                                     ; extended language subtags
1394
 *               / 4ALPHA              ; or reserved for future use
1395
 *               / 5*8ALPHA            ; or registered language subtag
1396
 *
1397
 * extlang       = 3ALPHA              ; selected ISO 639 codes
1398
 *                 *2("-" 3ALPHA)      ; permanently reserved
1399
 *
1400
 * script        = 4ALPHA              ; ISO 15924 code
1401
 *
1402
 * region        = 2ALPHA              ; ISO 3166-1 code
1403
 *               / 3DIGIT              ; UN M.49 code
1404
 *
1405
 * variant       = 5*8alphanum         ; registered variants
1406
 *               / (DIGIT 3alphanum)
1407
 *
1408
 * extension     = singleton 1*("-" (2*8alphanum))
1409
 *
1410
 *                                     ; Single alphanumerics
1411
 *                                     ; "x" reserved for private use
1412
 * singleton     = DIGIT               ; 0 - 9
1413
 *               / %x41-57             ; A - W
1414
 *               / %x59-5A             ; Y - Z
1415
 *               / %x61-77             ; a - w
1416
 *               / %x79-7A             ; y - z
1417
 *
1418
 * it sounds right to still allow Irregular i-xxx IANA and user codes too
1419
 * The parser below doesn't try to cope with extension or privateuse
1420
 * that could be added but that's not interoperable anyway
1421
 *
1422
 * Returns 1 if correct 0 otherwise
1423
 **/
1424
int
1425
xmlCheckLanguageID(const xmlChar * lang)
1426
23.8k
{
1427
23.8k
    const xmlChar *cur = lang, *nxt;
1428
1429
23.8k
    if (cur == NULL)
1430
481
        return (0);
1431
23.3k
    if (((cur[0] == 'i') && (cur[1] == '-')) ||
1432
23.3k
        ((cur[0] == 'I') && (cur[1] == '-')) ||
1433
23.3k
        ((cur[0] == 'x') && (cur[1] == '-')) ||
1434
23.3k
        ((cur[0] == 'X') && (cur[1] == '-'))) {
1435
        /*
1436
         * Still allow IANA code and user code which were coming
1437
         * from the previous version of the XML-1.0 specification
1438
         * it's deprecated but we should not fail
1439
         */
1440
1.70k
        cur += 2;
1441
10.8k
        while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1442
10.8k
               ((cur[0] >= 'a') && (cur[0] <= 'z')))
1443
9.10k
            cur++;
1444
1.70k
        return(cur[0] == 0);
1445
1.70k
    }
1446
21.6k
    nxt = cur;
1447
79.9k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1448
79.9k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1449
58.2k
           nxt++;
1450
21.6k
    if (nxt - cur >= 4) {
1451
        /*
1452
         * Reserved
1453
         */
1454
933
        if ((nxt - cur > 8) || (nxt[0] != 0))
1455
716
            return(0);
1456
217
        return(1);
1457
933
    }
1458
20.7k
    if (nxt - cur < 2)
1459
1.11k
        return(0);
1460
    /* we got an ISO 639 code */
1461
19.6k
    if (nxt[0] == 0)
1462
6.42k
        return(1);
1463
13.2k
    if (nxt[0] != '-')
1464
1.24k
        return(0);
1465
1466
11.9k
    nxt++;
1467
11.9k
    cur = nxt;
1468
    /* now we can have extlang or script or region or variant */
1469
11.9k
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1470
1.57k
        goto region_m49;
1471
1472
47.5k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1473
47.5k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1474
37.1k
           nxt++;
1475
10.3k
    if (nxt - cur == 4)
1476
3.31k
        goto script;
1477
7.07k
    if (nxt - cur == 2)
1478
2.13k
        goto region;
1479
4.94k
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1480
934
        goto variant;
1481
4.00k
    if (nxt - cur != 3)
1482
1.18k
        return(0);
1483
    /* we parsed an extlang */
1484
2.81k
    if (nxt[0] == 0)
1485
409
        return(1);
1486
2.41k
    if (nxt[0] != '-')
1487
502
        return(0);
1488
1489
1.90k
    nxt++;
1490
1.90k
    cur = nxt;
1491
    /* now we can have script or region or variant */
1492
1.90k
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1493
295
        goto region_m49;
1494
1495
24.5k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1496
24.5k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1497
22.9k
           nxt++;
1498
1.61k
    if (nxt - cur == 2)
1499
444
        goto region;
1500
1.16k
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1501
263
        goto variant;
1502
906
    if (nxt - cur != 4)
1503
569
        return(0);
1504
    /* we parsed a script */
1505
3.65k
script:
1506
3.65k
    if (nxt[0] == 0)
1507
544
        return(1);
1508
3.10k
    if (nxt[0] != '-')
1509
885
        return(0);
1510
1511
2.22k
    nxt++;
1512
2.22k
    cur = nxt;
1513
    /* now we can have region or variant */
1514
2.22k
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1515
745
        goto region_m49;
1516
1517
12.5k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1518
12.5k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1519
11.0k
           nxt++;
1520
1521
1.47k
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1522
426
        goto variant;
1523
1.05k
    if (nxt - cur != 2)
1524
665
        return(0);
1525
    /* we parsed a region */
1526
4.31k
region:
1527
4.31k
    if (nxt[0] == 0)
1528
1.31k
        return(1);
1529
2.99k
    if (nxt[0] != '-')
1530
1.89k
        return(0);
1531
1532
1.10k
    nxt++;
1533
1.10k
    cur = nxt;
1534
    /* now we can just have a variant */
1535
11.2k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1536
11.2k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1537
10.1k
           nxt++;
1538
1539
1.10k
    if ((nxt - cur < 5) || (nxt - cur > 8))
1540
530
        return(0);
1541
1542
    /* we parsed a variant */
1543
2.19k
variant:
1544
2.19k
    if (nxt[0] == 0)
1545
221
        return(1);
1546
1.97k
    if (nxt[0] != '-')
1547
1.41k
        return(0);
1548
    /* extensions and private use subtags not checked */
1549
559
    return (1);
1550
1551
2.61k
region_m49:
1552
2.61k
    if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
1553
2.61k
        ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
1554
1.34k
        nxt += 3;
1555
1.34k
        goto region;
1556
1.34k
    }
1557
1.27k
    return(0);
1558
2.61k
}
1559
1560
/************************************************************************
1561
 *                  *
1562
 *    Parser stacks related functions and macros    *
1563
 *                  *
1564
 ************************************************************************/
1565
1566
static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1567
                                            const xmlChar ** str);
1568
1569
#ifdef SAX2
1570
/**
1571
 * nsPush:
1572
 * @ctxt:  an XML parser context
1573
 * @prefix:  the namespace prefix or NULL
1574
 * @URL:  the namespace name
1575
 *
1576
 * Pushes a new parser namespace on top of the ns stack
1577
 *
1578
 * Returns -1 in case of error, -2 if the namespace should be discarded
1579
 *     and the index in the stack otherwise.
1580
 */
1581
static int
1582
nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1583
153k
{
1584
153k
    if (ctxt->options & XML_PARSE_NSCLEAN) {
1585
86.0k
        int i;
1586
170k
  for (i = ctxt->nsNr - 2;i >= 0;i -= 2) {
1587
116k
      if (ctxt->nsTab[i] == prefix) {
1588
    /* in scope */
1589
32.7k
          if (ctxt->nsTab[i + 1] == URL)
1590
9.94k
        return(-2);
1591
    /* out of scope keep it */
1592
22.8k
    break;
1593
32.7k
      }
1594
116k
  }
1595
86.0k
    }
1596
143k
    if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1597
47.2k
  ctxt->nsMax = 10;
1598
47.2k
  ctxt->nsNr = 0;
1599
47.2k
  ctxt->nsTab = (const xmlChar **)
1600
47.2k
                xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1601
47.2k
  if (ctxt->nsTab == NULL) {
1602
0
      xmlErrMemory(ctxt, NULL);
1603
0
      ctxt->nsMax = 0;
1604
0
            return (-1);
1605
0
  }
1606
96.1k
    } else if (ctxt->nsNr >= ctxt->nsMax) {
1607
4.86k
        const xmlChar ** tmp;
1608
4.86k
        ctxt->nsMax *= 2;
1609
4.86k
        tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1610
4.86k
            ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1611
4.86k
        if (tmp == NULL) {
1612
0
            xmlErrMemory(ctxt, NULL);
1613
0
      ctxt->nsMax /= 2;
1614
0
            return (-1);
1615
0
        }
1616
4.86k
  ctxt->nsTab = tmp;
1617
4.86k
    }
1618
143k
    ctxt->nsTab[ctxt->nsNr++] = prefix;
1619
143k
    ctxt->nsTab[ctxt->nsNr++] = URL;
1620
143k
    return (ctxt->nsNr);
1621
143k
}
1622
/**
1623
 * nsPop:
1624
 * @ctxt: an XML parser context
1625
 * @nr:  the number to pop
1626
 *
1627
 * Pops the top @nr parser prefix/namespace from the ns stack
1628
 *
1629
 * Returns the number of namespaces removed
1630
 */
1631
static int
1632
nsPop(xmlParserCtxtPtr ctxt, int nr)
1633
49.8k
{
1634
49.8k
    int i;
1635
1636
49.8k
    if (ctxt->nsTab == NULL) return(0);
1637
49.8k
    if (ctxt->nsNr < nr) {
1638
0
        xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1639
0
        nr = ctxt->nsNr;
1640
0
    }
1641
49.8k
    if (ctxt->nsNr <= 0)
1642
0
        return (0);
1643
1644
190k
    for (i = 0;i < nr;i++) {
1645
140k
         ctxt->nsNr--;
1646
140k
   ctxt->nsTab[ctxt->nsNr] = NULL;
1647
140k
    }
1648
49.8k
    return(nr);
1649
49.8k
}
1650
#endif
1651
1652
static int
1653
109k
xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1654
109k
    const xmlChar **atts;
1655
109k
    int *attallocs;
1656
109k
    int maxatts;
1657
1658
109k
    if (nr + 5 > ctxt->maxatts) {
1659
109k
  maxatts = ctxt->maxatts == 0 ? 55 : (nr + 5) * 2;
1660
109k
  atts = (const xmlChar **) xmlMalloc(
1661
109k
             maxatts * sizeof(const xmlChar *));
1662
109k
  if (atts == NULL) goto mem_error;
1663
109k
  attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1664
109k
                               (maxatts / 5) * sizeof(int));
1665
109k
  if (attallocs == NULL) {
1666
0
            xmlFree(atts);
1667
0
            goto mem_error;
1668
0
        }
1669
109k
        if (ctxt->maxatts > 0)
1670
535
            memcpy(atts, ctxt->atts, ctxt->maxatts * sizeof(const xmlChar *));
1671
109k
        xmlFree(ctxt->atts);
1672
109k
  ctxt->atts = atts;
1673
109k
  ctxt->attallocs = attallocs;
1674
109k
  ctxt->maxatts = maxatts;
1675
109k
    }
1676
109k
    return(ctxt->maxatts);
1677
0
mem_error:
1678
0
    xmlErrMemory(ctxt, NULL);
1679
0
    return(-1);
1680
109k
}
1681
1682
/**
1683
 * inputPush:
1684
 * @ctxt:  an XML parser context
1685
 * @value:  the parser input
1686
 *
1687
 * Pushes a new parser input on top of the input stack
1688
 *
1689
 * Returns -1 in case of error, the index in the stack otherwise
1690
 */
1691
int
1692
inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1693
29.8M
{
1694
29.8M
    if ((ctxt == NULL) || (value == NULL))
1695
0
        return(-1);
1696
29.8M
    if (ctxt->inputNr >= ctxt->inputMax) {
1697
1.63k
        size_t newSize = ctxt->inputMax * 2;
1698
1.63k
        xmlParserInputPtr *tmp;
1699
1700
1.63k
        tmp = (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1701
1.63k
                                               newSize * sizeof(*tmp));
1702
1.63k
        if (tmp == NULL) {
1703
0
            xmlErrMemory(ctxt, NULL);
1704
0
            return (-1);
1705
0
        }
1706
1.63k
        ctxt->inputTab = tmp;
1707
1.63k
        ctxt->inputMax = newSize;
1708
1.63k
    }
1709
29.8M
    ctxt->inputTab[ctxt->inputNr] = value;
1710
29.8M
    ctxt->input = value;
1711
29.8M
    return (ctxt->inputNr++);
1712
29.8M
}
1713
/**
1714
 * inputPop:
1715
 * @ctxt: an XML parser context
1716
 *
1717
 * Pops the top parser input from the input stack
1718
 *
1719
 * Returns the input just removed
1720
 */
1721
xmlParserInputPtr
1722
inputPop(xmlParserCtxtPtr ctxt)
1723
32.2M
{
1724
32.2M
    xmlParserInputPtr ret;
1725
1726
32.2M
    if (ctxt == NULL)
1727
0
        return(NULL);
1728
32.2M
    if (ctxt->inputNr <= 0)
1729
2.43M
        return (NULL);
1730
29.8M
    ctxt->inputNr--;
1731
29.8M
    if (ctxt->inputNr > 0)
1732
29.2M
        ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1733
625k
    else
1734
625k
        ctxt->input = NULL;
1735
29.8M
    ret = ctxt->inputTab[ctxt->inputNr];
1736
29.8M
    ctxt->inputTab[ctxt->inputNr] = NULL;
1737
29.8M
    return (ret);
1738
32.2M
}
1739
/**
1740
 * nodePush:
1741
 * @ctxt:  an XML parser context
1742
 * @value:  the element node
1743
 *
1744
 * Pushes a new element node on top of the node stack
1745
 *
1746
 * Returns -1 in case of error, the index in the stack otherwise
1747
 */
1748
int
1749
nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1750
9.26M
{
1751
9.26M
    if (ctxt == NULL) return(0);
1752
9.26M
    if (ctxt->nodeNr >= ctxt->nodeMax) {
1753
10.2k
        xmlNodePtr *tmp;
1754
1755
10.2k
  tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1756
10.2k
                                      ctxt->nodeMax * 2 *
1757
10.2k
                                      sizeof(ctxt->nodeTab[0]));
1758
10.2k
        if (tmp == NULL) {
1759
0
            xmlErrMemory(ctxt, NULL);
1760
0
            return (-1);
1761
0
        }
1762
10.2k
        ctxt->nodeTab = tmp;
1763
10.2k
  ctxt->nodeMax *= 2;
1764
10.2k
    }
1765
9.26M
    if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1766
9.26M
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
1767
0
  xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
1768
0
     "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
1769
0
        xmlParserMaxDepth);
1770
0
  xmlHaltParser(ctxt);
1771
0
  return(-1);
1772
0
    }
1773
9.26M
    ctxt->nodeTab[ctxt->nodeNr] = value;
1774
9.26M
    ctxt->node = value;
1775
9.26M
    return (ctxt->nodeNr++);
1776
9.26M
}
1777
1778
/**
1779
 * nodePop:
1780
 * @ctxt: an XML parser context
1781
 *
1782
 * Pops the top element node from the node stack
1783
 *
1784
 * Returns the node just removed
1785
 */
1786
xmlNodePtr
1787
nodePop(xmlParserCtxtPtr ctxt)
1788
8.74M
{
1789
8.74M
    xmlNodePtr ret;
1790
1791
8.74M
    if (ctxt == NULL) return(NULL);
1792
8.74M
    if (ctxt->nodeNr <= 0)
1793
115k
        return (NULL);
1794
8.62M
    ctxt->nodeNr--;
1795
8.62M
    if (ctxt->nodeNr > 0)
1796
8.27M
        ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1797
356k
    else
1798
356k
        ctxt->node = NULL;
1799
8.62M
    ret = ctxt->nodeTab[ctxt->nodeNr];
1800
8.62M
    ctxt->nodeTab[ctxt->nodeNr] = NULL;
1801
8.62M
    return (ret);
1802
8.74M
}
1803
1804
/**
1805
 * nameNsPush:
1806
 * @ctxt:  an XML parser context
1807
 * @value:  the element name
1808
 * @prefix:  the element prefix
1809
 * @URI:  the element namespace name
1810
 * @line:  the current line number for error messages
1811
 * @nsNr:  the number of namespaces pushed on the namespace table
1812
 *
1813
 * Pushes a new element name/prefix/URL on top of the name stack
1814
 *
1815
 * Returns -1 in case of error, the index in the stack otherwise
1816
 */
1817
static int
1818
nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1819
           const xmlChar *prefix, const xmlChar *URI, int line, int nsNr)
1820
7.82M
{
1821
7.82M
    xmlStartTag *tag;
1822
1823
7.82M
    if (ctxt->nameNr >= ctxt->nameMax) {
1824
23.5k
        const xmlChar * *tmp;
1825
23.5k
        xmlStartTag *tmp2;
1826
23.5k
        ctxt->nameMax *= 2;
1827
23.5k
        tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1828
23.5k
                                    ctxt->nameMax *
1829
23.5k
                                    sizeof(ctxt->nameTab[0]));
1830
23.5k
        if (tmp == NULL) {
1831
0
      ctxt->nameMax /= 2;
1832
0
      goto mem_error;
1833
0
        }
1834
23.5k
  ctxt->nameTab = tmp;
1835
23.5k
        tmp2 = (xmlStartTag *) xmlRealloc((void * *)ctxt->pushTab,
1836
23.5k
                                    ctxt->nameMax *
1837
23.5k
                                    sizeof(ctxt->pushTab[0]));
1838
23.5k
        if (tmp2 == NULL) {
1839
0
      ctxt->nameMax /= 2;
1840
0
      goto mem_error;
1841
0
        }
1842
23.5k
  ctxt->pushTab = tmp2;
1843
7.80M
    } else if (ctxt->pushTab == NULL) {
1844
344k
        ctxt->pushTab = (xmlStartTag *) xmlMalloc(ctxt->nameMax *
1845
344k
                                            sizeof(ctxt->pushTab[0]));
1846
344k
        if (ctxt->pushTab == NULL)
1847
0
            goto mem_error;
1848
344k
    }
1849
7.82M
    ctxt->nameTab[ctxt->nameNr] = value;
1850
7.82M
    ctxt->name = value;
1851
7.82M
    tag = &ctxt->pushTab[ctxt->nameNr];
1852
7.82M
    tag->prefix = prefix;
1853
7.82M
    tag->URI = URI;
1854
7.82M
    tag->line = line;
1855
7.82M
    tag->nsNr = nsNr;
1856
7.82M
    return (ctxt->nameNr++);
1857
0
mem_error:
1858
0
    xmlErrMemory(ctxt, NULL);
1859
0
    return (-1);
1860
7.82M
}
1861
#ifdef LIBXML_PUSH_ENABLED
/**
 * nameNsPop:
 * @ctxt: an XML parser context
 *
 * Pops the top element name from the name stack and makes the entry
 * below it, if any, the current name (ctxt->name).
 *
 * Returns the name just removed, or NULL if the stack is empty
 */
static const xmlChar *
nameNsPop(xmlParserCtxtPtr ctxt)
{
    const xmlChar *top;
    int idx;

    if (ctxt->nameNr <= 0)
        return (NULL);

    idx = --ctxt->nameNr;
    ctxt->name = (idx > 0) ? ctxt->nameTab[idx - 1] : NULL;

    top = ctxt->nameTab[idx];
    ctxt->nameTab[idx] = NULL;          /* clear the vacated slot */
    return (top);
}
#endif /* LIBXML_PUSH_ENABLED */
1887
1888
/**
1889
 * namePush:
1890
 * @ctxt:  an XML parser context
1891
 * @value:  the element name
1892
 *
1893
 * Pushes a new element name on top of the name stack
1894
 *
1895
 * Returns -1 in case of error, the index in the stack otherwise
1896
 */
1897
int
1898
namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
1899
0
{
1900
0
    if (ctxt == NULL) return (-1);
1901
1902
0
    if (ctxt->nameNr >= ctxt->nameMax) {
1903
0
        const xmlChar * *tmp;
1904
0
        tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1905
0
                                    ctxt->nameMax * 2 *
1906
0
                                    sizeof(ctxt->nameTab[0]));
1907
0
        if (tmp == NULL) {
1908
0
      goto mem_error;
1909
0
        }
1910
0
  ctxt->nameTab = tmp;
1911
0
        ctxt->nameMax *= 2;
1912
0
    }
1913
0
    ctxt->nameTab[ctxt->nameNr] = value;
1914
0
    ctxt->name = value;
1915
0
    return (ctxt->nameNr++);
1916
0
mem_error:
1917
0
    xmlErrMemory(ctxt, NULL);
1918
0
    return (-1);
1919
0
}
1920
/**
1921
 * namePop:
1922
 * @ctxt: an XML parser context
1923
 *
1924
 * Pops the top element name from the name stack
1925
 *
1926
 * Returns the name just removed
1927
 */
1928
const xmlChar *
1929
namePop(xmlParserCtxtPtr ctxt)
1930
5.33M
{
1931
5.33M
    const xmlChar *ret;
1932
1933
5.33M
    if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1934
0
        return (NULL);
1935
5.33M
    ctxt->nameNr--;
1936
5.33M
    if (ctxt->nameNr > 0)
1937
5.05M
        ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1938
276k
    else
1939
276k
        ctxt->name = NULL;
1940
5.33M
    ret = ctxt->nameTab[ctxt->nameNr];
1941
5.33M
    ctxt->nameTab[ctxt->nameNr] = NULL;
1942
5.33M
    return (ret);
1943
5.33M
}
1944
1945
10.6M
/*
 * spacePush:
 * Pushes @val on top of the ctxt->spaceTab stack, doubling the table when
 * it is full, and points ctxt->space at the new top entry.
 *
 * Returns the index of the pushed entry, or -1 if the table could not be
 * grown (the previous capacity is restored in that case).
 */
static int
spacePush(xmlParserCtxtPtr ctxt, int val)
{
    int *slot;

    if (ctxt->spaceNr >= ctxt->spaceMax) {
        int *grown;

        ctxt->spaceMax *= 2;
        grown = (int *) xmlRealloc(ctxt->spaceTab,
                                   ctxt->spaceMax *
                                   sizeof(ctxt->spaceTab[0]));
        if (grown == NULL) {
            xmlErrMemory(ctxt, NULL);
            ctxt->spaceMax /= 2;        /* undo the doubling above */
            return (-1);
        }
        ctxt->spaceTab = grown;
    }

    slot = &ctxt->spaceTab[ctxt->spaceNr];
    *slot = val;
    ctxt->space = slot;
    return (ctxt->spaceNr++);
}
1963
1964
10.1M
/*
 * spacePop:
 * Pops the top entry of the ctxt->spaceTab stack. ctxt->space is moved to
 * the entry below it, or to the first slot of the table when the stack
 * becomes empty. The vacated slot is reset to -1.
 *
 * Returns the popped value, or 0 if the stack was already empty.
 */
static int
spacePop(xmlParserCtxtPtr ctxt)
{
    int popped;
    int idx;

    if (ctxt->spaceNr <= 0)
        return (0);

    idx = --ctxt->spaceNr;
    ctxt->space = &ctxt->spaceTab[(idx > 0) ? (idx - 1) : 0];

    popped = ctxt->spaceTab[idx];
    ctxt->spaceTab[idx] = -1;
    return (popped);
}
1976
1977
/*
1978
 * Macros for accessing the content. Those should be used only by the parser,
1979
 * and not exported.
1980
 *
1981
 * Dirty macros, i.e. one often need to make assumption on the context to
1982
 * use them
1983
 *
1984
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
1985
 *           To be used with extreme caution since operations consuming
1986
 *           characters may move the input buffer to a different location !
1987
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
1988
 *           This should be used internally by the parser
1989
 *           only to compare to ASCII values otherwise it would break when
1990
 *           running with UTF-8 encoding.
1991
 *   RAW     same as CUR but in the input buffer, bypass any token
1992
 *           extraction that may have been done
1993
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used only
1994
 *           to compare on ASCII based substring.
1995
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
1996
 *           strings without newlines within the parser.
1997
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII
1998
 *           defined char within the parser.
1999
 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
2000
 *
2001
 *   NEXT    Skip to the next character, this does the proper decoding
2002
 *           in UTF-8 mode. It also pop-up unfinished entities on the fly.
2003
 *   NEXTL(l) Skip the current unicode character of l xmlChars long.
2004
 *   CUR_CHAR(l) returns the current unicode character (int), set l
2005
 *           to the number of xmlChars used for the encoding [0-5].
2006
 *   CUR_SCHAR  same but operate on a string instead of the context
2007
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
2008
 *            the index
2009
 *   GROW, SHRINK  handling of input buffers
2010
 */
2011
2012
405M
/*
 * Accessors on the current input of the parser context. They all expand
 * to expressions on a variable named `ctxt` that must be in scope at the
 * point of use.
 */
#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) (ctxt->input->cur[(val)])
#define CUR_PTR ctxt->input->cur
#define BASE_PTR ctxt->input->base

/*
 * CMPn(s, c1, ..., cn): true when the first n bytes at s equal c1..cn.
 * Every argument is parenthesized so that compound expressions (pointer
 * arithmetic, conditional expressions) are compared as a whole. Note that
 * s is evaluated several times.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((unsigned char *) (s))[ 0 ] == (c1) && \
    ((unsigned char *) (s))[ 1 ] == (c2) && \
    ((unsigned char *) (s))[ 2 ] == (c3) && \
    ((unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((unsigned char *) (s))[ 9 ] == (c10) )
2035
2036
113M
/*
 * SKIP(val): advance the input cursor and the column counter by val
 * xmlChars, then ask for more input if the cursor landed on a 0 byte.
 * No line accounting is done. val is evaluated twice.
 */
#define SKIP(val) do {							\
    ctxt->input->cur += (val);						\
    ctxt->input->col += (val);						\
    if (*ctxt->input->cur == 0)						\
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);			\
  } while (0)

/*
 * SKIPL(val): advance the input cursor by val xmlChars one at a time,
 * keeping the line/column counters in sync when a newline is crossed,
 * then ask for more input if the cursor landed on a 0 byte.
 * val is parenthesized so that a compound expression is used as a whole
 * as the loop bound; it is re-evaluated on every iteration.
 */
#define SKIPL(val) do {							\
    int skipl;								\
    for (skipl = 0; skipl < (val); skipl++) {				\
	if (*(ctxt->input->cur) == '\n') {				\
	    ctxt->input->line++; ctxt->input->col = 1;			\
	} else ctxt->input->col++;					\
	ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == 0)						\
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);			\
  } while (0)
2053
2054
114M
#define SHRINK if ((ctxt->progressive == 0) &&       \
2055
114M
       (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
2056
114M
       (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
2057
114M
  xmlSHRINK (ctxt);
2058
2059
1.66M
static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
2060
    /* Don't shrink memory buffers. */
2061
1.66M
    if ((ctxt->input->buf) &&
2062
1.66M
        ((ctxt->input->buf->encoder) || (ctxt->input->buf->readcallback)))
2063
8.66k
        xmlParserInputShrink(ctxt->input);
2064
1.66M
    if (*ctxt->input->cur == 0)
2065
70.2k
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2066
1.66M
}
2067
2068
419M
#define GROW if ((ctxt->progressive == 0) &&       \
2069
419M
     (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
2070
419M
  xmlGROW (ctxt);
2071
2072
72.1M
static void xmlGROW (xmlParserCtxtPtr ctxt) {
2073
72.1M
    ptrdiff_t curEnd = ctxt->input->end - ctxt->input->cur;
2074
72.1M
    ptrdiff_t curBase = ctxt->input->cur - ctxt->input->base;
2075
2076
72.1M
    if (((curEnd > XML_MAX_LOOKUP_LIMIT) ||
2077
72.1M
         (curBase > XML_MAX_LOOKUP_LIMIT)) &&
2078
72.1M
         ((ctxt->input->buf) &&
2079
0
          (ctxt->input->buf->readcallback != NULL)) &&
2080
72.1M
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
2081
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
2082
0
        xmlHaltParser(ctxt);
2083
0
  return;
2084
0
    }
2085
72.1M
    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2086
72.1M
    if ((ctxt->input->cur > ctxt->input->end) ||
2087
72.1M
        (ctxt->input->cur < ctxt->input->base)) {
2088
0
        xmlHaltParser(ctxt);
2089
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "cur index out of bound");
2090
0
  return;
2091
0
    }
2092
72.1M
    if ((ctxt->input->cur != NULL) && (*ctxt->input->cur == 0))
2093
943k
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2094
72.1M
}
2095
2096
116M
/* Shorthands for the character-level helpers of the parser. */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: skip exactly one xmlChar without any newline accounting, then
 * request more input if the cursor landed on a 0 byte.
 * NOTE(review): expands to a brace block rather than do { } while (0);
 * the shape is kept because call sites are not visible from here.
 */
#define NEXT1 {								\
	ctxt->input->col++;						\
	ctxt->input->cur++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }

/*
 * NEXTL(l): skip the current character, which is l xmlChars long,
 * updating the line/column counters (a newline starts a new line at
 * column 1, anything else advances the column by one).
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += (l);						\
  } while (0)

#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * COPY_BUF(l, b, i, v): append character v to buffer b at index i and
 * advance i: a direct store when l is 1, xmlCopyCharMultiByte()
 * otherwise. Arguments are parenthesized so that a compound l is
 * compared as a whole. Expands to a bare if/else without a trailing ';'.
 */
#define COPY_BUF(l,b,i,v)						\
    if ((l) == 1) (b)[(i)++] = (v);					\
    else (i) += xmlCopyCharMultiByte(&(b)[(i)], (v))
2120
2121
/**
2122
 * xmlSkipBlankChars:
2123
 * @ctxt:  the XML parser context
2124
 *
2125
 * skip all blanks character found at that point in the input streams.
2126
 * It pops up finished entities in the process if allowable at that point.
2127
 *
2128
 * Returns the number of space chars skipped
2129
 */
2130
2131
int
2132
116M
xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
2133
116M
    int res = 0;
2134
2135
    /*
2136
     * It's Okay to use CUR/NEXT here since all the blanks are on
2137
     * the ASCII range.
2138
     */
2139
116M
    if (((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) ||
2140
116M
        (ctxt->instate == XML_PARSER_START)) {
2141
46.4M
  const xmlChar *cur;
2142
  /*
2143
   * if we are in the document content, go really fast
2144
   */
2145
46.4M
  cur = ctxt->input->cur;
2146
46.4M
  while (IS_BLANK_CH(*cur)) {
2147
19.5M
      if (*cur == '\n') {
2148
952k
    ctxt->input->line++; ctxt->input->col = 1;
2149
18.5M
      } else {
2150
18.5M
    ctxt->input->col++;
2151
18.5M
      }
2152
19.5M
      cur++;
2153
19.5M
      if (res < INT_MAX)
2154
19.5M
    res++;
2155
19.5M
      if (*cur == 0) {
2156
79.0k
    ctxt->input->cur = cur;
2157
79.0k
    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2158
79.0k
    cur = ctxt->input->cur;
2159
79.0k
      }
2160
19.5M
  }
2161
46.4M
  ctxt->input->cur = cur;
2162
70.2M
    } else {
2163
70.2M
        int expandPE = ((ctxt->external != 0) || (ctxt->inputNr != 1));
2164
2165
240M
  while (ctxt->instate != XML_PARSER_EOF) {
2166
240M
            if (IS_BLANK_CH(CUR)) { /* CHECKED tstblanks.xml */
2167
104M
    NEXT;
2168
136M
      } else if (CUR == '%') {
2169
                /*
2170
                 * Need to handle support of entities branching here
2171
                 */
2172
37.4M
          if ((expandPE == 0) || (IS_BLANK_CH(NXT(1))) || (NXT(1) == 0))
2173
574k
                    break;
2174
36.8M
          xmlParsePEReference(ctxt);
2175
98.8M
            } else if (CUR == 0) {
2176
29.2M
                unsigned long consumed;
2177
29.2M
                xmlEntityPtr ent;
2178
2179
29.2M
                if (ctxt->inputNr <= 1)
2180
34.2k
                    break;
2181
2182
29.2M
                consumed = ctxt->input->consumed;
2183
29.2M
                xmlSaturatedAddSizeT(&consumed,
2184
29.2M
                                     ctxt->input->cur - ctxt->input->base);
2185
2186
                /*
2187
                 * Add to sizeentities when parsing an external entity
2188
                 * for the first time.
2189
                 */
2190
29.2M
                ent = ctxt->input->entity;
2191
29.2M
                if ((ent->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
2192
29.2M
                    ((ent->flags & XML_ENT_PARSED) == 0)) {
2193
18.0k
                    ent->flags |= XML_ENT_PARSED;
2194
2195
18.0k
                    xmlSaturatedAdd(&ctxt->sizeentities, consumed);
2196
18.0k
                }
2197
2198
29.2M
                xmlParserEntityCheck(ctxt, consumed);
2199
2200
29.2M
                xmlPopInput(ctxt);
2201
69.5M
            } else {
2202
69.5M
                break;
2203
69.5M
            }
2204
2205
            /*
2206
             * Also increase the counter when entering or exiting a PERef.
2207
             * The spec says: "When a parameter-entity reference is recognized
2208
             * in the DTD and included, its replacement text MUST be enlarged
2209
             * by the attachment of one leading and one following space (#x20)
2210
             * character."
2211
             */
2212
170M
      if (res < INT_MAX)
2213
170M
    res++;
2214
170M
        }
2215
70.2M
    }
2216
116M
    return(res);
2217
116M
}
2218
2219
/************************************************************************
2220
 *                  *
2221
 *    Commodity functions to handle entities      *
2222
 *                  *
2223
 ************************************************************************/
2224
2225
/**
2226
 * xmlPopInput:
2227
 * @ctxt:  an XML parser context
2228
 *
2229
 * xmlPopInput: the current input pointed by ctxt->input came to an end
2230
 *          pop it and return the next char.
2231
 *
2232
 * Returns the current xmlChar in the parser context
2233
 */
2234
xmlChar
2235
29.2M
xmlPopInput(xmlParserCtxtPtr ctxt) {
2236
29.2M
    xmlParserInputPtr input;
2237
2238
29.2M
    if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
2239
29.2M
    if (xmlParserDebugEntities)
2240
0
  xmlGenericError(xmlGenericErrorContext,
2241
0
    "Popping input %d\n", ctxt->inputNr);
2242
29.2M
    if ((ctxt->inputNr > 1) && (ctxt->inSubset == 0) &&
2243
29.2M
        (ctxt->instate != XML_PARSER_EOF))
2244
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2245
0
                    "Unfinished entity outside the DTD");
2246
29.2M
    input = inputPop(ctxt);
2247
29.2M
    if (input->entity != NULL)
2248
29.2M
        input->entity->flags &= ~XML_ENT_EXPANDING;
2249
29.2M
    xmlFreeInputStream(input);
2250
29.2M
    if (*ctxt->input->cur == 0)
2251
13.8M
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2252
29.2M
    return(CUR);
2253
29.2M
}
2254
2255
/**
2256
 * xmlPushInput:
2257
 * @ctxt:  an XML parser context
2258
 * @input:  an XML parser input fragment (entity, XML fragment ...).
2259
 *
2260
 * xmlPushInput: switch to a new input stream which is stacked on top
2261
 *               of the previous one(s).
2262
 * Returns -1 in case of error or the index in the input stack
2263
 */
2264
int
2265
29.2M
xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
2266
29.2M
    int ret;
2267
29.2M
    if (input == NULL) return(-1);
2268
2269
29.2M
    if (xmlParserDebugEntities) {
2270
0
  if ((ctxt->input != NULL) && (ctxt->input->filename))
2271
0
      xmlGenericError(xmlGenericErrorContext,
2272
0
        "%s(%d): ", ctxt->input->filename,
2273
0
        ctxt->input->line);
2274
0
  xmlGenericError(xmlGenericErrorContext,
2275
0
    "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
2276
0
    }
2277
29.2M
    if (((ctxt->inputNr > 40) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2278
29.2M
        (ctxt->inputNr > 100)) {
2279
0
        xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2280
0
        while (ctxt->inputNr > 1)
2281
0
            xmlFreeInputStream(inputPop(ctxt));
2282
0
  return(-1);
2283
0
    }
2284
29.2M
    ret = inputPush(ctxt, input);
2285
29.2M
    if (ctxt->instate == XML_PARSER_EOF)
2286
0
        return(-1);
2287
29.2M
    GROW;
2288
29.2M
    return(ret);
2289
29.2M
}
2290
2291
/**
2292
 * xmlParseCharRef:
2293
 * @ctxt:  an XML parser context
2294
 *
2295
 * DEPRECATED: Internal function, don't use.
2296
 *
2297
 * Parse a numeric character reference. Always consumes '&'.
2298
 *
2299
 * [66] CharRef ::= '&#' [0-9]+ ';' |
2300
 *                  '&#x' [0-9a-fA-F]+ ';'
2301
 *
2302
 * [ WFC: Legal Character ]
2303
 * Characters referred to using character references must match the
2304
 * production for Char.
2305
 *
2306
 * Returns the value parsed (as an int), 0 in case of error
2307
 */
2308
int
2309
487k
xmlParseCharRef(xmlParserCtxtPtr ctxt) {
2310
487k
    int val = 0;
2311
487k
    int count = 0;
2312
2313
    /*
2314
     * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2315
     */
2316
487k
    if ((RAW == '&') && (NXT(1) == '#') &&
2317
487k
        (NXT(2) == 'x')) {
2318
192k
  SKIP(3);
2319
192k
  GROW;
2320
643k
  while (RAW != ';') { /* loop blocked by count */
2321
470k
      if (count++ > 20) {
2322
20.4k
    count = 0;
2323
20.4k
    GROW;
2324
20.4k
                if (ctxt->instate == XML_PARSER_EOF)
2325
0
                    return(0);
2326
20.4k
      }
2327
470k
      if ((RAW >= '0') && (RAW <= '9'))
2328
291k
          val = val * 16 + (CUR - '0');
2329
178k
      else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2330
139k
          val = val * 16 + (CUR - 'a') + 10;
2331
39.1k
      else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2332
19.6k
          val = val * 16 + (CUR - 'A') + 10;
2333
19.5k
      else {
2334
19.5k
    xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2335
19.5k
    val = 0;
2336
19.5k
    break;
2337
19.5k
      }
2338
450k
      if (val > 0x110000)
2339
225k
          val = 0x110000;
2340
2341
450k
      NEXT;
2342
450k
      count++;
2343
450k
  }
2344
192k
  if (RAW == ';') {
2345
      /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2346
172k
      ctxt->input->col++;
2347
172k
      ctxt->input->cur++;
2348
172k
  }
2349
295k
    } else if  ((RAW == '&') && (NXT(1) == '#')) {
2350
295k
  SKIP(2);
2351
295k
  GROW;
2352
1.30M
  while (RAW != ';') { /* loop blocked by count */
2353
1.04M
      if (count++ > 20) {
2354
22.4k
    count = 0;
2355
22.4k
    GROW;
2356
22.4k
                if (ctxt->instate == XML_PARSER_EOF)
2357
0
                    return(0);
2358
22.4k
      }
2359
1.04M
      if ((RAW >= '0') && (RAW <= '9'))
2360
1.01M
          val = val * 10 + (CUR - '0');
2361
33.5k
      else {
2362
33.5k
    xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2363
33.5k
    val = 0;
2364
33.5k
    break;
2365
33.5k
      }
2366
1.01M
      if (val > 0x110000)
2367
244k
          val = 0x110000;
2368
2369
1.01M
      NEXT;
2370
1.01M
      count++;
2371
1.01M
  }
2372
295k
  if (RAW == ';') {
2373
      /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2374
261k
      ctxt->input->col++;
2375
261k
      ctxt->input->cur++;
2376
261k
  }
2377
295k
    } else {
2378
0
        if (RAW == '&')
2379
0
            SKIP(1);
2380
0
        xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2381
0
    }
2382
2383
    /*
2384
     * [ WFC: Legal Character ]
2385
     * Characters referred to using character references must match the
2386
     * production for Char.
2387
     */
2388
487k
    if (val >= 0x110000) {
2389
1.58k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2390
1.58k
                "xmlParseCharRef: character reference out of bounds\n",
2391
1.58k
          val);
2392
485k
    } else if (IS_CHAR(val)) {
2393
427k
        return(val);
2394
427k
    } else {
2395
58.3k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2396
58.3k
                          "xmlParseCharRef: invalid xmlChar value %d\n",
2397
58.3k
                    val);
2398
58.3k
    }
2399
59.9k
    return(0);
2400
487k
}
2401
2402
/**
2403
 * xmlParseStringCharRef:
2404
 * @ctxt:  an XML parser context
2405
 * @str:  a pointer to an index in the string
2406
 *
2407
 * parse Reference declarations, variant parsing from a string rather
2408
 * than an an input flow.
2409
 *
2410
 * [66] CharRef ::= '&#' [0-9]+ ';' |
2411
 *                  '&#x' [0-9a-fA-F]+ ';'
2412
 *
2413
 * [ WFC: Legal Character ]
2414
 * Characters referred to using character references must match the
2415
 * production for Char.
2416
 *
2417
 * Returns the value parsed (as an int), 0 in case of error, str will be
2418
 *         updated to the current value of the index
2419
 */
2420
static int
2421
252k
xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2422
252k
    const xmlChar *ptr;
2423
252k
    xmlChar cur;
2424
252k
    int val = 0;
2425
2426
252k
    if ((str == NULL) || (*str == NULL)) return(0);
2427
252k
    ptr = *str;
2428
252k
    cur = *ptr;
2429
252k
    if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2430
43.3k
  ptr += 3;
2431
43.3k
  cur = *ptr;
2432
111k
  while (cur != ';') { /* Non input consuming loop */
2433
69.7k
      if ((cur >= '0') && (cur <= '9'))
2434
27.0k
          val = val * 16 + (cur - '0');
2435
42.6k
      else if ((cur >= 'a') && (cur <= 'f'))
2436
9.98k
          val = val * 16 + (cur - 'a') + 10;
2437
32.7k
      else if ((cur >= 'A') && (cur <= 'F'))
2438
30.9k
          val = val * 16 + (cur - 'A') + 10;
2439
1.71k
      else {
2440
1.71k
    xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2441
1.71k
    val = 0;
2442
1.71k
    break;
2443
1.71k
      }
2444
67.9k
      if (val > 0x110000)
2445
16.4k
          val = 0x110000;
2446
2447
67.9k
      ptr++;
2448
67.9k
      cur = *ptr;
2449
67.9k
  }
2450
43.3k
  if (cur == ';')
2451
41.5k
      ptr++;
2452
209k
    } else if  ((cur == '&') && (ptr[1] == '#')){
2453
209k
  ptr += 2;
2454
209k
  cur = *ptr;
2455
711k
  while (cur != ';') { /* Non input consuming loops */
2456
505k
      if ((cur >= '0') && (cur <= '9'))
2457
501k
          val = val * 10 + (cur - '0');
2458
3.59k
      else {
2459
3.59k
    xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2460
3.59k
    val = 0;
2461
3.59k
    break;
2462
3.59k
      }
2463
501k
      if (val > 0x110000)
2464
2.46k
          val = 0x110000;
2465
2466
501k
      ptr++;
2467
501k
      cur = *ptr;
2468
501k
  }
2469
209k
  if (cur == ';')
2470
206k
      ptr++;
2471
209k
    } else {
2472
0
  xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2473
0
  return(0);
2474
0
    }
2475
252k
    *str = ptr;
2476
2477
    /*
2478
     * [ WFC: Legal Character ]
2479
     * Characters referred to using character references must match the
2480
     * production for Char.
2481
     */
2482
252k
    if (val >= 0x110000) {
2483
386
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2484
386
                "xmlParseStringCharRef: character reference out of bounds\n",
2485
386
                val);
2486
252k
    } else if (IS_CHAR(val)) {
2487
246k
        return(val);
2488
246k
    } else {
2489
6.56k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2490
6.56k
        "xmlParseStringCharRef: invalid xmlChar value %d\n",
2491
6.56k
        val);
2492
6.56k
    }
2493
6.95k
    return(0);
2494
252k
}
2495
2496
/**
2497
 * xmlParserHandlePEReference:
2498
 * @ctxt:  the parser context
2499
 *
2500
 * [69] PEReference ::= '%' Name ';'
2501
 *
2502
 * [ WFC: No Recursion ]
2503
 * A parsed entity must not contain a recursive
2504
 * reference to itself, either directly or indirectly.
2505
 *
2506
 * [ WFC: Entity Declared ]
2507
 * In a document without any DTD, a document with only an internal DTD
2508
 * subset which contains no parameter entity references, or a document
2509
 * with "standalone='yes'", ...  ... The declaration of a parameter
2510
 * entity must precede any reference to it...
2511
 *
2512
 * [ VC: Entity Declared ]
2513
 * In a document with an external subset or external parameter entities
2514
 * with "standalone='no'", ...  ... The declaration of a parameter entity
2515
 * must precede any reference to it...
2516
 *
2517
 * [ WFC: In DTD ]
2518
 * Parameter-entity references may only appear in the DTD.
2519
 * NOTE: misleading but this is handled.
2520
 *
2521
 * A PEReference may have been detected in the current input stream
2522
 * the handling is done accordingly to
2523
 *      http://www.w3.org/TR/REC-xml#entproc
2524
 * i.e.
2525
 *   - Included in literal in entity values
2526
 *   - Included as Parameter Entity reference within DTDs
2527
 */
2528
void
2529
0
xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
2530
0
    switch(ctxt->instate) {
2531
0
  case XML_PARSER_CDATA_SECTION:
2532
0
      return;
2533
0
        case XML_PARSER_COMMENT:
2534
0
      return;
2535
0
  case XML_PARSER_START_TAG:
2536
0
      return;
2537
0
  case XML_PARSER_END_TAG:
2538
0
      return;
2539
0
        case XML_PARSER_EOF:
2540
0
      xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
2541
0
      return;
2542
0
        case XML_PARSER_PROLOG:
2543
0
  case XML_PARSER_START:
2544
0
  case XML_PARSER_MISC:
2545
0
      xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
2546
0
      return;
2547
0
  case XML_PARSER_ENTITY_DECL:
2548
0
        case XML_PARSER_CONTENT:
2549
0
        case XML_PARSER_ATTRIBUTE_VALUE:
2550
0
        case XML_PARSER_PI:
2551
0
  case XML_PARSER_SYSTEM_LITERAL:
2552
0
  case XML_PARSER_PUBLIC_LITERAL:
2553
      /* we just ignore it there */
2554
0
      return;
2555
0
        case XML_PARSER_EPILOG:
2556
0
      xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
2557
0
      return;
2558
0
  case XML_PARSER_ENTITY_VALUE:
2559
      /*
2560
       * NOTE: in the case of entity values, we don't do the
2561
       *       substitution here since we need the literal
2562
       *       entity value to be able to save the internal
2563
       *       subset of the document.
2564
       *       This will be handled by xmlStringDecodeEntities
2565
       */
2566
0
      return;
2567
0
        case XML_PARSER_DTD:
2568
      /*
2569
       * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2570
       * In the internal DTD subset, parameter-entity references
2571
       * can occur only where markup declarations can occur, not
2572
       * within markup declarations.
2573
       * In that case this is handled in xmlParseMarkupDecl
2574
       */
2575
0
      if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2576
0
    return;
2577
0
      if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
2578
0
    return;
2579
0
            break;
2580
0
        case XML_PARSER_IGNORE:
2581
0
            return;
2582
0
    }
2583
2584
0
    xmlParsePEReference(ctxt);
2585
0
}
2586
2587
/*
 * Macro used to grow the current buffer: the new size is twice the
 * current size plus n.
 * buffer##_size is expected to be a size_t
 * mem_error: is expected to handle memory allocation failures; it is
 * jumped to when the new size wraps around or the reallocation fails,
 * in which case buffer and buffer##_size are left unchanged.
 * n is parenthesized so that a compound expression is added as a whole.
 */
#define growBuffer(buffer, n) {						\
    xmlChar *tmp;							\
    size_t new_size = buffer##_size * 2 + (n);				\
    if (new_size < buffer##_size) goto mem_error;			\
    tmp = (xmlChar *) xmlRealloc(buffer, new_size);			\
    if (tmp == NULL) goto mem_error;					\
    buffer = tmp;							\
    buffer##_size = new_size;						\
}
2601
2602
/**
2603
 * xmlStringDecodeEntitiesInt:
2604
 * @ctxt:  the parser context
2605
 * @str:  the input string
2606
 * @len: the string length
2607
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2608
 * @end:  an end marker xmlChar, 0 if none
2609
 * @end2:  an end marker xmlChar, 0 if none
2610
 * @end3:  an end marker xmlChar, 0 if none
2611
 * @check:  whether to perform entity checks
2612
 */
2613
static xmlChar *
2614
xmlStringDecodeEntitiesInt(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2615
               int what, xmlChar end, xmlChar  end2, xmlChar end3,
2616
30.4M
                           int check) {
2617
30.4M
    xmlChar *buffer = NULL;
2618
30.4M
    size_t buffer_size = 0;
2619
30.4M
    size_t nbchars = 0;
2620
2621
30.4M
    xmlChar *current = NULL;
2622
30.4M
    xmlChar *rep = NULL;
2623
30.4M
    const xmlChar *last;
2624
30.4M
    xmlEntityPtr ent;
2625
30.4M
    int c,l;
2626
2627
30.4M
    if (str == NULL)
2628
15.2k
        return(NULL);
2629
30.4M
    last = str + len;
2630
2631
30.4M
    if (((ctxt->depth > 40) &&
2632
30.4M
         ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2633
30.4M
  (ctxt->depth > 100)) {
2634
0
  xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_LOOP,
2635
0
                       "Maximum entity nesting depth exceeded");
2636
0
  return(NULL);
2637
0
    }
2638
2639
    /*
2640
     * allocate a translation buffer.
2641
     */
2642
30.4M
    buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
2643
30.4M
    buffer = (xmlChar *) xmlMallocAtomic(buffer_size);
2644
30.4M
    if (buffer == NULL) goto mem_error;
2645
2646
    /*
2647
     * OK loop until we reach one of the ending char or a size limit.
2648
     * we are operating on already parsed values.
2649
     */
2650
30.4M
    if (str < last)
2651
30.3M
  c = CUR_SCHAR(str, l);
2652
87.0k
    else
2653
87.0k
        c = 0;
2654
1.78G
    while ((c != 0) && (c != end) && /* non input consuming loop */
2655
1.78G
           (c != end2) && (c != end3) &&
2656
1.78G
           (ctxt->instate != XML_PARSER_EOF)) {
2657
2658
1.75G
  if (c == 0) break;
2659
1.75G
        if ((c == '&') && (str[1] == '#')) {
2660
252k
      int val = xmlParseStringCharRef(ctxt, &str);
2661
252k
      if (val == 0)
2662
6.95k
                goto int_error;
2663
246k
      COPY_BUF(0,buffer,nbchars,val);
2664
246k
      if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2665
288
          growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2666
288
      }
2667
1.75G
  } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2668
29.4M
      if (xmlParserDebugEntities)
2669
0
    xmlGenericError(xmlGenericErrorContext,
2670
0
      "String decoding Entity Reference: %.30s\n",
2671
0
      str);
2672
29.4M
      ent = xmlParseStringEntityRef(ctxt, &str);
2673
29.4M
      if ((ent != NULL) &&
2674
29.4M
    (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2675
160k
    if (ent->content != NULL) {
2676
160k
        COPY_BUF(0,buffer,nbchars,ent->content[0]);
2677
160k
        if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2678
8.03k
      growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2679
8.03k
        }
2680
160k
    } else {
2681
0
        xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2682
0
          "predefined entity has no content\n");
2683
0
                    goto int_error;
2684
0
    }
2685
29.2M
      } else if ((ent != NULL) && (ent->content != NULL)) {
2686
27.5M
          if ((check) && (xmlParserEntityCheck(ctxt, ent->length)))
2687
209
                    goto int_error;
2688
2689
27.5M
                if (ent->flags & XML_ENT_EXPANDING) {
2690
376
              xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2691
376
                    xmlHaltParser(ctxt);
2692
376
                    ent->content[0] = 0;
2693
376
                    goto int_error;
2694
376
                }
2695
2696
27.5M
                ent->flags |= XML_ENT_EXPANDING;
2697
27.5M
    ctxt->depth++;
2698
27.5M
    rep = xmlStringDecodeEntitiesInt(ctxt, ent->content,
2699
27.5M
                        ent->length, what, 0, 0, 0, check);
2700
27.5M
    ctxt->depth--;
2701
27.5M
                ent->flags &= ~XML_ENT_EXPANDING;
2702
2703
27.5M
    if (rep == NULL) {
2704
3.73k
                    ent->content[0] = 0;
2705
3.73k
                    goto int_error;
2706
3.73k
                }
2707
2708
27.5M
                current = rep;
2709
4.25G
                while (*current != 0) { /* non input consuming loop */
2710
4.22G
                    buffer[nbchars++] = *current++;
2711
4.22G
                    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2712
3.10M
                        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2713
3.10M
                    }
2714
4.22G
                }
2715
27.5M
                xmlFree(rep);
2716
27.5M
                rep = NULL;
2717
27.5M
      } else if (ent != NULL) {
2718
99.8k
    int i = xmlStrlen(ent->name);
2719
99.8k
    const xmlChar *cur = ent->name;
2720
2721
99.8k
    buffer[nbchars++] = '&';
2722
99.8k
    if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) {
2723
1.68k
        growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE);
2724
1.68k
    }
2725
399k
    for (;i > 0;i--)
2726
299k
        buffer[nbchars++] = *cur++;
2727
99.8k
    buffer[nbchars++] = ';';
2728
99.8k
      }
2729
1.72G
  } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2730
706k
      if (xmlParserDebugEntities)
2731
0
    xmlGenericError(xmlGenericErrorContext,
2732
0
      "String decoding PE Reference: %.30s\n", str);
2733
706k
      ent = xmlParseStringPEReference(ctxt, &str);
2734
706k
      if (ent != NULL) {
2735
445k
                if (ent->content == NULL) {
2736
        /*
2737
         * Note: external parsed entities will not be loaded,
2738
         * it is not required for a non-validating parser to
2739
         * complete external PEReferences coming from the
2740
         * internal subset
2741
         */
2742
4.99k
        if (((ctxt->options & XML_PARSE_NOENT) != 0) ||
2743
4.99k
      ((ctxt->options & XML_PARSE_DTDVALID) != 0) ||
2744
4.99k
      (ctxt->validate != 0)) {
2745
4.38k
      xmlLoadEntityContent(ctxt, ent);
2746
4.38k
        } else {
2747
608
      xmlWarningMsg(ctxt, XML_ERR_ENTITY_PROCESSING,
2748
608
      "not validating will not read content for PE entity %s\n",
2749
608
                          ent->name, NULL);
2750
608
        }
2751
4.99k
    }
2752
2753
445k
          if ((check) && (xmlParserEntityCheck(ctxt, ent->length)))
2754
99
                    goto int_error;
2755
2756
444k
                if (ent->flags & XML_ENT_EXPANDING) {
2757
243
              xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2758
243
                    xmlHaltParser(ctxt);
2759
243
                    if (ent->content != NULL)
2760
114
                        ent->content[0] = 0;
2761
243
                    goto int_error;
2762
243
                }
2763
2764
444k
                ent->flags |= XML_ENT_EXPANDING;
2765
444k
    ctxt->depth++;
2766
444k
    rep = xmlStringDecodeEntitiesInt(ctxt, ent->content,
2767
444k
                        ent->length, what, 0, 0, 0, check);
2768
444k
    ctxt->depth--;
2769
444k
                ent->flags &= ~XML_ENT_EXPANDING;
2770
2771
444k
    if (rep == NULL) {
2772
3.33k
                    if (ent->content != NULL)
2773
324
                        ent->content[0] = 0;
2774
3.33k
                    goto int_error;
2775
3.33k
                }
2776
441k
                current = rep;
2777
907M
                while (*current != 0) { /* non input consuming loop */
2778
907M
                    buffer[nbchars++] = *current++;
2779
907M
                    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2780
93.7k
                        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2781
93.7k
                    }
2782
907M
                }
2783
441k
                xmlFree(rep);
2784
441k
                rep = NULL;
2785
441k
      }
2786
1.72G
  } else {
2787
1.72G
      COPY_BUF(l,buffer,nbchars,c);
2788
1.72G
      str += l;
2789
1.72G
      if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2790
388k
          growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2791
388k
      }
2792
1.72G
  }
2793
1.75G
  if (str < last)
2794
1.72G
      c = CUR_SCHAR(str, l);
2795
30.3M
  else
2796
30.3M
      c = 0;
2797
1.75G
    }
2798
30.3M
    buffer[nbchars] = 0;
2799
30.3M
    return(buffer);
2800
2801
0
mem_error:
2802
0
    xmlErrMemory(ctxt, NULL);
2803
14.9k
int_error:
2804
14.9k
    if (rep != NULL)
2805
0
        xmlFree(rep);
2806
14.9k
    if (buffer != NULL)
2807
14.9k
        xmlFree(buffer);
2808
14.9k
    return(NULL);
2809
0
}
2810
2811
/**
2812
 * xmlStringLenDecodeEntities:
2813
 * @ctxt:  the parser context
2814
 * @str:  the input string
2815
 * @len: the string length
2816
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2817
 * @end:  an end marker xmlChar, 0 if none
2818
 * @end2:  an end marker xmlChar, 0 if none
2819
 * @end3:  an end marker xmlChar, 0 if none
2820
 *
2821
 * DEPRECATED: Internal function, don't use.
2822
 *
2823
 * Takes a entity string content and process to do the adequate substitutions.
2824
 *
2825
 * [67] Reference ::= EntityRef | CharRef
2826
 *
2827
 * [69] PEReference ::= '%' Name ';'
2828
 *
2829
 * Returns A newly allocated string with the substitution done. The caller
2830
 *      must deallocate it !
2831
 */
2832
xmlChar *
2833
xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2834
                           int what, xmlChar end, xmlChar  end2,
2835
11.8k
                           xmlChar end3) {
2836
11.8k
    if ((ctxt == NULL) || (str == NULL) || (len < 0))
2837
0
        return(NULL);
2838
11.8k
    return(xmlStringDecodeEntitiesInt(ctxt, str, len, what,
2839
11.8k
                                      end, end2, end3, 0));
2840
11.8k
}
2841
2842
/**
2843
 * xmlStringDecodeEntities:
2844
 * @ctxt:  the parser context
2845
 * @str:  the input string
2846
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2847
 * @end:  an end marker xmlChar, 0 if none
2848
 * @end2:  an end marker xmlChar, 0 if none
2849
 * @end3:  an end marker xmlChar, 0 if none
2850
 *
2851
 * DEPRECATED: Internal function, don't use.
2852
 *
2853
 * Takes a entity string content and process to do the adequate substitutions.
2854
 *
2855
 * [67] Reference ::= EntityRef | CharRef
2856
 *
2857
 * [69] PEReference ::= '%' Name ';'
2858
 *
2859
 * Returns A newly allocated string with the substitution done. The caller
2860
 *      must deallocate it !
2861
 */
2862
xmlChar *
2863
xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2864
210k
            xmlChar end, xmlChar  end2, xmlChar end3) {
2865
210k
    if ((ctxt == NULL) || (str == NULL)) return(NULL);
2866
210k
    return(xmlStringDecodeEntitiesInt(ctxt, str, xmlStrlen(str), what,
2867
210k
                                      end, end2, end3, 0));
2868
210k
}
2869
2870
/************************************************************************
2871
 *                  *
2872
 *    Commodity functions, cleanup needed ?     *
2873
 *                  *
2874
 ************************************************************************/
2875
2876
/**
2877
 * areBlanks:
2878
 * @ctxt:  an XML parser context
2879
 * @str:  a xmlChar *
2880
 * @len:  the size of @str
2881
 * @blank_chars: we know the chars are blanks
2882
 *
2883
 * Is this a sequence of blank chars that one can ignore ?
2884
 *
2885
 * Returns 1 if ignorable 0 otherwise.
2886
 */
2887
2888
static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2889
5.03M
                     int blank_chars) {
2890
5.03M
    int i, ret;
2891
5.03M
    xmlNodePtr lastChild;
2892
2893
    /*
2894
     * Don't spend time trying to differentiate them, the same callback is
2895
     * used !
2896
     */
2897
5.03M
    if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
2898
374k
  return(0);
2899
2900
    /*
2901
     * Check for xml:space value.
2902
     */
2903
4.66M
    if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2904
4.66M
        (*(ctxt->space) == -2))
2905
1.93M
  return(0);
2906
2907
    /*
2908
     * Check that the string is made of blanks
2909
     */
2910
2.72M
    if (blank_chars == 0) {
2911
5.84M
  for (i = 0;i < len;i++)
2912
5.05M
      if (!(IS_BLANK_CH(str[i]))) return(0);
2913
1.14M
    }
2914
2915
    /*
2916
     * Look if the element is mixed content in the DTD if available
2917
     */
2918
2.37M
    if (ctxt->node == NULL) return(0);
2919
2.30M
    if (ctxt->myDoc != NULL) {
2920
2.30M
  ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2921
2.30M
        if (ret == 0) return(1);
2922
2.13M
        if (ret == 1) return(0);
2923
2.13M
    }
2924
2925
    /*
2926
     * Otherwise, heuristic :-\
2927
     */
2928
2.11M
    if ((RAW != '<') && (RAW != 0xD)) return(0);
2929
2.08M
    if ((ctxt->node->children == NULL) &&
2930
2.08M
  (RAW == '<') && (NXT(1) == '/')) return(0);
2931
2932
2.08M
    lastChild = xmlGetLastChild(ctxt->node);
2933
2.08M
    if (lastChild == NULL) {
2934
416k
        if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2935
416k
            (ctxt->node->content != NULL)) return(0);
2936
1.66M
    } else if (xmlNodeIsText(lastChild))
2937
29.2k
        return(0);
2938
1.63M
    else if ((ctxt->node->children != NULL) &&
2939
1.63M
             (xmlNodeIsText(ctxt->node->children)))
2940
19.5k
        return(0);
2941
2.03M
    return(1);
2942
2.08M
}
2943
2944
/************************************************************************
2945
 *                  *
2946
 *    Extra stuff for namespace support     *
2947
 *  Relates to http://www.w3.org/TR/WD-xml-names      *
2948
 *                  *
2949
 ************************************************************************/
2950
2951
/**
2952
 * xmlSplitQName:
2953
 * @ctxt:  an XML parser context
2954
 * @name:  an XML parser context
2955
 * @prefix:  a xmlChar **
2956
 *
2957
 * parse an UTF8 encoded XML qualified name string
2958
 *
2959
 * [NS 5] QName ::= (Prefix ':')? LocalPart
2960
 *
2961
 * [NS 6] Prefix ::= NCName
2962
 *
2963
 * [NS 7] LocalPart ::= NCName
2964
 *
2965
 * Returns the local part, and prefix is updated
2966
 *   to get the Prefix if any.
2967
 */
2968
2969
xmlChar *
2970
9.96M
xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2971
9.96M
    xmlChar buf[XML_MAX_NAMELEN + 5];
2972
9.96M
    xmlChar *buffer = NULL;
2973
9.96M
    int len = 0;
2974
9.96M
    int max = XML_MAX_NAMELEN;
2975
9.96M
    xmlChar *ret = NULL;
2976
9.96M
    const xmlChar *cur = name;
2977
9.96M
    int c;
2978
2979
9.96M
    if (prefix == NULL) return(NULL);
2980
9.96M
    *prefix = NULL;
2981
2982
9.96M
    if (cur == NULL) return(NULL);
2983
2984
#ifndef XML_XML_NAMESPACE
2985
    /* xml: prefix is not really a namespace */
2986
    if ((cur[0] == 'x') && (cur[1] == 'm') &&
2987
        (cur[2] == 'l') && (cur[3] == ':'))
2988
  return(xmlStrdup(name));
2989
#endif
2990
2991
    /* nasty but well=formed */
2992
9.96M
    if (cur[0] == ':')
2993
4.74k
  return(xmlStrdup(name));
2994
2995
9.95M
    c = *cur++;
2996
48.3M
    while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2997
38.3M
  buf[len++] = c;
2998
38.3M
  c = *cur++;
2999
38.3M
    }
3000
9.95M
    if (len >= max) {
3001
  /*
3002
   * Okay someone managed to make a huge name, so he's ready to pay
3003
   * for the processing speed.
3004
   */
3005
4.69k
  max = len * 2;
3006
3007
4.69k
  buffer = (xmlChar *) xmlMallocAtomic(max);
3008
4.69k
  if (buffer == NULL) {
3009
0
      xmlErrMemory(ctxt, NULL);
3010
0
      return(NULL);
3011
0
  }
3012
4.69k
  memcpy(buffer, buf, len);
3013
4.93M
  while ((c != 0) && (c != ':')) { /* tested bigname.xml */
3014
4.92M
      if (len + 10 > max) {
3015
5.66k
          xmlChar *tmp;
3016
3017
5.66k
    max *= 2;
3018
5.66k
    tmp = (xmlChar *) xmlRealloc(buffer, max);
3019
5.66k
    if (tmp == NULL) {
3020
0
        xmlFree(buffer);
3021
0
        xmlErrMemory(ctxt, NULL);
3022
0
        return(NULL);
3023
0
    }
3024
5.66k
    buffer = tmp;
3025
5.66k
      }
3026
4.92M
      buffer[len++] = c;
3027
4.92M
      c = *cur++;
3028
4.92M
  }
3029
4.69k
  buffer[len] = 0;
3030
4.69k
    }
3031
3032
9.95M
    if ((c == ':') && (*cur == 0)) {
3033
9.51k
        if (buffer != NULL)
3034
234
      xmlFree(buffer);
3035
9.51k
  *prefix = NULL;
3036
9.51k
  return(xmlStrdup(name));
3037
9.51k
    }
3038
3039
9.94M
    if (buffer == NULL)
3040
9.94M
  ret = xmlStrndup(buf, len);
3041
4.45k
    else {
3042
4.45k
  ret = buffer;
3043
4.45k
  buffer = NULL;
3044
4.45k
  max = XML_MAX_NAMELEN;
3045
4.45k
    }
3046
3047
3048
9.94M
    if (c == ':') {
3049
658k
  c = *cur;
3050
658k
        *prefix = ret;
3051
658k
  if (c == 0) {
3052
0
      return(xmlStrndup(BAD_CAST "", 0));
3053
0
  }
3054
658k
  len = 0;
3055
3056
  /*
3057
   * Check that the first character is proper to start
3058
   * a new name
3059
   */
3060
658k
  if (!(((c >= 0x61) && (c <= 0x7A)) ||
3061
658k
        ((c >= 0x41) && (c <= 0x5A)) ||
3062
658k
        (c == '_') || (c == ':'))) {
3063
9.94k
      int l;
3064
9.94k
      int first = CUR_SCHAR(cur, l);
3065
3066
9.94k
      if (!IS_LETTER(first) && (first != '_')) {
3067
5.42k
    xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
3068
5.42k
          "Name %s is not XML Namespace compliant\n",
3069
5.42k
          name);
3070
5.42k
      }
3071
9.94k
  }
3072
658k
  cur++;
3073
3074
4.25M
  while ((c != 0) && (len < max)) { /* tested bigname2.xml */
3075
3.59M
      buf[len++] = c;
3076
3.59M
      c = *cur++;
3077
3.59M
  }
3078
658k
  if (len >= max) {
3079
      /*
3080
       * Okay someone managed to make a huge name, so he's ready to pay
3081
       * for the processing speed.
3082
       */
3083
3.69k
      max = len * 2;
3084
3085
3.69k
      buffer = (xmlChar *) xmlMallocAtomic(max);
3086
3.69k
      if (buffer == NULL) {
3087
0
          xmlErrMemory(ctxt, NULL);
3088
0
    return(NULL);
3089
0
      }
3090
3.69k
      memcpy(buffer, buf, len);
3091
2.62M
      while (c != 0) { /* tested bigname2.xml */
3092
2.62M
    if (len + 10 > max) {
3093
3.17k
        xmlChar *tmp;
3094
3095
3.17k
        max *= 2;
3096
3.17k
        tmp = (xmlChar *) xmlRealloc(buffer, max);
3097
3.17k
        if (tmp == NULL) {
3098
0
      xmlErrMemory(ctxt, NULL);
3099
0
      xmlFree(buffer);
3100
0
      return(NULL);
3101
0
        }
3102
3.17k
        buffer = tmp;
3103
3.17k
    }
3104
2.62M
    buffer[len++] = c;
3105
2.62M
    c = *cur++;
3106
2.62M
      }
3107
3.69k
      buffer[len] = 0;
3108
3.69k
  }
3109
3110
658k
  if (buffer == NULL)
3111
655k
      ret = xmlStrndup(buf, len);
3112
3.69k
  else {
3113
3.69k
      ret = buffer;
3114
3.69k
  }
3115
658k
    }
3116
3117
9.94M
    return(ret);
3118
9.94M
}
3119
3120
/************************************************************************
3121
 *                  *
3122
 *      The parser itself       *
3123
 *  Relates to http://www.w3.org/TR/REC-xml       *
3124
 *                  *
3125
 ************************************************************************/
3126
3127
/************************************************************************
3128
 *                  *
3129
 *  Routines to parse Name, NCName and NmToken      *
3130
 *                  *
3131
 ************************************************************************/
3132
#ifdef DEBUG
/*
 * Call counters for the name parsing routines, only compiled in when
 * DEBUG is defined.
 */
static unsigned long nbParseName = 0;
static unsigned long nbParseNameComplex = 0;
static unsigned long nbParseNCName = 0;
static unsigned long nbParseNCNameComplex = 0;
static unsigned long nbParseNmToken = 0;
static unsigned long nbParseStringName = 0;
#endif
3140
3141
/*
3142
 * The two following functions are related to the change of accepted
3143
 * characters for Name and NmToken in the Revision 5 of XML-1.0
3144
 * They correspond to the modified production [4] and the new production [4a]
3145
 * changes in that revision. Also note that the macros used for the
3146
 * productions Letter, Digit, CombiningChar and Extender are not needed
3147
 * anymore.
3148
 * We still keep compatibility to pre-revision5 parsing semantic if the
3149
 * new XML_PARSE_OLD10 option is given to the parser.
3150
 */
3151
static int
3152
31.0M
xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
3153
31.0M
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3154
        /*
3155
   * Use the new checks of production [4] [4a] amd [5] of the
3156
   * Update 5 of XML-1.0
3157
   */
3158
29.4M
  if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3159
29.4M
      (((c >= 'a') && (c <= 'z')) ||
3160
29.4M
       ((c >= 'A') && (c <= 'Z')) ||
3161
29.4M
       (c == '_') || (c == ':') ||
3162
29.4M
       ((c >= 0xC0) && (c <= 0xD6)) ||
3163
29.4M
       ((c >= 0xD8) && (c <= 0xF6)) ||
3164
29.4M
       ((c >= 0xF8) && (c <= 0x2FF)) ||
3165
29.4M
       ((c >= 0x370) && (c <= 0x37D)) ||
3166
29.4M
       ((c >= 0x37F) && (c <= 0x1FFF)) ||
3167
29.4M
       ((c >= 0x200C) && (c <= 0x200D)) ||
3168
29.4M
       ((c >= 0x2070) && (c <= 0x218F)) ||
3169
29.4M
       ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3170
29.4M
       ((c >= 0x3001) && (c <= 0xD7FF)) ||
3171
29.4M
       ((c >= 0xF900) && (c <= 0xFDCF)) ||
3172
29.4M
       ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3173
29.4M
       ((c >= 0x10000) && (c <= 0xEFFFF))))
3174
29.2M
      return(1);
3175
29.4M
    } else {
3176
1.62M
        if (IS_LETTER(c) || (c == '_') || (c == ':'))
3177
1.51M
      return(1);
3178
1.62M
    }
3179
342k
    return(0);
3180
31.0M
}
3181
3182
static int
3183
595M
xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
3184
595M
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3185
        /*
3186
   * Use the new checks of production [4] [4a] amd [5] of the
3187
   * Update 5 of XML-1.0
3188
   */
3189
584M
  if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3190
584M
      (((c >= 'a') && (c <= 'z')) ||
3191
584M
       ((c >= 'A') && (c <= 'Z')) ||
3192
584M
       ((c >= '0') && (c <= '9')) || /* !start */
3193
584M
       (c == '_') || (c == ':') ||
3194
584M
       (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3195
584M
       ((c >= 0xC0) && (c <= 0xD6)) ||
3196
584M
       ((c >= 0xD8) && (c <= 0xF6)) ||
3197
584M
       ((c >= 0xF8) && (c <= 0x2FF)) ||
3198
584M
       ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3199
584M
       ((c >= 0x370) && (c <= 0x37D)) ||
3200
584M
       ((c >= 0x37F) && (c <= 0x1FFF)) ||
3201
584M
       ((c >= 0x200C) && (c <= 0x200D)) ||
3202
584M
       ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3203
584M
       ((c >= 0x2070) && (c <= 0x218F)) ||
3204
584M
       ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3205
584M
       ((c >= 0x3001) && (c <= 0xD7FF)) ||
3206
584M
       ((c >= 0xF900) && (c <= 0xFDCF)) ||
3207
584M
       ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3208
584M
       ((c >= 0x10000) && (c <= 0xEFFFF))))
3209
554M
       return(1);
3210
584M
    } else {
3211
10.6M
        if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3212
10.6M
            (c == '.') || (c == '-') ||
3213
10.6M
      (c == '_') || (c == ':') ||
3214
10.6M
      (IS_COMBINING(c)) ||
3215
10.6M
      (IS_EXTENDER(c)))
3216
8.59M
      return(1);
3217
10.6M
    }
3218
32.1M
    return(0);
3219
595M
}
3220
3221
static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
3222
                                          int *len, int *alloc, int normalize);
3223
3224
static const xmlChar *
3225
1.23M
xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3226
1.23M
    int len = 0, l;
3227
1.23M
    int c;
3228
1.23M
    int count = 0;
3229
1.23M
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3230
546k
                    XML_MAX_TEXT_LENGTH :
3231
1.23M
                    XML_MAX_NAME_LENGTH;
3232
3233
#ifdef DEBUG
3234
    nbParseNameComplex++;
3235
#endif
3236
3237
    /*
3238
     * Handler for more complex cases
3239
     */
3240
1.23M
    GROW;
3241
1.23M
    if (ctxt->instate == XML_PARSER_EOF)
3242
92
        return(NULL);
3243
1.23M
    c = CUR_CHAR(l);
3244
1.23M
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3245
        /*
3246
   * Use the new checks of production [4] [4a] amd [5] of the
3247
   * Update 5 of XML-1.0
3248
   */
3249
757k
  if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3250
757k
      (!(((c >= 'a') && (c <= 'z')) ||
3251
733k
         ((c >= 'A') && (c <= 'Z')) ||
3252
733k
         (c == '_') || (c == ':') ||
3253
733k
         ((c >= 0xC0) && (c <= 0xD6)) ||
3254
733k
         ((c >= 0xD8) && (c <= 0xF6)) ||
3255
733k
         ((c >= 0xF8) && (c <= 0x2FF)) ||
3256
733k
         ((c >= 0x370) && (c <= 0x37D)) ||
3257
733k
         ((c >= 0x37F) && (c <= 0x1FFF)) ||
3258
733k
         ((c >= 0x200C) && (c <= 0x200D)) ||
3259
733k
         ((c >= 0x2070) && (c <= 0x218F)) ||
3260
733k
         ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3261
733k
         ((c >= 0x3001) && (c <= 0xD7FF)) ||
3262
733k
         ((c >= 0xF900) && (c <= 0xFDCF)) ||
3263
733k
         ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3264
733k
         ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3265
256k
      return(NULL);
3266
256k
  }
3267
501k
  len += l;
3268
501k
  NEXTL(l);
3269
501k
  c = CUR_CHAR(l);
3270
5.30M
  while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3271
5.30M
         (((c >= 'a') && (c <= 'z')) ||
3272
5.28M
          ((c >= 'A') && (c <= 'Z')) ||
3273
5.28M
          ((c >= '0') && (c <= '9')) || /* !start */
3274
5.28M
          (c == '_') || (c == ':') ||
3275
5.28M
          (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3276
5.28M
          ((c >= 0xC0) && (c <= 0xD6)) ||
3277
5.28M
          ((c >= 0xD8) && (c <= 0xF6)) ||
3278
5.28M
          ((c >= 0xF8) && (c <= 0x2FF)) ||
3279
5.28M
          ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3280
5.28M
          ((c >= 0x370) && (c <= 0x37D)) ||
3281
5.28M
          ((c >= 0x37F) && (c <= 0x1FFF)) ||
3282
5.28M
          ((c >= 0x200C) && (c <= 0x200D)) ||
3283
5.28M
          ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3284
5.28M
          ((c >= 0x2070) && (c <= 0x218F)) ||
3285
5.28M
          ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3286
5.28M
          ((c >= 0x3001) && (c <= 0xD7FF)) ||
3287
5.28M
          ((c >= 0xF900) && (c <= 0xFDCF)) ||
3288
5.28M
          ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3289
5.28M
          ((c >= 0x10000) && (c <= 0xEFFFF))
3290
5.28M
    )) {
3291
4.80M
      if (count++ > XML_PARSER_CHUNK_SIZE) {
3292
33.3k
    count = 0;
3293
33.3k
    GROW;
3294
33.3k
                if (ctxt->instate == XML_PARSER_EOF)
3295
0
                    return(NULL);
3296
33.3k
      }
3297
4.80M
            if (len <= INT_MAX - l)
3298
4.80M
          len += l;
3299
4.80M
      NEXTL(l);
3300
4.80M
      c = CUR_CHAR(l);
3301
4.80M
  }
3302
501k
    } else {
3303
474k
  if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3304
474k
      (!IS_LETTER(c) && (c != '_') &&
3305
451k
       (c != ':'))) {
3306
250k
      return(NULL);
3307
250k
  }
3308
224k
  len += l;
3309
224k
  NEXTL(l);
3310
224k
  c = CUR_CHAR(l);
3311
3312
4.27M
  while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3313
4.27M
         ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3314
4.24M
    (c == '.') || (c == '-') ||
3315
4.24M
    (c == '_') || (c == ':') ||
3316
4.24M
    (IS_COMBINING(c)) ||
3317
4.24M
    (IS_EXTENDER(c)))) {
3318
4.05M
      if (count++ > XML_PARSER_CHUNK_SIZE) {
3319
29.4k
    count = 0;
3320
29.4k
    GROW;
3321
29.4k
                if (ctxt->instate == XML_PARSER_EOF)
3322
0
                    return(NULL);
3323
29.4k
      }
3324
4.05M
            if (len <= INT_MAX - l)
3325
4.05M
          len += l;
3326
4.05M
      NEXTL(l);
3327
4.05M
      c = CUR_CHAR(l);
3328
4.05M
  }
3329
224k
    }
3330
725k
    if (len > maxLength) {
3331
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3332
0
        return(NULL);
3333
0
    }
3334
725k
    if (ctxt->input->cur - ctxt->input->base < len) {
3335
        /*
3336
         * There were a couple of bugs where PERefs lead to to a change
3337
         * of the buffer. Check the buffer size to avoid passing an invalid
3338
         * pointer to xmlDictLookup.
3339
         */
3340
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
3341
0
                    "unexpected change of input buffer");
3342
0
        return (NULL);
3343
0
    }
3344
725k
    if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3345
3.37k
        return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
3346
722k
    return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3347
725k
}
3348
3349
/**
3350
 * xmlParseName:
3351
 * @ctxt:  an XML parser context
3352
 *
3353
 * DEPRECATED: Internal function, don't use.
3354
 *
3355
 * parse an XML name.
3356
 *
3357
 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3358
 *                  CombiningChar | Extender
3359
 *
3360
 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3361
 *
3362
 * [6] Names ::= Name (#x20 Name)*
3363
 *
3364
 * Returns the Name parsed or NULL
3365
 */
3366
3367
const xmlChar *
3368
64.5M
xmlParseName(xmlParserCtxtPtr ctxt) {
3369
64.5M
    const xmlChar *in;
3370
64.5M
    const xmlChar *ret;
3371
64.5M
    size_t count = 0;
3372
64.5M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3373
20.6M
                       XML_MAX_TEXT_LENGTH :
3374
64.5M
                       XML_MAX_NAME_LENGTH;
3375
3376
64.5M
    GROW;
3377
3378
#ifdef DEBUG
3379
    nbParseName++;
3380
#endif
3381
3382
    /*
3383
     * Accelerator for simple ASCII names
3384
     */
3385
64.5M
    in = ctxt->input->cur;
3386
64.5M
    if (((*in >= 0x61) && (*in <= 0x7A)) ||
3387
64.5M
  ((*in >= 0x41) && (*in <= 0x5A)) ||
3388
64.5M
  (*in == '_') || (*in == ':')) {
3389
63.9M
  in++;
3390
287M
  while (((*in >= 0x61) && (*in <= 0x7A)) ||
3391
287M
         ((*in >= 0x41) && (*in <= 0x5A)) ||
3392
287M
         ((*in >= 0x30) && (*in <= 0x39)) ||
3393
287M
         (*in == '_') || (*in == '-') ||
3394
287M
         (*in == ':') || (*in == '.'))
3395
223M
      in++;
3396
63.9M
  if ((*in > 0) && (*in < 0x80)) {
3397
63.2M
      count = in - ctxt->input->cur;
3398
63.2M
            if (count > maxLength) {
3399
0
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3400
0
                return(NULL);
3401
0
            }
3402
63.2M
      ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3403
63.2M
      ctxt->input->cur = in;
3404
63.2M
      ctxt->input->col += count;
3405
63.2M
      if (ret == NULL)
3406
0
          xmlErrMemory(ctxt, NULL);
3407
63.2M
      return(ret);
3408
63.2M
  }
3409
63.9M
    }
3410
    /* accelerator for special cases */
3411
1.23M
    return(xmlParseNameComplex(ctxt));
3412
64.5M
}
3413
3414
static const xmlChar *
3415
455k
xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3416
455k
    int len = 0, l;
3417
455k
    int c;
3418
455k
    int count = 0;
3419
455k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3420
185k
                    XML_MAX_TEXT_LENGTH :
3421
455k
                    XML_MAX_NAME_LENGTH;
3422
455k
    size_t startPosition = 0;
3423
3424
#ifdef DEBUG
3425
    nbParseNCNameComplex++;
3426
#endif
3427
3428
    /*
3429
     * Handler for more complex cases
3430
     */
3431
455k
    GROW;
3432
455k
    startPosition = CUR_PTR - BASE_PTR;
3433
455k
    c = CUR_CHAR(l);
3434
455k
    if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3435
455k
  (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3436
358k
  return(NULL);
3437
358k
    }
3438
3439
3.50M
    while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3440
3.50M
     (xmlIsNameChar(ctxt, c) && (c != ':'))) {
3441
3.40M
  if (count++ > XML_PARSER_CHUNK_SIZE) {
3442
27.6k
      count = 0;
3443
27.6k
      GROW;
3444
27.6k
            if (ctxt->instate == XML_PARSER_EOF)
3445
0
                return(NULL);
3446
27.6k
  }
3447
3.40M
        if (len <= INT_MAX - l)
3448
3.40M
      len += l;
3449
3.40M
  NEXTL(l);
3450
3.40M
  c = CUR_CHAR(l);
3451
3.40M
  if (c == 0) {
3452
11.5k
      count = 0;
3453
      /*
3454
       * when shrinking to extend the buffer we really need to preserve
3455
       * the part of the name we already parsed. Hence rolling back
3456
       * by current length.
3457
       */
3458
11.5k
      ctxt->input->cur -= l;
3459
11.5k
      GROW;
3460
11.5k
            if (ctxt->instate == XML_PARSER_EOF)
3461
0
                return(NULL);
3462
11.5k
      ctxt->input->cur += l;
3463
11.5k
      c = CUR_CHAR(l);
3464
11.5k
  }
3465
3.40M
    }
3466
97.0k
    if (len > maxLength) {
3467
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3468
0
        return(NULL);
3469
0
    }
3470
97.0k
    return(xmlDictLookup(ctxt->dict, (BASE_PTR + startPosition), len));
3471
97.0k
}
3472
3473
/**
3474
 * xmlParseNCName:
3475
 * @ctxt:  an XML parser context
3476
 * @len:  length of the string parsed
3477
 *
3478
 * parse an XML name.
3479
 *
3480
 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3481
 *                      CombiningChar | Extender
3482
 *
3483
 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3484
 *
3485
 * Returns the Name parsed or NULL
3486
 */
3487
3488
static const xmlChar *
3489
15.1M
xmlParseNCName(xmlParserCtxtPtr ctxt) {
3490
15.1M
    const xmlChar *in, *e;
3491
15.1M
    const xmlChar *ret;
3492
15.1M
    size_t count = 0;
3493
15.1M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3494
4.27M
                       XML_MAX_TEXT_LENGTH :
3495
15.1M
                       XML_MAX_NAME_LENGTH;
3496
3497
#ifdef DEBUG
3498
    nbParseNCName++;
3499
#endif
3500
3501
    /*
3502
     * Accelerator for simple ASCII names
3503
     */
3504
15.1M
    in = ctxt->input->cur;
3505
15.1M
    e = ctxt->input->end;
3506
15.1M
    if ((((*in >= 0x61) && (*in <= 0x7A)) ||
3507
15.1M
   ((*in >= 0x41) && (*in <= 0x5A)) ||
3508
15.1M
   (*in == '_')) && (in < e)) {
3509
14.7M
  in++;
3510
54.7M
  while ((((*in >= 0x61) && (*in <= 0x7A)) ||
3511
54.7M
          ((*in >= 0x41) && (*in <= 0x5A)) ||
3512
54.7M
          ((*in >= 0x30) && (*in <= 0x39)) ||
3513
54.7M
          (*in == '_') || (*in == '-') ||
3514
54.7M
          (*in == '.')) && (in < e))
3515
40.0M
      in++;
3516
14.7M
  if (in >= e)
3517
2.68k
      goto complex;
3518
14.7M
  if ((*in > 0) && (*in < 0x80)) {
3519
14.6M
      count = in - ctxt->input->cur;
3520
14.6M
            if (count > maxLength) {
3521
0
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3522
0
                return(NULL);
3523
0
            }
3524
14.6M
      ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3525
14.6M
      ctxt->input->cur = in;
3526
14.6M
      ctxt->input->col += count;
3527
14.6M
      if (ret == NULL) {
3528
0
          xmlErrMemory(ctxt, NULL);
3529
0
      }
3530
14.6M
      return(ret);
3531
14.6M
  }
3532
14.7M
    }
3533
455k
complex:
3534
455k
    return(xmlParseNCNameComplex(ctxt));
3535
15.1M
}
3536
3537
/**
3538
 * xmlParseNameAndCompare:
3539
 * @ctxt:  an XML parser context
3540
 *
3541
 * parse an XML name and compares for match
3542
 * (specialized for endtag parsing)
3543
 *
3544
 * Returns NULL for an illegal name, (xmlChar*) 1 for success
3545
 * and the name for mismatch
3546
 */
3547
3548
static const xmlChar *
3549
4.79M
xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
3550
4.79M
    register const xmlChar *cmp = other;
3551
4.79M
    register const xmlChar *in;
3552
4.79M
    const xmlChar *ret;
3553
3554
4.79M
    GROW;
3555
4.79M
    if (ctxt->instate == XML_PARSER_EOF)
3556
0
        return(NULL);
3557
3558
4.79M
    in = ctxt->input->cur;
3559
24.9M
    while (*in != 0 && *in == *cmp) {
3560
20.1M
  ++in;
3561
20.1M
  ++cmp;
3562
20.1M
    }
3563
4.79M
    if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
3564
  /* success */
3565
4.60M
  ctxt->input->col += in - ctxt->input->cur;
3566
4.60M
  ctxt->input->cur = in;
3567
4.60M
  return (const xmlChar*) 1;
3568
4.60M
    }
3569
    /* failure (or end of input buffer), check with full function */
3570
198k
    ret = xmlParseName (ctxt);
3571
    /* strings coming from the dictionary direct compare possible */
3572
198k
    if (ret == other) {
3573
10.4k
  return (const xmlChar*) 1;
3574
10.4k
    }
3575
187k
    return ret;
3576
198k
}
3577
3578
/**
3579
 * xmlParseStringName:
3580
 * @ctxt:  an XML parser context
3581
 * @str:  a pointer to the string pointer (IN/OUT)
3582
 *
3583
 * parse an XML name.
3584
 *
3585
 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3586
 *                  CombiningChar | Extender
3587
 *
3588
 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3589
 *
3590
 * [6] Names ::= Name (#x20 Name)*
3591
 *
3592
 * Returns the Name parsed or NULL. The @str pointer
3593
 * is updated to the current location in the string.
3594
 */
3595
3596
static xmlChar *
3597
30.6M
xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3598
30.6M
    xmlChar buf[XML_MAX_NAMELEN + 5];
3599
30.6M
    const xmlChar *cur = *str;
3600
30.6M
    int len = 0, l;
3601
30.6M
    int c;
3602
30.6M
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3603
4.13M
                    XML_MAX_TEXT_LENGTH :
3604
30.6M
                    XML_MAX_NAME_LENGTH;
3605
3606
#ifdef DEBUG
3607
    nbParseStringName++;
3608
#endif
3609
3610
30.6M
    c = CUR_SCHAR(cur, l);
3611
30.6M
    if (!xmlIsNameStartChar(ctxt, c)) {
3612
20.8k
  return(NULL);
3613
20.8k
    }
3614
3615
30.6M
    COPY_BUF(l,buf,len,c);
3616
30.6M
    cur += l;
3617
30.6M
    c = CUR_SCHAR(cur, l);
3618
272M
    while (xmlIsNameChar(ctxt, c)) {
3619
243M
  COPY_BUF(l,buf,len,c);
3620
243M
  cur += l;
3621
243M
  c = CUR_SCHAR(cur, l);
3622
243M
  if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3623
      /*
3624
       * Okay someone managed to make a huge name, so he's ready to pay
3625
       * for the processing speed.
3626
       */
3627
1.35M
      xmlChar *buffer;
3628
1.35M
      int max = len * 2;
3629
3630
1.35M
      buffer = (xmlChar *) xmlMallocAtomic(max);
3631
1.35M
      if (buffer == NULL) {
3632
0
          xmlErrMemory(ctxt, NULL);
3633
0
    return(NULL);
3634
0
      }
3635
1.35M
      memcpy(buffer, buf, len);
3636
308M
      while (xmlIsNameChar(ctxt, c)) {
3637
306M
    if (len + 10 > max) {
3638
1.35M
        xmlChar *tmp;
3639
3640
1.35M
        max *= 2;
3641
1.35M
        tmp = (xmlChar *) xmlRealloc(buffer, max);
3642
1.35M
        if (tmp == NULL) {
3643
0
      xmlErrMemory(ctxt, NULL);
3644
0
      xmlFree(buffer);
3645
0
      return(NULL);
3646
0
        }
3647
1.35M
        buffer = tmp;
3648
1.35M
    }
3649
306M
    COPY_BUF(l,buffer,len,c);
3650
306M
    cur += l;
3651
306M
    c = CUR_SCHAR(cur, l);
3652
306M
                if (len > maxLength) {
3653
0
                    xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3654
0
                    xmlFree(buffer);
3655
0
                    return(NULL);
3656
0
                }
3657
306M
      }
3658
1.35M
      buffer[len] = 0;
3659
1.35M
      *str = cur;
3660
1.35M
      return(buffer);
3661
1.35M
  }
3662
243M
    }
3663
29.2M
    if (len > maxLength) {
3664
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3665
0
        return(NULL);
3666
0
    }
3667
29.2M
    *str = cur;
3668
29.2M
    return(xmlStrndup(buf, len));
3669
29.2M
}
3670
3671
/**
3672
 * xmlParseNmtoken:
3673
 * @ctxt:  an XML parser context
3674
 *
3675
 * DEPRECATED: Internal function, don't use.
3676
 *
3677
 * parse an XML Nmtoken.
3678
 *
3679
 * [7] Nmtoken ::= (NameChar)+
3680
 *
3681
 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
3682
 *
3683
 * Returns the Nmtoken parsed or NULL
3684
 */
3685
3686
xmlChar *
3687
1.48M
xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3688
1.48M
    xmlChar buf[XML_MAX_NAMELEN + 5];
3689
1.48M
    int len = 0, l;
3690
1.48M
    int c;
3691
1.48M
    int count = 0;
3692
1.48M
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3693
591k
                    XML_MAX_TEXT_LENGTH :
3694
1.48M
                    XML_MAX_NAME_LENGTH;
3695
3696
#ifdef DEBUG
3697
    nbParseNmToken++;
3698
#endif
3699
3700
1.48M
    GROW;
3701
1.48M
    if (ctxt->instate == XML_PARSER_EOF)
3702
5
        return(NULL);
3703
1.48M
    c = CUR_CHAR(l);
3704
3705
8.74M
    while (xmlIsNameChar(ctxt, c)) {
3706
7.25M
  if (count++ > XML_PARSER_CHUNK_SIZE) {
3707
0
      count = 0;
3708
0
      GROW;
3709
0
  }
3710
7.25M
  COPY_BUF(l,buf,len,c);
3711
7.25M
  NEXTL(l);
3712
7.25M
  c = CUR_CHAR(l);
3713
7.25M
  if (c == 0) {
3714
1.83k
      count = 0;
3715
1.83k
      GROW;
3716
1.83k
      if (ctxt->instate == XML_PARSER_EOF)
3717
0
    return(NULL);
3718
1.83k
            c = CUR_CHAR(l);
3719
1.83k
  }
3720
7.25M
  if (len >= XML_MAX_NAMELEN) {
3721
      /*
3722
       * Okay someone managed to make a huge token, so he's ready to pay
3723
       * for the processing speed.
3724
       */
3725
1.59k
      xmlChar *buffer;
3726
1.59k
      int max = len * 2;
3727
3728
1.59k
      buffer = (xmlChar *) xmlMallocAtomic(max);
3729
1.59k
      if (buffer == NULL) {
3730
0
          xmlErrMemory(ctxt, NULL);
3731
0
    return(NULL);
3732
0
      }
3733
1.59k
      memcpy(buffer, buf, len);
3734
2.13M
      while (xmlIsNameChar(ctxt, c)) {
3735
2.12M
    if (count++ > XML_PARSER_CHUNK_SIZE) {
3736
21.5k
        count = 0;
3737
21.5k
        GROW;
3738
21.5k
                    if (ctxt->instate == XML_PARSER_EOF) {
3739
0
                        xmlFree(buffer);
3740
0
                        return(NULL);
3741
0
                    }
3742
21.5k
    }
3743
2.12M
    if (len + 10 > max) {
3744
2.88k
        xmlChar *tmp;
3745
3746
2.88k
        max *= 2;
3747
2.88k
        tmp = (xmlChar *) xmlRealloc(buffer, max);
3748
2.88k
        if (tmp == NULL) {
3749
0
      xmlErrMemory(ctxt, NULL);
3750
0
      xmlFree(buffer);
3751
0
      return(NULL);
3752
0
        }
3753
2.88k
        buffer = tmp;
3754
2.88k
    }
3755
2.12M
    COPY_BUF(l,buffer,len,c);
3756
2.12M
    NEXTL(l);
3757
2.12M
    c = CUR_CHAR(l);
3758
2.12M
                if (len > maxLength) {
3759
0
                    xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3760
0
                    xmlFree(buffer);
3761
0
                    return(NULL);
3762
0
                }
3763
2.12M
      }
3764
1.59k
      buffer[len] = 0;
3765
1.59k
      return(buffer);
3766
1.59k
  }
3767
7.25M
    }
3768
1.48M
    if (len == 0)
3769
13.6k
        return(NULL);
3770
1.46M
    if (len > maxLength) {
3771
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3772
0
        return(NULL);
3773
0
    }
3774
1.46M
    return(xmlStrndup(buf, len));
3775
1.46M
}
3776
3777
/**
3778
 * xmlParseEntityValue:
3779
 * @ctxt:  an XML parser context
3780
 * @orig:  if non-NULL store a copy of the original entity value
3781
 *
3782
 * DEPRECATED: Internal function, don't use.
3783
 *
3784
 * parse a value for ENTITY declarations
3785
 *
3786
 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3787
 *                 "'" ([^%&'] | PEReference | Reference)* "'"
3788
 *
3789
 * Returns the EntityValue parsed with reference substituted or NULL
3790
 */
3791
3792
xmlChar *
3793
850k
xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3794
850k
    xmlChar *buf = NULL;
3795
850k
    int len = 0;
3796
850k
    int size = XML_PARSER_BUFFER_SIZE;
3797
850k
    int c, l;
3798
850k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3799
306k
                    XML_MAX_HUGE_LENGTH :
3800
850k
                    XML_MAX_TEXT_LENGTH;
3801
850k
    xmlChar stop;
3802
850k
    xmlChar *ret = NULL;
3803
850k
    const xmlChar *cur = NULL;
3804
850k
    xmlParserInputPtr input;
3805
3806
850k
    if (RAW == '"') stop = '"';
3807
224k
    else if (RAW == '\'') stop = '\'';
3808
0
    else {
3809
0
  xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
3810
0
  return(NULL);
3811
0
    }
3812
850k
    buf = (xmlChar *) xmlMallocAtomic(size);
3813
850k
    if (buf == NULL) {
3814
0
  xmlErrMemory(ctxt, NULL);
3815
0
  return(NULL);
3816
0
    }
3817
3818
    /*
3819
     * The content of the entity definition is copied in a buffer.
3820
     */
3821
3822
850k
    ctxt->instate = XML_PARSER_ENTITY_VALUE;
3823
850k
    input = ctxt->input;
3824
850k
    GROW;
3825
850k
    if (ctxt->instate == XML_PARSER_EOF)
3826
0
        goto error;
3827
850k
    NEXT;
3828
850k
    c = CUR_CHAR(l);
3829
    /*
3830
     * NOTE: 4.4.5 Included in Literal
3831
     * When a parameter entity reference appears in a literal entity
3832
     * value, ... a single or double quote character in the replacement
3833
     * text is always treated as a normal data character and will not
3834
     * terminate the literal.
3835
     * In practice it means we stop the loop only when back at parsing
3836
     * the initial entity and the quote is found
3837
     */
3838
48.3M
    while (((IS_CHAR(c)) && ((c != stop) || /* checked */
3839
48.3M
      (ctxt->input != input))) && (ctxt->instate != XML_PARSER_EOF)) {
3840
47.5M
  if (len + 5 >= size) {
3841
135k
      xmlChar *tmp;
3842
3843
135k
      size *= 2;
3844
135k
      tmp = (xmlChar *) xmlRealloc(buf, size);
3845
135k
      if (tmp == NULL) {
3846
0
    xmlErrMemory(ctxt, NULL);
3847
0
                goto error;
3848
0
      }
3849
135k
      buf = tmp;
3850
135k
  }
3851
47.5M
  COPY_BUF(l,buf,len,c);
3852
47.5M
  NEXTL(l);
3853
3854
47.5M
  GROW;
3855
47.5M
  c = CUR_CHAR(l);
3856
47.5M
  if (c == 0) {
3857
1.25k
      GROW;
3858
1.25k
      c = CUR_CHAR(l);
3859
1.25k
  }
3860
3861
47.5M
        if (len > maxLength) {
3862
0
            xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
3863
0
                           "entity value too long\n");
3864
0
            goto error;
3865
0
        }
3866
47.5M
    }
3867
850k
    buf[len] = 0;
3868
850k
    if (ctxt->instate == XML_PARSER_EOF)
3869
0
        goto error;
3870
850k
    if (c != stop) {
3871
1.85k
        xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
3872
1.85k
        goto error;
3873
1.85k
    }
3874
848k
    NEXT;
3875
3876
    /*
3877
     * Raise problem w.r.t. '&' and '%' being used in non-entities
3878
     * reference constructs. Note Charref will be handled in
3879
     * xmlStringDecodeEntities()
3880
     */
3881
848k
    cur = buf;
3882
38.3M
    while (*cur != 0) { /* non input consuming */
3883
37.5M
  if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3884
512k
      xmlChar *name;
3885
512k
      xmlChar tmp = *cur;
3886
512k
            int nameOk = 0;
3887
3888
512k
      cur++;
3889
512k
      name = xmlParseStringName(ctxt, &cur);
3890
512k
            if (name != NULL) {
3891
508k
                nameOk = 1;
3892
508k
                xmlFree(name);
3893
508k
            }
3894
512k
            if ((nameOk == 0) || (*cur != ';')) {
3895
8.10k
    xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
3896
8.10k
      "EntityValue: '%c' forbidden except for entities references\n",
3897
8.10k
                            tmp);
3898
8.10k
                goto error;
3899
8.10k
      }
3900
504k
      if ((tmp == '%') && (ctxt->inSubset == 1) &&
3901
504k
    (ctxt->inputNr == 1)) {
3902
6.03k
    xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
3903
6.03k
                goto error;
3904
6.03k
      }
3905
498k
      if (*cur == 0)
3906
0
          break;
3907
498k
  }
3908
37.5M
  cur++;
3909
37.5M
    }
3910
3911
    /*
3912
     * Then PEReference entities are substituted.
3913
     *
3914
     * NOTE: 4.4.7 Bypassed
3915
     * When a general entity reference appears in the EntityValue in
3916
     * an entity declaration, it is bypassed and left as is.
3917
     * so XML_SUBSTITUTE_REF is not set here.
3918
     */
3919
834k
    ++ctxt->depth;
3920
834k
    ret = xmlStringDecodeEntitiesInt(ctxt, buf, len, XML_SUBSTITUTE_PEREF,
3921
834k
                                     0, 0, 0, /* check */ 1);
3922
834k
    --ctxt->depth;
3923
3924
834k
    if (orig != NULL) {
3925
834k
        *orig = buf;
3926
834k
        buf = NULL;
3927
834k
    }
3928
3929
850k
error:
3930
850k
    if (buf != NULL)
3931
15.9k
        xmlFree(buf);
3932
850k
    return(ret);
3933
834k
}
3934
3935
/**
3936
 * xmlParseAttValueComplex:
3937
 * @ctxt:  an XML parser context
3938
 * @len:   the resulting attribute len
3939
 * @normalize:  whether to apply the inner normalization
3940
 *
3941
 * parse a value for an attribute, this is the fallback function
3942
 * of xmlParseAttValue() when the attribute parsing requires handling
3943
 * of non-ASCII characters, or normalization compaction.
3944
 *
3945
 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3946
 */
3947
static xmlChar *
3948
466k
xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
3949
466k
    xmlChar limit = 0;
3950
466k
    xmlChar *buf = NULL;
3951
466k
    xmlChar *rep = NULL;
3952
466k
    size_t len = 0;
3953
466k
    size_t buf_size = 0;
3954
466k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3955
189k
                       XML_MAX_HUGE_LENGTH :
3956
466k
                       XML_MAX_TEXT_LENGTH;
3957
466k
    int c, l, in_space = 0;
3958
466k
    xmlChar *current = NULL;
3959
466k
    xmlEntityPtr ent;
3960
3961
466k
    if (NXT(0) == '"') {
3962
312k
  ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3963
312k
  limit = '"';
3964
312k
        NEXT;
3965
312k
    } else if (NXT(0) == '\'') {
3966
154k
  limit = '\'';
3967
154k
  ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3968
154k
        NEXT;
3969
154k
    } else {
3970
0
  xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
3971
0
  return(NULL);
3972
0
    }
3973
3974
    /*
3975
     * allocate a translation buffer.
3976
     */
3977
466k
    buf_size = XML_PARSER_BUFFER_SIZE;
3978
466k
    buf = (xmlChar *) xmlMallocAtomic(buf_size);
3979
466k
    if (buf == NULL) goto mem_error;
3980
3981
    /*
3982
     * OK loop until we reach one of the ending char or a size limit.
3983
     */
3984
466k
    c = CUR_CHAR(l);
3985
24.1M
    while (((NXT(0) != limit) && /* checked */
3986
24.1M
            (IS_CHAR(c)) && (c != '<')) &&
3987
24.1M
            (ctxt->instate != XML_PARSER_EOF)) {
3988
23.6M
  if (c == '&') {
3989
2.61M
      in_space = 0;
3990
2.61M
      if (NXT(1) == '#') {
3991
239k
    int val = xmlParseCharRef(ctxt);
3992
3993
239k
    if (val == '&') {
3994
3.05k
        if (ctxt->replaceEntities) {
3995
1.09k
      if (len + 10 > buf_size) {
3996
178
          growBuffer(buf, 10);
3997
178
      }
3998
1.09k
      buf[len++] = '&';
3999
1.96k
        } else {
4000
      /*
4001
       * The reparsing will be done in xmlStringGetNodeList()
4002
       * called by the attribute() function in SAX.c
4003
       */
4004
1.96k
      if (len + 10 > buf_size) {
4005
200
          growBuffer(buf, 10);
4006
200
      }
4007
1.96k
      buf[len++] = '&';
4008
1.96k
      buf[len++] = '#';
4009
1.96k
      buf[len++] = '3';
4010
1.96k
      buf[len++] = '8';
4011
1.96k
      buf[len++] = ';';
4012
1.96k
        }
4013
235k
    } else if (val != 0) {
4014
209k
        if (len + 10 > buf_size) {
4015
2.00k
      growBuffer(buf, 10);
4016
2.00k
        }
4017
209k
        len += xmlCopyChar(0, &buf[len], val);
4018
209k
    }
4019
2.37M
      } else {
4020
2.37M
    ent = xmlParseEntityRef(ctxt);
4021
2.37M
    if ((ent != NULL) &&
4022
2.37M
        (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
4023
54.5k
        if (len + 10 > buf_size) {
4024
208
      growBuffer(buf, 10);
4025
208
        }
4026
54.5k
        if ((ctxt->replaceEntities == 0) &&
4027
54.5k
            (ent->content[0] == '&')) {
4028
17.2k
      buf[len++] = '&';
4029
17.2k
      buf[len++] = '#';
4030
17.2k
      buf[len++] = '3';
4031
17.2k
      buf[len++] = '8';
4032
17.2k
      buf[len++] = ';';
4033
37.3k
        } else {
4034
37.3k
      buf[len++] = ent->content[0];
4035
37.3k
        }
4036
2.32M
    } else if ((ent != NULL) &&
4037
2.32M
               (ctxt->replaceEntities != 0)) {
4038
1.39M
        if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
4039
1.39M
                        if (xmlParserEntityCheck(ctxt, ent->length))
4040
0
                            goto error;
4041
4042
1.39M
      ++ctxt->depth;
4043
1.39M
      rep = xmlStringDecodeEntitiesInt(ctxt, ent->content,
4044
1.39M
                                ent->length, XML_SUBSTITUTE_REF, 0, 0, 0,
4045
1.39M
                                /* check */ 1);
4046
1.39M
      --ctxt->depth;
4047
1.39M
      if (rep != NULL) {
4048
1.38M
          current = rep;
4049
288M
          while (*current != 0) { /* non input consuming */
4050
286M
                                if ((*current == 0xD) || (*current == 0xA) ||
4051
286M
                                    (*current == 0x9)) {
4052
637k
                                    buf[len++] = 0x20;
4053
637k
                                    current++;
4054
637k
                                } else
4055
286M
                                    buf[len++] = *current++;
4056
286M
        if (len + 10 > buf_size) {
4057
33.7k
            growBuffer(buf, 10);
4058
33.7k
        }
4059
286M
          }
4060
1.38M
          xmlFree(rep);
4061
1.38M
          rep = NULL;
4062
1.38M
      }
4063
1.39M
        } else {
4064
0
      if (len + 10 > buf_size) {
4065
0
          growBuffer(buf, 10);
4066
0
      }
4067
0
      if (ent->content != NULL)
4068
0
          buf[len++] = ent->content[0];
4069
0
        }
4070
1.39M
    } else if (ent != NULL) {
4071
676k
        int i = xmlStrlen(ent->name);
4072
676k
        const xmlChar *cur = ent->name;
4073
4074
        /*
4075
                     * We also check for recursion and amplification
4076
                     * when entities are not substituted. They're
4077
                     * often expanded later.
4078
         */
4079
676k
        if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
4080
676k
      (ent->content != NULL)) {
4081
652k
                        if ((ent->flags & XML_ENT_CHECKED) == 0) {
4082
12.3k
                            unsigned long oldCopy = ctxt->sizeentcopy;
4083
4084
12.3k
                            ctxt->sizeentcopy = ent->length;
4085
4086
12.3k
                            ++ctxt->depth;
4087
12.3k
                            rep = xmlStringDecodeEntitiesInt(ctxt,
4088
12.3k
                                    ent->content, ent->length,
4089
12.3k
                                    XML_SUBSTITUTE_REF, 0, 0, 0,
4090
12.3k
                                    /* check */ 1);
4091
12.3k
                            --ctxt->depth;
4092
4093
                            /*
4094
                             * If we're parsing DTD content, the entity
4095
                             * might reference other entities which
4096
                             * weren't defined yet, so the check isn't
4097
                             * reliable.
4098
                             */
4099
12.3k
                            if (ctxt->inSubset == 0) {
4100
10.8k
                                ent->flags |= XML_ENT_CHECKED;
4101
10.8k
                                ent->expandedSize = ctxt->sizeentcopy;
4102
10.8k
                            }
4103
4104
12.3k
                            if (rep != NULL) {
4105
12.2k
                                xmlFree(rep);
4106
12.2k
                                rep = NULL;
4107
12.2k
                            } else {
4108
158
                                ent->content[0] = 0;
4109
158
                            }
4110
4111
12.3k
                            if (xmlParserEntityCheck(ctxt, oldCopy))
4112
14
                                goto error;
4113
640k
                        } else {
4114
640k
                            if (xmlParserEntityCheck(ctxt, ent->expandedSize))
4115
0
                                goto error;
4116
640k
                        }
4117
652k
        }
4118
4119
        /*
4120
         * Just output the reference
4121
         */
4122
676k
        buf[len++] = '&';
4123
679k
        while (len + i + 10 > buf_size) {
4124
6.77k
      growBuffer(buf, i + 10);
4125
6.77k
        }
4126
1.63M
        for (;i > 0;i--)
4127
962k
      buf[len++] = *cur++;
4128
676k
        buf[len++] = ';';
4129
676k
    }
4130
2.37M
      }
4131
21.0M
  } else {
4132
21.0M
      if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
4133
2.79M
          if ((len != 0) || (!normalize)) {
4134
2.68M
        if ((!normalize) || (!in_space)) {
4135
2.37M
      COPY_BUF(l,buf,len,0x20);
4136
2.37M
      while (len + 10 > buf_size) {
4137
10.0k
          growBuffer(buf, 10);
4138
10.0k
      }
4139
2.37M
        }
4140
2.68M
        in_space = 1;
4141
2.68M
    }
4142
18.2M
      } else {
4143
18.2M
          in_space = 0;
4144
18.2M
    COPY_BUF(l,buf,len,c);
4145
18.2M
    if (len + 10 > buf_size) {
4146
81.0k
        growBuffer(buf, 10);
4147
81.0k
    }
4148
18.2M
      }
4149
21.0M
      NEXTL(l);
4150
21.0M
  }
4151
23.6M
  GROW;
4152
23.6M
  c = CUR_CHAR(l);
4153
23.6M
        if (len > maxLength) {
4154
0
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4155
0
                           "AttValue length too long\n");
4156
0
            goto mem_error;
4157
0
        }
4158
23.6M
    }
4159
466k
    if (ctxt->instate == XML_PARSER_EOF)
4160
571
        goto error;
4161
4162
465k
    if ((in_space) && (normalize)) {
4163
25.2k
        while ((len > 0) && (buf[len - 1] == 0x20)) len--;
4164
9.14k
    }
4165
465k
    buf[len] = 0;
4166
465k
    if (RAW == '<') {
4167
94.5k
  xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
4168
371k
    } else if (RAW != limit) {
4169
71.2k
  if ((c != 0) && (!IS_CHAR(c))) {
4170
30.6k
      xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
4171
30.6k
         "invalid character in attribute value\n");
4172
40.5k
  } else {
4173
40.5k
      xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4174
40.5k
         "AttValue: ' expected\n");
4175
40.5k
        }
4176
71.2k
    } else
4177
299k
  NEXT;
4178
4179
465k
    if (attlen != NULL) *attlen = len;
4180
465k
    return(buf);
4181
4182
0
mem_error:
4183
0
    xmlErrMemory(ctxt, NULL);
4184
585
error:
4185
585
    if (buf != NULL)
4186
585
        xmlFree(buf);
4187
585
    if (rep != NULL)
4188
0
        xmlFree(rep);
4189
585
    return(NULL);
4190
0
}
4191
4192
/**
4193
 * xmlParseAttValue:
4194
 * @ctxt:  an XML parser context
4195
 *
4196
 * DEPRECATED: Internal function, don't use.
4197
 *
4198
 * parse a value for an attribute
4199
 * Note: the parser won't do substitution of entities here, this
4200
 * will be handled later in xmlStringGetNodeList
4201
 *
4202
 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
4203
 *                   "'" ([^<&'] | Reference)* "'"
4204
 *
4205
 * 3.3.3 Attribute-Value Normalization:
4206
 * Before the value of an attribute is passed to the application or
4207
 * checked for validity, the XML processor must normalize it as follows:
4208
 * - a character reference is processed by appending the referenced
4209
 *   character to the attribute value
4210
 * - an entity reference is processed by recursively processing the
4211
 *   replacement text of the entity
4212
 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4213
 *   appending #x20 to the normalized value, except that only a single
4214
 *   #x20 is appended for a "#xD#xA" sequence that is part of an external
4215
 *   parsed entity or the literal entity value of an internal parsed entity
4216
 * - other characters are processed by appending them to the normalized value
4217
 * If the declared value is not CDATA, then the XML processor must further
4218
 * process the normalized attribute value by discarding any leading and
4219
 * trailing space (#x20) characters, and by replacing sequences of space
4220
 * (#x20) characters by a single space (#x20) character.
4221
 * All attributes for which no declaration has been read should be treated
4222
 * by a non-validating parser as if declared CDATA.
4223
 *
4224
 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
4225
 */
4226
4227
4228
xmlChar *
4229
3.63M
xmlParseAttValue(xmlParserCtxtPtr ctxt) {
4230
3.63M
    if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
4231
3.63M
    return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
4232
3.63M
}
4233
4234
/**
4235
 * xmlParseSystemLiteral:
4236
 * @ctxt:  an XML parser context
4237
 *
4238
 * DEPRECATED: Internal function, don't use.
4239
 *
4240
 * parse an XML Literal
4241
 *
4242
 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4243
 *
4244
 * Returns the SystemLiteral parsed or NULL
4245
 */
4246
4247
xmlChar *
4248
351k
xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
4249
351k
    xmlChar *buf = NULL;
4250
351k
    int len = 0;
4251
351k
    int size = XML_PARSER_BUFFER_SIZE;
4252
351k
    int cur, l;
4253
351k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4254
143k
                    XML_MAX_TEXT_LENGTH :
4255
351k
                    XML_MAX_NAME_LENGTH;
4256
351k
    xmlChar stop;
4257
351k
    int state = ctxt->instate;
4258
351k
    int count = 0;
4259
4260
351k
    SHRINK;
4261
351k
    if (RAW == '"') {
4262
319k
        NEXT;
4263
319k
  stop = '"';
4264
319k
    } else if (RAW == '\'') {
4265
22.0k
        NEXT;
4266
22.0k
  stop = '\'';
4267
22.0k
    } else {
4268
10.3k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4269
10.3k
  return(NULL);
4270
10.3k
    }
4271
4272
341k
    buf = (xmlChar *) xmlMallocAtomic(size);
4273
341k
    if (buf == NULL) {
4274
0
        xmlErrMemory(ctxt, NULL);
4275
0
  return(NULL);
4276
0
    }
4277
341k
    ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
4278
341k
    cur = CUR_CHAR(l);
4279
19.2M
    while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
4280
18.9M
  if (len + 5 >= size) {
4281
12.5k
      xmlChar *tmp;
4282
4283
12.5k
      size *= 2;
4284
12.5k
      tmp = (xmlChar *) xmlRealloc(buf, size);
4285
12.5k
      if (tmp == NULL) {
4286
0
          xmlFree(buf);
4287
0
    xmlErrMemory(ctxt, NULL);
4288
0
    ctxt->instate = (xmlParserInputState) state;
4289
0
    return(NULL);
4290
0
      }
4291
12.5k
      buf = tmp;
4292
12.5k
  }
4293
18.9M
  count++;
4294
18.9M
  if (count > 50) {
4295
281k
      SHRINK;
4296
281k
      GROW;
4297
281k
      count = 0;
4298
281k
            if (ctxt->instate == XML_PARSER_EOF) {
4299
0
          xmlFree(buf);
4300
0
    return(NULL);
4301
0
            }
4302
281k
  }
4303
18.9M
  COPY_BUF(l,buf,len,cur);
4304
18.9M
  NEXTL(l);
4305
18.9M
  cur = CUR_CHAR(l);
4306
18.9M
  if (cur == 0) {
4307
3.05k
      GROW;
4308
3.05k
      SHRINK;
4309
3.05k
      cur = CUR_CHAR(l);
4310
3.05k
  }
4311
18.9M
        if (len > maxLength) {
4312
129
            xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
4313
129
            xmlFree(buf);
4314
129
            ctxt->instate = (xmlParserInputState) state;
4315
129
            return(NULL);
4316
129
        }
4317
18.9M
    }
4318
341k
    buf[len] = 0;
4319
341k
    ctxt->instate = (xmlParserInputState) state;
4320
341k
    if (!IS_CHAR(cur)) {
4321
4.37k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4322
337k
    } else {
4323
337k
  NEXT;
4324
337k
    }
4325
341k
    return(buf);
4326
341k
}
4327
4328
/**
4329
 * xmlParsePubidLiteral:
4330
 * @ctxt:  an XML parser context
4331
 *
4332
 * DEPRECATED: Internal function, don't use.
4333
 *
4334
 * parse an XML public literal
4335
 *
4336
 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4337
 *
4338
 * Returns the PubidLiteral parsed or NULL.
4339
 */
4340
4341
xmlChar *
4342
68.3k
xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
4343
68.3k
    xmlChar *buf = NULL;
4344
68.3k
    int len = 0;
4345
68.3k
    int size = XML_PARSER_BUFFER_SIZE;
4346
68.3k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4347
26.8k
                    XML_MAX_TEXT_LENGTH :
4348
68.3k
                    XML_MAX_NAME_LENGTH;
4349
68.3k
    xmlChar cur;
4350
68.3k
    xmlChar stop;
4351
68.3k
    int count = 0;
4352
68.3k
    xmlParserInputState oldstate = ctxt->instate;
4353
4354
68.3k
    SHRINK;
4355
68.3k
    if (RAW == '"') {
4356
42.3k
        NEXT;
4357
42.3k
  stop = '"';
4358
42.3k
    } else if (RAW == '\'') {
4359
24.1k
        NEXT;
4360
24.1k
  stop = '\'';
4361
24.1k
    } else {
4362
1.82k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4363
1.82k
  return(NULL);
4364
1.82k
    }
4365
66.5k
    buf = (xmlChar *) xmlMallocAtomic(size);
4366
66.5k
    if (buf == NULL) {
4367
0
  xmlErrMemory(ctxt, NULL);
4368
0
  return(NULL);
4369
0
    }
4370
66.5k
    ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
4371
66.5k
    cur = CUR;
4372
4.23M
    while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
4373
4.17M
  if (len + 1 >= size) {
4374
3.82k
      xmlChar *tmp;
4375
4376
3.82k
      size *= 2;
4377
3.82k
      tmp = (xmlChar *) xmlRealloc(buf, size);
4378
3.82k
      if (tmp == NULL) {
4379
0
    xmlErrMemory(ctxt, NULL);
4380
0
    xmlFree(buf);
4381
0
    return(NULL);
4382
0
      }
4383
3.82k
      buf = tmp;
4384
3.82k
  }
4385
4.17M
  buf[len++] = cur;
4386
4.17M
  count++;
4387
4.17M
  if (count > 50) {
4388
57.3k
      SHRINK;
4389
57.3k
      GROW;
4390
57.3k
      count = 0;
4391
57.3k
            if (ctxt->instate == XML_PARSER_EOF) {
4392
0
    xmlFree(buf);
4393
0
    return(NULL);
4394
0
            }
4395
57.3k
  }
4396
4.17M
  NEXT;
4397
4.17M
  cur = CUR;
4398
4.17M
  if (cur == 0) {
4399
933
      GROW;
4400
933
      SHRINK;
4401
933
      cur = CUR;
4402
933
  }
4403
4.17M
        if (len > maxLength) {
4404
28
            xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
4405
28
            xmlFree(buf);
4406
28
            return(NULL);
4407
28
        }
4408
4.17M
    }
4409
66.4k
    buf[len] = 0;
4410
66.4k
    if (cur != stop) {
4411
9.62k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4412
56.8k
    } else {
4413
56.8k
  NEXT;
4414
56.8k
    }
4415
66.4k
    ctxt->instate = oldstate;
4416
66.4k
    return(buf);
4417
66.5k
}
4418
4419
/*
 * Forward declaration: the definition appears further down in this file,
 * after xmlParseCharData(), which calls it as its fallback path.
 */
static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt);
4420
4421
/*
 * Lookup table driving the inner loop of the char data fast path.
 *
 * An entry is non-zero (equal to its own index) when the byte can be
 * skipped over by the fast scanner, and zero when the scanner must stop
 * and look at the byte: control characters other than TAB (this includes
 * CR and LF, which are handled separately), '&', '<', ']' and every
 * non-ASCII byte.
 */
static const unsigned char test_char_data[256] = {
    /* 0x00 - 0x0F: only TAB (0x09) is accepted */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x10 - 0x1F: control characters */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x20 - 0x2F: '&' (0x26) stops the scan */
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27,
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    /* 0x30 - 0x3F: '<' (0x3C) stops the scan */
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F,
    /* 0x40 - 0x4F */
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    /* 0x50 - 0x5F: ']' (0x5D) stops the scan */
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F,
    /* 0x60 - 0x6F */
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    /* 0x70 - 0x7F */
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F
    /*
     * 0x80 - 0xFF (non-ASCII): the remaining 128 entries are left to the
     * implicit zero-initialization of the array.
     */
};
4458
4459
/**
4460
 * xmlParseCharData:
4461
 * @ctxt:  an XML parser context
4462
 * @cdata:  unused
4463
 *
4464
 * DEPRECATED: Internal function, don't use.
4465
 *
4466
 * Parse character data. Always makes progress if the first char isn't
4467
 * '<' or '&'.
4468
 *
4469
 * if we are within a CDATA section ']]>' marks an end of section.
4470
 *
4471
 * The right angle bracket (>) may be represented using the string "&gt;",
4472
 * and must, for compatibility, be escaped using "&gt;" or a character
4473
 * reference when it appears in the string "]]>" in content, when that
4474
 * string is not marking the end of a CDATA section.
4475
 *
4476
 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4477
 */
4478
4479
void
4480
16.8M
xmlParseCharData(xmlParserCtxtPtr ctxt, ATTRIBUTE_UNUSED int cdata) {
4481
16.8M
    const xmlChar *in;
4482
16.8M
    int nbchar = 0;
4483
16.8M
    int line = ctxt->input->line;
4484
16.8M
    int col = ctxt->input->col;
4485
16.8M
    int ccol;
4486
4487
16.8M
    SHRINK;
4488
16.8M
    GROW;
4489
    /*
4490
     * Accelerated common case where input don't need to be
4491
     * modified before passing it to the handler.
4492
     */
4493
16.8M
    in = ctxt->input->cur;
4494
21.3M
    do {
4495
26.8M
get_more_space:
4496
36.8M
        while (*in == 0x20) { in++; ctxt->input->col++; }
4497
26.8M
        if (*in == 0xA) {
4498
5.62M
            do {
4499
5.62M
                ctxt->input->line++; ctxt->input->col = 1;
4500
5.62M
                in++;
4501
5.62M
            } while (*in == 0xA);
4502
5.42M
            goto get_more_space;
4503
5.42M
        }
4504
21.3M
        if (*in == '<') {
4505
4.59M
            nbchar = in - ctxt->input->cur;
4506
4.59M
            if (nbchar > 0) {
4507
4.59M
                const xmlChar *tmp = ctxt->input->cur;
4508
4.59M
                ctxt->input->cur = in;
4509
4510
4.59M
                if ((ctxt->sax != NULL) &&
4511
4.59M
                    (ctxt->sax->ignorableWhitespace !=
4512
4.59M
                     ctxt->sax->characters)) {
4513
1.94M
                    if (areBlanks(ctxt, tmp, nbchar, 1)) {
4514
1.47M
                        if (ctxt->sax->ignorableWhitespace != NULL)
4515
1.47M
                            ctxt->sax->ignorableWhitespace(ctxt->userData,
4516
1.47M
                                                   tmp, nbchar);
4517
1.47M
                    } else {
4518
476k
                        if (ctxt->sax->characters != NULL)
4519
476k
                            ctxt->sax->characters(ctxt->userData,
4520
476k
                                                  tmp, nbchar);
4521
476k
                        if (*ctxt->space == -1)
4522
114k
                            *ctxt->space = -2;
4523
476k
                    }
4524
2.64M
                } else if ((ctxt->sax != NULL) &&
4525
2.64M
                           (ctxt->sax->characters != NULL)) {
4526
2.64M
                    ctxt->sax->characters(ctxt->userData,
4527
2.64M
                                          tmp, nbchar);
4528
2.64M
                }
4529
4.59M
            }
4530
4.59M
            return;
4531
4.59M
        }
4532
4533
21.9M
get_more:
4534
21.9M
        ccol = ctxt->input->col;
4535
302M
        while (test_char_data[*in]) {
4536
280M
            in++;
4537
280M
            ccol++;
4538
280M
        }
4539
21.9M
        ctxt->input->col = ccol;
4540
21.9M
        if (*in == 0xA) {
4541
4.90M
            do {
4542
4.90M
                ctxt->input->line++; ctxt->input->col = 1;
4543
4.90M
                in++;
4544
4.90M
            } while (*in == 0xA);
4545
4.78M
            goto get_more;
4546
4.78M
        }
4547
17.1M
        if (*in == ']') {
4548
403k
            if ((in[1] == ']') && (in[2] == '>')) {
4549
8.61k
                xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4550
8.61k
                ctxt->input->cur = in + 1;
4551
8.61k
                return;
4552
8.61k
            }
4553
394k
            in++;
4554
394k
            ctxt->input->col++;
4555
394k
            goto get_more;
4556
403k
        }
4557
16.7M
        nbchar = in - ctxt->input->cur;
4558
16.7M
        if (nbchar > 0) {
4559
11.9M
            if ((ctxt->sax != NULL) &&
4560
11.9M
                (ctxt->sax->ignorableWhitespace !=
4561
11.9M
                 ctxt->sax->characters) &&
4562
11.9M
                (IS_BLANK_CH(*ctxt->input->cur))) {
4563
2.29M
                const xmlChar *tmp = ctxt->input->cur;
4564
2.29M
                ctxt->input->cur = in;
4565
4566
2.29M
                if (areBlanks(ctxt, tmp, nbchar, 0)) {
4567
732k
                    if (ctxt->sax->ignorableWhitespace != NULL)
4568
732k
                        ctxt->sax->ignorableWhitespace(ctxt->userData,
4569
732k
                                                       tmp, nbchar);
4570
1.56M
                } else {
4571
1.56M
                    if (ctxt->sax->characters != NULL)
4572
1.56M
                        ctxt->sax->characters(ctxt->userData,
4573
1.56M
                                              tmp, nbchar);
4574
1.56M
                    if (*ctxt->space == -1)
4575
337k
                        *ctxt->space = -2;
4576
1.56M
                }
4577
2.29M
                line = ctxt->input->line;
4578
2.29M
                col = ctxt->input->col;
4579
9.60M
            } else if (ctxt->sax != NULL) {
4580
9.60M
                if (ctxt->sax->characters != NULL)
4581
9.60M
                    ctxt->sax->characters(ctxt->userData,
4582
9.60M
                                          ctxt->input->cur, nbchar);
4583
9.60M
                line = ctxt->input->line;
4584
9.60M
                col = ctxt->input->col;
4585
9.60M
            }
4586
11.9M
        }
4587
16.7M
        ctxt->input->cur = in;
4588
16.7M
        if (*in == 0xD) {
4589
4.57M
            in++;
4590
4.57M
            if (*in == 0xA) {
4591
4.54M
                ctxt->input->cur = in;
4592
4.54M
                in++;
4593
4.54M
                ctxt->input->line++; ctxt->input->col = 1;
4594
4.54M
                continue; /* while */
4595
4.54M
            }
4596
32.3k
            in--;
4597
32.3k
        }
4598
12.2M
        if (*in == '<') {
4599
9.51M
            return;
4600
9.51M
        }
4601
2.73M
        if (*in == '&') {
4602
927k
            return;
4603
927k
        }
4604
1.80M
        SHRINK;
4605
1.80M
        GROW;
4606
1.80M
        if (ctxt->instate == XML_PARSER_EOF)
4607
0
            return;
4608
1.80M
        in = ctxt->input->cur;
4609
6.35M
    } while (((*in >= 0x20) && (*in <= 0x7F)) ||
4610
6.35M
             (*in == 0x09) || (*in == 0x0a));
4611
1.82M
    ctxt->input->line = line;
4612
1.82M
    ctxt->input->col = col;
4613
1.82M
    xmlParseCharDataComplex(ctxt);
4614
1.82M
}
4615
4616
/**
4617
 * xmlParseCharDataComplex:
4618
 * @ctxt:  an XML parser context
4619
 * @cdata:  int indicating whether we are within a CDATA section
4620
 *
4621
 * Always makes progress if the first char isn't '<' or '&'.
4622
 *
4623
 * parse a CharData section.this is the fallback function
4624
 * of xmlParseCharData() when the parsing requires handling
4625
 * of non-ASCII characters.
4626
 */
4627
static void
4628
1.82M
xmlParseCharDataComplex(xmlParserCtxtPtr ctxt) {
4629
1.82M
    xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4630
1.82M
    int nbchar = 0;
4631
1.82M
    int cur, l;
4632
1.82M
    int count = 0;
4633
4634
1.82M
    SHRINK;
4635
1.82M
    GROW;
4636
1.82M
    cur = CUR_CHAR(l);
4637
33.2M
    while ((cur != '<') && /* checked */
4638
33.2M
           (cur != '&') &&
4639
33.2M
     (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
4640
31.4M
  if ((cur == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
4641
5.86k
      xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4642
5.86k
  }
4643
31.4M
  COPY_BUF(l,buf,nbchar,cur);
4644
  /* move current position before possible calling of ctxt->sax->characters */
4645
31.4M
  NEXTL(l);
4646
31.4M
  cur = CUR_CHAR(l);
4647
31.4M
  if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
4648
75.5k
      buf[nbchar] = 0;
4649
4650
      /*
4651
       * OK the segment is to be consumed as chars.
4652
       */
4653
75.5k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4654
61.3k
    if (areBlanks(ctxt, buf, nbchar, 0)) {
4655
296
        if (ctxt->sax->ignorableWhitespace != NULL)
4656
296
      ctxt->sax->ignorableWhitespace(ctxt->userData,
4657
296
                                     buf, nbchar);
4658
61.0k
    } else {
4659
61.0k
        if (ctxt->sax->characters != NULL)
4660
61.0k
      ctxt->sax->characters(ctxt->userData, buf, nbchar);
4661
61.0k
        if ((ctxt->sax->characters !=
4662
61.0k
             ctxt->sax->ignorableWhitespace) &&
4663
61.0k
      (*ctxt->space == -1))
4664
1.94k
      *ctxt->space = -2;
4665
61.0k
    }
4666
61.3k
      }
4667
75.5k
      nbchar = 0;
4668
            /* something really bad happened in the SAX callback */
4669
75.5k
            if (ctxt->instate != XML_PARSER_CONTENT)
4670
0
                return;
4671
75.5k
  }
4672
31.4M
  count++;
4673
31.4M
  if (count > 50) {
4674
427k
      SHRINK;
4675
427k
      GROW;
4676
427k
      count = 0;
4677
427k
            if (ctxt->instate == XML_PARSER_EOF)
4678
0
    return;
4679
427k
  }
4680
31.4M
    }
4681
1.82M
    if (nbchar != 0) {
4682
865k
        buf[nbchar] = 0;
4683
  /*
4684
   * OK the segment is to be consumed as chars.
4685
   */
4686
865k
  if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4687
733k
      if (areBlanks(ctxt, buf, nbchar, 0)) {
4688
1.63k
    if (ctxt->sax->ignorableWhitespace != NULL)
4689
1.63k
        ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4690
732k
      } else {
4691
732k
    if (ctxt->sax->characters != NULL)
4692
732k
        ctxt->sax->characters(ctxt->userData, buf, nbchar);
4693
732k
    if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4694
732k
        (*ctxt->space == -1))
4695
69.1k
        *ctxt->space = -2;
4696
732k
      }
4697
733k
  }
4698
865k
    }
4699
1.82M
    if ((ctxt->input->cur < ctxt->input->end) && (!IS_CHAR(cur))) {
4700
  /* Generate the error and skip the offending character */
4701
1.28M
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4702
1.28M
                          "PCDATA invalid Char value %d\n",
4703
1.28M
                    cur ? cur : CUR);
4704
1.28M
  NEXT;
4705
1.28M
    }
4706
1.82M
}
4707
4708
/**
4709
 * xmlParseExternalID:
4710
 * @ctxt:  an XML parser context
4711
 * @publicID:  a xmlChar** receiving PubidLiteral
4712
 * @strict: indicate whether we should restrict parsing to only
4713
 *          production [75], see NOTE below
4714
 *
4715
 * DEPRECATED: Internal function, don't use.
4716
 *
4717
 * Parse an External ID or a Public ID
4718
 *
4719
 * NOTE: Productions [75] and [83] interact badly since [75] can generate
4720
 *       'PUBLIC' S PubidLiteral S SystemLiteral
4721
 *
4722
 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4723
 *                   | 'PUBLIC' S PubidLiteral S SystemLiteral
4724
 *
4725
 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4726
 *
4727
 * Returns the function returns SystemLiteral and in the second
4728
 *                case publicID receives PubidLiteral, is strict is off
4729
 *                it is possible to return NULL and have publicID set.
4730
 */
4731
4732
xmlChar *
4733
549k
xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4734
549k
    xmlChar *URI = NULL;
4735
4736
549k
    SHRINK;
4737
4738
549k
    *publicID = NULL;
4739
549k
    if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
4740
296k
        SKIP(6);
4741
296k
  if (SKIP_BLANKS == 0) {
4742
999
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4743
999
                     "Space required after 'SYSTEM'\n");
4744
999
  }
4745
296k
  URI = xmlParseSystemLiteral(ctxt);
4746
296k
  if (URI == NULL) {
4747
1.49k
      xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4748
1.49k
        }
4749
296k
    } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
4750
68.3k
        SKIP(6);
4751
68.3k
  if (SKIP_BLANKS == 0) {
4752
1.91k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4753
1.91k
        "Space required after 'PUBLIC'\n");
4754
1.91k
  }
4755
68.3k
  *publicID = xmlParsePubidLiteral(ctxt);
4756
68.3k
  if (*publicID == NULL) {
4757
1.84k
      xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
4758
1.84k
  }
4759
68.3k
  if (strict) {
4760
      /*
4761
       * We don't handle [83] so "S SystemLiteral" is required.
4762
       */
4763
53.8k
      if (SKIP_BLANKS == 0) {
4764
8.05k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4765
8.05k
      "Space required after the Public Identifier\n");
4766
8.05k
      }
4767
53.8k
  } else {
4768
      /*
4769
       * We handle [83] so we return immediately, if
4770
       * "S SystemLiteral" is not detected. We skip blanks if no
4771
             * system literal was found, but this is harmless since we must
4772
             * be at the end of a NotationDecl.
4773
       */
4774
14.4k
      if (SKIP_BLANKS == 0) return(NULL);
4775
2.12k
      if ((CUR != '\'') && (CUR != '"')) return(NULL);
4776
2.12k
  }
4777
55.4k
  URI = xmlParseSystemLiteral(ctxt);
4778
55.4k
  if (URI == NULL) {
4779
9.02k
      xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4780
9.02k
        }
4781
55.4k
    }
4782
536k
    return(URI);
4783
549k
}
4784
4785
/**
4786
 * xmlParseCommentComplex:
4787
 * @ctxt:  an XML parser context
4788
 * @buf:  the already parsed part of the buffer
4789
 * @len:  number of bytes in the buffer
4790
 * @size:  allocated size of the buffer
4791
 *
4792
 * Skip an XML (SGML) comment <!-- .... -->
4793
 *  The spec says that "For compatibility, the string "--" (double-hyphen)
4794
 *  must not occur within comments. "
4795
 * This is the slow routine in case the accelerator for ascii didn't work
4796
 *
4797
 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4798
 */
4799
static void
4800
xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf,
4801
390k
                       size_t len, size_t size) {
4802
390k
    int q, ql;
4803
390k
    int r, rl;
4804
390k
    int cur, l;
4805
390k
    size_t count = 0;
4806
390k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4807
308k
                       XML_MAX_HUGE_LENGTH :
4808
390k
                       XML_MAX_TEXT_LENGTH;
4809
390k
    int inputid;
4810
4811
390k
    inputid = ctxt->input->id;
4812
4813
390k
    if (buf == NULL) {
4814
63.0k
        len = 0;
4815
63.0k
  size = XML_PARSER_BUFFER_SIZE;
4816
63.0k
  buf = (xmlChar *) xmlMallocAtomic(size);
4817
63.0k
  if (buf == NULL) {
4818
0
      xmlErrMemory(ctxt, NULL);
4819
0
      return;
4820
0
  }
4821
63.0k
    }
4822
390k
    GROW; /* Assure there's enough input data */
4823
390k
    q = CUR_CHAR(ql);
4824
390k
    if (q == 0)
4825
248k
        goto not_terminated;
4826
142k
    if (!IS_CHAR(q)) {
4827
7.86k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4828
7.86k
                          "xmlParseComment: invalid xmlChar value %d\n",
4829
7.86k
                    q);
4830
7.86k
  xmlFree (buf);
4831
7.86k
  return;
4832
7.86k
    }
4833
134k
    NEXTL(ql);
4834
134k
    r = CUR_CHAR(rl);
4835
134k
    if (r == 0)
4836
2.78k
        goto not_terminated;
4837
131k
    if (!IS_CHAR(r)) {
4838
1.19k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4839
1.19k
                          "xmlParseComment: invalid xmlChar value %d\n",
4840
1.19k
                    r);
4841
1.19k
  xmlFree (buf);
4842
1.19k
  return;
4843
1.19k
    }
4844
130k
    NEXTL(rl);
4845
130k
    cur = CUR_CHAR(l);
4846
130k
    if (cur == 0)
4847
2.13k
        goto not_terminated;
4848
33.1M
    while (IS_CHAR(cur) && /* checked */
4849
33.1M
           ((cur != '>') ||
4850
33.1M
      (r != '-') || (q != '-'))) {
4851
33.0M
  if ((r == '-') && (q == '-')) {
4852
22.4k
      xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
4853
22.4k
  }
4854
33.0M
  if (len + 5 >= size) {
4855
52.5k
      xmlChar *new_buf;
4856
52.5k
            size_t new_size;
4857
4858
52.5k
      new_size = size * 2;
4859
52.5k
      new_buf = (xmlChar *) xmlRealloc(buf, new_size);
4860
52.5k
      if (new_buf == NULL) {
4861
0
    xmlFree (buf);
4862
0
    xmlErrMemory(ctxt, NULL);
4863
0
    return;
4864
0
      }
4865
52.5k
      buf = new_buf;
4866
52.5k
            size = new_size;
4867
52.5k
  }
4868
33.0M
  COPY_BUF(ql,buf,len,q);
4869
33.0M
  q = r;
4870
33.0M
  ql = rl;
4871
33.0M
  r = cur;
4872
33.0M
  rl = l;
4873
4874
33.0M
  count++;
4875
33.0M
  if (count > 50) {
4876
617k
      SHRINK;
4877
617k
      GROW;
4878
617k
      count = 0;
4879
617k
            if (ctxt->instate == XML_PARSER_EOF) {
4880
0
    xmlFree(buf);
4881
0
    return;
4882
0
            }
4883
617k
  }
4884
33.0M
  NEXTL(l);
4885
33.0M
  cur = CUR_CHAR(l);
4886
33.0M
  if (cur == 0) {
4887
22.0k
      SHRINK;
4888
22.0k
      GROW;
4889
22.0k
      cur = CUR_CHAR(l);
4890
22.0k
  }
4891
4892
33.0M
        if (len > maxLength) {
4893
0
            xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4894
0
                         "Comment too big found", NULL);
4895
0
            xmlFree (buf);
4896
0
            return;
4897
0
        }
4898
33.0M
    }
4899
128k
    buf[len] = 0;
4900
128k
    if (cur == 0) {
4901
22.0k
  xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4902
22.0k
                       "Comment not terminated \n<!--%.50s\n", buf);
4903
105k
    } else if (!IS_CHAR(cur)) {
4904
5.21k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4905
5.21k
                          "xmlParseComment: invalid xmlChar value %d\n",
4906
5.21k
                    cur);
4907
100k
    } else {
4908
100k
  if (inputid != ctxt->input->id) {
4909
0
      xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4910
0
               "Comment doesn't start and stop in the same"
4911
0
                           " entity\n");
4912
0
  }
4913
100k
        NEXT;
4914
100k
  if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4915
100k
      (!ctxt->disableSAX))
4916
89.5k
      ctxt->sax->comment(ctxt->userData, buf);
4917
100k
    }
4918
128k
    xmlFree(buf);
4919
128k
    return;
4920
253k
not_terminated:
4921
253k
    xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4922
253k
       "Comment not terminated\n", NULL);
4923
253k
    xmlFree(buf);
4924
253k
    return;
4925
128k
}
4926
4927
/**
4928
 * xmlParseComment:
4929
 * @ctxt:  an XML parser context
4930
 *
4931
 * DEPRECATED: Internal function, don't use.
4932
 *
4933
 * Parse an XML (SGML) comment. Always consumes '<!'.
4934
 *
4935
 *  The spec says that "For compatibility, the string "--" (double-hyphen)
4936
 *  must not occur within comments. "
4937
 *
4938
 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4939
 */
4940
void
4941
28.3M
xmlParseComment(xmlParserCtxtPtr ctxt) {
4942
28.3M
    xmlChar *buf = NULL;
4943
28.3M
    size_t size = XML_PARSER_BUFFER_SIZE;
4944
28.3M
    size_t len = 0;
4945
28.3M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4946
5.74M
                       XML_MAX_HUGE_LENGTH :
4947
28.3M
                       XML_MAX_TEXT_LENGTH;
4948
28.3M
    xmlParserInputState state;
4949
28.3M
    const xmlChar *in;
4950
28.3M
    size_t nbchar = 0;
4951
28.3M
    int ccol;
4952
28.3M
    int inputid;
4953
4954
    /*
4955
     * Check that there is a comment right here.
4956
     */
4957
28.3M
    if ((RAW != '<') || (NXT(1) != '!'))
4958
0
        return;
4959
28.3M
    SKIP(2);
4960
28.3M
    if ((RAW != '-') || (NXT(1) != '-'))
4961
197
        return;
4962
28.3M
    state = ctxt->instate;
4963
28.3M
    ctxt->instate = XML_PARSER_COMMENT;
4964
28.3M
    inputid = ctxt->input->id;
4965
28.3M
    SKIP(2);
4966
28.3M
    SHRINK;
4967
28.3M
    GROW;
4968
4969
    /*
4970
     * Accelerated common case where input don't need to be
4971
     * modified before passing it to the handler.
4972
     */
4973
28.3M
    in = ctxt->input->cur;
4974
28.3M
    do {
4975
28.3M
  if (*in == 0xA) {
4976
107k
      do {
4977
107k
    ctxt->input->line++; ctxt->input->col = 1;
4978
107k
    in++;
4979
107k
      } while (*in == 0xA);
4980
102k
  }
4981
33.0M
get_more:
4982
33.0M
        ccol = ctxt->input->col;
4983
216M
  while (((*in > '-') && (*in <= 0x7F)) ||
4984
216M
         ((*in >= 0x20) && (*in < '-')) ||
4985
216M
         (*in == 0x09)) {
4986
183M
        in++;
4987
183M
        ccol++;
4988
183M
  }
4989
33.0M
  ctxt->input->col = ccol;
4990
33.0M
  if (*in == 0xA) {
4991
1.15M
      do {
4992
1.15M
    ctxt->input->line++; ctxt->input->col = 1;
4993
1.15M
    in++;
4994
1.15M
      } while (*in == 0xA);
4995
1.09M
      goto get_more;
4996
1.09M
  }
4997
31.9M
  nbchar = in - ctxt->input->cur;
4998
  /*
4999
   * save current set of data
5000
   */
5001
31.9M
  if (nbchar > 0) {
5002
4.66M
      if ((ctxt->sax != NULL) &&
5003
4.66M
    (ctxt->sax->comment != NULL)) {
5004
4.66M
    if (buf == NULL) {
5005
1.15M
        if ((*in == '-') && (in[1] == '-'))
5006
467k
            size = nbchar + 1;
5007
689k
        else
5008
689k
            size = XML_PARSER_BUFFER_SIZE + nbchar;
5009
1.15M
        buf = (xmlChar *) xmlMallocAtomic(size);
5010
1.15M
        if (buf == NULL) {
5011
0
            xmlErrMemory(ctxt, NULL);
5012
0
      ctxt->instate = state;
5013
0
      return;
5014
0
        }
5015
1.15M
        len = 0;
5016
3.50M
    } else if (len + nbchar + 1 >= size) {
5017
440k
        xmlChar *new_buf;
5018
440k
        size  += len + nbchar + XML_PARSER_BUFFER_SIZE;
5019
440k
        new_buf = (xmlChar *) xmlRealloc(buf, size);
5020
440k
        if (new_buf == NULL) {
5021
0
            xmlFree (buf);
5022
0
      xmlErrMemory(ctxt, NULL);
5023
0
      ctxt->instate = state;
5024
0
      return;
5025
0
        }
5026
440k
        buf = new_buf;
5027
440k
    }
5028
4.66M
    memcpy(&buf[len], ctxt->input->cur, nbchar);
5029
4.66M
    len += nbchar;
5030
4.66M
    buf[len] = 0;
5031
4.66M
      }
5032
4.66M
  }
5033
31.9M
        if (len > maxLength) {
5034
0
            xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
5035
0
                         "Comment too big found", NULL);
5036
0
            xmlFree (buf);
5037
0
            return;
5038
0
        }
5039
31.9M
  ctxt->input->cur = in;
5040
31.9M
  if (*in == 0xA) {
5041
0
      in++;
5042
0
      ctxt->input->line++; ctxt->input->col = 1;
5043
0
  }
5044
31.9M
  if (*in == 0xD) {
5045
2.32M
      in++;
5046
2.32M
      if (*in == 0xA) {
5047
2.31M
    ctxt->input->cur = in;
5048
2.31M
    in++;
5049
2.31M
    ctxt->input->line++; ctxt->input->col = 1;
5050
2.31M
    goto get_more;
5051
2.31M
      }
5052
7.31k
      in--;
5053
7.31k
  }
5054
29.6M
  SHRINK;
5055
29.6M
  GROW;
5056
29.6M
        if (ctxt->instate == XML_PARSER_EOF) {
5057
0
            xmlFree(buf);
5058
0
            return;
5059
0
        }
5060
29.6M
  in = ctxt->input->cur;
5061
29.6M
  if (*in == '-') {
5062
29.2M
      if (in[1] == '-') {
5063
28.0M
          if (in[2] == '>') {
5064
28.0M
        if (ctxt->input->id != inputid) {
5065
0
      xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5066
0
                     "comment doesn't start and stop in the"
5067
0
                                       " same entity\n");
5068
0
        }
5069
28.0M
        SKIP(3);
5070
28.0M
        if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
5071
28.0M
            (!ctxt->disableSAX)) {
5072
25.5M
      if (buf != NULL)
5073
730k
          ctxt->sax->comment(ctxt->userData, buf);
5074
24.8M
      else
5075
24.8M
          ctxt->sax->comment(ctxt->userData, BAD_CAST "");
5076
25.5M
        }
5077
28.0M
        if (buf != NULL)
5078
829k
            xmlFree(buf);
5079
28.0M
        if (ctxt->instate != XML_PARSER_EOF)
5080
28.0M
      ctxt->instate = state;
5081
28.0M
        return;
5082
28.0M
    }
5083
58.2k
    if (buf != NULL) {
5084
32.9k
        xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5085
32.9k
                          "Double hyphen within comment: "
5086
32.9k
                                      "<!--%.50s\n",
5087
32.9k
              buf);
5088
32.9k
    } else
5089
25.3k
        xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5090
25.3k
                          "Double hyphen within comment\n", NULL);
5091
58.2k
                if (ctxt->instate == XML_PARSER_EOF) {
5092
0
                    xmlFree(buf);
5093
0
                    return;
5094
0
                }
5095
58.2k
    in++;
5096
58.2k
    ctxt->input->col++;
5097
58.2k
      }
5098
1.26M
      in++;
5099
1.26M
      ctxt->input->col++;
5100
1.26M
      goto get_more;
5101
29.2M
  }
5102
29.6M
    } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09) || (*in == 0x0a));
5103
390k
    xmlParseCommentComplex(ctxt, buf, len, size);
5104
390k
    ctxt->instate = state;
5105
390k
    return;
5106
28.3M
}
5107
5108
5109
/**
5110
 * xmlParsePITarget:
5111
 * @ctxt:  an XML parser context
5112
 *
5113
 * DEPRECATED: Internal function, don't use.
5114
 *
5115
 * parse the name of a PI
5116
 *
5117
 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
5118
 *
5119
 * Returns the PITarget name or NULL
5120
 */
5121
5122
const xmlChar *
5123
188k
xmlParsePITarget(xmlParserCtxtPtr ctxt) {
5124
188k
    const xmlChar *name;
5125
5126
188k
    name = xmlParseName(ctxt);
5127
188k
    if ((name != NULL) &&
5128
188k
        ((name[0] == 'x') || (name[0] == 'X')) &&
5129
188k
        ((name[1] == 'm') || (name[1] == 'M')) &&
5130
188k
        ((name[2] == 'l') || (name[2] == 'L'))) {
5131
54.1k
  int i;
5132
54.1k
  if ((name[0] == 'x') && (name[1] == 'm') &&
5133
54.1k
      (name[2] == 'l') && (name[3] == 0)) {
5134
9.72k
      xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5135
9.72k
     "XML declaration allowed only at the start of the document\n");
5136
9.72k
      return(name);
5137
44.4k
  } else if (name[3] == 0) {
5138
3.16k
      xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
5139
3.16k
      return(name);
5140
3.16k
  }
5141
88.1k
  for (i = 0;;i++) {
5142
88.1k
      if (xmlW3CPIs[i] == NULL) break;
5143
64.8k
      if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
5144
17.9k
          return(name);
5145
64.8k
  }
5146
23.3k
  xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5147
23.3k
          "xmlParsePITarget: invalid name prefix 'xml'\n",
5148
23.3k
          NULL, NULL);
5149
23.3k
    }
5150
157k
    if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
5151
2.92k
  xmlNsErr(ctxt, XML_NS_ERR_COLON,
5152
2.92k
     "colons are forbidden from PI names '%s'\n", name, NULL, NULL);
5153
2.92k
    }
5154
157k
    return(name);
5155
188k
}
5156
5157
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * Parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype information.
 * This will add the given catalog to the parsing context in order
 * to be used if there is a resolution need further down in the document.
 *
 * The expected value is the word "catalog", an '=' sign and a quoted
 * URL, with optional blanks around each part and nothing after the
 * closing quote. A missing '=' makes the function return silently.
 * Every other syntax problem is reported as an XML_WAR_CATALOG_PI
 * warning.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    const xmlChar *p = catalog;
    const xmlChar *start;
    xmlChar *URL = NULL;
    xmlChar quote;

    /* pseudo-attribute name */
    while (IS_BLANK_CH(*p))
        p++;
    if (xmlStrncmp(p, BAD_CAST"catalog", 7) != 0)
        goto error;
    p += 7;

    /* '=' sign */
    while (IS_BLANK_CH(*p))
        p++;
    if (*p != '=')
        return;
    p++;

    /* opening quote */
    while (IS_BLANK_CH(*p))
        p++;
    quote = *p;
    if ((quote != '\'') && (quote != '"'))
        goto error;
    p++;

    /* quoted value, up to the matching quote */
    start = p;
    while ((*p != 0) && (*p != quote))
        p++;
    if (*p == 0)
        goto error;
    URL = xmlStrndup(start, p - start);
    p++;

    /* only blanks may follow */
    while (IS_BLANK_CH(*p))
        p++;
    if (*p != 0)
        goto error;

    if (URL != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
        xmlFree(URL);
    }
    return;

error:
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
                  "Catalog PI syntax error: %s\n",
                  catalog, NULL);
    if (URL != NULL)
        xmlFree(URL);
}
#endif
5218
5219
/**
5220
 * xmlParsePI:
5221
 * @ctxt:  an XML parser context
5222
 *
5223
 * DEPRECATED: Internal function, don't use.
5224
 *
5225
 * parse an XML Processing Instruction.
5226
 *
5227
 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
5228
 *
5229
 * The processing is transferred to SAX once parsed.
5230
 */
5231
5232
void
5233
188k
xmlParsePI(xmlParserCtxtPtr ctxt) {
5234
188k
    xmlChar *buf = NULL;
5235
188k
    size_t len = 0;
5236
188k
    size_t size = XML_PARSER_BUFFER_SIZE;
5237
188k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
5238
78.5k
                       XML_MAX_HUGE_LENGTH :
5239
188k
                       XML_MAX_TEXT_LENGTH;
5240
188k
    int cur, l;
5241
188k
    const xmlChar *target;
5242
188k
    xmlParserInputState state;
5243
188k
    int count = 0;
5244
5245
188k
    if ((RAW == '<') && (NXT(1) == '?')) {
5246
188k
  int inputid = ctxt->input->id;
5247
188k
  state = ctxt->instate;
5248
188k
        ctxt->instate = XML_PARSER_PI;
5249
  /*
5250
   * this is a Processing Instruction.
5251
   */
5252
188k
  SKIP(2);
5253
188k
  SHRINK;
5254
5255
  /*
5256
   * Parse the target name and check for special support like
5257
   * namespace.
5258
   */
5259
188k
        target = xmlParsePITarget(ctxt);
5260
188k
  if (target != NULL) {
5261
175k
      if ((RAW == '?') && (NXT(1) == '>')) {
5262
30.6k
    if (inputid != ctxt->input->id) {
5263
0
        xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5264
0
                             "PI declaration doesn't start and stop in"
5265
0
                                   " the same entity\n");
5266
0
    }
5267
30.6k
    SKIP(2);
5268
5269
    /*
5270
     * SAX: PI detected.
5271
     */
5272
30.6k
    if ((ctxt->sax) && (!ctxt->disableSAX) &&
5273
30.6k
        (ctxt->sax->processingInstruction != NULL))
5274
22.5k
        ctxt->sax->processingInstruction(ctxt->userData,
5275
22.5k
                                         target, NULL);
5276
30.6k
    if (ctxt->instate != XML_PARSER_EOF)
5277
30.6k
        ctxt->instate = state;
5278
30.6k
    return;
5279
30.6k
      }
5280
145k
      buf = (xmlChar *) xmlMallocAtomic(size);
5281
145k
      if (buf == NULL) {
5282
0
    xmlErrMemory(ctxt, NULL);
5283
0
    ctxt->instate = state;
5284
0
    return;
5285
0
      }
5286
145k
      if (SKIP_BLANKS == 0) {
5287
31.5k
    xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
5288
31.5k
        "ParsePI: PI %s space expected\n", target);
5289
31.5k
      }
5290
145k
      cur = CUR_CHAR(l);
5291
39.0M
      while (IS_CHAR(cur) && /* checked */
5292
39.0M
       ((cur != '?') || (NXT(1) != '>'))) {
5293
38.8M
    if (len + 5 >= size) {
5294
50.5k
        xmlChar *tmp;
5295
50.5k
                    size_t new_size = size * 2;
5296
50.5k
        tmp = (xmlChar *) xmlRealloc(buf, new_size);
5297
50.5k
        if (tmp == NULL) {
5298
0
      xmlErrMemory(ctxt, NULL);
5299
0
      xmlFree(buf);
5300
0
      ctxt->instate = state;
5301
0
      return;
5302
0
        }
5303
50.5k
        buf = tmp;
5304
50.5k
                    size = new_size;
5305
50.5k
    }
5306
38.8M
    count++;
5307
38.8M
    if (count > 50) {
5308
718k
        SHRINK;
5309
718k
        GROW;
5310
718k
                    if (ctxt->instate == XML_PARSER_EOF) {
5311
0
                        xmlFree(buf);
5312
0
                        return;
5313
0
                    }
5314
718k
        count = 0;
5315
718k
    }
5316
38.8M
    COPY_BUF(l,buf,len,cur);
5317
38.8M
    NEXTL(l);
5318
38.8M
    cur = CUR_CHAR(l);
5319
38.8M
    if (cur == 0) {
5320
21.1k
        SHRINK;
5321
21.1k
        GROW;
5322
21.1k
        cur = CUR_CHAR(l);
5323
21.1k
    }
5324
38.8M
                if (len > maxLength) {
5325
0
                    xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5326
0
                                      "PI %s too big found", target);
5327
0
                    xmlFree(buf);
5328
0
                    ctxt->instate = state;
5329
0
                    return;
5330
0
                }
5331
38.8M
      }
5332
145k
      buf[len] = 0;
5333
145k
      if (cur != '?') {
5334
27.8k
    xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5335
27.8k
          "ParsePI: PI %s never end ...\n", target);
5336
117k
      } else {
5337
117k
    if (inputid != ctxt->input->id) {
5338
553
        xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5339
553
                             "PI declaration doesn't start and stop in"
5340
553
                                   " the same entity\n");
5341
553
    }
5342
117k
    SKIP(2);
5343
5344
117k
#ifdef LIBXML_CATALOG_ENABLED
5345
117k
    if (((state == XML_PARSER_MISC) ||
5346
117k
               (state == XML_PARSER_START)) &&
5347
117k
        (xmlStrEqual(target, XML_CATALOG_PI))) {
5348
152
        xmlCatalogAllow allow = xmlCatalogGetDefaults();
5349
152
        if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5350
152
      (allow == XML_CATA_ALLOW_ALL))
5351
152
      xmlParseCatalogPI(ctxt, buf);
5352
152
    }
5353
117k
#endif
5354
5355
5356
    /*
5357
     * SAX: PI detected.
5358
     */
5359
117k
    if ((ctxt->sax) && (!ctxt->disableSAX) &&
5360
117k
        (ctxt->sax->processingInstruction != NULL))
5361
93.1k
        ctxt->sax->processingInstruction(ctxt->userData,
5362
93.1k
                                         target, buf);
5363
117k
      }
5364
145k
      xmlFree(buf);
5365
145k
  } else {
5366
12.1k
      xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
5367
12.1k
  }
5368
157k
  if (ctxt->instate != XML_PARSER_EOF)
5369
157k
      ctxt->instate = state;
5370
157k
    }
5371
188k
}
5372
5373
/**
5374
 * xmlParseNotationDecl:
5375
 * @ctxt:  an XML parser context
5376
 *
5377
 * DEPRECATED: Internal function, don't use.
5378
 *
5379
 * Parse a notation declaration. Always consumes '<!'.
5380
 *
5381
 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID |  PublicID) S? '>'
5382
 *
5383
 * Hence there is actually 3 choices:
5384
 *     'PUBLIC' S PubidLiteral
5385
 *     'PUBLIC' S PubidLiteral S SystemLiteral
5386
 * and 'SYSTEM' S SystemLiteral
5387
 *
5388
 * See the NOTE on xmlParseExternalID().
5389
 */
5390
5391
void
5392
28.8k
xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
5393
28.8k
    const xmlChar *name;
5394
28.8k
    xmlChar *Pubid;
5395
28.8k
    xmlChar *Systemid;
5396
5397
28.8k
    if ((CUR != '<') || (NXT(1) != '!'))
5398
0
        return;
5399
28.8k
    SKIP(2);
5400
5401
28.8k
    if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5402
28.2k
  int inputid = ctxt->input->id;
5403
28.2k
  SHRINK;
5404
28.2k
  SKIP(8);
5405
28.2k
  if (SKIP_BLANKS == 0) {
5406
740
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5407
740
         "Space required after '<!NOTATION'\n");
5408
740
      return;
5409
740
  }
5410
5411
27.5k
        name = xmlParseName(ctxt);
5412
27.5k
  if (name == NULL) {
5413
1.50k
      xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5414
1.50k
      return;
5415
1.50k
  }
5416
26.0k
  if (xmlStrchr(name, ':') != NULL) {
5417
1.06k
      xmlNsErr(ctxt, XML_NS_ERR_COLON,
5418
1.06k
         "colons are forbidden from notation names '%s'\n",
5419
1.06k
         name, NULL, NULL);
5420
1.06k
  }
5421
26.0k
  if (SKIP_BLANKS == 0) {
5422
2.24k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5423
2.24k
         "Space required after the NOTATION name'\n");
5424
2.24k
      return;
5425
2.24k
  }
5426
5427
  /*
5428
   * Parse the IDs.
5429
   */
5430
23.7k
  Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
5431
23.7k
  SKIP_BLANKS;
5432
5433
23.7k
  if (RAW == '>') {
5434
17.6k
      if (inputid != ctxt->input->id) {
5435
11
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5436
11
                         "Notation declaration doesn't start and stop"
5437
11
                               " in the same entity\n");
5438
11
      }
5439
17.6k
      NEXT;
5440
17.6k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5441
17.6k
    (ctxt->sax->notationDecl != NULL))
5442
13.8k
    ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5443
17.6k
  } else {
5444
6.12k
      xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5445
6.12k
  }
5446
23.7k
  if (Systemid != NULL) xmlFree(Systemid);
5447
23.7k
  if (Pubid != NULL) xmlFree(Pubid);
5448
23.7k
    }
5449
28.8k
}
5450
5451
/**
5452
 * xmlParseEntityDecl:
5453
 * @ctxt:  an XML parser context
5454
 *
5455
 * DEPRECATED: Internal function, don't use.
5456
 *
5457
 * Parse an entity declaration. Always consumes '<!'.
5458
 *
5459
 * [70] EntityDecl ::= GEDecl | PEDecl
5460
 *
5461
 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5462
 *
5463
 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5464
 *
5465
 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5466
 *
5467
 * [74] PEDef ::= EntityValue | ExternalID
5468
 *
5469
 * [76] NDataDecl ::= S 'NDATA' S Name
5470
 *
5471
 * [ VC: Notation Declared ]
5472
 * The Name must match the declared name of a notation.
5473
 */
5474
5475
void
5476
1.09M
xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
5477
1.09M
    const xmlChar *name = NULL;
5478
1.09M
    xmlChar *value = NULL;
5479
1.09M
    xmlChar *URI = NULL, *literal = NULL;
5480
1.09M
    const xmlChar *ndata = NULL;
5481
1.09M
    int isParameter = 0;
5482
1.09M
    xmlChar *orig = NULL;
5483
5484
1.09M
    if ((CUR != '<') || (NXT(1) != '!'))
5485
0
        return;
5486
1.09M
    SKIP(2);
5487
5488
    /* GROW; done in the caller */
5489
1.09M
    if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
5490
1.09M
  int inputid = ctxt->input->id;
5491
1.09M
  SHRINK;
5492
1.09M
  SKIP(6);
5493
1.09M
  if (SKIP_BLANKS == 0) {
5494
5.85k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5495
5.85k
         "Space required after '<!ENTITY'\n");
5496
5.85k
  }
5497
5498
1.09M
  if (RAW == '%') {
5499
503k
      NEXT;
5500
503k
      if (SKIP_BLANKS == 0) {
5501
802
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5502
802
             "Space required after '%%'\n");
5503
802
      }
5504
503k
      isParameter = 1;
5505
503k
  }
5506
5507
1.09M
        name = xmlParseName(ctxt);
5508
1.09M
  if (name == NULL) {
5509
6.08k
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5510
6.08k
                     "xmlParseEntityDecl: no name\n");
5511
6.08k
            return;
5512
6.08k
  }
5513
1.08M
  if (xmlStrchr(name, ':') != NULL) {
5514
1.82k
      xmlNsErr(ctxt, XML_NS_ERR_COLON,
5515
1.82k
         "colons are forbidden from entities names '%s'\n",
5516
1.82k
         name, NULL, NULL);
5517
1.82k
  }
5518
1.08M
  if (SKIP_BLANKS == 0) {
5519
5.19k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5520
5.19k
         "Space required after the entity name\n");
5521
5.19k
  }
5522
5523
1.08M
  ctxt->instate = XML_PARSER_ENTITY_DECL;
5524
  /*
5525
   * handle the various case of definitions...
5526
   */
5527
1.08M
  if (isParameter) {
5528
502k
      if ((RAW == '"') || (RAW == '\'')) {
5529
451k
          value = xmlParseEntityValue(ctxt, &orig);
5530
451k
    if (value) {
5531
441k
        if ((ctxt->sax != NULL) &&
5532
441k
      (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5533
386k
      ctxt->sax->entityDecl(ctxt->userData, name,
5534
386k
                        XML_INTERNAL_PARAMETER_ENTITY,
5535
386k
            NULL, NULL, value);
5536
441k
    }
5537
451k
      } else {
5538
50.3k
          URI = xmlParseExternalID(ctxt, &literal, 1);
5539
50.3k
    if ((URI == NULL) && (literal == NULL)) {
5540
1.41k
        xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5541
1.41k
    }
5542
50.3k
    if (URI) {
5543
47.8k
        xmlURIPtr uri;
5544
5545
47.8k
        uri = xmlParseURI((const char *) URI);
5546
47.8k
        if (uri == NULL) {
5547
1.66k
            xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5548
1.66k
             "Invalid URI: %s\n", URI);
5549
      /*
5550
       * This really ought to be a well formedness error
5551
       * but the XML Core WG decided otherwise c.f. issue
5552
       * E26 of the XML erratas.
5553
       */
5554
46.1k
        } else {
5555
46.1k
      if (uri->fragment != NULL) {
5556
          /*
5557
           * Okay this is foolish to block those but not
5558
           * invalid URIs.
5559
           */
5560
221
          xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5561
45.9k
      } else {
5562
45.9k
          if ((ctxt->sax != NULL) &&
5563
45.9k
        (!ctxt->disableSAX) &&
5564
45.9k
        (ctxt->sax->entityDecl != NULL))
5565
43.9k
        ctxt->sax->entityDecl(ctxt->userData, name,
5566
43.9k
              XML_EXTERNAL_PARAMETER_ENTITY,
5567
43.9k
              literal, URI, NULL);
5568
45.9k
      }
5569
46.1k
      xmlFreeURI(uri);
5570
46.1k
        }
5571
47.8k
    }
5572
50.3k
      }
5573
582k
  } else {
5574
582k
      if ((RAW == '"') || (RAW == '\'')) {
5575
398k
          value = xmlParseEntityValue(ctxt, &orig);
5576
398k
    if ((ctxt->sax != NULL) &&
5577
398k
        (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5578
361k
        ctxt->sax->entityDecl(ctxt->userData, name,
5579
361k
        XML_INTERNAL_GENERAL_ENTITY,
5580
361k
        NULL, NULL, value);
5581
    /*
5582
     * For expat compatibility in SAX mode.
5583
     */
5584
398k
    if ((ctxt->myDoc == NULL) ||
5585
398k
        (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5586
4.13k
        if (ctxt->myDoc == NULL) {
5587
287
      ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5588
287
      if (ctxt->myDoc == NULL) {
5589
0
          xmlErrMemory(ctxt, "New Doc failed");
5590
0
          return;
5591
0
      }
5592
287
      ctxt->myDoc->properties = XML_DOC_INTERNAL;
5593
287
        }
5594
4.13k
        if (ctxt->myDoc->intSubset == NULL)
5595
287
      ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5596
287
              BAD_CAST "fake", NULL, NULL);
5597
5598
4.13k
        xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5599
4.13k
                    NULL, NULL, value);
5600
4.13k
    }
5601
398k
      } else {
5602
184k
          URI = xmlParseExternalID(ctxt, &literal, 1);
5603
184k
    if ((URI == NULL) && (literal == NULL)) {
5604
6.81k
        xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5605
6.81k
    }
5606
184k
    if (URI) {
5607
171k
        xmlURIPtr uri;
5608
5609
171k
        uri = xmlParseURI((const char *)URI);
5610
171k
        if (uri == NULL) {
5611
6.50k
            xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5612
6.50k
             "Invalid URI: %s\n", URI);
5613
      /*
5614
       * This really ought to be a well formedness error
5615
       * but the XML Core WG decided otherwise c.f. issue
5616
       * E26 of the XML erratas.
5617
       */
5618
165k
        } else {
5619
165k
      if (uri->fragment != NULL) {
5620
          /*
5621
           * Okay this is foolish to block those but not
5622
           * invalid URIs.
5623
           */
5624
2.14k
          xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5625
2.14k
      }
5626
165k
      xmlFreeURI(uri);
5627
165k
        }
5628
171k
    }
5629
184k
    if ((RAW != '>') && (SKIP_BLANKS == 0)) {
5630
8.45k
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5631
8.45k
           "Space required before 'NDATA'\n");
5632
8.45k
    }
5633
184k
    if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
5634
13.4k
        SKIP(5);
5635
13.4k
        if (SKIP_BLANKS == 0) {
5636
1.02k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5637
1.02k
               "Space required after 'NDATA'\n");
5638
1.02k
        }
5639
13.4k
        ndata = xmlParseName(ctxt);
5640
13.4k
        if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5641
13.4k
            (ctxt->sax->unparsedEntityDecl != NULL))
5642
11.9k
      ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5643
11.9k
            literal, URI, ndata);
5644
170k
    } else {
5645
170k
        if ((ctxt->sax != NULL) &&
5646
170k
            (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5647
160k
      ctxt->sax->entityDecl(ctxt->userData, name,
5648
160k
            XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5649
160k
            literal, URI, NULL);
5650
        /*
5651
         * For expat compatibility in SAX mode.
5652
         * assuming the entity replacement was asked for
5653
         */
5654
170k
        if ((ctxt->replaceEntities != 0) &&
5655
170k
      ((ctxt->myDoc == NULL) ||
5656
95.9k
      (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5657
844
      if (ctxt->myDoc == NULL) {
5658
136
          ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5659
136
          if (ctxt->myDoc == NULL) {
5660
0
              xmlErrMemory(ctxt, "New Doc failed");
5661
0
        return;
5662
0
          }
5663
136
          ctxt->myDoc->properties = XML_DOC_INTERNAL;
5664
136
      }
5665
5666
844
      if (ctxt->myDoc->intSubset == NULL)
5667
136
          ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5668
136
            BAD_CAST "fake", NULL, NULL);
5669
844
      xmlSAX2EntityDecl(ctxt, name,
5670
844
                  XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5671
844
                  literal, URI, NULL);
5672
844
        }
5673
170k
    }
5674
184k
      }
5675
582k
  }
5676
1.08M
  if (ctxt->instate == XML_PARSER_EOF)
5677
353
      goto done;
5678
1.08M
  SKIP_BLANKS;
5679
1.08M
  if (RAW != '>') {
5680
14.7k
      xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
5681
14.7k
              "xmlParseEntityDecl: entity %s not terminated\n", name);
5682
14.7k
      xmlHaltParser(ctxt);
5683
1.06M
  } else {
5684
1.06M
      if (inputid != ctxt->input->id) {
5685
179
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5686
179
                         "Entity declaration doesn't start and stop in"
5687
179
                               " the same entity\n");
5688
179
      }
5689
1.06M
      NEXT;
5690
1.06M
  }
5691
1.08M
  if (orig != NULL) {
5692
      /*
5693
       * Ugly mechanism to save the raw entity value.
5694
       */
5695
834k
      xmlEntityPtr cur = NULL;
5696
5697
834k
      if (isParameter) {
5698
442k
          if ((ctxt->sax != NULL) &&
5699
442k
        (ctxt->sax->getParameterEntity != NULL))
5700
442k
        cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5701
442k
      } else {
5702
391k
          if ((ctxt->sax != NULL) &&
5703
391k
        (ctxt->sax->getEntity != NULL))
5704
391k
        cur = ctxt->sax->getEntity(ctxt->userData, name);
5705
391k
    if ((cur == NULL) && (ctxt->userData==ctxt)) {
5706
27.9k
        cur = xmlSAX2GetEntity(ctxt, name);
5707
27.9k
    }
5708
391k
      }
5709
834k
            if ((cur != NULL) && (cur->orig == NULL)) {
5710
614k
    cur->orig = orig;
5711
614k
                orig = NULL;
5712
614k
      }
5713
834k
  }
5714
5715
1.08M
done:
5716
1.08M
  if (value != NULL) xmlFree(value);
5717
1.08M
  if (URI != NULL) xmlFree(URI);
5718
1.08M
  if (literal != NULL) xmlFree(literal);
5719
1.08M
        if (orig != NULL) xmlFree(orig);
5720
1.08M
    }
5721
1.09M
}
5722
5723
/**
5724
 * xmlParseDefaultDecl:
5725
 * @ctxt:  an XML parser context
5726
 * @value:  Receive a possible fixed default value for the attribute
5727
 *
5728
 * DEPRECATED: Internal function, don't use.
5729
 *
5730
 * Parse an attribute default declaration
5731
 *
5732
 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5733
 *
5734
 * [ VC: Required Attribute ]
5735
 * if the default declaration is the keyword #REQUIRED, then the
5736
 * attribute must be specified for all elements of the type in the
5737
 * attribute-list declaration.
5738
 *
5739
 * [ VC: Attribute Default Legal ]
5740
 * The declared default value must meet the lexical constraints of
5741
 * the declared attribute type c.f. xmlValidateAttributeDecl()
5742
 *
5743
 * [ VC: Fixed Attribute Default ]
5744
 * if an attribute has a default value declared with the #FIXED
5745
 * keyword, instances of that attribute must match the default value.
5746
 *
5747
 * [ WFC: No < in Attribute Values ]
5748
 * handled in xmlParseAttValue()
5749
 *
5750
 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5751
 *          or XML_ATTRIBUTE_FIXED.
5752
 */
5753
5754
int
5755
4.12M
xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5756
4.12M
    int val;
5757
4.12M
    xmlChar *ret;
5758
5759
4.12M
    *value = NULL;
5760
4.12M
    if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
5761
278k
  SKIP(9);
5762
278k
  return(XML_ATTRIBUTE_REQUIRED);
5763
278k
    }
5764
3.84M
    if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
5765
3.52M
  SKIP(8);
5766
3.52M
  return(XML_ATTRIBUTE_IMPLIED);
5767
3.52M
    }
5768
317k
    val = XML_ATTRIBUTE_NONE;
5769
317k
    if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
5770
124k
  SKIP(6);
5771
124k
  val = XML_ATTRIBUTE_FIXED;
5772
124k
  if (SKIP_BLANKS == 0) {
5773
345
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5774
345
         "Space required after '#FIXED'\n");
5775
345
  }
5776
124k
    }
5777
317k
    ret = xmlParseAttValue(ctxt);
5778
317k
    ctxt->instate = XML_PARSER_DTD;
5779
317k
    if (ret == NULL) {
5780
6.16k
  xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
5781
6.16k
           "Attribute default value declaration error\n");
5782
6.16k
    } else
5783
311k
        *value = ret;
5784
317k
    return(val);
5785
3.84M
}
5786
5787
/**
5788
 * xmlParseNotationType:
5789
 * @ctxt:  an XML parser context
5790
 *
5791
 * DEPRECATED: Internal function, don't use.
5792
 *
5793
 * parse an Notation attribute type.
5794
 *
5795
 * Note: the leading 'NOTATION' S part has already being parsed...
5796
 *
5797
 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5798
 *
5799
 * [ VC: Notation Attributes ]
5800
 * Values of this type must match one of the notation names included
5801
 * in the declaration; all notation names in the declaration must be declared.
5802
 *
5803
 * Returns: the notation attribute tree built while parsing
5804
 */
5805
5806
xmlEnumerationPtr
5807
11.1k
xmlParseNotationType(xmlParserCtxtPtr ctxt) {
5808
11.1k
    const xmlChar *name;
5809
11.1k
    xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5810
5811
11.1k
    if (RAW != '(') {
5812
579
  xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5813
579
  return(NULL);
5814
579
    }
5815
10.5k
    SHRINK;
5816
28.0k
    do {
5817
28.0k
        NEXT;
5818
28.0k
  SKIP_BLANKS;
5819
28.0k
        name = xmlParseName(ctxt);
5820
28.0k
  if (name == NULL) {
5821
466
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5822
466
         "Name expected in NOTATION declaration\n");
5823
466
            xmlFreeEnumeration(ret);
5824
466
      return(NULL);
5825
466
  }
5826
27.5k
  tmp = ret;
5827
84.2k
  while (tmp != NULL) {
5828
60.4k
      if (xmlStrEqual(name, tmp->name)) {
5829
3.79k
    xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5830
3.79k
    "standalone: attribute notation value token %s duplicated\n",
5831
3.79k
         name, NULL);
5832
3.79k
    if (!xmlDictOwns(ctxt->dict, name))
5833
0
        xmlFree((xmlChar *) name);
5834
3.79k
    break;
5835
3.79k
      }
5836
56.7k
      tmp = tmp->next;
5837
56.7k
  }
5838
27.5k
  if (tmp == NULL) {
5839
23.7k
      cur = xmlCreateEnumeration(name);
5840
23.7k
      if (cur == NULL) {
5841
0
                xmlFreeEnumeration(ret);
5842
0
                return(NULL);
5843
0
            }
5844
23.7k
      if (last == NULL) ret = last = cur;
5845
13.4k
      else {
5846
13.4k
    last->next = cur;
5847
13.4k
    last = cur;
5848
13.4k
      }
5849
23.7k
  }
5850
27.5k
  SKIP_BLANKS;
5851
27.5k
    } while (RAW == '|');
5852
10.1k
    if (RAW != ')') {
5853
4.29k
  xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5854
4.29k
        xmlFreeEnumeration(ret);
5855
4.29k
  return(NULL);
5856
4.29k
    }
5857
5.82k
    NEXT;
5858
5.82k
    return(ret);
5859
10.1k
}
5860
5861
/**
5862
 * xmlParseEnumerationType:
5863
 * @ctxt:  an XML parser context
5864
 *
5865
 * DEPRECATED: Internal function, don't use.
5866
 *
5867
 * parse an Enumeration attribute type.
5868
 *
5869
 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5870
 *
5871
 * [ VC: Enumeration ]
5872
 * Values of this type must match one of the Nmtoken tokens in
5873
 * the declaration
5874
 *
5875
 * Returns: the enumeration attribute tree built while parsing
5876
 */
5877
5878
xmlEnumerationPtr
5879
483k
xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
5880
483k
    xmlChar *name;
5881
483k
    xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5882
5883
483k
    if (RAW != '(') {
5884
8.70k
  xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
5885
8.70k
  return(NULL);
5886
8.70k
    }
5887
474k
    SHRINK;
5888
1.46M
    do {
5889
1.46M
        NEXT;
5890
1.46M
  SKIP_BLANKS;
5891
1.46M
        name = xmlParseNmtoken(ctxt);
5892
1.46M
  if (name == NULL) {
5893
1.66k
      xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
5894
1.66k
      return(ret);
5895
1.66k
  }
5896
1.46M
  tmp = ret;
5897
3.96M
  while (tmp != NULL) {
5898
2.50M
      if (xmlStrEqual(name, tmp->name)) {
5899
1.86k
    xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5900
1.86k
    "standalone: attribute enumeration value token %s duplicated\n",
5901
1.86k
         name, NULL);
5902
1.86k
    if (!xmlDictOwns(ctxt->dict, name))
5903
1.86k
        xmlFree(name);
5904
1.86k
    break;
5905
1.86k
      }
5906
2.50M
      tmp = tmp->next;
5907
2.50M
  }
5908
1.46M
  if (tmp == NULL) {
5909
1.46M
      cur = xmlCreateEnumeration(name);
5910
1.46M
      if (!xmlDictOwns(ctxt->dict, name))
5911
1.46M
    xmlFree(name);
5912
1.46M
      if (cur == NULL) {
5913
0
                xmlFreeEnumeration(ret);
5914
0
                return(NULL);
5915
0
            }
5916
1.46M
      if (last == NULL) ret = last = cur;
5917
986k
      else {
5918
986k
    last->next = cur;
5919
986k
    last = cur;
5920
986k
      }
5921
1.46M
  }
5922
1.46M
  SKIP_BLANKS;
5923
1.46M
    } while (RAW == '|');
5924
473k
    if (RAW != ')') {
5925
3.45k
  xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
5926
3.45k
  return(ret);
5927
3.45k
    }
5928
469k
    NEXT;
5929
469k
    return(ret);
5930
473k
}
5931
5932
/**
5933
 * xmlParseEnumeratedType:
5934
 * @ctxt:  an XML parser context
5935
 * @tree:  the enumeration tree built while parsing
5936
 *
5937
 * DEPRECATED: Internal function, don't use.
5938
 *
5939
 * parse an Enumerated attribute type.
5940
 *
5941
 * [57] EnumeratedType ::= NotationType | Enumeration
5942
 *
5943
 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5944
 *
5945
 *
5946
 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5947
 */
5948
5949
int
5950
495k
xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5951
495k
    if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5952
11.4k
  SKIP(8);
5953
11.4k
  if (SKIP_BLANKS == 0) {
5954
266
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5955
266
         "Space required after 'NOTATION'\n");
5956
266
      return(0);
5957
266
  }
5958
11.1k
  *tree = xmlParseNotationType(ctxt);
5959
11.1k
  if (*tree == NULL) return(0);
5960
5.82k
  return(XML_ATTRIBUTE_NOTATION);
5961
11.1k
    }
5962
483k
    *tree = xmlParseEnumerationType(ctxt);
5963
483k
    if (*tree == NULL) return(0);
5964
474k
    return(XML_ATTRIBUTE_ENUMERATION);
5965
483k
}
5966
5967
/**
5968
 * xmlParseAttributeType:
5969
 * @ctxt:  an XML parser context
5970
 * @tree:  the enumeration tree built while parsing
5971
 *
5972
 * DEPRECATED: Internal function, don't use.
5973
 *
5974
 * parse the Attribute list def for an element
5975
 *
5976
 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5977
 *
5978
 * [55] StringType ::= 'CDATA'
5979
 *
5980
 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5981
 *                        'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5982
 *
5983
 * Validity constraints for attribute values syntax are checked in
5984
 * xmlValidateAttributeValue()
5985
 *
5986
 * [ VC: ID ]
5987
 * Values of type ID must match the Name production. A name must not
5988
 * appear more than once in an XML document as a value of this type;
5989
 * i.e., ID values must uniquely identify the elements which bear them.
5990
 *
5991
 * [ VC: One ID per Element Type ]
5992
 * No element type may have more than one ID attribute specified.
5993
 *
5994
 * [ VC: ID Attribute Default ]
5995
 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
5996
 *
5997
 * [ VC: IDREF ]
5998
 * Values of type IDREF must match the Name production, and values
5999
 * of type IDREFS must match Names; each IDREF Name must match the value
6000
 * of an ID attribute on some element in the XML document; i.e. IDREF
6001
 * values must match the value of some ID attribute.
6002
 *
6003
 * [ VC: Entity Name ]
6004
 * Values of type ENTITY must match the Name production, values
6005
 * of type ENTITIES must match Names; each Entity Name must match the
6006
 * name of an unparsed entity declared in the DTD.
6007
 *
6008
 * [ VC: Name Token ]
6009
 * Values of type NMTOKEN must match the Nmtoken production; values
6010
 * of type NMTOKENS must match Nmtokens.
6011
 *
6012
 * Returns the attribute type
6013
 */
6014
int
6015
4.14M
xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
6016
4.14M
    SHRINK;
6017
4.14M
    if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
6018
1.76M
  SKIP(5);
6019
1.76M
  return(XML_ATTRIBUTE_CDATA);
6020
2.38M
     } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
6021
13.2k
  SKIP(6);
6022
13.2k
  return(XML_ATTRIBUTE_IDREFS);
6023
2.36M
     } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
6024
37.4k
  SKIP(5);
6025
37.4k
  return(XML_ATTRIBUTE_IDREF);
6026
2.33M
     } else if ((RAW == 'I') && (NXT(1) == 'D')) {
6027
1.00M
        SKIP(2);
6028
1.00M
  return(XML_ATTRIBUTE_ID);
6029
1.32M
     } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
6030
17.2k
  SKIP(6);
6031
17.2k
  return(XML_ATTRIBUTE_ENTITY);
6032
1.31M
     } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
6033
1.78k
  SKIP(8);
6034
1.78k
  return(XML_ATTRIBUTE_ENTITIES);
6035
1.31M
     } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
6036
236k
  SKIP(8);
6037
236k
  return(XML_ATTRIBUTE_NMTOKENS);
6038
1.07M
     } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
6039
579k
  SKIP(7);
6040
579k
  return(XML_ATTRIBUTE_NMTOKEN);
6041
579k
     }
6042
495k
     return(xmlParseEnumeratedType(ctxt, tree));
6043
4.14M
}
6044
6045
/**
6046
 * xmlParseAttributeListDecl:
6047
 * @ctxt:  an XML parser context
6048
 *
6049
 * DEPRECATED: Internal function, don't use.
6050
 *
6051
 * Parse an attribute list declaration for an element. Always consumes '<!'.
6052
 *
6053
 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
6054
 *
6055
 * [53] AttDef ::= S Name S AttType S DefaultDecl
6056
 *
6057
 */
6058
void
6059
1.29M
xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
6060
1.29M
    const xmlChar *elemName;
6061
1.29M
    const xmlChar *attrName;
6062
1.29M
    xmlEnumerationPtr tree;
6063
6064
1.29M
    if ((CUR != '<') || (NXT(1) != '!'))
6065
0
        return;
6066
1.29M
    SKIP(2);
6067
6068
1.29M
    if (CMP7(CUR_PTR, 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
6069
1.29M
  int inputid = ctxt->input->id;
6070
6071
1.29M
  SKIP(7);
6072
1.29M
  if (SKIP_BLANKS == 0) {
6073
2.92k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6074
2.92k
                     "Space required after '<!ATTLIST'\n");
6075
2.92k
  }
6076
1.29M
        elemName = xmlParseName(ctxt);
6077
1.29M
  if (elemName == NULL) {
6078
2.60k
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6079
2.60k
         "ATTLIST: no name for Element\n");
6080
2.60k
      return;
6081
2.60k
  }
6082
1.29M
  SKIP_BLANKS;
6083
1.29M
  GROW;
6084
5.40M
  while ((RAW != '>') && (ctxt->instate != XML_PARSER_EOF)) {
6085
4.16M
      int type;
6086
4.16M
      int def;
6087
4.16M
      xmlChar *defaultValue = NULL;
6088
6089
4.16M
      GROW;
6090
4.16M
            tree = NULL;
6091
4.16M
      attrName = xmlParseName(ctxt);
6092
4.16M
      if (attrName == NULL) {
6093
11.3k
    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6094
11.3k
             "ATTLIST: no name for Attribute\n");
6095
11.3k
    break;
6096
11.3k
      }
6097
4.15M
      GROW;
6098
4.15M
      if (SKIP_BLANKS == 0) {
6099
5.66k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6100
5.66k
            "Space required after the attribute name\n");
6101
5.66k
    break;
6102
5.66k
      }
6103
6104
4.14M
      type = xmlParseAttributeType(ctxt, &tree);
6105
4.14M
      if (type <= 0) {
6106
15.0k
          break;
6107
15.0k
      }
6108
6109
4.12M
      GROW;
6110
4.12M
      if (SKIP_BLANKS == 0) {
6111
6.15k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6112
6.15k
             "Space required after the attribute type\n");
6113
6.15k
          if (tree != NULL)
6114
4.83k
        xmlFreeEnumeration(tree);
6115
6.15k
    break;
6116
6.15k
      }
6117
6118
4.12M
      def = xmlParseDefaultDecl(ctxt, &defaultValue);
6119
4.12M
      if (def <= 0) {
6120
0
                if (defaultValue != NULL)
6121
0
        xmlFree(defaultValue);
6122
0
          if (tree != NULL)
6123
0
        xmlFreeEnumeration(tree);
6124
0
          break;
6125
0
      }
6126
4.12M
      if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
6127
149k
          xmlAttrNormalizeSpace(defaultValue, defaultValue);
6128
6129
4.12M
      GROW;
6130
4.12M
            if (RAW != '>') {
6131
3.93M
    if (SKIP_BLANKS == 0) {
6132
12.9k
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6133
12.9k
      "Space required after the attribute default value\n");
6134
12.9k
        if (defaultValue != NULL)
6135
6.96k
      xmlFree(defaultValue);
6136
12.9k
        if (tree != NULL)
6137
2.08k
      xmlFreeEnumeration(tree);
6138
12.9k
        break;
6139
12.9k
    }
6140
3.93M
      }
6141
4.11M
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6142
4.11M
    (ctxt->sax->attributeDecl != NULL))
6143
3.63M
    ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
6144
3.63M
                          type, def, defaultValue, tree);
6145
474k
      else if (tree != NULL)
6146
57.9k
    xmlFreeEnumeration(tree);
6147
6148
4.11M
      if ((ctxt->sax2) && (defaultValue != NULL) &&
6149
4.11M
          (def != XML_ATTRIBUTE_IMPLIED) &&
6150
4.11M
    (def != XML_ATTRIBUTE_REQUIRED)) {
6151
187k
    xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
6152
187k
      }
6153
4.11M
      if (ctxt->sax2) {
6154
2.53M
    xmlAddSpecialAttr(ctxt, elemName, attrName, type);
6155
2.53M
      }
6156
4.11M
      if (defaultValue != NULL)
6157
304k
          xmlFree(defaultValue);
6158
4.11M
      GROW;
6159
4.11M
  }
6160
1.29M
  if (RAW == '>') {
6161
1.24M
      if (inputid != ctxt->input->id) {
6162
480
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6163
480
                               "Attribute list declaration doesn't start and"
6164
480
                               " stop in the same entity\n");
6165
480
      }
6166
1.24M
      NEXT;
6167
1.24M
  }
6168
1.29M
    }
6169
1.29M
}
6170
6171
/**
6172
 * xmlParseElementMixedContentDecl:
6173
 * @ctxt:  an XML parser context
6174
 * @inputchk:  the input used for the current entity, needed for boundary checks
6175
 *
6176
 * DEPRECATED: Internal function, don't use.
6177
 *
6178
 * parse the declaration for a Mixed Element content
6179
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6180
 *
6181
 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
6182
 *                '(' S? '#PCDATA' S? ')'
6183
 *
6184
 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
6185
 *
6186
 * [ VC: No Duplicate Types ]
6187
 * The same name must not appear more than once in a single
6188
 * mixed-content declaration.
6189
 *
6190
 * returns: the list of the xmlElementContentPtr describing the element choices
6191
 */
6192
xmlElementContentPtr
6193
580k
xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6194
580k
    xmlElementContentPtr ret = NULL, cur = NULL, n;
6195
580k
    const xmlChar *elem = NULL;
6196
6197
580k
    GROW;
6198
580k
    if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6199
580k
  SKIP(7);
6200
580k
  SKIP_BLANKS;
6201
580k
  SHRINK;
6202
580k
  if (RAW == ')') {
6203
336k
      if (ctxt->input->id != inputchk) {
6204
48
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6205
48
                               "Element content declaration doesn't start and"
6206
48
                               " stop in the same entity\n");
6207
48
      }
6208
336k
      NEXT;
6209
336k
      ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6210
336k
      if (ret == NULL)
6211
0
          return(NULL);
6212
336k
      if (RAW == '*') {
6213
323
    ret->ocur = XML_ELEMENT_CONTENT_MULT;
6214
323
    NEXT;
6215
323
      }
6216
336k
      return(ret);
6217
336k
  }
6218
243k
  if ((RAW == '(') || (RAW == '|')) {
6219
242k
      ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6220
242k
      if (ret == NULL) return(NULL);
6221
242k
  }
6222
2.59M
  while ((RAW == '|') && (ctxt->instate != XML_PARSER_EOF)) {
6223
2.35M
      NEXT;
6224
2.35M
      if (elem == NULL) {
6225
242k
          ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6226
242k
    if (ret == NULL) {
6227
0
        xmlFreeDocElementContent(ctxt->myDoc, cur);
6228
0
                    return(NULL);
6229
0
                }
6230
242k
    ret->c1 = cur;
6231
242k
    if (cur != NULL)
6232
242k
        cur->parent = ret;
6233
242k
    cur = ret;
6234
2.11M
      } else {
6235
2.11M
          n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6236
2.11M
    if (n == NULL) {
6237
0
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6238
0
                    return(NULL);
6239
0
                }
6240
2.11M
    n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6241
2.11M
    if (n->c1 != NULL)
6242
2.11M
        n->c1->parent = n;
6243
2.11M
          cur->c2 = n;
6244
2.11M
    if (n != NULL)
6245
2.11M
        n->parent = cur;
6246
2.11M
    cur = n;
6247
2.11M
      }
6248
2.35M
      SKIP_BLANKS;
6249
2.35M
      elem = xmlParseName(ctxt);
6250
2.35M
      if (elem == NULL) {
6251
1.49k
    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6252
1.49k
      "xmlParseElementMixedContentDecl : Name expected\n");
6253
1.49k
    xmlFreeDocElementContent(ctxt->myDoc, ret);
6254
1.49k
    return(NULL);
6255
1.49k
      }
6256
2.35M
      SKIP_BLANKS;
6257
2.35M
      GROW;
6258
2.35M
  }
6259
242k
  if ((RAW == ')') && (NXT(1) == '*')) {
6260
240k
      if (elem != NULL) {
6261
240k
    cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
6262
240k
                                   XML_ELEMENT_CONTENT_ELEMENT);
6263
240k
    if (cur->c2 != NULL)
6264
240k
        cur->c2->parent = cur;
6265
240k
            }
6266
240k
            if (ret != NULL)
6267
240k
                ret->ocur = XML_ELEMENT_CONTENT_MULT;
6268
240k
      if (ctxt->input->id != inputchk) {
6269
26
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6270
26
                               "Element content declaration doesn't start and"
6271
26
                               " stop in the same entity\n");
6272
26
      }
6273
240k
      SKIP(2);
6274
240k
  } else {
6275
2.42k
      xmlFreeDocElementContent(ctxt->myDoc, ret);
6276
2.42k
      xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
6277
2.42k
      return(NULL);
6278
2.42k
  }
6279
6280
242k
    } else {
6281
0
  xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
6282
0
    }
6283
240k
    return(ret);
6284
580k
}
6285
6286
/**
6287
 * xmlParseElementChildrenContentDeclPriv:
6288
 * @ctxt:  an XML parser context
6289
 * @inputchk:  the input used for the current entity, needed for boundary checks
6290
 * @depth: the level of recursion
6291
 *
6292
 * parse the declaration for a Mixed Element content
6293
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6294
 *
6295
 *
6296
 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6297
 *
6298
 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6299
 *
6300
 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6301
 *
6302
 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6303
 *
6304
 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6305
 * TODO Parameter-entity replacement text must be properly nested
6306
 *  with parenthesized groups. That is to say, if either of the
6307
 *  opening or closing parentheses in a choice, seq, or Mixed
6308
 *  construct is contained in the replacement text for a parameter
6309
 *  entity, both must be contained in the same replacement text. For
6310
 *  interoperability, if a parameter-entity reference appears in a
6311
 *  choice, seq, or Mixed construct, its replacement text should not
6312
 *  be empty, and neither the first nor last non-blank character of
6313
 *  the replacement text should be a connector (| or ,).
6314
 *
6315
 * Returns the tree of xmlElementContentPtr describing the element
6316
 *          hierarchy.
6317
 */
6318
static xmlElementContentPtr
6319
xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
6320
600k
                                       int depth) {
6321
600k
    xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
6322
600k
    const xmlChar *elem;
6323
600k
    xmlChar type = 0;
6324
6325
600k
    if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
6326
600k
        (depth >  2048)) {
6327
0
        xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED,
6328
0
"xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n",
6329
0
                          depth);
6330
0
  return(NULL);
6331
0
    }
6332
600k
    SKIP_BLANKS;
6333
600k
    GROW;
6334
600k
    if (RAW == '(') {
6335
35.8k
  int inputid = ctxt->input->id;
6336
6337
        /* Recurse on first child */
6338
35.8k
  NEXT;
6339
35.8k
  SKIP_BLANKS;
6340
35.8k
        cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6341
35.8k
                                                           depth + 1);
6342
35.8k
        if (cur == NULL)
6343
4.36k
            return(NULL);
6344
31.5k
  SKIP_BLANKS;
6345
31.5k
  GROW;
6346
564k
    } else {
6347
564k
  elem = xmlParseName(ctxt);
6348
564k
  if (elem == NULL) {
6349
5.22k
      xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6350
5.22k
      return(NULL);
6351
5.22k
  }
6352
559k
        cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6353
559k
  if (cur == NULL) {
6354
0
      xmlErrMemory(ctxt, NULL);
6355
0
      return(NULL);
6356
0
  }
6357
559k
  GROW;
6358
559k
  if (RAW == '?') {
6359
71.0k
      cur->ocur = XML_ELEMENT_CONTENT_OPT;
6360
71.0k
      NEXT;
6361
488k
  } else if (RAW == '*') {
6362
77.0k
      cur->ocur = XML_ELEMENT_CONTENT_MULT;
6363
77.0k
      NEXT;
6364
411k
  } else if (RAW == '+') {
6365
64.3k
      cur->ocur = XML_ELEMENT_CONTENT_PLUS;
6366
64.3k
      NEXT;
6367
347k
  } else {
6368
347k
      cur->ocur = XML_ELEMENT_CONTENT_ONCE;
6369
347k
  }
6370
559k
  GROW;
6371
559k
    }
6372
591k
    SKIP_BLANKS;
6373
591k
    SHRINK;
6374
2.26M
    while ((RAW != ')') && (ctxt->instate != XML_PARSER_EOF)) {
6375
        /*
6376
   * Each loop we parse one separator and one element.
6377
   */
6378
1.68M
        if (RAW == ',') {
6379
616k
      if (type == 0) type = CUR;
6380
6381
      /*
6382
       * Detect "Name | Name , Name" error
6383
       */
6384
389k
      else if (type != CUR) {
6385
76
    xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6386
76
        "xmlParseElementChildrenContentDecl : '%c' expected\n",
6387
76
                      type);
6388
76
    if ((last != NULL) && (last != ret))
6389
76
        xmlFreeDocElementContent(ctxt->myDoc, last);
6390
76
    if (ret != NULL)
6391
76
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6392
76
    return(NULL);
6393
76
      }
6394
616k
      NEXT;
6395
6396
616k
      op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
6397
616k
      if (op == NULL) {
6398
0
    if ((last != NULL) && (last != ret))
6399
0
        xmlFreeDocElementContent(ctxt->myDoc, last);
6400
0
          xmlFreeDocElementContent(ctxt->myDoc, ret);
6401
0
    return(NULL);
6402
0
      }
6403
616k
      if (last == NULL) {
6404
227k
    op->c1 = ret;
6405
227k
    if (ret != NULL)
6406
227k
        ret->parent = op;
6407
227k
    ret = cur = op;
6408
389k
      } else {
6409
389k
          cur->c2 = op;
6410
389k
    if (op != NULL)
6411
389k
        op->parent = cur;
6412
389k
    op->c1 = last;
6413
389k
    if (last != NULL)
6414
389k
        last->parent = op;
6415
389k
    cur =op;
6416
389k
    last = NULL;
6417
389k
      }
6418
1.07M
  } else if (RAW == '|') {
6419
1.06M
      if (type == 0) type = CUR;
6420
6421
      /*
6422
       * Detect "Name , Name | Name" error
6423
       */
6424
902k
      else if (type != CUR) {
6425
74
    xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6426
74
        "xmlParseElementChildrenContentDecl : '%c' expected\n",
6427
74
          type);
6428
74
    if ((last != NULL) && (last != ret))
6429
74
        xmlFreeDocElementContent(ctxt->myDoc, last);
6430
74
    if (ret != NULL)
6431
74
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6432
74
    return(NULL);
6433
74
      }
6434
1.06M
      NEXT;
6435
6436
1.06M
      op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6437
1.06M
      if (op == NULL) {
6438
0
    if ((last != NULL) && (last != ret))
6439
0
        xmlFreeDocElementContent(ctxt->myDoc, last);
6440
0
    if (ret != NULL)
6441
0
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6442
0
    return(NULL);
6443
0
      }
6444
1.06M
      if (last == NULL) {
6445
159k
    op->c1 = ret;
6446
159k
    if (ret != NULL)
6447
159k
        ret->parent = op;
6448
159k
    ret = cur = op;
6449
902k
      } else {
6450
902k
          cur->c2 = op;
6451
902k
    if (op != NULL)
6452
902k
        op->parent = cur;
6453
902k
    op->c1 = last;
6454
902k
    if (last != NULL)
6455
902k
        last->parent = op;
6456
902k
    cur =op;
6457
902k
    last = NULL;
6458
902k
      }
6459
1.06M
  } else {
6460
9.21k
      xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
6461
9.21k
      if ((last != NULL) && (last != ret))
6462
4.48k
          xmlFreeDocElementContent(ctxt->myDoc, last);
6463
9.21k
      if (ret != NULL)
6464
9.21k
    xmlFreeDocElementContent(ctxt->myDoc, ret);
6465
9.21k
      return(NULL);
6466
9.21k
  }
6467
1.67M
  GROW;
6468
1.67M
  SKIP_BLANKS;
6469
1.67M
  GROW;
6470
1.67M
  if (RAW == '(') {
6471
75.7k
      int inputid = ctxt->input->id;
6472
      /* Recurse on second child */
6473
75.7k
      NEXT;
6474
75.7k
      SKIP_BLANKS;
6475
75.7k
      last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6476
75.7k
                                                          depth + 1);
6477
75.7k
            if (last == NULL) {
6478
1.38k
    if (ret != NULL)
6479
1.38k
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6480
1.38k
    return(NULL);
6481
1.38k
            }
6482
74.3k
      SKIP_BLANKS;
6483
1.60M
  } else {
6484
1.60M
      elem = xmlParseName(ctxt);
6485
1.60M
      if (elem == NULL) {
6486
2.89k
    xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6487
2.89k
    if (ret != NULL)
6488
2.89k
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6489
2.89k
    return(NULL);
6490
2.89k
      }
6491
1.59M
      last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6492
1.59M
      if (last == NULL) {
6493
0
    if (ret != NULL)
6494
0
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6495
0
    return(NULL);
6496
0
      }
6497
1.59M
      if (RAW == '?') {
6498
265k
    last->ocur = XML_ELEMENT_CONTENT_OPT;
6499
265k
    NEXT;
6500
1.33M
      } else if (RAW == '*') {
6501
179k
    last->ocur = XML_ELEMENT_CONTENT_MULT;
6502
179k
    NEXT;
6503
1.15M
      } else if (RAW == '+') {
6504
30.8k
    last->ocur = XML_ELEMENT_CONTENT_PLUS;
6505
30.8k
    NEXT;
6506
1.12M
      } else {
6507
1.12M
    last->ocur = XML_ELEMENT_CONTENT_ONCE;
6508
1.12M
      }
6509
1.59M
  }
6510
1.67M
  SKIP_BLANKS;
6511
1.67M
  GROW;
6512
1.67M
    }
6513
577k
    if ((cur != NULL) && (last != NULL)) {
6514
377k
        cur->c2 = last;
6515
377k
  if (last != NULL)
6516
377k
      last->parent = cur;
6517
377k
    }
6518
577k
    if (ctxt->input->id != inputchk) {
6519
637
  xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6520
637
                       "Element content declaration doesn't start and stop in"
6521
637
                       " the same entity\n");
6522
637
    }
6523
577k
    NEXT;
6524
577k
    if (RAW == '?') {
6525
24.2k
  if (ret != NULL) {
6526
24.2k
      if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6527
24.2k
          (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6528
21
          ret->ocur = XML_ELEMENT_CONTENT_MULT;
6529
24.2k
      else
6530
24.2k
          ret->ocur = XML_ELEMENT_CONTENT_OPT;
6531
24.2k
  }
6532
24.2k
  NEXT;
6533
553k
    } else if (RAW == '*') {
6534
157k
  if (ret != NULL) {
6535
157k
      ret->ocur = XML_ELEMENT_CONTENT_MULT;
6536
157k
      cur = ret;
6537
      /*
6538
       * Some normalization:
6539
       * (a | b* | c?)* == (a | b | c)*
6540
       */
6541
868k
      while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6542
711k
    if ((cur->c1 != NULL) &&
6543
711k
              ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6544
711k
         (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6545
18.8k
        cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6546
711k
    if ((cur->c2 != NULL) &&
6547
711k
              ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6548
711k
         (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6549
3.11k
        cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6550
711k
    cur = cur->c2;
6551
711k
      }
6552
157k
  }
6553
157k
  NEXT;
6554
395k
    } else if (RAW == '+') {
6555
70.8k
  if (ret != NULL) {
6556
70.8k
      int found = 0;
6557
6558
70.8k
      if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6559
70.8k
          (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6560
51
          ret->ocur = XML_ELEMENT_CONTENT_MULT;
6561
70.7k
      else
6562
70.7k
          ret->ocur = XML_ELEMENT_CONTENT_PLUS;
6563
      /*
6564
       * Some normalization:
6565
       * (a | b*)+ == (a | b)*
6566
       * (a | b?)+ == (a | b)*
6567
       */
6568
112k
      while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6569
41.9k
    if ((cur->c1 != NULL) &&
6570
41.9k
              ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6571
41.9k
         (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6572
505
        cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6573
505
        found = 1;
6574
505
    }
6575
41.9k
    if ((cur->c2 != NULL) &&
6576
41.9k
              ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6577
41.9k
         (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6578
413
        cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6579
413
        found = 1;
6580
413
    }
6581
41.9k
    cur = cur->c2;
6582
41.9k
      }
6583
70.8k
      if (found)
6584
548
    ret->ocur = XML_ELEMENT_CONTENT_MULT;
6585
70.8k
  }
6586
70.8k
  NEXT;
6587
70.8k
    }
6588
577k
    return(ret);
6589
591k
}
6590
6591
/**
6592
 * xmlParseElementChildrenContentDecl:
6593
 * @ctxt:  an XML parser context
6594
 * @inputchk:  the input used for the current entity, needed for boundary checks
6595
 *
6596
 * DEPRECATED: Internal function, don't use.
6597
 *
6598
 * parse the declaration for a Mixed Element content
6599
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6600
 *
6601
 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6602
 *
6603
 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6604
 *
6605
 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6606
 *
6607
 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6608
 *
6609
 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6610
 * TODO Parameter-entity replacement text must be properly nested
6611
 *  with parenthesized groups. That is to say, if either of the
6612
 *  opening or closing parentheses in a choice, seq, or Mixed
6613
 *  construct is contained in the replacement text for a parameter
6614
 *  entity, both must be contained in the same replacement text. For
6615
 *  interoperability, if a parameter-entity reference appears in a
6616
 *  choice, seq, or Mixed construct, its replacement text should not
6617
 *  be empty, and neither the first nor last non-blank character of
6618
 *  the replacement text should be a connector (| or ,).
6619
 *
6620
 * Returns the tree of xmlElementContentPtr describing the element
6621
 *          hierarchy.
6622
 */
6623
xmlElementContentPtr
6624
0
xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6625
    /* stub left for API/ABI compat */
6626
0
    return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6627
0
}
6628
6629
/**
6630
 * xmlParseElementContentDecl:
6631
 * @ctxt:  an XML parser context
6632
 * @name:  the name of the element being defined.
6633
 * @result:  the Element Content pointer will be stored here if any
6634
 *
6635
 * DEPRECATED: Internal function, don't use.
6636
 *
6637
 * parse the declaration for an Element content either Mixed or Children,
6638
 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
6639
 *
6640
 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6641
 *
6642
 * returns: the type of element content XML_ELEMENT_TYPE_xxx
6643
 */
6644
6645
int
6646
xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
6647
1.06M
                           xmlElementContentPtr *result) {
6648
6649
1.06M
    xmlElementContentPtr tree = NULL;
6650
1.06M
    int inputid = ctxt->input->id;
6651
1.06M
    int res;
6652
6653
1.06M
    *result = NULL;
6654
6655
1.06M
    if (RAW != '(') {
6656
0
  xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6657
0
    "xmlParseElementContentDecl : %s '(' expected\n", name);
6658
0
  return(-1);
6659
0
    }
6660
1.06M
    NEXT;
6661
1.06M
    GROW;
6662
1.06M
    if (ctxt->instate == XML_PARSER_EOF)
6663
0
        return(-1);
6664
1.06M
    SKIP_BLANKS;
6665
1.06M
    if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6666
580k
        tree = xmlParseElementMixedContentDecl(ctxt, inputid);
6667
580k
  res = XML_ELEMENT_TYPE_MIXED;
6668
580k
    } else {
6669
489k
        tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
6670
489k
  res = XML_ELEMENT_TYPE_ELEMENT;
6671
489k
    }
6672
1.06M
    SKIP_BLANKS;
6673
1.06M
    *result = tree;
6674
1.06M
    return(res);
6675
1.06M
}
6676
6677
/**
6678
 * xmlParseElementDecl:
6679
 * @ctxt:  an XML parser context
6680
 *
6681
 * DEPRECATED: Internal function, don't use.
6682
 *
6683
 * Parse an element declaration. Always consumes '<!'.
6684
 *
6685
 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6686
 *
6687
 * [ VC: Unique Element Type Declaration ]
6688
 * No element type may be declared more than once
6689
 *
6690
 * Returns the type of the element, or -1 in case of error
6691
 */
6692
int
6693
1.48M
xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
6694
1.48M
    const xmlChar *name;
6695
1.48M
    int ret = -1;
6696
1.48M
    xmlElementContentPtr content  = NULL;
6697
6698
1.48M
    if ((CUR != '<') || (NXT(1) != '!'))
6699
0
        return(ret);
6700
1.48M
    SKIP(2);
6701
6702
    /* GROW; done in the caller */
6703
1.48M
    if (CMP7(CUR_PTR, 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
6704
1.48M
  int inputid = ctxt->input->id;
6705
6706
1.48M
  SKIP(7);
6707
1.48M
  if (SKIP_BLANKS == 0) {
6708
1.32k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6709
1.32k
               "Space required after 'ELEMENT'\n");
6710
1.32k
      return(-1);
6711
1.32k
  }
6712
1.48M
        name = xmlParseName(ctxt);
6713
1.48M
  if (name == NULL) {
6714
2.49k
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6715
2.49k
         "xmlParseElementDecl: no name for Element\n");
6716
2.49k
      return(-1);
6717
2.49k
  }
6718
1.47M
  if (SKIP_BLANKS == 0) {
6719
4.72k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6720
4.72k
         "Space required after the element name\n");
6721
4.72k
  }
6722
1.47M
  if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
6723
394k
      SKIP(5);
6724
      /*
6725
       * Element must always be empty.
6726
       */
6727
394k
      ret = XML_ELEMENT_TYPE_EMPTY;
6728
1.08M
  } else if ((RAW == 'A') && (NXT(1) == 'N') &&
6729
1.08M
             (NXT(2) == 'Y')) {
6730
6.12k
      SKIP(3);
6731
      /*
6732
       * Element is a generic container.
6733
       */
6734
6.12k
      ret = XML_ELEMENT_TYPE_ANY;
6735
1.07M
  } else if (RAW == '(') {
6736
1.06M
      ret = xmlParseElementContentDecl(ctxt, name, &content);
6737
1.06M
  } else {
6738
      /*
6739
       * [ WFC: PEs in Internal Subset ] error handling.
6740
       */
6741
7.95k
      if ((RAW == '%') && (ctxt->external == 0) &&
6742
7.95k
          (ctxt->inputNr == 1)) {
6743
624
    xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
6744
624
    "PEReference: forbidden within markup decl in internal subset\n");
6745
7.33k
      } else {
6746
7.33k
    xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6747
7.33k
          "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6748
7.33k
            }
6749
7.95k
      return(-1);
6750
7.95k
  }
6751
6752
1.47M
  SKIP_BLANKS;
6753
6754
1.47M
  if (RAW != '>') {
6755
18.5k
      xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
6756
18.5k
      if (content != NULL) {
6757
2.39k
    xmlFreeDocElementContent(ctxt->myDoc, content);
6758
2.39k
      }
6759
1.45M
  } else {
6760
1.45M
      if (inputid != ctxt->input->id) {
6761
152
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6762
152
                               "Element declaration doesn't start and stop in"
6763
152
                               " the same entity\n");
6764
152
      }
6765
6766
1.45M
      NEXT;
6767
1.45M
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6768
1.45M
    (ctxt->sax->elementDecl != NULL)) {
6769
1.29M
    if (content != NULL)
6770
937k
        content->parent = NULL;
6771
1.29M
          ctxt->sax->elementDecl(ctxt->userData, name, ret,
6772
1.29M
                           content);
6773
1.29M
    if ((content != NULL) && (content->parent == NULL)) {
6774
        /*
6775
         * this is a trick: if xmlAddElementDecl is called,
6776
         * instead of copying the full tree it is plugged directly
6777
         * if called from the parser. Avoid duplicating the
6778
         * interfaces or change the API/ABI
6779
         */
6780
194k
        xmlFreeDocElementContent(ctxt->myDoc, content);
6781
194k
    }
6782
1.29M
      } else if (content != NULL) {
6783
107k
    xmlFreeDocElementContent(ctxt->myDoc, content);
6784
107k
      }
6785
1.45M
  }
6786
1.47M
    }
6787
1.47M
    return(ret);
6788
1.48M
}
6789
6790
/**
6791
 * xmlParseConditionalSections
6792
 * @ctxt:  an XML parser context
6793
 *
6794
 * Parse a conditional section. Always consumes '<!['.
6795
 *
6796
 * [61] conditionalSect ::= includeSect | ignoreSect
6797
 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6798
 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6799
 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6800
 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6801
 */
6802
6803
static void
6804
7.42k
xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
6805
7.42k
    int *inputIds = NULL;
6806
7.42k
    size_t inputIdsSize = 0;
6807
7.42k
    size_t depth = 0;
6808
6809
53.9k
    while (ctxt->instate != XML_PARSER_EOF) {
6810
53.7k
        if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6811
24.4k
            int id = ctxt->input->id;
6812
6813
24.4k
            SKIP(3);
6814
24.4k
            SKIP_BLANKS;
6815
6816
24.4k
            if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
6817
20.5k
                SKIP(7);
6818
20.5k
                SKIP_BLANKS;
6819
20.5k
                if (RAW != '[') {
6820
48
                    xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6821
48
                    xmlHaltParser(ctxt);
6822
48
                    goto error;
6823
48
                }
6824
20.4k
                if (ctxt->input->id != id) {
6825
52
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6826
52
                                   "All markup of the conditional section is"
6827
52
                                   " not in the same entity\n");
6828
52
                }
6829
20.4k
                NEXT;
6830
6831
20.4k
                if (inputIdsSize <= depth) {
6832
5.85k
                    int *tmp;
6833
6834
5.85k
                    inputIdsSize = (inputIdsSize == 0 ? 4 : inputIdsSize * 2);
6835
5.85k
                    tmp = (int *) xmlRealloc(inputIds,
6836
5.85k
                            inputIdsSize * sizeof(int));
6837
5.85k
                    if (tmp == NULL) {
6838
0
                        xmlErrMemory(ctxt, NULL);
6839
0
                        goto error;
6840
0
                    }
6841
5.85k
                    inputIds = tmp;
6842
5.85k
                }
6843
20.4k
                inputIds[depth] = id;
6844
20.4k
                depth++;
6845
20.4k
            } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
6846
3.14k
                size_t ignoreDepth = 0;
6847
6848
3.14k
                SKIP(6);
6849
3.14k
                SKIP_BLANKS;
6850
3.14k
                if (RAW != '[') {
6851
78
                    xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6852
78
                    xmlHaltParser(ctxt);
6853
78
                    goto error;
6854
78
                }
6855
3.06k
                if (ctxt->input->id != id) {
6856
129
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6857
129
                                   "All markup of the conditional section is"
6858
129
                                   " not in the same entity\n");
6859
129
                }
6860
3.06k
                NEXT;
6861
6862
6.91M
                while (RAW != 0) {
6863
6.91M
                    if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6864
6.63k
                        SKIP(3);
6865
6.63k
                        ignoreDepth++;
6866
                        /* Check for integer overflow */
6867
6.63k
                        if (ignoreDepth == 0) {
6868
0
                            xmlErrMemory(ctxt, NULL);
6869
0
                            goto error;
6870
0
                        }
6871
6.91M
                    } else if ((RAW == ']') && (NXT(1) == ']') &&
6872
6.91M
                               (NXT(2) == '>')) {
6873
5.95k
                        if (ignoreDepth == 0)
6874
1.71k
                            break;
6875
4.24k
                        SKIP(3);
6876
4.24k
                        ignoreDepth--;
6877
6.90M
                    } else {
6878
6.90M
                        NEXT;
6879
6.90M
                    }
6880
6.91M
                }
6881
6882
3.06k
    if (RAW == 0) {
6883
1.35k
        xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
6884
1.35k
                    goto error;
6885
1.35k
    }
6886
1.71k
                if (ctxt->input->id != id) {
6887
36
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6888
36
                                   "All markup of the conditional section is"
6889
36
                                   " not in the same entity\n");
6890
36
                }
6891
1.71k
                SKIP(3);
6892
1.71k
            } else {
6893
828
                xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
6894
828
                xmlHaltParser(ctxt);
6895
828
                goto error;
6896
828
            }
6897
29.2k
        } else if ((depth > 0) &&
6898
29.2k
                   (RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6899
10.0k
            depth--;
6900
10.0k
            if (ctxt->input->id != inputIds[depth]) {
6901
381
                xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6902
381
                               "All markup of the conditional section is not"
6903
381
                               " in the same entity\n");
6904
381
            }
6905
10.0k
            SKIP(3);
6906
19.1k
        } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
6907
17.2k
            xmlParseMarkupDecl(ctxt);
6908
17.2k
        } else {
6909
1.94k
            xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6910
1.94k
            xmlHaltParser(ctxt);
6911
1.94k
            goto error;
6912
1.94k
        }
6913
6914
49.4k
        if (depth == 0)
6915
2.92k
            break;
6916
6917
46.5k
        SKIP_BLANKS;
6918
46.5k
        GROW;
6919
46.5k
    }
6920
6921
7.42k
error:
6922
7.42k
    xmlFree(inputIds);
6923
7.42k
}
6924
6925
/**
6926
 * xmlParseMarkupDecl:
6927
 * @ctxt:  an XML parser context
6928
 *
6929
 * DEPRECATED: Internal function, don't use.
6930
 *
6931
 * Parse markup declarations. Always consumes '<!' or '<?'.
6932
 *
6933
 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6934
 *                     NotationDecl | PI | Comment
6935
 *
6936
 * [ VC: Proper Declaration/PE Nesting ]
6937
 * Parameter-entity replacement text must be properly nested with
6938
 * markup declarations. That is to say, if either the first character
6939
 * or the last character of a markup declaration (markupdecl above) is
6940
 * contained in the replacement text for a parameter-entity reference,
6941
 * both must be contained in the same replacement text.
6942
 *
6943
 * [ WFC: PEs in Internal Subset ]
6944
 * In the internal DTD subset, parameter-entity references can occur
6945
 * only where markup declarations can occur, not within markup declarations.
6946
 * (This does not apply to references that occur in external parameter
6947
 * entities or to the external subset.)
6948
 */
6949
void
6950
32.0M
xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
6951
32.0M
    GROW;
6952
32.0M
    if (CUR == '<') {
6953
32.0M
        if (NXT(1) == '!') {
6954
31.9M
      switch (NXT(2)) {
6955
2.57M
          case 'E':
6956
2.57M
        if (NXT(3) == 'L')
6957
1.48M
      xmlParseElementDecl(ctxt);
6958
1.09M
        else if (NXT(3) == 'N')
6959
1.09M
      xmlParseEntityDecl(ctxt);
6960
949
                    else
6961
949
                        SKIP(2);
6962
2.57M
        break;
6963
1.29M
          case 'A':
6964
1.29M
        xmlParseAttributeListDecl(ctxt);
6965
1.29M
        break;
6966
28.8k
          case 'N':
6967
28.8k
        xmlParseNotationDecl(ctxt);
6968
28.8k
        break;
6969
28.0M
          case '-':
6970
28.0M
        xmlParseComment(ctxt);
6971
28.0M
        break;
6972
17.1k
    default:
6973
        /* there is an error but it will be detected later */
6974
17.1k
                    SKIP(2);
6975
17.1k
        break;
6976
31.9M
      }
6977
31.9M
  } else if (NXT(1) == '?') {
6978
56.1k
      xmlParsePI(ctxt);
6979
56.1k
  }
6980
32.0M
    }
6981
6982
    /*
6983
     * detect requirement to exit there and act accordingly
6984
     * and avoid having instate overridden later on
6985
     */
6986
32.0M
    if (ctxt->instate == XML_PARSER_EOF)
6987
15.4k
        return;
6988
6989
31.9M
    ctxt->instate = XML_PARSER_DTD;
6990
31.9M
}
6991
6992
/**
6993
 * xmlParseTextDecl:
6994
 * @ctxt:  an XML parser context
6995
 *
6996
 * DEPRECATED: Internal function, don't use.
6997
 *
6998
 * parse an XML declaration header for external entities
6999
 *
7000
 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
7001
 */
7002
7003
void
7004
50.1k
xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
7005
50.1k
    xmlChar *version;
7006
50.1k
    const xmlChar *encoding;
7007
50.1k
    int oldstate;
7008
7009
    /*
7010
     * We know that '<?xml' is here.
7011
     */
7012
50.1k
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
7013
49.9k
  SKIP(5);
7014
49.9k
    } else {
7015
196
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
7016
196
  return;
7017
196
    }
7018
7019
    /* Avoid expansion of parameter entities when skipping blanks. */
7020
49.9k
    oldstate = ctxt->instate;
7021
49.9k
    ctxt->instate = XML_PARSER_START;
7022
7023
49.9k
    if (SKIP_BLANKS == 0) {
7024
0
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7025
0
           "Space needed after '<?xml'\n");
7026
0
    }
7027
7028
    /*
7029
     * We may have the VersionInfo here.
7030
     */
7031
49.9k
    version = xmlParseVersionInfo(ctxt);
7032
49.9k
    if (version == NULL)
7033
18.3k
  version = xmlCharStrdup(XML_DEFAULT_VERSION);
7034
31.6k
    else {
7035
31.6k
  if (SKIP_BLANKS == 0) {
7036
3.12k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7037
3.12k
               "Space needed here\n");
7038
3.12k
  }
7039
31.6k
    }
7040
49.9k
    ctxt->input->version = version;
7041
7042
    /*
7043
     * We must have the encoding declaration
7044
     */
7045
49.9k
    encoding = xmlParseEncodingDecl(ctxt);
7046
49.9k
    if (ctxt->instate == XML_PARSER_EOF)
7047
0
        return;
7048
49.9k
    if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7049
  /*
7050
   * The XML REC instructs us to stop parsing right here
7051
   */
7052
1.61k
        ctxt->instate = oldstate;
7053
1.61k
        return;
7054
1.61k
    }
7055
48.3k
    if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
7056
4.35k
  xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
7057
4.35k
           "Missing encoding in text declaration\n");
7058
4.35k
    }
7059
7060
48.3k
    SKIP_BLANKS;
7061
48.3k
    if ((RAW == '?') && (NXT(1) == '>')) {
7062
10.5k
        SKIP(2);
7063
37.7k
    } else if (RAW == '>') {
7064
        /* Deprecated old WD ... */
7065
860
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
7066
860
  NEXT;
7067
36.9k
    } else {
7068
36.9k
        int c;
7069
7070
36.9k
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
7071
3.92M
        while ((c = CUR) != 0) {
7072
3.90M
            NEXT;
7073
3.90M
            if (c == '>')
7074
16.9k
                break;
7075
3.90M
        }
7076
36.9k
    }
7077
7078
48.3k
    ctxt->instate = oldstate;
7079
48.3k
}
7080
7081
/**
7082
 * xmlParseExternalSubset:
7083
 * @ctxt:  an XML parser context
7084
 * @ExternalID: the external identifier
7085
 * @SystemID: the system identifier (or URL)
7086
 *
7087
 * parse Markup declarations from an external subset
7088
 *
7089
 * [30] extSubset ::= textDecl? extSubsetDecl
7090
 *
7091
 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
7092
 */
7093
void
7094
xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
7095
33.4k
                       const xmlChar *SystemID) {
7096
33.4k
    xmlDetectSAX2(ctxt);
7097
33.4k
    GROW;
7098
7099
33.4k
    if ((ctxt->encoding == NULL) &&
7100
33.4k
        (ctxt->input->end - ctxt->input->cur >= 4)) {
7101
33.3k
        xmlChar start[4];
7102
33.3k
  xmlCharEncoding enc;
7103
7104
33.3k
  start[0] = RAW;
7105
33.3k
  start[1] = NXT(1);
7106
33.3k
  start[2] = NXT(2);
7107
33.3k
  start[3] = NXT(3);
7108
33.3k
  enc = xmlDetectCharEncoding(start, 4);
7109
33.3k
  if (enc != XML_CHAR_ENCODING_NONE)
7110
6.00k
      xmlSwitchEncoding(ctxt, enc);
7111
33.3k
    }
7112
7113
33.4k
    if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
7114
5.65k
  xmlParseTextDecl(ctxt);
7115
5.65k
  if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7116
      /*
7117
       * The XML REC instructs us to stop parsing right here
7118
       */
7119
72
      xmlHaltParser(ctxt);
7120
72
      return;
7121
72
  }
7122
5.65k
    }
7123
33.3k
    if (ctxt->myDoc == NULL) {
7124
0
        ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
7125
0
  if (ctxt->myDoc == NULL) {
7126
0
      xmlErrMemory(ctxt, "New Doc failed");
7127
0
      return;
7128
0
  }
7129
0
  ctxt->myDoc->properties = XML_DOC_INTERNAL;
7130
0
    }
7131
33.3k
    if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
7132
0
        xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
7133
7134
33.3k
    ctxt->instate = XML_PARSER_DTD;
7135
33.3k
    ctxt->external = 1;
7136
33.3k
    SKIP_BLANKS;
7137
1.16M
    while ((ctxt->instate != XML_PARSER_EOF) && (RAW != 0)) {
7138
1.13M
  GROW;
7139
1.13M
        if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7140
7.42k
            xmlParseConditionalSections(ctxt);
7141
1.12M
        } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
7142
1.12M
            xmlParseMarkupDecl(ctxt);
7143
1.12M
        } else {
7144
7.04k
            xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7145
7.04k
            xmlHaltParser(ctxt);
7146
7.04k
            return;
7147
7.04k
        }
7148
1.12M
        SKIP_BLANKS;
7149
1.12M
    }
7150
7151
26.3k
    if (RAW != 0) {
7152
0
  xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7153
0
    }
7154
7155
26.3k
}
7156
7157
/**
7158
 * xmlParseReference:
7159
 * @ctxt:  an XML parser context
7160
 *
7161
 * DEPRECATED: Internal function, don't use.
7162
 *
7163
 * parse and handle entity references in content, depending on the SAX
7164
 * interface, this may end-up in a call to character() if this is a
7165
 * CharRef, a predefined entity, if there is no reference() callback.
7166
 * or if the parser was asked to switch to that mode.
7167
 *
7168
 * Always consumes '&'.
7169
 *
7170
 * [67] Reference ::= EntityRef | CharRef
7171
 */
7172
void
7173
4.69M
xmlParseReference(xmlParserCtxtPtr ctxt) {
7174
4.69M
    xmlEntityPtr ent;
7175
4.69M
    xmlChar *val;
7176
4.69M
    int was_checked;
7177
4.69M
    xmlNodePtr list = NULL;
7178
4.69M
    xmlParserErrors ret = XML_ERR_OK;
7179
7180
7181
4.69M
    if (RAW != '&')
7182
0
        return;
7183
7184
    /*
7185
     * Simple case of a CharRef
7186
     */
7187
4.69M
    if (NXT(1) == '#') {
7188
248k
  int i = 0;
7189
248k
  xmlChar out[16];
7190
248k
  int hex = NXT(2);
7191
248k
  int value = xmlParseCharRef(ctxt);
7192
7193
248k
  if (value == 0)
7194
33.5k
      return;
7195
214k
  if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
7196
      /*
7197
       * So we are using non-UTF-8 buffers
7198
       * Check that the char fit on 8bits, if not
7199
       * generate a CharRef.
7200
       */
7201
134k
      if (value <= 0xFF) {
7202
129k
    out[0] = value;
7203
129k
    out[1] = 0;
7204
129k
    if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7205
129k
        (!ctxt->disableSAX))
7206
90.8k
        ctxt->sax->characters(ctxt->userData, out, 1);
7207
129k
      } else {
7208
5.11k
    if ((hex == 'x') || (hex == 'X'))
7209
665
        snprintf((char *)out, sizeof(out), "#x%X", value);
7210
4.45k
    else
7211
4.45k
        snprintf((char *)out, sizeof(out), "#%d", value);
7212
5.11k
    if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7213
5.11k
        (!ctxt->disableSAX))
7214
2.64k
        ctxt->sax->reference(ctxt->userData, out);
7215
5.11k
      }
7216
134k
  } else {
7217
      /*
7218
       * Just encode the value in UTF-8
7219
       */
7220
80.0k
      COPY_BUF(0 ,out, i, value);
7221
80.0k
      out[i] = 0;
7222
80.0k
      if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7223
80.0k
    (!ctxt->disableSAX))
7224
65.0k
    ctxt->sax->characters(ctxt->userData, out, i);
7225
80.0k
  }
7226
214k
  return;
7227
248k
    }
7228
7229
    /*
7230
     * We are seeing an entity reference
7231
     */
7232
4.45M
    ent = xmlParseEntityRef(ctxt);
7233
4.45M
    if (ent == NULL) return;
7234
3.42M
    if (!ctxt->wellFormed)
7235
1.09M
  return;
7236
2.32M
    was_checked = ent->flags & XML_ENT_PARSED;
7237
7238
    /* special case of predefined entities */
7239
2.32M
    if ((ent->name == NULL) ||
7240
2.32M
        (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
7241
140k
  val = ent->content;
7242
140k
  if (val == NULL) return;
7243
  /*
7244
   * inline the entity.
7245
   */
7246
140k
  if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7247
140k
      (!ctxt->disableSAX))
7248
140k
      ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
7249
140k
  return;
7250
140k
    }
7251
7252
    /*
7253
     * The first reference to the entity trigger a parsing phase
7254
     * where the ent->children is filled with the result from
7255
     * the parsing.
7256
     * Note: external parsed entities will not be loaded, it is not
7257
     * required for a non-validating parser, unless the parsing option
7258
     * of validating, or substituting entities were given. Doing so is
7259
     * far more secure as the parser will only process data coming from
7260
     * the document entity by default.
7261
     */
7262
2.18M
    if (((ent->flags & XML_ENT_PARSED) == 0) &&
7263
2.18M
        ((ent->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY) ||
7264
133k
         (ctxt->options & (XML_PARSE_NOENT | XML_PARSE_DTDVALID)))) {
7265
125k
  unsigned long oldsizeentcopy = ctxt->sizeentcopy;
7266
7267
  /*
7268
   * This is a bit hackish but this seems the best
7269
   * way to make sure both SAX and DOM entity support
7270
   * behaves okay.
7271
   */
7272
125k
  void *user_data;
7273
125k
  if (ctxt->userData == ctxt)
7274
125k
      user_data = NULL;
7275
0
  else
7276
0
      user_data = ctxt->userData;
7277
7278
        /* Avoid overflow as much as possible */
7279
125k
        ctxt->sizeentcopy = 0;
7280
7281
125k
        if (ent->flags & XML_ENT_EXPANDING) {
7282
299
            xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7283
299
            xmlHaltParser(ctxt);
7284
299
            return;
7285
299
        }
7286
7287
125k
        ent->flags |= XML_ENT_EXPANDING;
7288
7289
  /*
7290
   * Check that this entity is well formed
7291
   * 4.3.2: An internal general parsed entity is well-formed
7292
   * if its replacement text matches the production labeled
7293
   * content.
7294
   */
7295
125k
  if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7296
60.9k
      ctxt->depth++;
7297
60.9k
      ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content,
7298
60.9k
                                                user_data, &list);
7299
60.9k
      ctxt->depth--;
7300
7301
64.5k
  } else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7302
64.5k
      ctxt->depth++;
7303
64.5k
      ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax,
7304
64.5k
                                     user_data, ctxt->depth, ent->URI,
7305
64.5k
             ent->ExternalID, &list);
7306
64.5k
      ctxt->depth--;
7307
64.5k
  } else {
7308
0
      ret = XML_ERR_ENTITY_PE_INTERNAL;
7309
0
      xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7310
0
       "invalid entity type found\n", NULL);
7311
0
  }
7312
7313
125k
        ent->flags &= ~XML_ENT_EXPANDING;
7314
125k
        ent->flags |= XML_ENT_PARSED | XML_ENT_CHECKED;
7315
125k
        ent->expandedSize = ctxt->sizeentcopy;
7316
125k
  if (ret == XML_ERR_ENTITY_LOOP) {
7317
5.99k
            xmlHaltParser(ctxt);
7318
5.99k
      xmlFreeNodeList(list);
7319
5.99k
      return;
7320
5.99k
  }
7321
119k
  if (xmlParserEntityCheck(ctxt, oldsizeentcopy)) {
7322
0
      xmlFreeNodeList(list);
7323
0
      return;
7324
0
  }
7325
7326
119k
  if ((ret == XML_ERR_OK) && (list != NULL)) {
7327
53.3k
            ent->children = list;
7328
            /*
7329
             * Prune it directly in the generated document
7330
             * except for single text nodes.
7331
             */
7332
53.3k
            if ((ctxt->replaceEntities == 0) ||
7333
53.3k
                (ctxt->parseMode == XML_PARSE_READER) ||
7334
53.3k
                ((list->type == XML_TEXT_NODE) &&
7335
44.3k
                 (list->next == NULL))) {
7336
44.3k
                ent->owner = 1;
7337
443k
                while (list != NULL) {
7338
398k
                    list->parent = (xmlNodePtr) ent;
7339
398k
                    if (list->doc != ent->doc)
7340
0
                        xmlSetTreeDoc(list, ent->doc);
7341
398k
                    if (list->next == NULL)
7342
44.3k
                        ent->last = list;
7343
398k
                    list = list->next;
7344
398k
                }
7345
44.3k
                list = NULL;
7346
44.3k
            } else {
7347
9.07k
                ent->owner = 0;
7348
596k
                while (list != NULL) {
7349
587k
                    list->parent = (xmlNodePtr) ctxt->node;
7350
587k
                    list->doc = ctxt->myDoc;
7351
587k
                    if (list->next == NULL)
7352
9.07k
                        ent->last = list;
7353
587k
                    list = list->next;
7354
587k
                }
7355
9.07k
                list = ent->children;
7356
#ifdef LIBXML_LEGACY_ENABLED
7357
                if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7358
                    xmlAddEntityReference(ent, list, NULL);
7359
#endif /* LIBXML_LEGACY_ENABLED */
7360
9.07k
            }
7361
66.0k
  } else if ((ret != XML_ERR_OK) &&
7362
66.0k
       (ret != XML_WAR_UNDECLARED_ENTITY)) {
7363
33.0k
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7364
33.0k
         "Entity '%s' failed to parse\n", ent->name);
7365
33.0k
            if (ent->content != NULL)
7366
5.95k
                ent->content[0] = 0;
7367
33.0k
  } else if (list != NULL) {
7368
0
      xmlFreeNodeList(list);
7369
0
      list = NULL;
7370
0
  }
7371
7372
        /* Prevent entity from being parsed and expanded twice (Bug 760367). */
7373
119k
        was_checked = 0;
7374
119k
    }
7375
7376
    /*
7377
     * Now that the entity content has been gathered
7378
     * provide it to the application, this can take different forms based
7379
     * on the parsing modes.
7380
     */
7381
2.18M
    if (ent->children == NULL) {
7382
  /*
7383
   * Probably running in SAX mode and the callbacks don't
7384
   * build the entity content. So unless we already went
7385
   * though parsing for first checking go though the entity
7386
   * content to generate callbacks associated to the entity
7387
   */
7388
727k
  if (was_checked != 0) {
7389
653k
      void *user_data;
7390
      /*
7391
       * This is a bit hackish but this seems the best
7392
       * way to make sure both SAX and DOM entity support
7393
       * behaves okay.
7394
       */
7395
653k
      if (ctxt->userData == ctxt)
7396
653k
    user_data = NULL;
7397
0
      else
7398
0
    user_data = ctxt->userData;
7399
7400
653k
      if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7401
32.1k
    ctxt->depth++;
7402
32.1k
    ret = xmlParseBalancedChunkMemoryInternal(ctxt,
7403
32.1k
           ent->content, user_data, NULL);
7404
32.1k
    ctxt->depth--;
7405
621k
      } else if (ent->etype ==
7406
621k
           XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7407
621k
          unsigned long oldsizeentities = ctxt->sizeentities;
7408
7409
621k
    ctxt->depth++;
7410
621k
    ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
7411
621k
         ctxt->sax, user_data, ctxt->depth,
7412
621k
         ent->URI, ent->ExternalID, NULL);
7413
621k
    ctxt->depth--;
7414
7415
                /* Undo the change to sizeentities */
7416
621k
                ctxt->sizeentities = oldsizeentities;
7417
621k
      } else {
7418
0
    ret = XML_ERR_ENTITY_PE_INTERNAL;
7419
0
    xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7420
0
           "invalid entity type found\n", NULL);
7421
0
      }
7422
653k
      if (ret == XML_ERR_ENTITY_LOOP) {
7423
0
    xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7424
0
    return;
7425
0
      }
7426
653k
            if (xmlParserEntityCheck(ctxt, 0))
7427
0
                return;
7428
653k
  }
7429
727k
  if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7430
727k
      (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7431
      /*
7432
       * Entity reference callback comes second, it's somewhat
7433
       * superfluous but a compatibility to historical behaviour
7434
       */
7435
170k
      ctxt->sax->reference(ctxt->userData, ent->name);
7436
170k
  }
7437
727k
  return;
7438
727k
    }
7439
7440
    /*
7441
     * We also check for amplification if entities aren't substituted.
7442
     * They might be expanded later.
7443
     */
7444
1.45M
    if ((was_checked != 0) &&
7445
1.45M
        (xmlParserEntityCheck(ctxt, ent->expandedSize)))
7446
282
        return;
7447
7448
    /*
7449
     * If we didn't get any children for the entity being built
7450
     */
7451
1.45M
    if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7452
1.45M
  (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7453
  /*
7454
   * Create a node.
7455
   */
7456
299k
  ctxt->sax->reference(ctxt->userData, ent->name);
7457
299k
  return;
7458
299k
    }
7459
7460
1.15M
    if (ctxt->replaceEntities)  {
7461
  /*
7462
   * There is a problem on the handling of _private for entities
7463
   * (bug 155816): Should we copy the content of the field from
7464
   * the entity (possibly overwriting some value set by the user
7465
   * when a copy is created), should we leave it alone, or should
7466
   * we try to take care of different situations?  The problem
7467
   * is exacerbated by the usage of this field by the xmlReader.
7468
   * To fix this bug, we look at _private on the created node
7469
   * and, if it's NULL, we copy in whatever was in the entity.
7470
   * If it's not NULL we leave it alone.  This is somewhat of a
7471
   * hack - maybe we should have further tests to determine
7472
   * what to do.
7473
   */
7474
1.15M
  if (ctxt->node != NULL) {
7475
      /*
7476
       * Seems we are generating the DOM content, do
7477
       * a simple tree copy for all references except the first
7478
       * In the first occurrence list contains the replacement.
7479
       */
7480
1.15M
      if (((list == NULL) && (ent->owner == 0)) ||
7481
1.15M
    (ctxt->parseMode == XML_PARSE_READER)) {
7482
363k
    xmlNodePtr nw = NULL, cur, firstChild = NULL;
7483
7484
    /*
7485
     * when operating on a reader, the entities definitions
7486
     * are always owning the entities subtree.
7487
    if (ctxt->parseMode == XML_PARSE_READER)
7488
        ent->owner = 1;
7489
     */
7490
7491
363k
    cur = ent->children;
7492
784k
    while (cur != NULL) {
7493
784k
        nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7494
784k
        if (nw != NULL) {
7495
784k
      if (nw->_private == NULL)
7496
784k
          nw->_private = cur->_private;
7497
784k
      if (firstChild == NULL){
7498
363k
          firstChild = nw;
7499
363k
      }
7500
784k
      nw = xmlAddChild(ctxt->node, nw);
7501
784k
        }
7502
784k
        if (cur == ent->last) {
7503
      /*
7504
       * needed to detect some strange empty
7505
       * node cases in the reader tests
7506
       */
7507
363k
      if ((ctxt->parseMode == XML_PARSE_READER) &&
7508
363k
          (nw != NULL) &&
7509
363k
          (nw->type == XML_ELEMENT_NODE) &&
7510
363k
          (nw->children == NULL))
7511
7.58k
          nw->extra = 1;
7512
7513
363k
      break;
7514
363k
        }
7515
421k
        cur = cur->next;
7516
421k
    }
7517
#ifdef LIBXML_LEGACY_ENABLED
7518
    if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7519
      xmlAddEntityReference(ent, firstChild, nw);
7520
#endif /* LIBXML_LEGACY_ENABLED */
7521
792k
      } else if ((list == NULL) || (ctxt->inputNr > 0)) {
7522
792k
    xmlNodePtr nw = NULL, cur, next, last,
7523
792k
         firstChild = NULL;
7524
7525
    /*
7526
     * Copy the entity child list and make it the new
7527
     * entity child list. The goal is to make sure any
7528
     * ID or REF referenced will be the one from the
7529
     * document content and not the entity copy.
7530
     */
7531
792k
    cur = ent->children;
7532
792k
    ent->children = NULL;
7533
792k
    last = ent->last;
7534
792k
    ent->last = NULL;
7535
2.95M
    while (cur != NULL) {
7536
2.95M
        next = cur->next;
7537
2.95M
        cur->next = NULL;
7538
2.95M
        cur->parent = NULL;
7539
2.95M
        nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7540
2.95M
        if (nw != NULL) {
7541
2.95M
      if (nw->_private == NULL)
7542
2.95M
          nw->_private = cur->_private;
7543
2.95M
      if (firstChild == NULL){
7544
792k
          firstChild = cur;
7545
792k
      }
7546
2.95M
      xmlAddChild((xmlNodePtr) ent, nw);
7547
2.95M
        }
7548
2.95M
        xmlAddChild(ctxt->node, cur);
7549
2.95M
        if (cur == last)
7550
792k
      break;
7551
2.16M
        cur = next;
7552
2.16M
    }
7553
792k
    if (ent->owner == 0)
7554
9.07k
        ent->owner = 1;
7555
#ifdef LIBXML_LEGACY_ENABLED
7556
    if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7557
      xmlAddEntityReference(ent, firstChild, nw);
7558
#endif /* LIBXML_LEGACY_ENABLED */
7559
792k
      } else {
7560
0
    const xmlChar *nbktext;
7561
7562
    /*
7563
     * the name change is to avoid coalescing of the
7564
     * node with a possible previous text one which
7565
     * would make ent->children a dangling pointer
7566
     */
7567
0
    nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
7568
0
          -1);
7569
0
    if (ent->children->type == XML_TEXT_NODE)
7570
0
        ent->children->name = nbktext;
7571
0
    if ((ent->last != ent->children) &&
7572
0
        (ent->last->type == XML_TEXT_NODE))
7573
0
        ent->last->name = nbktext;
7574
0
    xmlAddChildList(ctxt->node, ent->children);
7575
0
      }
7576
7577
      /*
7578
       * This is to avoid a nasty side effect, see
7579
       * characters() in SAX.c
7580
       */
7581
1.15M
      ctxt->nodemem = 0;
7582
1.15M
      ctxt->nodelen = 0;
7583
1.15M
      return;
7584
1.15M
  }
7585
1.15M
    }
7586
1.15M
}
7587
7588
/**
7589
 * xmlParseEntityRef:
7590
 * @ctxt:  an XML parser context
7591
 *
7592
 * DEPRECATED: Internal function, don't use.
7593
 *
7594
 * Parse an entitiy reference. Always consumes '&'.
7595
 *
7596
 * [68] EntityRef ::= '&' Name ';'
7597
 *
7598
 * [ WFC: Entity Declared ]
7599
 * In a document without any DTD, a document with only an internal DTD
7600
 * subset which contains no parameter entity references, or a document
7601
 * with "standalone='yes'", the Name given in the entity reference
7602
 * must match that in an entity declaration, except that well-formed
7603
 * documents need not declare any of the following entities: amp, lt,
7604
 * gt, apos, quot.  The declaration of a parameter entity must precede
7605
 * any reference to it.  Similarly, the declaration of a general entity
7606
 * must precede any reference to it which appears in a default value in an
7607
 * attribute-list declaration. Note that if entities are declared in the
7608
 * external subset or in external parameter entities, a non-validating
7609
 * processor is not obligated to read and process their declarations;
7610
 * for such documents, the rule that an entity must be declared is a
7611
 * well-formedness constraint only if standalone='yes'.
7612
 *
7613
 * [ WFC: Parsed Entity ]
7614
 * An entity reference must not contain the name of an unparsed entity
7615
 *
7616
 * Returns the xmlEntityPtr if found, or NULL otherwise.
7617
 */
7618
xmlEntityPtr
7619
6.82M
xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
7620
6.82M
    const xmlChar *name;
7621
6.82M
    xmlEntityPtr ent = NULL;
7622
7623
6.82M
    GROW;
7624
6.82M
    if (ctxt->instate == XML_PARSER_EOF)
7625
0
        return(NULL);
7626
7627
6.82M
    if (RAW != '&')
7628
0
        return(NULL);
7629
6.82M
    NEXT;
7630
6.82M
    name = xmlParseName(ctxt);
7631
6.82M
    if (name == NULL) {
7632
98.4k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7633
98.4k
           "xmlParseEntityRef: no name\n");
7634
98.4k
        return(NULL);
7635
98.4k
    }
7636
6.72M
    if (RAW != ';') {
7637
76.7k
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7638
76.7k
  return(NULL);
7639
76.7k
    }
7640
6.65M
    NEXT;
7641
7642
    /*
7643
     * Predefined entities override any extra definition
7644
     */
7645
6.65M
    if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7646
4.73M
        ent = xmlGetPredefinedEntity(name);
7647
4.73M
        if (ent != NULL)
7648
244k
            return(ent);
7649
4.73M
    }
7650
7651
    /*
7652
     * Ask first SAX for entity resolution, otherwise try the
7653
     * entities which may have stored in the parser context.
7654
     */
7655
6.40M
    if (ctxt->sax != NULL) {
7656
6.40M
  if (ctxt->sax->getEntity != NULL)
7657
6.40M
      ent = ctxt->sax->getEntity(ctxt->userData, name);
7658
6.40M
  if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7659
6.40M
      (ctxt->options & XML_PARSE_OLDSAX))
7660
39.3k
      ent = xmlGetPredefinedEntity(name);
7661
6.40M
  if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7662
6.40M
      (ctxt->userData==ctxt)) {
7663
107k
      ent = xmlSAX2GetEntity(ctxt, name);
7664
107k
  }
7665
6.40M
    }
7666
6.40M
    if (ctxt->instate == XML_PARSER_EOF)
7667
0
  return(NULL);
7668
    /*
7669
     * [ WFC: Entity Declared ]
7670
     * In a document without any DTD, a document with only an
7671
     * internal DTD subset which contains no parameter entity
7672
     * references, or a document with "standalone='yes'", the
7673
     * Name given in the entity reference must match that in an
7674
     * entity declaration, except that well-formed documents
7675
     * need not declare any of the following entities: amp, lt,
7676
     * gt, apos, quot.
7677
     * The declaration of a parameter entity must precede any
7678
     * reference to it.
7679
     * Similarly, the declaration of a general entity must
7680
     * precede any reference to it which appears in a default
7681
     * value in an attribute-list declaration. Note that if
7682
     * entities are declared in the external subset or in
7683
     * external parameter entities, a non-validating processor
7684
     * is not obligated to read and process their declarations;
7685
     * for such documents, the rule that an entity must be
7686
     * declared is a well-formedness constraint only if
7687
     * standalone='yes'.
7688
     */
7689
6.40M
    if (ent == NULL) {
7690
1.10M
  if ((ctxt->standalone == 1) ||
7691
1.10M
      ((ctxt->hasExternalSubset == 0) &&
7692
1.04M
       (ctxt->hasPErefs == 0))) {
7693
719k
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7694
719k
         "Entity '%s' not defined\n", name);
7695
719k
  } else {
7696
380k
      xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7697
380k
         "Entity '%s' not defined\n", name);
7698
380k
      if ((ctxt->inSubset == 0) &&
7699
380k
    (ctxt->sax != NULL) &&
7700
380k
    (ctxt->sax->reference != NULL)) {
7701
367k
    ctxt->sax->reference(ctxt->userData, name);
7702
367k
      }
7703
380k
  }
7704
1.10M
  ctxt->valid = 0;
7705
1.10M
    }
7706
7707
    /*
7708
     * [ WFC: Parsed Entity ]
7709
     * An entity reference must not contain the name of an
7710
     * unparsed entity
7711
     */
7712
5.30M
    else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7713
1.97k
  xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7714
1.97k
     "Entity reference to unparsed entity %s\n", name);
7715
1.97k
    }
7716
7717
    /*
7718
     * [ WFC: No External Entity References ]
7719
     * Attribute values cannot contain direct or indirect
7720
     * entity references to external entities.
7721
     */
7722
5.30M
    else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7723
5.30M
       (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7724
16.3k
  xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7725
16.3k
       "Attribute references external entity '%s'\n", name);
7726
16.3k
    }
7727
    /*
7728
     * [ WFC: No < in Attribute Values ]
7729
     * The replacement text of any entity referred to directly or
7730
     * indirectly in an attribute value (other than "&lt;") must
7731
     * not contain a <.
7732
     */
7733
5.28M
    else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7734
5.28M
       (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
7735
2.05M
  if ((ent->flags & XML_ENT_CHECKED_LT) == 0) {
7736
28.3k
            if ((ent->content != NULL) && (xmlStrchr(ent->content, '<')))
7737
618
                ent->flags |= XML_ENT_CONTAINS_LT;
7738
28.3k
            ent->flags |= XML_ENT_CHECKED_LT;
7739
28.3k
        }
7740
2.05M
        if (ent->flags & XML_ENT_CONTAINS_LT)
7741
22.4k
            xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7742
22.4k
                    "'<' in entity '%s' is not allowed in attributes "
7743
22.4k
                    "values\n", name);
7744
2.05M
    }
7745
7746
    /*
7747
     * Internal check, no parameter entities here ...
7748
     */
7749
3.23M
    else {
7750
3.23M
  switch (ent->etype) {
7751
0
      case XML_INTERNAL_PARAMETER_ENTITY:
7752
0
      case XML_EXTERNAL_PARAMETER_ENTITY:
7753
0
      xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7754
0
       "Attempt to reference the parameter entity '%s'\n",
7755
0
            name);
7756
0
      break;
7757
3.23M
      default:
7758
3.23M
      break;
7759
3.23M
  }
7760
3.23M
    }
7761
7762
    /*
7763
     * [ WFC: No Recursion ]
7764
     * A parsed entity must not contain a recursive reference
7765
     * to itself, either directly or indirectly.
7766
     * Done somewhere else
7767
     */
7768
6.40M
    return(ent);
7769
6.40M
}
7770
7771
/**
7772
 * xmlParseStringEntityRef:
7773
 * @ctxt:  an XML parser context
7774
 * @str:  a pointer to an index in the string
7775
 *
7776
 * parse ENTITY references declarations, but this version parses it from
7777
 * a string value.
7778
 *
7779
 * [68] EntityRef ::= '&' Name ';'
7780
 *
7781
 * [ WFC: Entity Declared ]
7782
 * In a document without any DTD, a document with only an internal DTD
7783
 * subset which contains no parameter entity references, or a document
7784
 * with "standalone='yes'", the Name given in the entity reference
7785
 * must match that in an entity declaration, except that well-formed
7786
 * documents need not declare any of the following entities: amp, lt,
7787
 * gt, apos, quot.  The declaration of a parameter entity must precede
7788
 * any reference to it.  Similarly, the declaration of a general entity
7789
 * must precede any reference to it which appears in a default value in an
7790
 * attribute-list declaration. Note that if entities are declared in the
7791
 * external subset or in external parameter entities, a non-validating
7792
 * processor is not obligated to read and process their declarations;
7793
 * for such documents, the rule that an entity must be declared is a
7794
 * well-formedness constraint only if standalone='yes'.
7795
 *
7796
 * [ WFC: Parsed Entity ]
7797
 * An entity reference must not contain the name of an unparsed entity
7798
 *
7799
 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7800
 * is updated to the current location in the string.
7801
 */
7802
static xmlEntityPtr
7803
29.4M
xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7804
29.4M
    xmlChar *name;
7805
29.4M
    const xmlChar *ptr;
7806
29.4M
    xmlChar cur;
7807
29.4M
    xmlEntityPtr ent = NULL;
7808
7809
29.4M
    if ((str == NULL) || (*str == NULL))
7810
0
        return(NULL);
7811
29.4M
    ptr = *str;
7812
29.4M
    cur = *ptr;
7813
29.4M
    if (cur != '&')
7814
0
  return(NULL);
7815
7816
29.4M
    ptr++;
7817
29.4M
    name = xmlParseStringName(ctxt, &ptr);
7818
29.4M
    if (name == NULL) {
7819
10.2k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7820
10.2k
           "xmlParseStringEntityRef: no name\n");
7821
10.2k
  *str = ptr;
7822
10.2k
  return(NULL);
7823
10.2k
    }
7824
29.4M
    if (*ptr != ';') {
7825
10.0k
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7826
10.0k
        xmlFree(name);
7827
10.0k
  *str = ptr;
7828
10.0k
  return(NULL);
7829
10.0k
    }
7830
29.4M
    ptr++;
7831
7832
7833
    /*
7834
     * Predefined entities override any extra definition
7835
     */
7836
29.4M
    if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7837
26.5M
        ent = xmlGetPredefinedEntity(name);
7838
26.5M
        if (ent != NULL) {
7839
104k
            xmlFree(name);
7840
104k
            *str = ptr;
7841
104k
            return(ent);
7842
104k
        }
7843
26.5M
    }
7844
7845
    /*
7846
     * Ask first SAX for entity resolution, otherwise try the
7847
     * entities which may have stored in the parser context.
7848
     */
7849
29.3M
    if (ctxt->sax != NULL) {
7850
29.3M
  if (ctxt->sax->getEntity != NULL)
7851
29.3M
      ent = ctxt->sax->getEntity(ctxt->userData, name);
7852
29.3M
  if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX))
7853
1.06M
      ent = xmlGetPredefinedEntity(name);
7854
29.3M
  if ((ent == NULL) && (ctxt->userData==ctxt)) {
7855
1.64M
      ent = xmlSAX2GetEntity(ctxt, name);
7856
1.64M
  }
7857
29.3M
    }
7858
29.3M
    if (ctxt->instate == XML_PARSER_EOF) {
7859
0
  xmlFree(name);
7860
0
  return(NULL);
7861
0
    }
7862
7863
    /*
7864
     * [ WFC: Entity Declared ]
7865
     * In a document without any DTD, a document with only an
7866
     * internal DTD subset which contains no parameter entity
7867
     * references, or a document with "standalone='yes'", the
7868
     * Name given in the entity reference must match that in an
7869
     * entity declaration, except that well-formed documents
7870
     * need not declare any of the following entities: amp, lt,
7871
     * gt, apos, quot.
7872
     * The declaration of a parameter entity must precede any
7873
     * reference to it.
7874
     * Similarly, the declaration of a general entity must
7875
     * precede any reference to it which appears in a default
7876
     * value in an attribute-list declaration. Note that if
7877
     * entities are declared in the external subset or in
7878
     * external parameter entities, a non-validating processor
7879
     * is not obligated to read and process their declarations;
7880
     * for such documents, the rule that an entity must be
7881
     * declared is a well-formedness constraint only if
7882
     * standalone='yes'.
7883
     */
7884
29.3M
    if (ent == NULL) {
7885
1.64M
  if ((ctxt->standalone == 1) ||
7886
1.64M
      ((ctxt->hasExternalSubset == 0) &&
7887
1.64M
       (ctxt->hasPErefs == 0))) {
7888
1.58M
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7889
1.58M
         "Entity '%s' not defined\n", name);
7890
1.58M
  } else {
7891
54.4k
      xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7892
54.4k
        "Entity '%s' not defined\n",
7893
54.4k
        name);
7894
54.4k
  }
7895
  /* TODO ? check regressions ctxt->valid = 0; */
7896
1.64M
    }
7897
7898
    /*
7899
     * [ WFC: Parsed Entity ]
7900
     * An entity reference must not contain the name of an
7901
     * unparsed entity
7902
     */
7903
27.6M
    else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7904
3.62k
  xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7905
3.62k
     "Entity reference to unparsed entity %s\n", name);
7906
3.62k
    }
7907
7908
    /*
7909
     * [ WFC: No External Entity References ]
7910
     * Attribute values cannot contain direct or indirect
7911
     * entity references to external entities.
7912
     */
7913
27.6M
    else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7914
27.6M
       (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7915
8.12k
  xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7916
8.12k
   "Attribute references external entity '%s'\n", name);
7917
8.12k
    }
7918
    /*
7919
     * [ WFC: No < in Attribute Values ]
7920
     * The replacement text of any entity referred to directly or
7921
     * indirectly in an attribute value (other than "&lt;") must
7922
     * not contain a <.
7923
     */
7924
27.6M
    else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7925
27.6M
       (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
7926
27.3M
  if ((ent->flags & XML_ENT_CHECKED_LT) == 0) {
7927
11.6k
            if ((ent->content != NULL) && (xmlStrchr(ent->content, '<')))
7928
1.03k
                ent->flags |= XML_ENT_CONTAINS_LT;
7929
11.6k
            ent->flags |= XML_ENT_CHECKED_LT;
7930
11.6k
        }
7931
27.3M
        if (ent->flags & XML_ENT_CONTAINS_LT)
7932
94.2k
            xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7933
94.2k
                    "'<' in entity '%s' is not allowed in attributes "
7934
94.2k
                    "values\n", name);
7935
27.3M
    }
7936
7937
    /*
7938
     * Internal check, no parameter entities here ...
7939
     */
7940
289k
    else {
7941
289k
  switch (ent->etype) {
7942
0
      case XML_INTERNAL_PARAMETER_ENTITY:
7943
0
      case XML_EXTERNAL_PARAMETER_ENTITY:
7944
0
    xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7945
0
       "Attempt to reference the parameter entity '%s'\n",
7946
0
          name);
7947
0
      break;
7948
289k
      default:
7949
289k
      break;
7950
289k
  }
7951
289k
    }
7952
7953
    /*
7954
     * [ WFC: No Recursion ]
7955
     * A parsed entity must not contain a recursive reference
7956
     * to itself, either directly or indirectly.
7957
     * Done somewhere else
7958
     */
7959
7960
29.3M
    xmlFree(name);
7961
29.3M
    *str = ptr;
7962
29.3M
    return(ent);
7963
29.3M
}
7964
7965
/**
7966
 * xmlParsePEReference:
7967
 * @ctxt:  an XML parser context
7968
 *
7969
 * DEPRECATED: Internal function, don't use.
7970
 *
7971
 * Parse a parameter entity reference. Always consumes '%'.
7972
 *
7973
 * The entity content is handled directly by pushing it's content as
7974
 * a new input stream.
7975
 *
7976
 * [69] PEReference ::= '%' Name ';'
7977
 *
7978
 * [ WFC: No Recursion ]
7979
 * A parsed entity must not contain a recursive
7980
 * reference to itself, either directly or indirectly.
7981
 *
7982
 * [ WFC: Entity Declared ]
7983
 * In a document without any DTD, a document with only an internal DTD
7984
 * subset which contains no parameter entity references, or a document
7985
 * with "standalone='yes'", ...  ... The declaration of a parameter
7986
 * entity must precede any reference to it...
7987
 *
7988
 * [ VC: Entity Declared ]
7989
 * In a document with an external subset or external parameter entities
7990
 * with "standalone='no'", ...  ... The declaration of a parameter entity
7991
 * must precede any reference to it...
7992
 *
7993
 * [ WFC: In DTD ]
7994
 * Parameter-entity references may only appear in the DTD.
7995
 * NOTE: misleading but this is handled.
7996
 */
7997
void
7998
xmlParsePEReference(xmlParserCtxtPtr ctxt)
7999
36.9M
{
8000
36.9M
    const xmlChar *name;
8001
36.9M
    xmlEntityPtr entity = NULL;
8002
36.9M
    xmlParserInputPtr input;
8003
8004
36.9M
    if (RAW != '%')
8005
0
        return;
8006
36.9M
    NEXT;
8007
36.9M
    name = xmlParseName(ctxt);
8008
36.9M
    if (name == NULL) {
8009
14.5k
  xmlFatalErrMsg(ctxt, XML_ERR_PEREF_NO_NAME, "PEReference: no name\n");
8010
14.5k
  return;
8011
14.5k
    }
8012
36.9M
    if (xmlParserDebugEntities)
8013
0
  xmlGenericError(xmlGenericErrorContext,
8014
0
    "PEReference: %s\n", name);
8015
36.9M
    if (RAW != ';') {
8016
765k
  xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
8017
765k
        return;
8018
765k
    }
8019
8020
36.1M
    NEXT;
8021
8022
    /*
8023
     * Request the entity from SAX
8024
     */
8025
36.1M
    if ((ctxt->sax != NULL) &&
8026
36.1M
  (ctxt->sax->getParameterEntity != NULL))
8027
36.1M
  entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8028
36.1M
    if (ctxt->instate == XML_PARSER_EOF)
8029
0
  return;
8030
36.1M
    if (entity == NULL) {
8031
  /*
8032
   * [ WFC: Entity Declared ]
8033
   * In a document without any DTD, a document with only an
8034
   * internal DTD subset which contains no parameter entity
8035
   * references, or a document with "standalone='yes'", ...
8036
   * ... The declaration of a parameter entity must precede
8037
   * any reference to it...
8038
   */
8039
6.94M
  if ((ctxt->standalone == 1) ||
8040
6.94M
      ((ctxt->hasExternalSubset == 0) &&
8041
6.94M
       (ctxt->hasPErefs == 0))) {
8042
3.12k
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8043
3.12k
            "PEReference: %%%s; not found\n",
8044
3.12k
            name);
8045
6.94M
  } else {
8046
      /*
8047
       * [ VC: Entity Declared ]
8048
       * In a document with an external subset or external
8049
       * parameter entities with "standalone='no'", ...
8050
       * ... The declaration of a parameter entity must
8051
       * precede any reference to it...
8052
       */
8053
6.94M
            if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
8054
1.47M
                xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
8055
1.47M
                                 "PEReference: %%%s; not found\n",
8056
1.47M
                                 name, NULL);
8057
1.47M
            } else
8058
5.46M
                xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8059
5.46M
                              "PEReference: %%%s; not found\n",
8060
5.46M
                              name, NULL);
8061
6.94M
            ctxt->valid = 0;
8062
6.94M
  }
8063
29.2M
    } else {
8064
  /*
8065
   * Internal checking in case the entity quest barfed
8066
   */
8067
29.2M
  if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8068
29.2M
      (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8069
0
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8070
0
      "Internal: %%%s; is not a parameter entity\n",
8071
0
        name, NULL);
8072
29.2M
  } else {
8073
29.2M
            xmlChar start[4];
8074
29.2M
            xmlCharEncoding enc;
8075
29.2M
            unsigned long parentConsumed;
8076
29.2M
            xmlEntityPtr oldEnt;
8077
8078
29.2M
      if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
8079
29.2M
          ((ctxt->options & XML_PARSE_NOENT) == 0) &&
8080
29.2M
    ((ctxt->options & XML_PARSE_DTDVALID) == 0) &&
8081
29.2M
    ((ctxt->options & XML_PARSE_DTDLOAD) == 0) &&
8082
29.2M
    ((ctxt->options & XML_PARSE_DTDATTR) == 0) &&
8083
29.2M
    (ctxt->replaceEntities == 0) &&
8084
29.2M
    (ctxt->validate == 0))
8085
1.85k
    return;
8086
8087
29.2M
            if (entity->flags & XML_ENT_EXPANDING) {
8088
614
                xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
8089
614
                xmlHaltParser(ctxt);
8090
614
                return;
8091
614
            }
8092
8093
            /* Must be computed from old input before pushing new input. */
8094
29.2M
            parentConsumed = ctxt->input->parentConsumed;
8095
29.2M
            oldEnt = ctxt->input->entity;
8096
29.2M
            if ((oldEnt == NULL) ||
8097
29.2M
                ((oldEnt->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
8098
28.7M
                 ((oldEnt->flags & XML_ENT_PARSED) == 0))) {
8099
1.13M
                xmlSaturatedAdd(&parentConsumed, ctxt->input->consumed);
8100
1.13M
                xmlSaturatedAddSizeT(&parentConsumed,
8101
1.13M
                                     ctxt->input->cur - ctxt->input->base);
8102
1.13M
            }
8103
8104
29.2M
      input = xmlNewEntityInputStream(ctxt, entity);
8105
29.2M
      if (xmlPushInput(ctxt, input) < 0) {
8106
11.4k
                xmlFreeInputStream(input);
8107
11.4k
    return;
8108
11.4k
            }
8109
8110
29.2M
            entity->flags |= XML_ENT_EXPANDING;
8111
8112
29.2M
            input->parentConsumed = parentConsumed;
8113
8114
29.2M
      if (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) {
8115
                /*
8116
                 * Get the 4 first bytes and decode the charset
8117
                 * if enc != XML_CHAR_ENCODING_NONE
8118
                 * plug some encoding conversion routines.
8119
                 * Note that, since we may have some non-UTF8
8120
                 * encoding (like UTF16, bug 135229), the 'length'
8121
                 * is not known, but we can calculate based upon
8122
                 * the amount of data in the buffer.
8123
                 */
8124
104k
                GROW
8125
104k
                if (ctxt->instate == XML_PARSER_EOF)
8126
0
                    return;
8127
104k
                if ((ctxt->input->end - ctxt->input->cur)>=4) {
8128
103k
                    start[0] = RAW;
8129
103k
                    start[1] = NXT(1);
8130
103k
                    start[2] = NXT(2);
8131
103k
                    start[3] = NXT(3);
8132
103k
                    enc = xmlDetectCharEncoding(start, 4);
8133
103k
                    if (enc != XML_CHAR_ENCODING_NONE) {
8134
50.2k
                        xmlSwitchEncoding(ctxt, enc);
8135
50.2k
                    }
8136
103k
                }
8137
8138
104k
                if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
8139
104k
                    (IS_BLANK_CH(NXT(5)))) {
8140
42.6k
                    xmlParseTextDecl(ctxt);
8141
42.6k
                }
8142
104k
            }
8143
29.2M
  }
8144
29.2M
    }
8145
36.1M
    ctxt->hasPErefs = 1;
8146
36.1M
}
8147
8148
/**
8149
 * xmlLoadEntityContent:
8150
 * @ctxt:  an XML parser context
8151
 * @entity: an unloaded system entity
8152
 *
8153
 * Load the original content of the given system entity from the
8154
 * ExternalID/SystemID given. This is to be used for Included in Literal
8155
 * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
8156
 *
8157
 * Returns 0 in case of success and -1 in case of failure
8158
 */
8159
static int
8160
4.38k
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
8161
4.38k
    xmlParserInputPtr input;
8162
4.38k
    xmlBufferPtr buf;
8163
4.38k
    int l, c;
8164
4.38k
    int count = 0;
8165
8166
4.38k
    if ((ctxt == NULL) || (entity == NULL) ||
8167
4.38k
        ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
8168
4.38k
   (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
8169
4.38k
  (entity->content != NULL)) {
8170
0
  xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8171
0
              "xmlLoadEntityContent parameter error");
8172
0
        return(-1);
8173
0
    }
8174
8175
4.38k
    if (xmlParserDebugEntities)
8176
0
  xmlGenericError(xmlGenericErrorContext,
8177
0
    "Reading %s entity content input\n", entity->name);
8178
8179
4.38k
    buf = xmlBufferCreate();
8180
4.38k
    if (buf == NULL) {
8181
0
  xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8182
0
              "xmlLoadEntityContent parameter error");
8183
0
        return(-1);
8184
0
    }
8185
4.38k
    xmlBufferSetAllocationScheme(buf, XML_BUFFER_ALLOC_DOUBLEIT);
8186
8187
4.38k
    input = xmlNewEntityInputStream(ctxt, entity);
8188
4.38k
    if (input == NULL) {
8189
1.17k
  xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8190
1.17k
              "xmlLoadEntityContent input error");
8191
1.17k
  xmlBufferFree(buf);
8192
1.17k
        return(-1);
8193
1.17k
    }
8194
8195
    /*
8196
     * Push the entity as the current input, read char by char
8197
     * saving to the buffer until the end of the entity or an error
8198
     */
8199
3.21k
    if (xmlPushInput(ctxt, input) < 0) {
8200
0
        xmlBufferFree(buf);
8201
0
  xmlFreeInputStream(input);
8202
0
  return(-1);
8203
0
    }
8204
8205
3.21k
    GROW;
8206
3.21k
    c = CUR_CHAR(l);
8207
14.0M
    while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
8208
14.0M
           (IS_CHAR(c))) {
8209
14.0M
        xmlBufferAdd(buf, ctxt->input->cur, l);
8210
14.0M
  if (count++ > XML_PARSER_CHUNK_SIZE) {
8211
135k
      count = 0;
8212
135k
      GROW;
8213
135k
            if (ctxt->instate == XML_PARSER_EOF) {
8214
0
                xmlBufferFree(buf);
8215
0
                return(-1);
8216
0
            }
8217
135k
  }
8218
14.0M
  NEXTL(l);
8219
14.0M
  c = CUR_CHAR(l);
8220
14.0M
  if (c == 0) {
8221
2.82k
      count = 0;
8222
2.82k
      GROW;
8223
2.82k
            if (ctxt->instate == XML_PARSER_EOF) {
8224
0
                xmlBufferFree(buf);
8225
0
                return(-1);
8226
0
            }
8227
2.82k
      c = CUR_CHAR(l);
8228
2.82k
  }
8229
14.0M
    }
8230
8231
3.21k
    if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
8232
1.85k
        xmlSaturatedAdd(&ctxt->sizeentities, ctxt->input->consumed);
8233
1.85k
        xmlPopInput(ctxt);
8234
1.85k
    } else if (!IS_CHAR(c)) {
8235
1.35k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
8236
1.35k
                          "xmlLoadEntityContent: invalid char value %d\n",
8237
1.35k
                    c);
8238
1.35k
  xmlBufferFree(buf);
8239
1.35k
  return(-1);
8240
1.35k
    }
8241
1.85k
    entity->content = buf->content;
8242
1.85k
    entity->length = buf->use;
8243
1.85k
    buf->content = NULL;
8244
1.85k
    xmlBufferFree(buf);
8245
8246
1.85k
    return(0);
8247
3.21k
}
8248
8249
/**
8250
 * xmlParseStringPEReference:
8251
 * @ctxt:  an XML parser context
8252
 * @str:  a pointer to an index in the string
8253
 *
8254
 * parse PEReference declarations
8255
 *
8256
 * [69] PEReference ::= '%' Name ';'
8257
 *
8258
 * [ WFC: No Recursion ]
8259
 * A parsed entity must not contain a recursive
8260
 * reference to itself, either directly or indirectly.
8261
 *
8262
 * [ WFC: Entity Declared ]
8263
 * In a document without any DTD, a document with only an internal DTD
8264
 * subset which contains no parameter entity references, or a document
8265
 * with "standalone='yes'", ...  ... The declaration of a parameter
8266
 * entity must precede any reference to it...
8267
 *
8268
 * [ VC: Entity Declared ]
8269
 * In a document with an external subset or external parameter entities
8270
 * with "standalone='no'", ...  ... The declaration of a parameter entity
8271
 * must precede any reference to it...
8272
 *
8273
 * [ WFC: In DTD ]
8274
 * Parameter-entity references may only appear in the DTD.
8275
 * NOTE: misleading but this is handled.
8276
 *
8277
 * Returns the xmlEntityPtr for the referenced parameter entity, or NULL
 *         (e.g. when the reference is malformed).
8278
 *         str is updated to the current value of the index
8279
 */
8280
static xmlEntityPtr
8281
706k
xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
8282
706k
    const xmlChar *ptr;
8283
706k
    xmlChar cur;
8284
706k
    xmlChar *name;
8285
706k
    xmlEntityPtr entity = NULL;
8286
8287
706k
    if ((str == NULL) || (*str == NULL)) return(NULL);
8288
706k
    ptr = *str;
8289
706k
    cur = *ptr;
8290
706k
    if (cur != '%')
8291
0
        return(NULL);
8292
706k
    ptr++;
8293
706k
    name = xmlParseStringName(ctxt, &ptr);
8294
706k
    if (name == NULL) {
8295
6.96k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8296
6.96k
           "xmlParseStringPEReference: no name\n");
8297
6.96k
  *str = ptr;
8298
6.96k
  return(NULL);
8299
6.96k
    }
8300
699k
    cur = *ptr;
8301
699k
    if (cur != ';') {
8302
3.10k
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
8303
3.10k
  xmlFree(name);
8304
3.10k
  *str = ptr;
8305
3.10k
  return(NULL);
8306
3.10k
    }
8307
696k
    ptr++;
8308
8309
    /*
8310
     * Request the entity from SAX
8311
     */
8312
696k
    if ((ctxt->sax != NULL) &&
8313
696k
  (ctxt->sax->getParameterEntity != NULL))
8314
696k
  entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8315
696k
    if (ctxt->instate == XML_PARSER_EOF) {
8316
0
  xmlFree(name);
8317
0
  *str = ptr;
8318
0
  return(NULL);
8319
0
    }
8320
696k
    if (entity == NULL) {
8321
  /*
8322
   * [ WFC: Entity Declared ]
8323
   * In a document without any DTD, a document with only an
8324
   * internal DTD subset which contains no parameter entity
8325
   * references, or a document with "standalone='yes'", ...
8326
   * ... The declaration of a parameter entity must precede
8327
   * any reference to it...
8328
   */
8329
251k
  if ((ctxt->standalone == 1) ||
8330
251k
      ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) {
8331
471
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8332
471
     "PEReference: %%%s; not found\n", name);
8333
250k
  } else {
8334
      /*
8335
       * [ VC: Entity Declared ]
8336
       * In a document with an external subset or external
8337
       * parameter entities with "standalone='no'", ...
8338
       * ... The declaration of a parameter entity must
8339
       * precede any reference to it...
8340
       */
8341
250k
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8342
250k
        "PEReference: %%%s; not found\n",
8343
250k
        name, NULL);
8344
250k
      ctxt->valid = 0;
8345
250k
  }
8346
445k
    } else {
8347
  /*
8348
   * Internal checking in case the entity quest barfed
8349
   */
8350
445k
  if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8351
445k
      (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8352
0
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8353
0
        "%%%s; is not a parameter entity\n",
8354
0
        name, NULL);
8355
0
  }
8356
445k
    }
8357
696k
    ctxt->hasPErefs = 1;
8358
696k
    xmlFree(name);
8359
696k
    *str = ptr;
8360
696k
    return(entity);
8361
696k
}
8362
8363
/**
8364
 * xmlParseDocTypeDecl:
8365
 * @ctxt:  an XML parser context
8366
 *
8367
 * DEPRECATED: Internal function, don't use.
8368
 *
8369
 * parse a DOCTYPE declaration
8370
 *
8371
 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
8372
 *                      ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8373
 *
8374
 * [ VC: Root Element Type ]
8375
 * The Name in the document type declaration must match the element
8376
 * type of the root element.
8377
 */
8378
8379
void
8380
291k
xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
8381
291k
    const xmlChar *name = NULL;
8382
291k
    xmlChar *ExternalID = NULL;
8383
291k
    xmlChar *URI = NULL;
8384
8385
    /*
8386
     * We know that '<!DOCTYPE' has been detected.
8387
     */
8388
291k
    SKIP(9);
8389
8390
291k
    SKIP_BLANKS;
8391
8392
    /*
8393
     * Parse the DOCTYPE name.
8394
     */
8395
291k
    name = xmlParseName(ctxt);
8396
291k
    if (name == NULL) {
8397
485
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8398
485
           "xmlParseDocTypeDecl : no DOCTYPE name !\n");
8399
485
    }
8400
291k
    ctxt->intSubName = name;
8401
8402
291k
    SKIP_BLANKS;
8403
8404
    /*
8405
     * Check for SystemID and ExternalID
8406
     */
8407
291k
    URI = xmlParseExternalID(ctxt, &ExternalID, 1);
8408
8409
291k
    if ((URI != NULL) || (ExternalID != NULL)) {
8410
116k
        ctxt->hasExternalSubset = 1;
8411
116k
    }
8412
291k
    ctxt->extSubURI = URI;
8413
291k
    ctxt->extSubSystem = ExternalID;
8414
8415
291k
    SKIP_BLANKS;
8416
8417
    /*
8418
     * Create and update the internal subset.
8419
     */
8420
291k
    if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
8421
291k
  (!ctxt->disableSAX))
8422
287k
  ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
8423
291k
    if (ctxt->instate == XML_PARSER_EOF)
8424
0
  return;
8425
8426
    /*
8427
     * Is there any internal subset declarations ?
8428
     * they are handled separately in xmlParseInternalSubset()
8429
     */
8430
291k
    if (RAW == '[')
8431
233k
  return;
8432
8433
    /*
8434
     * We should be at the end of the DOCTYPE declaration.
8435
     */
8436
58.3k
    if (RAW != '>') {
8437
5.99k
  xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8438
5.99k
    }
8439
58.3k
    NEXT;
8440
58.3k
}
8441
8442
/**
8443
 * xmlParseInternalSubset:
8444
 * @ctxt:  an XML parser context
8445
 *
8446
 * parse the internal subset declaration
8447
 *
8448
 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8449
 */
8450
8451
static void
8452
233k
xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
8453
    /*
8454
     * Is there any DTD definition ?
8455
     */
8456
233k
    if (RAW == '[') {
8457
233k
        int baseInputNr = ctxt->inputNr;
8458
233k
        ctxt->instate = XML_PARSER_DTD;
8459
233k
        NEXT;
8460
  /*
8461
   * Parse the succession of Markup declarations and
8462
   * PEReferences.
8463
   * Subsequence (markupdecl | PEReference | S)*
8464
   */
8465
233k
  SKIP_BLANKS;
8466
31.1M
  while (((RAW != ']') || (ctxt->inputNr > baseInputNr)) &&
8467
31.1M
               (ctxt->instate != XML_PARSER_EOF)) {
8468
8469
            /*
8470
             * Conditional sections are allowed from external entities included
8471
             * by PE References in the internal subset.
8472
             */
8473
30.9M
            if ((ctxt->inputNr > 1) && (ctxt->input->filename != NULL) &&
8474
30.9M
                (RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
8475
0
                xmlParseConditionalSections(ctxt);
8476
30.9M
            } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
8477
30.8M
          xmlParseMarkupDecl(ctxt);
8478
30.8M
            } else if (RAW == '%') {
8479
62.6k
          xmlParsePEReference(ctxt);
8480
62.6k
            } else {
8481
45.4k
    xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8482
45.4k
                        "xmlParseInternalSubset: error detected in"
8483
45.4k
                        " Markup declaration\n");
8484
45.4k
                xmlHaltParser(ctxt);
8485
45.4k
                return;
8486
45.4k
            }
8487
30.9M
      SKIP_BLANKS;
8488
30.9M
  }
8489
187k
  if (RAW == ']') {
8490
175k
      NEXT;
8491
175k
      SKIP_BLANKS;
8492
175k
  }
8493
187k
    }
8494
8495
    /*
8496
     * We should be at the end of the DOCTYPE declaration.
8497
     */
8498
187k
    if (RAW != '>') {
8499
12.8k
  xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8500
12.8k
  return;
8501
12.8k
    }
8502
174k
    NEXT;
8503
174k
}
8504
8505
#ifdef LIBXML_SAX1_ENABLED
8506
/**
8507
 * xmlParseAttribute:
8508
 * @ctxt:  an XML parser context
8509
 * @value:  a xmlChar ** used to store the value of the attribute
8510
 *
8511
 * DEPRECATED: Internal function, don't use.
8512
 *
8513
 * parse an attribute
8514
 *
8515
 * [41] Attribute ::= Name Eq AttValue
8516
 *
8517
 * [ WFC: No External Entity References ]
8518
 * Attribute values cannot contain direct or indirect entity references
8519
 * to external entities.
8520
 *
8521
 * [ WFC: No < in Attribute Values ]
8522
 * The replacement text of any entity referred to directly or indirectly in
8523
 * an attribute value (other than "&lt;") must not contain a <.
8524
 *
8525
 * [ VC: Attribute Value Type ]
8526
 * The attribute must have been declared; the value must be of the type
8527
 * declared for it.
8528
 *
8529
 * [25] Eq ::= S? '=' S?
8530
 *
8531
 * With namespace:
8532
 *
8533
 * [NS 11] Attribute ::= QName Eq AttValue
8534
 *
8535
 * Also the case QName == xmlns:??? is handled independently as a namespace
8536
 * definition.
8537
 *
8538
 * Returns the attribute name, and the value in *value.
8539
 */
8540
8541
const xmlChar *
8542
3.63M
xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
8543
3.63M
    const xmlChar *name;
8544
3.63M
    xmlChar *val;
8545
8546
3.63M
    *value = NULL;
8547
3.63M
    GROW;
8548
3.63M
    name = xmlParseName(ctxt);
8549
3.63M
    if (name == NULL) {
8550
195k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8551
195k
                 "error parsing attribute name\n");
8552
195k
        return(NULL);
8553
195k
    }
8554
8555
    /*
8556
     * read the value
8557
     */
8558
3.43M
    SKIP_BLANKS;
8559
3.43M
    if (RAW == '=') {
8560
3.31M
        NEXT;
8561
3.31M
  SKIP_BLANKS;
8562
3.31M
  val = xmlParseAttValue(ctxt);
8563
3.31M
  ctxt->instate = XML_PARSER_CONTENT;
8564
3.31M
    } else {
8565
120k
  xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8566
120k
         "Specification mandates value for attribute %s\n", name);
8567
120k
  return(name);
8568
120k
    }
8569
8570
    /*
8571
     * Check that xml:lang conforms to the specification
8572
     * No more registered as an error, just generate a warning now
8573
     * since this was deprecated in XML second edition
8574
     */
8575
3.31M
    if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8576
12.6k
  if (!xmlCheckLanguageID(val)) {
8577
7.11k
      xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8578
7.11k
              "Malformed value for xml:lang : %s\n",
8579
7.11k
        val, NULL);
8580
7.11k
  }
8581
12.6k
    }
8582
8583
    /*
8584
     * Check that xml:space conforms to the specification
8585
     */
8586
3.31M
    if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8587
1.08k
  if (xmlStrEqual(val, BAD_CAST "default"))
8588
136
      *(ctxt->space) = 0;
8589
952
  else if (xmlStrEqual(val, BAD_CAST "preserve"))
8590
516
      *(ctxt->space) = 1;
8591
436
  else {
8592
436
    xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8593
436
"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8594
436
                                 val, NULL);
8595
436
  }
8596
1.08k
    }
8597
8598
3.31M
    *value = val;
8599
3.31M
    return(name);
8600
3.43M
}
8601
8602
/**
8603
 * xmlParseStartTag:
8604
 * @ctxt:  an XML parser context
8605
 *
8606
 * DEPRECATED: Internal function, don't use.
8607
 *
8608
 * Parse a start tag. Always consumes '<'.
8609
 *
8610
 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8611
 *
8612
 * [ WFC: Unique Att Spec ]
8613
 * No attribute name may appear more than once in the same start-tag or
8614
 * empty-element tag.
8615
 *
8616
 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8617
 *
8618
 * [ WFC: Unique Att Spec ]
8619
 * No attribute name may appear more than once in the same start-tag or
8620
 * empty-element tag.
8621
 *
8622
 * With namespace:
8623
 *
8624
 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8625
 *
8626
 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8627
 *
8628
 * Returns the element name parsed
8629
 */
8630
8631
const xmlChar *
8632
3.79M
xmlParseStartTag(xmlParserCtxtPtr ctxt) {
8633
3.79M
    const xmlChar *name;
8634
3.79M
    const xmlChar *attname;
8635
3.79M
    xmlChar *attvalue;
8636
3.79M
    const xmlChar **atts = ctxt->atts;
8637
3.79M
    int nbatts = 0;
8638
3.79M
    int maxatts = ctxt->maxatts;
8639
3.79M
    int i;
8640
8641
3.79M
    if (RAW != '<') return(NULL);
8642
3.79M
    NEXT1;
8643
8644
3.79M
    name = xmlParseName(ctxt);
8645
3.79M
    if (name == NULL) {
8646
140k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8647
140k
       "xmlParseStartTag: invalid element name\n");
8648
140k
        return(NULL);
8649
140k
    }
8650
8651
    /*
8652
     * Now parse the attributes, it ends up with the ending
8653
     *
8654
     * (S Attribute)* S?
8655
     */
8656
3.65M
    SKIP_BLANKS;
8657
3.65M
    GROW;
8658
8659
5.09M
    while (((RAW != '>') &&
8660
5.09M
     ((RAW != '/') || (NXT(1) != '>')) &&
8661
5.09M
     (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
8662
3.63M
  attname = xmlParseAttribute(ctxt, &attvalue);
8663
3.63M
        if (attname == NULL) {
8664
195k
      xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
8665
195k
         "xmlParseStartTag: problem parsing attributes\n");
8666
195k
      break;
8667
195k
  }
8668
3.43M
        if (attvalue != NULL) {
8669
      /*
8670
       * [ WFC: Unique Att Spec ]
8671
       * No attribute name may appear more than once in the same
8672
       * start-tag or empty-element tag.
8673
       */
8674
4.57M
      for (i = 0; i < nbatts;i += 2) {
8675
1.28M
          if (xmlStrEqual(atts[i], attname)) {
8676
7.74k
        xmlErrAttributeDup(ctxt, NULL, attname);
8677
7.74k
        xmlFree(attvalue);
8678
7.74k
        goto failed;
8679
7.74k
    }
8680
1.28M
      }
8681
      /*
8682
       * Add the pair to atts
8683
       */
8684
3.29M
      if (atts == NULL) {
8685
80.4k
          maxatts = 22; /* allow for 10 attrs by default */
8686
80.4k
          atts = (const xmlChar **)
8687
80.4k
           xmlMalloc(maxatts * sizeof(xmlChar *));
8688
80.4k
    if (atts == NULL) {
8689
0
        xmlErrMemory(ctxt, NULL);
8690
0
        if (attvalue != NULL)
8691
0
      xmlFree(attvalue);
8692
0
        goto failed;
8693
0
    }
8694
80.4k
    ctxt->atts = atts;
8695
80.4k
    ctxt->maxatts = maxatts;
8696
3.21M
      } else if (nbatts + 4 > maxatts) {
8697
135
          const xmlChar **n;
8698
8699
135
          maxatts *= 2;
8700
135
          n = (const xmlChar **) xmlRealloc((void *) atts,
8701
135
               maxatts * sizeof(const xmlChar *));
8702
135
    if (n == NULL) {
8703
0
        xmlErrMemory(ctxt, NULL);
8704
0
        if (attvalue != NULL)
8705
0
      xmlFree(attvalue);
8706
0
        goto failed;
8707
0
    }
8708
135
    atts = n;
8709
135
    ctxt->atts = atts;
8710
135
    ctxt->maxatts = maxatts;
8711
135
      }
8712
3.29M
      atts[nbatts++] = attname;
8713
3.29M
      atts[nbatts++] = attvalue;
8714
3.29M
      atts[nbatts] = NULL;
8715
3.29M
      atts[nbatts + 1] = NULL;
8716
3.29M
  } else {
8717
132k
      if (attvalue != NULL)
8718
0
    xmlFree(attvalue);
8719
132k
  }
8720
8721
3.43M
failed:
8722
8723
3.43M
  GROW
8724
3.43M
  if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8725
2.00M
      break;
8726
1.43M
  if (SKIP_BLANKS == 0) {
8727
211k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8728
211k
         "attributes construct error\n");
8729
211k
  }
8730
1.43M
  SHRINK;
8731
1.43M
        GROW;
8732
1.43M
    }
8733
8734
    /*
8735
     * SAX: Start of Element !
8736
     */
8737
3.65M
    if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
8738
3.65M
  (!ctxt->disableSAX)) {
8739
3.27M
  if (nbatts > 0)
8740
1.86M
      ctxt->sax->startElement(ctxt->userData, name, atts);
8741
1.41M
  else
8742
1.41M
      ctxt->sax->startElement(ctxt->userData, name, NULL);
8743
3.27M
    }
8744
8745
3.65M
    if (atts != NULL) {
8746
        /* Free only the content strings */
8747
6.63M
        for (i = 1;i < nbatts;i+=2)
8748
3.29M
      if (atts[i] != NULL)
8749
3.29M
         xmlFree((xmlChar *) atts[i]);
8750
3.34M
    }
8751
3.65M
    return(name);
8752
3.65M
}
8753
8754
/**
8755
 * xmlParseEndTag1:
8756
 * @ctxt:  an XML parser context
8757
 * @line:  line of the start tag
8758
 * @nsNr:  number of namespaces on the start tag
8759
 *
8760
 * Parse an end tag. Always consumes '</'.
8761
 *
8762
 * [42] ETag ::= '</' Name S? '>'
8763
 *
8764
 * With namespace
8765
 *
8766
 * [NS 9] ETag ::= '</' QName S? '>'
8767
 */
8768
8769
static void
8770
1.90M
xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
8771
1.90M
    const xmlChar *name;
8772
8773
1.90M
    GROW;
8774
1.90M
    if ((RAW != '<') || (NXT(1) != '/')) {
8775
0
  xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
8776
0
           "xmlParseEndTag: '</' not found\n");
8777
0
  return;
8778
0
    }
8779
1.90M
    SKIP(2);
8780
8781
1.90M
    name = xmlParseNameAndCompare(ctxt,ctxt->name);
8782
8783
    /*
8784
     * We should definitely be at the ending "S? '>'" part
8785
     */
8786
1.90M
    GROW;
8787
1.90M
    SKIP_BLANKS;
8788
1.90M
    if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
8789
32.3k
  xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
8790
32.3k
    } else
8791
1.87M
  NEXT1;
8792
8793
    /*
8794
     * [ WFC: Element Type Match ]
8795
     * The Name in an element's end-tag must match the element type in the
8796
     * start-tag.
8797
     *
8798
     */
8799
1.90M
    if (name != (xmlChar*)1) {
8800
93.1k
        if (name == NULL) name = BAD_CAST "unparsable";
8801
93.1k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
8802
93.1k
         "Opening and ending tag mismatch: %s line %d and %s\n",
8803
93.1k
                    ctxt->name, line, name);
8804
93.1k
    }
8805
8806
    /*
8807
     * SAX: End of Tag
8808
     */
8809
1.90M
    if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8810
1.90M
  (!ctxt->disableSAX))
8811
1.71M
        ctxt->sax->endElement(ctxt->userData, ctxt->name);
8812
8813
1.90M
    namePop(ctxt);
8814
1.90M
    spacePop(ctxt);
8815
1.90M
    return;
8816
1.90M
}
8817
8818
/**
8819
 * xmlParseEndTag:
8820
 * @ctxt:  an XML parser context
8821
 *
8822
 * DEPRECATED: Internal function, don't use.
8823
 *
8824
 * parse an end of tag
8825
 *
8826
 * [42] ETag ::= '</' Name S? '>'
8827
 *
8828
 * With namespace
8829
 *
8830
 * [NS 9] ETag ::= '</' QName S? '>'
8831
 */
8832
8833
void
8834
0
xmlParseEndTag(xmlParserCtxtPtr ctxt) {
8835
0
    xmlParseEndTag1(ctxt, 0);
8836
0
}
8837
#endif /* LIBXML_SAX1_ENABLED */
8838
8839
/************************************************************************
8840
 *                  *
8841
 *          SAX 2 specific operations       *
8842
 *                  *
8843
 ************************************************************************/
8844
8845
/*
8846
 * xmlGetNamespace:
8847
 * @ctxt:  an XML parser context
8848
 * @prefix:  the prefix to lookup
8849
 *
8850
 * Lookup the namespace name for the @prefix (which ca be NULL)
8851
 * The prefix must come from the @ctxt->dict dictionary
8852
 *
8853
 * Returns the namespace name or NULL if not bound
8854
 */
8855
static const xmlChar *
8856
7.06M
xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
8857
7.06M
    int i;
8858
8859
7.06M
    if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
8860
7.73M
    for (i = ctxt->nsNr - 2;i >= 0;i-=2)
8861
1.34M
        if (ctxt->nsTab[i] == prefix) {
8862
581k
      if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
8863
14.7k
          return(NULL);
8864
567k
      return(ctxt->nsTab[i + 1]);
8865
581k
  }
8866
6.38M
    return(NULL);
8867
6.97M
}
8868
8869
/**
8870
 * xmlParseQName:
8871
 * @ctxt:  an XML parser context
8872
 * @prefix:  pointer to store the prefix part
8873
 *
8874
 * parse an XML Namespace QName
8875
 *
8876
 * [6]  QName  ::= (Prefix ':')? LocalPart
8877
 * [7]  Prefix  ::= NCName
8878
 * [8]  LocalPart  ::= NCName
8879
 *
8880
 * Returns the Name parsed or NULL
8881
 */
8882
8883
static const xmlChar *
8884
14.3M
xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8885
14.3M
    const xmlChar *l, *p;
8886
8887
14.3M
    GROW;
8888
8889
14.3M
    l = xmlParseNCName(ctxt);
8890
14.3M
    if (l == NULL) {
8891
339k
        if (CUR == ':') {
8892
6.61k
      l = xmlParseName(ctxt);
8893
6.61k
      if (l != NULL) {
8894
6.61k
          xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8895
6.61k
             "Failed to parse QName '%s'\n", l, NULL, NULL);
8896
6.61k
    *prefix = NULL;
8897
6.61k
    return(l);
8898
6.61k
      }
8899
6.61k
  }
8900
333k
        return(NULL);
8901
339k
    }
8902
14.0M
    if (CUR == ':') {
8903
758k
        NEXT;
8904
758k
  p = l;
8905
758k
  l = xmlParseNCName(ctxt);
8906
758k
  if (l == NULL) {
8907
18.5k
      xmlChar *tmp;
8908
8909
18.5k
            if (ctxt->instate == XML_PARSER_EOF)
8910
0
                return(NULL);
8911
18.5k
            xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8912
18.5k
               "Failed to parse QName '%s:'\n", p, NULL, NULL);
8913
18.5k
      l = xmlParseNmtoken(ctxt);
8914
18.5k
      if (l == NULL) {
8915
11.9k
                if (ctxt->instate == XML_PARSER_EOF)
8916
0
                    return(NULL);
8917
11.9k
    tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
8918
11.9k
            } else {
8919
6.55k
    tmp = xmlBuildQName(l, p, NULL, 0);
8920
6.55k
    xmlFree((char *)l);
8921
6.55k
      }
8922
18.5k
      p = xmlDictLookup(ctxt->dict, tmp, -1);
8923
18.5k
      if (tmp != NULL) xmlFree(tmp);
8924
18.5k
      *prefix = NULL;
8925
18.5k
      return(p);
8926
18.5k
  }
8927
739k
  if (CUR == ':') {
8928
10.6k
      xmlChar *tmp;
8929
8930
10.6k
            xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8931
10.6k
               "Failed to parse QName '%s:%s:'\n", p, l, NULL);
8932
10.6k
      NEXT;
8933
10.6k
      tmp = (xmlChar *) xmlParseName(ctxt);
8934
10.6k
      if (tmp != NULL) {
8935
8.35k
          tmp = xmlBuildQName(tmp, l, NULL, 0);
8936
8.35k
    l = xmlDictLookup(ctxt->dict, tmp, -1);
8937
8.35k
    if (tmp != NULL) xmlFree(tmp);
8938
8.35k
    *prefix = p;
8939
8.35k
    return(l);
8940
8.35k
      }
8941
2.27k
            if (ctxt->instate == XML_PARSER_EOF)
8942
0
                return(NULL);
8943
2.27k
      tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
8944
2.27k
      l = xmlDictLookup(ctxt->dict, tmp, -1);
8945
2.27k
      if (tmp != NULL) xmlFree(tmp);
8946
2.27k
      *prefix = p;
8947
2.27k
      return(l);
8948
2.27k
  }
8949
729k
  *prefix = p;
8950
729k
    } else
8951
13.2M
        *prefix = NULL;
8952
13.9M
    return(l);
8953
14.0M
}
8954
8955
/**
8956
 * xmlParseQNameAndCompare:
8957
 * @ctxt:  an XML parser context
8958
 * @name:  the localname
8959
 * @prefix:  the prefix, if any.
8960
 *
8961
 * parse an XML name and compares for match
8962
 * (specialized for endtag parsing)
8963
 *
8964
 * Returns NULL for an illegal name, (xmlChar*) 1 for success
8965
 * and the name for mismatch
8966
 */
8967
8968
static const xmlChar *
8969
xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8970
184k
                        xmlChar const *prefix) {
8971
184k
    const xmlChar *cmp;
8972
184k
    const xmlChar *in;
8973
184k
    const xmlChar *ret;
8974
184k
    const xmlChar *prefix2;
8975
8976
184k
    if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8977
8978
184k
    GROW;
8979
184k
    in = ctxt->input->cur;
8980
8981
184k
    cmp = prefix;
8982
623k
    while (*in != 0 && *in == *cmp) {
8983
438k
  ++in;
8984
438k
  ++cmp;
8985
438k
    }
8986
184k
    if ((*cmp == 0) && (*in == ':')) {
8987
168k
        in++;
8988
168k
  cmp = name;
8989
1.34M
  while (*in != 0 && *in == *cmp) {
8990
1.17M
      ++in;
8991
1.17M
      ++cmp;
8992
1.17M
  }
8993
168k
  if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
8994
      /* success */
8995
152k
            ctxt->input->col += in - ctxt->input->cur;
8996
152k
      ctxt->input->cur = in;
8997
152k
      return((const xmlChar*) 1);
8998
152k
  }
8999
168k
    }
9000
    /*
9001
     * all strings coms from the dictionary, equality can be done directly
9002
     */
9003
32.6k
    ret = xmlParseQName (ctxt, &prefix2);
9004
32.6k
    if ((ret == name) && (prefix == prefix2))
9005
487
  return((const xmlChar*) 1);
9006
32.2k
    return ret;
9007
32.6k
}
9008
9009
/**
9010
 * xmlParseAttValueInternal:
9011
 * @ctxt:  an XML parser context
9012
 * @len:  attribute len result
9013
 * @alloc:  whether the attribute was reallocated as a new string
9014
 * @normalize:  if 1 then further non-CDATA normalization must be done
9015
 *
9016
 * parse a value for an attribute.
9017
 * NOTE: if no normalization is needed, the routine will return pointers
9018
 *       directly from the data buffer.
9019
 *
9020
 * 3.3.3 Attribute-Value Normalization:
9021
 * Before the value of an attribute is passed to the application or
9022
 * checked for validity, the XML processor must normalize it as follows:
9023
 * - a character reference is processed by appending the referenced
9024
 *   character to the attribute value
9025
 * - an entity reference is processed by recursively processing the
9026
 *   replacement text of the entity
9027
 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
9028
 *   appending #x20 to the normalized value, except that only a single
9029
 *   #x20 is appended for a "#xD#xA" sequence that is part of an external
9030
 *   parsed entity or the literal entity value of an internal parsed entity
9031
 * - other characters are processed by appending them to the normalized value
9032
 * If the declared value is not CDATA, then the XML processor must further
9033
 * process the normalized attribute value by discarding any leading and
9034
 * trailing space (#x20) characters, and by replacing sequences of space
9035
 * (#x20) characters by a single space (#x20) character.
9036
 * All attributes for which no declaration has been read should be treated
9037
 * by a non-validating parser as if declared CDATA.
9038
 *
9039
 * Returns the AttValue parsed or NULL. The value has to be freed by the
9040
 *     caller if it was copied, this can be detected by val[*len] == 0.
9041
 */
9042
9043
#define GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end) \
9044
5.58k
    const xmlChar *oldbase = ctxt->input->base;\
9045
5.58k
    GROW;\
9046
5.58k
    if (ctxt->instate == XML_PARSER_EOF)\
9047
5.58k
        return(NULL);\
9048
5.58k
    if (oldbase != ctxt->input->base) {\
9049
0
        ptrdiff_t delta = ctxt->input->base - oldbase;\
9050
0
        start = start + delta;\
9051
0
        in = in + delta;\
9052
0
    }\
9053
5.58k
    end = ctxt->input->end;
9054
9055
static xmlChar *
9056
xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
9057
                         int normalize)
9058
10.8M
{
9059
10.8M
    xmlChar limit = 0;
9060
10.8M
    const xmlChar *in = NULL, *start, *end, *last;
9061
10.8M
    xmlChar *ret = NULL;
9062
10.8M
    int line, col;
9063
10.8M
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
9064
3.50M
                    XML_MAX_HUGE_LENGTH :
9065
10.8M
                    XML_MAX_TEXT_LENGTH;
9066
9067
10.8M
    GROW;
9068
10.8M
    in = (xmlChar *) CUR_PTR;
9069
10.8M
    line = ctxt->input->line;
9070
10.8M
    col = ctxt->input->col;
9071
10.8M
    if (*in != '"' && *in != '\'') {
9072
32.3k
        xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
9073
32.3k
        return (NULL);
9074
32.3k
    }
9075
10.8M
    ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
9076
9077
    /*
9078
     * try to handle in this routine the most common case where no
9079
     * allocation of a new string is required and where content is
9080
     * pure ASCII.
9081
     */
9082
10.8M
    limit = *in++;
9083
10.8M
    col++;
9084
10.8M
    end = ctxt->input->end;
9085
10.8M
    start = in;
9086
10.8M
    if (in >= end) {
9087
919
        GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9088
919
    }
9089
10.8M
    if (normalize) {
9090
        /*
9091
   * Skip any leading spaces
9092
   */
9093
468k
  while ((in < end) && (*in != limit) &&
9094
468k
         ((*in == 0x20) || (*in == 0x9) ||
9095
466k
          (*in == 0xA) || (*in == 0xD))) {
9096
138k
      if (*in == 0xA) {
9097
21.6k
          line++; col = 1;
9098
116k
      } else {
9099
116k
          col++;
9100
116k
      }
9101
138k
      in++;
9102
138k
      start = in;
9103
138k
      if (in >= end) {
9104
140
                GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9105
140
                if ((in - start) > maxLength) {
9106
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9107
0
                                   "AttValue length too long\n");
9108
0
                    return(NULL);
9109
0
                }
9110
140
      }
9111
138k
  }
9112
3.38M
  while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9113
3.38M
         (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
9114
3.05M
      col++;
9115
3.05M
      if ((*in++ == 0x20) && (*in == 0x20)) break;
9116
3.05M
      if (in >= end) {
9117
249
                GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9118
249
                if ((in - start) > maxLength) {
9119
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9120
0
                                   "AttValue length too long\n");
9121
0
                    return(NULL);
9122
0
                }
9123
249
      }
9124
3.05M
  }
9125
330k
  last = in;
9126
  /*
9127
   * skip the trailing blanks
9128
   */
9129
334k
  while ((last[-1] == 0x20) && (last > start)) last--;
9130
560k
  while ((in < end) && (*in != limit) &&
9131
560k
         ((*in == 0x20) || (*in == 0x9) ||
9132
256k
          (*in == 0xA) || (*in == 0xD))) {
9133
230k
      if (*in == 0xA) {
9134
13.1k
          line++, col = 1;
9135
217k
      } else {
9136
217k
          col++;
9137
217k
      }
9138
230k
      in++;
9139
230k
      if (in >= end) {
9140
222
    const xmlChar *oldbase = ctxt->input->base;
9141
222
    GROW;
9142
222
                if (ctxt->instate == XML_PARSER_EOF)
9143
0
                    return(NULL);
9144
222
    if (oldbase != ctxt->input->base) {
9145
0
        ptrdiff_t delta = ctxt->input->base - oldbase;
9146
0
        start = start + delta;
9147
0
        in = in + delta;
9148
0
        last = last + delta;
9149
0
    }
9150
222
    end = ctxt->input->end;
9151
222
                if ((in - start) > maxLength) {
9152
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9153
0
                                   "AttValue length too long\n");
9154
0
                    return(NULL);
9155
0
                }
9156
222
      }
9157
230k
  }
9158
330k
        if ((in - start) > maxLength) {
9159
0
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9160
0
                           "AttValue length too long\n");
9161
0
            return(NULL);
9162
0
        }
9163
330k
  if (*in != limit) goto need_complex;
9164
10.5M
    } else {
9165
118M
  while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9166
118M
         (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
9167
108M
      in++;
9168
108M
      col++;
9169
108M
      if (in >= end) {
9170
4.28k
                GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9171
4.28k
                if ((in - start) > maxLength) {
9172
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9173
0
                                   "AttValue length too long\n");
9174
0
                    return(NULL);
9175
0
                }
9176
4.28k
      }
9177
108M
  }
9178
10.5M
  last = in;
9179
10.5M
        if ((in - start) > maxLength) {
9180
0
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9181
0
                           "AttValue length too long\n");
9182
0
            return(NULL);
9183
0
        }
9184
10.5M
  if (*in != limit) goto need_complex;
9185
10.5M
    }
9186
10.3M
    in++;
9187
10.3M
    col++;
9188
10.3M
    if (len != NULL) {
9189
6.97M
        if (alloc) *alloc = 0;
9190
6.97M
        *len = last - start;
9191
6.97M
        ret = (xmlChar *) start;
9192
6.97M
    } else {
9193
3.40M
        if (alloc) *alloc = 1;
9194
3.40M
        ret = xmlStrndup(start, last - start);
9195
3.40M
    }
9196
10.3M
    CUR_PTR = in;
9197
10.3M
    ctxt->input->line = line;
9198
10.3M
    ctxt->input->col = col;
9199
10.3M
    return ret;
9200
466k
need_complex:
9201
466k
    if (alloc) *alloc = 1;
9202
466k
    return xmlParseAttValueComplex(ctxt, len, normalize);
9203
10.8M
}
9204
9205
/**
9206
 * xmlParseAttribute2:
9207
 * @ctxt:  an XML parser context
9208
 * @pref:  the element prefix
9209
 * @elem:  the element name
9210
 * @prefix:  a xmlChar ** used to store the value of the attribute prefix
9211
 * @value:  a xmlChar ** used to store the value of the attribute
9212
 * @len:  an int * to save the length of the attribute
9213
 * @alloc:  an int * to indicate if the attribute was allocated
9214
 *
9215
 * parse an attribute in the new SAX2 framework.
9216
 *
9217
 * Returns the attribute name, and the value in *value, .
9218
 */
9219
9220
static const xmlChar *
9221
xmlParseAttribute2(xmlParserCtxtPtr ctxt,
9222
                   const xmlChar * pref, const xmlChar * elem,
9223
                   const xmlChar ** prefix, xmlChar ** value,
9224
                   int *len, int *alloc)
9225
7.42M
{
9226
7.42M
    const xmlChar *name;
9227
7.42M
    xmlChar *val, *internal_val = NULL;
9228
7.42M
    int normalize = 0;
9229
9230
7.42M
    *value = NULL;
9231
7.42M
    GROW;
9232
7.42M
    name = xmlParseQName(ctxt, prefix);
9233
7.42M
    if (name == NULL) {
9234
119k
        xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9235
119k
                       "error parsing attribute name\n");
9236
119k
        return (NULL);
9237
119k
    }
9238
9239
    /*
9240
     * get the type if needed
9241
     */
9242
7.30M
    if (ctxt->attsSpecial != NULL) {
9243
851k
        int type;
9244
9245
851k
        type = (int) (ptrdiff_t) xmlHashQLookup2(ctxt->attsSpecial,
9246
851k
                                                 pref, elem, *prefix, name);
9247
851k
        if (type != 0)
9248
331k
            normalize = 1;
9249
851k
    }
9250
9251
    /*
9252
     * read the value
9253
     */
9254
7.30M
    SKIP_BLANKS;
9255
7.30M
    if (RAW == '=') {
9256
7.24M
        NEXT;
9257
7.24M
        SKIP_BLANKS;
9258
7.24M
        val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
9259
7.24M
        if (val == NULL)
9260
14.7k
            return (NULL);
9261
7.23M
  if (normalize) {
9262
      /*
9263
       * Sometimes a second normalisation pass for spaces is needed
9264
       * but that only happens if charrefs or entities references
9265
       * have been used in the attribute value, i.e. the attribute
9266
       * value have been extracted in an allocated string already.
9267
       */
9268
330k
      if (*alloc) {
9269
26.5k
          const xmlChar *val2;
9270
9271
26.5k
          val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
9272
26.5k
    if ((val2 != NULL) && (val2 != val)) {
9273
5.97k
        xmlFree(val);
9274
5.97k
        val = (xmlChar *) val2;
9275
5.97k
    }
9276
26.5k
      }
9277
330k
  }
9278
7.23M
        ctxt->instate = XML_PARSER_CONTENT;
9279
7.23M
    } else {
9280
61.5k
        xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
9281
61.5k
                          "Specification mandates value for attribute %s\n",
9282
61.5k
                          name);
9283
61.5k
        return (name);
9284
61.5k
    }
9285
9286
7.23M
    if (*prefix == ctxt->str_xml) {
9287
        /*
9288
         * Check that xml:lang conforms to the specification
9289
         * No more registered as an error, just generate a warning now
9290
         * since this was deprecated in XML second edition
9291
         */
9292
49.2k
        if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
9293
11.1k
            internal_val = xmlStrndup(val, *len);
9294
11.1k
            if (!xmlCheckLanguageID(internal_val)) {
9295
6.77k
                xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
9296
6.77k
                              "Malformed value for xml:lang : %s\n",
9297
6.77k
                              internal_val, NULL);
9298
6.77k
            }
9299
11.1k
        }
9300
9301
        /*
9302
         * Check that xml:space conforms to the specification
9303
         */
9304
49.2k
        if (xmlStrEqual(name, BAD_CAST "space")) {
9305
1.02k
            internal_val = xmlStrndup(val, *len);
9306
1.02k
            if (xmlStrEqual(internal_val, BAD_CAST "default"))
9307
133
                *(ctxt->space) = 0;
9308
896
            else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
9309
397
                *(ctxt->space) = 1;
9310
499
            else {
9311
499
                xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
9312
499
                              "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
9313
499
                              internal_val, NULL);
9314
499
            }
9315
1.02k
        }
9316
49.2k
        if (internal_val) {
9317
12.2k
            xmlFree(internal_val);
9318
12.2k
        }
9319
49.2k
    }
9320
9321
7.23M
    *value = val;
9322
7.23M
    return (name);
9323
7.30M
}
9324
/**
9325
 * xmlParseStartTag2:
9326
 * @ctxt:  an XML parser context
9327
 *
9328
 * Parse a start tag. Always consumes '<'.
9329
 *
9330
 * This routine is called when running SAX2 parsing
9331
 *
9332
 * [40] STag ::= '<' Name (S Attribute)* S? '>'
9333
 *
9334
 * [ WFC: Unique Att Spec ]
9335
 * No attribute name may appear more than once in the same start-tag or
9336
 * empty-element tag.
9337
 *
9338
 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
9339
 *
9340
 * [ WFC: Unique Att Spec ]
9341
 * No attribute name may appear more than once in the same start-tag or
9342
 * empty-element tag.
9343
 *
9344
 * With namespace:
9345
 *
9346
 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
9347
 *
9348
 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
9349
 *
9350
 * Returns the element name parsed
9351
 */
9352
9353
static const xmlChar *
9354
xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
9355
6.89M
                  const xmlChar **URI, int *tlen) {
9356
6.89M
    const xmlChar *localname;
9357
6.89M
    const xmlChar *prefix;
9358
6.89M
    const xmlChar *attname;
9359
6.89M
    const xmlChar *aprefix;
9360
6.89M
    const xmlChar *nsname;
9361
6.89M
    xmlChar *attvalue;
9362
6.89M
    const xmlChar **atts = ctxt->atts;
9363
6.89M
    int maxatts = ctxt->maxatts;
9364
6.89M
    int nratts, nbatts, nbdef, inputid;
9365
6.89M
    int i, j, nbNs, attval;
9366
6.89M
    unsigned long cur;
9367
6.89M
    int nsNr = ctxt->nsNr;
9368
9369
6.89M
    if (RAW != '<') return(NULL);
9370
6.89M
    NEXT1;
9371
9372
    /*
9373
     * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
9374
     *       point since the attribute values may be stored as pointers to
9375
     *       the buffer and calling SHRINK would destroy them !
9376
     *       The Shrinking is only possible once the full set of attribute
9377
     *       callbacks have been done.
9378
     */
9379
6.89M
    SHRINK;
9380
6.89M
    cur = ctxt->input->cur - ctxt->input->base;
9381
6.89M
    inputid = ctxt->input->id;
9382
6.89M
    nbatts = 0;
9383
6.89M
    nratts = 0;
9384
6.89M
    nbdef = 0;
9385
6.89M
    nbNs = 0;
9386
6.89M
    attval = 0;
9387
    /* Forget any namespaces added during an earlier parse of this element. */
9388
6.89M
    ctxt->nsNr = nsNr;
9389
9390
6.89M
    localname = xmlParseQName(ctxt, &prefix);
9391
6.89M
    if (localname == NULL) {
9392
212k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9393
212k
           "StartTag: invalid element name\n");
9394
212k
        return(NULL);
9395
212k
    }
9396
6.68M
    *tlen = ctxt->input->cur - ctxt->input->base - cur;
9397
9398
    /*
9399
     * Now parse the attributes, it ends up with the ending
9400
     *
9401
     * (S Attribute)* S?
9402
     */
9403
6.68M
    SKIP_BLANKS;
9404
6.68M
    GROW;
9405
9406
9.62M
    while (((RAW != '>') &&
9407
9.62M
     ((RAW != '/') || (NXT(1) != '>')) &&
9408
9.62M
     (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
9409
7.42M
  int len = -1, alloc = 0;
9410
9411
7.42M
  attname = xmlParseAttribute2(ctxt, prefix, localname,
9412
7.42M
                               &aprefix, &attvalue, &len, &alloc);
9413
7.42M
        if (attname == NULL) {
9414
133k
      xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9415
133k
           "xmlParseStartTag: problem parsing attributes\n");
9416
133k
      break;
9417
133k
  }
9418
7.29M
        if (attvalue == NULL)
9419
61.5k
            goto next_attr;
9420
7.23M
  if (len < 0) len = xmlStrlen(attvalue);
9421
9422
7.23M
        if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9423
58.1k
            const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9424
58.1k
            xmlURIPtr uri;
9425
9426
58.1k
            if (URL == NULL) {
9427
0
                xmlErrMemory(ctxt, "dictionary allocation failure");
9428
0
                if ((attvalue != NULL) && (alloc != 0))
9429
0
                    xmlFree(attvalue);
9430
0
                localname = NULL;
9431
0
                goto done;
9432
0
            }
9433
58.1k
            if (*URL != 0) {
9434
53.7k
                uri = xmlParseURI((const char *) URL);
9435
53.7k
                if (uri == NULL) {
9436
21.0k
                    xmlNsErr(ctxt, XML_WAR_NS_URI,
9437
21.0k
                             "xmlns: '%s' is not a valid URI\n",
9438
21.0k
                                       URL, NULL, NULL);
9439
32.7k
                } else {
9440
32.7k
                    if (uri->scheme == NULL) {
9441
6.74k
                        xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9442
6.74k
                                  "xmlns: URI %s is not absolute\n",
9443
6.74k
                                  URL, NULL, NULL);
9444
6.74k
                    }
9445
32.7k
                    xmlFreeURI(uri);
9446
32.7k
                }
9447
53.7k
                if (URL == ctxt->str_xml_ns) {
9448
0
                    if (attname != ctxt->str_xml) {
9449
0
                        xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9450
0
                     "xml namespace URI cannot be the default namespace\n",
9451
0
                                 NULL, NULL, NULL);
9452
0
                    }
9453
0
                    goto next_attr;
9454
0
                }
9455
53.7k
                if ((len == 29) &&
9456
53.7k
                    (xmlStrEqual(URL,
9457
1.94k
                             BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9458
641
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9459
641
                         "reuse of the xmlns namespace name is forbidden\n",
9460
641
                             NULL, NULL, NULL);
9461
641
                    goto next_attr;
9462
641
                }
9463
53.7k
            }
9464
            /*
9465
             * check that it's not a defined namespace
9466
             */
9467
75.2k
            for (j = 1;j <= nbNs;j++)
9468
26.6k
                if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9469
8.93k
                    break;
9470
57.5k
            if (j <= nbNs)
9471
8.93k
                xmlErrAttributeDup(ctxt, NULL, attname);
9472
48.6k
            else
9473
48.6k
                if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
9474
9475
7.17M
        } else if (aprefix == ctxt->str_xmlns) {
9476
94.1k
            const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9477
94.1k
            xmlURIPtr uri;
9478
9479
94.1k
            if (attname == ctxt->str_xml) {
9480
1.43k
                if (URL != ctxt->str_xml_ns) {
9481
1.43k
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9482
1.43k
                             "xml namespace prefix mapped to wrong URI\n",
9483
1.43k
                             NULL, NULL, NULL);
9484
1.43k
                }
9485
                /*
9486
                 * Do not keep a namespace definition node
9487
                 */
9488
1.43k
                goto next_attr;
9489
1.43k
            }
9490
92.7k
            if (URL == ctxt->str_xml_ns) {
9491
0
                if (attname != ctxt->str_xml) {
9492
0
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9493
0
                             "xml namespace URI mapped to wrong prefix\n",
9494
0
                             NULL, NULL, NULL);
9495
0
                }
9496
0
                goto next_attr;
9497
0
            }
9498
92.7k
            if (attname == ctxt->str_xmlns) {
9499
369
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9500
369
                         "redefinition of the xmlns prefix is forbidden\n",
9501
369
                         NULL, NULL, NULL);
9502
369
                goto next_attr;
9503
369
            }
9504
92.3k
            if ((len == 29) &&
9505
92.3k
                (xmlStrEqual(URL,
9506
2.15k
                             BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9507
170
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9508
170
                         "reuse of the xmlns namespace name is forbidden\n",
9509
170
                         NULL, NULL, NULL);
9510
170
                goto next_attr;
9511
170
            }
9512
92.2k
            if ((URL == NULL) || (URL[0] == 0)) {
9513
1.35k
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9514
1.35k
                         "xmlns:%s: Empty XML namespace is not allowed\n",
9515
1.35k
                              attname, NULL, NULL);
9516
1.35k
                goto next_attr;
9517
90.8k
            } else {
9518
90.8k
                uri = xmlParseURI((const char *) URL);
9519
90.8k
                if (uri == NULL) {
9520
22.5k
                    xmlNsErr(ctxt, XML_WAR_NS_URI,
9521
22.5k
                         "xmlns:%s: '%s' is not a valid URI\n",
9522
22.5k
                                       attname, URL, NULL);
9523
68.2k
                } else {
9524
68.2k
                    if ((ctxt->pedantic) && (uri->scheme == NULL)) {
9525
2.24k
                        xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9526
2.24k
                                  "xmlns:%s: URI %s is not absolute\n",
9527
2.24k
                                  attname, URL, NULL);
9528
2.24k
                    }
9529
68.2k
                    xmlFreeURI(uri);
9530
68.2k
                }
9531
90.8k
            }
9532
9533
            /*
9534
             * check that it's not a defined namespace
9535
             */
9536
147k
            for (j = 1;j <= nbNs;j++)
9537
61.2k
                if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9538
4.63k
                    break;
9539
90.8k
            if (j <= nbNs)
9540
4.63k
                xmlErrAttributeDup(ctxt, aprefix, attname);
9541
86.2k
            else
9542
86.2k
                if (nsPush(ctxt, attname, URL) > 0) nbNs++;
9543
9544
7.07M
        } else {
9545
            /*
9546
             * Add the pair to atts
9547
             */
9548
7.07M
            if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9549
103k
                if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9550
0
                    goto next_attr;
9551
0
                }
9552
103k
                maxatts = ctxt->maxatts;
9553
103k
                atts = ctxt->atts;
9554
103k
            }
9555
7.07M
            ctxt->attallocs[nratts++] = alloc;
9556
7.07M
            atts[nbatts++] = attname;
9557
7.07M
            atts[nbatts++] = aprefix;
9558
            /*
9559
             * The namespace URI field is used temporarily to point at the
9560
             * base of the current input buffer for non-alloced attributes.
9561
             * When the input buffer is reallocated, all the pointers become
9562
             * invalid, but they can be reconstructed later.
9563
             */
9564
7.07M
            if (alloc)
9565
213k
                atts[nbatts++] = NULL;
9566
6.86M
            else
9567
6.86M
                atts[nbatts++] = ctxt->input->base;
9568
7.07M
            atts[nbatts++] = attvalue;
9569
7.07M
            attvalue += len;
9570
7.07M
            atts[nbatts++] = attvalue;
9571
            /*
9572
             * tag if some deallocation is needed
9573
             */
9574
7.07M
            if (alloc != 0) attval = 1;
9575
7.07M
            attvalue = NULL; /* moved into atts */
9576
7.07M
        }
9577
9578
7.29M
next_attr:
9579
7.29M
        if ((attvalue != NULL) && (alloc != 0)) {
9580
42.9k
            xmlFree(attvalue);
9581
42.9k
            attvalue = NULL;
9582
42.9k
        }
9583
9584
7.29M
  GROW
9585
7.29M
        if (ctxt->instate == XML_PARSER_EOF)
9586
0
            break;
9587
7.29M
  if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9588
4.17M
      break;
9589
3.12M
  if (SKIP_BLANKS == 0) {
9590
180k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9591
180k
         "attributes construct error\n");
9592
180k
      break;
9593
180k
  }
9594
2.93M
        GROW;
9595
2.93M
    }
9596
9597
6.68M
    if (ctxt->input->id != inputid) {
9598
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9599
0
                    "Unexpected change of input\n");
9600
0
        localname = NULL;
9601
0
        goto done;
9602
0
    }
9603
9604
    /* Reconstruct attribute value pointers. */
9605
13.7M
    for (i = 0, j = 0; j < nratts; i += 5, j++) {
9606
7.07M
        if (atts[i+2] != NULL) {
9607
            /*
9608
             * Arithmetic on dangling pointers is technically undefined
9609
             * behavior, but well...
9610
             */
9611
6.86M
            const xmlChar *old = atts[i+2];
9612
6.86M
            atts[i+2]  = NULL;    /* Reset repurposed namespace URI */
9613
6.86M
            atts[i+3] = ctxt->input->base + (atts[i+3] - old);  /* value */
9614
6.86M
            atts[i+4] = ctxt->input->base + (atts[i+4] - old);  /* valuend */
9615
6.86M
        }
9616
7.07M
    }
9617
9618
    /*
9619
     * The attributes defaulting
9620
     */
9621
6.68M
    if (ctxt->attsDefault != NULL) {
9622
1.11M
        xmlDefAttrsPtr defaults;
9623
9624
1.11M
  defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9625
1.11M
  if (defaults != NULL) {
9626
368k
      for (i = 0;i < defaults->nbAttrs;i++) {
9627
257k
          attname = defaults->values[5 * i];
9628
257k
    aprefix = defaults->values[5 * i + 1];
9629
9630
                /*
9631
     * special work for namespaces defaulted defs
9632
     */
9633
257k
    if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9634
        /*
9635
         * check that it's not a defined namespace
9636
         */
9637
12.8k
        for (j = 1;j <= nbNs;j++)
9638
3.68k
            if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9639
1.62k
          break;
9640
10.8k
              if (j <= nbNs) continue;
9641
9642
9.17k
        nsname = xmlGetNamespace(ctxt, NULL);
9643
9.17k
        if (nsname != defaults->values[5 * i + 2]) {
9644
5.10k
      if (nsPush(ctxt, NULL,
9645
5.10k
                 defaults->values[5 * i + 2]) > 0)
9646
4.65k
          nbNs++;
9647
5.10k
        }
9648
246k
    } else if (aprefix == ctxt->str_xmlns) {
9649
        /*
9650
         * check that it's not a defined namespace
9651
         */
9652
29.7k
        for (j = 1;j <= nbNs;j++)
9653
6.36k
            if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9654
3.14k
          break;
9655
26.5k
              if (j <= nbNs) continue;
9656
9657
23.3k
        nsname = xmlGetNamespace(ctxt, attname);
9658
23.3k
        if (nsname != defaults->values[5 * i + 2]) {
9659
12.7k
      if (nsPush(ctxt, attname,
9660
12.7k
                 defaults->values[5 * i + 2]) > 0)
9661
12.1k
          nbNs++;
9662
12.7k
        }
9663
220k
    } else {
9664
        /*
9665
         * check that it's not a defined attribute
9666
         */
9667
606k
        for (j = 0;j < nbatts;j+=5) {
9668
388k
      if ((attname == atts[j]) && (aprefix == atts[j+1]))
9669
2.34k
          break;
9670
388k
        }
9671
220k
        if (j < nbatts) continue;
9672
9673
217k
        if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9674
5.99k
      if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9675
0
                            localname = NULL;
9676
0
                            goto done;
9677
0
      }
9678
5.99k
      maxatts = ctxt->maxatts;
9679
5.99k
      atts = ctxt->atts;
9680
5.99k
        }
9681
217k
        atts[nbatts++] = attname;
9682
217k
        atts[nbatts++] = aprefix;
9683
217k
        if (aprefix == NULL)
9684
160k
      atts[nbatts++] = NULL;
9685
57.8k
        else
9686
57.8k
            atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
9687
217k
        atts[nbatts++] = defaults->values[5 * i + 2];
9688
217k
        atts[nbatts++] = defaults->values[5 * i + 3];
9689
217k
        if ((ctxt->standalone == 1) &&
9690
217k
            (defaults->values[5 * i + 4] != NULL)) {
9691
0
      xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
9692
0
    "standalone: attribute %s on %s defaulted from external subset\n",
9693
0
                                   attname, localname);
9694
0
        }
9695
217k
        nbdef++;
9696
217k
    }
9697
257k
      }
9698
111k
  }
9699
1.11M
    }
9700
9701
    /*
9702
     * The attributes checkings
9703
     */
9704
13.9M
    for (i = 0; i < nbatts;i += 5) {
9705
        /*
9706
  * The default namespace does not apply to attribute names.
9707
  */
9708
7.29M
  if (atts[i + 1] != NULL) {
9709
289k
      nsname = xmlGetNamespace(ctxt, atts[i + 1]);
9710
289k
      if (nsname == NULL) {
9711
125k
    xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9712
125k
        "Namespace prefix %s for %s on %s is not defined\n",
9713
125k
        atts[i + 1], atts[i], localname);
9714
125k
      }
9715
289k
      atts[i + 2] = nsname;
9716
289k
  } else
9717
7.00M
      nsname = NULL;
9718
  /*
9719
   * [ WFC: Unique Att Spec ]
9720
   * No attribute name may appear more than once in the same
9721
   * start-tag or empty-element tag.
9722
   * As extended by the Namespace in XML REC.
9723
   */
9724
10.6M
        for (j = 0; j < i;j += 5) {
9725
3.31M
      if (atts[i] == atts[j]) {
9726
29.4k
          if (atts[i+1] == atts[j+1]) {
9727
10.1k
        xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
9728
10.1k
        break;
9729
10.1k
    }
9730
19.2k
    if ((nsname != NULL) && (atts[j + 2] == nsname)) {
9731
1.23k
        xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9732
1.23k
           "Namespaced Attribute %s in '%s' redefined\n",
9733
1.23k
           atts[i], nsname, NULL);
9734
1.23k
        break;
9735
1.23k
    }
9736
19.2k
      }
9737
3.31M
  }
9738
7.29M
    }
9739
9740
6.68M
    nsname = xmlGetNamespace(ctxt, prefix);
9741
6.68M
    if ((prefix != NULL) && (nsname == NULL)) {
9742
176k
  xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9743
176k
           "Namespace prefix %s on %s is not defined\n",
9744
176k
     prefix, localname, NULL);
9745
176k
    }
9746
6.68M
    *pref = prefix;
9747
6.68M
    *URI = nsname;
9748
9749
    /*
9750
     * SAX: Start of Element !
9751
     */
9752
6.68M
    if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9753
6.68M
  (!ctxt->disableSAX)) {
9754
5.82M
  if (nbNs > 0)
9755
79.8k
      ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9756
79.8k
        nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
9757
79.8k
        nbatts / 5, nbdef, atts);
9758
5.74M
  else
9759
5.74M
      ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9760
5.74M
                    nsname, 0, NULL, nbatts / 5, nbdef, atts);
9761
5.82M
    }
9762
9763
6.68M
done:
9764
    /*
9765
     * Free up attribute allocated strings if needed
9766
     */
9767
6.68M
    if (attval != 0) {
9768
456k
  for (i = 3,j = 0; j < nratts;i += 5,j++)
9769
261k
      if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9770
213k
          xmlFree((xmlChar *) atts[i]);
9771
195k
    }
9772
9773
6.68M
    return(localname);
9774
6.68M
}
9775
9776
/**
9777
 * xmlParseEndTag2:
9778
 * @ctxt:  an XML parser context
9779
 * @line:  line of the start tag
9780
 * @nsNr:  number of namespaces on the start tag
9781
 *
9782
 * Parse an end tag. Always consumes '</'.
9783
 *
9784
 * [42] ETag ::= '</' Name S? '>'
9785
 *
9786
 * With namespace
9787
 *
9788
 * [NS 9] ETag ::= '</' QName S? '>'
9789
 */
9790
9791
static void
9792
3.07M
xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlStartTag *tag) {
9793
3.07M
    const xmlChar *name;
9794
9795
3.07M
    GROW;
9796
3.07M
    if ((RAW != '<') || (NXT(1) != '/')) {
9797
0
  xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
9798
0
  return;
9799
0
    }
9800
3.07M
    SKIP(2);
9801
9802
3.07M
    if (tag->prefix == NULL)
9803
2.89M
        name = xmlParseNameAndCompare(ctxt, ctxt->name);
9804
184k
    else
9805
184k
        name = xmlParseQNameAndCompare(ctxt, ctxt->name, tag->prefix);
9806
9807
    /*
9808
     * We should definitely be at the ending "S? '>'" part
9809
     */
9810
3.07M
    GROW;
9811
3.07M
    if (ctxt->instate == XML_PARSER_EOF)
9812
0
        return;
9813
3.07M
    SKIP_BLANKS;
9814
3.07M
    if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
9815
40.5k
  xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
9816
40.5k
    } else
9817
3.03M
  NEXT1;
9818
9819
    /*
9820
     * [ WFC: Element Type Match ]
9821
     * The Name in an element's end-tag must match the element type in the
9822
     * start-tag.
9823
     *
9824
     */
9825
3.07M
    if (name != (xmlChar*)1) {
9826
126k
        if (name == NULL) name = BAD_CAST "unparsable";
9827
126k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
9828
126k
         "Opening and ending tag mismatch: %s line %d and %s\n",
9829
126k
                    ctxt->name, tag->line, name);
9830
126k
    }
9831
9832
    /*
9833
     * SAX: End of Tag
9834
     */
9835
3.07M
    if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9836
3.07M
  (!ctxt->disableSAX))
9837
2.60M
  ctxt->sax->endElementNs(ctxt->userData, ctxt->name, tag->prefix,
9838
2.60M
                                tag->URI);
9839
9840
3.07M
    spacePop(ctxt);
9841
3.07M
    if (tag->nsNr != 0)
9842
16.3k
  nsPop(ctxt, tag->nsNr);
9843
3.07M
}
9844
9845
/**
9846
 * xmlParseCDSect:
9847
 * @ctxt:  an XML parser context
9848
 *
9849
 * DEPRECATED: Internal function, don't use.
9850
 *
9851
 * Parse escaped pure raw content. Always consumes '<!['.
9852
 *
9853
 * [18] CDSect ::= CDStart CData CDEnd
9854
 *
9855
 * [19] CDStart ::= '<![CDATA['
9856
 *
9857
 * [20] Data ::= (Char* - (Char* ']]>' Char*))
9858
 *
9859
 * [21] CDEnd ::= ']]>'
9860
 */
9861
void
9862
26.9k
xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9863
26.9k
    xmlChar *buf = NULL;
9864
26.9k
    int len = 0;
9865
26.9k
    int size = XML_PARSER_BUFFER_SIZE;
9866
26.9k
    int r, rl;
9867
26.9k
    int s, sl;
9868
26.9k
    int cur, l;
9869
26.9k
    int count = 0;
9870
26.9k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
9871
9.99k
                    XML_MAX_HUGE_LENGTH :
9872
26.9k
                    XML_MAX_TEXT_LENGTH;
9873
9874
26.9k
    if ((CUR != '<') || (NXT(1) != '!') || (NXT(2) != '['))
9875
0
        return;
9876
26.9k
    SKIP(3);
9877
9878
26.9k
    if (!CMP6(CUR_PTR, 'C', 'D', 'A', 'T', 'A', '['))
9879
0
        return;
9880
26.9k
    SKIP(6);
9881
9882
26.9k
    ctxt->instate = XML_PARSER_CDATA_SECTION;
9883
26.9k
    r = CUR_CHAR(rl);
9884
26.9k
    if (!IS_CHAR(r)) {
9885
1.05k
  xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9886
1.05k
        goto out;
9887
1.05k
    }
9888
25.8k
    NEXTL(rl);
9889
25.8k
    s = CUR_CHAR(sl);
9890
25.8k
    if (!IS_CHAR(s)) {
9891
1.17k
  xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9892
1.17k
        goto out;
9893
1.17k
    }
9894
24.7k
    NEXTL(sl);
9895
24.7k
    cur = CUR_CHAR(l);
9896
24.7k
    buf = (xmlChar *) xmlMallocAtomic(size);
9897
24.7k
    if (buf == NULL) {
9898
0
  xmlErrMemory(ctxt, NULL);
9899
0
        goto out;
9900
0
    }
9901
13.5M
    while (IS_CHAR(cur) &&
9902
13.5M
           ((r != ']') || (s != ']') || (cur != '>'))) {
9903
13.4M
  if (len + 5 >= size) {
9904
26.3k
      xmlChar *tmp;
9905
9906
26.3k
      tmp = (xmlChar *) xmlRealloc(buf, size * 2);
9907
26.3k
      if (tmp == NULL) {
9908
0
    xmlErrMemory(ctxt, NULL);
9909
0
                goto out;
9910
0
      }
9911
26.3k
      buf = tmp;
9912
26.3k
      size *= 2;
9913
26.3k
  }
9914
13.4M
  COPY_BUF(rl,buf,len,r);
9915
13.4M
  r = s;
9916
13.4M
  rl = sl;
9917
13.4M
  s = cur;
9918
13.4M
  sl = l;
9919
13.4M
  count++;
9920
13.4M
  if (count > 50) {
9921
254k
      SHRINK;
9922
254k
      GROW;
9923
254k
            if (ctxt->instate == XML_PARSER_EOF) {
9924
0
                goto out;
9925
0
            }
9926
254k
      count = 0;
9927
254k
  }
9928
13.4M
  NEXTL(l);
9929
13.4M
  cur = CUR_CHAR(l);
9930
13.4M
        if (len > maxLength) {
9931
0
            xmlFatalErrMsg(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9932
0
                           "CData section too big found\n");
9933
0
            goto out;
9934
0
        }
9935
13.4M
    }
9936
24.7k
    buf[len] = 0;
9937
24.7k
    if (cur != '>') {
9938
4.55k
  xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9939
4.55k
                       "CData section not finished\n%.50s\n", buf);
9940
4.55k
        goto out;
9941
4.55k
    }
9942
20.1k
    NEXTL(l);
9943
9944
    /*
9945
     * OK the buffer is to be consumed as cdata.
9946
     */
9947
20.1k
    if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9948
14.5k
  if (ctxt->sax->cdataBlock != NULL)
9949
9.63k
      ctxt->sax->cdataBlock(ctxt->userData, buf, len);
9950
4.87k
  else if (ctxt->sax->characters != NULL)
9951
4.87k
      ctxt->sax->characters(ctxt->userData, buf, len);
9952
14.5k
    }
9953
9954
26.9k
out:
9955
26.9k
    if (ctxt->instate != XML_PARSER_EOF)
9956
26.9k
        ctxt->instate = XML_PARSER_CONTENT;
9957
26.9k
    xmlFree(buf);
9958
26.9k
}
9959
9960
/**
9961
 * xmlParseContentInternal:
9962
 * @ctxt:  an XML parser context
9963
 *
9964
 * Parse a content sequence. Stops at EOF or '</'. Leaves checking of
9965
 * unexpected EOF to the caller.
9966
 */
9967
9968
static void
9969
249k
xmlParseContentInternal(xmlParserCtxtPtr ctxt) {
9970
249k
    int nameNr = ctxt->nameNr;
9971
9972
249k
    GROW;
9973
17.6M
    while ((RAW != 0) &&
9974
17.6M
     (ctxt->instate != XML_PARSER_EOF)) {
9975
17.4M
  const xmlChar *cur = ctxt->input->cur;
9976
9977
  /*
9978
   * First case : a Processing Instruction.
9979
   */
9980
17.4M
  if ((*cur == '<') && (cur[1] == '?')) {
9981
40.1k
      xmlParsePI(ctxt);
9982
40.1k
  }
9983
9984
  /*
9985
   * Second case : a CDSection
9986
   */
9987
  /* 2.6.0 test was *cur not RAW */
9988
17.4M
  else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9989
26.9k
      xmlParseCDSect(ctxt);
9990
26.9k
  }
9991
9992
  /*
9993
   * Third case :  a comment
9994
   */
9995
17.3M
  else if ((*cur == '<') && (NXT(1) == '!') &&
9996
17.3M
     (NXT(2) == '-') && (NXT(3) == '-')) {
9997
159k
      xmlParseComment(ctxt);
9998
159k
      ctxt->instate = XML_PARSER_CONTENT;
9999
159k
  }
10000
10001
  /*
10002
   * Fourth case :  a sub-element.
10003
   */
10004
17.2M
  else if (*cur == '<') {
10005
7.05M
            if (NXT(1) == '/') {
10006
2.31M
                if (ctxt->nameNr <= nameNr)
10007
38.9k
                    break;
10008
2.28M
          xmlParseElementEnd(ctxt);
10009
4.73M
            } else {
10010
4.73M
          xmlParseElementStart(ctxt);
10011
4.73M
            }
10012
7.05M
  }
10013
10014
  /*
10015
   * Fifth case : a reference. If if has not been resolved,
10016
   *    parsing returns it's Name, create the node
10017
   */
10018
10019
10.1M
  else if (*cur == '&') {
10020
2.55M
      xmlParseReference(ctxt);
10021
2.55M
  }
10022
10023
  /*
10024
   * Last case, text. Note that References are handled directly.
10025
   */
10026
7.61M
  else {
10027
7.61M
      xmlParseCharData(ctxt, 0);
10028
7.61M
  }
10029
10030
17.4M
  GROW;
10031
17.4M
  SHRINK;
10032
17.4M
    }
10033
249k
}
10034
10035
/**
10036
 * xmlParseContent:
10037
 * @ctxt:  an XML parser context
10038
 *
10039
 * Parse a content sequence. Stops at EOF or '</'.
10040
 *
10041
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10042
 */
10043
10044
void
10045
164k
xmlParseContent(xmlParserCtxtPtr ctxt) {
10046
164k
    int nameNr = ctxt->nameNr;
10047
10048
164k
    xmlParseContentInternal(ctxt);
10049
10050
164k
    if ((ctxt->instate != XML_PARSER_EOF) && (ctxt->nameNr > nameNr)) {
10051
8.90k
        const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
10052
8.90k
        int line = ctxt->pushTab[ctxt->nameNr - 1].line;
10053
8.90k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
10054
8.90k
                "Premature end of data in tag %s line %d\n",
10055
8.90k
    name, line, NULL);
10056
8.90k
    }
10057
164k
}
10058
10059
/**
10060
 * xmlParseElement:
10061
 * @ctxt:  an XML parser context
10062
 *
10063
 * DEPRECATED: Internal function, don't use.
10064
 *
10065
 * parse an XML element
10066
 *
10067
 * [39] element ::= EmptyElemTag | STag content ETag
10068
 *
10069
 * [ WFC: Element Type Match ]
10070
 * The Name in an element's end-tag must match the element type in the
10071
 * start-tag.
10072
 *
10073
 */
10074
10075
void
10076
115k
xmlParseElement(xmlParserCtxtPtr ctxt) {
10077
115k
    if (xmlParseElementStart(ctxt) != 0)
10078
29.8k
        return;
10079
10080
85.3k
    xmlParseContentInternal(ctxt);
10081
85.3k
    if (ctxt->instate == XML_PARSER_EOF)
10082
262
  return;
10083
10084
85.0k
    if (CUR == 0) {
10085
49.3k
        const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
10086
49.3k
        int line = ctxt->pushTab[ctxt->nameNr - 1].line;
10087
49.3k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
10088
49.3k
                "Premature end of data in tag %s line %d\n",
10089
49.3k
    name, line, NULL);
10090
49.3k
        return;
10091
49.3k
    }
10092
10093
35.7k
    xmlParseElementEnd(ctxt);
10094
35.7k
}
10095
10096
/**
10097
 * xmlParseElementStart:
10098
 * @ctxt:  an XML parser context
10099
 *
10100
 * Parse the start of an XML element. Returns -1 in case of error, 0 if an
10101
 * opening tag was parsed, 1 if an empty element was parsed.
10102
 *
10103
 * Always consumes '<'.
10104
 */
10105
static int
10106
4.84M
xmlParseElementStart(xmlParserCtxtPtr ctxt) {
10107
4.84M
    const xmlChar *name;
10108
4.84M
    const xmlChar *prefix = NULL;
10109
4.84M
    const xmlChar *URI = NULL;
10110
4.84M
    xmlParserNodeInfo node_info;
10111
4.84M
    int line, tlen = 0;
10112
4.84M
    xmlNodePtr ret;
10113
4.84M
    int nsNr = ctxt->nsNr;
10114
10115
4.84M
    if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) &&
10116
4.84M
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
10117
0
  xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
10118
0
     "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
10119
0
        xmlParserMaxDepth);
10120
0
  xmlHaltParser(ctxt);
10121
0
  return(-1);
10122
0
    }
10123
10124
    /* Capture start position */
10125
4.84M
    if (ctxt->record_info) {
10126
0
        node_info.begin_pos = ctxt->input->consumed +
10127
0
                          (CUR_PTR - ctxt->input->base);
10128
0
  node_info.begin_line = ctxt->input->line;
10129
0
    }
10130
10131
4.84M
    if (ctxt->spaceNr == 0)
10132
0
  spacePush(ctxt, -1);
10133
4.84M
    else if (*ctxt->space == -2)
10134
928k
  spacePush(ctxt, -1);
10135
3.92M
    else
10136
3.92M
  spacePush(ctxt, *ctxt->space);
10137
10138
4.84M
    line = ctxt->input->line;
10139
4.84M
#ifdef LIBXML_SAX1_ENABLED
10140
4.84M
    if (ctxt->sax2)
10141
3.10M
#endif /* LIBXML_SAX1_ENABLED */
10142
3.10M
        name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
10143
1.74M
#ifdef LIBXML_SAX1_ENABLED
10144
1.74M
    else
10145
1.74M
  name = xmlParseStartTag(ctxt);
10146
4.84M
#endif /* LIBXML_SAX1_ENABLED */
10147
4.84M
    if (ctxt->instate == XML_PARSER_EOF)
10148
191
  return(-1);
10149
4.84M
    if (name == NULL) {
10150
334k
  spacePop(ctxt);
10151
334k
        return(-1);
10152
334k
    }
10153
4.51M
    nameNsPush(ctxt, name, prefix, URI, line, ctxt->nsNr - nsNr);
10154
4.51M
    ret = ctxt->node;
10155
10156
4.51M
#ifdef LIBXML_VALID_ENABLED
10157
    /*
10158
     * [ VC: Root Element Type ]
10159
     * The Name in the document type declaration must match the element
10160
     * type of the root element.
10161
     */
10162
4.51M
    if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
10163
4.51M
        ctxt->node && (ctxt->node == ctxt->myDoc->children))
10164
0
        ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
10165
4.51M
#endif /* LIBXML_VALID_ENABLED */
10166
10167
    /*
10168
     * Check for an Empty Element.
10169
     */
10170
4.51M
    if ((RAW == '/') && (NXT(1) == '>')) {
10171
1.75M
        SKIP(2);
10172
1.75M
  if (ctxt->sax2) {
10173
1.20M
      if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
10174
1.20M
    (!ctxt->disableSAX))
10175
920k
    ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
10176
1.20M
#ifdef LIBXML_SAX1_ENABLED
10177
1.20M
  } else {
10178
553k
      if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
10179
553k
    (!ctxt->disableSAX))
10180
406k
    ctxt->sax->endElement(ctxt->userData, name);
10181
553k
#endif /* LIBXML_SAX1_ENABLED */
10182
553k
  }
10183
1.75M
  namePop(ctxt);
10184
1.75M
  spacePop(ctxt);
10185
1.75M
  if (nsNr != ctxt->nsNr)
10186
7.40k
      nsPop(ctxt, ctxt->nsNr - nsNr);
10187
1.75M
  if ( ret != NULL && ctxt->record_info ) {
10188
0
     node_info.end_pos = ctxt->input->consumed +
10189
0
            (CUR_PTR - ctxt->input->base);
10190
0
     node_info.end_line = ctxt->input->line;
10191
0
     node_info.node = ret;
10192
0
     xmlParserAddNodeInfo(ctxt, &node_info);
10193
0
  }
10194
1.75M
  return(1);
10195
1.75M
    }
10196
2.75M
    if (RAW == '>') {
10197
2.52M
        NEXT1;
10198
2.52M
    } else {
10199
234k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
10200
234k
         "Couldn't find end of Start Tag %s line %d\n",
10201
234k
                    name, line, NULL);
10202
10203
  /*
10204
   * end of parsing of this node.
10205
   */
10206
234k
  nodePop(ctxt);
10207
234k
  namePop(ctxt);
10208
234k
  spacePop(ctxt);
10209
234k
  if (nsNr != ctxt->nsNr)
10210
17.8k
      nsPop(ctxt, ctxt->nsNr - nsNr);
10211
10212
  /*
10213
   * Capture end position and add node
10214
   */
10215
234k
  if ( ret != NULL && ctxt->record_info ) {
10216
0
     node_info.end_pos = ctxt->input->consumed +
10217
0
            (CUR_PTR - ctxt->input->base);
10218
0
     node_info.end_line = ctxt->input->line;
10219
0
     node_info.node = ret;
10220
0
     xmlParserAddNodeInfo(ctxt, &node_info);
10221
0
  }
10222
234k
  return(-1);
10223
234k
    }
10224
10225
2.52M
    return(0);
10226
2.75M
}
10227
10228
/**
10229
 * xmlParseElementEnd:
10230
 * @ctxt:  an XML parser context
10231
 *
10232
 * Parse the end of an XML element. Always consumes '</'.
10233
 */
10234
static void
10235
2.31M
xmlParseElementEnd(xmlParserCtxtPtr ctxt) {
10236
2.31M
    xmlParserNodeInfo node_info;
10237
2.31M
    xmlNodePtr ret = ctxt->node;
10238
10239
2.31M
    if (ctxt->nameNr <= 0) {
10240
0
        if ((RAW == '<') && (NXT(1) == '/'))
10241
0
            SKIP(2);
10242
0
        return;
10243
0
    }
10244
10245
    /*
10246
     * parse the end of tag: '</' should be here.
10247
     */
10248
2.31M
    if (ctxt->sax2) {
10249
1.43M
  xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
10250
1.43M
  namePop(ctxt);
10251
1.43M
    }
10252
877k
#ifdef LIBXML_SAX1_ENABLED
10253
877k
    else
10254
877k
  xmlParseEndTag1(ctxt, 0);
10255
2.31M
#endif /* LIBXML_SAX1_ENABLED */
10256
10257
    /*
10258
     * Capture end position and add node
10259
     */
10260
2.31M
    if ( ret != NULL && ctxt->record_info ) {
10261
0
       node_info.end_pos = ctxt->input->consumed +
10262
0
                          (CUR_PTR - ctxt->input->base);
10263
0
       node_info.end_line = ctxt->input->line;
10264
0
       node_info.node = ret;
10265
0
       xmlParserAddNodeInfo(ctxt, &node_info);
10266
0
    }
10267
2.31M
}
10268
10269
/**
10270
 * xmlParseVersionNum:
10271
 * @ctxt:  an XML parser context
10272
 *
10273
 * DEPRECATED: Internal function, don't use.
10274
 *
10275
 * parse the XML version value.
10276
 *
10277
 * [26] VersionNum ::= '1.' [0-9]+
10278
 *
10279
 * In practice allow [0-9].[0-9]+ at that level
10280
 *
10281
 * Returns the string giving the XML version number, or NULL
10282
 */
10283
xmlChar *
10284
252k
xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
10285
252k
    xmlChar *buf = NULL;
10286
252k
    int len = 0;
10287
252k
    int size = 10;
10288
252k
    xmlChar cur;
10289
10290
252k
    buf = (xmlChar *) xmlMallocAtomic(size);
10291
252k
    if (buf == NULL) {
10292
0
  xmlErrMemory(ctxt, NULL);
10293
0
  return(NULL);
10294
0
    }
10295
252k
    cur = CUR;
10296
252k
    if (!((cur >= '0') && (cur <= '9'))) {
10297
1.78k
  xmlFree(buf);
10298
1.78k
  return(NULL);
10299
1.78k
    }
10300
250k
    buf[len++] = cur;
10301
250k
    NEXT;
10302
250k
    cur=CUR;
10303
250k
    if (cur != '.') {
10304
2.38k
  xmlFree(buf);
10305
2.38k
  return(NULL);
10306
2.38k
    }
10307
248k
    buf[len++] = cur;
10308
248k
    NEXT;
10309
248k
    cur=CUR;
10310
1.08M
    while ((cur >= '0') && (cur <= '9')) {
10311
836k
  if (len + 1 >= size) {
10312
1.86k
      xmlChar *tmp;
10313
10314
1.86k
      size *= 2;
10315
1.86k
      tmp = (xmlChar *) xmlRealloc(buf, size);
10316
1.86k
      if (tmp == NULL) {
10317
0
          xmlFree(buf);
10318
0
    xmlErrMemory(ctxt, NULL);
10319
0
    return(NULL);
10320
0
      }
10321
1.86k
      buf = tmp;
10322
1.86k
  }
10323
836k
  buf[len++] = cur;
10324
836k
  NEXT;
10325
836k
  cur=CUR;
10326
836k
    }
10327
248k
    buf[len] = 0;
10328
248k
    return(buf);
10329
248k
}
10330
10331
/**
10332
 * xmlParseVersionInfo:
10333
 * @ctxt:  an XML parser context
10334
 *
10335
 * DEPRECATED: Internal function, don't use.
10336
 *
10337
 * parse the XML version.
10338
 *
10339
 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
10340
 *
10341
 * [25] Eq ::= S? '=' S?
10342
 *
10343
 * Returns the version string, e.g. "1.0"
10344
 */
10345
10346
xmlChar *
10347
285k
xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
10348
285k
    xmlChar *version = NULL;
10349
10350
285k
    if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
10351
259k
  SKIP(7);
10352
259k
  SKIP_BLANKS;
10353
259k
  if (RAW != '=') {
10354
4.03k
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10355
4.03k
      return(NULL);
10356
4.03k
        }
10357
255k
  NEXT;
10358
255k
  SKIP_BLANKS;
10359
255k
  if (RAW == '"') {
10360
231k
      NEXT;
10361
231k
      version = xmlParseVersionNum(ctxt);
10362
231k
      if (RAW != '"') {
10363
7.88k
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10364
7.88k
      } else
10365
223k
          NEXT;
10366
231k
  } else if (RAW == '\''){
10367
21.3k
      NEXT;
10368
21.3k
      version = xmlParseVersionNum(ctxt);
10369
21.3k
      if (RAW != '\'') {
10370
1.42k
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10371
1.42k
      } else
10372
19.8k
          NEXT;
10373
21.3k
  } else {
10374
2.77k
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10375
2.77k
  }
10376
255k
    }
10377
281k
    return(version);
10378
285k
}
10379
10380
/**
10381
 * xmlParseEncName:
10382
 * @ctxt:  an XML parser context
10383
 *
10384
 * DEPRECATED: Internal function, don't use.
10385
 *
10386
 * parse the XML encoding name
10387
 *
10388
 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
10389
 *
10390
 * Returns the encoding name value or NULL
10391
 */
10392
xmlChar *
10393
117k
xmlParseEncName(xmlParserCtxtPtr ctxt) {
10394
117k
    xmlChar *buf = NULL;
10395
117k
    int len = 0;
10396
117k
    int size = 10;
10397
117k
    xmlChar cur;
10398
10399
117k
    cur = CUR;
10400
117k
    if (((cur >= 'a') && (cur <= 'z')) ||
10401
117k
        ((cur >= 'A') && (cur <= 'Z'))) {
10402
115k
  buf = (xmlChar *) xmlMallocAtomic(size);
10403
115k
  if (buf == NULL) {
10404
0
      xmlErrMemory(ctxt, NULL);
10405
0
      return(NULL);
10406
0
  }
10407
10408
115k
  buf[len++] = cur;
10409
115k
  NEXT;
10410
115k
  cur = CUR;
10411
1.82M
  while (((cur >= 'a') && (cur <= 'z')) ||
10412
1.82M
         ((cur >= 'A') && (cur <= 'Z')) ||
10413
1.82M
         ((cur >= '0') && (cur <= '9')) ||
10414
1.82M
         (cur == '.') || (cur == '_') ||
10415
1.82M
         (cur == '-')) {
10416
1.71M
      if (len + 1 >= size) {
10417
55.8k
          xmlChar *tmp;
10418
10419
55.8k
    size *= 2;
10420
55.8k
    tmp = (xmlChar *) xmlRealloc(buf, size);
10421
55.8k
    if (tmp == NULL) {
10422
0
        xmlErrMemory(ctxt, NULL);
10423
0
        xmlFree(buf);
10424
0
        return(NULL);
10425
0
    }
10426
55.8k
    buf = tmp;
10427
55.8k
      }
10428
1.71M
      buf[len++] = cur;
10429
1.71M
      NEXT;
10430
1.71M
      cur = CUR;
10431
1.71M
      if (cur == 0) {
10432
1.72k
          SHRINK;
10433
1.72k
    GROW;
10434
1.72k
    cur = CUR;
10435
1.72k
      }
10436
1.71M
        }
10437
115k
  buf[len] = 0;
10438
115k
    } else {
10439
2.28k
  xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
10440
2.28k
    }
10441
117k
    return(buf);
10442
117k
}
10443
10444
/**
10445
 * xmlParseEncodingDecl:
10446
 * @ctxt:  an XML parser context
10447
 *
10448
 * DEPRECATED: Internal function, don't use.
10449
 *
10450
 * parse the XML encoding declaration
10451
 *
10452
 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' |  "'" EncName "'")
10453
 *
10454
 * this setups the conversion filters.
10455
 *
10456
 * Returns the encoding value or NULL
10457
 */
10458
10459
const xmlChar *
10460
198k
xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
10461
198k
    xmlChar *encoding = NULL;
10462
10463
198k
    SKIP_BLANKS;
10464
198k
    if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
10465
121k
  SKIP(8);
10466
121k
  SKIP_BLANKS;
10467
121k
  if (RAW != '=') {
10468
1.60k
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10469
1.60k
      return(NULL);
10470
1.60k
        }
10471
120k
  NEXT;
10472
120k
  SKIP_BLANKS;
10473
120k
  if (RAW == '"') {
10474
104k
      NEXT;
10475
104k
      encoding = xmlParseEncName(ctxt);
10476
104k
      if (RAW != '"') {
10477
7.02k
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10478
7.02k
    xmlFree((xmlChar *) encoding);
10479
7.02k
    return(NULL);
10480
7.02k
      } else
10481
97.3k
          NEXT;
10482
104k
  } else if (RAW == '\''){
10483
13.5k
      NEXT;
10484
13.5k
      encoding = xmlParseEncName(ctxt);
10485
13.5k
      if (RAW != '\'') {
10486
869
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10487
869
    xmlFree((xmlChar *) encoding);
10488
869
    return(NULL);
10489
869
      } else
10490
12.6k
          NEXT;
10491
13.5k
  } else {
10492
2.26k
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10493
2.26k
  }
10494
10495
        /*
10496
         * Non standard parsing, allowing the user to ignore encoding
10497
         */
10498
112k
        if (ctxt->options & XML_PARSE_IGNORE_ENC) {
10499
52.8k
      xmlFree((xmlChar *) encoding);
10500
52.8k
            return(NULL);
10501
52.8k
  }
10502
10503
  /*
10504
   * UTF-16 encoding switch has already taken place at this stage,
10505
   * more over the little-endian/big-endian selection is already done
10506
   */
10507
59.4k
        if ((encoding != NULL) &&
10508
59.4k
      ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
10509
58.1k
       (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
10510
      /*
10511
       * If no encoding was passed to the parser, that we are
10512
       * using UTF-16 and no decoder is present i.e. the
10513
       * document is apparently UTF-8 compatible, then raise an
10514
       * encoding mismatch fatal error
10515
       */
10516
2.55k
      if ((ctxt->encoding == NULL) &&
10517
2.55k
          (ctxt->input->buf != NULL) &&
10518
2.55k
          (ctxt->input->buf->encoder == NULL)) {
10519
2.11k
    xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING,
10520
2.11k
      "Document labelled UTF-16 but has UTF-8 content\n");
10521
2.11k
      }
10522
2.55k
      if (ctxt->encoding != NULL)
10523
435
    xmlFree((xmlChar *) ctxt->encoding);
10524
2.55k
      ctxt->encoding = encoding;
10525
2.55k
  }
10526
  /*
10527
   * UTF-8 encoding is handled natively
10528
   */
10529
56.9k
        else if ((encoding != NULL) &&
10530
56.9k
      ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
10531
55.5k
       (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
10532
20.6k
      if (ctxt->encoding != NULL)
10533
963
    xmlFree((xmlChar *) ctxt->encoding);
10534
20.6k
      ctxt->encoding = encoding;
10535
20.6k
  }
10536
36.3k
  else if (encoding != NULL) {
10537
34.9k
      xmlCharEncodingHandlerPtr handler;
10538
10539
34.9k
      if (ctxt->input->encoding != NULL)
10540
0
    xmlFree((xmlChar *) ctxt->input->encoding);
10541
34.9k
      ctxt->input->encoding = encoding;
10542
10543
34.9k
            handler = xmlFindCharEncodingHandler((const char *) encoding);
10544
34.9k
      if (handler != NULL) {
10545
33.9k
    if (xmlSwitchToEncoding(ctxt, handler) < 0) {
10546
        /* failed to convert */
10547
1.08k
        ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
10548
1.08k
        return(NULL);
10549
1.08k
    }
10550
33.9k
      } else {
10551
1.05k
    xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
10552
1.05k
      "Unsupported encoding %s\n", encoding);
10553
1.05k
    return(NULL);
10554
1.05k
      }
10555
34.9k
  }
10556
59.4k
    }
10557
133k
    return(encoding);
10558
198k
}
10559
10560
/**
10561
 * xmlParseSDDecl:
10562
 * @ctxt:  an XML parser context
10563
 *
10564
 * DEPRECATED: Internal function, don't use.
10565
 *
10566
 * parse the XML standalone declaration
10567
 *
10568
 * [32] SDDecl ::= S 'standalone' Eq
10569
 *                 (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
10570
 *
10571
 * [ VC: Standalone Document Declaration ]
10572
 * TODO The standalone document declaration must have the value "no"
10573
 * if any external markup declarations contain declarations of:
10574
 *  - attributes with default values, if elements to which these
10575
 *    attributes apply appear in the document without specifications
10576
 *    of values for these attributes, or
10577
 *  - entities (other than amp, lt, gt, apos, quot), if references
10578
 *    to those entities appear in the document, or
10579
 *  - attributes with values subject to normalization, where the
10580
 *    attribute appears in the document with a value which will change
10581
 *    as a result of normalization, or
10582
 *  - element types with element content, if white space occurs directly
10583
 *    within any instance of those types.
10584
 *
10585
 * Returns:
10586
 *   1 if standalone="yes"
10587
 *   0 if standalone="no"
10588
 *  -2 if standalone attribute is missing or invalid
10589
 *    (A standalone value of -2 means that the XML declaration was found,
10590
 *     but no value was specified for the standalone attribute).
10591
 */
10592
10593
int
10594
128k
xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
10595
128k
    int standalone = -2;
10596
10597
128k
    SKIP_BLANKS;
10598
128k
    if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
10599
23.8k
  SKIP(10);
10600
23.8k
        SKIP_BLANKS;
10601
23.8k
  if (RAW != '=') {
10602
177
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10603
177
      return(standalone);
10604
177
        }
10605
23.6k
  NEXT;
10606
23.6k
  SKIP_BLANKS;
10607
23.6k
        if (RAW == '\''){
10608
9.32k
      NEXT;
10609
9.32k
      if ((RAW == 'n') && (NXT(1) == 'o')) {
10610
7.55k
          standalone = 0;
10611
7.55k
                SKIP(2);
10612
7.55k
      } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10613
1.76k
                 (NXT(2) == 's')) {
10614
1.46k
          standalone = 1;
10615
1.46k
    SKIP(3);
10616
1.46k
            } else {
10617
303
    xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10618
303
      }
10619
9.32k
      if (RAW != '\'') {
10620
426
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10621
426
      } else
10622
8.89k
          NEXT;
10623
14.3k
  } else if (RAW == '"'){
10624
14.2k
      NEXT;
10625
14.2k
      if ((RAW == 'n') && (NXT(1) == 'o')) {
10626
6.17k
          standalone = 0;
10627
6.17k
    SKIP(2);
10628
8.02k
      } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10629
8.02k
                 (NXT(2) == 's')) {
10630
7.61k
          standalone = 1;
10631
7.61k
                SKIP(3);
10632
7.61k
            } else {
10633
417
    xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10634
417
      }
10635
14.2k
      if (RAW != '"') {
10636
606
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10637
606
      } else
10638
13.5k
          NEXT;
10639
14.2k
  } else {
10640
114
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10641
114
        }
10642
23.6k
    }
10643
128k
    return(standalone);
10644
128k
}
10645
10646
/**
10647
 * xmlParseXMLDecl:
10648
 * @ctxt:  an XML parser context
10649
 *
10650
 * DEPRECATED: Internal function, don't use.
10651
 *
10652
 * parse an XML declaration header
10653
 *
10654
 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10655
 */
10656
10657
void
10658
235k
xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
10659
235k
    xmlChar *version;
10660
10661
    /*
10662
     * This value for standalone indicates that the document has an
10663
     * XML declaration but it does not have a standalone attribute.
10664
     * It will be overwritten later if a standalone attribute is found.
10665
     */
10666
235k
    ctxt->input->standalone = -2;
10667
10668
    /*
10669
     * We know that '<?xml' is here.
10670
     */
10671
235k
    SKIP(5);
10672
10673
235k
    if (!IS_BLANK_CH(RAW)) {
10674
0
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10675
0
                 "Blank needed after '<?xml'\n");
10676
0
    }
10677
235k
    SKIP_BLANKS;
10678
10679
    /*
10680
     * We must have the VersionInfo here.
10681
     */
10682
235k
    version = xmlParseVersionInfo(ctxt);
10683
235k
    if (version == NULL) {
10684
18.8k
  xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
10685
216k
    } else {
10686
216k
  if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10687
      /*
10688
       * Changed here for XML-1.0 5th edition
10689
       */
10690
2.14k
      if (ctxt->options & XML_PARSE_OLD10) {
10691
733
    xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10692
733
                "Unsupported version '%s'\n",
10693
733
                version);
10694
1.41k
      } else {
10695
1.41k
          if ((version[0] == '1') && ((version[1] == '.'))) {
10696
1.26k
        xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10697
1.26k
                      "Unsupported version '%s'\n",
10698
1.26k
          version, NULL);
10699
1.26k
    } else {
10700
147
        xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10701
147
              "Unsupported version '%s'\n",
10702
147
              version);
10703
147
    }
10704
1.41k
      }
10705
2.14k
  }
10706
216k
  if (ctxt->version != NULL)
10707
0
      xmlFree((void *) ctxt->version);
10708
216k
  ctxt->version = version;
10709
216k
    }
10710
10711
    /*
10712
     * We may have the encoding declaration
10713
     */
10714
235k
    if (!IS_BLANK_CH(RAW)) {
10715
110k
        if ((RAW == '?') && (NXT(1) == '>')) {
10716
87.1k
      SKIP(2);
10717
87.1k
      return;
10718
87.1k
  }
10719
22.9k
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10720
22.9k
    }
10721
148k
    xmlParseEncodingDecl(ctxt);
10722
148k
    if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10723
148k
         (ctxt->instate == XML_PARSER_EOF)) {
10724
  /*
10725
   * The XML REC instructs us to stop parsing right here
10726
   */
10727
528
        return;
10728
528
    }
10729
10730
    /*
10731
     * We may have the standalone status.
10732
     */
10733
147k
    if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
10734
20.7k
        if ((RAW == '?') && (NXT(1) == '>')) {
10735
19.0k
      SKIP(2);
10736
19.0k
      return;
10737
19.0k
  }
10738
1.75k
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10739
1.75k
    }
10740
10741
    /*
10742
     * We can grow the input buffer freely at that point
10743
     */
10744
128k
    GROW;
10745
10746
128k
    SKIP_BLANKS;
10747
128k
    ctxt->input->standalone = xmlParseSDDecl(ctxt);
10748
10749
128k
    SKIP_BLANKS;
10750
128k
    if ((RAW == '?') && (NXT(1) == '>')) {
10751
89.8k
        SKIP(2);
10752
89.8k
    } else if (RAW == '>') {
10753
        /* Deprecated old WD ... */
10754
777
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10755
777
  NEXT;
10756
37.9k
    } else {
10757
37.9k
        int c;
10758
10759
37.9k
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10760
2.60M
        while ((c = CUR) != 0) {
10761
2.59M
            NEXT;
10762
2.59M
            if (c == '>')
10763
34.6k
                break;
10764
2.59M
        }
10765
37.9k
    }
10766
128k
}
10767
10768
/**
10769
 * xmlParseMisc:
10770
 * @ctxt:  an XML parser context
10771
 *
10772
 * DEPRECATED: Internal function, don't use.
10773
 *
10774
 * parse an XML Misc* optional field.
10775
 *
10776
 * [27] Misc ::= Comment | PI |  S
10777
 */
10778
10779
void
10780
341k
xmlParseMisc(xmlParserCtxtPtr ctxt) {
10781
380k
    while (ctxt->instate != XML_PARSER_EOF) {
10782
380k
        SKIP_BLANKS;
10783
380k
        GROW;
10784
380k
        if ((RAW == '<') && (NXT(1) == '?')) {
10785
23.4k
      xmlParsePI(ctxt);
10786
357k
        } else if (CMP4(CUR_PTR, '<', '!', '-', '-')) {
10787
15.5k
      xmlParseComment(ctxt);
10788
341k
        } else {
10789
341k
            break;
10790
341k
        }
10791
380k
    }
10792
341k
}
10793
10794
/**
10795
 * xmlParseDocument:
10796
 * @ctxt:  an XML parser context
10797
 *
10798
 * parse an XML document (and build a tree if using the standard SAX
10799
 * interface).
10800
 *
10801
 * [1] document ::= prolog element Misc*
10802
 *
10803
 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
10804
 *
10805
 * Returns 0, -1 in case of error. the parser context is augmented
10806
 *                as a result of the parsing.
10807
 */
10808
10809
int
10810
153k
xmlParseDocument(xmlParserCtxtPtr ctxt) {
10811
153k
    xmlChar start[4];
10812
153k
    xmlCharEncoding enc;
10813
10814
153k
    xmlInitParser();
10815
10816
153k
    if ((ctxt == NULL) || (ctxt->input == NULL))
10817
0
        return(-1);
10818
10819
153k
    GROW;
10820
10821
    /*
10822
     * SAX: detecting the level.
10823
     */
10824
153k
    xmlDetectSAX2(ctxt);
10825
10826
    /*
10827
     * SAX: beginning of the document processing.
10828
     */
10829
153k
    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10830
153k
        ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10831
153k
    if (ctxt->instate == XML_PARSER_EOF)
10832
0
  return(-1);
10833
10834
153k
    if ((ctxt->encoding == NULL) &&
10835
153k
        ((ctxt->input->end - ctxt->input->cur) >= 4)) {
10836
  /*
10837
   * Get the 4 first bytes and decode the charset
10838
   * if enc != XML_CHAR_ENCODING_NONE
10839
   * plug some encoding conversion routines.
10840
   */
10841
152k
  start[0] = RAW;
10842
152k
  start[1] = NXT(1);
10843
152k
  start[2] = NXT(2);
10844
152k
  start[3] = NXT(3);
10845
152k
  enc = xmlDetectCharEncoding(&start[0], 4);
10846
152k
  if (enc != XML_CHAR_ENCODING_NONE) {
10847
85.0k
      xmlSwitchEncoding(ctxt, enc);
10848
85.0k
  }
10849
152k
    }
10850
10851
10852
153k
    if (CUR == 0) {
10853
287
  xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10854
287
  return(-1);
10855
287
    }
10856
10857
    /*
10858
     * Check for the XMLDecl in the Prolog.
10859
     * do not GROW here to avoid the detected encoder to decode more
10860
     * than just the first line, unless the amount of data is really
10861
     * too small to hold "<?xml version="1.0" encoding="foo"
10862
     */
10863
153k
    if ((ctxt->input->end - ctxt->input->cur) < 35) {
10864
5.75k
       GROW;
10865
5.75k
    }
10866
153k
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10867
10868
  /*
10869
   * Note that we will switch encoding on the fly.
10870
   */
10871
78.5k
  xmlParseXMLDecl(ctxt);
10872
78.5k
  if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10873
78.5k
      (ctxt->instate == XML_PARSER_EOF)) {
10874
      /*
10875
       * The XML REC instructs us to stop parsing right here
10876
       */
10877
176
      return(-1);
10878
176
  }
10879
78.3k
  ctxt->standalone = ctxt->input->standalone;
10880
78.3k
  SKIP_BLANKS;
10881
78.3k
    } else {
10882
74.8k
  ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10883
74.8k
    }
10884
153k
    if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10885
149k
        ctxt->sax->startDocument(ctxt->userData);
10886
153k
    if (ctxt->instate == XML_PARSER_EOF)
10887
0
  return(-1);
10888
153k
    if ((ctxt->myDoc != NULL) && (ctxt->input != NULL) &&
10889
153k
        (ctxt->input->buf != NULL) && (ctxt->input->buf->compressed >= 0)) {
10890
0
  ctxt->myDoc->compression = ctxt->input->buf->compressed;
10891
0
    }
10892
10893
    /*
10894
     * The Misc part of the Prolog
10895
     */
10896
153k
    xmlParseMisc(ctxt);
10897
10898
    /*
10899
     * Then possibly doc type declaration(s) and more Misc
10900
     * (doctypedecl Misc*)?
10901
     */
10902
153k
    GROW;
10903
153k
    if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
10904
10905
98.1k
  ctxt->inSubset = 1;
10906
98.1k
  xmlParseDocTypeDecl(ctxt);
10907
98.1k
  if (RAW == '[') {
10908
78.7k
      ctxt->instate = XML_PARSER_DTD;
10909
78.7k
      xmlParseInternalSubset(ctxt);
10910
78.7k
      if (ctxt->instate == XML_PARSER_EOF)
10911
19.9k
    return(-1);
10912
78.7k
  }
10913
10914
  /*
10915
   * Create and update the external subset.
10916
   */
10917
78.1k
  ctxt->inSubset = 2;
10918
78.1k
  if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10919
78.1k
      (!ctxt->disableSAX))
10920
74.2k
      ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10921
74.2k
                                ctxt->extSubSystem, ctxt->extSubURI);
10922
78.1k
  if (ctxt->instate == XML_PARSER_EOF)
10923
4.60k
      return(-1);
10924
73.5k
  ctxt->inSubset = 0;
10925
10926
73.5k
        xmlCleanSpecialAttr(ctxt);
10927
10928
73.5k
  ctxt->instate = XML_PARSER_PROLOG;
10929
73.5k
  xmlParseMisc(ctxt);
10930
73.5k
    }
10931
10932
    /*
10933
     * Time to start parsing the tree itself
10934
     */
10935
128k
    GROW;
10936
128k
    if (RAW != '<') {
10937
13.4k
  xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10938
13.4k
           "Start tag expected, '<' not found\n");
10939
115k
    } else {
10940
115k
  ctxt->instate = XML_PARSER_CONTENT;
10941
115k
  xmlParseElement(ctxt);
10942
115k
  ctxt->instate = XML_PARSER_EPILOG;
10943
10944
10945
  /*
10946
   * The Misc part at the end
10947
   */
10948
115k
  xmlParseMisc(ctxt);
10949
10950
115k
  if (RAW != 0) {
10951
31.3k
      xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
10952
31.3k
  }
10953
115k
  ctxt->instate = XML_PARSER_EOF;
10954
115k
    }
10955
10956
    /*
10957
     * SAX: end of the document processing.
10958
     */
10959
128k
    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10960
128k
        ctxt->sax->endDocument(ctxt->userData);
10961
10962
    /*
10963
     * Remove locally kept entity definitions if the tree was not built
10964
     */
10965
128k
    if ((ctxt->myDoc != NULL) &&
10966
128k
  (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
10967
177
  xmlFreeDoc(ctxt->myDoc);
10968
177
  ctxt->myDoc = NULL;
10969
177
    }
10970
10971
128k
    if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) {
10972
16.4k
        ctxt->myDoc->properties |= XML_DOC_WELLFORMED;
10973
16.4k
  if (ctxt->valid)
10974
12.0k
      ctxt->myDoc->properties |= XML_DOC_DTDVALID;
10975
16.4k
  if (ctxt->nsWellFormed)
10976
15.4k
      ctxt->myDoc->properties |= XML_DOC_NSVALID;
10977
16.4k
  if (ctxt->options & XML_PARSE_OLD10)
10978
2.46k
      ctxt->myDoc->properties |= XML_DOC_OLD10;
10979
16.4k
    }
10980
128k
    if (! ctxt->wellFormed) {
10981
112k
  ctxt->valid = 0;
10982
112k
  return(-1);
10983
112k
    }
10984
16.4k
    return(0);
10985
128k
}
10986
10987
/**
10988
 * xmlParseExtParsedEnt:
10989
 * @ctxt:  an XML parser context
10990
 *
10991
 * parse a general parsed entity
10992
 * An external general parsed entity is well-formed if it matches the
10993
 * production labeled extParsedEnt.
10994
 *
10995
 * [78] extParsedEnt ::= TextDecl? content
10996
 *
10997
 * Returns 0, -1 in case of error. the parser context is augmented
10998
 *                as a result of the parsing.
10999
 */
11000
11001
int
11002
0
xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
11003
0
    xmlChar start[4];
11004
0
    xmlCharEncoding enc;
11005
11006
0
    if ((ctxt == NULL) || (ctxt->input == NULL))
11007
0
        return(-1);
11008
11009
0
    xmlDetectSAX2(ctxt);
11010
11011
0
    GROW;
11012
11013
    /*
11014
     * SAX: beginning of the document processing.
11015
     */
11016
0
    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11017
0
        ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
11018
11019
    /*
11020
     * Get the 4 first bytes and decode the charset
11021
     * if enc != XML_CHAR_ENCODING_NONE
11022
     * plug some encoding conversion routines.
11023
     */
11024
0
    if ((ctxt->input->end - ctxt->input->cur) >= 4) {
11025
0
  start[0] = RAW;
11026
0
  start[1] = NXT(1);
11027
0
  start[2] = NXT(2);
11028
0
  start[3] = NXT(3);
11029
0
  enc = xmlDetectCharEncoding(start, 4);
11030
0
  if (enc != XML_CHAR_ENCODING_NONE) {
11031
0
      xmlSwitchEncoding(ctxt, enc);
11032
0
  }
11033
0
    }
11034
11035
11036
0
    if (CUR == 0) {
11037
0
  xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11038
0
    }
11039
11040
    /*
11041
     * Check for the XMLDecl in the Prolog.
11042
     */
11043
0
    GROW;
11044
0
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
11045
11046
  /*
11047
   * Note that we will switch encoding on the fly.
11048
   */
11049
0
  xmlParseXMLDecl(ctxt);
11050
0
  if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
11051
      /*
11052
       * The XML REC instructs us to stop parsing right here
11053
       */
11054
0
      return(-1);
11055
0
  }
11056
0
  SKIP_BLANKS;
11057
0
    } else {
11058
0
  ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11059
0
    }
11060
0
    if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
11061
0
        ctxt->sax->startDocument(ctxt->userData);
11062
0
    if (ctxt->instate == XML_PARSER_EOF)
11063
0
  return(-1);
11064
11065
    /*
11066
     * Doing validity checking on chunk doesn't make sense
11067
     */
11068
0
    ctxt->instate = XML_PARSER_CONTENT;
11069
0
    ctxt->validate = 0;
11070
0
    ctxt->loadsubset = 0;
11071
0
    ctxt->depth = 0;
11072
11073
0
    xmlParseContent(ctxt);
11074
0
    if (ctxt->instate == XML_PARSER_EOF)
11075
0
  return(-1);
11076
11077
0
    if ((RAW == '<') && (NXT(1) == '/')) {
11078
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11079
0
    } else if (RAW != 0) {
11080
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
11081
0
    }
11082
11083
    /*
11084
     * SAX: end of the document processing.
11085
     */
11086
0
    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11087
0
        ctxt->sax->endDocument(ctxt->userData);
11088
11089
0
    if (! ctxt->wellFormed) return(-1);
11090
0
    return(0);
11091
0
}
11092
11093
#ifdef LIBXML_PUSH_ENABLED
11094
/************************************************************************
11095
 *                  *
11096
 *    Progressive parsing interfaces        *
11097
 *                  *
11098
 ************************************************************************/
11099
11100
/**
11101
 * xmlParseLookupChar:
11102
 * @ctxt:  an XML parser context
11103
 * @c:  character
11104
 *
11105
 * Check whether the input buffer contains a character.
11106
 */
11107
static int
11108
4.35M
xmlParseLookupChar(xmlParserCtxtPtr ctxt, int c) {
11109
4.35M
    const xmlChar *cur;
11110
11111
4.35M
    if (ctxt->checkIndex == 0) {
11112
4.20M
        cur = ctxt->input->cur + 1;
11113
4.20M
    } else {
11114
153k
        cur = ctxt->input->cur + ctxt->checkIndex;
11115
153k
    }
11116
11117
4.35M
    if (memchr(cur, c, ctxt->input->end - cur) == NULL) {
11118
164k
        ctxt->checkIndex = ctxt->input->end - ctxt->input->cur;
11119
164k
        return(0);
11120
4.19M
    } else {
11121
4.19M
        ctxt->checkIndex = 0;
11122
4.19M
        return(1);
11123
4.19M
    }
11124
4.35M
}
11125
11126
/**
11127
 * xmlParseLookupString:
11128
 * @ctxt:  an XML parser context
11129
 * @startDelta: delta to apply at the start
11130
 * @str:  string
11131
 * @strLen:  length of string
11132
 *
11133
 * Check whether the input buffer contains a string.
11134
 */
11135
static const xmlChar *
11136
xmlParseLookupString(xmlParserCtxtPtr ctxt, size_t startDelta,
11137
1.10M
                     const char *str, size_t strLen) {
11138
1.10M
    const xmlChar *cur, *term;
11139
11140
1.10M
    if (ctxt->checkIndex == 0) {
11141
605k
        cur = ctxt->input->cur + startDelta;
11142
605k
    } else {
11143
502k
        cur = ctxt->input->cur + ctxt->checkIndex;
11144
502k
    }
11145
11146
1.10M
    term = BAD_CAST strstr((const char *) cur, str);
11147
1.10M
    if (term == NULL) {
11148
629k
        const xmlChar *end = ctxt->input->end;
11149
11150
        /* Rescan (strLen - 1) characters. */
11151
629k
        if ((size_t) (end - cur) < strLen)
11152
10.4k
            end = cur;
11153
619k
        else
11154
619k
            end -= strLen - 1;
11155
629k
        ctxt->checkIndex = end - ctxt->input->cur;
11156
629k
    } else {
11157
478k
        ctxt->checkIndex = 0;
11158
478k
    }
11159
11160
1.10M
    return(term);
11161
1.10M
}
11162
11163
/**
11164
 * xmlParseLookupCharData:
11165
 * @ctxt:  an XML parser context
11166
 *
11167
 * Check whether the input buffer contains terminated char data.
11168
 */
11169
static int
11170
6.68M
xmlParseLookupCharData(xmlParserCtxtPtr ctxt) {
11171
6.68M
    const xmlChar *cur = ctxt->input->cur + ctxt->checkIndex;
11172
6.68M
    const xmlChar *end = ctxt->input->end;
11173
11174
119M
    while (cur < end) {
11175
118M
        if ((*cur == '<') || (*cur == '&')) {
11176
5.94M
            ctxt->checkIndex = 0;
11177
5.94M
            return(1);
11178
5.94M
        }
11179
112M
        cur++;
11180
112M
    }
11181
11182
731k
    ctxt->checkIndex = cur - ctxt->input->cur;
11183
731k
    return(0);
11184
6.68M
}
11185
11186
/**
11187
 * xmlParseLookupGt:
11188
 * @ctxt:  an XML parser context
11189
 *
11190
 * Check whether there's enough data in the input buffer to finish parsing
11191
 * a start tag. This has to take quotes into account.
11192
 */
11193
static int
11194
6.69M
xmlParseLookupGt(xmlParserCtxtPtr ctxt) {
11195
6.69M
    const xmlChar *cur;
11196
6.69M
    const xmlChar *end = ctxt->input->end;
11197
6.69M
    int state = ctxt->endCheckState;
11198
11199
6.69M
    if (ctxt->checkIndex == 0)
11200
5.37M
        cur = ctxt->input->cur + 1;
11201
1.32M
    else
11202
1.32M
        cur = ctxt->input->cur + ctxt->checkIndex;
11203
11204
285M
    while (cur < end) {
11205
284M
        if (state) {
11206
160M
            if (*cur == state)
11207
7.09M
                state = 0;
11208
160M
        } else if (*cur == '\'' || *cur == '"') {
11209
7.13M
            state = *cur;
11210
116M
        } else if (*cur == '>') {
11211
5.30M
            ctxt->checkIndex = 0;
11212
5.30M
            ctxt->endCheckState = 0;
11213
5.30M
            return(1);
11214
5.30M
        }
11215
278M
        cur++;
11216
278M
    }
11217
11218
1.39M
    ctxt->checkIndex = cur - ctxt->input->cur;
11219
1.39M
    ctxt->endCheckState = state;
11220
1.39M
    return(0);
11221
6.69M
}
11222
11223
/**
11224
 * xmlParseLookupInternalSubset:
11225
 * @ctxt:  an XML parser context
11226
 *
11227
 * Check whether there's enough data in the input buffer to finish parsing
11228
 * the internal subset.
11229
 */
11230
static int
11231
531k
xmlParseLookupInternalSubset(xmlParserCtxtPtr ctxt) {
11232
    /*
11233
     * Sorry, but progressive parsing of the internal subset is not
11234
     * supported. We first check that the full content of the internal
11235
     * subset is available and parsing is launched only at that point.
11236
     * Internal subset ends with "']' S? '>'" in an unescaped section and
11237
     * not in a ']]>' sequence which are conditional sections.
11238
     */
11239
531k
    const xmlChar *cur, *start;
11240
531k
    const xmlChar *end = ctxt->input->end;
11241
531k
    int state = ctxt->endCheckState;
11242
11243
531k
    if (ctxt->checkIndex == 0) {
11244
151k
        cur = ctxt->input->cur + 1;
11245
380k
    } else {
11246
380k
        cur = ctxt->input->cur + ctxt->checkIndex;
11247
380k
    }
11248
531k
    start = cur;
11249
11250
91.0M
    while (cur < end) {
11251
90.6M
        if (state == '-') {
11252
12.4M
            if ((*cur == '-') &&
11253
12.4M
                (cur[1] == '-') &&
11254
12.4M
                (cur[2] == '>')) {
11255
103k
                state = 0;
11256
103k
                cur += 3;
11257
103k
                start = cur;
11258
103k
                continue;
11259
103k
            }
11260
12.4M
        }
11261
78.2M
        else if (state == ']') {
11262
151k
            if (*cur == '>') {
11263
126k
                ctxt->checkIndex = 0;
11264
126k
                ctxt->endCheckState = 0;
11265
126k
                return(1);
11266
126k
            }
11267
25.2k
            if (IS_BLANK_CH(*cur)) {
11268
16.2k
                state = ' ';
11269
16.2k
            } else if (*cur != ']') {
11270
3.75k
                state = 0;
11271
3.75k
                start = cur;
11272
3.75k
                continue;
11273
3.75k
            }
11274
25.2k
        }
11275
78.0M
        else if (state == ' ') {
11276
190k
            if (*cur == '>') {
11277
538
                ctxt->checkIndex = 0;
11278
538
                ctxt->endCheckState = 0;
11279
538
                return(1);
11280
538
            }
11281
189k
            if (!IS_BLANK_CH(*cur)) {
11282
15.7k
                state = 0;
11283
15.7k
                start = cur;
11284
15.7k
                continue;
11285
15.7k
            }
11286
189k
        }
11287
77.8M
        else if (state != 0) {
11288
34.4M
            if (*cur == state) {
11289
999k
                state = 0;
11290
999k
                start = cur + 1;
11291
999k
            }
11292
34.4M
        }
11293
43.4M
        else if (*cur == '<') {
11294
1.22M
            if ((cur[1] == '!') &&
11295
1.22M
                (cur[2] == '-') &&
11296
1.22M
                (cur[3] == '-')) {
11297
105k
                state = '-';
11298
105k
                cur += 4;
11299
                /* Don't treat <!--> as comment */
11300
105k
                start = cur;
11301
105k
                continue;
11302
105k
            }
11303
1.22M
        }
11304
42.2M
        else if ((*cur == '"') || (*cur == '\'') || (*cur == ']')) {
11305
1.15M
            state = *cur;
11306
1.15M
        }
11307
11308
90.2M
        cur++;
11309
90.2M
    }
11310
11311
    /*
11312
     * Rescan the three last characters to detect "<!--" and "-->"
11313
     * split across chunks.
11314
     */
11315
404k
    if ((state == 0) || (state == '-')) {
11316
233k
        if (cur - start < 3)
11317
20.5k
            cur = start;
11318
212k
        else
11319
212k
            cur -= 3;
11320
233k
    }
11321
404k
    ctxt->checkIndex = cur - ctxt->input->cur;
11322
404k
    ctxt->endCheckState = state;
11323
404k
    return(0);
11324
531k
}
11325
11326
/**
11327
 * xmlCheckCdataPush:
11328
 * @cur: pointer to the block of characters
11329
 * @len: length of the block in bytes
11330
 * @complete: 1 if complete CDATA block is passed in, 0 if partial block
11331
 *
11332
 * Check that the block of characters is okay as SCdata content [20]
11333
 *
11334
 * Returns the number of bytes to pass if okay, a negative index where an
11335
 *         UTF-8 error occurred otherwise
11336
 */
11337
static int
11338
235k
xmlCheckCdataPush(const xmlChar *utf, int len, int complete) {
11339
235k
    int ix;
11340
235k
    unsigned char c;
11341
235k
    int codepoint;
11342
11343
235k
    if ((utf == NULL) || (len <= 0))
11344
3.59k
        return(0);
11345
11346
10.8M
    for (ix = 0; ix < len;) {      /* string is 0-terminated */
11347
10.8M
        c = utf[ix];
11348
10.8M
        if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
11349
9.74M
      if (c >= 0x20)
11350
8.93M
    ix++;
11351
808k
      else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
11352
797k
          ix++;
11353
11.5k
      else
11354
11.5k
          return(-ix);
11355
9.74M
  } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
11356
371k
      if (ix + 2 > len) return(complete ? -ix : ix);
11357
366k
      if ((utf[ix+1] & 0xc0 ) != 0x80)
11358
67.1k
          return(-ix);
11359
299k
      codepoint = (utf[ix] & 0x1f) << 6;
11360
299k
      codepoint |= utf[ix+1] & 0x3f;
11361
299k
      if (!xmlIsCharQ(codepoint))
11362
5.36k
          return(-ix);
11363
293k
      ix += 2;
11364
702k
  } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
11365
318k
      if (ix + 3 > len) return(complete ? -ix : ix);
11366
313k
      if (((utf[ix+1] & 0xc0) != 0x80) ||
11367
313k
          ((utf[ix+2] & 0xc0) != 0x80))
11368
21.4k
        return(-ix);
11369
292k
      codepoint = (utf[ix] & 0xf) << 12;
11370
292k
      codepoint |= (utf[ix+1] & 0x3f) << 6;
11371
292k
      codepoint |= utf[ix+2] & 0x3f;
11372
292k
      if (!xmlIsCharQ(codepoint))
11373
8.83k
          return(-ix);
11374
283k
      ix += 3;
11375
383k
  } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
11376
362k
      if (ix + 4 > len) return(complete ? -ix : ix);
11377
358k
      if (((utf[ix+1] & 0xc0) != 0x80) ||
11378
358k
          ((utf[ix+2] & 0xc0) != 0x80) ||
11379
358k
    ((utf[ix+3] & 0xc0) != 0x80))
11380
24.5k
        return(-ix);
11381
333k
      codepoint = (utf[ix] & 0x7) << 18;
11382
333k
      codepoint |= (utf[ix+1] & 0x3f) << 12;
11383
333k
      codepoint |= (utf[ix+2] & 0x3f) << 6;
11384
333k
      codepoint |= utf[ix+3] & 0x3f;
11385
333k
      if (!xmlIsCharQ(codepoint))
11386
10.8k
          return(-ix);
11387
322k
      ix += 4;
11388
322k
  } else       /* unknown encoding */
11389
21.1k
      return(-ix);
11390
10.8M
      }
11391
46.2k
      return(ix);
11392
231k
}
11393
11394
/**
11395
 * xmlParseTryOrFinish:
11396
 * @ctxt:  an XML parser context
11397
 * @terminate:  last chunk indicator
11398
 *
11399
 * Try to progress on parsing
11400
 *
11401
 * Returns zero if no parsing was possible
11402
 */
11403
static int
11404
3.99M
xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
11405
3.99M
    int ret = 0;
11406
3.99M
    int avail, tlen;
11407
3.99M
    xmlChar cur, next;
11408
11409
3.99M
    if (ctxt->input == NULL)
11410
0
        return(0);
11411
11412
#ifdef DEBUG_PUSH
11413
    switch (ctxt->instate) {
11414
  case XML_PARSER_EOF:
11415
      xmlGenericError(xmlGenericErrorContext,
11416
        "PP: try EOF\n"); break;
11417
  case XML_PARSER_START:
11418
      xmlGenericError(xmlGenericErrorContext,
11419
        "PP: try START\n"); break;
11420
  case XML_PARSER_MISC:
11421
      xmlGenericError(xmlGenericErrorContext,
11422
        "PP: try MISC\n");break;
11423
  case XML_PARSER_COMMENT:
11424
      xmlGenericError(xmlGenericErrorContext,
11425
        "PP: try COMMENT\n");break;
11426
  case XML_PARSER_PROLOG:
11427
      xmlGenericError(xmlGenericErrorContext,
11428
        "PP: try PROLOG\n");break;
11429
  case XML_PARSER_START_TAG:
11430
      xmlGenericError(xmlGenericErrorContext,
11431
        "PP: try START_TAG\n");break;
11432
  case XML_PARSER_CONTENT:
11433
      xmlGenericError(xmlGenericErrorContext,
11434
        "PP: try CONTENT\n");break;
11435
  case XML_PARSER_CDATA_SECTION:
11436
      xmlGenericError(xmlGenericErrorContext,
11437
        "PP: try CDATA_SECTION\n");break;
11438
  case XML_PARSER_END_TAG:
11439
      xmlGenericError(xmlGenericErrorContext,
11440
        "PP: try END_TAG\n");break;
11441
  case XML_PARSER_ENTITY_DECL:
11442
      xmlGenericError(xmlGenericErrorContext,
11443
        "PP: try ENTITY_DECL\n");break;
11444
  case XML_PARSER_ENTITY_VALUE:
11445
      xmlGenericError(xmlGenericErrorContext,
11446
        "PP: try ENTITY_VALUE\n");break;
11447
  case XML_PARSER_ATTRIBUTE_VALUE:
11448
      xmlGenericError(xmlGenericErrorContext,
11449
        "PP: try ATTRIBUTE_VALUE\n");break;
11450
  case XML_PARSER_DTD:
11451
      xmlGenericError(xmlGenericErrorContext,
11452
        "PP: try DTD\n");break;
11453
  case XML_PARSER_EPILOG:
11454
      xmlGenericError(xmlGenericErrorContext,
11455
        "PP: try EPILOG\n");break;
11456
  case XML_PARSER_PI:
11457
      xmlGenericError(xmlGenericErrorContext,
11458
        "PP: try PI\n");break;
11459
        case XML_PARSER_IGNORE:
11460
            xmlGenericError(xmlGenericErrorContext,
11461
        "PP: try IGNORE\n");break;
11462
    }
11463
#endif
11464
11465
3.99M
    if ((ctxt->input != NULL) &&
11466
3.99M
        (ctxt->input->cur - ctxt->input->base > 4096)) {
11467
60.5k
        xmlParserInputShrink(ctxt->input);
11468
60.5k
    }
11469
11470
33.9M
    while (ctxt->instate != XML_PARSER_EOF) {
11471
33.9M
  if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
11472
74.7k
      return(0);
11473
11474
33.8M
  if (ctxt->input == NULL) break;
11475
33.8M
  if (ctxt->input->buf == NULL)
11476
0
      avail = ctxt->input->length -
11477
0
              (ctxt->input->cur - ctxt->input->base);
11478
33.8M
  else {
11479
      /*
11480
       * If we are operating on converted input, try to flush
11481
       * remaining chars to avoid them stalling in the non-converted
11482
       * buffer. But do not do this in document start where
11483
       * encoding="..." may not have been read and we work on a
11484
       * guessed encoding.
11485
       */
11486
33.8M
      if ((ctxt->instate != XML_PARSER_START) &&
11487
33.8M
          (ctxt->input->buf->raw != NULL) &&
11488
33.8M
    (xmlBufIsEmpty(ctxt->input->buf->raw) == 0)) {
11489
133k
                size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
11490
133k
                                                 ctxt->input);
11491
133k
    size_t current = ctxt->input->cur - ctxt->input->base;
11492
11493
133k
    xmlParserInputBufferPush(ctxt->input->buf, 0, "");
11494
133k
                xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
11495
133k
                                      base, current);
11496
133k
      }
11497
33.8M
      avail = xmlBufUse(ctxt->input->buf->buffer) -
11498
33.8M
        (ctxt->input->cur - ctxt->input->base);
11499
33.8M
  }
11500
33.8M
        if (avail < 1)
11501
226k
      goto done;
11502
33.6M
        switch (ctxt->instate) {
11503
0
            case XML_PARSER_EOF:
11504
          /*
11505
     * Document parsing is done !
11506
     */
11507
0
          goto done;
11508
777k
            case XML_PARSER_START:
11509
777k
    if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
11510
224k
        xmlChar start[4];
11511
224k
        xmlCharEncoding enc;
11512
11513
        /*
11514
         * Very first chars read from the document flow.
11515
         */
11516
224k
        if (avail < 4)
11517
3.41k
      goto done;
11518
11519
        /*
11520
         * Get the 4 first bytes and decode the charset
11521
         * if enc != XML_CHAR_ENCODING_NONE
11522
         * plug some encoding conversion routines,
11523
         * else xmlSwitchEncoding will set to (default)
11524
         * UTF8.
11525
         */
11526
220k
        start[0] = RAW;
11527
220k
        start[1] = NXT(1);
11528
220k
        start[2] = NXT(2);
11529
220k
        start[3] = NXT(3);
11530
220k
        enc = xmlDetectCharEncoding(start, 4);
11531
220k
        xmlSwitchEncoding(ctxt, enc);
11532
220k
        break;
11533
224k
    }
11534
11535
553k
    if (avail < 2)
11536
126
        goto done;
11537
552k
    cur = ctxt->input->cur[0];
11538
552k
    next = ctxt->input->cur[1];
11539
552k
    if (cur == 0) {
11540
378
        if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11541
378
      ctxt->sax->setDocumentLocator(ctxt->userData,
11542
378
                  &xmlDefaultSAXLocator);
11543
378
        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11544
378
        xmlHaltParser(ctxt);
11545
#ifdef DEBUG_PUSH
11546
        xmlGenericError(xmlGenericErrorContext,
11547
          "PP: entering EOF\n");
11548
#endif
11549
378
        if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11550
378
      ctxt->sax->endDocument(ctxt->userData);
11551
378
        goto done;
11552
378
    }
11553
552k
          if ((cur == '<') && (next == '?')) {
11554
        /* PI or XML decl */
11555
416k
        if (avail < 5) goto done;
11556
416k
        if ((!terminate) &&
11557
416k
                        (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11558
247k
      goto done;
11559
169k
        if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11560
169k
      ctxt->sax->setDocumentLocator(ctxt->userData,
11561
169k
                  &xmlDefaultSAXLocator);
11562
169k
        if ((ctxt->input->cur[2] == 'x') &&
11563
169k
      (ctxt->input->cur[3] == 'm') &&
11564
169k
      (ctxt->input->cur[4] == 'l') &&
11565
169k
      (IS_BLANK_CH(ctxt->input->cur[5]))) {
11566
156k
      ret += 5;
11567
#ifdef DEBUG_PUSH
11568
      xmlGenericError(xmlGenericErrorContext,
11569
        "PP: Parsing XML Decl\n");
11570
#endif
11571
156k
      xmlParseXMLDecl(ctxt);
11572
156k
      if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
11573
          /*
11574
           * The XML REC instructs us to stop parsing right
11575
           * here
11576
           */
11577
352
          xmlHaltParser(ctxt);
11578
352
          return(0);
11579
352
      }
11580
156k
      ctxt->standalone = ctxt->input->standalone;
11581
156k
      if ((ctxt->encoding == NULL) &&
11582
156k
          (ctxt->input->encoding != NULL))
11583
18.5k
          ctxt->encoding = xmlStrdup(ctxt->input->encoding);
11584
156k
      if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11585
156k
          (!ctxt->disableSAX))
11586
150k
          ctxt->sax->startDocument(ctxt->userData);
11587
156k
      ctxt->instate = XML_PARSER_MISC;
11588
#ifdef DEBUG_PUSH
11589
      xmlGenericError(xmlGenericErrorContext,
11590
        "PP: entering MISC\n");
11591
#endif
11592
156k
        } else {
11593
12.5k
      ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11594
12.5k
      if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11595
12.5k
          (!ctxt->disableSAX))
11596
12.5k
          ctxt->sax->startDocument(ctxt->userData);
11597
12.5k
      ctxt->instate = XML_PARSER_MISC;
11598
#ifdef DEBUG_PUSH
11599
      xmlGenericError(xmlGenericErrorContext,
11600
        "PP: entering MISC\n");
11601
#endif
11602
12.5k
        }
11603
169k
    } else {
11604
135k
        if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11605
135k
      ctxt->sax->setDocumentLocator(ctxt->userData,
11606
135k
                  &xmlDefaultSAXLocator);
11607
135k
        ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11608
135k
        if (ctxt->version == NULL) {
11609
0
            xmlErrMemory(ctxt, NULL);
11610
0
      break;
11611
0
        }
11612
135k
        if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11613
135k
            (!ctxt->disableSAX))
11614
135k
      ctxt->sax->startDocument(ctxt->userData);
11615
135k
        ctxt->instate = XML_PARSER_MISC;
11616
#ifdef DEBUG_PUSH
11617
        xmlGenericError(xmlGenericErrorContext,
11618
          "PP: entering MISC\n");
11619
#endif
11620
135k
    }
11621
304k
    break;
11622
7.13M
            case XML_PARSER_START_TAG: {
11623
7.13M
          const xmlChar *name;
11624
7.13M
    const xmlChar *prefix = NULL;
11625
7.13M
    const xmlChar *URI = NULL;
11626
7.13M
                int line = ctxt->input->line;
11627
7.13M
    int nsNr = ctxt->nsNr;
11628
11629
7.13M
    if ((avail < 2) && (ctxt->inputNr == 1))
11630
0
        goto done;
11631
7.13M
    cur = ctxt->input->cur[0];
11632
7.13M
          if (cur != '<') {
11633
16.2k
        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11634
16.2k
        xmlHaltParser(ctxt);
11635
16.2k
        if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11636
16.2k
      ctxt->sax->endDocument(ctxt->userData);
11637
16.2k
        goto done;
11638
16.2k
    }
11639
7.11M
    if ((!terminate) && (!xmlParseLookupGt(ctxt)))
11640
1.26M
                    goto done;
11641
5.84M
    if (ctxt->spaceNr == 0)
11642
50.1k
        spacePush(ctxt, -1);
11643
5.79M
    else if (*ctxt->space == -2)
11644
682k
        spacePush(ctxt, -1);
11645
5.11M
    else
11646
5.11M
        spacePush(ctxt, *ctxt->space);
11647
5.84M
#ifdef LIBXML_SAX1_ENABLED
11648
5.84M
    if (ctxt->sax2)
11649
3.79M
#endif /* LIBXML_SAX1_ENABLED */
11650
3.79M
        name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
11651
2.05M
#ifdef LIBXML_SAX1_ENABLED
11652
2.05M
    else
11653
2.05M
        name = xmlParseStartTag(ctxt);
11654
5.84M
#endif /* LIBXML_SAX1_ENABLED */
11655
5.84M
    if (ctxt->instate == XML_PARSER_EOF)
11656
289
        goto done;
11657
5.84M
    if (name == NULL) {
11658
19.0k
        spacePop(ctxt);
11659
19.0k
        xmlHaltParser(ctxt);
11660
19.0k
        if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11661
19.0k
      ctxt->sax->endDocument(ctxt->userData);
11662
19.0k
        goto done;
11663
19.0k
    }
11664
5.82M
#ifdef LIBXML_VALID_ENABLED
11665
    /*
11666
     * [ VC: Root Element Type ]
11667
     * The Name in the document type declaration must match
11668
     * the element type of the root element.
11669
     */
11670
5.82M
    if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
11671
5.82M
        ctxt->node && (ctxt->node == ctxt->myDoc->children))
11672
0
        ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
11673
5.82M
#endif /* LIBXML_VALID_ENABLED */
11674
11675
    /*
11676
     * Check for an Empty Element.
11677
     */
11678
5.82M
    if ((RAW == '/') && (NXT(1) == '>')) {
11679
2.52M
        SKIP(2);
11680
11681
2.52M
        if (ctxt->sax2) {
11682
1.78M
      if ((ctxt->sax != NULL) &&
11683
1.78M
          (ctxt->sax->endElementNs != NULL) &&
11684
1.78M
          (!ctxt->disableSAX))
11685
1.78M
          ctxt->sax->endElementNs(ctxt->userData, name,
11686
1.78M
                                  prefix, URI);
11687
1.78M
      if (ctxt->nsNr - nsNr > 0)
11688
8.22k
          nsPop(ctxt, ctxt->nsNr - nsNr);
11689
1.78M
#ifdef LIBXML_SAX1_ENABLED
11690
1.78M
        } else {
11691
733k
      if ((ctxt->sax != NULL) &&
11692
733k
          (ctxt->sax->endElement != NULL) &&
11693
733k
          (!ctxt->disableSAX))
11694
733k
          ctxt->sax->endElement(ctxt->userData, name);
11695
733k
#endif /* LIBXML_SAX1_ENABLED */
11696
733k
        }
11697
2.52M
        if (ctxt->instate == XML_PARSER_EOF)
11698
0
      goto done;
11699
2.52M
        spacePop(ctxt);
11700
2.52M
        if (ctxt->nameNr == 0) {
11701
8.87k
      ctxt->instate = XML_PARSER_EPILOG;
11702
2.51M
        } else {
11703
2.51M
      ctxt->instate = XML_PARSER_CONTENT;
11704
2.51M
        }
11705
2.52M
        break;
11706
2.52M
    }
11707
3.30M
    if (RAW == '>') {
11708
2.97M
        NEXT;
11709
2.97M
    } else {
11710
338k
        xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
11711
338k
           "Couldn't find end of Start Tag %s\n",
11712
338k
           name);
11713
338k
        nodePop(ctxt);
11714
338k
        spacePop(ctxt);
11715
338k
    }
11716
3.30M
                nameNsPush(ctxt, name, prefix, URI, line, ctxt->nsNr - nsNr);
11717
11718
3.30M
    ctxt->instate = XML_PARSER_CONTENT;
11719
3.30M
                break;
11720
5.82M
      }
11721
21.2M
            case XML_PARSER_CONTENT: {
11722
21.2M
    if ((avail < 2) && (ctxt->inputNr == 1))
11723
91.1k
        goto done;
11724
21.2M
    cur = ctxt->input->cur[0];
11725
21.2M
    next = ctxt->input->cur[1];
11726
11727
21.2M
    if ((cur == '<') && (next == '/')) {
11728
2.66M
        ctxt->instate = XML_PARSER_END_TAG;
11729
2.66M
        break;
11730
18.5M
          } else if ((cur == '<') && (next == '?')) {
11731
51.9k
        if ((!terminate) &&
11732
51.9k
            (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11733
21.6k
      goto done;
11734
30.2k
        xmlParsePI(ctxt);
11735
30.2k
        ctxt->instate = XML_PARSER_CONTENT;
11736
18.4M
    } else if ((cur == '<') && (next != '!')) {
11737
5.64M
        ctxt->instate = XML_PARSER_START_TAG;
11738
5.64M
        break;
11739
12.8M
    } else if ((cur == '<') && (next == '!') &&
11740
12.8M
               (ctxt->input->cur[2] == '-') &&
11741
12.8M
         (ctxt->input->cur[3] == '-')) {
11742
287k
        if ((!terminate) &&
11743
287k
            (!xmlParseLookupString(ctxt, 4, "-->", 3)))
11744
124k
      goto done;
11745
163k
        xmlParseComment(ctxt);
11746
163k
        ctxt->instate = XML_PARSER_CONTENT;
11747
12.5M
    } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
11748
12.5M
        (ctxt->input->cur[2] == '[') &&
11749
12.5M
        (ctxt->input->cur[3] == 'C') &&
11750
12.5M
        (ctxt->input->cur[4] == 'D') &&
11751
12.5M
        (ctxt->input->cur[5] == 'A') &&
11752
12.5M
        (ctxt->input->cur[6] == 'T') &&
11753
12.5M
        (ctxt->input->cur[7] == 'A') &&
11754
12.5M
        (ctxt->input->cur[8] == '[')) {
11755
24.7k
        SKIP(9);
11756
24.7k
        ctxt->instate = XML_PARSER_CDATA_SECTION;
11757
24.7k
        break;
11758
12.5M
    } else if ((cur == '<') && (next == '!') &&
11759
12.5M
               (avail < 9)) {
11760
19.5k
        goto done;
11761
12.5M
    } else if (cur == '<') {
11762
285k
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
11763
285k
                    "detected an error in element content\n");
11764
285k
                    SKIP(1);
11765
12.2M
    } else if (cur == '&') {
11766
2.22M
        if ((!terminate) && (!xmlParseLookupChar(ctxt, ';')))
11767
80.5k
      goto done;
11768
2.14M
        xmlParseReference(ctxt);
11769
9.99M
    } else {
11770
        /* TODO Avoid the extra copy, handle directly !!! */
11771
        /*
11772
         * Goal of the following test is:
11773
         *  - minimize calls to the SAX 'character' callback
11774
         *    when they are mergeable
11775
         *  - handle a problem for isBlank when we only parse
11776
         *    a sequence of blank chars and the next one is
11777
         *    not available to check against '<' presence.
11778
         *  - tries to homogenize the differences in SAX
11779
         *    callbacks between the push and pull versions
11780
         *    of the parser.
11781
         */
11782
9.99M
        if ((ctxt->inputNr == 1) &&
11783
9.99M
            (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
11784
6.99M
      if ((!terminate) && (!xmlParseLookupCharData(ctxt)))
11785
731k
          goto done;
11786
6.99M
                    }
11787
9.26M
                    ctxt->checkIndex = 0;
11788
9.26M
        xmlParseCharData(ctxt, 0);
11789
9.26M
    }
11790
11.8M
    break;
11791
21.2M
      }
11792
11.8M
            case XML_PARSER_END_TAG:
11793
2.75M
    if (avail < 2)
11794
0
        goto done;
11795
2.75M
    if ((!terminate) && (!xmlParseLookupChar(ctxt, '>')))
11796
83.5k
        goto done;
11797
2.66M
    if (ctxt->sax2) {
11798
1.64M
              xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
11799
1.64M
        nameNsPop(ctxt);
11800
1.64M
    }
11801
1.02M
#ifdef LIBXML_SAX1_ENABLED
11802
1.02M
      else
11803
1.02M
        xmlParseEndTag1(ctxt, 0);
11804
2.66M
#endif /* LIBXML_SAX1_ENABLED */
11805
2.66M
    if (ctxt->instate == XML_PARSER_EOF) {
11806
        /* Nothing */
11807
2.66M
    } else if (ctxt->nameNr == 0) {
11808
48.2k
        ctxt->instate = XML_PARSER_EPILOG;
11809
2.61M
    } else {
11810
2.61M
        ctxt->instate = XML_PARSER_CONTENT;
11811
2.61M
    }
11812
2.66M
    break;
11813
302k
            case XML_PARSER_CDATA_SECTION: {
11814
          /*
11815
     * The Push mode needs to have the SAX callback for
11816
     * cdataBlock merge back contiguous callbacks.
11817
     */
11818
302k
    const xmlChar *term;
11819
11820
302k
                if (terminate) {
11821
                    /*
11822
                     * Don't call xmlParseLookupString. If 'terminate'
11823
                     * is set, checkIndex is invalid.
11824
                     */
11825
5.92k
                    term = BAD_CAST strstr((const char *) ctxt->input->cur,
11826
5.92k
                                           "]]>");
11827
296k
                } else {
11828
296k
        term = xmlParseLookupString(ctxt, 0, "]]>", 3);
11829
296k
                }
11830
11831
302k
    if (term == NULL) {
11832
173k
        int tmp, size;
11833
11834
173k
                    if (terminate) {
11835
                        /* Unfinished CDATA section */
11836
2.64k
                        size = ctxt->input->end - ctxt->input->cur;
11837
170k
                    } else {
11838
170k
                        if (avail < XML_PARSER_BIG_BUFFER_SIZE + 2)
11839
67.2k
                            goto done;
11840
103k
                        ctxt->checkIndex = 0;
11841
                        /* XXX: Why don't we pass the full buffer? */
11842
103k
                        size = XML_PARSER_BIG_BUFFER_SIZE;
11843
103k
                    }
11844
106k
                    tmp = xmlCheckCdataPush(ctxt->input->cur, size, 0);
11845
106k
                    if (tmp <= 0) {
11846
73.2k
                        tmp = -tmp;
11847
73.2k
                        ctxt->input->cur += tmp;
11848
73.2k
                        goto encoding_error;
11849
73.2k
                    }
11850
32.8k
                    if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
11851
32.8k
                        if (ctxt->sax->cdataBlock != NULL)
11852
20.2k
                            ctxt->sax->cdataBlock(ctxt->userData,
11853
20.2k
                                                  ctxt->input->cur, tmp);
11854
12.5k
                        else if (ctxt->sax->characters != NULL)
11855
12.5k
                            ctxt->sax->characters(ctxt->userData,
11856
12.5k
                                                  ctxt->input->cur, tmp);
11857
32.8k
                    }
11858
32.8k
                    if (ctxt->instate == XML_PARSER_EOF)
11859
0
                        goto done;
11860
32.8k
                    SKIPL(tmp);
11861
129k
    } else {
11862
129k
                    int base = term - CUR_PTR;
11863
129k
        int tmp;
11864
11865
129k
        tmp = xmlCheckCdataPush(ctxt->input->cur, base, 1);
11866
129k
        if ((tmp < 0) || (tmp != base)) {
11867
110k
      tmp = -tmp;
11868
110k
      ctxt->input->cur += tmp;
11869
110k
      goto encoding_error;
11870
110k
        }
11871
18.3k
        if ((ctxt->sax != NULL) && (base == 0) &&
11872
18.3k
            (ctxt->sax->cdataBlock != NULL) &&
11873
18.3k
            (!ctxt->disableSAX)) {
11874
      /*
11875
       * Special case to provide identical behaviour
11876
       * between pull and push parsers on empty CDATA
11877
       * sections
11878
       */
11879
2.16k
       if ((ctxt->input->cur - ctxt->input->base >= 9) &&
11880
2.16k
           (!strncmp((const char *)&ctxt->input->cur[-9],
11881
2.16k
                     "<![CDATA[", 9)))
11882
2.15k
           ctxt->sax->cdataBlock(ctxt->userData,
11883
2.15k
                                 BAD_CAST "", 0);
11884
16.1k
        } else if ((ctxt->sax != NULL) && (base > 0) &&
11885
16.1k
      (!ctxt->disableSAX)) {
11886
14.7k
      if (ctxt->sax->cdataBlock != NULL)
11887
9.48k
          ctxt->sax->cdataBlock(ctxt->userData,
11888
9.48k
              ctxt->input->cur, base);
11889
5.25k
      else if (ctxt->sax->characters != NULL)
11890
5.25k
          ctxt->sax->characters(ctxt->userData,
11891
5.25k
              ctxt->input->cur, base);
11892
14.7k
        }
11893
18.3k
        if (ctxt->instate == XML_PARSER_EOF)
11894
0
      goto done;
11895
18.3k
        SKIPL(base + 3);
11896
18.3k
        ctxt->instate = XML_PARSER_CONTENT;
11897
#ifdef DEBUG_PUSH
11898
        xmlGenericError(xmlGenericErrorContext,
11899
          "PP: entering CONTENT\n");
11900
#endif
11901
18.3k
    }
11902
51.1k
    break;
11903
302k
      }
11904
536k
            case XML_PARSER_MISC:
11905
722k
            case XML_PARSER_PROLOG:
11906
785k
            case XML_PARSER_EPILOG:
11907
785k
    SKIP_BLANKS;
11908
785k
    if (ctxt->input->buf == NULL)
11909
0
        avail = ctxt->input->length -
11910
0
                (ctxt->input->cur - ctxt->input->base);
11911
785k
    else
11912
785k
        avail = xmlBufUse(ctxt->input->buf->buffer) -
11913
785k
                (ctxt->input->cur - ctxt->input->base);
11914
785k
    if (avail < 2)
11915
50.7k
        goto done;
11916
734k
    cur = ctxt->input->cur[0];
11917
734k
    next = ctxt->input->cur[1];
11918
734k
          if ((cur == '<') && (next == '?')) {
11919
57.6k
        if ((!terminate) &&
11920
57.6k
                        (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11921
19.4k
      goto done;
11922
#ifdef DEBUG_PUSH
11923
        xmlGenericError(xmlGenericErrorContext,
11924
          "PP: Parsing PI\n");
11925
#endif
11926
38.1k
        xmlParsePI(ctxt);
11927
38.1k
        if (ctxt->instate == XML_PARSER_EOF)
11928
0
      goto done;
11929
676k
    } else if ((cur == '<') && (next == '!') &&
11930
676k
        (ctxt->input->cur[2] == '-') &&
11931
676k
        (ctxt->input->cur[3] == '-')) {
11932
74.0k
        if ((!terminate) &&
11933
74.0k
                        (!xmlParseLookupString(ctxt, 4, "-->", 3)))
11934
46.2k
      goto done;
11935
#ifdef DEBUG_PUSH
11936
        xmlGenericError(xmlGenericErrorContext,
11937
          "PP: Parsing Comment\n");
11938
#endif
11939
27.8k
        xmlParseComment(ctxt);
11940
27.8k
        if (ctxt->instate == XML_PARSER_EOF)
11941
0
      goto done;
11942
602k
    } else if ((ctxt->instate == XML_PARSER_MISC) &&
11943
602k
                    (cur == '<') && (next == '!') &&
11944
602k
        (ctxt->input->cur[2] == 'D') &&
11945
602k
        (ctxt->input->cur[3] == 'O') &&
11946
602k
        (ctxt->input->cur[4] == 'C') &&
11947
602k
        (ctxt->input->cur[5] == 'T') &&
11948
602k
        (ctxt->input->cur[6] == 'Y') &&
11949
602k
        (ctxt->input->cur[7] == 'P') &&
11950
602k
        (ctxt->input->cur[8] == 'E')) {
11951
318k
        if ((!terminate) && (!xmlParseLookupGt(ctxt)))
11952
125k
                        goto done;
11953
#ifdef DEBUG_PUSH
11954
        xmlGenericError(xmlGenericErrorContext,
11955
          "PP: Parsing internal subset\n");
11956
#endif
11957
193k
        ctxt->inSubset = 1;
11958
193k
        xmlParseDocTypeDecl(ctxt);
11959
193k
        if (ctxt->instate == XML_PARSER_EOF)
11960
0
      goto done;
11961
193k
        if (RAW == '[') {
11962
155k
      ctxt->instate = XML_PARSER_DTD;
11963
#ifdef DEBUG_PUSH
11964
      xmlGenericError(xmlGenericErrorContext,
11965
        "PP: entering DTD\n");
11966
#endif
11967
155k
        } else {
11968
      /*
11969
       * Create and update the external subset.
11970
       */
11971
37.8k
      ctxt->inSubset = 2;
11972
37.8k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11973
37.8k
          (ctxt->sax->externalSubset != NULL))
11974
35.9k
          ctxt->sax->externalSubset(ctxt->userData,
11975
35.9k
            ctxt->intSubName, ctxt->extSubSystem,
11976
35.9k
            ctxt->extSubURI);
11977
37.8k
      ctxt->inSubset = 0;
11978
37.8k
      xmlCleanSpecialAttr(ctxt);
11979
37.8k
      ctxt->instate = XML_PARSER_PROLOG;
11980
#ifdef DEBUG_PUSH
11981
      xmlGenericError(xmlGenericErrorContext,
11982
        "PP: entering PROLOG\n");
11983
#endif
11984
37.8k
        }
11985
283k
    } else if ((cur == '<') && (next == '!') &&
11986
283k
               (avail <
11987
41.0k
                            (ctxt->instate == XML_PARSER_MISC ? 9 : 4))) {
11988
38.9k
        goto done;
11989
244k
    } else if (ctxt->instate == XML_PARSER_EPILOG) {
11990
10.6k
        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
11991
10.6k
        xmlHaltParser(ctxt);
11992
#ifdef DEBUG_PUSH
11993
        xmlGenericError(xmlGenericErrorContext,
11994
          "PP: entering EOF\n");
11995
#endif
11996
10.6k
        if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11997
10.6k
      ctxt->sax->endDocument(ctxt->userData);
11998
10.6k
        goto done;
11999
234k
                } else {
12000
234k
        ctxt->instate = XML_PARSER_START_TAG;
12001
#ifdef DEBUG_PUSH
12002
        xmlGenericError(xmlGenericErrorContext,
12003
          "PP: entering START_TAG\n");
12004
#endif
12005
234k
    }
12006
493k
    break;
12007
558k
            case XML_PARSER_DTD: {
12008
558k
                if ((!terminate) && (!xmlParseLookupInternalSubset(ctxt)))
12009
404k
                    goto done;
12010
154k
    xmlParseInternalSubset(ctxt);
12011
154k
    if (ctxt->instate == XML_PARSER_EOF)
12012
37.6k
        goto done;
12013
116k
    ctxt->inSubset = 2;
12014
116k
    if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
12015
116k
        (ctxt->sax->externalSubset != NULL))
12016
112k
        ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
12017
112k
          ctxt->extSubSystem, ctxt->extSubURI);
12018
116k
    ctxt->inSubset = 0;
12019
116k
    xmlCleanSpecialAttr(ctxt);
12020
116k
    if (ctxt->instate == XML_PARSER_EOF)
12021
3.14k
        goto done;
12022
113k
    ctxt->instate = XML_PARSER_PROLOG;
12023
#ifdef DEBUG_PUSH
12024
    xmlGenericError(xmlGenericErrorContext,
12025
      "PP: entering PROLOG\n");
12026
#endif
12027
113k
                break;
12028
116k
      }
12029
0
            case XML_PARSER_COMMENT:
12030
0
    xmlGenericError(xmlGenericErrorContext,
12031
0
      "PP: internal error, state == COMMENT\n");
12032
0
    ctxt->instate = XML_PARSER_CONTENT;
12033
#ifdef DEBUG_PUSH
12034
    xmlGenericError(xmlGenericErrorContext,
12035
      "PP: entering CONTENT\n");
12036
#endif
12037
0
    break;
12038
0
            case XML_PARSER_IGNORE:
12039
0
    xmlGenericError(xmlGenericErrorContext,
12040
0
      "PP: internal error, state == IGNORE");
12041
0
          ctxt->instate = XML_PARSER_DTD;
12042
#ifdef DEBUG_PUSH
12043
    xmlGenericError(xmlGenericErrorContext,
12044
      "PP: entering DTD\n");
12045
#endif
12046
0
          break;
12047
0
            case XML_PARSER_PI:
12048
0
    xmlGenericError(xmlGenericErrorContext,
12049
0
      "PP: internal error, state == PI\n");
12050
0
    ctxt->instate = XML_PARSER_CONTENT;
12051
#ifdef DEBUG_PUSH
12052
    xmlGenericError(xmlGenericErrorContext,
12053
      "PP: entering CONTENT\n");
12054
#endif
12055
0
    break;
12056
0
            case XML_PARSER_ENTITY_DECL:
12057
0
    xmlGenericError(xmlGenericErrorContext,
12058
0
      "PP: internal error, state == ENTITY_DECL\n");
12059
0
    ctxt->instate = XML_PARSER_DTD;
12060
#ifdef DEBUG_PUSH
12061
    xmlGenericError(xmlGenericErrorContext,
12062
      "PP: entering DTD\n");
12063
#endif
12064
0
    break;
12065
0
            case XML_PARSER_ENTITY_VALUE:
12066
0
    xmlGenericError(xmlGenericErrorContext,
12067
0
      "PP: internal error, state == ENTITY_VALUE\n");
12068
0
    ctxt->instate = XML_PARSER_CONTENT;
12069
#ifdef DEBUG_PUSH
12070
    xmlGenericError(xmlGenericErrorContext,
12071
      "PP: entering DTD\n");
12072
#endif
12073
0
    break;
12074
0
            case XML_PARSER_ATTRIBUTE_VALUE:
12075
0
    xmlGenericError(xmlGenericErrorContext,
12076
0
      "PP: internal error, state == ATTRIBUTE_VALUE\n");
12077
0
    ctxt->instate = XML_PARSER_START_TAG;
12078
#ifdef DEBUG_PUSH
12079
    xmlGenericError(xmlGenericErrorContext,
12080
      "PP: entering START_TAG\n");
12081
#endif
12082
0
    break;
12083
0
            case XML_PARSER_SYSTEM_LITERAL:
12084
0
    xmlGenericError(xmlGenericErrorContext,
12085
0
      "PP: internal error, state == SYSTEM_LITERAL\n");
12086
0
    ctxt->instate = XML_PARSER_START_TAG;
12087
#ifdef DEBUG_PUSH
12088
    xmlGenericError(xmlGenericErrorContext,
12089
      "PP: entering START_TAG\n");
12090
#endif
12091
0
    break;
12092
0
            case XML_PARSER_PUBLIC_LITERAL:
12093
0
    xmlGenericError(xmlGenericErrorContext,
12094
0
      "PP: internal error, state == PUBLIC_LITERAL\n");
12095
0
    ctxt->instate = XML_PARSER_START_TAG;
12096
#ifdef DEBUG_PUSH
12097
    xmlGenericError(xmlGenericErrorContext,
12098
      "PP: entering START_TAG\n");
12099
#endif
12100
0
    break;
12101
33.6M
  }
12102
33.6M
    }
12103
3.73M
done:
12104
#ifdef DEBUG_PUSH
12105
    xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
12106
#endif
12107
3.73M
    return(ret);
12108
184k
encoding_error:
12109
184k
    {
12110
184k
        char buffer[150];
12111
12112
184k
  snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
12113
184k
      ctxt->input->cur[0], ctxt->input->cur[1],
12114
184k
      ctxt->input->cur[2], ctxt->input->cur[3]);
12115
184k
  __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
12116
184k
         "Input is not proper UTF-8, indicate encoding !\n%s",
12117
184k
         BAD_CAST buffer, NULL);
12118
184k
    }
12119
184k
    return(0);
12120
3.99M
}
12121
12122
/**
12123
 * xmlParseChunk:
12124
 * @ctxt:  an XML parser context
12125
 * @chunk:  an char array
12126
 * @size:  the size in byte of the chunk
12127
 * @terminate:  last chunk indicator
12128
 *
12129
 * Parse a Chunk of memory
12130
 *
12131
 * Returns zero if no error, the xmlParserErrors otherwise.
12132
 */
12133
int
12134
xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
12135
5.09M
              int terminate) {
12136
5.09M
    int end_in_lf = 0;
12137
5.09M
    int remain = 0;
12138
12139
5.09M
    if (ctxt == NULL)
12140
0
        return(XML_ERR_INTERNAL_ERROR);
12141
5.09M
    if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12142
1.11M
        return(ctxt->errNo);
12143
3.97M
    if (ctxt->instate == XML_PARSER_EOF)
12144
149
        return(-1);
12145
3.97M
    if (ctxt->input == NULL)
12146
0
        return(-1);
12147
12148
3.97M
    ctxt->progressive = 1;
12149
3.97M
    if (ctxt->instate == XML_PARSER_START)
12150
534k
        xmlDetectSAX2(ctxt);
12151
3.97M
    if ((size > 0) && (chunk != NULL) && (!terminate) &&
12152
3.97M
        (chunk[size - 1] == '\r')) {
12153
29.4k
  end_in_lf = 1;
12154
29.4k
  size--;
12155
29.4k
    }
12156
12157
3.99M
xmldecl_done:
12158
12159
3.99M
    if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
12160
3.99M
        (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF))  {
12161
3.77M
  size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12162
3.77M
  size_t cur = ctxt->input->cur - ctxt->input->base;
12163
3.77M
  int res;
12164
12165
        /*
12166
         * Specific handling if we autodetected an encoding, we should not
12167
         * push more than the first line ... which depend on the encoding
12168
         * And only push the rest once the final encoding was detected
12169
         */
12170
3.77M
        if ((ctxt->instate == XML_PARSER_START) && (ctxt->input != NULL) &&
12171
3.77M
            (ctxt->input->buf != NULL) && (ctxt->input->buf->encoder != NULL)) {
12172
35.2k
            unsigned int len = 45;
12173
12174
35.2k
            if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12175
35.2k
                               BAD_CAST "UTF-16")) ||
12176
35.2k
                (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12177
6.98k
                               BAD_CAST "UTF16")))
12178
28.2k
                len = 90;
12179
6.98k
            else if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12180
6.98k
                                    BAD_CAST "UCS-4")) ||
12181
6.98k
                     (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12182
6.56k
                                    BAD_CAST "UCS4")))
12183
418
                len = 180;
12184
12185
35.2k
            if (ctxt->input->buf->rawconsumed < len)
12186
2.04k
                len -= ctxt->input->buf->rawconsumed;
12187
12188
            /*
12189
             * Change size for reading the initial declaration only
12190
             * if size is greater than len. Otherwise, memmove in xmlBufferAdd
12191
             * will blindly copy extra bytes from memory.
12192
             */
12193
35.2k
            if ((unsigned int) size > len) {
12194
23.4k
                remain = size - len;
12195
23.4k
                size = len;
12196
23.4k
            } else {
12197
11.7k
                remain = 0;
12198
11.7k
            }
12199
35.2k
        }
12200
3.77M
  res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12201
3.77M
        xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
12202
3.77M
  if (res < 0) {
12203
445
      ctxt->errNo = XML_PARSER_EOF;
12204
445
      xmlHaltParser(ctxt);
12205
445
      return (XML_PARSER_EOF);
12206
445
  }
12207
#ifdef DEBUG_PUSH
12208
  xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12209
#endif
12210
12211
3.77M
    } else if (ctxt->instate != XML_PARSER_EOF) {
12212
222k
  if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
12213
222k
      xmlParserInputBufferPtr in = ctxt->input->buf;
12214
222k
      if ((in->encoder != NULL) && (in->buffer != NULL) &&
12215
222k
        (in->raw != NULL)) {
12216
13.5k
    int nbchars;
12217
13.5k
    size_t base = xmlBufGetInputBase(in->buffer, ctxt->input);
12218
13.5k
    size_t current = ctxt->input->cur - ctxt->input->base;
12219
12220
13.5k
    nbchars = xmlCharEncInput(in, terminate);
12221
13.5k
    xmlBufSetInputBaseCur(in->buffer, ctxt->input, base, current);
12222
13.5k
    if (nbchars < 0) {
12223
        /* TODO 2.6.0 */
12224
299
        xmlGenericError(xmlGenericErrorContext,
12225
299
            "xmlParseChunk: encoder error\n");
12226
299
                    xmlHaltParser(ctxt);
12227
299
        return(XML_ERR_INVALID_ENCODING);
12228
299
    }
12229
13.5k
      }
12230
222k
  }
12231
222k
    }
12232
12233
3.99M
    if (remain != 0) {
12234
23.3k
        xmlParseTryOrFinish(ctxt, 0);
12235
3.97M
    } else {
12236
3.97M
        xmlParseTryOrFinish(ctxt, terminate);
12237
3.97M
    }
12238
3.99M
    if (ctxt->instate == XML_PARSER_EOF)
12239
88.2k
        return(ctxt->errNo);
12240
12241
3.90M
    if ((ctxt->input != NULL) &&
12242
3.90M
         (((ctxt->input->end - ctxt->input->cur) > XML_MAX_LOOKUP_LIMIT) ||
12243
3.90M
         ((ctxt->input->cur - ctxt->input->base) > XML_MAX_LOOKUP_LIMIT)) &&
12244
3.90M
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
12245
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
12246
0
        xmlHaltParser(ctxt);
12247
0
    }
12248
3.90M
    if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12249
76.1k
        return(ctxt->errNo);
12250
12251
3.83M
    if (remain != 0) {
12252
22.9k
        chunk += size;
12253
22.9k
        size = remain;
12254
22.9k
        remain = 0;
12255
22.9k
        goto xmldecl_done;
12256
22.9k
    }
12257
3.80M
    if ((end_in_lf == 1) && (ctxt->input != NULL) &&
12258
3.80M
        (ctxt->input->buf != NULL)) {
12259
28.9k
  size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
12260
28.9k
           ctxt->input);
12261
28.9k
  size_t current = ctxt->input->cur - ctxt->input->base;
12262
12263
28.9k
  xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
12264
12265
28.9k
  xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
12266
28.9k
            base, current);
12267
28.9k
    }
12268
3.80M
    if (terminate) {
12269
  /*
12270
   * Check for termination
12271
   */
12272
101k
  int cur_avail = 0;
12273
12274
101k
  if (ctxt->input != NULL) {
12275
101k
      if (ctxt->input->buf == NULL)
12276
0
    cur_avail = ctxt->input->length -
12277
0
          (ctxt->input->cur - ctxt->input->base);
12278
101k
      else
12279
101k
    cur_avail = xmlBufUse(ctxt->input->buf->buffer) -
12280
101k
                    (ctxt->input->cur - ctxt->input->base);
12281
101k
  }
12282
12283
101k
  if ((ctxt->instate != XML_PARSER_EOF) &&
12284
101k
      (ctxt->instate != XML_PARSER_EPILOG)) {
12285
62.1k
      xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
12286
62.1k
  }
12287
101k
  if ((ctxt->instate == XML_PARSER_EPILOG) && (cur_avail > 0)) {
12288
414
      xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
12289
414
  }
12290
101k
  if (ctxt->instate != XML_PARSER_EOF) {
12291
101k
      if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
12292
101k
    ctxt->sax->endDocument(ctxt->userData);
12293
101k
  }
12294
101k
  ctxt->instate = XML_PARSER_EOF;
12295
101k
    }
12296
3.80M
    if (ctxt->wellFormed == 0)
12297
1.28M
  return((xmlParserErrors) ctxt->errNo);
12298
2.52M
    else
12299
2.52M
        return(0);
12300
3.80M
}
12301
12302
/************************************************************************
12303
 *                  *
12304
 *    I/O front end functions to the parser     *
12305
 *                  *
12306
 ************************************************************************/
12307
12308
/**
12309
 * xmlCreatePushParserCtxt:
12310
 * @sax:  a SAX handler
12311
 * @user_data:  The user data returned on SAX callbacks
12312
 * @chunk:  a pointer to an array of chars
12313
 * @size:  number of chars in the array
12314
 * @filename:  an optional file name or URI
12315
 *
12316
 * Create a parser context for using the XML parser in push mode.
12317
 * If @buffer and @size are non-NULL, the data is used to detect
12318
 * the encoding.  The remaining characters will be parsed so they
12319
 * don't need to be fed in again through xmlParseChunk.
12320
 * To allow content encoding detection, @size should be >= 4
12321
 * The value of @filename is used for fetching external entities
12322
 * and error/warning reports.
12323
 *
12324
 * Returns the new parser context or NULL
12325
 */
12326
12327
xmlParserCtxtPtr
12328
xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12329
307k
                        const char *chunk, int size, const char *filename) {
12330
307k
    xmlParserCtxtPtr ctxt;
12331
307k
    xmlParserInputPtr inputStream;
12332
307k
    xmlParserInputBufferPtr buf;
12333
307k
    xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
12334
12335
    /*
12336
     * plug some encoding conversion routines
12337
     */
12338
307k
    if ((chunk != NULL) && (size >= 4))
12339
152k
  enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
12340
12341
307k
    buf = xmlAllocParserInputBuffer(enc);
12342
307k
    if (buf == NULL) return(NULL);
12343
12344
307k
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
12345
307k
    if (ctxt == NULL) {
12346
0
        xmlErrMemory(NULL, "creating parser: out of memory\n");
12347
0
  xmlFreeParserInputBuffer(buf);
12348
0
  return(NULL);
12349
0
    }
12350
307k
    ctxt->dictNames = 1;
12351
307k
    if (filename == NULL) {
12352
153k
  ctxt->directory = NULL;
12353
153k
    } else {
12354
153k
        ctxt->directory = xmlParserGetDirectory(filename);
12355
153k
    }
12356
12357
307k
    inputStream = xmlNewInputStream(ctxt);
12358
307k
    if (inputStream == NULL) {
12359
0
  xmlFreeParserCtxt(ctxt);
12360
0
  xmlFreeParserInputBuffer(buf);
12361
0
  return(NULL);
12362
0
    }
12363
12364
307k
    if (filename == NULL)
12365
153k
  inputStream->filename = NULL;
12366
153k
    else {
12367
153k
  inputStream->filename = (char *)
12368
153k
      xmlCanonicPath((const xmlChar *) filename);
12369
153k
  if (inputStream->filename == NULL) {
12370
0
            xmlFreeInputStream(inputStream);
12371
0
      xmlFreeParserCtxt(ctxt);
12372
0
      xmlFreeParserInputBuffer(buf);
12373
0
      return(NULL);
12374
0
  }
12375
153k
    }
12376
307k
    inputStream->buf = buf;
12377
307k
    xmlBufResetInput(inputStream->buf->buffer, inputStream);
12378
307k
    inputPush(ctxt, inputStream);
12379
12380
    /*
12381
     * If the caller didn't provide an initial 'chunk' for determining
12382
     * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
12383
     * that it can be automatically determined later
12384
     */
12385
307k
    ctxt->charset = XML_CHAR_ENCODING_NONE;
12386
12387
307k
    if ((size != 0) && (chunk != NULL) &&
12388
307k
        (ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
12389
152k
  size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12390
152k
  size_t cur = ctxt->input->cur - ctxt->input->base;
12391
12392
152k
  xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12393
12394
152k
        xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
12395
#ifdef DEBUG_PUSH
12396
  xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12397
#endif
12398
152k
    }
12399
12400
307k
    if (enc != XML_CHAR_ENCODING_NONE) {
12401
85.0k
        xmlSwitchEncoding(ctxt, enc);
12402
85.0k
    }
12403
12404
307k
    return(ctxt);
12405
307k
}
12406
#endif /* LIBXML_PUSH_ENABLED */
12407
12408
/**
12409
 * xmlHaltParser:
12410
 * @ctxt:  an XML parser context
12411
 *
12412
 * Blocks further parser processing don't override error
12413
 * for internal use
12414
 */
12415
static void
12416
280k
xmlHaltParser(xmlParserCtxtPtr ctxt) {
12417
280k
    if (ctxt == NULL)
12418
0
        return;
12419
280k
    ctxt->instate = XML_PARSER_EOF;
12420
280k
    ctxt->disableSAX = 1;
12421
295k
    while (ctxt->inputNr > 1)
12422
15.5k
        xmlFreeInputStream(inputPop(ctxt));
12423
280k
    if (ctxt->input != NULL) {
12424
        /*
12425
   * in case there was a specific allocation deallocate before
12426
   * overriding base
12427
   */
12428
280k
        if (ctxt->input->free != NULL) {
12429
0
      ctxt->input->free((xmlChar *) ctxt->input->base);
12430
0
      ctxt->input->free = NULL;
12431
0
  }
12432
280k
        if (ctxt->input->buf != NULL) {
12433
240k
            xmlFreeParserInputBuffer(ctxt->input->buf);
12434
240k
            ctxt->input->buf = NULL;
12435
240k
        }
12436
280k
  ctxt->input->cur = BAD_CAST"";
12437
280k
        ctxt->input->length = 0;
12438
280k
  ctxt->input->base = ctxt->input->cur;
12439
280k
        ctxt->input->end = ctxt->input->cur;
12440
280k
    }
12441
280k
}
12442
12443
/**
12444
 * xmlStopParser:
12445
 * @ctxt:  an XML parser context
12446
 *
12447
 * Blocks further parser processing
12448
 */
12449
void
12450
154k
xmlStopParser(xmlParserCtxtPtr ctxt) {
12451
154k
    if (ctxt == NULL)
12452
0
        return;
12453
154k
    xmlHaltParser(ctxt);
12454
154k
    ctxt->errNo = XML_ERR_USER_STOP;
12455
154k
}
12456
12457
/**
12458
 * xmlCreateIOParserCtxt:
12459
 * @sax:  a SAX handler
12460
 * @user_data:  The user data returned on SAX callbacks
12461
 * @ioread:  an I/O read function
12462
 * @ioclose:  an I/O close function
12463
 * @ioctx:  an I/O handler
12464
 * @enc:  the charset encoding if known
12465
 *
12466
 * Create a parser context for using the XML parser with an existing
12467
 * I/O stream
12468
 *
12469
 * Returns the new parser context or NULL
12470
 */
12471
xmlParserCtxtPtr
12472
xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12473
  xmlInputReadCallback   ioread, xmlInputCloseCallback  ioclose,
12474
0
  void *ioctx, xmlCharEncoding enc) {
12475
0
    xmlParserCtxtPtr ctxt;
12476
0
    xmlParserInputPtr inputStream;
12477
0
    xmlParserInputBufferPtr buf;
12478
12479
0
    if (ioread == NULL) return(NULL);
12480
12481
0
    buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
12482
0
    if (buf == NULL) {
12483
0
        if (ioclose != NULL)
12484
0
            ioclose(ioctx);
12485
0
        return (NULL);
12486
0
    }
12487
12488
0
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
12489
0
    if (ctxt == NULL) {
12490
0
  xmlFreeParserInputBuffer(buf);
12491
0
  return(NULL);
12492
0
    }
12493
12494
0
    inputStream = xmlNewIOInputStream(ctxt, buf, enc);
12495
0
    if (inputStream == NULL) {
12496
0
  xmlFreeParserCtxt(ctxt);
12497
0
  return(NULL);
12498
0
    }
12499
0
    inputPush(ctxt, inputStream);
12500
12501
0
    return(ctxt);
12502
0
}
12503
12504
#ifdef LIBXML_VALID_ENABLED
12505
/************************************************************************
12506
 *                  *
12507
 *    Front ends when parsing a DTD       *
12508
 *                  *
12509
 ************************************************************************/
12510
12511
/**
12512
 * xmlIOParseDTD:
12513
 * @sax:  the SAX handler block or NULL
12514
 * @input:  an Input Buffer
12515
 * @enc:  the charset encoding if known
12516
 *
12517
 * Load and parse a DTD
12518
 *
12519
 * Returns the resulting xmlDtdPtr or NULL in case of error.
12520
 * @input will be freed by the function in any case.
12521
 */
12522
12523
xmlDtdPtr
12524
xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
12525
0
        xmlCharEncoding enc) {
12526
0
    xmlDtdPtr ret = NULL;
12527
0
    xmlParserCtxtPtr ctxt;
12528
0
    xmlParserInputPtr pinput = NULL;
12529
0
    xmlChar start[4];
12530
12531
0
    if (input == NULL)
12532
0
  return(NULL);
12533
12534
0
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
12535
0
    if (ctxt == NULL) {
12536
0
        xmlFreeParserInputBuffer(input);
12537
0
  return(NULL);
12538
0
    }
12539
12540
    /* We are loading a DTD */
12541
0
    ctxt->options |= XML_PARSE_DTDLOAD;
12542
12543
0
    xmlDetectSAX2(ctxt);
12544
12545
    /*
12546
     * generate a parser input from the I/O handler
12547
     */
12548
12549
0
    pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12550
0
    if (pinput == NULL) {
12551
0
        xmlFreeParserInputBuffer(input);
12552
0
  xmlFreeParserCtxt(ctxt);
12553
0
  return(NULL);
12554
0
    }
12555
12556
    /*
12557
     * plug some encoding conversion routines here.
12558
     */
12559
0
    if (xmlPushInput(ctxt, pinput) < 0) {
12560
0
  xmlFreeParserCtxt(ctxt);
12561
0
  return(NULL);
12562
0
    }
12563
0
    if (enc != XML_CHAR_ENCODING_NONE) {
12564
0
        xmlSwitchEncoding(ctxt, enc);
12565
0
    }
12566
12567
0
    pinput->filename = NULL;
12568
0
    pinput->line = 1;
12569
0
    pinput->col = 1;
12570
0
    pinput->base = ctxt->input->cur;
12571
0
    pinput->cur = ctxt->input->cur;
12572
0
    pinput->free = NULL;
12573
12574
    /*
12575
     * let's parse that entity knowing it's an external subset.
12576
     */
12577
0
    ctxt->inSubset = 2;
12578
0
    ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12579
0
    if (ctxt->myDoc == NULL) {
12580
0
  xmlErrMemory(ctxt, "New Doc failed");
12581
0
  return(NULL);
12582
0
    }
12583
0
    ctxt->myDoc->properties = XML_DOC_INTERNAL;
12584
0
    ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12585
0
                                 BAD_CAST "none", BAD_CAST "none");
12586
12587
0
    if ((enc == XML_CHAR_ENCODING_NONE) &&
12588
0
        ((ctxt->input->end - ctxt->input->cur) >= 4)) {
12589
  /*
12590
   * Get the 4 first bytes and decode the charset
12591
   * if enc != XML_CHAR_ENCODING_NONE
12592
   * plug some encoding conversion routines.
12593
   */
12594
0
  start[0] = RAW;
12595
0
  start[1] = NXT(1);
12596
0
  start[2] = NXT(2);
12597
0
  start[3] = NXT(3);
12598
0
  enc = xmlDetectCharEncoding(start, 4);
12599
0
  if (enc != XML_CHAR_ENCODING_NONE) {
12600
0
      xmlSwitchEncoding(ctxt, enc);
12601
0
  }
12602
0
    }
12603
12604
0
    xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
12605
12606
0
    if (ctxt->myDoc != NULL) {
12607
0
  if (ctxt->wellFormed) {
12608
0
      ret = ctxt->myDoc->extSubset;
12609
0
      ctxt->myDoc->extSubset = NULL;
12610
0
      if (ret != NULL) {
12611
0
    xmlNodePtr tmp;
12612
12613
0
    ret->doc = NULL;
12614
0
    tmp = ret->children;
12615
0
    while (tmp != NULL) {
12616
0
        tmp->doc = NULL;
12617
0
        tmp = tmp->next;
12618
0
    }
12619
0
      }
12620
0
  } else {
12621
0
      ret = NULL;
12622
0
  }
12623
0
        xmlFreeDoc(ctxt->myDoc);
12624
0
        ctxt->myDoc = NULL;
12625
0
    }
12626
0
    xmlFreeParserCtxt(ctxt);
12627
12628
0
    return(ret);
12629
0
}
12630
12631
/**
12632
 * xmlSAXParseDTD:
12633
 * @sax:  the SAX handler block
12634
 * @ExternalID:  a NAME* containing the External ID of the DTD
12635
 * @SystemID:  a NAME* containing the URL to the DTD
12636
 *
12637
 * DEPRECATED: Don't use.
12638
 *
12639
 * Load and parse an external subset.
12640
 *
12641
 * Returns the resulting xmlDtdPtr or NULL in case of error.
12642
 */
12643
12644
xmlDtdPtr
12645
xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
12646
0
                          const xmlChar *SystemID) {
12647
0
    xmlDtdPtr ret = NULL;
12648
0
    xmlParserCtxtPtr ctxt;
12649
0
    xmlParserInputPtr input = NULL;
12650
0
    xmlCharEncoding enc;
12651
0
    xmlChar* systemIdCanonic;
12652
12653
0
    if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
12654
12655
0
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
12656
0
    if (ctxt == NULL) {
12657
0
  return(NULL);
12658
0
    }
12659
12660
    /* We are loading a DTD */
12661
0
    ctxt->options |= XML_PARSE_DTDLOAD;
12662
12663
    /*
12664
     * Canonicalise the system ID
12665
     */
12666
0
    systemIdCanonic = xmlCanonicPath(SystemID);
12667
0
    if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
12668
0
  xmlFreeParserCtxt(ctxt);
12669
0
  return(NULL);
12670
0
    }
12671
12672
    /*
12673
     * Ask the Entity resolver to load the damn thing
12674
     */
12675
12676
0
    if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
12677
0
  input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
12678
0
                                   systemIdCanonic);
12679
0
    if (input == NULL) {
12680
0
  xmlFreeParserCtxt(ctxt);
12681
0
  if (systemIdCanonic != NULL)
12682
0
      xmlFree(systemIdCanonic);
12683
0
  return(NULL);
12684
0
    }
12685
12686
    /*
12687
     * plug some encoding conversion routines here.
12688
     */
12689
0
    if (xmlPushInput(ctxt, input) < 0) {
12690
0
  xmlFreeParserCtxt(ctxt);
12691
0
  if (systemIdCanonic != NULL)
12692
0
      xmlFree(systemIdCanonic);
12693
0
  return(NULL);
12694
0
    }
12695
0
    if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12696
0
  enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
12697
0
  xmlSwitchEncoding(ctxt, enc);
12698
0
    }
12699
12700
0
    if (input->filename == NULL)
12701
0
  input->filename = (char *) systemIdCanonic;
12702
0
    else
12703
0
  xmlFree(systemIdCanonic);
12704
0
    input->line = 1;
12705
0
    input->col = 1;
12706
0
    input->base = ctxt->input->cur;
12707
0
    input->cur = ctxt->input->cur;
12708
0
    input->free = NULL;
12709
12710
    /*
12711
     * let's parse that entity knowing it's an external subset.
12712
     */
12713
0
    ctxt->inSubset = 2;
12714
0
    ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12715
0
    if (ctxt->myDoc == NULL) {
12716
0
  xmlErrMemory(ctxt, "New Doc failed");
12717
0
  xmlFreeParserCtxt(ctxt);
12718
0
  return(NULL);
12719
0
    }
12720
0
    ctxt->myDoc->properties = XML_DOC_INTERNAL;
12721
0
    ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12722
0
                                 ExternalID, SystemID);
12723
0
    xmlParseExternalSubset(ctxt, ExternalID, SystemID);
12724
12725
0
    if (ctxt->myDoc != NULL) {
12726
0
  if (ctxt->wellFormed) {
12727
0
      ret = ctxt->myDoc->extSubset;
12728
0
      ctxt->myDoc->extSubset = NULL;
12729
0
      if (ret != NULL) {
12730
0
    xmlNodePtr tmp;
12731
12732
0
    ret->doc = NULL;
12733
0
    tmp = ret->children;
12734
0
    while (tmp != NULL) {
12735
0
        tmp->doc = NULL;
12736
0
        tmp = tmp->next;
12737
0
    }
12738
0
      }
12739
0
  } else {
12740
0
      ret = NULL;
12741
0
  }
12742
0
        xmlFreeDoc(ctxt->myDoc);
12743
0
        ctxt->myDoc = NULL;
12744
0
    }
12745
0
    xmlFreeParserCtxt(ctxt);
12746
12747
0
    return(ret);
12748
0
}
12749
12750
12751
/**
12752
 * xmlParseDTD:
12753
 * @ExternalID:  a NAME* containing the External ID of the DTD
12754
 * @SystemID:  a NAME* containing the URL to the DTD
12755
 *
12756
 * Load and parse an external subset.
12757
 *
12758
 * Returns the resulting xmlDtdPtr or NULL in case of error.
12759
 */
12760
12761
xmlDtdPtr
12762
0
xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
12763
0
    return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
12764
0
}
12765
#endif /* LIBXML_VALID_ENABLED */
12766
12767
/************************************************************************
12768
 *                  *
12769
 *    Front ends when parsing an Entity     *
12770
 *                  *
12771
 ************************************************************************/
12772
12773
/**
12774
 * xmlParseCtxtExternalEntity:
12775
 * @ctx:  the existing parsing context
12776
 * @URL:  the URL for the entity to load
12777
 * @ID:  the System ID for the entity to load
12778
 * @lst:  the return value for the set of parsed nodes
12779
 *
12780
 * Parse an external general entity within an existing parsing context
12781
 * An external general parsed entity is well-formed if it matches the
12782
 * production labeled extParsedEnt.
12783
 *
12784
 * [78] extParsedEnt ::= TextDecl? content
12785
 *
12786
 * Returns 0 if the entity is well formed, -1 in case of args problem and
12787
 *    the parser error code otherwise
12788
 */
12789
12790
int
12791
xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
12792
0
                 const xmlChar *ID, xmlNodePtr *lst) {
12793
0
    void *userData;
12794
12795
0
    if (ctx == NULL) return(-1);
12796
    /*
12797
     * If the user provided their own SAX callbacks, then reuse the
12798
     * userData callback field, otherwise the expected setup in a
12799
     * DOM builder is to have userData == ctxt
12800
     */
12801
0
    if (ctx->userData == ctx)
12802
0
        userData = NULL;
12803
0
    else
12804
0
        userData = ctx->userData;
12805
0
    return xmlParseExternalEntityPrivate(ctx->myDoc, ctx, ctx->sax,
12806
0
                                         userData, ctx->depth + 1,
12807
0
                                         URL, ID, lst);
12808
0
}
12809
12810
/**
12811
 * xmlParseExternalEntityPrivate:
12812
 * @doc:  the document the chunk pertains to
12813
 * @oldctxt:  the previous parser context if available
12814
 * @sax:  the SAX handler block (possibly NULL)
12815
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
12816
 * @depth:  Used for loop detection, use 0
12817
 * @URL:  the URL for the entity to load
12818
 * @ID:  the System ID for the entity to load
12819
 * @list:  the return value for the set of parsed nodes
12820
 *
12821
 * Private version of xmlParseExternalEntity()
12822
 *
12823
 * Returns 0 if the entity is well formed, -1 in case of args problem and
12824
 *    the parser error code otherwise
12825
 */
12826
12827
static xmlParserErrors
12828
xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
12829
                xmlSAXHandlerPtr sax,
12830
          void *user_data, int depth, const xmlChar *URL,
12831
686k
          const xmlChar *ID, xmlNodePtr *list) {
12832
686k
    xmlParserCtxtPtr ctxt;
12833
686k
    xmlDocPtr newDoc;
12834
686k
    xmlNodePtr newRoot;
12835
686k
    xmlParserErrors ret = XML_ERR_OK;
12836
686k
    xmlChar start[4];
12837
686k
    xmlCharEncoding enc;
12838
12839
686k
    if (((depth > 40) &&
12840
686k
  ((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) ||
12841
686k
  (depth > 100)) {
12842
0
  xmlFatalErrMsg(oldctxt, XML_ERR_ENTITY_LOOP,
12843
0
                       "Maximum entity nesting depth exceeded");
12844
0
        return(XML_ERR_ENTITY_LOOP);
12845
0
    }
12846
12847
686k
    if (list != NULL)
12848
64.5k
        *list = NULL;
12849
686k
    if ((URL == NULL) && (ID == NULL))
12850
534
  return(XML_ERR_INTERNAL_ERROR);
12851
685k
    if (doc == NULL)
12852
0
  return(XML_ERR_INTERNAL_ERROR);
12853
12854
685k
    ctxt = xmlCreateEntityParserCtxtInternal(sax, user_data, URL, ID, NULL,
12855
685k
                                             oldctxt);
12856
685k
    if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
12857
92.0k
    if (oldctxt != NULL) {
12858
92.0k
        ctxt->nbErrors = oldctxt->nbErrors;
12859
92.0k
        ctxt->nbWarnings = oldctxt->nbWarnings;
12860
92.0k
    }
12861
92.0k
    xmlDetectSAX2(ctxt);
12862
12863
92.0k
    newDoc = xmlNewDoc(BAD_CAST "1.0");
12864
92.0k
    if (newDoc == NULL) {
12865
0
  xmlFreeParserCtxt(ctxt);
12866
0
  return(XML_ERR_INTERNAL_ERROR);
12867
0
    }
12868
92.0k
    newDoc->properties = XML_DOC_INTERNAL;
12869
92.0k
    if (doc) {
12870
92.0k
        newDoc->intSubset = doc->intSubset;
12871
92.0k
        newDoc->extSubset = doc->extSubset;
12872
92.0k
        if (doc->dict) {
12873
47.5k
            newDoc->dict = doc->dict;
12874
47.5k
            xmlDictReference(newDoc->dict);
12875
47.5k
        }
12876
92.0k
        if (doc->URL != NULL) {
12877
60.6k
            newDoc->URL = xmlStrdup(doc->URL);
12878
60.6k
        }
12879
92.0k
    }
12880
92.0k
    newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12881
92.0k
    if (newRoot == NULL) {
12882
0
  if (sax != NULL)
12883
0
  xmlFreeParserCtxt(ctxt);
12884
0
  newDoc->intSubset = NULL;
12885
0
  newDoc->extSubset = NULL;
12886
0
        xmlFreeDoc(newDoc);
12887
0
  return(XML_ERR_INTERNAL_ERROR);
12888
0
    }
12889
92.0k
    xmlAddChild((xmlNodePtr) newDoc, newRoot);
12890
92.0k
    nodePush(ctxt, newDoc->children);
12891
92.0k
    if (doc == NULL) {
12892
0
        ctxt->myDoc = newDoc;
12893
92.0k
    } else {
12894
92.0k
        ctxt->myDoc = doc;
12895
92.0k
        newRoot->doc = doc;
12896
92.0k
    }
12897
12898
    /*
12899
     * Get the 4 first bytes and decode the charset
12900
     * if enc != XML_CHAR_ENCODING_NONE
12901
     * plug some encoding conversion routines.
12902
     */
12903
92.0k
    GROW;
12904
92.0k
    if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12905
87.7k
  start[0] = RAW;
12906
87.7k
  start[1] = NXT(1);
12907
87.7k
  start[2] = NXT(2);
12908
87.7k
  start[3] = NXT(3);
12909
87.7k
  enc = xmlDetectCharEncoding(start, 4);
12910
87.7k
  if (enc != XML_CHAR_ENCODING_NONE) {
12911
3.81k
      xmlSwitchEncoding(ctxt, enc);
12912
3.81k
  }
12913
87.7k
    }
12914
12915
    /*
12916
     * Parse a possible text declaration first
12917
     */
12918
92.0k
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
12919
1.91k
  xmlParseTextDecl(ctxt);
12920
        /*
12921
         * An XML-1.0 document can't reference an entity not XML-1.0
12922
         */
12923
1.91k
        if ((xmlStrEqual(oldctxt->version, BAD_CAST "1.0")) &&
12924
1.91k
            (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
12925
102
            xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
12926
102
                           "Version mismatch between document and entity\n");
12927
102
        }
12928
1.91k
    }
12929
12930
92.0k
    ctxt->instate = XML_PARSER_CONTENT;
12931
92.0k
    ctxt->depth = depth;
12932
92.0k
    if (oldctxt != NULL) {
12933
92.0k
  ctxt->_private = oldctxt->_private;
12934
92.0k
  ctxt->loadsubset = oldctxt->loadsubset;
12935
92.0k
  ctxt->validate = oldctxt->validate;
12936
92.0k
  ctxt->valid = oldctxt->valid;
12937
92.0k
  ctxt->replaceEntities = oldctxt->replaceEntities;
12938
92.0k
        if (oldctxt->validate) {
12939
55.2k
            ctxt->vctxt.error = oldctxt->vctxt.error;
12940
55.2k
            ctxt->vctxt.warning = oldctxt->vctxt.warning;
12941
55.2k
            ctxt->vctxt.userData = oldctxt->vctxt.userData;
12942
55.2k
            ctxt->vctxt.flags = oldctxt->vctxt.flags;
12943
55.2k
        }
12944
92.0k
  ctxt->external = oldctxt->external;
12945
92.0k
        if (ctxt->dict) xmlDictFree(ctxt->dict);
12946
92.0k
        ctxt->dict = oldctxt->dict;
12947
92.0k
        ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12948
92.0k
        ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12949
92.0k
        ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
12950
92.0k
        ctxt->dictNames = oldctxt->dictNames;
12951
92.0k
        ctxt->attsDefault = oldctxt->attsDefault;
12952
92.0k
        ctxt->attsSpecial = oldctxt->attsSpecial;
12953
92.0k
        ctxt->linenumbers = oldctxt->linenumbers;
12954
92.0k
  ctxt->record_info = oldctxt->record_info;
12955
92.0k
  ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
12956
92.0k
  ctxt->node_seq.length = oldctxt->node_seq.length;
12957
92.0k
  ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
12958
92.0k
    } else {
12959
  /*
12960
   * Doing validity checking on chunk without context
12961
   * doesn't make sense
12962
   */
12963
0
  ctxt->_private = NULL;
12964
0
  ctxt->validate = 0;
12965
0
  ctxt->external = 2;
12966
0
  ctxt->loadsubset = 0;
12967
0
    }
12968
12969
92.0k
    xmlParseContent(ctxt);
12970
12971
92.0k
    if ((RAW == '<') && (NXT(1) == '/')) {
12972
3.08k
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12973
88.9k
    } else if (RAW != 0) {
12974
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
12975
0
    }
12976
92.0k
    if (ctxt->node != newDoc->children) {
12977
13.2k
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12978
13.2k
    }
12979
12980
92.0k
    if (!ctxt->wellFormed) {
12981
26.9k
  ret = (xmlParserErrors)ctxt->errNo;
12982
26.9k
        if (oldctxt != NULL) {
12983
26.9k
            oldctxt->errNo = ctxt->errNo;
12984
26.9k
            oldctxt->wellFormed = 0;
12985
26.9k
            xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
12986
26.9k
        }
12987
65.0k
    } else {
12988
65.0k
  if (list != NULL) {
12989
6.42k
      xmlNodePtr cur;
12990
12991
      /*
12992
       * Return the newly created nodeset after unlinking it from
12993
       * they pseudo parent.
12994
       */
12995
6.42k
      cur = newDoc->children->children;
12996
6.42k
      *list = cur;
12997
877k
      while (cur != NULL) {
12998
871k
    cur->parent = NULL;
12999
871k
    cur = cur->next;
13000
871k
      }
13001
6.42k
            newDoc->children->children = NULL;
13002
6.42k
  }
13003
65.0k
  ret = XML_ERR_OK;
13004
65.0k
    }
13005
13006
    /*
13007
     * Also record the size of the entity parsed
13008
     */
13009
92.0k
    if (ctxt->input != NULL && oldctxt != NULL) {
13010
92.0k
        unsigned long consumed = ctxt->input->consumed;
13011
13012
92.0k
        xmlSaturatedAddSizeT(&consumed, ctxt->input->cur - ctxt->input->base);
13013
13014
92.0k
        xmlSaturatedAdd(&oldctxt->sizeentities, consumed);
13015
92.0k
        xmlSaturatedAdd(&oldctxt->sizeentities, ctxt->sizeentities);
13016
13017
92.0k
        xmlSaturatedAdd(&oldctxt->sizeentcopy, consumed);
13018
92.0k
        xmlSaturatedAdd(&oldctxt->sizeentcopy, ctxt->sizeentcopy);
13019
92.0k
    }
13020
13021
92.0k
    if (oldctxt != NULL) {
13022
92.0k
        ctxt->dict = NULL;
13023
92.0k
        ctxt->attsDefault = NULL;
13024
92.0k
        ctxt->attsSpecial = NULL;
13025
92.0k
        oldctxt->nbErrors = ctxt->nbErrors;
13026
92.0k
        oldctxt->nbWarnings = ctxt->nbWarnings;
13027
92.0k
        oldctxt->validate = ctxt->validate;
13028
92.0k
        oldctxt->valid = ctxt->valid;
13029
92.0k
        oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
13030
92.0k
        oldctxt->node_seq.length = ctxt->node_seq.length;
13031
92.0k
        oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
13032
92.0k
    }
13033
92.0k
    ctxt->node_seq.maximum = 0;
13034
92.0k
    ctxt->node_seq.length = 0;
13035
92.0k
    ctxt->node_seq.buffer = NULL;
13036
92.0k
    xmlFreeParserCtxt(ctxt);
13037
92.0k
    newDoc->intSubset = NULL;
13038
92.0k
    newDoc->extSubset = NULL;
13039
92.0k
    xmlFreeDoc(newDoc);
13040
13041
92.0k
    return(ret);
13042
92.0k
}
13043
13044
#ifdef LIBXML_SAX1_ENABLED
13045
/**
13046
 * xmlParseExternalEntity:
13047
 * @doc:  the document the chunk pertains to
13048
 * @sax:  the SAX handler block (possibly NULL)
13049
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
13050
 * @depth:  Used for loop detection, use 0
13051
 * @URL:  the URL for the entity to load
13052
 * @ID:  the System ID for the entity to load
13053
 * @lst:  the return value for the set of parsed nodes
13054
 *
13055
 * Parse an external general entity
13056
 * An external general parsed entity is well-formed if it matches the
13057
 * production labeled extParsedEnt.
13058
 *
13059
 * [78] extParsedEnt ::= TextDecl? content
13060
 *
13061
 * Returns 0 if the entity is well formed, -1 in case of args problem and
13062
 *    the parser error code otherwise
13063
 */
13064
13065
int
13066
xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
13067
0
    int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
13068
0
    return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
13069
0
                           ID, lst));
13070
0
}
13071
13072
/**
13073
 * xmlParseBalancedChunkMemory:
13074
 * @doc:  the document the chunk pertains to (must not be NULL)
13075
 * @sax:  the SAX handler block (possibly NULL)
13076
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
13077
 * @depth:  Used for loop detection, use 0
13078
 * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
13079
 * @lst:  the return value for the set of parsed nodes
13080
 *
13081
 * Parse a well-balanced chunk of an XML document
13082
 * called by the parser
13083
 * The allowed sequence for the Well Balanced Chunk is the one defined by
13084
 * the content production in the XML grammar:
13085
 *
13086
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13087
 *
13088
 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13089
 *    the parser error code otherwise
13090
 */
13091
13092
int
13093
xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13094
0
     void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
13095
0
    return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
13096
0
                                                depth, string, lst, 0 );
13097
0
}
13098
#endif /* LIBXML_SAX1_ENABLED */
13099
13100
/**
13101
 * xmlParseBalancedChunkMemoryInternal:
13102
 * @oldctxt:  the existing parsing context
13103
 * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
13104
 * @user_data:  the user data field for the parser context
13105
 * @lst:  the return value for the set of parsed nodes
13106
 *
13107
 *
13108
 * Parse a well-balanced chunk of an XML document
13109
 * called by the parser
13110
 * The allowed sequence for the Well Balanced Chunk is the one defined by
13111
 * the content production in the XML grammar:
13112
 *
13113
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13114
 *
13115
 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13116
 * error code otherwise
13117
 *
13118
 * In case recover is set to 1, the nodelist will not be empty even if
13119
 * the parsed chunk is not well balanced.
13120
 */
13121
static xmlParserErrors
13122
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
13123
93.1k
  const xmlChar *string, void *user_data, xmlNodePtr *lst) {
13124
93.1k
    xmlParserCtxtPtr ctxt;
13125
93.1k
    xmlDocPtr newDoc = NULL;
13126
93.1k
    xmlNodePtr newRoot;
13127
93.1k
    xmlSAXHandlerPtr oldsax = NULL;
13128
93.1k
    xmlNodePtr content = NULL;
13129
93.1k
    xmlNodePtr last = NULL;
13130
93.1k
    int size;
13131
93.1k
    xmlParserErrors ret = XML_ERR_OK;
13132
93.1k
#ifdef SAX2
13133
93.1k
    int i;
13134
93.1k
#endif
13135
13136
93.1k
    if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13137
93.1k
        (oldctxt->depth >  100)) {
13138
99
  xmlFatalErrMsg(oldctxt, XML_ERR_ENTITY_LOOP,
13139
99
                       "Maximum entity nesting depth exceeded");
13140
99
  return(XML_ERR_ENTITY_LOOP);
13141
99
    }
13142
13143
13144
93.0k
    if (lst != NULL)
13145
60.8k
        *lst = NULL;
13146
93.0k
    if (string == NULL)
13147
83
        return(XML_ERR_INTERNAL_ERROR);
13148
13149
92.9k
    size = xmlStrlen(string);
13150
13151
92.9k
    ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13152
92.9k
    if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
13153
72.2k
    ctxt->nbErrors = oldctxt->nbErrors;
13154
72.2k
    ctxt->nbWarnings = oldctxt->nbWarnings;
13155
72.2k
    if (user_data != NULL)
13156
0
  ctxt->userData = user_data;
13157
72.2k
    else
13158
72.2k
  ctxt->userData = ctxt;
13159
72.2k
    if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
13160
72.2k
    ctxt->dict = oldctxt->dict;
13161
72.2k
    ctxt->input_id = oldctxt->input_id;
13162
72.2k
    ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13163
72.2k
    ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13164
72.2k
    ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13165
13166
72.2k
#ifdef SAX2
13167
    /* propagate namespaces down the entity */
13168
72.7k
    for (i = 0;i < oldctxt->nsNr;i += 2) {
13169
537
        nsPush(ctxt, oldctxt->nsTab[i], oldctxt->nsTab[i+1]);
13170
537
    }
13171
72.2k
#endif
13172
13173
72.2k
    oldsax = ctxt->sax;
13174
72.2k
    ctxt->sax = oldctxt->sax;
13175
72.2k
    xmlDetectSAX2(ctxt);
13176
72.2k
    ctxt->replaceEntities = oldctxt->replaceEntities;
13177
72.2k
    ctxt->options = oldctxt->options;
13178
13179
72.2k
    ctxt->_private = oldctxt->_private;
13180
72.2k
    if (oldctxt->myDoc == NULL) {
13181
0
  newDoc = xmlNewDoc(BAD_CAST "1.0");
13182
0
  if (newDoc == NULL) {
13183
0
      ctxt->sax = oldsax;
13184
0
      ctxt->dict = NULL;
13185
0
      xmlFreeParserCtxt(ctxt);
13186
0
      return(XML_ERR_INTERNAL_ERROR);
13187
0
  }
13188
0
  newDoc->properties = XML_DOC_INTERNAL;
13189
0
  newDoc->dict = ctxt->dict;
13190
0
  xmlDictReference(newDoc->dict);
13191
0
  ctxt->myDoc = newDoc;
13192
72.2k
    } else {
13193
72.2k
  ctxt->myDoc = oldctxt->myDoc;
13194
72.2k
        content = ctxt->myDoc->children;
13195
72.2k
  last = ctxt->myDoc->last;
13196
72.2k
    }
13197
72.2k
    newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
13198
72.2k
    if (newRoot == NULL) {
13199
0
  ctxt->sax = oldsax;
13200
0
  ctxt->dict = NULL;
13201
0
  xmlFreeParserCtxt(ctxt);
13202
0
  if (newDoc != NULL) {
13203
0
      xmlFreeDoc(newDoc);
13204
0
  }
13205
0
  return(XML_ERR_INTERNAL_ERROR);
13206
0
    }
13207
72.2k
    ctxt->myDoc->children = NULL;
13208
72.2k
    ctxt->myDoc->last = NULL;
13209
72.2k
    xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
13210
72.2k
    nodePush(ctxt, ctxt->myDoc->children);
13211
72.2k
    ctxt->instate = XML_PARSER_CONTENT;
13212
72.2k
    ctxt->depth = oldctxt->depth;
13213
13214
72.2k
    ctxt->validate = 0;
13215
72.2k
    ctxt->loadsubset = oldctxt->loadsubset;
13216
72.2k
    if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
13217
  /*
13218
   * ID/IDREF registration will be done in xmlValidateElement below
13219
   */
13220
60.0k
  ctxt->loadsubset |= XML_SKIP_IDS;
13221
60.0k
    }
13222
72.2k
    ctxt->dictNames = oldctxt->dictNames;
13223
72.2k
    ctxt->attsDefault = oldctxt->attsDefault;
13224
72.2k
    ctxt->attsSpecial = oldctxt->attsSpecial;
13225
13226
72.2k
    xmlParseContent(ctxt);
13227
72.2k
    if ((RAW == '<') && (NXT(1) == '/')) {
13228
177
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13229
72.0k
    } else if (RAW != 0) {
13230
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13231
0
    }
13232
72.2k
    if (ctxt->node != ctxt->myDoc->children) {
13233
1.12k
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13234
1.12k
    }
13235
13236
72.2k
    if (!ctxt->wellFormed) {
13237
11.3k
  ret = (xmlParserErrors)ctxt->errNo;
13238
11.3k
        oldctxt->errNo = ctxt->errNo;
13239
11.3k
        oldctxt->wellFormed = 0;
13240
11.3k
        xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13241
60.9k
    } else {
13242
60.9k
        ret = XML_ERR_OK;
13243
60.9k
    }
13244
13245
72.2k
    if ((lst != NULL) && (ret == XML_ERR_OK)) {
13246
49.1k
  xmlNodePtr cur;
13247
13248
  /*
13249
   * Return the newly created nodeset after unlinking it from
13250
   * they pseudo parent.
13251
   */
13252
49.1k
  cur = ctxt->myDoc->children->children;
13253
49.1k
  *lst = cur;
13254
164k
  while (cur != NULL) {
13255
115k
#ifdef LIBXML_VALID_ENABLED
13256
115k
      if ((oldctxt->validate) && (oldctxt->wellFormed) &&
13257
115k
    (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
13258
115k
    (cur->type == XML_ELEMENT_NODE)) {
13259
17.7k
    oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
13260
17.7k
      oldctxt->myDoc, cur);
13261
17.7k
      }
13262
115k
#endif /* LIBXML_VALID_ENABLED */
13263
115k
      cur->parent = NULL;
13264
115k
      cur = cur->next;
13265
115k
  }
13266
49.1k
  ctxt->myDoc->children->children = NULL;
13267
49.1k
    }
13268
72.2k
    if (ctxt->myDoc != NULL) {
13269
72.2k
  xmlFreeNode(ctxt->myDoc->children);
13270
72.2k
        ctxt->myDoc->children = content;
13271
72.2k
        ctxt->myDoc->last = last;
13272
72.2k
    }
13273
13274
    /*
13275
     * Also record the size of the entity parsed
13276
     */
13277
72.2k
    if (ctxt->input != NULL && oldctxt != NULL) {
13278
72.2k
        unsigned long consumed = ctxt->input->consumed;
13279
13280
72.2k
        xmlSaturatedAddSizeT(&consumed, ctxt->input->cur - ctxt->input->base);
13281
13282
72.2k
        xmlSaturatedAdd(&oldctxt->sizeentcopy, consumed);
13283
72.2k
        xmlSaturatedAdd(&oldctxt->sizeentcopy, ctxt->sizeentcopy);
13284
72.2k
    }
13285
13286
72.2k
    oldctxt->nbErrors = ctxt->nbErrors;
13287
72.2k
    oldctxt->nbWarnings = ctxt->nbWarnings;
13288
72.2k
    ctxt->sax = oldsax;
13289
72.2k
    ctxt->dict = NULL;
13290
72.2k
    ctxt->attsDefault = NULL;
13291
72.2k
    ctxt->attsSpecial = NULL;
13292
72.2k
    xmlFreeParserCtxt(ctxt);
13293
72.2k
    if (newDoc != NULL) {
13294
0
  xmlFreeDoc(newDoc);
13295
0
    }
13296
13297
72.2k
    return(ret);
13298
72.2k
}
13299
13300
/**
13301
 * xmlParseInNodeContext:
13302
 * @node:  the context node
13303
 * @data:  the input string
13304
 * @datalen:  the input string length in bytes
13305
 * @options:  a combination of xmlParserOption
13306
 * @lst:  the return value for the set of parsed nodes
13307
 *
13308
 * Parse a well-balanced chunk of an XML document
13309
 * within the context (DTD, namespaces, etc ...) of the given node.
13310
 *
13311
 * The allowed sequence for the data is a Well Balanced Chunk defined by
13312
 * the content production in the XML grammar:
13313
 *
13314
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13315
 *
13316
 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13317
 * error code otherwise
13318
 */
13319
xmlParserErrors
13320
xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
13321
0
                      int options, xmlNodePtr *lst) {
13322
0
#ifdef SAX2
13323
0
    xmlParserCtxtPtr ctxt;
13324
0
    xmlDocPtr doc = NULL;
13325
0
    xmlNodePtr fake, cur;
13326
0
    int nsnr = 0;
13327
13328
0
    xmlParserErrors ret = XML_ERR_OK;
13329
13330
    /*
13331
     * check all input parameters, grab the document
13332
     */
13333
0
    if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
13334
0
        return(XML_ERR_INTERNAL_ERROR);
13335
0
    switch (node->type) {
13336
0
        case XML_ELEMENT_NODE:
13337
0
        case XML_ATTRIBUTE_NODE:
13338
0
        case XML_TEXT_NODE:
13339
0
        case XML_CDATA_SECTION_NODE:
13340
0
        case XML_ENTITY_REF_NODE:
13341
0
        case XML_PI_NODE:
13342
0
        case XML_COMMENT_NODE:
13343
0
        case XML_DOCUMENT_NODE:
13344
0
        case XML_HTML_DOCUMENT_NODE:
13345
0
      break;
13346
0
  default:
13347
0
      return(XML_ERR_INTERNAL_ERROR);
13348
13349
0
    }
13350
0
    while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
13351
0
           (node->type != XML_DOCUMENT_NODE) &&
13352
0
     (node->type != XML_HTML_DOCUMENT_NODE))
13353
0
  node = node->parent;
13354
0
    if (node == NULL)
13355
0
  return(XML_ERR_INTERNAL_ERROR);
13356
0
    if (node->type == XML_ELEMENT_NODE)
13357
0
  doc = node->doc;
13358
0
    else
13359
0
        doc = (xmlDocPtr) node;
13360
0
    if (doc == NULL)
13361
0
  return(XML_ERR_INTERNAL_ERROR);
13362
13363
    /*
13364
     * allocate a context and set-up everything not related to the
13365
     * node position in the tree
13366
     */
13367
0
    if (doc->type == XML_DOCUMENT_NODE)
13368
0
  ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
13369
0
#ifdef LIBXML_HTML_ENABLED
13370
0
    else if (doc->type == XML_HTML_DOCUMENT_NODE) {
13371
0
  ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
13372
        /*
13373
         * When parsing in context, it makes no sense to add implied
13374
         * elements like html/body/etc...
13375
         */
13376
0
        options |= HTML_PARSE_NOIMPLIED;
13377
0
    }
13378
0
#endif
13379
0
    else
13380
0
        return(XML_ERR_INTERNAL_ERROR);
13381
13382
0
    if (ctxt == NULL)
13383
0
        return(XML_ERR_NO_MEMORY);
13384
13385
    /*
13386
     * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
13387
     * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
13388
     * we must wait until the last moment to free the original one.
13389
     */
13390
0
    if (doc->dict != NULL) {
13391
0
        if (ctxt->dict != NULL)
13392
0
      xmlDictFree(ctxt->dict);
13393
0
  ctxt->dict = doc->dict;
13394
0
    } else
13395
0
        options |= XML_PARSE_NODICT;
13396
13397
0
    if (doc->encoding != NULL) {
13398
0
        xmlCharEncodingHandlerPtr hdlr;
13399
13400
0
        if (ctxt->encoding != NULL)
13401
0
      xmlFree((xmlChar *) ctxt->encoding);
13402
0
        ctxt->encoding = xmlStrdup((const xmlChar *) doc->encoding);
13403
13404
0
        hdlr = xmlFindCharEncodingHandler((const char *) doc->encoding);
13405
0
        if (hdlr != NULL) {
13406
0
            xmlSwitchToEncoding(ctxt, hdlr);
13407
0
  } else {
13408
0
            return(XML_ERR_UNSUPPORTED_ENCODING);
13409
0
        }
13410
0
    }
13411
13412
0
    xmlCtxtUseOptionsInternal(ctxt, options, NULL);
13413
0
    xmlDetectSAX2(ctxt);
13414
0
    ctxt->myDoc = doc;
13415
    /* parsing in context, i.e. as within existing content */
13416
0
    ctxt->input_id = 2;
13417
0
    ctxt->instate = XML_PARSER_CONTENT;
13418
13419
0
    fake = xmlNewDocComment(node->doc, NULL);
13420
0
    if (fake == NULL) {
13421
0
        xmlFreeParserCtxt(ctxt);
13422
0
  return(XML_ERR_NO_MEMORY);
13423
0
    }
13424
0
    xmlAddChild(node, fake);
13425
13426
0
    if (node->type == XML_ELEMENT_NODE) {
13427
0
  nodePush(ctxt, node);
13428
  /*
13429
   * initialize the SAX2 namespaces stack
13430
   */
13431
0
  cur = node;
13432
0
  while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
13433
0
      xmlNsPtr ns = cur->nsDef;
13434
0
      const xmlChar *iprefix, *ihref;
13435
13436
0
      while (ns != NULL) {
13437
0
    if (ctxt->dict) {
13438
0
        iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
13439
0
        ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
13440
0
    } else {
13441
0
        iprefix = ns->prefix;
13442
0
        ihref = ns->href;
13443
0
    }
13444
13445
0
          if (xmlGetNamespace(ctxt, iprefix) == NULL) {
13446
0
        nsPush(ctxt, iprefix, ihref);
13447
0
        nsnr++;
13448
0
    }
13449
0
    ns = ns->next;
13450
0
      }
13451
0
      cur = cur->parent;
13452
0
  }
13453
0
    }
13454
13455
0
    if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
13456
  /*
13457
   * ID/IDREF registration will be done in xmlValidateElement below
13458
   */
13459
0
  ctxt->loadsubset |= XML_SKIP_IDS;
13460
0
    }
13461
13462
0
#ifdef LIBXML_HTML_ENABLED
13463
0
    if (doc->type == XML_HTML_DOCUMENT_NODE)
13464
0
        __htmlParseContent(ctxt);
13465
0
    else
13466
0
#endif
13467
0
  xmlParseContent(ctxt);
13468
13469
0
    nsPop(ctxt, nsnr);
13470
0
    if ((RAW == '<') && (NXT(1) == '/')) {
13471
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13472
0
    } else if (RAW != 0) {
13473
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13474
0
    }
13475
0
    if ((ctxt->node != NULL) && (ctxt->node != node)) {
13476
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13477
0
  ctxt->wellFormed = 0;
13478
0
    }
13479
13480
0
    if (!ctxt->wellFormed) {
13481
0
        if (ctxt->errNo == 0)
13482
0
      ret = XML_ERR_INTERNAL_ERROR;
13483
0
  else
13484
0
      ret = (xmlParserErrors)ctxt->errNo;
13485
0
    } else {
13486
0
        ret = XML_ERR_OK;
13487
0
    }
13488
13489
    /*
13490
     * Return the newly created nodeset after unlinking it from
13491
     * the pseudo sibling.
13492
     */
13493
13494
0
    cur = fake->next;
13495
0
    fake->next = NULL;
13496
0
    node->last = fake;
13497
13498
0
    if (cur != NULL) {
13499
0
  cur->prev = NULL;
13500
0
    }
13501
13502
0
    *lst = cur;
13503
13504
0
    while (cur != NULL) {
13505
0
  cur->parent = NULL;
13506
0
  cur = cur->next;
13507
0
    }
13508
13509
0
    xmlUnlinkNode(fake);
13510
0
    xmlFreeNode(fake);
13511
13512
13513
0
    if (ret != XML_ERR_OK) {
13514
0
        xmlFreeNodeList(*lst);
13515
0
  *lst = NULL;
13516
0
    }
13517
13518
0
    if (doc->dict != NULL)
13519
0
        ctxt->dict = NULL;
13520
0
    xmlFreeParserCtxt(ctxt);
13521
13522
0
    return(ret);
13523
#else /* !SAX2 */
13524
    return(XML_ERR_INTERNAL_ERROR);
13525
#endif
13526
0
}
13527
13528
#ifdef LIBXML_SAX1_ENABLED
13529
/**
13530
 * xmlParseBalancedChunkMemoryRecover:
13531
 * @doc:  the document the chunk pertains to (must not be NULL)
13532
 * @sax:  the SAX handler block (possibly NULL)
13533
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
13534
 * @depth:  Used for loop detection, use 0
13535
 * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
13536
 * @lst:  the return value for the set of parsed nodes
13537
 * @recover: return nodes even if the data is broken (use 0)
13538
 *
13539
 *
13540
 * Parse a well-balanced chunk of an XML document
13541
 * called by the parser
13542
 * The allowed sequence for the Well Balanced Chunk is the one defined by
13543
 * the content production in the XML grammar:
13544
 *
13545
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13546
 *
13547
 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13548
 *    the parser error code otherwise
13549
 *
13550
 * In case recover is set to 1, the nodelist will not be empty even if
13551
 * the parsed chunk is not well balanced, assuming the parsing succeeded to
13552
 * some extent.
13553
 */
13554
int
13555
xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13556
     void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
13557
0
     int recover) {
13558
0
    xmlParserCtxtPtr ctxt;
13559
0
    xmlDocPtr newDoc;
13560
0
    xmlSAXHandlerPtr oldsax = NULL;
13561
0
    xmlNodePtr content, newRoot;
13562
0
    int size;
13563
0
    int ret = 0;
13564
13565
0
    if (depth > 40) {
13566
0
  return(XML_ERR_ENTITY_LOOP);
13567
0
    }
13568
13569
13570
0
    if (lst != NULL)
13571
0
        *lst = NULL;
13572
0
    if (string == NULL)
13573
0
        return(-1);
13574
13575
0
    size = xmlStrlen(string);
13576
13577
0
    ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13578
0
    if (ctxt == NULL) return(-1);
13579
0
    ctxt->userData = ctxt;
13580
0
    if (sax != NULL) {
13581
0
  oldsax = ctxt->sax;
13582
0
        ctxt->sax = sax;
13583
0
  if (user_data != NULL)
13584
0
      ctxt->userData = user_data;
13585
0
    }
13586
0
    newDoc = xmlNewDoc(BAD_CAST "1.0");
13587
0
    if (newDoc == NULL) {
13588
0
  xmlFreeParserCtxt(ctxt);
13589
0
  return(-1);
13590
0
    }
13591
0
    newDoc->properties = XML_DOC_INTERNAL;
13592
0
    if ((doc != NULL) && (doc->dict != NULL)) {
13593
0
        xmlDictFree(ctxt->dict);
13594
0
  ctxt->dict = doc->dict;
13595
0
  xmlDictReference(ctxt->dict);
13596
0
  ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13597
0
  ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13598
0
  ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13599
0
  ctxt->dictNames = 1;
13600
0
    } else {
13601
0
  xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL);
13602
0
    }
13603
    /* doc == NULL is only supported for historic reasons */
13604
0
    if (doc != NULL) {
13605
0
  newDoc->intSubset = doc->intSubset;
13606
0
  newDoc->extSubset = doc->extSubset;
13607
0
    }
13608
0
    newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13609
0
    if (newRoot == NULL) {
13610
0
  if (sax != NULL)
13611
0
      ctxt->sax = oldsax;
13612
0
  xmlFreeParserCtxt(ctxt);
13613
0
  newDoc->intSubset = NULL;
13614
0
  newDoc->extSubset = NULL;
13615
0
        xmlFreeDoc(newDoc);
13616
0
  return(-1);
13617
0
    }
13618
0
    xmlAddChild((xmlNodePtr) newDoc, newRoot);
13619
0
    nodePush(ctxt, newRoot);
13620
    /* doc == NULL is only supported for historic reasons */
13621
0
    if (doc == NULL) {
13622
0
  ctxt->myDoc = newDoc;
13623
0
    } else {
13624
0
  ctxt->myDoc = newDoc;
13625
0
  newDoc->children->doc = doc;
13626
  /* Ensure that doc has XML spec namespace */
13627
0
  xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
13628
0
  newDoc->oldNs = doc->oldNs;
13629
0
    }
13630
0
    ctxt->instate = XML_PARSER_CONTENT;
13631
0
    ctxt->input_id = 2;
13632
0
    ctxt->depth = depth;
13633
13634
    /*
13635
     * Doing validity checking on chunk doesn't make sense
13636
     */
13637
0
    ctxt->validate = 0;
13638
0
    ctxt->loadsubset = 0;
13639
0
    xmlDetectSAX2(ctxt);
13640
13641
0
    if ( doc != NULL ){
13642
0
        content = doc->children;
13643
0
        doc->children = NULL;
13644
0
        xmlParseContent(ctxt);
13645
0
        doc->children = content;
13646
0
    }
13647
0
    else {
13648
0
        xmlParseContent(ctxt);
13649
0
    }
13650
0
    if ((RAW == '<') && (NXT(1) == '/')) {
13651
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13652
0
    } else if (RAW != 0) {
13653
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13654
0
    }
13655
0
    if (ctxt->node != newDoc->children) {
13656
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13657
0
    }
13658
13659
0
    if (!ctxt->wellFormed) {
13660
0
        if (ctxt->errNo == 0)
13661
0
      ret = 1;
13662
0
  else
13663
0
      ret = ctxt->errNo;
13664
0
    } else {
13665
0
      ret = 0;
13666
0
    }
13667
13668
0
    if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
13669
0
  xmlNodePtr cur;
13670
13671
  /*
13672
   * Return the newly created nodeset after unlinking it from
13673
   * they pseudo parent.
13674
   */
13675
0
  cur = newDoc->children->children;
13676
0
  *lst = cur;
13677
0
  while (cur != NULL) {
13678
0
      xmlSetTreeDoc(cur, doc);
13679
0
      cur->parent = NULL;
13680
0
      cur = cur->next;
13681
0
  }
13682
0
  newDoc->children->children = NULL;
13683
0
    }
13684
13685
0
    if (sax != NULL)
13686
0
  ctxt->sax = oldsax;
13687
0
    xmlFreeParserCtxt(ctxt);
13688
0
    newDoc->intSubset = NULL;
13689
0
    newDoc->extSubset = NULL;
13690
    /* This leaks the namespace list if doc == NULL */
13691
0
    newDoc->oldNs = NULL;
13692
0
    xmlFreeDoc(newDoc);
13693
13694
0
    return(ret);
13695
0
}
13696
13697
/**
13698
 * xmlSAXParseEntity:
13699
 * @sax:  the SAX handler block
13700
 * @filename:  the filename
13701
 *
13702
 * DEPRECATED: Don't use.
13703
 *
13704
 * parse an XML external entity out of context and build a tree.
13705
 * It use the given SAX function block to handle the parsing callback.
13706
 * If sax is NULL, fallback to the default DOM tree building routines.
13707
 *
13708
 * [78] extParsedEnt ::= TextDecl? content
13709
 *
13710
 * This correspond to a "Well Balanced" chunk
13711
 *
13712
 * Returns the resulting document tree
13713
 */
13714
13715
xmlDocPtr
13716
0
xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
13717
0
    xmlDocPtr ret;
13718
0
    xmlParserCtxtPtr ctxt;
13719
13720
0
    ctxt = xmlCreateFileParserCtxt(filename);
13721
0
    if (ctxt == NULL) {
13722
0
  return(NULL);
13723
0
    }
13724
0
    if (sax != NULL) {
13725
0
  if (ctxt->sax != NULL)
13726
0
      xmlFree(ctxt->sax);
13727
0
        ctxt->sax = sax;
13728
0
        ctxt->userData = NULL;
13729
0
    }
13730
13731
0
    xmlParseExtParsedEnt(ctxt);
13732
13733
0
    if (ctxt->wellFormed)
13734
0
  ret = ctxt->myDoc;
13735
0
    else {
13736
0
        ret = NULL;
13737
0
        xmlFreeDoc(ctxt->myDoc);
13738
0
        ctxt->myDoc = NULL;
13739
0
    }
13740
0
    if (sax != NULL)
13741
0
        ctxt->sax = NULL;
13742
0
    xmlFreeParserCtxt(ctxt);
13743
13744
0
    return(ret);
13745
0
}
13746
13747
/**
13748
 * xmlParseEntity:
13749
 * @filename:  the filename
13750
 *
13751
 * parse an XML external entity out of context and build a tree.
13752
 *
13753
 * [78] extParsedEnt ::= TextDecl? content
13754
 *
13755
 * This correspond to a "Well Balanced" chunk
13756
 *
13757
 * Returns the resulting document tree
13758
 */
13759
13760
xmlDocPtr
13761
0
xmlParseEntity(const char *filename) {
13762
0
    return(xmlSAXParseEntity(NULL, filename));
13763
0
}
13764
#endif /* LIBXML_SAX1_ENABLED */
13765
13766
/**
13767
 * xmlCreateEntityParserCtxtInternal:
13768
 * @URL:  the entity URL
13769
 * @ID:  the entity PUBLIC ID
13770
 * @base:  a possible base for the target URI
13771
 * @pctx:  parser context used to set options on new context
13772
 *
13773
 * Create a parser context for an external entity
13774
 * Automatic support for ZLIB/Compress compressed document is provided
13775
 * by default if found at compile-time.
13776
 *
13777
 * Returns the new parser context or NULL
13778
 */
13779
static xmlParserCtxtPtr
13780
xmlCreateEntityParserCtxtInternal(xmlSAXHandlerPtr sax, void *userData,
13781
        const xmlChar *URL, const xmlChar *ID, const xmlChar *base,
13782
685k
        xmlParserCtxtPtr pctx) {
13783
685k
    xmlParserCtxtPtr ctxt;
13784
685k
    xmlParserInputPtr inputStream;
13785
685k
    char *directory = NULL;
13786
685k
    xmlChar *uri;
13787
13788
685k
    ctxt = xmlNewSAXParserCtxt(sax, userData);
13789
685k
    if (ctxt == NULL) {
13790
0
  return(NULL);
13791
0
    }
13792
13793
685k
    if (pctx != NULL) {
13794
685k
        ctxt->options = pctx->options;
13795
685k
        ctxt->_private = pctx->_private;
13796
685k
  ctxt->input_id = pctx->input_id;
13797
685k
    }
13798
13799
    /* Don't read from stdin. */
13800
685k
    if (xmlStrcmp(URL, BAD_CAST "-") == 0)
13801
0
        URL = BAD_CAST "./-";
13802
13803
685k
    uri = xmlBuildURI(URL, base);
13804
13805
685k
    if (uri == NULL) {
13806
17.3k
  inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
13807
17.3k
  if (inputStream == NULL) {
13808
13.8k
      xmlFreeParserCtxt(ctxt);
13809
13.8k
      return(NULL);
13810
13.8k
  }
13811
13812
3.51k
  inputPush(ctxt, inputStream);
13813
13814
3.51k
  if ((ctxt->directory == NULL) && (directory == NULL))
13815
3.51k
      directory = xmlParserGetDirectory((char *)URL);
13816
3.51k
  if ((ctxt->directory == NULL) && (directory != NULL))
13817
3.51k
      ctxt->directory = directory;
13818
668k
    } else {
13819
668k
  inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
13820
668k
  if (inputStream == NULL) {
13821
579k
      xmlFree(uri);
13822
579k
      xmlFreeParserCtxt(ctxt);
13823
579k
      return(NULL);
13824
579k
  }
13825
13826
88.5k
  inputPush(ctxt, inputStream);
13827
13828
88.5k
  if ((ctxt->directory == NULL) && (directory == NULL))
13829
88.5k
      directory = xmlParserGetDirectory((char *)uri);
13830
88.5k
  if ((ctxt->directory == NULL) && (directory != NULL))
13831
88.5k
      ctxt->directory = directory;
13832
88.5k
  xmlFree(uri);
13833
88.5k
    }
13834
92.0k
    return(ctxt);
13835
685k
}
13836
13837
/**
13838
 * xmlCreateEntityParserCtxt:
13839
 * @URL:  the entity URL
13840
 * @ID:  the entity PUBLIC ID
13841
 * @base:  a possible base for the target URI
13842
 *
13843
 * Create a parser context for an external entity
13844
 * Automatic support for ZLIB/Compress compressed document is provided
13845
 * by default if found at compile-time.
13846
 *
13847
 * Returns the new parser context or NULL
13848
 */
13849
xmlParserCtxtPtr
13850
xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
13851
0
                    const xmlChar *base) {
13852
0
    return xmlCreateEntityParserCtxtInternal(NULL, NULL, URL, ID, base, NULL);
13853
13854
0
}
13855
13856
/************************************************************************
13857
 *                  *
13858
 *    Front ends when parsing from a file     *
13859
 *                  *
13860
 ************************************************************************/
13861
13862
/**
13863
 * xmlCreateURLParserCtxt:
13864
 * @filename:  the filename or URL
13865
 * @options:  a combination of xmlParserOption
13866
 *
13867
 * Create a parser context for a file or URL content.
13868
 * Automatic support for ZLIB/Compress compressed document is provided
13869
 * by default if found at compile-time and for file accesses
13870
 *
13871
 * Returns the new parser context or NULL
13872
 */
13873
xmlParserCtxtPtr
13874
xmlCreateURLParserCtxt(const char *filename, int options)
13875
0
{
13876
0
    xmlParserCtxtPtr ctxt;
13877
0
    xmlParserInputPtr inputStream;
13878
0
    char *directory = NULL;
13879
13880
0
    ctxt = xmlNewParserCtxt();
13881
0
    if (ctxt == NULL) {
13882
0
  xmlErrMemory(NULL, "cannot allocate parser context");
13883
0
  return(NULL);
13884
0
    }
13885
13886
0
    if (options)
13887
0
  xmlCtxtUseOptionsInternal(ctxt, options, NULL);
13888
0
    ctxt->linenumbers = 1;
13889
13890
0
    inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
13891
0
    if (inputStream == NULL) {
13892
0
  xmlFreeParserCtxt(ctxt);
13893
0
  return(NULL);
13894
0
    }
13895
13896
0
    inputPush(ctxt, inputStream);
13897
0
    if ((ctxt->directory == NULL) && (directory == NULL))
13898
0
        directory = xmlParserGetDirectory(filename);
13899
0
    if ((ctxt->directory == NULL) && (directory != NULL))
13900
0
        ctxt->directory = directory;
13901
13902
0
    return(ctxt);
13903
0
}
13904
13905
/**
13906
 * xmlCreateFileParserCtxt:
13907
 * @filename:  the filename
13908
 *
13909
 * Create a parser context for a file content.
13910
 * Automatic support for ZLIB/Compress compressed document is provided
13911
 * by default if found at compile-time.
13912
 *
13913
 * Returns the new parser context or NULL
13914
 */
13915
xmlParserCtxtPtr
13916
xmlCreateFileParserCtxt(const char *filename)
13917
0
{
13918
0
    return(xmlCreateURLParserCtxt(filename, 0));
13919
0
}
13920
13921
#ifdef LIBXML_SAX1_ENABLED
13922
/**
13923
 * xmlSAXParseFileWithData:
13924
 * @sax:  the SAX handler block
13925
 * @filename:  the filename
13926
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
13927
 *             documents
13928
 * @data:  the userdata
13929
 *
13930
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
13931
 *
13932
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13933
 * compressed document is provided by default if found at compile-time.
13934
 * It use the given SAX function block to handle the parsing callback.
13935
 * If sax is NULL, fallback to the default DOM tree building routines.
13936
 *
13937
 * User data (void *) is stored within the parser context in the
13938
 * context's _private member, so it is available nearly everywhere in libxml
13939
 *
13940
 * Returns the resulting document tree
13941
 */
13942
13943
xmlDocPtr
13944
xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
13945
0
                        int recovery, void *data) {
13946
0
    xmlDocPtr ret;
13947
0
    xmlParserCtxtPtr ctxt;
13948
13949
0
    xmlInitParser();
13950
13951
0
    ctxt = xmlCreateFileParserCtxt(filename);
13952
0
    if (ctxt == NULL) {
13953
0
  return(NULL);
13954
0
    }
13955
0
    if (sax != NULL) {
13956
0
  if (ctxt->sax != NULL)
13957
0
      xmlFree(ctxt->sax);
13958
0
        ctxt->sax = sax;
13959
0
    }
13960
0
    xmlDetectSAX2(ctxt);
13961
0
    if (data!=NULL) {
13962
0
  ctxt->_private = data;
13963
0
    }
13964
13965
0
    if (ctxt->directory == NULL)
13966
0
        ctxt->directory = xmlParserGetDirectory(filename);
13967
13968
0
    ctxt->recovery = recovery;
13969
13970
0
    xmlParseDocument(ctxt);
13971
13972
0
    if ((ctxt->wellFormed) || recovery) {
13973
0
        ret = ctxt->myDoc;
13974
0
  if ((ret != NULL) && (ctxt->input->buf != NULL)) {
13975
0
      if (ctxt->input->buf->compressed > 0)
13976
0
    ret->compression = 9;
13977
0
      else
13978
0
    ret->compression = ctxt->input->buf->compressed;
13979
0
  }
13980
0
    }
13981
0
    else {
13982
0
       ret = NULL;
13983
0
       xmlFreeDoc(ctxt->myDoc);
13984
0
       ctxt->myDoc = NULL;
13985
0
    }
13986
0
    if (sax != NULL)
13987
0
        ctxt->sax = NULL;
13988
0
    xmlFreeParserCtxt(ctxt);
13989
13990
0
    return(ret);
13991
0
}
13992
13993
/**
13994
 * xmlSAXParseFile:
13995
 * @sax:  the SAX handler block
13996
 * @filename:  the filename
13997
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
13998
 *             documents
13999
 *
14000
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
14001
 *
14002
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14003
 * compressed document is provided by default if found at compile-time.
14004
 * It use the given SAX function block to handle the parsing callback.
14005
 * If sax is NULL, fallback to the default DOM tree building routines.
14006
 *
14007
 * Returns the resulting document tree
14008
 */
14009
14010
xmlDocPtr
14011
xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
14012
0
                          int recovery) {
14013
0
    return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
14014
0
}
14015
14016
/**
14017
 * xmlRecoverDoc:
14018
 * @cur:  a pointer to an array of xmlChar
14019
 *
14020
 * DEPRECATED: Use xmlReadDoc with XML_PARSE_RECOVER.
14021
 *
14022
 * parse an XML in-memory document and build a tree.
14023
 * In the case the document is not Well Formed, a attempt to build a
14024
 * tree is tried anyway
14025
 *
14026
 * Returns the resulting document tree or NULL in case of failure
14027
 */
14028
14029
xmlDocPtr
14030
0
xmlRecoverDoc(const xmlChar *cur) {
14031
0
    return(xmlSAXParseDoc(NULL, cur, 1));
14032
0
}
14033
14034
/**
14035
 * xmlParseFile:
14036
 * @filename:  the filename
14037
 *
14038
 * DEPRECATED: Use xmlReadFile.
14039
 *
14040
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14041
 * compressed document is provided by default if found at compile-time.
14042
 *
14043
 * Returns the resulting document tree if the file was wellformed,
14044
 * NULL otherwise.
14045
 */
14046
14047
xmlDocPtr
14048
0
xmlParseFile(const char *filename) {
14049
0
    return(xmlSAXParseFile(NULL, filename, 0));
14050
0
}
14051
14052
/**
14053
 * xmlRecoverFile:
14054
 * @filename:  the filename
14055
 *
14056
 * DEPRECATED: Use xmlReadFile with XML_PARSE_RECOVER.
14057
 *
14058
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14059
 * compressed document is provided by default if found at compile-time.
14060
 * In the case the document is not Well Formed, it attempts to build
14061
 * a tree anyway
14062
 *
14063
 * Returns the resulting document tree or NULL in case of failure
14064
 */
14065
14066
xmlDocPtr
14067
0
xmlRecoverFile(const char *filename) {
14068
0
    return(xmlSAXParseFile(NULL, filename, 1));
14069
0
}
14070
14071
14072
/**
14073
 * xmlSetupParserForBuffer:
14074
 * @ctxt:  an XML parser context
14075
 * @buffer:  a xmlChar * buffer
14076
 * @filename:  a file name
14077
 *
14078
 * DEPRECATED: Don't use.
14079
 *
14080
 * Setup the parser context to parse a new buffer; Clears any prior
14081
 * contents from the parser context. The buffer parameter must not be
14082
 * NULL, but the filename parameter can be
14083
 */
14084
void
14085
xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
14086
                             const char* filename)
14087
0
{
14088
0
    xmlParserInputPtr input;
14089
14090
0
    if ((ctxt == NULL) || (buffer == NULL))
14091
0
        return;
14092
14093
0
    input = xmlNewInputStream(ctxt);
14094
0
    if (input == NULL) {
14095
0
        xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
14096
0
        xmlClearParserCtxt(ctxt);
14097
0
        return;
14098
0
    }
14099
14100
0
    xmlClearParserCtxt(ctxt);
14101
0
    if (filename != NULL)
14102
0
        input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
14103
0
    input->base = buffer;
14104
0
    input->cur = buffer;
14105
0
    input->end = &buffer[xmlStrlen(buffer)];
14106
0
    inputPush(ctxt, input);
14107
0
}
14108
14109
/**
14110
 * xmlSAXUserParseFile:
14111
 * @sax:  a SAX handler
14112
 * @user_data:  The user data returned on SAX callbacks
14113
 * @filename:  a file name
14114
 *
14115
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
14116
 *
14117
 * parse an XML file and call the given SAX handler routines.
14118
 * Automatic support for ZLIB/Compress compressed document is provided
14119
 *
14120
 * Returns 0 in case of success or a error number otherwise
14121
 */
14122
int
14123
xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
14124
0
                    const char *filename) {
14125
0
    int ret = 0;
14126
0
    xmlParserCtxtPtr ctxt;
14127
14128
0
    ctxt = xmlCreateFileParserCtxt(filename);
14129
0
    if (ctxt == NULL) return -1;
14130
0
    if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14131
0
  xmlFree(ctxt->sax);
14132
0
    ctxt->sax = sax;
14133
0
    xmlDetectSAX2(ctxt);
14134
14135
0
    if (user_data != NULL)
14136
0
  ctxt->userData = user_data;
14137
14138
0
    xmlParseDocument(ctxt);
14139
14140
0
    if (ctxt->wellFormed)
14141
0
  ret = 0;
14142
0
    else {
14143
0
        if (ctxt->errNo != 0)
14144
0
      ret = ctxt->errNo;
14145
0
  else
14146
0
      ret = -1;
14147
0
    }
14148
0
    if (sax != NULL)
14149
0
  ctxt->sax = NULL;
14150
0
    if (ctxt->myDoc != NULL) {
14151
0
        xmlFreeDoc(ctxt->myDoc);
14152
0
  ctxt->myDoc = NULL;
14153
0
    }
14154
0
    xmlFreeParserCtxt(ctxt);
14155
14156
0
    return ret;
14157
0
}
14158
#endif /* LIBXML_SAX1_ENABLED */
14159
14160
/************************************************************************
14161
 *                  *
14162
 *    Front ends when parsing from memory     *
14163
 *                  *
14164
 ************************************************************************/
14165
14166
/**
14167
 * xmlCreateMemoryParserCtxt:
14168
 * @buffer:  a pointer to a char array
14169
 * @size:  the size of the array
14170
 *
14171
 * Create a parser context for an XML in-memory document.
14172
 *
14173
 * Returns the new parser context or NULL
14174
 */
14175
xmlParserCtxtPtr
14176
246k
xmlCreateMemoryParserCtxt(const char *buffer, int size) {
14177
246k
    xmlParserCtxtPtr ctxt;
14178
246k
    xmlParserInputPtr input;
14179
246k
    xmlParserInputBufferPtr buf;
14180
14181
246k
    if (buffer == NULL)
14182
0
  return(NULL);
14183
246k
    if (size <= 0)
14184
20.7k
  return(NULL);
14185
14186
225k
    ctxt = xmlNewParserCtxt();
14187
225k
    if (ctxt == NULL)
14188
0
  return(NULL);
14189
14190
225k
    buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
14191
225k
    if (buf == NULL) {
14192
0
  xmlFreeParserCtxt(ctxt);
14193
0
  return(NULL);
14194
0
    }
14195
14196
225k
    input = xmlNewInputStream(ctxt);
14197
225k
    if (input == NULL) {
14198
0
  xmlFreeParserInputBuffer(buf);
14199
0
  xmlFreeParserCtxt(ctxt);
14200
0
  return(NULL);
14201
0
    }
14202
14203
225k
    input->filename = NULL;
14204
225k
    input->buf = buf;
14205
225k
    xmlBufResetInput(input->buf->buffer, input);
14206
14207
225k
    inputPush(ctxt, input);
14208
225k
    return(ctxt);
14209
225k
}
14210
14211
#ifdef LIBXML_SAX1_ENABLED
14212
/**
14213
 * xmlSAXParseMemoryWithData:
14214
 * @sax:  the SAX handler block
14215
 * @buffer:  an pointer to a char array
14216
 * @size:  the size of the array
14217
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
14218
 *             documents
14219
 * @data:  the userdata
14220
 *
14221
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
14222
 *
14223
 * parse an XML in-memory block and use the given SAX function block
14224
 * to handle the parsing callback. If sax is NULL, fallback to the default
14225
 * DOM tree building routines.
14226
 *
14227
 * User data (void *) is stored within the parser context in the
14228
 * context's _private member, so it is available nearly everywhere in libxml
14229
 *
14230
 * Returns the resulting document tree
14231
 */
14232
14233
xmlDocPtr
14234
xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
14235
0
            int size, int recovery, void *data) {
14236
0
    xmlDocPtr ret;
14237
0
    xmlParserCtxtPtr ctxt;
14238
14239
0
    xmlInitParser();
14240
14241
0
    ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14242
0
    if (ctxt == NULL) return(NULL);
14243
0
    if (sax != NULL) {
14244
0
  if (ctxt->sax != NULL)
14245
0
      xmlFree(ctxt->sax);
14246
0
        ctxt->sax = sax;
14247
0
    }
14248
0
    xmlDetectSAX2(ctxt);
14249
0
    if (data!=NULL) {
14250
0
  ctxt->_private=data;
14251
0
    }
14252
14253
0
    ctxt->recovery = recovery;
14254
14255
0
    xmlParseDocument(ctxt);
14256
14257
0
    if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14258
0
    else {
14259
0
       ret = NULL;
14260
0
       xmlFreeDoc(ctxt->myDoc);
14261
0
       ctxt->myDoc = NULL;
14262
0
    }
14263
0
    if (sax != NULL)
14264
0
  ctxt->sax = NULL;
14265
0
    xmlFreeParserCtxt(ctxt);
14266
14267
0
    return(ret);
14268
0
}
14269
14270
/**
14271
 * xmlSAXParseMemory:
14272
 * @sax:  the SAX handler block
14273
 * @buffer:  an pointer to a char array
14274
 * @size:  the size of the array
14275
 * @recovery:  work in recovery mode, i.e. tries to read not Well Formed
14276
 *             documents
14277
 *
14278
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
14279
 *
14280
 * parse an XML in-memory block and use the given SAX function block
14281
 * to handle the parsing callback. If sax is NULL, fallback to the default
14282
 * DOM tree building routines.
14283
 *
14284
 * Returns the resulting document tree
14285
 */
14286
xmlDocPtr
14287
xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
14288
0
            int size, int recovery) {
14289
0
    return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
14290
0
}
14291
14292
/**
14293
 * xmlParseMemory:
14294
 * @buffer:  an pointer to a char array
14295
 * @size:  the size of the array
14296
 *
14297
 * DEPRECATED: Use xmlReadMemory.
14298
 *
14299
 * parse an XML in-memory block and build a tree.
14300
 *
14301
 * Returns the resulting document tree
14302
 */
14303
14304
0
xmlDocPtr xmlParseMemory(const char *buffer, int size) {
14305
0
   return(xmlSAXParseMemory(NULL, buffer, size, 0));
14306
0
}
14307
14308
/**
14309
 * xmlRecoverMemory:
14310
 * @buffer:  an pointer to a char array
14311
 * @size:  the size of the array
14312
 *
14313
 * DEPRECATED: Use xmlReadMemory with XML_PARSE_RECOVER.
14314
 *
14315
 * parse an XML in-memory block and build a tree.
14316
 * In the case the document is not Well Formed, an attempt to
14317
 * build a tree is tried anyway
14318
 *
14319
 * Returns the resulting document tree or NULL in case of error
14320
 */
14321
14322
0
xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
14323
0
   return(xmlSAXParseMemory(NULL, buffer, size, 1));
14324
0
}
14325
14326
/**
14327
 * xmlSAXUserParseMemory:
14328
 * @sax:  a SAX handler
14329
 * @user_data:  The user data returned on SAX callbacks
14330
 * @buffer:  an in-memory XML document input
14331
 * @size:  the length of the XML document in bytes
14332
 *
14333
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
14334
 *
14335
 * parse an XML in-memory buffer and call the given SAX handler routines.
14336
 *
14337
 * Returns 0 in case of success or a error number otherwise
14338
 */
14339
int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
14340
0
        const char *buffer, int size) {
14341
0
    int ret = 0;
14342
0
    xmlParserCtxtPtr ctxt;
14343
14344
0
    xmlInitParser();
14345
14346
0
    ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14347
0
    if (ctxt == NULL) return -1;
14348
0
    if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14349
0
        xmlFree(ctxt->sax);
14350
0
    ctxt->sax = sax;
14351
0
    xmlDetectSAX2(ctxt);
14352
14353
0
    if (user_data != NULL)
14354
0
  ctxt->userData = user_data;
14355
14356
0
    xmlParseDocument(ctxt);
14357
14358
0
    if (ctxt->wellFormed)
14359
0
  ret = 0;
14360
0
    else {
14361
0
        if (ctxt->errNo != 0)
14362
0
      ret = ctxt->errNo;
14363
0
  else
14364
0
      ret = -1;
14365
0
    }
14366
0
    if (sax != NULL)
14367
0
        ctxt->sax = NULL;
14368
0
    if (ctxt->myDoc != NULL) {
14369
0
        xmlFreeDoc(ctxt->myDoc);
14370
0
  ctxt->myDoc = NULL;
14371
0
    }
14372
0
    xmlFreeParserCtxt(ctxt);
14373
14374
0
    return ret;
14375
0
}
14376
#endif /* LIBXML_SAX1_ENABLED */
14377
14378
/**
14379
 * xmlCreateDocParserCtxt:
14380
 * @cur:  a pointer to an array of xmlChar
14381
 *
14382
 * Creates a parser context for an XML in-memory document.
14383
 *
14384
 * Returns the new parser context or NULL
14385
 */
14386
xmlParserCtxtPtr
14387
0
xmlCreateDocParserCtxt(const xmlChar *cur) {
14388
0
    int len;
14389
14390
0
    if (cur == NULL)
14391
0
  return(NULL);
14392
0
    len = xmlStrlen(cur);
14393
0
    return(xmlCreateMemoryParserCtxt((const char *)cur, len));
14394
0
}
14395
14396
#ifdef LIBXML_SAX1_ENABLED
14397
/**
14398
 * xmlSAXParseDoc:
14399
 * @sax:  the SAX handler block
14400
 * @cur:  a pointer to an array of xmlChar
14401
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
14402
 *             documents
14403
 *
14404
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadDoc.
14405
 *
14406
 * parse an XML in-memory document and build a tree.
14407
 * It use the given SAX function block to handle the parsing callback.
14408
 * If sax is NULL, fallback to the default DOM tree building routines.
14409
 *
14410
 * Returns the resulting document tree
14411
 */
14412
14413
xmlDocPtr
14414
0
xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
14415
0
    xmlDocPtr ret;
14416
0
    xmlParserCtxtPtr ctxt;
14417
0
    xmlSAXHandlerPtr oldsax = NULL;
14418
14419
0
    if (cur == NULL) return(NULL);
14420
14421
14422
0
    ctxt = xmlCreateDocParserCtxt(cur);
14423
0
    if (ctxt == NULL) return(NULL);
14424
0
    if (sax != NULL) {
14425
0
        oldsax = ctxt->sax;
14426
0
        ctxt->sax = sax;
14427
0
        ctxt->userData = NULL;
14428
0
    }
14429
0
    xmlDetectSAX2(ctxt);
14430
14431
0
    xmlParseDocument(ctxt);
14432
0
    if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14433
0
    else {
14434
0
       ret = NULL;
14435
0
       xmlFreeDoc(ctxt->myDoc);
14436
0
       ctxt->myDoc = NULL;
14437
0
    }
14438
0
    if (sax != NULL)
14439
0
  ctxt->sax = oldsax;
14440
0
    xmlFreeParserCtxt(ctxt);
14441
14442
0
    return(ret);
14443
0
}
14444
14445
/**
14446
 * xmlParseDoc:
14447
 * @cur:  a pointer to an array of xmlChar
14448
 *
14449
 * DEPRECATED: Use xmlReadDoc.
14450
 *
14451
 * parse an XML in-memory document and build a tree.
14452
 *
14453
 * Returns the resulting document tree
14454
 */
14455
14456
xmlDocPtr
14457
0
xmlParseDoc(const xmlChar *cur) {
14458
0
    return(xmlSAXParseDoc(NULL, cur, 0));
14459
0
}
14460
#endif /* LIBXML_SAX1_ENABLED */
14461
14462
#ifdef LIBXML_LEGACY_ENABLED
14463
/************************************************************************
14464
 *                  *
14465
 *  Specific function to keep track of entities references    *
14466
 *  and used by the XSLT debugger         *
14467
 *                  *
14468
 ************************************************************************/
14469
14470
static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
14471
14472
/**
14473
 * xmlAddEntityReference:
14474
 * @ent : A valid entity
14475
 * @firstNode : A valid first node for children of entity
14476
 * @lastNode : A valid last node of children entity
14477
 *
14478
 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
14479
 */
14480
static void
14481
xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
14482
                      xmlNodePtr lastNode)
14483
{
14484
    if (xmlEntityRefFunc != NULL) {
14485
        (*xmlEntityRefFunc) (ent, firstNode, lastNode);
14486
    }
14487
}
14488
14489
14490
/**
14491
 * xmlSetEntityReferenceFunc:
14492
 * @func: A valid function
14493
 *
14494
 * Set the function to call call back when a xml reference has been made
14495
 */
14496
void
14497
xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
14498
{
14499
    xmlEntityRefFunc = func;
14500
}
14501
#endif /* LIBXML_LEGACY_ENABLED */
14502
14503
/************************************************************************
14504
 *                  *
14505
 *        Miscellaneous       *
14506
 *                  *
14507
 ************************************************************************/
14508
14509
/* Non-zero once xmlInitParser() has completed; reset by xmlCleanupParser(). */
static int xmlParserInitialized = 0;

/**
 * xmlInitParser:
 *
 * Initialization function for the XML parser.
 * This is not reentrant. Call once before processing in case of
 * use in multithreaded programs.
 *
 * The function returns immediately when the library is already
 * initialized. In thread-enabled builds the initialized flag is tested a
 * second time after the global init mutex has been taken, and the
 * sub-module initializers run while that mutex is held.
 */

void
xmlInitParser(void) {
    /*
     * Note that the initialization code must not make memory allocations.
     */
    if (xmlParserInitialized)
        return;

#ifdef LIBXML_THREAD_ENABLED
    __xmlGlobalInitMutexLock();
    if (!xmlParserInitialized) {
#endif
#if defined(_WIN32) && (!defined(LIBXML_STATIC) || defined(LIBXML_STATIC_FOR_DLL))
        /* Only register the exit handler with the default deallocator. */
        if (xmlFree == free)
            atexit(xmlCleanupParser);
#endif

        xmlInitThreadsInternal();
        xmlInitGlobalsInternal();
        xmlInitMemoryInternal();
        __xmlInitializeDict();
        xmlInitEncodingInternal();
        xmlRegisterDefaultInputCallbacks();
#ifdef LIBXML_OUTPUT_ENABLED
        xmlRegisterDefaultOutputCallbacks();
#endif /* LIBXML_OUTPUT_ENABLED */
#if defined(LIBXML_XPATH_ENABLED) || defined(LIBXML_SCHEMAS_ENABLED)
        xmlInitXPathInternal();
#endif
        /* Set last, once every sub-module initializer has run. */
        xmlParserInitialized = 1;
#ifdef LIBXML_THREAD_ENABLED
    }
    __xmlGlobalInitMutexUnlock();
#endif
}
14554
14555
/**
14556
 * xmlCleanupParser:
14557
 *
14558
 * This function name is somewhat misleading. It does not clean up
14559
 * parser state, it cleans up memory allocated by the library itself.
14560
 * It is a cleanup function for the XML library. It tries to reclaim all
14561
 * related global memory allocated for the library processing.
14562
 * It doesn't deallocate any document related memory. One should
14563
 * call xmlCleanupParser() only when the process has finished using
14564
 * the library and all XML/HTML documents built with it.
14565
 * See also xmlInitParser() which has the opposite function of preparing
14566
 * the library for operations.
14567
 *
14568
 * WARNING: if your application is multithreaded or has plugin support
14569
 *          calling this may crash the application if another thread or
14570
 *          a plugin is still using libxml2. It's sometimes very hard to
14571
 *          guess if libxml2 is in use in the application, some libraries
14572
 *          or plugins may use it without notice. In case of doubt abstain
14573
 *          from calling this function or do it just before calling exit()
14574
 *          to avoid leak reports from valgrind !
14575
 */
14576
14577
void
14578
0
xmlCleanupParser(void) {
14579
0
    if (!xmlParserInitialized)
14580
0
  return;
14581
14582
0
    xmlCleanupCharEncodingHandlers();
14583
0
#ifdef LIBXML_CATALOG_ENABLED
14584
0
    xmlCatalogCleanup();
14585
0
#endif
14586
0
    xmlCleanupDictInternal();
14587
0
    xmlCleanupInputCallbacks();
14588
0
#ifdef LIBXML_OUTPUT_ENABLED
14589
0
    xmlCleanupOutputCallbacks();
14590
0
#endif
14591
0
#ifdef LIBXML_SCHEMAS_ENABLED
14592
0
    xmlSchemaCleanupTypes();
14593
0
    xmlRelaxNGCleanupTypes();
14594
0
#endif
14595
0
    xmlCleanupGlobalsInternal();
14596
0
    xmlCleanupThreadsInternal();
14597
0
    xmlCleanupMemoryInternal();
14598
0
    xmlParserInitialized = 0;
14599
0
}
14600
14601
#if defined(HAVE_ATTRIBUTE_DESTRUCTOR) && !defined(LIBXML_STATIC) && \
    !defined(_WIN32)
/*
 * xmlDestructor:
 *
 * Function tagged with ATTRIBUTE_DESTRUCTOR that runs the library
 * cleanup, but only while the default deallocator is still installed.
 */
static void
ATTRIBUTE_DESTRUCTOR
xmlDestructor(void) {
    /*
     * Calling custom deallocation functions in a destructor can cause
     * problems, for example with Nokogiri.
     */
    if (xmlFree != free)
        return;

    xmlCleanupParser();
}
#endif
14614
14615
/************************************************************************
14616
 *                  *
14617
 *  New set (2.6.0) of simpler and more flexible APIs   *
14618
 *                  *
14619
 ************************************************************************/
14620
14621
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A variable named "dict" must be visible where the macro
 * is expanded; a NULL @str is ignored, and a NULL "dict" means the string
 * is always freed.
 *
 * The body is wrapped in do { } while (0) so that the expansion is a
 * single statement: "DICT_FREE(x);" can then be used as the unbraced body
 * of an if/else without capturing the following else.
 */
#define DICT_FREE(str)						\
    do {							\
        if ((str) && ((!dict) ||				\
            (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))	\
            xmlFree((char *)(str));				\
    } while (0)
14632
14633
/**
14634
 * xmlCtxtReset:
14635
 * @ctxt: an XML parser context
14636
 *
14637
 * Reset a parser context
14638
 */
14639
void
14640
xmlCtxtReset(xmlParserCtxtPtr ctxt)
14641
0
{
14642
0
    xmlParserInputPtr input;
14643
0
    xmlDictPtr dict;
14644
14645
0
    if (ctxt == NULL)
14646
0
        return;
14647
14648
0
    dict = ctxt->dict;
14649
14650
0
    while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
14651
0
        xmlFreeInputStream(input);
14652
0
    }
14653
0
    ctxt->inputNr = 0;
14654
0
    ctxt->input = NULL;
14655
14656
0
    ctxt->spaceNr = 0;
14657
0
    if (ctxt->spaceTab != NULL) {
14658
0
  ctxt->spaceTab[0] = -1;
14659
0
  ctxt->space = &ctxt->spaceTab[0];
14660
0
    } else {
14661
0
        ctxt->space = NULL;
14662
0
    }
14663
14664
14665
0
    ctxt->nodeNr = 0;
14666
0
    ctxt->node = NULL;
14667
14668
0
    ctxt->nameNr = 0;
14669
0
    ctxt->name = NULL;
14670
14671
0
    ctxt->nsNr = 0;
14672
14673
0
    DICT_FREE(ctxt->version);
14674
0
    ctxt->version = NULL;
14675
0
    DICT_FREE(ctxt->encoding);
14676
0
    ctxt->encoding = NULL;
14677
0
    DICT_FREE(ctxt->directory);
14678
0
    ctxt->directory = NULL;
14679
0
    DICT_FREE(ctxt->extSubURI);
14680
0
    ctxt->extSubURI = NULL;
14681
0
    DICT_FREE(ctxt->extSubSystem);
14682
0
    ctxt->extSubSystem = NULL;
14683
0
    if (ctxt->myDoc != NULL)
14684
0
        xmlFreeDoc(ctxt->myDoc);
14685
0
    ctxt->myDoc = NULL;
14686
14687
0
    ctxt->standalone = -1;
14688
0
    ctxt->hasExternalSubset = 0;
14689
0
    ctxt->hasPErefs = 0;
14690
0
    ctxt->html = 0;
14691
0
    ctxt->external = 0;
14692
0
    ctxt->instate = XML_PARSER_START;
14693
0
    ctxt->token = 0;
14694
14695
0
    ctxt->wellFormed = 1;
14696
0
    ctxt->nsWellFormed = 1;
14697
0
    ctxt->disableSAX = 0;
14698
0
    ctxt->valid = 1;
14699
#if 0
14700
    ctxt->vctxt.userData = ctxt;
14701
    ctxt->vctxt.error = xmlParserValidityError;
14702
    ctxt->vctxt.warning = xmlParserValidityWarning;
14703
#endif
14704
0
    ctxt->record_info = 0;
14705
0
    ctxt->checkIndex = 0;
14706
0
    ctxt->endCheckState = 0;
14707
0
    ctxt->inSubset = 0;
14708
0
    ctxt->errNo = XML_ERR_OK;
14709
0
    ctxt->depth = 0;
14710
0
    ctxt->charset = XML_CHAR_ENCODING_UTF8;
14711
0
    ctxt->catalogs = NULL;
14712
0
    ctxt->sizeentities = 0;
14713
0
    ctxt->sizeentcopy = 0;
14714
0
    xmlInitNodeInfoSeq(&ctxt->node_seq);
14715
14716
0
    if (ctxt->attsDefault != NULL) {
14717
0
        xmlHashFree(ctxt->attsDefault, xmlHashDefaultDeallocator);
14718
0
        ctxt->attsDefault = NULL;
14719
0
    }
14720
0
    if (ctxt->attsSpecial != NULL) {
14721
0
        xmlHashFree(ctxt->attsSpecial, NULL);
14722
0
        ctxt->attsSpecial = NULL;
14723
0
    }
14724
14725
0
#ifdef LIBXML_CATALOG_ENABLED
14726
0
    if (ctxt->catalogs != NULL)
14727
0
  xmlCatalogFreeLocal(ctxt->catalogs);
14728
0
#endif
14729
0
    ctxt->nbErrors = 0;
14730
0
    ctxt->nbWarnings = 0;
14731
0
    if (ctxt->lastError.code != XML_ERR_OK)
14732
0
        xmlResetError(&ctxt->lastError);
14733
0
}
14734
14735
/**
14736
 * xmlCtxtResetPush:
14737
 * @ctxt: an XML parser context
14738
 * @chunk:  a pointer to an array of chars
14739
 * @size:  number of chars in the array
14740
 * @filename:  an optional file name or URI
14741
 * @encoding:  the document encoding, or NULL
14742
 *
14743
 * Reset a push parser context
14744
 *
14745
 * Returns 0 in case of success and 1 in case of error
14746
 */
14747
int
14748
xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
14749
                 int size, const char *filename, const char *encoding)
14750
0
{
14751
0
    xmlParserInputPtr inputStream;
14752
0
    xmlParserInputBufferPtr buf;
14753
0
    xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
14754
14755
0
    if (ctxt == NULL)
14756
0
        return(1);
14757
14758
0
    if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
14759
0
        enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
14760
14761
0
    buf = xmlAllocParserInputBuffer(enc);
14762
0
    if (buf == NULL)
14763
0
        return(1);
14764
14765
0
    if (ctxt == NULL) {
14766
0
        xmlFreeParserInputBuffer(buf);
14767
0
        return(1);
14768
0
    }
14769
14770
0
    xmlCtxtReset(ctxt);
14771
14772
0
    if (filename == NULL) {
14773
0
        ctxt->directory = NULL;
14774
0
    } else {
14775
0
        ctxt->directory = xmlParserGetDirectory(filename);
14776
0
    }
14777
14778
0
    inputStream = xmlNewInputStream(ctxt);
14779
0
    if (inputStream == NULL) {
14780
0
        xmlFreeParserInputBuffer(buf);
14781
0
        return(1);
14782
0
    }
14783
14784
0
    if (filename == NULL)
14785
0
        inputStream->filename = NULL;
14786
0
    else
14787
0
        inputStream->filename = (char *)
14788
0
            xmlCanonicPath((const xmlChar *) filename);
14789
0
    inputStream->buf = buf;
14790
0
    xmlBufResetInput(buf->buffer, inputStream);
14791
14792
0
    inputPush(ctxt, inputStream);
14793
14794
0
    if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
14795
0
        (ctxt->input->buf != NULL)) {
14796
0
  size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
14797
0
        size_t cur = ctxt->input->cur - ctxt->input->base;
14798
14799
0
        xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
14800
14801
0
        xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
14802
#ifdef DEBUG_PUSH
14803
        xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
14804
#endif
14805
0
    }
14806
14807
0
    if (encoding != NULL) {
14808
0
        xmlCharEncodingHandlerPtr hdlr;
14809
14810
0
        if (ctxt->encoding != NULL)
14811
0
      xmlFree((xmlChar *) ctxt->encoding);
14812
0
        ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14813
14814
0
        hdlr = xmlFindCharEncodingHandler(encoding);
14815
0
        if (hdlr != NULL) {
14816
0
            xmlSwitchToEncoding(ctxt, hdlr);
14817
0
  } else {
14818
0
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
14819
0
            "Unsupported encoding %s\n", BAD_CAST encoding);
14820
0
        }
14821
0
    } else if (enc != XML_CHAR_ENCODING_NONE) {
14822
0
        xmlSwitchEncoding(ctxt, enc);
14823
0
    }
14824
14825
0
    return(0);
14826
0
}
14827
14828
14829
/**
14830
 * xmlCtxtUseOptionsInternal:
14831
 * @ctxt: an XML parser context
14832
 * @options:  a combination of xmlParserOption
14833
 * @encoding:  the user provided encoding to use
14834
 *
14835
 * Applies the options to the parser context
14836
 *
14837
 * Returns 0 in case of success, the set of unknown or unimplemented options
14838
 *         in case of error.
14839
 */
14840
static int
14841
xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding)
14842
461k
{
14843
461k
    if (ctxt == NULL)
14844
0
        return(-1);
14845
461k
    if (encoding != NULL) {
14846
0
        if (ctxt->encoding != NULL)
14847
0
      xmlFree((xmlChar *) ctxt->encoding);
14848
0
        ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14849
0
    }
14850
461k
    if (options & XML_PARSE_RECOVER) {
14851
252k
        ctxt->recovery = 1;
14852
252k
        options -= XML_PARSE_RECOVER;
14853
252k
  ctxt->options |= XML_PARSE_RECOVER;
14854
252k
    } else
14855
208k
        ctxt->recovery = 0;
14856
461k
    if (options & XML_PARSE_DTDLOAD) {
14857
288k
        ctxt->loadsubset = XML_DETECT_IDS;
14858
288k
        options -= XML_PARSE_DTDLOAD;
14859
288k
  ctxt->options |= XML_PARSE_DTDLOAD;
14860
288k
    } else
14861
172k
        ctxt->loadsubset = 0;
14862
461k
    if (options & XML_PARSE_DTDATTR) {
14863
192k
        ctxt->loadsubset |= XML_COMPLETE_ATTRS;
14864
192k
        options -= XML_PARSE_DTDATTR;
14865
192k
  ctxt->options |= XML_PARSE_DTDATTR;
14866
192k
    }
14867
461k
    if (options & XML_PARSE_NOENT) {
14868
260k
        ctxt->replaceEntities = 1;
14869
        /* ctxt->loadsubset |= XML_DETECT_IDS; */
14870
260k
        options -= XML_PARSE_NOENT;
14871
260k
  ctxt->options |= XML_PARSE_NOENT;
14872
260k
    } else
14873
200k
        ctxt->replaceEntities = 0;
14874
461k
    if (options & XML_PARSE_PEDANTIC) {
14875
59.1k
        ctxt->pedantic = 1;
14876
59.1k
        options -= XML_PARSE_PEDANTIC;
14877
59.1k
  ctxt->options |= XML_PARSE_PEDANTIC;
14878
59.1k
    } else
14879
401k
        ctxt->pedantic = 0;
14880
461k
    if (options & XML_PARSE_NOBLANKS) {
14881
183k
        ctxt->keepBlanks = 0;
14882
183k
        ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
14883
183k
        options -= XML_PARSE_NOBLANKS;
14884
183k
  ctxt->options |= XML_PARSE_NOBLANKS;
14885
183k
    } else
14886
277k
        ctxt->keepBlanks = 1;
14887
461k
    if (options & XML_PARSE_DTDVALID) {
14888
198k
        ctxt->validate = 1;
14889
198k
        if (options & XML_PARSE_NOWARNING)
14890
94.8k
            ctxt->vctxt.warning = NULL;
14891
198k
        if (options & XML_PARSE_NOERROR)
14892
157k
            ctxt->vctxt.error = NULL;
14893
198k
        options -= XML_PARSE_DTDVALID;
14894
198k
  ctxt->options |= XML_PARSE_DTDVALID;
14895
198k
    } else
14896
262k
        ctxt->validate = 0;
14897
461k
    if (options & XML_PARSE_NOWARNING) {
14898
198k
        ctxt->sax->warning = NULL;
14899
198k
        options -= XML_PARSE_NOWARNING;
14900
198k
    }
14901
461k
    if (options & XML_PARSE_NOERROR) {
14902
283k
        ctxt->sax->error = NULL;
14903
283k
        ctxt->sax->fatalError = NULL;
14904
283k
        options -= XML_PARSE_NOERROR;
14905
283k
    }
14906
461k
#ifdef LIBXML_SAX1_ENABLED
14907
461k
    if (options & XML_PARSE_SAX1) {
14908
174k
        ctxt->sax->startElement = xmlSAX2StartElement;
14909
174k
        ctxt->sax->endElement = xmlSAX2EndElement;
14910
174k
        ctxt->sax->startElementNs = NULL;
14911
174k
        ctxt->sax->endElementNs = NULL;
14912
174k
        ctxt->sax->initialized = 1;
14913
174k
        options -= XML_PARSE_SAX1;
14914
174k
  ctxt->options |= XML_PARSE_SAX1;
14915
174k
    }
14916
461k
#endif /* LIBXML_SAX1_ENABLED */
14917
461k
    if (options & XML_PARSE_NODICT) {
14918
156k
        ctxt->dictNames = 0;
14919
156k
        options -= XML_PARSE_NODICT;
14920
156k
  ctxt->options |= XML_PARSE_NODICT;
14921
305k
    } else {
14922
305k
        ctxt->dictNames = 1;
14923
305k
    }
14924
461k
    if (options & XML_PARSE_NOCDATA) {
14925
186k
        ctxt->sax->cdataBlock = NULL;
14926
186k
        options -= XML_PARSE_NOCDATA;
14927
186k
  ctxt->options |= XML_PARSE_NOCDATA;
14928
186k
    }
14929
461k
    if (options & XML_PARSE_NSCLEAN) {
14930
278k
  ctxt->options |= XML_PARSE_NSCLEAN;
14931
278k
        options -= XML_PARSE_NSCLEAN;
14932
278k
    }
14933
461k
    if (options & XML_PARSE_NONET) {
14934
174k
  ctxt->options |= XML_PARSE_NONET;
14935
174k
        options -= XML_PARSE_NONET;
14936
174k
    }
14937
461k
    if (options & XML_PARSE_COMPACT) {
14938
272k
  ctxt->options |= XML_PARSE_COMPACT;
14939
272k
        options -= XML_PARSE_COMPACT;
14940
272k
    }
14941
461k
    if (options & XML_PARSE_OLD10) {
14942
160k
  ctxt->options |= XML_PARSE_OLD10;
14943
160k
        options -= XML_PARSE_OLD10;
14944
160k
    }
14945
461k
    if (options & XML_PARSE_NOBASEFIX) {
14946
191k
  ctxt->options |= XML_PARSE_NOBASEFIX;
14947
191k
        options -= XML_PARSE_NOBASEFIX;
14948
191k
    }
14949
461k
    if (options & XML_PARSE_HUGE) {
14950
168k
  ctxt->options |= XML_PARSE_HUGE;
14951
168k
        options -= XML_PARSE_HUGE;
14952
168k
        if (ctxt->dict != NULL)
14953
168k
            xmlDictSetLimit(ctxt->dict, 0);
14954
168k
    }
14955
461k
    if (options & XML_PARSE_OLDSAX) {
14956
160k
  ctxt->options |= XML_PARSE_OLDSAX;
14957
160k
        options -= XML_PARSE_OLDSAX;
14958
160k
    }
14959
461k
    if (options & XML_PARSE_IGNORE_ENC) {
14960
269k
  ctxt->options |= XML_PARSE_IGNORE_ENC;
14961
269k
        options -= XML_PARSE_IGNORE_ENC;
14962
269k
    }
14963
461k
    if (options & XML_PARSE_BIG_LINES) {
14964
191k
  ctxt->options |= XML_PARSE_BIG_LINES;
14965
191k
        options -= XML_PARSE_BIG_LINES;
14966
191k
    }
14967
461k
    ctxt->linenumbers = 1;
14968
461k
    return (options);
14969
461k
}
14970
14971
/**
14972
 * xmlCtxtUseOptions:
14973
 * @ctxt: an XML parser context
14974
 * @options:  a combination of xmlParserOption
14975
 *
14976
 * Applies the options to the parser context
14977
 *
14978
 * Returns 0 in case of success, the set of unknown or unimplemented options
14979
 *         in case of error.
14980
 */
14981
int
14982
xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
14983
307k
{
14984
307k
   return(xmlCtxtUseOptionsInternal(ctxt, options, NULL));
14985
307k
}
14986
14987
/**
14988
 * xmlDoRead:
14989
 * @ctxt:  an XML parser context
14990
 * @URL:  the base URL to use for the document
14991
 * @encoding:  the document encoding, or NULL
14992
 * @options:  a combination of xmlParserOption
14993
 * @reuse:  keep the context for reuse
14994
 *
14995
 * Common front-end for the xmlRead functions
14996
 *
14997
 * Returns the resulting document tree or NULL
14998
 */
14999
static xmlDocPtr
15000
xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
15001
          int options, int reuse)
15002
153k
{
15003
153k
    xmlDocPtr ret;
15004
15005
153k
    xmlCtxtUseOptionsInternal(ctxt, options, encoding);
15006
153k
    if (encoding != NULL) {
15007
0
        xmlCharEncodingHandlerPtr hdlr;
15008
15009
0
  hdlr = xmlFindCharEncodingHandler(encoding);
15010
0
  if (hdlr != NULL)
15011
0
      xmlSwitchToEncoding(ctxt, hdlr);
15012
0
    }
15013
153k
    if ((URL != NULL) && (ctxt->input != NULL) &&
15014
153k
        (ctxt->input->filename == NULL))
15015
153k
        ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
15016
153k
    xmlParseDocument(ctxt);
15017
153k
    if ((ctxt->wellFormed) || ctxt->recovery)
15018
97.3k
        ret = ctxt->myDoc;
15019
56.3k
    else {
15020
56.3k
        ret = NULL;
15021
56.3k
  if (ctxt->myDoc != NULL) {
15022
53.0k
      xmlFreeDoc(ctxt->myDoc);
15023
53.0k
  }
15024
56.3k
    }
15025
153k
    ctxt->myDoc = NULL;
15026
153k
    if (!reuse) {
15027
153k
  xmlFreeParserCtxt(ctxt);
15028
153k
    }
15029
15030
153k
    return (ret);
15031
153k
}
15032
15033
/**
15034
 * xmlReadDoc:
15035
 * @cur:  a pointer to a zero terminated string
15036
 * @URL:  the base URL to use for the document
15037
 * @encoding:  the document encoding, or NULL
15038
 * @options:  a combination of xmlParserOption
15039
 *
15040
 * parse an XML in-memory document and build a tree.
15041
 *
15042
 * Returns the resulting document tree
15043
 */
15044
xmlDocPtr
15045
xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
15046
0
{
15047
0
    xmlParserCtxtPtr ctxt;
15048
15049
0
    if (cur == NULL)
15050
0
        return (NULL);
15051
0
    xmlInitParser();
15052
15053
0
    ctxt = xmlCreateDocParserCtxt(cur);
15054
0
    if (ctxt == NULL)
15055
0
        return (NULL);
15056
0
    return (xmlDoRead(ctxt, URL, encoding, options, 0));
15057
0
}
15058
15059
/**
15060
 * xmlReadFile:
15061
 * @filename:  a file or URL
15062
 * @encoding:  the document encoding, or NULL
15063
 * @options:  a combination of xmlParserOption
15064
 *
15065
 * parse an XML file from the filesystem or the network.
15066
 *
15067
 * Returns the resulting document tree
15068
 */
15069
xmlDocPtr
15070
xmlReadFile(const char *filename, const char *encoding, int options)
15071
0
{
15072
0
    xmlParserCtxtPtr ctxt;
15073
15074
0
    xmlInitParser();
15075
0
    ctxt = xmlCreateURLParserCtxt(filename, options);
15076
0
    if (ctxt == NULL)
15077
0
        return (NULL);
15078
0
    return (xmlDoRead(ctxt, NULL, encoding, options, 0));
15079
0
}
15080
15081
/**
15082
 * xmlReadMemory:
15083
 * @buffer:  a pointer to a char array
15084
 * @size:  the size of the array
15085
 * @URL:  the base URL to use for the document
15086
 * @encoding:  the document encoding, or NULL
15087
 * @options:  a combination of xmlParserOption
15088
 *
15089
 * parse an XML in-memory document and build a tree.
15090
 *
15091
 * Returns the resulting document tree
15092
 */
15093
xmlDocPtr
15094
xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
15095
153k
{
15096
153k
    xmlParserCtxtPtr ctxt;
15097
15098
153k
    xmlInitParser();
15099
153k
    ctxt = xmlCreateMemoryParserCtxt(buffer, size);
15100
153k
    if (ctxt == NULL)
15101
20
        return (NULL);
15102
153k
    return (xmlDoRead(ctxt, URL, encoding, options, 0));
15103
153k
}
15104
15105
/**
15106
 * xmlReadFd:
15107
 * @fd:  an open file descriptor
15108
 * @URL:  the base URL to use for the document
15109
 * @encoding:  the document encoding, or NULL
15110
 * @options:  a combination of xmlParserOption
15111
 *
15112
 * parse an XML from a file descriptor and build a tree.
15113
 * NOTE that the file descriptor will not be closed when the
15114
 *      reader is closed or reset.
15115
 *
15116
 * Returns the resulting document tree
15117
 */
15118
xmlDocPtr
15119
xmlReadFd(int fd, const char *URL, const char *encoding, int options)
15120
0
{
15121
0
    xmlParserCtxtPtr ctxt;
15122
0
    xmlParserInputBufferPtr input;
15123
0
    xmlParserInputPtr stream;
15124
15125
0
    if (fd < 0)
15126
0
        return (NULL);
15127
0
    xmlInitParser();
15128
15129
0
    input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15130
0
    if (input == NULL)
15131
0
        return (NULL);
15132
0
    input->closecallback = NULL;
15133
0
    ctxt = xmlNewParserCtxt();
15134
0
    if (ctxt == NULL) {
15135
0
        xmlFreeParserInputBuffer(input);
15136
0
        return (NULL);
15137
0
    }
15138
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15139
0
    if (stream == NULL) {
15140
0
        xmlFreeParserInputBuffer(input);
15141
0
  xmlFreeParserCtxt(ctxt);
15142
0
        return (NULL);
15143
0
    }
15144
0
    inputPush(ctxt, stream);
15145
0
    return (xmlDoRead(ctxt, URL, encoding, options, 0));
15146
0
}
15147
15148
/**
15149
 * xmlReadIO:
15150
 * @ioread:  an I/O read function
15151
 * @ioclose:  an I/O close function
15152
 * @ioctx:  an I/O handler
15153
 * @URL:  the base URL to use for the document
15154
 * @encoding:  the document encoding, or NULL
15155
 * @options:  a combination of xmlParserOption
15156
 *
15157
 * parse an XML document from I/O functions and source and build a tree.
15158
 *
15159
 * Returns the resulting document tree
15160
 */
15161
xmlDocPtr
15162
xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
15163
          void *ioctx, const char *URL, const char *encoding, int options)
15164
0
{
15165
0
    xmlParserCtxtPtr ctxt;
15166
0
    xmlParserInputBufferPtr input;
15167
0
    xmlParserInputPtr stream;
15168
15169
0
    if (ioread == NULL)
15170
0
        return (NULL);
15171
0
    xmlInitParser();
15172
15173
0
    input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15174
0
                                         XML_CHAR_ENCODING_NONE);
15175
0
    if (input == NULL) {
15176
0
        if (ioclose != NULL)
15177
0
            ioclose(ioctx);
15178
0
        return (NULL);
15179
0
    }
15180
0
    ctxt = xmlNewParserCtxt();
15181
0
    if (ctxt == NULL) {
15182
0
        xmlFreeParserInputBuffer(input);
15183
0
        return (NULL);
15184
0
    }
15185
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15186
0
    if (stream == NULL) {
15187
0
        xmlFreeParserInputBuffer(input);
15188
0
  xmlFreeParserCtxt(ctxt);
15189
0
        return (NULL);
15190
0
    }
15191
0
    inputPush(ctxt, stream);
15192
0
    return (xmlDoRead(ctxt, URL, encoding, options, 0));
15193
0
}
15194
15195
/**
15196
 * xmlCtxtReadDoc:
15197
 * @ctxt:  an XML parser context
15198
 * @cur:  a pointer to a zero terminated string
15199
 * @URL:  the base URL to use for the document
15200
 * @encoding:  the document encoding, or NULL
15201
 * @options:  a combination of xmlParserOption
15202
 *
15203
 * parse an XML in-memory document and build a tree.
15204
 * This reuses the existing @ctxt parser context
15205
 *
15206
 * Returns the resulting document tree
15207
 */
15208
xmlDocPtr
15209
xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
15210
               const char *URL, const char *encoding, int options)
15211
0
{
15212
0
    if (cur == NULL)
15213
0
        return (NULL);
15214
0
    return (xmlCtxtReadMemory(ctxt, (const char *) cur, xmlStrlen(cur), URL,
15215
0
                              encoding, options));
15216
0
}
15217
15218
/**
15219
 * xmlCtxtReadFile:
15220
 * @ctxt:  an XML parser context
15221
 * @filename:  a file or URL
15222
 * @encoding:  the document encoding, or NULL
15223
 * @options:  a combination of xmlParserOption
15224
 *
15225
 * parse an XML file from the filesystem or the network.
15226
 * This reuses the existing @ctxt parser context
15227
 *
15228
 * Returns the resulting document tree
15229
 */
15230
xmlDocPtr
15231
xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
15232
                const char *encoding, int options)
15233
0
{
15234
0
    xmlParserInputPtr stream;
15235
15236
0
    if (filename == NULL)
15237
0
        return (NULL);
15238
0
    if (ctxt == NULL)
15239
0
        return (NULL);
15240
0
    xmlInitParser();
15241
15242
0
    xmlCtxtReset(ctxt);
15243
15244
0
    stream = xmlLoadExternalEntity(filename, NULL, ctxt);
15245
0
    if (stream == NULL) {
15246
0
        return (NULL);
15247
0
    }
15248
0
    inputPush(ctxt, stream);
15249
0
    return (xmlDoRead(ctxt, NULL, encoding, options, 1));
15250
0
}
15251
15252
/**
15253
 * xmlCtxtReadMemory:
15254
 * @ctxt:  an XML parser context
15255
 * @buffer:  a pointer to a char array
15256
 * @size:  the size of the array
15257
 * @URL:  the base URL to use for the document
15258
 * @encoding:  the document encoding, or NULL
15259
 * @options:  a combination of xmlParserOption
15260
 *
15261
 * parse an XML in-memory document and build a tree.
15262
 * This reuses the existing @ctxt parser context
15263
 *
15264
 * Returns the resulting document tree
15265
 */
15266
xmlDocPtr
15267
xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
15268
                  const char *URL, const char *encoding, int options)
15269
0
{
15270
0
    xmlParserInputBufferPtr input;
15271
0
    xmlParserInputPtr stream;
15272
15273
0
    if (ctxt == NULL)
15274
0
        return (NULL);
15275
0
    if (buffer == NULL)
15276
0
        return (NULL);
15277
0
    xmlInitParser();
15278
15279
0
    xmlCtxtReset(ctxt);
15280
15281
0
    input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
15282
0
    if (input == NULL) {
15283
0
  return(NULL);
15284
0
    }
15285
15286
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15287
0
    if (stream == NULL) {
15288
0
  xmlFreeParserInputBuffer(input);
15289
0
  return(NULL);
15290
0
    }
15291
15292
0
    inputPush(ctxt, stream);
15293
0
    return (xmlDoRead(ctxt, URL, encoding, options, 1));
15294
0
}
15295
15296
/**
15297
 * xmlCtxtReadFd:
15298
 * @ctxt:  an XML parser context
15299
 * @fd:  an open file descriptor
15300
 * @URL:  the base URL to use for the document
15301
 * @encoding:  the document encoding, or NULL
15302
 * @options:  a combination of xmlParserOption
15303
 *
15304
 * parse an XML from a file descriptor and build a tree.
15305
 * This reuses the existing @ctxt parser context
15306
 * NOTE that the file descriptor will not be closed when the
15307
 *      reader is closed or reset.
15308
 *
15309
 * Returns the resulting document tree
15310
 */
15311
xmlDocPtr
15312
xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
15313
              const char *URL, const char *encoding, int options)
15314
0
{
15315
0
    xmlParserInputBufferPtr input;
15316
0
    xmlParserInputPtr stream;
15317
15318
0
    if (fd < 0)
15319
0
        return (NULL);
15320
0
    if (ctxt == NULL)
15321
0
        return (NULL);
15322
0
    xmlInitParser();
15323
15324
0
    xmlCtxtReset(ctxt);
15325
15326
15327
0
    input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15328
0
    if (input == NULL)
15329
0
        return (NULL);
15330
0
    input->closecallback = NULL;
15331
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15332
0
    if (stream == NULL) {
15333
0
        xmlFreeParserInputBuffer(input);
15334
0
        return (NULL);
15335
0
    }
15336
0
    inputPush(ctxt, stream);
15337
0
    return (xmlDoRead(ctxt, URL, encoding, options, 1));
15338
0
}
15339
15340
/**
15341
 * xmlCtxtReadIO:
15342
 * @ctxt:  an XML parser context
15343
 * @ioread:  an I/O read function
15344
 * @ioclose:  an I/O close function
15345
 * @ioctx:  an I/O handler
15346
 * @URL:  the base URL to use for the document
15347
 * @encoding:  the document encoding, or NULL
15348
 * @options:  a combination of xmlParserOption
15349
 *
15350
 * parse an XML document from I/O functions and source and build a tree.
15351
 * This reuses the existing @ctxt parser context
15352
 *
15353
 * Returns the resulting document tree
15354
 */
15355
xmlDocPtr
15356
xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
15357
              xmlInputCloseCallback ioclose, void *ioctx,
15358
        const char *URL,
15359
              const char *encoding, int options)
15360
0
{
15361
0
    xmlParserInputBufferPtr input;
15362
0
    xmlParserInputPtr stream;
15363
15364
0
    if (ioread == NULL)
15365
0
        return (NULL);
15366
0
    if (ctxt == NULL)
15367
0
        return (NULL);
15368
0
    xmlInitParser();
15369
15370
0
    xmlCtxtReset(ctxt);
15371
15372
0
    input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15373
0
                                         XML_CHAR_ENCODING_NONE);
15374
0
    if (input == NULL) {
15375
0
        if (ioclose != NULL)
15376
0
            ioclose(ioctx);
15377
0
        return (NULL);
15378
0
    }
15379
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15380
0
    if (stream == NULL) {
15381
0
        xmlFreeParserInputBuffer(input);
15382
0
        return (NULL);
15383
0
    }
15384
0
    inputPush(ctxt, stream);
15385
0
    return (xmlDoRead(ctxt, URL, encoding, options, 1));
15386
0
}
15387