Coverage Report

Created: 2024-01-23 06:29

/src/libxml2/parser.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3
 *            implemented on top of the SAX interfaces
4
 *
5
 * References:
6
 *   The XML specification:
7
 *     http://www.w3.org/TR/REC-xml
8
 *   Original 1.0 version:
9
 *     http://www.w3.org/TR/1998/REC-xml-19980210
10
 *   XML second edition working draft
11
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
12
 *
13
 * Okay this is a big file, the parser core is around 7000 lines, then it
14
 * is followed by the progressive parser top routines, then the various
15
 * high level APIs to call the parser and a few miscellaneous functions.
16
 * A number of helper functions and deprecated ones have been moved to
17
 * parserInternals.c to reduce this file size.
18
 * As much as possible the functions are associated with their relative
19
 * production in the XML specification. A few productions defining the
20
 * different ranges of character are actually implemented either in
21
 * parserInternals.h or parserInternals.c
22
 * The DOM tree build is realized from the default SAX callbacks in
23
 * the module SAX.c.
24
 * The routines doing the validation checks are in valid.c and called either
25
 * from the SAX callbacks or as standalone functions using a preparsed
26
 * document.
27
 *
28
 * See Copyright for the status of this software.
29
 *
30
 * daniel@veillard.com
31
 */
32
33
/* To avoid EBCDIC trouble when parsing on zOS */
34
#if defined(__MVS__)
35
#pragma convert("ISO8859-1")
36
#endif
37
38
#define IN_LIBXML
39
#include "libxml.h"
40
41
#if defined(_WIN32)
42
#define XML_DIR_SEP '\\'
43
#else
44
#define XML_DIR_SEP '/'
45
#endif
46
47
#include <stdlib.h>
48
#include <limits.h>
49
#include <string.h>
50
#include <stdarg.h>
51
#include <stddef.h>
52
#include <ctype.h>
53
#include <stdlib.h>
54
#include <libxml/xmlmemory.h>
55
#include <libxml/threads.h>
56
#include <libxml/globals.h>
57
#include <libxml/tree.h>
58
#include <libxml/parser.h>
59
#include <libxml/parserInternals.h>
60
#include <libxml/HTMLparser.h>
61
#include <libxml/valid.h>
62
#include <libxml/entities.h>
63
#include <libxml/xmlerror.h>
64
#include <libxml/encoding.h>
65
#include <libxml/xmlIO.h>
66
#include <libxml/uri.h>
67
#ifdef LIBXML_CATALOG_ENABLED
68
#include <libxml/catalog.h>
69
#endif
70
#ifdef LIBXML_SCHEMAS_ENABLED
71
#include <libxml/xmlschemastypes.h>
72
#include <libxml/relaxng.h>
73
#endif
74
#if defined(LIBXML_XPATH_ENABLED) || defined(LIBXML_SCHEMAS_ENABLED)
75
#include <libxml/xpath.h>
76
#endif
77
78
#include "private/buf.h"
79
#include "private/dict.h"
80
#include "private/enc.h"
81
#include "private/entities.h"
82
#include "private/error.h"
83
#include "private/globals.h"
84
#include "private/html.h"
85
#include "private/io.h"
86
#include "private/memory.h"
87
#include "private/parser.h"
88
#include "private/threads.h"
89
#include "private/xpath.h"
90
91
struct _xmlStartTag {
92
    const xmlChar *prefix;
93
    const xmlChar *URI;
94
    int line;
95
    int nsNr;
96
};
97
98
static xmlParserCtxtPtr
99
xmlCreateEntityParserCtxtInternal(xmlSAXHandlerPtr sax, void *userData,
100
        const xmlChar *URL, const xmlChar *ID, const xmlChar *base,
101
        xmlParserCtxtPtr pctx);
102
103
static void xmlHaltParser(xmlParserCtxtPtr ctxt);
104
105
static int
106
xmlParseElementStart(xmlParserCtxtPtr ctxt);
107
108
static void
109
xmlParseElementEnd(xmlParserCtxtPtr ctxt);
110
111
/************************************************************************
112
 *                  *
113
 *  Arbitrary limits set in the parser. See XML_PARSE_HUGE    *
114
 *                  *
115
 ************************************************************************/
116
117
138M
#define XML_MAX_HUGE_LENGTH 1000000000
118
119
#define XML_PARSER_BIG_ENTITY 1000
120
#define XML_PARSER_LOT_ENTITY 5000
121
122
/*
123
 * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity
124
 *    replacement over the size in byte of the input indicates that you have
125
 *    an exponential behaviour. A value of 10 corresponds to at least 3 entity
126
 *    replacement per byte of input.
127
 */
128
6.16k
#define XML_PARSER_NON_LINEAR 10
129
130
632M
#define XML_ENT_FIXED_COST 50
131
132
/**
133
 * xmlParserMaxDepth:
134
 *
135
 * arbitrary depth limit for the XML documents that we allow to
136
 * process. This is not a limitation of the parser but a safety
137
 * boundary feature. It can be disabled with the XML_PARSE_HUGE
138
 * parser option.
139
 */
140
unsigned int xmlParserMaxDepth = 256;
141
142
143
144
#define SAX2 1
145
338M
#define XML_PARSER_BIG_BUFFER_SIZE 300
146
51.3G
#define XML_PARSER_BUFFER_SIZE 100
147
5.99M
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
148
149
/**
150
 * XML_PARSER_CHUNK_SIZE
151
 *
152
 * When calling GROW that's the minimal amount of data
153
 * the parser expects to have received. It is not a hard
154
 * limit but an optimization when reading strings like Names
155
 * It is not strictly needed as long as the input's available characters
156
 * are followed by 0, which should be provided by the I/O level
157
 */
158
237M
#define XML_PARSER_CHUNK_SIZE 100
159
160
/*
161
 * List of XML prefixed PI allowed by W3C specs
162
 */
163
164
static const char* const xmlW3CPIs[] = {
165
    "xml-stylesheet",
166
    "xml-model",
167
    NULL
168
};
169
170
171
/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
172
static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
173
                                              const xmlChar **str);
174
175
static xmlParserErrors
176
xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
177
                xmlSAXHandlerPtr sax,
178
          void *user_data, int depth, const xmlChar *URL,
179
          const xmlChar *ID, xmlNodePtr *list);
180
181
static int
182
xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
183
                          const char *encoding);
184
#ifdef LIBXML_LEGACY_ENABLED
185
static void
186
xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
187
                      xmlNodePtr lastNode);
188
#endif /* LIBXML_LEGACY_ENABLED */
189
190
static xmlParserErrors
191
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
192
          const xmlChar *string, void *user_data, xmlNodePtr *lst);
193
194
static int
195
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
196
197
/************************************************************************
198
 *                  *
199
 *    Some factorized error routines        *
200
 *                  *
201
 ************************************************************************/
202
203
/**
204
 * xmlErrAttributeDup:
205
 * @ctxt:  an XML parser context
206
 * @prefix:  the attribute prefix
207
 * @localname:  the attribute localname
208
 *
209
 * Handle a redefinition of attribute error
210
 */
211
static void
212
xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
213
                   const xmlChar * localname)
214
71.3k
{
215
71.3k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
216
71.3k
        (ctxt->instate == XML_PARSER_EOF))
217
162
  return;
218
71.1k
    if (ctxt != NULL)
219
71.1k
  ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
220
221
71.1k
    if (prefix == NULL)
222
41.3k
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
223
41.3k
                        XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
224
41.3k
                        (const char *) localname, NULL, NULL, 0, 0,
225
41.3k
                        "Attribute %s redefined\n", localname);
226
29.7k
    else
227
29.7k
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
228
29.7k
                        XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
229
29.7k
                        (const char *) prefix, (const char *) localname,
230
29.7k
                        NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
231
29.7k
                        localname);
232
71.1k
    if (ctxt != NULL) {
233
71.1k
  ctxt->wellFormed = 0;
234
71.1k
  if (ctxt->recovery == 0)
235
31.9k
      ctxt->disableSAX = 1;
236
71.1k
    }
237
71.1k
}
238
239
/**
240
 * xmlFatalErr:
241
 * @ctxt:  an XML parser context
242
 * @error:  the error number
243
 * @extra:  extra information string
244
 *
245
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
246
 */
247
static void
248
xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
249
27.0M
{
250
27.0M
    const char *errmsg;
251
252
27.0M
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
253
27.0M
        (ctxt->instate == XML_PARSER_EOF))
254
32.4k
  return;
255
27.0M
    switch (error) {
256
83.0k
        case XML_ERR_INVALID_HEX_CHARREF:
257
83.0k
            errmsg = "CharRef: invalid hexadecimal value";
258
83.0k
            break;
259
116k
        case XML_ERR_INVALID_DEC_CHARREF:
260
116k
            errmsg = "CharRef: invalid decimal value";
261
116k
            break;
262
0
        case XML_ERR_INVALID_CHARREF:
263
0
            errmsg = "CharRef: invalid value";
264
0
            break;
265
1.02M
        case XML_ERR_INTERNAL_ERROR:
266
1.02M
            errmsg = "internal error";
267
1.02M
            break;
268
0
        case XML_ERR_PEREF_AT_EOF:
269
0
            errmsg = "PEReference at end of document";
270
0
            break;
271
0
        case XML_ERR_PEREF_IN_PROLOG:
272
0
            errmsg = "PEReference in prolog";
273
0
            break;
274
0
        case XML_ERR_PEREF_IN_EPILOG:
275
0
            errmsg = "PEReference in epilog";
276
0
            break;
277
0
        case XML_ERR_PEREF_NO_NAME:
278
0
            errmsg = "PEReference: no name";
279
0
            break;
280
7.83M
        case XML_ERR_PEREF_SEMICOL_MISSING:
281
7.83M
            errmsg = "PEReference: expecting ';'";
282
7.83M
            break;
283
4.18k
        case XML_ERR_ENTITY_LOOP:
284
4.18k
            errmsg = "Detected an entity reference loop";
285
4.18k
            break;
286
0
        case XML_ERR_ENTITY_NOT_STARTED:
287
0
            errmsg = "EntityValue: \" or ' expected";
288
0
            break;
289
4.87k
        case XML_ERR_ENTITY_PE_INTERNAL:
290
4.87k
            errmsg = "PEReferences forbidden in internal subset";
291
4.87k
            break;
292
6.73k
        case XML_ERR_ENTITY_NOT_FINISHED:
293
6.73k
            errmsg = "EntityValue: \" or ' expected";
294
6.73k
            break;
295
85.4k
        case XML_ERR_ATTRIBUTE_NOT_STARTED:
296
85.4k
            errmsg = "AttValue: \" or ' expected";
297
85.4k
            break;
298
277k
        case XML_ERR_LT_IN_ATTRIBUTE:
299
277k
            errmsg = "Unescaped '<' not allowed in attributes values";
300
277k
            break;
301
36.3k
        case XML_ERR_LITERAL_NOT_STARTED:
302
36.3k
            errmsg = "SystemLiteral \" or ' expected";
303
36.3k
            break;
304
61.7k
        case XML_ERR_LITERAL_NOT_FINISHED:
305
61.7k
            errmsg = "Unfinished System or Public ID \" or ' expected";
306
61.7k
            break;
307
56.6k
        case XML_ERR_MISPLACED_CDATA_END:
308
56.6k
            errmsg = "Sequence ']]>' not allowed in content";
309
56.6k
            break;
310
33.8k
        case XML_ERR_URI_REQUIRED:
311
33.8k
            errmsg = "SYSTEM or PUBLIC, the URI is missing";
312
33.8k
            break;
313
2.69k
        case XML_ERR_PUBID_REQUIRED:
314
2.69k
            errmsg = "PUBLIC, the Public Identifier is missing";
315
2.69k
            break;
316
15.1M
        case XML_ERR_HYPHEN_IN_COMMENT:
317
15.1M
            errmsg = "Comment must not contain '--' (double-hyphen)";
318
15.1M
            break;
319
82.1k
        case XML_ERR_PI_NOT_STARTED:
320
82.1k
            errmsg = "xmlParsePI : no target name";
321
82.1k
            break;
322
6.81k
        case XML_ERR_RESERVED_XML_NAME:
323
6.81k
            errmsg = "Invalid PI name";
324
6.81k
            break;
325
1.79k
        case XML_ERR_NOTATION_NOT_STARTED:
326
1.79k
            errmsg = "NOTATION: Name expected here";
327
1.79k
            break;
328
42.7k
        case XML_ERR_NOTATION_NOT_FINISHED:
329
42.7k
            errmsg = "'>' required to close NOTATION declaration";
330
42.7k
            break;
331
29.3k
        case XML_ERR_VALUE_REQUIRED:
332
29.3k
            errmsg = "Entity value required";
333
29.3k
            break;
334
4.51k
        case XML_ERR_URI_FRAGMENT:
335
4.51k
            errmsg = "Fragment not allowed";
336
4.51k
            break;
337
20.7k
        case XML_ERR_ATTLIST_NOT_STARTED:
338
20.7k
            errmsg = "'(' required to start ATTLIST enumeration";
339
20.7k
            break;
340
1.72k
        case XML_ERR_NMTOKEN_REQUIRED:
341
1.72k
            errmsg = "NmToken expected in ATTLIST enumeration";
342
1.72k
            break;
343
6.50k
        case XML_ERR_ATTLIST_NOT_FINISHED:
344
6.50k
            errmsg = "')' required to finish ATTLIST enumeration";
345
6.50k
            break;
346
116k
        case XML_ERR_MIXED_NOT_STARTED:
347
116k
            errmsg = "MixedContentDecl : '|' or ')*' expected";
348
116k
            break;
349
0
        case XML_ERR_PCDATA_REQUIRED:
350
0
            errmsg = "MixedContentDecl : '#PCDATA' expected";
351
0
            break;
352
374k
        case XML_ERR_ELEMCONTENT_NOT_STARTED:
353
374k
            errmsg = "ContentDecl : Name or '(' expected";
354
374k
            break;
355
68.6k
        case XML_ERR_ELEMCONTENT_NOT_FINISHED:
356
68.6k
            errmsg = "ContentDecl : ',' '|' or ')' expected";
357
68.6k
            break;
358
0
        case XML_ERR_PEREF_IN_INT_SUBSET:
359
0
            errmsg =
360
0
                "PEReference: forbidden within markup decl in internal subset";
361
0
            break;
362
512k
        case XML_ERR_GT_REQUIRED:
363
512k
            errmsg = "expected '>'";
364
512k
            break;
365
381
        case XML_ERR_CONDSEC_INVALID:
366
381
            errmsg = "XML conditional section '[' expected";
367
381
            break;
368
40.9k
        case XML_ERR_EXT_SUBSET_NOT_FINISHED:
369
40.9k
            errmsg = "Content error in the external subset";
370
40.9k
            break;
371
2.06k
        case XML_ERR_CONDSEC_INVALID_KEYWORD:
372
2.06k
            errmsg =
373
2.06k
                "conditional section INCLUDE or IGNORE keyword expected";
374
2.06k
            break;
375
2.58k
        case XML_ERR_CONDSEC_NOT_FINISHED:
376
2.58k
            errmsg = "XML conditional section not closed";
377
2.58k
            break;
378
413
        case XML_ERR_XMLDECL_NOT_STARTED:
379
413
            errmsg = "Text declaration '<?xml' required";
380
413
            break;
381
152k
        case XML_ERR_XMLDECL_NOT_FINISHED:
382
152k
            errmsg = "parsing XML declaration: '?>' expected";
383
152k
            break;
384
0
        case XML_ERR_EXT_ENTITY_STANDALONE:
385
0
            errmsg = "external parsed entities cannot be standalone";
386
0
            break;
387
296k
        case XML_ERR_ENTITYREF_SEMICOL_MISSING:
388
296k
            errmsg = "EntityRef: expecting ';'";
389
296k
            break;
390
25.9k
        case XML_ERR_DOCTYPE_NOT_FINISHED:
391
25.9k
            errmsg = "DOCTYPE improperly terminated";
392
25.9k
            break;
393
0
        case XML_ERR_LTSLASH_REQUIRED:
394
0
            errmsg = "EndTag: '</' not found";
395
0
            break;
396
7.94k
        case XML_ERR_EQUAL_REQUIRED:
397
7.94k
            errmsg = "expected '='";
398
7.94k
            break;
399
40.6k
        case XML_ERR_STRING_NOT_CLOSED:
400
40.6k
            errmsg = "String not closed expecting \" or '";
401
40.6k
            break;
402
10.1k
        case XML_ERR_STRING_NOT_STARTED:
403
10.1k
            errmsg = "String not started expecting ' or \"";
404
10.1k
            break;
405
1.05k
        case XML_ERR_ENCODING_NAME:
406
1.05k
            errmsg = "Invalid XML encoding name";
407
1.05k
            break;
408
1.56k
        case XML_ERR_STANDALONE_VALUE:
409
1.56k
            errmsg = "standalone accepts only 'yes' or 'no'";
410
1.56k
            break;
411
26.8k
        case XML_ERR_DOCUMENT_EMPTY:
412
26.8k
            errmsg = "Document is empty";
413
26.8k
            break;
414
226k
        case XML_ERR_DOCUMENT_END:
415
226k
            errmsg = "Extra content at the end of the document";
416
226k
            break;
417
12.9k
        case XML_ERR_NOT_WELL_BALANCED:
418
12.9k
            errmsg = "chunk is not well balanced";
419
12.9k
            break;
420
0
        case XML_ERR_EXTRA_CONTENT:
421
0
            errmsg = "extra content at the end of well balanced chunk";
422
0
            break;
423
84.6k
        case XML_ERR_VERSION_MISSING:
424
84.6k
            errmsg = "Malformed declaration expecting version";
425
84.6k
            break;
426
1.33k
        case XML_ERR_NAME_TOO_LONG:
427
1.33k
            errmsg = "Name too long";
428
1.33k
            break;
429
#if 0
430
        case:
431
            errmsg = "";
432
            break;
433
#endif
434
20.4k
        default:
435
20.4k
            errmsg = "Unregistered error message";
436
27.0M
    }
437
27.0M
    if (ctxt != NULL)
438
27.0M
  ctxt->errNo = error;
439
27.0M
    if (info == NULL) {
440
26.0M
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
441
26.0M
                        XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s\n",
442
26.0M
                        errmsg);
443
26.0M
    } else {
444
1.02M
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
445
1.02M
                        XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s: %s\n",
446
1.02M
                        errmsg, info);
447
1.02M
    }
448
27.0M
    if (ctxt != NULL) {
449
27.0M
  ctxt->wellFormed = 0;
450
27.0M
  if (ctxt->recovery == 0)
451
3.63M
      ctxt->disableSAX = 1;
452
27.0M
    }
453
27.0M
}
454
455
/**
456
 * xmlFatalErrMsg:
457
 * @ctxt:  an XML parser context
458
 * @error:  the error number
459
 * @msg:  the error message
460
 *
461
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
462
 */
463
static void LIBXML_ATTR_FORMAT(3,0)
464
xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
465
               const char *msg)
466
7.01M
{
467
7.01M
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
468
7.01M
        (ctxt->instate == XML_PARSER_EOF))
469
1.72k
  return;
470
7.01M
    if (ctxt != NULL)
471
7.01M
  ctxt->errNo = error;
472
7.01M
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
473
7.01M
                    XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
474
7.01M
    if (ctxt != NULL) {
475
7.01M
  ctxt->wellFormed = 0;
476
7.01M
  if (ctxt->recovery == 0)
477
1.44M
      ctxt->disableSAX = 1;
478
7.01M
    }
479
7.01M
}
480
481
/**
482
 * xmlWarningMsg:
483
 * @ctxt:  an XML parser context
484
 * @error:  the error number
485
 * @msg:  the error message
486
 * @str1:  extra data
487
 * @str2:  extra data
488
 *
489
 * Handle a warning.
490
 */
491
static void LIBXML_ATTR_FORMAT(3,0)
492
xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
493
              const char *msg, const xmlChar *str1, const xmlChar *str2)
494
46.6M
{
495
46.6M
    xmlStructuredErrorFunc schannel = NULL;
496
497
46.6M
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
498
46.6M
        (ctxt->instate == XML_PARSER_EOF))
499
0
  return;
500
46.6M
    if ((ctxt != NULL) && (ctxt->sax != NULL) &&
501
46.6M
        (ctxt->sax->initialized == XML_SAX2_MAGIC))
502
34.1M
        schannel = ctxt->sax->serror;
503
46.6M
    if (ctxt != NULL) {
504
46.6M
        __xmlRaiseError(schannel,
505
46.6M
                    (ctxt->sax) ? ctxt->sax->warning : NULL,
506
46.6M
                    ctxt->userData,
507
46.6M
                    ctxt, NULL, XML_FROM_PARSER, error,
508
46.6M
                    XML_ERR_WARNING, NULL, 0,
509
46.6M
        (const char *) str1, (const char *) str2, NULL, 0, 0,
510
46.6M
        msg, (const char *) str1, (const char *) str2);
511
46.6M
    } else {
512
0
        __xmlRaiseError(schannel, NULL, NULL,
513
0
                    ctxt, NULL, XML_FROM_PARSER, error,
514
0
                    XML_ERR_WARNING, NULL, 0,
515
0
        (const char *) str1, (const char *) str2, NULL, 0, 0,
516
0
        msg, (const char *) str1, (const char *) str2);
517
0
    }
518
46.6M
}
519
520
/**
521
 * xmlValidityError:
522
 * @ctxt:  an XML parser context
523
 * @error:  the error number
524
 * @msg:  the error message
525
 * @str1:  extra data
526
 *
527
 * Handle a validity error.
528
 */
529
static void LIBXML_ATTR_FORMAT(3,0)
530
xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
531
              const char *msg, const xmlChar *str1, const xmlChar *str2)
532
11.6M
{
533
11.6M
    xmlStructuredErrorFunc schannel = NULL;
534
535
11.6M
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
536
11.6M
        (ctxt->instate == XML_PARSER_EOF))
537
0
  return;
538
11.6M
    if (ctxt != NULL) {
539
11.6M
  ctxt->errNo = error;
540
11.6M
  if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
541
7.11M
      schannel = ctxt->sax->serror;
542
11.6M
    }
543
11.6M
    if (ctxt != NULL) {
544
11.6M
        __xmlRaiseError(schannel,
545
11.6M
                    ctxt->vctxt.error, ctxt->vctxt.userData,
546
11.6M
                    ctxt, NULL, XML_FROM_DTD, error,
547
11.6M
                    XML_ERR_ERROR, NULL, 0, (const char *) str1,
548
11.6M
        (const char *) str2, NULL, 0, 0,
549
11.6M
        msg, (const char *) str1, (const char *) str2);
550
11.6M
  ctxt->valid = 0;
551
11.6M
    } else {
552
0
        __xmlRaiseError(schannel, NULL, NULL,
553
0
                    ctxt, NULL, XML_FROM_DTD, error,
554
0
                    XML_ERR_ERROR, NULL, 0, (const char *) str1,
555
0
        (const char *) str2, NULL, 0, 0,
556
0
        msg, (const char *) str1, (const char *) str2);
557
0
    }
558
11.6M
}
559
560
/**
561
 * xmlFatalErrMsgInt:
562
 * @ctxt:  an XML parser context
563
 * @error:  the error number
564
 * @msg:  the error message
565
 * @val:  an integer value
566
 *
567
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
568
 */
569
static void LIBXML_ATTR_FORMAT(3,0)
570
xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
571
                  const char *msg, int val)
572
4.50M
{
573
4.50M
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
574
4.50M
        (ctxt->instate == XML_PARSER_EOF))
575
0
  return;
576
4.50M
    if (ctxt != NULL)
577
4.50M
  ctxt->errNo = error;
578
4.50M
    __xmlRaiseError(NULL, NULL, NULL,
579
4.50M
                    ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
580
4.50M
                    NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
581
4.50M
    if (ctxt != NULL) {
582
4.50M
  ctxt->wellFormed = 0;
583
4.50M
  if (ctxt->recovery == 0)
584
361k
      ctxt->disableSAX = 1;
585
4.50M
    }
586
4.50M
}
587
588
/**
589
 * xmlFatalErrMsgStrIntStr:
590
 * @ctxt:  an XML parser context
591
 * @error:  the error number
592
 * @msg:  the error message
593
 * @str1:  an string info
594
 * @val:  an integer value
595
 * @str2:  an string info
596
 *
597
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
598
 */
599
static void LIBXML_ATTR_FORMAT(3,0)
600
xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
601
                  const char *msg, const xmlChar *str1, int val,
602
      const xmlChar *str2)
603
1.40M
{
604
1.40M
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
605
1.40M
        (ctxt->instate == XML_PARSER_EOF))
606
0
  return;
607
1.40M
    if (ctxt != NULL)
608
1.40M
  ctxt->errNo = error;
609
1.40M
    __xmlRaiseError(NULL, NULL, NULL,
610
1.40M
                    ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
611
1.40M
                    NULL, 0, (const char *) str1, (const char *) str2,
612
1.40M
        NULL, val, 0, msg, str1, val, str2);
613
1.40M
    if (ctxt != NULL) {
614
1.40M
  ctxt->wellFormed = 0;
615
1.40M
  if (ctxt->recovery == 0)
616
373k
      ctxt->disableSAX = 1;
617
1.40M
    }
618
1.40M
}
619
620
/**
621
 * xmlFatalErrMsgStr:
622
 * @ctxt:  an XML parser context
623
 * @error:  the error number
624
 * @msg:  the error message
625
 * @val:  a string value
626
 *
627
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
628
 */
629
static void LIBXML_ATTR_FORMAT(3,0)
630
xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
631
                  const char *msg, const xmlChar * val)
632
127M
{
633
127M
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
634
127M
        (ctxt->instate == XML_PARSER_EOF))
635
78
  return;
636
127M
    if (ctxt != NULL)
637
127M
  ctxt->errNo = error;
638
127M
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
639
127M
                    XML_FROM_PARSER, error, XML_ERR_FATAL,
640
127M
                    NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
641
127M
                    val);
642
127M
    if (ctxt != NULL) {
643
127M
  ctxt->wellFormed = 0;
644
127M
  if (ctxt->recovery == 0)
645
61.3M
      ctxt->disableSAX = 1;
646
127M
    }
647
127M
}
648
649
/**
650
 * xmlErrMsgStr:
651
 * @ctxt:  an XML parser context
652
 * @error:  the error number
653
 * @msg:  the error message
654
 * @val:  a string value
655
 *
656
 * Handle a non fatal parser error
657
 */
658
static void LIBXML_ATTR_FORMAT(3,0)
659
xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
660
                  const char *msg, const xmlChar * val)
661
2.89M
{
662
2.89M
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
663
2.89M
        (ctxt->instate == XML_PARSER_EOF))
664
0
  return;
665
2.89M
    if (ctxt != NULL)
666
2.89M
  ctxt->errNo = error;
667
2.89M
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
668
2.89M
                    XML_FROM_PARSER, error, XML_ERR_ERROR,
669
2.89M
                    NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
670
2.89M
                    val);
671
2.89M
}
672
673
/**
674
 * xmlNsErr:
675
 * @ctxt:  an XML parser context
676
 * @error:  the error number
677
 * @msg:  the message
678
 * @info1:  extra information string
679
 * @info2:  extra information string
680
 *
681
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
682
 */
683
static void LIBXML_ATTR_FORMAT(3,0)
684
xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
685
         const char *msg,
686
         const xmlChar * info1, const xmlChar * info2,
687
         const xmlChar * info3)
688
958k
{
689
958k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
690
958k
        (ctxt->instate == XML_PARSER_EOF))
691
392
  return;
692
957k
    if (ctxt != NULL)
693
957k
  ctxt->errNo = error;
694
957k
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
695
957k
                    XML_ERR_ERROR, NULL, 0, (const char *) info1,
696
957k
                    (const char *) info2, (const char *) info3, 0, 0, msg,
697
957k
                    info1, info2, info3);
698
957k
    if (ctxt != NULL)
699
957k
  ctxt->nsWellFormed = 0;
700
957k
}
701
702
/**
703
 * xmlNsWarn
704
 * @ctxt:  an XML parser context
705
 * @error:  the error number
706
 * @msg:  the message
707
 * @info1:  extra information string
708
 * @info2:  extra information string
709
 *
710
 * Handle a namespace warning error
711
 */
712
static void LIBXML_ATTR_FORMAT(3,0)
713
xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
714
         const char *msg,
715
         const xmlChar * info1, const xmlChar * info2,
716
         const xmlChar * info3)
717
62.2k
{
718
62.2k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
719
62.2k
        (ctxt->instate == XML_PARSER_EOF))
720
0
  return;
721
62.2k
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
722
62.2k
                    XML_ERR_WARNING, NULL, 0, (const char *) info1,
723
62.2k
                    (const char *) info2, (const char *) info3, 0, 0, msg,
724
62.2k
                    info1, info2, info3);
725
62.2k
}
726
727
/*
 * Add @val to *@dst without wrapping around: when the sum would exceed
 * ULONG_MAX, *@dst is clamped to ULONG_MAX instead.
 */
static void
xmlSaturatedAdd(unsigned long *dst, unsigned long val) {
    unsigned long room = ULONG_MAX - *dst;

    if (val <= room) {
        *dst += val;
        return;
    }
    *dst = ULONG_MAX;
}
734
735
/*
 * Add a size_t quantity @val to *@dst without wrapping around: when the
 * sum would exceed ULONG_MAX, *@dst is clamped to ULONG_MAX instead.
 *
 * @val is taken as size_t so that a value wider than unsigned long (for
 * example on LLP64 targets, where size_t is 64-bit and unsigned long is
 * 32-bit) is compared in full width rather than being truncated at the
 * call site before the saturation check.
 */
static void
xmlSaturatedAddSizeT(unsigned long *dst, size_t val) {
    if (val > ULONG_MAX - *dst)
        *dst = ULONG_MAX;
    else
        /* val <= ULONG_MAX - *dst here, so the narrowing cast is lossless. */
        *dst += (unsigned long) val;
}
742
743
/**
744
 * xmlParserEntityCheck:
745
 * @ctxt:  parser context
746
 * @extra:  sum of unexpanded entity sizes
747
 *
748
 * Check for non-linear entity expansion behaviour.
749
 *
750
 * In some cases like xmlStringDecodeEntities, this function is called
751
 * for each, possibly nested entity and its unexpanded content length.
752
 *
753
 * In other cases like xmlParseReference, it's only called for each
754
 * top-level entity with its unexpanded content length plus the sum of
755
 * the unexpanded content lengths (plus fixed cost) of all nested
756
 * entities.
757
 *
758
 * Summing the unexpanded lengths also adds the length of the reference.
759
 * This is by design. Taking the length of the entity name into account
760
 * discourages attacks that try to waste CPU time with abusively long
761
 * entity names. See test/recurse/lol6.xml for example. Each call also
762
 * adds some fixed cost XML_ENT_FIXED_COST to discourage attacks with
763
 * short entities.
764
 *
765
 * Returns 1 on error, 0 on success.
766
 */
767
static int
768
xmlParserEntityCheck(xmlParserCtxtPtr ctxt, unsigned long extra)
769
632M
{
770
632M
    unsigned long consumed;
771
632M
    xmlParserInputPtr input = ctxt->input;
772
632M
    xmlEntityPtr entity = input->entity;
773
774
    /*
775
     * Compute total consumed bytes so far, including input streams of
776
     * external entities.
777
     */
778
632M
    consumed = input->parentConsumed;
779
632M
    if ((entity == NULL) ||
780
632M
        ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
781
414M
         ((entity->flags & XML_ENT_PARSED) == 0))) {
782
217M
        xmlSaturatedAdd(&consumed, input->consumed);
783
217M
        xmlSaturatedAddSizeT(&consumed, input->cur - input->base);
784
217M
    }
785
632M
    xmlSaturatedAdd(&consumed, ctxt->sizeentities);
786
787
    /*
788
     * Add extra cost and some fixed cost.
789
     */
790
632M
    xmlSaturatedAdd(&ctxt->sizeentcopy, extra);
791
632M
    xmlSaturatedAdd(&ctxt->sizeentcopy, XML_ENT_FIXED_COST);
792
793
    /*
794
     * It's important to always use saturation arithmetic when tracking
795
     * entity sizes to make the size checks reliable. If "sizeentcopy"
796
     * overflows, we have to abort.
797
     */
798
632M
    if ((ctxt->sizeentcopy > XML_MAX_TEXT_LENGTH) &&
799
632M
        ((ctxt->sizeentcopy >= ULONG_MAX) ||
800
6.16k
         (ctxt->sizeentcopy / XML_PARSER_NON_LINEAR > consumed))) {
801
6.16k
        xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_LOOP,
802
6.16k
                       "Maximum entity amplification factor exceeded");
803
6.16k
        xmlHaltParser(ctxt);
804
6.16k
        return(1);
805
6.16k
    }
806
807
632M
    return(0);
808
632M
}
809
810
/************************************************************************
811
 *                  *
812
 *    Library wide options          *
813
 *                  *
814
 ************************************************************************/
815
816
/**
817
  * xmlHasFeature:
818
  * @feature: the feature to be examined
819
  *
820
  * Examines if the library has been compiled with a given feature.
821
  *
822
  * Returns a non-zero value if the feature exists, otherwise zero.
823
  * Returns zero (0) if the feature does not exist or an unknown
824
  * feature is requested, non-zero otherwise.
825
  */
826
int
827
xmlHasFeature(xmlFeature feature)
828
0
{
829
0
    switch (feature) {
830
0
  case XML_WITH_THREAD:
831
0
#ifdef LIBXML_THREAD_ENABLED
832
0
      return(1);
833
#else
834
      return(0);
835
#endif
836
0
        case XML_WITH_TREE:
837
0
#ifdef LIBXML_TREE_ENABLED
838
0
            return(1);
839
#else
840
            return(0);
841
#endif
842
0
        case XML_WITH_OUTPUT:
843
0
#ifdef LIBXML_OUTPUT_ENABLED
844
0
            return(1);
845
#else
846
            return(0);
847
#endif
848
0
        case XML_WITH_PUSH:
849
0
#ifdef LIBXML_PUSH_ENABLED
850
0
            return(1);
851
#else
852
            return(0);
853
#endif
854
0
        case XML_WITH_READER:
855
0
#ifdef LIBXML_READER_ENABLED
856
0
            return(1);
857
#else
858
            return(0);
859
#endif
860
0
        case XML_WITH_PATTERN:
861
0
#ifdef LIBXML_PATTERN_ENABLED
862
0
            return(1);
863
#else
864
            return(0);
865
#endif
866
0
        case XML_WITH_WRITER:
867
0
#ifdef LIBXML_WRITER_ENABLED
868
0
            return(1);
869
#else
870
            return(0);
871
#endif
872
0
        case XML_WITH_SAX1:
873
0
#ifdef LIBXML_SAX1_ENABLED
874
0
            return(1);
875
#else
876
            return(0);
877
#endif
878
0
        case XML_WITH_FTP:
879
#ifdef LIBXML_FTP_ENABLED
880
            return(1);
881
#else
882
0
            return(0);
883
0
#endif
884
0
        case XML_WITH_HTTP:
885
#ifdef LIBXML_HTTP_ENABLED
886
            return(1);
887
#else
888
0
            return(0);
889
0
#endif
890
0
        case XML_WITH_VALID:
891
0
#ifdef LIBXML_VALID_ENABLED
892
0
            return(1);
893
#else
894
            return(0);
895
#endif
896
0
        case XML_WITH_HTML:
897
0
#ifdef LIBXML_HTML_ENABLED
898
0
            return(1);
899
#else
900
            return(0);
901
#endif
902
0
        case XML_WITH_LEGACY:
903
#ifdef LIBXML_LEGACY_ENABLED
904
            return(1);
905
#else
906
0
            return(0);
907
0
#endif
908
0
        case XML_WITH_C14N:
909
0
#ifdef LIBXML_C14N_ENABLED
910
0
            return(1);
911
#else
912
            return(0);
913
#endif
914
0
        case XML_WITH_CATALOG:
915
0
#ifdef LIBXML_CATALOG_ENABLED
916
0
            return(1);
917
#else
918
            return(0);
919
#endif
920
0
        case XML_WITH_XPATH:
921
0
#ifdef LIBXML_XPATH_ENABLED
922
0
            return(1);
923
#else
924
            return(0);
925
#endif
926
0
        case XML_WITH_XPTR:
927
0
#ifdef LIBXML_XPTR_ENABLED
928
0
            return(1);
929
#else
930
            return(0);
931
#endif
932
0
        case XML_WITH_XINCLUDE:
933
0
#ifdef LIBXML_XINCLUDE_ENABLED
934
0
            return(1);
935
#else
936
            return(0);
937
#endif
938
0
        case XML_WITH_ICONV:
939
0
#ifdef LIBXML_ICONV_ENABLED
940
0
            return(1);
941
#else
942
            return(0);
943
#endif
944
0
        case XML_WITH_ISO8859X:
945
0
#ifdef LIBXML_ISO8859X_ENABLED
946
0
            return(1);
947
#else
948
            return(0);
949
#endif
950
0
        case XML_WITH_UNICODE:
951
0
#ifdef LIBXML_UNICODE_ENABLED
952
0
            return(1);
953
#else
954
            return(0);
955
#endif
956
0
        case XML_WITH_REGEXP:
957
0
#ifdef LIBXML_REGEXP_ENABLED
958
0
            return(1);
959
#else
960
            return(0);
961
#endif
962
0
        case XML_WITH_AUTOMATA:
963
0
#ifdef LIBXML_AUTOMATA_ENABLED
964
0
            return(1);
965
#else
966
            return(0);
967
#endif
968
0
        case XML_WITH_EXPR:
969
#ifdef LIBXML_EXPR_ENABLED
970
            return(1);
971
#else
972
0
            return(0);
973
0
#endif
974
0
        case XML_WITH_SCHEMAS:
975
0
#ifdef LIBXML_SCHEMAS_ENABLED
976
0
            return(1);
977
#else
978
            return(0);
979
#endif
980
0
        case XML_WITH_SCHEMATRON:
981
0
#ifdef LIBXML_SCHEMATRON_ENABLED
982
0
            return(1);
983
#else
984
            return(0);
985
#endif
986
0
        case XML_WITH_MODULES:
987
0
#ifdef LIBXML_MODULES_ENABLED
988
0
            return(1);
989
#else
990
            return(0);
991
#endif
992
0
        case XML_WITH_DEBUG:
993
#ifdef LIBXML_DEBUG_ENABLED
994
            return(1);
995
#else
996
0
            return(0);
997
0
#endif
998
0
        case XML_WITH_DEBUG_MEM:
999
#ifdef DEBUG_MEMORY_LOCATION
1000
            return(1);
1001
#else
1002
0
            return(0);
1003
0
#endif
1004
0
        case XML_WITH_DEBUG_RUN:
1005
0
            return(0);
1006
0
        case XML_WITH_ZLIB:
1007
0
#ifdef LIBXML_ZLIB_ENABLED
1008
0
            return(1);
1009
#else
1010
            return(0);
1011
#endif
1012
0
        case XML_WITH_LZMA:
1013
0
#ifdef LIBXML_LZMA_ENABLED
1014
0
            return(1);
1015
#else
1016
            return(0);
1017
#endif
1018
0
        case XML_WITH_ICU:
1019
#ifdef LIBXML_ICU_ENABLED
1020
            return(1);
1021
#else
1022
0
            return(0);
1023
0
#endif
1024
0
        default:
1025
0
      break;
1026
0
     }
1027
0
     return(0);
1028
0
}
1029
1030
/************************************************************************
1031
 *                  *
1032
 *    SAX2 defaulted attributes handling      *
1033
 *                  *
1034
 ************************************************************************/
1035
1036
/**
1037
 * xmlDetectSAX2:
1038
 * @ctxt:  an XML parser context
1039
 *
1040
 * Do the SAX2 detection and specific initialization
1041
 */
1042
static void
1043
1.91M
xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
1044
1.91M
    xmlSAXHandlerPtr sax;
1045
1046
    /* Avoid unused variable warning if features are disabled. */
1047
1.91M
    (void) sax;
1048
1049
1.91M
    if (ctxt == NULL) return;
1050
1.91M
    sax = ctxt->sax;
1051
1.91M
#ifdef LIBXML_SAX1_ENABLED
1052
1.91M
    if ((sax) &&  (sax->initialized == XML_SAX2_MAGIC) &&
1053
1.91M
        ((sax->startElementNs != NULL) ||
1054
1.28M
         (sax->endElementNs != NULL) ||
1055
1.28M
         ((sax->startElement == NULL) && (sax->endElement == NULL))))
1056
1.28M
        ctxt->sax2 = 1;
1057
#else
1058
    ctxt->sax2 = 1;
1059
#endif /* LIBXML_SAX1_ENABLED */
1060
1061
1.91M
    ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
1062
1.91M
    ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
1063
1.91M
    ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
1064
1.91M
    if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
1065
1.91M
    (ctxt->str_xml_ns == NULL)) {
1066
0
        xmlErrMemory(ctxt, NULL);
1067
0
    }
1068
1.91M
}
1069
1070
typedef struct _xmlDefAttrs xmlDefAttrs;
1071
typedef xmlDefAttrs *xmlDefAttrsPtr;
1072
struct _xmlDefAttrs {
1073
    int nbAttrs;  /* number of defaulted attributes on that element */
1074
    int maxAttrs;       /* the size of the array */
1075
#if __STDC_VERSION__ >= 199901L
1076
    /* Using a C99 flexible array member avoids UBSan errors. */
1077
    const xmlChar *values[]; /* array of localname/prefix/values/external */
1078
#else
1079
    const xmlChar *values[5];
1080
#endif
1081
};
1082
1083
/**
1084
 * xmlAttrNormalizeSpace:
1085
 * @src: the source string
1086
 * @dst: the target string
1087
 *
1088
 * Normalize the space in non CDATA attribute values:
1089
 * If the attribute type is not CDATA, then the XML processor MUST further
1090
 * process the normalized attribute value by discarding any leading and
1091
 * trailing space (#x20) characters, and by replacing sequences of space
1092
 * (#x20) characters by a single space (#x20) character.
1093
 * Note that the size of dst need to be at least src, and if one doesn't need
1094
 * to preserve dst (and it doesn't come from a dictionary or read-only) then
1095
 * passing src as dst is just fine.
1096
 *
1097
 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1098
 *         is needed.
1099
 */
1100
static xmlChar *
1101
xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
1102
749k
{
1103
749k
    if ((src == NULL) || (dst == NULL))
1104
0
        return(NULL);
1105
1106
1.33M
    while (*src == 0x20) src++;
1107
9.96M
    while (*src != 0) {
1108
9.22M
  if (*src == 0x20) {
1109
3.07M
      while (*src == 0x20) src++;
1110
446k
      if (*src != 0)
1111
400k
    *dst++ = 0x20;
1112
8.77M
  } else {
1113
8.77M
      *dst++ = *src++;
1114
8.77M
  }
1115
9.22M
    }
1116
749k
    *dst = 0;
1117
749k
    if (dst == src)
1118
612k
       return(NULL);
1119
136k
    return(dst);
1120
749k
}
1121
1122
/**
1123
 * xmlAttrNormalizeSpace2:
1124
 * @src: the source string
1125
 *
1126
 * Normalize the space in non CDATA attribute values, a slightly more complex
1127
 * front end to avoid allocation problems when running on attribute values
1128
 * coming from the input.
1129
 *
1130
 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1131
 *         is needed.
1132
 */
1133
static const xmlChar *
1134
xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
1135
149k
{
1136
149k
    int i;
1137
149k
    int remove_head = 0;
1138
149k
    int need_realloc = 0;
1139
149k
    const xmlChar *cur;
1140
1141
149k
    if ((ctxt == NULL) || (src == NULL) || (len == NULL))
1142
0
        return(NULL);
1143
149k
    i = *len;
1144
149k
    if (i <= 0)
1145
7.01k
        return(NULL);
1146
1147
142k
    cur = src;
1148
179k
    while (*cur == 0x20) {
1149
37.1k
        cur++;
1150
37.1k
  remove_head++;
1151
37.1k
    }
1152
2.02M
    while (*cur != 0) {
1153
1.89M
  if (*cur == 0x20) {
1154
93.0k
      cur++;
1155
93.0k
      if ((*cur == 0x20) || (*cur == 0)) {
1156
21.2k
          need_realloc = 1;
1157
21.2k
    break;
1158
21.2k
      }
1159
93.0k
  } else
1160
1.80M
      cur++;
1161
1.89M
    }
1162
142k
    if (need_realloc) {
1163
21.2k
        xmlChar *ret;
1164
1165
21.2k
  ret = xmlStrndup(src + remove_head, i - remove_head + 1);
1166
21.2k
  if (ret == NULL) {
1167
0
      xmlErrMemory(ctxt, NULL);
1168
0
      return(NULL);
1169
0
  }
1170
21.2k
  xmlAttrNormalizeSpace(ret, ret);
1171
21.2k
  *len = strlen((const char *)ret);
1172
21.2k
        return(ret);
1173
121k
    } else if (remove_head) {
1174
5.79k
        *len -= remove_head;
1175
5.79k
        memmove(src, src + remove_head, 1 + *len);
1176
5.79k
  return(src);
1177
5.79k
    }
1178
115k
    return(NULL);
1179
142k
}
1180
1181
/**
1182
 * xmlAddDefAttrs:
1183
 * @ctxt:  an XML parser context
1184
 * @fullname:  the element fullname
1185
 * @fullattr:  the attribute fullname
1186
 * @value:  the attribute value
1187
 *
1188
 * Add a defaulted attribute for an element
1189
 */
1190
static void
1191
xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1192
               const xmlChar *fullname,
1193
               const xmlChar *fullattr,
1194
1.21M
               const xmlChar *value) {
1195
1.21M
    xmlDefAttrsPtr defaults;
1196
1.21M
    int len;
1197
1.21M
    const xmlChar *name;
1198
1.21M
    const xmlChar *prefix;
1199
1200
    /*
1201
     * Allows to detect attribute redefinitions
1202
     */
1203
1.21M
    if (ctxt->attsSpecial != NULL) {
1204
1.17M
        if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1205
65.8k
      return;
1206
1.17M
    }
1207
1208
1.14M
    if (ctxt->attsDefault == NULL) {
1209
91.7k
        ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
1210
91.7k
  if (ctxt->attsDefault == NULL)
1211
0
      goto mem_error;
1212
91.7k
    }
1213
1214
    /*
1215
     * split the element name into prefix:localname , the string found
1216
     * are within the DTD and then not associated to namespace names.
1217
     */
1218
1.14M
    name = xmlSplitQName3(fullname, &len);
1219
1.14M
    if (name == NULL) {
1220
1.12M
        name = xmlDictLookup(ctxt->dict, fullname, -1);
1221
1.12M
  prefix = NULL;
1222
1.12M
    } else {
1223
23.1k
        name = xmlDictLookup(ctxt->dict, name, -1);
1224
23.1k
  prefix = xmlDictLookup(ctxt->dict, fullname, len);
1225
23.1k
    }
1226
1227
    /*
1228
     * make sure there is some storage
1229
     */
1230
1.14M
    defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1231
1.14M
    if (defaults == NULL) {
1232
587k
        defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
1233
587k
                     (4 * 5) * sizeof(const xmlChar *));
1234
587k
  if (defaults == NULL)
1235
0
      goto mem_error;
1236
587k
  defaults->nbAttrs = 0;
1237
587k
  defaults->maxAttrs = 4;
1238
587k
  if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1239
587k
                          defaults, NULL) < 0) {
1240
0
      xmlFree(defaults);
1241
0
      goto mem_error;
1242
0
  }
1243
587k
    } else if (defaults->nbAttrs >= defaults->maxAttrs) {
1244
5.61k
        xmlDefAttrsPtr temp;
1245
1246
5.61k
        temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
1247
5.61k
           (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
1248
5.61k
  if (temp == NULL)
1249
0
      goto mem_error;
1250
5.61k
  defaults = temp;
1251
5.61k
  defaults->maxAttrs *= 2;
1252
5.61k
  if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1253
5.61k
                          defaults, NULL) < 0) {
1254
0
      xmlFree(defaults);
1255
0
      goto mem_error;
1256
0
  }
1257
5.61k
    }
1258
1259
    /*
1260
     * Split the element name into prefix:localname , the string found
1261
     * are within the DTD and hen not associated to namespace names.
1262
     */
1263
1.14M
    name = xmlSplitQName3(fullattr, &len);
1264
1.14M
    if (name == NULL) {
1265
863k
        name = xmlDictLookup(ctxt->dict, fullattr, -1);
1266
863k
  prefix = NULL;
1267
863k
    } else {
1268
285k
        name = xmlDictLookup(ctxt->dict, name, -1);
1269
285k
  prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1270
285k
    }
1271
1272
1.14M
    defaults->values[5 * defaults->nbAttrs] = name;
1273
1.14M
    defaults->values[5 * defaults->nbAttrs + 1] = prefix;
1274
    /* intern the string and precompute the end */
1275
1.14M
    len = xmlStrlen(value);
1276
1.14M
    value = xmlDictLookup(ctxt->dict, value, len);
1277
1.14M
    if (value == NULL)
1278
0
        goto mem_error;
1279
1.14M
    defaults->values[5 * defaults->nbAttrs + 2] = value;
1280
1.14M
    defaults->values[5 * defaults->nbAttrs + 3] = value + len;
1281
1.14M
    if (ctxt->external)
1282
824k
        defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
1283
324k
    else
1284
324k
        defaults->values[5 * defaults->nbAttrs + 4] = NULL;
1285
1.14M
    defaults->nbAttrs++;
1286
1287
1.14M
    return;
1288
1289
0
mem_error:
1290
0
    xmlErrMemory(ctxt, NULL);
1291
0
    return;
1292
1.14M
}
1293
1294
/**
1295
 * xmlAddSpecialAttr:
1296
 * @ctxt:  an XML parser context
1297
 * @fullname:  the element fullname
1298
 * @fullattr:  the attribute fullname
1299
 * @type:  the attribute type
1300
 *
1301
 * Register this attribute type
1302
 */
1303
static void
1304
xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1305
      const xmlChar *fullname,
1306
      const xmlChar *fullattr,
1307
      int type)
1308
16.6M
{
1309
16.6M
    if (ctxt->attsSpecial == NULL) {
1310
194k
        ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
1311
194k
  if (ctxt->attsSpecial == NULL)
1312
0
      goto mem_error;
1313
194k
    }
1314
1315
16.6M
    if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1316
1.00M
        return;
1317
1318
15.6M
    xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1319
15.6M
                     (void *) (ptrdiff_t) type);
1320
15.6M
    return;
1321
1322
0
mem_error:
1323
0
    xmlErrMemory(ctxt, NULL);
1324
0
    return;
1325
16.6M
}
1326
1327
/**
1328
 * xmlCleanSpecialAttrCallback:
1329
 *
1330
 * Removes CDATA attributes from the special attribute table
1331
 */
1332
static void
1333
xmlCleanSpecialAttrCallback(void *payload, void *data,
1334
                            const xmlChar *fullname, const xmlChar *fullattr,
1335
13.4M
                            const xmlChar *unused ATTRIBUTE_UNUSED) {
1336
13.4M
    xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1337
1338
13.4M
    if (((ptrdiff_t) payload) == XML_ATTRIBUTE_CDATA) {
1339
4.75M
        xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1340
4.75M
    }
1341
13.4M
}
1342
1343
/**
1344
 * xmlCleanSpecialAttr:
1345
 * @ctxt:  an XML parser context
1346
 *
1347
 * Trim the list of attributes defined to remove all those of type
1348
 * CDATA as they are not special. This call should be done when finishing
1349
 * to parse the DTD and before starting to parse the document root.
1350
 */
1351
static void
1352
xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1353
497k
{
1354
497k
    if (ctxt->attsSpecial == NULL)
1355
349k
        return;
1356
1357
147k
    xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1358
1359
147k
    if (xmlHashSize(ctxt->attsSpecial) == 0) {
1360
46.0k
        xmlHashFree(ctxt->attsSpecial, NULL);
1361
46.0k
        ctxt->attsSpecial = NULL;
1362
46.0k
    }
1363
147k
    return;
1364
497k
}
1365
1366
/**
1367
 * xmlCheckLanguageID:
1368
 * @lang:  pointer to the string value
1369
 *
1370
 * Checks that the value conforms to the LanguageID production:
1371
 *
1372
 * NOTE: this is somewhat deprecated, those productions were removed from
1373
 *       the XML Second edition.
1374
 *
1375
 * [33] LanguageID ::= Langcode ('-' Subcode)*
1376
 * [34] Langcode ::= ISO639Code |  IanaCode |  UserCode
1377
 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1378
 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1379
 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1380
 * [38] Subcode ::= ([a-z] | [A-Z])+
1381
 *
1382
 * The current REC reference the successors of RFC 1766, currently 5646
1383
 *
1384
 * http://www.rfc-editor.org/rfc/rfc5646.txt
1385
 * langtag       = language
1386
 *                 ["-" script]
1387
 *                 ["-" region]
1388
 *                 *("-" variant)
1389
 *                 *("-" extension)
1390
 *                 ["-" privateuse]
1391
 * language      = 2*3ALPHA            ; shortest ISO 639 code
1392
 *                 ["-" extlang]       ; sometimes followed by
1393
 *                                     ; extended language subtags
1394
 *               / 4ALPHA              ; or reserved for future use
1395
 *               / 5*8ALPHA            ; or registered language subtag
1396
 *
1397
 * extlang       = 3ALPHA              ; selected ISO 639 codes
1398
 *                 *2("-" 3ALPHA)      ; permanently reserved
1399
 *
1400
 * script        = 4ALPHA              ; ISO 15924 code
1401
 *
1402
 * region        = 2ALPHA              ; ISO 3166-1 code
1403
 *               / 3DIGIT              ; UN M.49 code
1404
 *
1405
 * variant       = 5*8alphanum         ; registered variants
1406
 *               / (DIGIT 3alphanum)
1407
 *
1408
 * extension     = singleton 1*("-" (2*8alphanum))
1409
 *
1410
 *                                     ; Single alphanumerics
1411
 *                                     ; "x" reserved for private use
1412
 * singleton     = DIGIT               ; 0 - 9
1413
 *               / %x41-57             ; A - W
1414
 *               / %x59-5A             ; Y - Z
1415
 *               / %x61-77             ; a - w
1416
 *               / %x79-7A             ; y - z
1417
 *
1418
 * it sounds right to still allow Irregular i-xxx IANA and user codes too
1419
 * The parser below doesn't try to cope with extension or privateuse
1420
 * that could be added but that's not interoperable anyway
1421
 *
1422
 * Returns 1 if correct 0 otherwise
1423
 **/
1424
int
1425
xmlCheckLanguageID(const xmlChar * lang)
1426
72.8k
{
1427
72.8k
    const xmlChar *cur = lang, *nxt;
1428
1429
72.8k
    if (cur == NULL)
1430
1.29k
        return (0);
1431
71.5k
    if (((cur[0] == 'i') && (cur[1] == '-')) ||
1432
71.5k
        ((cur[0] == 'I') && (cur[1] == '-')) ||
1433
71.5k
        ((cur[0] == 'x') && (cur[1] == '-')) ||
1434
71.5k
        ((cur[0] == 'X') && (cur[1] == '-'))) {
1435
        /*
1436
         * Still allow IANA code and user code which were coming
1437
         * from the previous version of the XML-1.0 specification
1438
         * it's deprecated but we should not fail
1439
         */
1440
2.77k
        cur += 2;
1441
27.1k
        while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1442
27.1k
               ((cur[0] >= 'a') && (cur[0] <= 'z')))
1443
24.4k
            cur++;
1444
2.77k
        return(cur[0] == 0);
1445
2.77k
    }
1446
68.7k
    nxt = cur;
1447
258k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1448
258k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1449
189k
           nxt++;
1450
68.7k
    if (nxt - cur >= 4) {
1451
        /*
1452
         * Reserved
1453
         */
1454
4.58k
        if ((nxt - cur > 8) || (nxt[0] != 0))
1455
3.82k
            return(0);
1456
768
        return(1);
1457
4.58k
    }
1458
64.1k
    if (nxt - cur < 2)
1459
6.94k
        return(0);
1460
    /* we got an ISO 639 code */
1461
57.2k
    if (nxt[0] == 0)
1462
41.2k
        return(1);
1463
15.9k
    if (nxt[0] != '-')
1464
1.83k
        return(0);
1465
1466
14.1k
    nxt++;
1467
14.1k
    cur = nxt;
1468
    /* now we can have extlang or script or region or variant */
1469
14.1k
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1470
934
        goto region_m49;
1471
1472
58.9k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1473
58.9k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1474
45.7k
           nxt++;
1475
13.1k
    if (nxt - cur == 4)
1476
3.39k
        goto script;
1477
9.79k
    if (nxt - cur == 2)
1478
1.73k
        goto region;
1479
8.06k
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1480
702
        goto variant;
1481
7.36k
    if (nxt - cur != 3)
1482
1.50k
        return(0);
1483
    /* we parsed an extlang */
1484
5.85k
    if (nxt[0] == 0)
1485
748
        return(1);
1486
5.11k
    if (nxt[0] != '-')
1487
350
        return(0);
1488
1489
4.76k
    nxt++;
1490
4.76k
    cur = nxt;
1491
    /* now we can have script or region or variant */
1492
4.76k
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1493
572
        goto region_m49;
1494
1495
22.7k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1496
22.7k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1497
18.5k
           nxt++;
1498
4.18k
    if (nxt - cur == 2)
1499
1.17k
        goto region;
1500
3.01k
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1501
461
        goto variant;
1502
2.55k
    if (nxt - cur != 4)
1503
1.40k
        return(0);
1504
    /* we parsed a script */
1505
4.54k
script:
1506
4.54k
    if (nxt[0] == 0)
1507
544
        return(1);
1508
3.99k
    if (nxt[0] != '-')
1509
869
        return(0);
1510
1511
3.12k
    nxt++;
1512
3.12k
    cur = nxt;
1513
    /* now we can have region or variant */
1514
3.12k
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1515
670
        goto region_m49;
1516
1517
15.3k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1518
15.3k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1519
12.9k
           nxt++;
1520
1521
2.45k
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1522
441
        goto variant;
1523
2.01k
    if (nxt - cur != 2)
1524
1.23k
        return(0);
1525
    /* we parsed a region */
1526
4.57k
region:
1527
4.57k
    if (nxt[0] == 0)
1528
719
        return(1);
1529
3.85k
    if (nxt[0] != '-')
1530
1.94k
        return(0);
1531
1532
1.91k
    nxt++;
1533
1.91k
    cur = nxt;
1534
    /* now we can just have a variant */
1535
23.6k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1536
23.6k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1537
21.7k
           nxt++;
1538
1539
1.91k
    if ((nxt - cur < 5) || (nxt - cur > 8))
1540
1.44k
        return(0);
1541
1542
    /* we parsed a variant */
1543
2.07k
variant:
1544
2.07k
    if (nxt[0] == 0)
1545
306
        return(1);
1546
1.77k
    if (nxt[0] != '-')
1547
1.52k
        return(0);
1548
    /* extensions and private use subtags not checked */
1549
252
    return (1);
1550
1551
2.17k
region_m49:
1552
2.17k
    if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
1553
2.17k
        ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
1554
880
        nxt += 3;
1555
880
        goto region;
1556
880
    }
1557
1.29k
    return(0);
1558
2.17k
}
1559
1560
/************************************************************************
1561
 *                  *
1562
 *    Parser stacks related functions and macros    *
1563
 *                  *
1564
 ************************************************************************/
1565
1566
static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1567
                                            const xmlChar ** str);
1568
1569
#ifdef SAX2
1570
/**
1571
 * nsPush:
1572
 * @ctxt:  an XML parser context
1573
 * @prefix:  the namespace prefix or NULL
1574
 * @URL:  the namespace name
1575
 *
1576
 * Pushes a new parser namespace on top of the ns stack
1577
 *
1578
 * Returns -1 in case of error, -2 if the namespace should be discarded
1579
 *     and the index in the stack otherwise.
1580
 */
1581
static int
1582
nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1583
467k
{
1584
467k
    if (ctxt->options & XML_PARSE_NSCLEAN) {
1585
160k
        int i;
1586
833k
  for (i = ctxt->nsNr - 2;i >= 0;i -= 2) {
1587
773k
      if (ctxt->nsTab[i] == prefix) {
1588
    /* in scope */
1589
100k
          if (ctxt->nsTab[i + 1] == URL)
1590
38.6k
        return(-2);
1591
    /* out of scope keep it */
1592
62.1k
    break;
1593
100k
      }
1594
773k
  }
1595
160k
    }
1596
428k
    if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1597
84.5k
  ctxt->nsMax = 10;
1598
84.5k
  ctxt->nsNr = 0;
1599
84.5k
  ctxt->nsTab = (const xmlChar **)
1600
84.5k
                xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1601
84.5k
  if (ctxt->nsTab == NULL) {
1602
0
      xmlErrMemory(ctxt, NULL);
1603
0
      ctxt->nsMax = 0;
1604
0
            return (-1);
1605
0
  }
1606
344k
    } else if (ctxt->nsNr >= ctxt->nsMax) {
1607
10.4k
        const xmlChar ** tmp;
1608
10.4k
        ctxt->nsMax *= 2;
1609
10.4k
        tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1610
10.4k
            ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1611
10.4k
        if (tmp == NULL) {
1612
0
            xmlErrMemory(ctxt, NULL);
1613
0
      ctxt->nsMax /= 2;
1614
0
            return (-1);
1615
0
        }
1616
10.4k
  ctxt->nsTab = tmp;
1617
10.4k
    }
1618
428k
    ctxt->nsTab[ctxt->nsNr++] = prefix;
1619
428k
    ctxt->nsTab[ctxt->nsNr++] = URL;
1620
428k
    return (ctxt->nsNr);
1621
428k
}
1622
/**
1623
 * nsPop:
1624
 * @ctxt: an XML parser context
1625
 * @nr:  the number to pop
1626
 *
1627
 * Pops the top @nr parser prefix/namespace from the ns stack
1628
 *
1629
 * Returns the number of namespaces removed
1630
 */
1631
static int
1632
nsPop(xmlParserCtxtPtr ctxt, int nr)
1633
124k
{
1634
124k
    int i;
1635
1636
124k
    if (ctxt->nsTab == NULL) return(0);
1637
124k
    if (ctxt->nsNr < nr) {
1638
0
        xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1639
0
        nr = ctxt->nsNr;
1640
0
    }
1641
124k
    if (ctxt->nsNr <= 0)
1642
0
        return (0);
1643
1644
483k
    for (i = 0;i < nr;i++) {
1645
358k
         ctxt->nsNr--;
1646
358k
   ctxt->nsTab[ctxt->nsNr] = NULL;
1647
358k
    }
1648
124k
    return(nr);
1649
124k
}
1650
#endif
1651
1652
static int
1653
208k
xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1654
208k
    const xmlChar **atts;
1655
208k
    int *attallocs;
1656
208k
    int maxatts;
1657
1658
208k
    if (nr + 5 > ctxt->maxatts) {
1659
208k
  maxatts = ctxt->maxatts == 0 ? 55 : (nr + 5) * 2;
1660
208k
  atts = (const xmlChar **) xmlMalloc(
1661
208k
             maxatts * sizeof(const xmlChar *));
1662
208k
  if (atts == NULL) goto mem_error;
1663
208k
  attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1664
208k
                               (maxatts / 5) * sizeof(int));
1665
208k
  if (attallocs == NULL) {
1666
0
            xmlFree(atts);
1667
0
            goto mem_error;
1668
0
        }
1669
208k
        if (ctxt->maxatts > 0)
1670
1.31k
            memcpy(atts, ctxt->atts, ctxt->maxatts * sizeof(const xmlChar *));
1671
208k
        xmlFree(ctxt->atts);
1672
208k
  ctxt->atts = atts;
1673
208k
  ctxt->attallocs = attallocs;
1674
208k
  ctxt->maxatts = maxatts;
1675
208k
    }
1676
208k
    return(ctxt->maxatts);
1677
0
mem_error:
1678
0
    xmlErrMemory(ctxt, NULL);
1679
0
    return(-1);
1680
208k
}
1681
1682
/**
1683
 * inputPush:
1684
 * @ctxt:  an XML parser context
1685
 * @value:  the parser input
1686
 *
1687
 * Pushes a new parser input on top of the input stack
1688
 *
1689
 * Returns -1 in case of error, the index in the stack otherwise
1690
 */
1691
int
1692
inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1693
416M
{
1694
416M
    if ((ctxt == NULL) || (value == NULL))
1695
0
        return(-1);
1696
416M
    if (ctxt->inputNr >= ctxt->inputMax) {
1697
12.3k
        size_t newSize = ctxt->inputMax * 2;
1698
12.3k
        xmlParserInputPtr *tmp;
1699
1700
12.3k
        tmp = (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1701
12.3k
                                               newSize * sizeof(*tmp));
1702
12.3k
        if (tmp == NULL) {
1703
0
            xmlErrMemory(ctxt, NULL);
1704
0
            return (-1);
1705
0
        }
1706
12.3k
        ctxt->inputTab = tmp;
1707
12.3k
        ctxt->inputMax = newSize;
1708
12.3k
    }
1709
416M
    ctxt->inputTab[ctxt->inputNr] = value;
1710
416M
    ctxt->input = value;
1711
416M
    return (ctxt->inputNr++);
1712
416M
}
1713
/**
1714
 * inputPop:
1715
 * @ctxt: an XML parser context
1716
 *
1717
 * Pops the top parser input from the input stack
1718
 *
1719
 * Returns the input just removed
1720
 */
1721
xmlParserInputPtr
1722
inputPop(xmlParserCtxtPtr ctxt)
1723
420M
{
1724
420M
    xmlParserInputPtr ret;
1725
1726
420M
    if (ctxt == NULL)
1727
0
        return(NULL);
1728
420M
    if (ctxt->inputNr <= 0)
1729
4.36M
        return (NULL);
1730
415M
    ctxt->inputNr--;
1731
415M
    if (ctxt->inputNr > 0)
1732
414M
        ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1733
1.34M
    else
1734
1.34M
        ctxt->input = NULL;
1735
415M
    ret = ctxt->inputTab[ctxt->inputNr];
1736
415M
    ctxt->inputTab[ctxt->inputNr] = NULL;
1737
415M
    return (ret);
1738
420M
}
1739
/**
1740
 * nodePush:
1741
 * @ctxt:  an XML parser context
1742
 * @value:  the element node
1743
 *
1744
 * Pushes a new element node on top of the node stack
1745
 *
1746
 * Returns -1 in case of error, the index in the stack otherwise
1747
 */
1748
int
1749
nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1750
37.0M
{
1751
37.0M
    if (ctxt == NULL) return(0);
1752
37.0M
    if (ctxt->nodeNr >= ctxt->nodeMax) {
1753
32.6k
        xmlNodePtr *tmp;
1754
1755
32.6k
  tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1756
32.6k
                                      ctxt->nodeMax * 2 *
1757
32.6k
                                      sizeof(ctxt->nodeTab[0]));
1758
32.6k
        if (tmp == NULL) {
1759
0
            xmlErrMemory(ctxt, NULL);
1760
0
            return (-1);
1761
0
        }
1762
32.6k
        ctxt->nodeTab = tmp;
1763
32.6k
  ctxt->nodeMax *= 2;
1764
32.6k
    }
1765
37.0M
    if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1766
37.0M
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
1767
292
  xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
1768
292
     "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
1769
292
        xmlParserMaxDepth);
1770
292
  xmlHaltParser(ctxt);
1771
292
  return(-1);
1772
292
    }
1773
37.0M
    ctxt->nodeTab[ctxt->nodeNr] = value;
1774
37.0M
    ctxt->node = value;
1775
37.0M
    return (ctxt->nodeNr++);
1776
37.0M
}
1777
1778
/**
1779
 * nodePop:
1780
 * @ctxt: an XML parser context
1781
 *
1782
 * Pops the top element node from the node stack
1783
 *
1784
 * Returns the node just removed
1785
 */
1786
xmlNodePtr
1787
nodePop(xmlParserCtxtPtr ctxt)
1788
35.7M
{
1789
35.7M
    xmlNodePtr ret;
1790
1791
35.7M
    if (ctxt == NULL) return(NULL);
1792
35.7M
    if (ctxt->nodeNr <= 0)
1793
291k
        return (NULL);
1794
35.4M
    ctxt->nodeNr--;
1795
35.4M
    if (ctxt->nodeNr > 0)
1796
34.7M
        ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1797
665k
    else
1798
665k
        ctxt->node = NULL;
1799
35.4M
    ret = ctxt->nodeTab[ctxt->nodeNr];
1800
35.4M
    ctxt->nodeTab[ctxt->nodeNr] = NULL;
1801
35.4M
    return (ret);
1802
35.7M
}
1803
1804
/**
1805
 * nameNsPush:
1806
 * @ctxt:  an XML parser context
1807
 * @value:  the element name
1808
 * @prefix:  the element prefix
1809
 * @URI:  the element namespace name
1810
 * @line:  the current line number for error messages
1811
 * @nsNr:  the number of namespaces pushed on the namespace table
1812
 *
1813
 * Pushes a new element name/prefix/URL on top of the name stack
1814
 *
1815
 * Returns -1 in case of error, the index in the stack otherwise
1816
 */
1817
static int
1818
nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1819
           const xmlChar *prefix, const xmlChar *URI, int line, int nsNr)
1820
33.6M
{
1821
33.6M
    xmlStartTag *tag;
1822
1823
33.6M
    if (ctxt->nameNr >= ctxt->nameMax) {
1824
57.2k
        const xmlChar * *tmp;
1825
57.2k
        xmlStartTag *tmp2;
1826
57.2k
        ctxt->nameMax *= 2;
1827
57.2k
        tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1828
57.2k
                                    ctxt->nameMax *
1829
57.2k
                                    sizeof(ctxt->nameTab[0]));
1830
57.2k
        if (tmp == NULL) {
1831
0
      ctxt->nameMax /= 2;
1832
0
      goto mem_error;
1833
0
        }
1834
57.2k
  ctxt->nameTab = tmp;
1835
57.2k
        tmp2 = (xmlStartTag *) xmlRealloc((void * *)ctxt->pushTab,
1836
57.2k
                                    ctxt->nameMax *
1837
57.2k
                                    sizeof(ctxt->pushTab[0]));
1838
57.2k
        if (tmp2 == NULL) {
1839
0
      ctxt->nameMax /= 2;
1840
0
      goto mem_error;
1841
0
        }
1842
57.2k
  ctxt->pushTab = tmp2;
1843
33.5M
    } else if (ctxt->pushTab == NULL) {
1844
681k
        ctxt->pushTab = (xmlStartTag *) xmlMalloc(ctxt->nameMax *
1845
681k
                                            sizeof(ctxt->pushTab[0]));
1846
681k
        if (ctxt->pushTab == NULL)
1847
0
            goto mem_error;
1848
681k
    }
1849
33.6M
    ctxt->nameTab[ctxt->nameNr] = value;
1850
33.6M
    ctxt->name = value;
1851
33.6M
    tag = &ctxt->pushTab[ctxt->nameNr];
1852
33.6M
    tag->prefix = prefix;
1853
33.6M
    tag->URI = URI;
1854
33.6M
    tag->line = line;
1855
33.6M
    tag->nsNr = nsNr;
1856
33.6M
    return (ctxt->nameNr++);
1857
0
mem_error:
1858
0
    xmlErrMemory(ctxt, NULL);
1859
0
    return (-1);
1860
33.6M
}
1861
#ifdef LIBXML_PUSH_ENABLED
/**
 * nameNsPop:
 * @ctxt: an XML parser context
 *
 * Pops the top element name from the name stack. ctxt->name is updated
 * to the entry below it, or to NULL when the stack becomes empty, and
 * the vacated slot is reset to NULL.
 *
 * Returns the name just removed, or NULL if the stack was empty
 */
static const xmlChar *
nameNsPop(xmlParserCtxtPtr ctxt)
{
    const xmlChar *popped;
    int top;

    if (ctxt->nameNr <= 0)
        return (NULL);

    top = --ctxt->nameNr;
    ctxt->name = (top > 0) ? ctxt->nameTab[top - 1] : NULL;
    popped = ctxt->nameTab[top];
    ctxt->nameTab[top] = NULL;
    return (popped);
}
#endif /* LIBXML_PUSH_ENABLED */
1887
1888
/**
1889
 * namePush:
1890
 * @ctxt:  an XML parser context
1891
 * @value:  the element name
1892
 *
1893
 * Pushes a new element name on top of the name stack
1894
 *
1895
 * Returns -1 in case of error, the index in the stack otherwise
1896
 */
1897
int
1898
namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
1899
0
{
1900
0
    if (ctxt == NULL) return (-1);
1901
1902
0
    if (ctxt->nameNr >= ctxt->nameMax) {
1903
0
        const xmlChar * *tmp;
1904
0
        tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1905
0
                                    ctxt->nameMax * 2 *
1906
0
                                    sizeof(ctxt->nameTab[0]));
1907
0
        if (tmp == NULL) {
1908
0
      goto mem_error;
1909
0
        }
1910
0
  ctxt->nameTab = tmp;
1911
0
        ctxt->nameMax *= 2;
1912
0
    }
1913
0
    ctxt->nameTab[ctxt->nameNr] = value;
1914
0
    ctxt->name = value;
1915
0
    return (ctxt->nameNr++);
1916
0
mem_error:
1917
0
    xmlErrMemory(ctxt, NULL);
1918
0
    return (-1);
1919
0
}
1920
/**
1921
 * namePop:
1922
 * @ctxt: an XML parser context
1923
 *
1924
 * Pops the top element name from the name stack
1925
 *
1926
 * Returns the name just removed
1927
 */
1928
const xmlChar *
1929
namePop(xmlParserCtxtPtr ctxt)
1930
23.1M
{
1931
23.1M
    const xmlChar *ret;
1932
1933
23.1M
    if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1934
0
        return (NULL);
1935
23.1M
    ctxt->nameNr--;
1936
23.1M
    if (ctxt->nameNr > 0)
1937
22.8M
        ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1938
268k
    else
1939
268k
        ctxt->name = NULL;
1940
23.1M
    ret = ctxt->nameTab[ctxt->nameNr];
1941
23.1M
    ctxt->nameTab[ctxt->nameNr] = NULL;
1942
23.1M
    return (ret);
1943
23.1M
}
1944
1945
42.3M
/*
 * spacePush:
 * Pushes val on top of the space stack and points ctxt->space at the
 * new top entry. The table is doubled in size when it is full.
 *
 * Returns -1 in case of allocation failure, the index of the new entry
 * otherwise.
 */
static int spacePush(xmlParserCtxtPtr ctxt, int val) {
    int *slot;

    if (ctxt->spaceNr >= ctxt->spaceMax) {
        int *grown;

        ctxt->spaceMax *= 2;
        grown = (int *) xmlRealloc(ctxt->spaceTab,
                                   ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
        if (grown == NULL) {
            xmlErrMemory(ctxt, NULL);
            /* The table did not grow: restore the previous capacity. */
            ctxt->spaceMax /= 2;
            return(-1);
        }
        ctxt->spaceTab = grown;
    }

    slot = &ctxt->spaceTab[ctxt->spaceNr];
    *slot = val;
    ctxt->space = slot;
    return(ctxt->spaceNr++);
}
1963
1964
40.9M
/*
 * spacePop:
 * Pops the top entry of the space stack. ctxt->space is moved to the
 * entry below it, or to slot 0 when the stack becomes empty, and the
 * vacated slot is overwritten with -1.
 *
 * Returns the value just removed, or 0 if the stack was empty.
 */
static int spacePop(xmlParserCtxtPtr ctxt) {
    int popped;
    int top;

    if (ctxt->spaceNr <= 0)
        return(0);

    top = --ctxt->spaceNr;
    ctxt->space = &ctxt->spaceTab[(top > 0) ? (top - 1) : 0];
    popped = ctxt->spaceTab[top];
    ctxt->spaceTab[top] = -1;
    return(popped);
}
1976
1977
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported.
 *
 * Dirty macros, i.e. one often needs to make assumptions about the context
 * to use them
 *
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values, otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     same as CUR but in the input buffer, bypass any token
 *           extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used
 *           only to compare on ASCII based substrings.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings without newlines within the parser.
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline
 *           ASCII defined char within the parser.
 * Clean macros, not dependent on an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skip to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pops up unfinished entities on the fly.
 *   NEXTL(l) Skip the current unicode character of l xmlChars long.
 *   CUR_CHAR(l) returns the current unicode character (int), set l
 *           to the number of xmlChars used for the encoding [0-5].
 *   CUR_SCHAR  same but operates on a string instead of the context
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
 *            the index
 *   GROW, SHRINK  handling of input buffers
 */
2011
2012
3.86G
/*
 * Raw access to the current input. All of these expect a variable named
 * ctxt to be in scope at the point of use.
 */
#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur
#define BASE_PTR ctxt->input->base

/*
 * CMPn(s, c1, ..., cn): true when the n bytes starting at s are equal to
 * c1 ... cn, compared as unsigned char. Each CMPn is built on CMP(n-1),
 * so the comparison stops at the first mismatching byte.
 *
 * Every argument is parenthesized so that an expression such as `p + 1`
 * is converted to a byte pointer as a whole instead of having the cast
 * bind to `p` alone. Note that s is expanded once per compared byte.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((unsigned char *) (s))[ 0 ] == (c1) && \
    ((unsigned char *) (s))[ 1 ] == (c2) && \
    ((unsigned char *) (s))[ 2 ] == (c3) && \
    ((unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && \
    ((unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((unsigned char *) (s))[ 9 ] == (c10) )
2035
2036
1.51G
/*
 * SKIP(val): advance the input cursor and the column counter by val
 * bytes without looking at the skipped bytes, then ask for more input
 * if the cursor landed on a 0 byte. val is expanded twice.
 */
#define SKIP(val) do {                                                  \
    ctxt->input->cur += (val),ctxt->input->col+=(val);                  \
    if (*ctxt->input->cur == 0)                                         \
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);                   \
  } while (0)

/*
 * SKIPL(val): advance the input cursor by val bytes one at a time,
 * bumping the line counter and resetting the column on every '\n',
 * then ask for more input if the cursor landed on a 0 byte.
 * val is parenthesized so that any expression can be passed as the
 * count; it is re-evaluated on every loop iteration.
 */
#define SKIPL(val) do {                                                 \
    int skipl;                                                          \
    for(skipl=0; skipl<(val); skipl++) {                                \
        if (*(ctxt->input->cur) == '\n') {                              \
        ctxt->input->line++; ctxt->input->col = 1;                      \
        } else ctxt->input->col++;                                      \
        ctxt->input->cur++;                                             \
    }                                                                   \
    if (*ctxt->input->cur == 0)                                         \
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);                   \
  } while (0)
2053
2054
1.25G
#define SHRINK if ((ctxt->progressive == 0) &&       \
2055
1.25G
       (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
2056
1.25G
       (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
2057
1.25G
  xmlSHRINK (ctxt);
2058
2059
5.88M
static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
2060
    /* Don't shrink memory buffers. */
2061
5.88M
    if ((ctxt->input->buf) &&
2062
5.88M
        ((ctxt->input->buf->encoder) || (ctxt->input->buf->readcallback)))
2063
37.6k
        xmlParserInputShrink(ctxt->input);
2064
5.88M
    if (*ctxt->input->cur == 0)
2065
171k
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2066
5.88M
}
2067
2068
3.89G
#define GROW if ((ctxt->progressive == 0) &&       \
2069
3.89G
     (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
2070
3.89G
  xmlGROW (ctxt);
2071
2072
1.00G
static void xmlGROW (xmlParserCtxtPtr ctxt) {
2073
1.00G
    ptrdiff_t curEnd = ctxt->input->end - ctxt->input->cur;
2074
1.00G
    ptrdiff_t curBase = ctxt->input->cur - ctxt->input->base;
2075
2076
1.00G
    if (((curEnd > XML_MAX_LOOKUP_LIMIT) ||
2077
1.00G
         (curBase > XML_MAX_LOOKUP_LIMIT)) &&
2078
1.00G
         ((ctxt->input->buf) &&
2079
0
          (ctxt->input->buf->readcallback != NULL)) &&
2080
1.00G
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
2081
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
2082
0
        xmlHaltParser(ctxt);
2083
0
  return;
2084
0
    }
2085
1.00G
    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2086
1.00G
    if ((ctxt->input->cur > ctxt->input->end) ||
2087
1.00G
        (ctxt->input->cur < ctxt->input->base)) {
2088
0
        xmlHaltParser(ctxt);
2089
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "cur index out of bound");
2090
0
  return;
2091
0
    }
2092
1.00G
    if ((ctxt->input->cur != NULL) && (*ctxt->input->cur == 0))
2093
16.4M
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2094
1.00G
}
2095
2096
947M
/* Shorthands for the character-level helpers; ctxt must be in scope. */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: advance the cursor and the column counter by one byte, then
 * ask for more input if the cursor landed on a 0 byte.
 * This expands to a plain brace block rather than do { } while (0);
 * the shape is kept as is because call sites are not all visible here.
 */
#define NEXT1 {                                                         \
        ctxt->input->col++;                                             \
        ctxt->input->cur++;                                             \
        if (*ctxt->input->cur == 0)                                     \
            xmlParserInputGrow(ctxt->input, INPUT_CHUNK);               \
    }

/*
 * NEXTL(l): account for one character in the line/column counters
 * (a '\n' starts a new line) and advance the cursor by l bytes.
 */
#define NEXTL(l) do {                                                   \
    if (*(ctxt->input->cur) == '\n') {                                  \
        ctxt->input->line++; ctxt->input->col = 1;                      \
    } else ctxt->input->col++;                                          \
    ctxt->input->cur += (l);                                            \
  } while (0)

#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * COPY_BUF(l, b, i, v): append character v to buffer b at index i and
 * advance i. When l is 1 the value is stored as a single byte, otherwise
 * it is written through xmlCopyCharMultiByte() and i is advanced by the
 * count that function returns.
 * Arguments are parenthesized and the body is wrapped in
 * do { } while (0) so the macro behaves as one statement for any
 * argument expression.
 */
#define COPY_BUF(l,b,i,v) do {                                          \
    if ((l) == 1) (b)[(i)++] = (v);                                     \
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v));                    \
  } while (0)
2120
2121
/**
2122
 * xmlSkipBlankChars:
2123
 * @ctxt:  the XML parser context
2124
 *
2125
 * skip all blanks character found at that point in the input streams.
2126
 * It pops up finished entities in the process if allowable at that point.
2127
 *
2128
 * Returns the number of space chars skipped
2129
 */
2130
2131
int
2132
947M
xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
2133
947M
    int res = 0;
2134
2135
    /*
2136
     * It's Okay to use CUR/NEXT here since all the blanks are on
2137
     * the ASCII range.
2138
     */
2139
947M
    if (((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) ||
2140
947M
        (ctxt->instate == XML_PARSER_START)) {
2141
172M
  const xmlChar *cur;
2142
  /*
2143
   * if we are in the document content, go really fast
2144
   */
2145
172M
  cur = ctxt->input->cur;
2146
172M
  while (IS_BLANK_CH(*cur)) {
2147
51.9M
      if (*cur == '\n') {
2148
3.48M
    ctxt->input->line++; ctxt->input->col = 1;
2149
48.4M
      } else {
2150
48.4M
    ctxt->input->col++;
2151
48.4M
      }
2152
51.9M
      cur++;
2153
51.9M
      if (res < INT_MAX)
2154
51.9M
    res++;
2155
51.9M
      if (*cur == 0) {
2156
170k
    ctxt->input->cur = cur;
2157
170k
    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2158
170k
    cur = ctxt->input->cur;
2159
170k
      }
2160
51.9M
  }
2161
172M
  ctxt->input->cur = cur;
2162
775M
    } else {
2163
775M
        int expandPE = ((ctxt->external != 0) || (ctxt->inputNr != 1));
2164
2165
2.85G
  while (ctxt->instate != XML_PARSER_EOF) {
2166
2.85G
            if (IS_BLANK_CH(CUR)) { /* CHECKED tstblanks.xml */
2167
1.18G
    NEXT;
2168
1.67G
      } else if (CUR == '%') {
2169
                /*
2170
                 * Need to handle support of entities branching here
2171
                 */
2172
485M
          if ((expandPE == 0) || (IS_BLANK_CH(NXT(1))) || (NXT(1) == 0))
2173
4.65M
                    break;
2174
480M
          xmlParsePEReference(ctxt);
2175
1.18G
            } else if (CUR == 0) {
2176
414M
                unsigned long consumed;
2177
414M
                xmlEntityPtr ent;
2178
2179
414M
                if (ctxt->inputNr <= 1)
2180
112k
                    break;
2181
2182
414M
                consumed = ctxt->input->consumed;
2183
414M
                xmlSaturatedAddSizeT(&consumed,
2184
414M
                                     ctxt->input->cur - ctxt->input->base);
2185
2186
                /*
2187
                 * Add to sizeentities when parsing an external entity
2188
                 * for the first time.
2189
                 */
2190
414M
                ent = ctxt->input->entity;
2191
414M
                if ((ent->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
2192
414M
                    ((ent->flags & XML_ENT_PARSED) == 0)) {
2193
19.5k
                    ent->flags |= XML_ENT_PARSED;
2194
2195
19.5k
                    xmlSaturatedAdd(&ctxt->sizeentities, consumed);
2196
19.5k
                }
2197
2198
414M
                xmlParserEntityCheck(ctxt, consumed);
2199
2200
414M
                xmlPopInput(ctxt);
2201
770M
            } else {
2202
770M
                break;
2203
770M
            }
2204
2205
            /*
2206
             * Also increase the counter when entering or exiting a PERef.
2207
             * The spec says: "When a parameter-entity reference is recognized
2208
             * in the DTD and included, its replacement text MUST be enlarged
2209
             * by the attachment of one leading and one following space (#x20)
2210
             * character."
2211
             */
2212
2.07G
      if (res < INT_MAX)
2213
2.07G
    res++;
2214
2.07G
        }
2215
775M
    }
2216
947M
    return(res);
2217
947M
}
2218
2219
/************************************************************************
2220
 *                  *
2221
 *    Commodity functions to handle entities      *
2222
 *                  *
2223
 ************************************************************************/
2224
2225
/**
2226
 * xmlPopInput:
2227
 * @ctxt:  an XML parser context
2228
 *
2229
 * xmlPopInput: the current input pointed by ctxt->input came to an end
2230
 *          pop it and return the next char.
2231
 *
2232
 * Returns the current xmlChar in the parser context
2233
 */
2234
xmlChar
2235
414M
xmlPopInput(xmlParserCtxtPtr ctxt) {
2236
414M
    xmlParserInputPtr input;
2237
2238
414M
    if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
2239
414M
    if (xmlParserDebugEntities)
2240
0
  xmlGenericError(xmlGenericErrorContext,
2241
0
    "Popping input %d\n", ctxt->inputNr);
2242
414M
    if ((ctxt->inputNr > 1) && (ctxt->inSubset == 0) &&
2243
414M
        (ctxt->instate != XML_PARSER_EOF))
2244
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2245
0
                    "Unfinished entity outside the DTD");
2246
414M
    input = inputPop(ctxt);
2247
414M
    if (input->entity != NULL)
2248
414M
        input->entity->flags &= ~XML_ENT_EXPANDING;
2249
414M
    xmlFreeInputStream(input);
2250
414M
    if (*ctxt->input->cur == 0)
2251
198M
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2252
414M
    return(CUR);
2253
414M
}
2254
2255
/**
2256
 * xmlPushInput:
2257
 * @ctxt:  an XML parser context
2258
 * @input:  an XML parser input fragment (entity, XML fragment ...).
2259
 *
2260
 * xmlPushInput: switch to a new input stream which is stacked on top
2261
 *               of the previous one(s).
2262
 * Returns -1 in case of error or the index in the input stack
2263
 */
2264
int
2265
414M
xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
2266
414M
    int ret;
2267
414M
    if (input == NULL) return(-1);
2268
2269
414M
    if (xmlParserDebugEntities) {
2270
0
  if ((ctxt->input != NULL) && (ctxt->input->filename))
2271
0
      xmlGenericError(xmlGenericErrorContext,
2272
0
        "%s(%d): ", ctxt->input->filename,
2273
0
        ctxt->input->line);
2274
0
  xmlGenericError(xmlGenericErrorContext,
2275
0
    "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
2276
0
    }
2277
414M
    if (((ctxt->inputNr > 40) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2278
414M
        (ctxt->inputNr > 100)) {
2279
0
        xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2280
0
        while (ctxt->inputNr > 1)
2281
0
            xmlFreeInputStream(inputPop(ctxt));
2282
0
  return(-1);
2283
0
    }
2284
414M
    ret = inputPush(ctxt, input);
2285
414M
    if (ctxt->instate == XML_PARSER_EOF)
2286
0
        return(-1);
2287
414M
    GROW;
2288
414M
    return(ret);
2289
414M
}
2290
2291
/**
2292
 * xmlParseCharRef:
2293
 * @ctxt:  an XML parser context
2294
 *
2295
 * DEPRECATED: Internal function, don't use.
2296
 *
2297
 * Parse a numeric character reference. Always consumes '&'.
2298
 *
2299
 * [66] CharRef ::= '&#' [0-9]+ ';' |
2300
 *                  '&#x' [0-9a-fA-F]+ ';'
2301
 *
2302
 * [ WFC: Legal Character ]
2303
 * Characters referred to using character references must match the
2304
 * production for Char.
2305
 *
2306
 * Returns the value parsed (as an int), 0 in case of error
2307
 */
2308
int
2309
1.80M
xmlParseCharRef(xmlParserCtxtPtr ctxt) {
2310
1.80M
    int val = 0;
2311
1.80M
    int count = 0;
2312
2313
    /*
2314
     * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2315
     */
2316
1.80M
    if ((RAW == '&') && (NXT(1) == '#') &&
2317
1.80M
        (NXT(2) == 'x')) {
2318
805k
  SKIP(3);
2319
805k
  GROW;
2320
2.68M
  while (RAW != ';') { /* loop blocked by count */
2321
1.95M
      if (count++ > 20) {
2322
81.7k
    count = 0;
2323
81.7k
    GROW;
2324
81.7k
                if (ctxt->instate == XML_PARSER_EOF)
2325
0
                    return(0);
2326
81.7k
      }
2327
1.95M
      if ((RAW >= '0') && (RAW <= '9'))
2328
1.21M
          val = val * 16 + (CUR - '0');
2329
740k
      else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2330
559k
          val = val * 16 + (CUR - 'a') + 10;
2331
181k
      else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2332
103k
          val = val * 16 + (CUR - 'A') + 10;
2333
78.0k
      else {
2334
78.0k
    xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2335
78.0k
    val = 0;
2336
78.0k
    break;
2337
78.0k
      }
2338
1.87M
      if (val > 0x110000)
2339
903k
          val = 0x110000;
2340
2341
1.87M
      NEXT;
2342
1.87M
      count++;
2343
1.87M
  }
2344
805k
  if (RAW == ';') {
2345
      /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2346
727k
      ctxt->input->col++;
2347
727k
      ctxt->input->cur++;
2348
727k
  }
2349
995k
    } else if  ((RAW == '&') && (NXT(1) == '#')) {
2350
995k
  SKIP(2);
2351
995k
  GROW;
2352
4.45M
  while (RAW != ';') { /* loop blocked by count */
2353
3.57M
      if (count++ > 20) {
2354
82.1k
    count = 0;
2355
82.1k
    GROW;
2356
82.1k
                if (ctxt->instate == XML_PARSER_EOF)
2357
0
                    return(0);
2358
82.1k
      }
2359
3.57M
      if ((RAW >= '0') && (RAW <= '9'))
2360
3.45M
          val = val * 10 + (CUR - '0');
2361
110k
      else {
2362
110k
    xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2363
110k
    val = 0;
2364
110k
    break;
2365
110k
      }
2366
3.45M
      if (val > 0x110000)
2367
896k
          val = 0x110000;
2368
2369
3.45M
      NEXT;
2370
3.45M
      count++;
2371
3.45M
  }
2372
995k
  if (RAW == ';') {
2373
      /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2374
885k
      ctxt->input->col++;
2375
885k
      ctxt->input->cur++;
2376
885k
  }
2377
995k
    } else {
2378
0
        if (RAW == '&')
2379
0
            SKIP(1);
2380
0
        xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2381
0
    }
2382
2383
    /*
2384
     * [ WFC: Legal Character ]
2385
     * Characters referred to using character references must match the
2386
     * production for Char.
2387
     */
2388
1.80M
    if (val >= 0x110000) {
2389
10.9k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2390
10.9k
                "xmlParseCharRef: character reference out of bounds\n",
2391
10.9k
          val);
2392
1.79M
    } else if (IS_CHAR(val)) {
2393
1.56M
        return(val);
2394
1.56M
    } else {
2395
229k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2396
229k
                          "xmlParseCharRef: invalid xmlChar value %d\n",
2397
229k
                    val);
2398
229k
    }
2399
240k
    return(0);
2400
1.80M
}
2401
2402
/**
2403
 * xmlParseStringCharRef:
2404
 * @ctxt:  an XML parser context
2405
 * @str:  a pointer to an index in the string
2406
 *
2407
 * parse Reference declarations, variant parsing from a string rather
2408
 * than an an input flow.
2409
 *
2410
 * [66] CharRef ::= '&#' [0-9]+ ';' |
2411
 *                  '&#x' [0-9a-fA-F]+ ';'
2412
 *
2413
 * [ WFC: Legal Character ]
2414
 * Characters referred to using character references must match the
2415
 * production for Char.
2416
 *
2417
 * Returns the value parsed (as an int), 0 in case of error, str will be
2418
 *         updated to the current value of the index
2419
 */
2420
static int
2421
5.31M
xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2422
5.31M
    const xmlChar *ptr;
2423
5.31M
    xmlChar cur;
2424
5.31M
    int val = 0;
2425
2426
5.31M
    if ((str == NULL) || (*str == NULL)) return(0);
2427
5.31M
    ptr = *str;
2428
5.31M
    cur = *ptr;
2429
5.31M
    if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2430
101k
  ptr += 3;
2431
101k
  cur = *ptr;
2432
250k
  while (cur != ';') { /* Non input consuming loop */
2433
153k
      if ((cur >= '0') && (cur <= '9'))
2434
44.5k
          val = val * 16 + (cur - '0');
2435
109k
      else if ((cur >= 'a') && (cur <= 'f'))
2436
16.6k
          val = val * 16 + (cur - 'a') + 10;
2437
92.7k
      else if ((cur >= 'A') && (cur <= 'F'))
2438
87.7k
          val = val * 16 + (cur - 'A') + 10;
2439
4.98k
      else {
2440
4.98k
    xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2441
4.98k
    val = 0;
2442
4.98k
    break;
2443
4.98k
      }
2444
148k
      if (val > 0x110000)
2445
27.6k
          val = 0x110000;
2446
2447
148k
      ptr++;
2448
148k
      cur = *ptr;
2449
148k
  }
2450
101k
  if (cur == ';')
2451
96.2k
      ptr++;
2452
5.21M
    } else if  ((cur == '&') && (ptr[1] == '#')){
2453
5.21M
  ptr += 2;
2454
5.21M
  cur = *ptr;
2455
20.0M
  while (cur != ';') { /* Non input consuming loops */
2456
14.8M
      if ((cur >= '0') && (cur <= '9'))
2457
14.8M
          val = val * 10 + (cur - '0');
2458
6.35k
      else {
2459
6.35k
    xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2460
6.35k
    val = 0;
2461
6.35k
    break;
2462
6.35k
      }
2463
14.8M
      if (val > 0x110000)
2464
33.4k
          val = 0x110000;
2465
2466
14.8M
      ptr++;
2467
14.8M
      cur = *ptr;
2468
14.8M
  }
2469
5.21M
  if (cur == ';')
2470
5.20M
      ptr++;
2471
5.21M
    } else {
2472
0
  xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2473
0
  return(0);
2474
0
    }
2475
5.31M
    *str = ptr;
2476
2477
    /*
2478
     * [ WFC: Legal Character ]
2479
     * Characters referred to using character references must match the
2480
     * production for Char.
2481
     */
2482
5.31M
    if (val >= 0x110000) {
2483
1.38k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2484
1.38k
                "xmlParseStringCharRef: character reference out of bounds\n",
2485
1.38k
                val);
2486
5.31M
    } else if (IS_CHAR(val)) {
2487
5.29M
        return(val);
2488
5.29M
    } else {
2489
12.5k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2490
12.5k
        "xmlParseStringCharRef: invalid xmlChar value %d\n",
2491
12.5k
        val);
2492
12.5k
    }
2493
13.9k
    return(0);
2494
5.31M
}
2495
2496
/**
2497
 * xmlParserHandlePEReference:
2498
 * @ctxt:  the parser context
2499
 *
2500
 * [69] PEReference ::= '%' Name ';'
2501
 *
2502
 * [ WFC: No Recursion ]
2503
 * A parsed entity must not contain a recursive
2504
 * reference to itself, either directly or indirectly.
2505
 *
2506
 * [ WFC: Entity Declared ]
2507
 * In a document without any DTD, a document with only an internal DTD
2508
 * subset which contains no parameter entity references, or a document
2509
 * with "standalone='yes'", ...  ... The declaration of a parameter
2510
 * entity must precede any reference to it...
2511
 *
2512
 * [ VC: Entity Declared ]
2513
 * In a document with an external subset or external parameter entities
2514
 * with "standalone='no'", ...  ... The declaration of a parameter entity
2515
 * must precede any reference to it...
2516
 *
2517
 * [ WFC: In DTD ]
2518
 * Parameter-entity references may only appear in the DTD.
2519
 * NOTE: misleading but this is handled.
2520
 *
2521
 * A PEReference may have been detected in the current input stream
2522
 * the handling is done accordingly to
2523
 *      http://www.w3.org/TR/REC-xml#entproc
2524
 * i.e.
2525
 *   - Included in literal in entity values
2526
 *   - Included as Parameter Entity reference within DTDs
2527
 */
2528
void
2529
0
xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
2530
0
    switch(ctxt->instate) {
2531
0
  case XML_PARSER_CDATA_SECTION:
2532
0
      return;
2533
0
        case XML_PARSER_COMMENT:
2534
0
      return;
2535
0
  case XML_PARSER_START_TAG:
2536
0
      return;
2537
0
  case XML_PARSER_END_TAG:
2538
0
      return;
2539
0
        case XML_PARSER_EOF:
2540
0
      xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
2541
0
      return;
2542
0
        case XML_PARSER_PROLOG:
2543
0
  case XML_PARSER_START:
2544
0
  case XML_PARSER_MISC:
2545
0
      xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
2546
0
      return;
2547
0
  case XML_PARSER_ENTITY_DECL:
2548
0
        case XML_PARSER_CONTENT:
2549
0
        case XML_PARSER_ATTRIBUTE_VALUE:
2550
0
        case XML_PARSER_PI:
2551
0
  case XML_PARSER_SYSTEM_LITERAL:
2552
0
  case XML_PARSER_PUBLIC_LITERAL:
2553
      /* we just ignore it there */
2554
0
      return;
2555
0
        case XML_PARSER_EPILOG:
2556
0
      xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
2557
0
      return;
2558
0
  case XML_PARSER_ENTITY_VALUE:
2559
      /*
2560
       * NOTE: in the case of entity values, we don't do the
2561
       *       substitution here since we need the literal
2562
       *       entity value to be able to save the internal
2563
       *       subset of the document.
2564
       *       This will be handled by xmlStringDecodeEntities
2565
       */
2566
0
      return;
2567
0
        case XML_PARSER_DTD:
2568
      /*
2569
       * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2570
       * In the internal DTD subset, parameter-entity references
2571
       * can occur only where markup declarations can occur, not
2572
       * within markup declarations.
2573
       * In that case this is handled in xmlParseMarkupDecl
2574
       */
2575
0
      if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2576
0
    return;
2577
0
      if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
2578
0
    return;
2579
0
            break;
2580
0
        case XML_PARSER_IGNORE:
2581
0
            return;
2582
0
    }
2583
2584
0
    xmlParsePEReference(ctxt);
2585
0
}
2586
2587
/*
 * Macro used to grow the current buffer.
 * The new size is twice the current size plus n.
 * buffer##_size is expected to be a size_t
 * mem_error: is expected to handle memory allocation failures; it is
 * also the target when the new size wraps around below the old one.
 * On failure buffer and buffer##_size are left untouched.
 * n is parenthesized so that any expression can be passed as increment.
 */
#define growBuffer(buffer, n) {                                         \
    xmlChar *tmp;                                                       \
    size_t new_size = buffer##_size * 2 + (n);                          \
    if (new_size < buffer##_size) goto mem_error;                       \
    tmp = (xmlChar *) xmlRealloc(buffer, new_size);                     \
    if (tmp == NULL) goto mem_error;                                    \
    buffer = tmp;                                                       \
    buffer##_size = new_size;                                           \
}
2601
2602
/**
2603
 * xmlStringDecodeEntitiesInt:
2604
 * @ctxt:  the parser context
2605
 * @str:  the input string
2606
 * @len: the string length
2607
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2608
 * @end:  an end marker xmlChar, 0 if none
2609
 * @end2:  an end marker xmlChar, 0 if none
2610
 * @end3:  an end marker xmlChar, 0 if none
2611
 * @check:  whether to perform entity checks
2612
 */
2613
static xmlChar *
2614
xmlStringDecodeEntitiesInt(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2615
               int what, xmlChar end, xmlChar  end2, xmlChar end3,
2616
221M
                           int check) {
2617
221M
    xmlChar *buffer = NULL;
2618
221M
    size_t buffer_size = 0;
2619
221M
    size_t nbchars = 0;
2620
2621
221M
    xmlChar *current = NULL;
2622
221M
    xmlChar *rep = NULL;
2623
221M
    const xmlChar *last;
2624
221M
    xmlEntityPtr ent;
2625
221M
    int c,l;
2626
2627
221M
    if (str == NULL)
2628
43.1k
        return(NULL);
2629
221M
    last = str + len;
2630
2631
221M
    if (((ctxt->depth > 40) &&
2632
221M
         ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2633
221M
  (ctxt->depth > 100)) {
2634
3
  xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_LOOP,
2635
3
                       "Maximum entity nesting depth exceeded");
2636
3
  return(NULL);
2637
3
    }
2638
2639
    /*
2640
     * allocate a translation buffer.
2641
     */
2642
221M
    buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
2643
221M
    buffer = (xmlChar *) xmlMallocAtomic(buffer_size);
2644
221M
    if (buffer == NULL) goto mem_error;
2645
2646
    /*
2647
     * OK loop until we reach one of the ending char or a size limit.
2648
     * we are operating on already parsed values.
2649
     */
2650
221M
    if (str < last)
2651
220M
  c = CUR_SCHAR(str, l);
2652
1.46M
    else
2653
1.46M
        c = 0;
2654
14.6G
    while ((c != 0) && (c != end) && /* non input consuming loop */
2655
14.6G
           (c != end2) && (c != end3) &&
2656
14.6G
           (ctxt->instate != XML_PARSER_EOF)) {
2657
2658
14.3G
  if (c == 0) break;
2659
14.3G
        if ((c == '&') && (str[1] == '#')) {
2660
5.31M
      int val = xmlParseStringCharRef(ctxt, &str);
2661
5.31M
      if (val == 0)
2662
13.9k
                goto int_error;
2663
5.29M
      COPY_BUF(0,buffer,nbchars,val);
2664
5.29M
      if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2665
732
          growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2666
732
      }
2667
14.3G
  } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2668
5.60G
      if (xmlParserDebugEntities)
2669
0
    xmlGenericError(xmlGenericErrorContext,
2670
0
      "String decoding Entity Reference: %.30s\n",
2671
0
      str);
2672
5.60G
      ent = xmlParseStringEntityRef(ctxt, &str);
2673
5.60G
      if ((ent != NULL) &&
2674
5.60G
    (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2675
1.61M
    if (ent->content != NULL) {
2676
1.61M
        COPY_BUF(0,buffer,nbchars,ent->content[0]);
2677
1.61M
        if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2678
37.1k
      growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2679
37.1k
        }
2680
1.61M
    } else {
2681
0
        xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2682
0
          "predefined entity has no content\n");
2683
0
                    goto int_error;
2684
0
    }
2685
5.60G
      } else if ((ent != NULL) && (ent->content != NULL)) {
2686
202M
          if ((check) && (xmlParserEntityCheck(ctxt, ent->length)))
2687
1.71k
                    goto int_error;
2688
2689
202M
                if (ent->flags & XML_ENT_EXPANDING) {
2690
989
              xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2691
989
                    xmlHaltParser(ctxt);
2692
989
                    ent->content[0] = 0;
2693
989
                    goto int_error;
2694
989
                }
2695
2696
202M
                ent->flags |= XML_ENT_EXPANDING;
2697
202M
    ctxt->depth++;
2698
202M
    rep = xmlStringDecodeEntitiesInt(ctxt, ent->content,
2699
202M
                        ent->length, what, 0, 0, 0, check);
2700
202M
    ctxt->depth--;
2701
202M
                ent->flags &= ~XML_ENT_EXPANDING;
2702
2703
202M
    if (rep == NULL) {
2704
24.0k
                    ent->content[0] = 0;
2705
24.0k
                    goto int_error;
2706
24.0k
                }
2707
2708
202M
                current = rep;
2709
38.7G
                while (*current != 0) { /* non input consuming loop */
2710
38.5G
                    buffer[nbchars++] = *current++;
2711
38.5G
                    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2712
22.9M
                        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2713
22.9M
                    }
2714
38.5G
                }
2715
202M
                xmlFree(rep);
2716
202M
                rep = NULL;
2717
5.40G
      } else if (ent != NULL) {
2718
28.8M
    int i = xmlStrlen(ent->name);
2719
28.8M
    const xmlChar *cur = ent->name;
2720
2721
28.8M
    buffer[nbchars++] = '&';
2722
28.8M
    if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) {
2723
3.35M
        growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE);
2724
3.35M
    }
2725
1.37G
    for (;i > 0;i--)
2726
1.34G
        buffer[nbchars++] = *cur++;
2727
28.8M
    buffer[nbchars++] = ';';
2728
28.8M
      }
2729
8.77G
  } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2730
4.29M
      if (xmlParserDebugEntities)
2731
0
    xmlGenericError(xmlGenericErrorContext,
2732
0
      "String decoding PE Reference: %.30s\n", str);
2733
4.29M
      ent = xmlParseStringPEReference(ctxt, &str);
2734
4.29M
      if (ent != NULL) {
2735
4.02M
                if (ent->content == NULL) {
2736
        /*
2737
         * Note: external parsed entities will not be loaded,
2738
         * it is not required for a non-validating parser to
2739
         * complete external PEReferences coming from the
2740
         * internal subset
2741
         */
2742
8.95k
        if (((ctxt->options & XML_PARSE_NOENT) != 0) ||
2743
8.95k
      ((ctxt->options & XML_PARSE_DTDVALID) != 0) ||
2744
8.95k
      (ctxt->validate != 0)) {
2745
8.43k
      xmlLoadEntityContent(ctxt, ent);
2746
8.43k
        } else {
2747
526
      xmlWarningMsg(ctxt, XML_ERR_ENTITY_PROCESSING,
2748
526
      "not validating will not read content for PE entity %s\n",
2749
526
                          ent->name, NULL);
2750
526
        }
2751
8.95k
    }
2752
2753
4.02M
          if ((check) && (xmlParserEntityCheck(ctxt, ent->length)))
2754
259
                    goto int_error;
2755
2756
4.02M
                if (ent->flags & XML_ENT_EXPANDING) {
2757
606
              xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2758
606
                    xmlHaltParser(ctxt);
2759
606
                    if (ent->content != NULL)
2760
277
                        ent->content[0] = 0;
2761
606
                    goto int_error;
2762
606
                }
2763
2764
4.02M
                ent->flags |= XML_ENT_EXPANDING;
2765
4.02M
    ctxt->depth++;
2766
4.02M
    rep = xmlStringDecodeEntitiesInt(ctxt, ent->content,
2767
4.02M
                        ent->length, what, 0, 0, 0, check);
2768
4.02M
    ctxt->depth--;
2769
4.02M
                ent->flags &= ~XML_ENT_EXPANDING;
2770
2771
4.02M
    if (rep == NULL) {
2772
5.21k
                    if (ent->content != NULL)
2773
520
                        ent->content[0] = 0;
2774
5.21k
                    goto int_error;
2775
5.21k
                }
2776
4.02M
                current = rep;
2777
3.47G
                while (*current != 0) { /* non input consuming loop */
2778
3.47G
                    buffer[nbchars++] = *current++;
2779
3.47G
                    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2780
910k
                        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2781
910k
                    }
2782
3.47G
                }
2783
4.02M
                xmlFree(rep);
2784
4.02M
                rep = NULL;
2785
4.02M
      }
2786
8.76G
  } else {
2787
8.76G
      COPY_BUF(l,buffer,nbchars,c);
2788
8.76G
      str += l;
2789
8.76G
      if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2790
2.91M
          growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2791
2.91M
      }
2792
8.76G
  }
2793
14.3G
  if (str < last)
2794
14.1G
      c = CUR_SCHAR(str, l);
2795
220M
  else
2796
220M
      c = 0;
2797
14.3G
    }
2798
221M
    buffer[nbchars] = 0;
2799
221M
    return(buffer);
2800
2801
0
mem_error:
2802
0
    xmlErrMemory(ctxt, NULL);
2803
46.8k
int_error:
2804
46.8k
    if (rep != NULL)
2805
0
        xmlFree(rep);
2806
46.8k
    if (buffer != NULL)
2807
46.8k
        xmlFree(buffer);
2808
46.8k
    return(NULL);
2809
0
}
2810
2811
/**
2812
 * xmlStringLenDecodeEntities:
2813
 * @ctxt:  the parser context
2814
 * @str:  the input string
2815
 * @len: the string length
2816
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2817
 * @end:  an end marker xmlChar, 0 if none
2818
 * @end2:  an end marker xmlChar, 0 if none
2819
 * @end3:  an end marker xmlChar, 0 if none
2820
 *
2821
 * DEPRECATED: Internal function, don't use.
2822
 *
2823
 * Takes a entity string content and process to do the adequate substitutions.
2824
 *
2825
 * [67] Reference ::= EntityRef | CharRef
2826
 *
2827
 * [69] PEReference ::= '%' Name ';'
2828
 *
2829
 * Returns A newly allocated string with the substitution done. The caller
2830
 *      must deallocate it !
2831
 */
2832
xmlChar *
2833
xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2834
                           int what, xmlChar end, xmlChar  end2,
2835
21.8k
                           xmlChar end3) {
2836
21.8k
    if ((ctxt == NULL) || (str == NULL) || (len < 0))
2837
0
        return(NULL);
2838
21.8k
    return(xmlStringDecodeEntitiesInt(ctxt, str, len, what,
2839
21.8k
                                      end, end2, end3, 0));
2840
21.8k
}
2841
2842
/**
2843
 * xmlStringDecodeEntities:
2844
 * @ctxt:  the parser context
2845
 * @str:  the input string
2846
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2847
 * @end:  an end marker xmlChar, 0 if none
2848
 * @end2:  an end marker xmlChar, 0 if none
2849
 * @end3:  an end marker xmlChar, 0 if none
2850
 *
2851
 * DEPRECATED: Internal function, don't use.
2852
 *
2853
 * Takes a entity string content and process to do the adequate substitutions.
2854
 *
2855
 * [67] Reference ::= EntityRef | CharRef
2856
 *
2857
 * [69] PEReference ::= '%' Name ';'
2858
 *
2859
 * Returns A newly allocated string with the substitution done. The caller
2860
 *      must deallocate it !
2861
 */
2862
xmlChar *
2863
xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2864
967k
            xmlChar end, xmlChar  end2, xmlChar end3) {
2865
967k
    if ((ctxt == NULL) || (str == NULL)) return(NULL);
2866
967k
    return(xmlStringDecodeEntitiesInt(ctxt, str, xmlStrlen(str), what,
2867
967k
                                      end, end2, end3, 0));
2868
967k
}
2869
2870
/************************************************************************
2871
 *                  *
2872
 *    Commodity functions, cleanup needed ?     *
2873
 *                  *
2874
 ************************************************************************/
2875
2876
/**
2877
 * areBlanks:
2878
 * @ctxt:  an XML parser context
2879
 * @str:  a xmlChar *
2880
 * @len:  the size of @str
2881
 * @blank_chars: we know the chars are blanks
2882
 *
2883
 * Is this a sequence of blank chars that one can ignore ?
2884
 *
2885
 * Returns 1 if ignorable 0 otherwise.
2886
 */
2887
2888
static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2889
16.8M
                     int blank_chars) {
2890
16.8M
    int i, ret;
2891
16.8M
    xmlNodePtr lastChild;
2892
2893
    /*
2894
     * Don't spend time trying to differentiate them, the same callback is
2895
     * used !
2896
     */
2897
16.8M
    if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
2898
519k
  return(0);
2899
2900
    /*
2901
     * Check for xml:space value.
2902
     */
2903
16.3M
    if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2904
16.3M
        (*(ctxt->space) == -2))
2905
3.22M
  return(0);
2906
2907
    /*
2908
     * Check that the string is made of blanks
2909
     */
2910
13.1M
    if (blank_chars == 0) {
2911
44.6M
  for (i = 0;i < len;i++)
2912
37.7M
      if (!(IS_BLANK_CH(str[i]))) return(0);
2913
8.33M
    }
2914
2915
    /*
2916
     * Look if the element is mixed content in the DTD if available
2917
     */
2918
11.6M
    if (ctxt->node == NULL) return(0);
2919
11.5M
    if (ctxt->myDoc != NULL) {
2920
11.5M
  ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2921
11.5M
        if (ret == 0) return(1);
2922
8.32M
        if (ret == 1) return(0);
2923
8.32M
    }
2924
2925
    /*
2926
     * Otherwise, heuristic :-\
2927
     */
2928
8.20M
    if ((RAW != '<') && (RAW != 0xD)) return(0);
2929
8.13M
    if ((ctxt->node->children == NULL) &&
2930
8.13M
  (RAW == '<') && (NXT(1) == '/')) return(0);
2931
2932
8.12M
    lastChild = xmlGetLastChild(ctxt->node);
2933
8.12M
    if (lastChild == NULL) {
2934
1.29M
        if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2935
1.29M
            (ctxt->node->content != NULL)) return(0);
2936
6.83M
    } else if (xmlNodeIsText(lastChild))
2937
63.1k
        return(0);
2938
6.77M
    else if ((ctxt->node->children != NULL) &&
2939
6.77M
             (xmlNodeIsText(ctxt->node->children)))
2940
97.3k
        return(0);
2941
7.96M
    return(1);
2942
8.12M
}
2943
2944
/************************************************************************
2945
 *                  *
2946
 *    Extra stuff for namespace support     *
2947
 *  Relates to http://www.w3.org/TR/WD-xml-names      *
2948
 *                  *
2949
 ************************************************************************/
2950
2951
/**
2952
 * xmlSplitQName:
2953
 * @ctxt:  an XML parser context
2954
 * @name:  an XML parser context
2955
 * @prefix:  a xmlChar **
2956
 *
2957
 * parse an UTF8 encoded XML qualified name string
2958
 *
2959
 * [NS 5] QName ::= (Prefix ':')? LocalPart
2960
 *
2961
 * [NS 6] Prefix ::= NCName
2962
 *
2963
 * [NS 7] LocalPart ::= NCName
2964
 *
2965
 * Returns the local part, and prefix is updated
2966
 *   to get the Prefix if any.
2967
 */
2968
2969
xmlChar *
2970
51.7M
xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2971
51.7M
    xmlChar buf[XML_MAX_NAMELEN + 5];
2972
51.7M
    xmlChar *buffer = NULL;
2973
51.7M
    int len = 0;
2974
51.7M
    int max = XML_MAX_NAMELEN;
2975
51.7M
    xmlChar *ret = NULL;
2976
51.7M
    const xmlChar *cur = name;
2977
51.7M
    int c;
2978
2979
51.7M
    if (prefix == NULL) return(NULL);
2980
51.7M
    *prefix = NULL;
2981
2982
51.7M
    if (cur == NULL) return(NULL);
2983
2984
#ifndef XML_XML_NAMESPACE
2985
    /* xml: prefix is not really a namespace */
2986
    if ((cur[0] == 'x') && (cur[1] == 'm') &&
2987
        (cur[2] == 'l') && (cur[3] == ':'))
2988
  return(xmlStrdup(name));
2989
#endif
2990
2991
    /* nasty but well=formed */
2992
51.7M
    if (cur[0] == ':')
2993
12.0k
  return(xmlStrdup(name));
2994
2995
51.7M
    c = *cur++;
2996
247M
    while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2997
195M
  buf[len++] = c;
2998
195M
  c = *cur++;
2999
195M
    }
3000
51.7M
    if (len >= max) {
3001
  /*
3002
   * Okay someone managed to make a huge name, so he's ready to pay
3003
   * for the processing speed.
3004
   */
3005
36.0k
  max = len * 2;
3006
3007
36.0k
  buffer = (xmlChar *) xmlMallocAtomic(max);
3008
36.0k
  if (buffer == NULL) {
3009
0
      xmlErrMemory(ctxt, NULL);
3010
0
      return(NULL);
3011
0
  }
3012
36.0k
  memcpy(buffer, buf, len);
3013
92.6M
  while ((c != 0) && (c != ':')) { /* tested bigname.xml */
3014
92.5M
      if (len + 10 > max) {
3015
84.5k
          xmlChar *tmp;
3016
3017
84.5k
    max *= 2;
3018
84.5k
    tmp = (xmlChar *) xmlRealloc(buffer, max);
3019
84.5k
    if (tmp == NULL) {
3020
0
        xmlFree(buffer);
3021
0
        xmlErrMemory(ctxt, NULL);
3022
0
        return(NULL);
3023
0
    }
3024
84.5k
    buffer = tmp;
3025
84.5k
      }
3026
92.5M
      buffer[len++] = c;
3027
92.5M
      c = *cur++;
3028
92.5M
  }
3029
36.0k
  buffer[len] = 0;
3030
36.0k
    }
3031
3032
51.7M
    if ((c == ':') && (*cur == 0)) {
3033
59.7k
        if (buffer != NULL)
3034
451
      xmlFree(buffer);
3035
59.7k
  *prefix = NULL;
3036
59.7k
  return(xmlStrdup(name));
3037
59.7k
    }
3038
3039
51.7M
    if (buffer == NULL)
3040
51.6M
  ret = xmlStrndup(buf, len);
3041
35.5k
    else {
3042
35.5k
  ret = buffer;
3043
35.5k
  buffer = NULL;
3044
35.5k
  max = XML_MAX_NAMELEN;
3045
35.5k
    }
3046
3047
3048
51.7M
    if (c == ':') {
3049
2.63M
  c = *cur;
3050
2.63M
        *prefix = ret;
3051
2.63M
  if (c == 0) {
3052
0
      return(xmlStrndup(BAD_CAST "", 0));
3053
0
  }
3054
2.63M
  len = 0;
3055
3056
  /*
3057
   * Check that the first character is proper to start
3058
   * a new name
3059
   */
3060
2.63M
  if (!(((c >= 0x61) && (c <= 0x7A)) ||
3061
2.63M
        ((c >= 0x41) && (c <= 0x5A)) ||
3062
2.63M
        (c == '_') || (c == ':'))) {
3063
17.6k
      int l;
3064
17.6k
      int first = CUR_SCHAR(cur, l);
3065
3066
17.6k
      if (!IS_LETTER(first) && (first != '_')) {
3067
7.28k
    xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
3068
7.28k
          "Name %s is not XML Namespace compliant\n",
3069
7.28k
          name);
3070
7.28k
      }
3071
17.6k
  }
3072
2.63M
  cur++;
3073
3074
16.8M
  while ((c != 0) && (len < max)) { /* tested bigname2.xml */
3075
14.2M
      buf[len++] = c;
3076
14.2M
      c = *cur++;
3077
14.2M
  }
3078
2.63M
  if (len >= max) {
3079
      /*
3080
       * Okay someone managed to make a huge name, so he's ready to pay
3081
       * for the processing speed.
3082
       */
3083
10.8k
      max = len * 2;
3084
3085
10.8k
      buffer = (xmlChar *) xmlMallocAtomic(max);
3086
10.8k
      if (buffer == NULL) {
3087
0
          xmlErrMemory(ctxt, NULL);
3088
0
    return(NULL);
3089
0
      }
3090
10.8k
      memcpy(buffer, buf, len);
3091
27.8M
      while (c != 0) { /* tested bigname2.xml */
3092
27.8M
    if (len + 10 > max) {
3093
23.3k
        xmlChar *tmp;
3094
3095
23.3k
        max *= 2;
3096
23.3k
        tmp = (xmlChar *) xmlRealloc(buffer, max);
3097
23.3k
        if (tmp == NULL) {
3098
0
      xmlErrMemory(ctxt, NULL);
3099
0
      xmlFree(buffer);
3100
0
      return(NULL);
3101
0
        }
3102
23.3k
        buffer = tmp;
3103
23.3k
    }
3104
27.8M
    buffer[len++] = c;
3105
27.8M
    c = *cur++;
3106
27.8M
      }
3107
10.8k
      buffer[len] = 0;
3108
10.8k
  }
3109
3110
2.63M
  if (buffer == NULL)
3111
2.62M
      ret = xmlStrndup(buf, len);
3112
10.8k
  else {
3113
10.8k
      ret = buffer;
3114
10.8k
  }
3115
2.63M
    }
3116
3117
51.7M
    return(ret);
3118
51.7M
}
3119
3120
/************************************************************************
3121
 *                  *
3122
 *      The parser itself       *
3123
 *  Relates to http://www.w3.org/TR/REC-xml       *
3124
 *                  *
3125
 ************************************************************************/
3126
3127
/************************************************************************
3128
 *                  *
3129
 *  Routines to parse Name, NCName and NmToken      *
3130
 *                  *
3131
 ************************************************************************/
3132
/*
 * Call counters for the name parsing routines, only compiled in when DEBUG
 * is defined. nbParseName, nbParseNameComplex and nbParseNCNameComplex are
 * incremented at the top of the routines of the same name below; the
 * remaining counters are presumably bumped by their namesakes elsewhere in
 * this file (TODO confirm).
 */
#ifdef DEBUG
3133
static unsigned long nbParseName = 0;
3134
static unsigned long nbParseNmToken = 0;
3135
static unsigned long nbParseNCName = 0;
3136
static unsigned long nbParseNCNameComplex = 0;
3137
static unsigned long nbParseNameComplex = 0;
3138
static unsigned long nbParseStringName = 0;
3139
#endif
3140
3141
/*
3142
 * The two following functions are related to the change of accepted
3143
 * characters for Name and NmToken in the Revision 5 of XML-1.0
3144
 * They correspond to the modified production [4] and the new production [4a]
3145
 * changes in that revision. Also note that the macros used for the
3146
 * productions Letter, Digit, CombiningChar and Extender are not needed
3147
 * anymore.
3148
 * We still keep compatibility to pre-revision5 parsing semantic if the
3149
 * new XML_PARSE_OLD10 option is given to the parser.
3150
 */
3151
static int
3152
292M
xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
3153
292M
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3154
        /*
3155
   * Use the new checks of production [4] [4a] amd [5] of the
3156
   * Update 5 of XML-1.0
3157
   */
3158
210M
  if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3159
210M
      (((c >= 'a') && (c <= 'z')) ||
3160
210M
       ((c >= 'A') && (c <= 'Z')) ||
3161
210M
       (c == '_') || (c == ':') ||
3162
210M
       ((c >= 0xC0) && (c <= 0xD6)) ||
3163
210M
       ((c >= 0xD8) && (c <= 0xF6)) ||
3164
210M
       ((c >= 0xF8) && (c <= 0x2FF)) ||
3165
210M
       ((c >= 0x370) && (c <= 0x37D)) ||
3166
210M
       ((c >= 0x37F) && (c <= 0x1FFF)) ||
3167
210M
       ((c >= 0x200C) && (c <= 0x200D)) ||
3168
210M
       ((c >= 0x2070) && (c <= 0x218F)) ||
3169
210M
       ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3170
210M
       ((c >= 0x3001) && (c <= 0xD7FF)) ||
3171
210M
       ((c >= 0xF900) && (c <= 0xFDCF)) ||
3172
210M
       ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3173
210M
       ((c >= 0x10000) && (c <= 0xEFFFF))))
3174
209M
      return(1);
3175
210M
    } else {
3176
82.8M
        if (IS_LETTER(c) || (c == '_') || (c == ':'))
3177
82.4M
      return(1);
3178
82.8M
    }
3179
1.08M
    return(0);
3180
292M
}
3181
3182
static int
3183
7.11G
xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
3184
7.11G
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3185
        /*
3186
   * Use the new checks of production [4] [4a] amd [5] of the
3187
   * Update 5 of XML-1.0
3188
   */
3189
4.59G
  if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3190
4.59G
      (((c >= 'a') && (c <= 'z')) ||
3191
4.59G
       ((c >= 'A') && (c <= 'Z')) ||
3192
4.59G
       ((c >= '0') && (c <= '9')) || /* !start */
3193
4.59G
       (c == '_') || (c == ':') ||
3194
4.59G
       (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3195
4.59G
       ((c >= 0xC0) && (c <= 0xD6)) ||
3196
4.59G
       ((c >= 0xD8) && (c <= 0xF6)) ||
3197
4.59G
       ((c >= 0xF8) && (c <= 0x2FF)) ||
3198
4.59G
       ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3199
4.59G
       ((c >= 0x370) && (c <= 0x37D)) ||
3200
4.59G
       ((c >= 0x37F) && (c <= 0x1FFF)) ||
3201
4.59G
       ((c >= 0x200C) && (c <= 0x200D)) ||
3202
4.59G
       ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3203
4.59G
       ((c >= 0x2070) && (c <= 0x218F)) ||
3204
4.59G
       ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3205
4.59G
       ((c >= 0x3001) && (c <= 0xD7FF)) ||
3206
4.59G
       ((c >= 0xF900) && (c <= 0xFDCF)) ||
3207
4.59G
       ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3208
4.59G
       ((c >= 0x10000) && (c <= 0xEFFFF))))
3209
4.38G
       return(1);
3210
4.59G
    } else {
3211
2.51G
        if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3212
2.51G
            (c == '.') || (c == '-') ||
3213
2.51G
      (c == '_') || (c == ':') ||
3214
2.51G
      (IS_COMBINING(c)) ||
3215
2.51G
      (IS_EXTENDER(c)))
3216
2.43G
      return(1);
3217
2.51G
    }
3218
299M
    return(0);
3219
7.11G
}
3220
3221
static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
3222
                                          int *len, int *alloc, int normalize);
3223
3224
static const xmlChar *
3225
9.58M
xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3226
9.58M
    int len = 0, l;
3227
9.58M
    int c;
3228
9.58M
    int count = 0;
3229
9.58M
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3230
3.10M
                    XML_MAX_TEXT_LENGTH :
3231
9.58M
                    XML_MAX_NAME_LENGTH;
3232
3233
#ifdef DEBUG
3234
    nbParseNameComplex++;
3235
#endif
3236
3237
    /*
3238
     * Handler for more complex cases
3239
     */
3240
9.58M
    GROW;
3241
9.58M
    if (ctxt->instate == XML_PARSER_EOF)
3242
129
        return(NULL);
3243
9.58M
    c = CUR_CHAR(l);
3244
9.58M
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3245
        /*
3246
   * Use the new checks of production [4] [4a] amd [5] of the
3247
   * Update 5 of XML-1.0
3248
   */
3249
6.06M
  if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3250
6.06M
      (!(((c >= 'a') && (c <= 'z')) ||
3251
5.85M
         ((c >= 'A') && (c <= 'Z')) ||
3252
5.85M
         (c == '_') || (c == ':') ||
3253
5.85M
         ((c >= 0xC0) && (c <= 0xD6)) ||
3254
5.85M
         ((c >= 0xD8) && (c <= 0xF6)) ||
3255
5.85M
         ((c >= 0xF8) && (c <= 0x2FF)) ||
3256
5.85M
         ((c >= 0x370) && (c <= 0x37D)) ||
3257
5.85M
         ((c >= 0x37F) && (c <= 0x1FFF)) ||
3258
5.85M
         ((c >= 0x200C) && (c <= 0x200D)) ||
3259
5.85M
         ((c >= 0x2070) && (c <= 0x218F)) ||
3260
5.85M
         ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3261
5.85M
         ((c >= 0x3001) && (c <= 0xD7FF)) ||
3262
5.85M
         ((c >= 0xF900) && (c <= 0xFDCF)) ||
3263
5.85M
         ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3264
5.85M
         ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3265
1.84M
      return(NULL);
3266
1.84M
  }
3267
4.22M
  len += l;
3268
4.22M
  NEXTL(l);
3269
4.22M
  c = CUR_CHAR(l);
3270
92.2M
  while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3271
92.2M
         (((c >= 'a') && (c <= 'z')) ||
3272
92.1M
          ((c >= 'A') && (c <= 'Z')) ||
3273
92.1M
          ((c >= '0') && (c <= '9')) || /* !start */
3274
92.1M
          (c == '_') || (c == ':') ||
3275
92.1M
          (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3276
92.1M
          ((c >= 0xC0) && (c <= 0xD6)) ||
3277
92.1M
          ((c >= 0xD8) && (c <= 0xF6)) ||
3278
92.1M
          ((c >= 0xF8) && (c <= 0x2FF)) ||
3279
92.1M
          ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3280
92.1M
          ((c >= 0x370) && (c <= 0x37D)) ||
3281
92.1M
          ((c >= 0x37F) && (c <= 0x1FFF)) ||
3282
92.1M
          ((c >= 0x200C) && (c <= 0x200D)) ||
3283
92.1M
          ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3284
92.1M
          ((c >= 0x2070) && (c <= 0x218F)) ||
3285
92.1M
          ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3286
92.1M
          ((c >= 0x3001) && (c <= 0xD7FF)) ||
3287
92.1M
          ((c >= 0xF900) && (c <= 0xFDCF)) ||
3288
92.1M
          ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3289
92.1M
          ((c >= 0x10000) && (c <= 0xEFFFF))
3290
92.1M
    )) {
3291
88.0M
      if (count++ > XML_PARSER_CHUNK_SIZE) {
3292
674k
    count = 0;
3293
674k
    GROW;
3294
674k
                if (ctxt->instate == XML_PARSER_EOF)
3295
0
                    return(NULL);
3296
674k
      }
3297
88.0M
            if (len <= INT_MAX - l)
3298
88.0M
          len += l;
3299
88.0M
      NEXTL(l);
3300
88.0M
      c = CUR_CHAR(l);
3301
88.0M
  }
3302
4.22M
    } else {
3303
3.52M
  if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3304
3.52M
      (!IS_LETTER(c) && (c != '_') &&
3305
3.36M
       (c != ':'))) {
3306
1.58M
      return(NULL);
3307
1.58M
  }
3308
1.93M
  len += l;
3309
1.93M
  NEXTL(l);
3310
1.93M
  c = CUR_CHAR(l);
3311
3312
54.7M
  while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3313
54.7M
         ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3314
54.7M
    (c == '.') || (c == '-') ||
3315
54.7M
    (c == '_') || (c == ':') ||
3316
54.7M
    (IS_COMBINING(c)) ||
3317
54.7M
    (IS_EXTENDER(c)))) {
3318
52.8M
      if (count++ > XML_PARSER_CHUNK_SIZE) {
3319
425k
    count = 0;
3320
425k
    GROW;
3321
425k
                if (ctxt->instate == XML_PARSER_EOF)
3322
0
                    return(NULL);
3323
425k
      }
3324
52.8M
            if (len <= INT_MAX - l)
3325
52.8M
          len += l;
3326
52.8M
      NEXTL(l);
3327
52.8M
      c = CUR_CHAR(l);
3328
52.8M
  }
3329
1.93M
    }
3330
6.16M
    if (len > maxLength) {
3331
271
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3332
271
        return(NULL);
3333
271
    }
3334
6.16M
    if (ctxt->input->cur - ctxt->input->base < len) {
3335
        /*
3336
         * There were a couple of bugs where PERefs lead to to a change
3337
         * of the buffer. Check the buffer size to avoid passing an invalid
3338
         * pointer to xmlDictLookup.
3339
         */
3340
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
3341
0
                    "unexpected change of input buffer");
3342
0
        return (NULL);
3343
0
    }
3344
6.16M
    if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3345
2.35k
        return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
3346
6.16M
    return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3347
6.16M
}
3348
3349
/**
3350
 * xmlParseName:
3351
 * @ctxt:  an XML parser context
3352
 *
3353
 * DEPRECATED: Internal function, don't use.
3354
 *
3355
 * parse an XML name.
3356
 *
3357
 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3358
 *                  CombiningChar | Extender
3359
 *
3360
 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3361
 *
3362
 * [6] Names ::= Name (#x20 Name)*
3363
 *
3364
 * Returns the Name parsed or NULL
3365
 */
3366
3367
const xmlChar *
3368
641M
xmlParseName(xmlParserCtxtPtr ctxt) {
3369
641M
    const xmlChar *in;
3370
641M
    const xmlChar *ret;
3371
641M
    size_t count = 0;
3372
641M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3373
171M
                       XML_MAX_TEXT_LENGTH :
3374
641M
                       XML_MAX_NAME_LENGTH;
3375
3376
641M
    GROW;
3377
3378
#ifdef DEBUG
3379
    nbParseName++;
3380
#endif
3381
3382
    /*
3383
     * Accelerator for simple ASCII names
3384
     */
3385
641M
    in = ctxt->input->cur;
3386
641M
    if (((*in >= 0x61) && (*in <= 0x7A)) ||
3387
641M
  ((*in >= 0x41) && (*in <= 0x5A)) ||
3388
641M
  (*in == '_') || (*in == ':')) {
3389
637M
  in++;
3390
2.91G
  while (((*in >= 0x61) && (*in <= 0x7A)) ||
3391
2.91G
         ((*in >= 0x41) && (*in <= 0x5A)) ||
3392
2.91G
         ((*in >= 0x30) && (*in <= 0x39)) ||
3393
2.91G
         (*in == '_') || (*in == '-') ||
3394
2.91G
         (*in == ':') || (*in == '.'))
3395
2.27G
      in++;
3396
637M
  if ((*in > 0) && (*in < 0x80)) {
3397
631M
      count = in - ctxt->input->cur;
3398
631M
            if (count > maxLength) {
3399
293
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3400
293
                return(NULL);
3401
293
            }
3402
631M
      ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3403
631M
      ctxt->input->cur = in;
3404
631M
      ctxt->input->col += count;
3405
631M
      if (ret == NULL)
3406
0
          xmlErrMemory(ctxt, NULL);
3407
631M
      return(ret);
3408
631M
  }
3409
637M
    }
3410
    /* accelerator for special cases */
3411
9.58M
    return(xmlParseNameComplex(ctxt));
3412
641M
}
3413
3414
static const xmlChar *
3415
1.28M
xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3416
1.28M
    int len = 0, l;
3417
1.28M
    int c;
3418
1.28M
    int count = 0;
3419
1.28M
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3420
285k
                    XML_MAX_TEXT_LENGTH :
3421
1.28M
                    XML_MAX_NAME_LENGTH;
3422
1.28M
    size_t startPosition = 0;
3423
3424
#ifdef DEBUG
3425
    nbParseNCNameComplex++;
3426
#endif
3427
3428
    /*
3429
     * Handler for more complex cases
3430
     */
3431
1.28M
    GROW;
3432
1.28M
    startPosition = CUR_PTR - BASE_PTR;
3433
1.28M
    c = CUR_CHAR(l);
3434
1.28M
    if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3435
1.28M
  (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3436
1.11M
  return(NULL);
3437
1.11M
    }
3438
3439
18.4M
    while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3440
18.4M
     (xmlIsNameChar(ctxt, c) && (c != ':'))) {
3441
18.2M
  if (count++ > XML_PARSER_CHUNK_SIZE) {
3442
167k
      count = 0;
3443
167k
      GROW;
3444
167k
            if (ctxt->instate == XML_PARSER_EOF)
3445
0
                return(NULL);
3446
167k
  }
3447
18.2M
        if (len <= INT_MAX - l)
3448
18.2M
      len += l;
3449
18.2M
  NEXTL(l);
3450
18.2M
  c = CUR_CHAR(l);
3451
18.2M
  if (c == 0) {
3452
37.0k
      count = 0;
3453
      /*
3454
       * when shrinking to extend the buffer we really need to preserve
3455
       * the part of the name we already parsed. Hence rolling back
3456
       * by current length.
3457
       */
3458
37.0k
      ctxt->input->cur -= l;
3459
37.0k
      GROW;
3460
37.0k
            if (ctxt->instate == XML_PARSER_EOF)
3461
0
                return(NULL);
3462
37.0k
      ctxt->input->cur += l;
3463
37.0k
      c = CUR_CHAR(l);
3464
37.0k
  }
3465
18.2M
    }
3466
171k
    if (len > maxLength) {
3467
185
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3468
185
        return(NULL);
3469
185
    }
3470
171k
    return(xmlDictLookup(ctxt->dict, (BASE_PTR + startPosition), len));
3471
171k
}
3472
3473
/**
3474
 * xmlParseNCName:
3475
 * @ctxt:  an XML parser context
3476
 * @len:  length of the string parsed
3477
 *
3478
 * parse an XML name.
3479
 *
3480
 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3481
 *                      CombiningChar | Extender
3482
 *
3483
 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3484
 *
3485
 * Returns the Name parsed or NULL
3486
 */
3487
3488
static const xmlChar *
3489
51.5M
xmlParseNCName(xmlParserCtxtPtr ctxt) {
3490
51.5M
    const xmlChar *in, *e;
3491
51.5M
    const xmlChar *ret;
3492
51.5M
    size_t count = 0;
3493
51.5M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3494
7.84M
                       XML_MAX_TEXT_LENGTH :
3495
51.5M
                       XML_MAX_NAME_LENGTH;
3496
3497
#ifdef DEBUG
3498
    nbParseNCName++;
3499
#endif
3500
3501
    /*
3502
     * Accelerator for simple ASCII names
3503
     */
3504
51.5M
    in = ctxt->input->cur;
3505
51.5M
    e = ctxt->input->end;
3506
51.5M
    if ((((*in >= 0x61) && (*in <= 0x7A)) ||
3507
51.5M
   ((*in >= 0x41) && (*in <= 0x5A)) ||
3508
51.5M
   (*in == '_')) && (in < e)) {
3509
50.3M
  in++;
3510
198M
  while ((((*in >= 0x61) && (*in <= 0x7A)) ||
3511
198M
          ((*in >= 0x41) && (*in <= 0x5A)) ||
3512
198M
          ((*in >= 0x30) && (*in <= 0x39)) ||
3513
198M
          (*in == '_') || (*in == '-') ||
3514
198M
          (*in == '.')) && (in < e))
3515
147M
      in++;
3516
50.3M
  if (in >= e)
3517
13.8k
      goto complex;
3518
50.3M
  if ((*in > 0) && (*in < 0x80)) {
3519
50.2M
      count = in - ctxt->input->cur;
3520
50.2M
            if (count > maxLength) {
3521
163
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3522
163
                return(NULL);
3523
163
            }
3524
50.2M
      ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3525
50.2M
      ctxt->input->cur = in;
3526
50.2M
      ctxt->input->col += count;
3527
50.2M
      if (ret == NULL) {
3528
0
          xmlErrMemory(ctxt, NULL);
3529
0
      }
3530
50.2M
      return(ret);
3531
50.2M
  }
3532
50.3M
    }
3533
1.28M
complex:
3534
1.28M
    return(xmlParseNCNameComplex(ctxt));
3535
51.5M
}
3536
3537
/**
3538
 * xmlParseNameAndCompare:
3539
 * @ctxt:  an XML parser context
3540
 *
3541
 * parse an XML name and compares for match
3542
 * (specialized for endtag parsing)
3543
 *
3544
 * Returns NULL for an illegal name, (xmlChar*) 1 for success
3545
 * and the name for mismatch
3546
 */
3547
3548
static const xmlChar *
3549
24.2M
xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
3550
24.2M
    register const xmlChar *cmp = other;
3551
24.2M
    register const xmlChar *in;
3552
24.2M
    const xmlChar *ret;
3553
3554
24.2M
    GROW;
3555
24.2M
    if (ctxt->instate == XML_PARSER_EOF)
3556
0
        return(NULL);
3557
3558
24.2M
    in = ctxt->input->cur;
3559
125M
    while (*in != 0 && *in == *cmp) {
3560
101M
  ++in;
3561
101M
  ++cmp;
3562
101M
    }
3563
24.2M
    if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
3564
  /* success */
3565
23.7M
  ctxt->input->col += in - ctxt->input->cur;
3566
23.7M
  ctxt->input->cur = in;
3567
23.7M
  return (const xmlChar*) 1;
3568
23.7M
    }
3569
    /* failure (or end of input buffer), check with full function */
3570
517k
    ret = xmlParseName (ctxt);
3571
    /* strings coming from the dictionary direct compare possible */
3572
517k
    if (ret == other) {
3573
24.4k
  return (const xmlChar*) 1;
3574
24.4k
    }
3575
493k
    return ret;
3576
517k
}
3577
3578
/**
3579
 * xmlParseStringName:
3580
 * @ctxt:  an XML parser context
3581
 * @str:  a pointer to the string pointer (IN/OUT)
3582
 *
3583
 * parse an XML name.
3584
 *
3585
 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3586
 *                  CombiningChar | Extender
3587
 *
3588
 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3589
 *
3590
 * [6] Names ::= Name (#x20 Name)*
3591
 *
3592
 * Returns the Name parsed or NULL. The @str pointer
3593
 * is updated to the current location in the string.
3594
 */
3595
3596
static xmlChar *
3597
291M
xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3598
291M
    xmlChar buf[XML_MAX_NAMELEN + 5];
3599
291M
    const xmlChar *cur = *str;
3600
291M
    int len = 0, l;
3601
291M
    int c;
3602
291M
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3603
83.8M
                    XML_MAX_TEXT_LENGTH :
3604
291M
                    XML_MAX_NAME_LENGTH;
3605
3606
#ifdef DEBUG
3607
    nbParseStringName++;
3608
#endif
3609
3610
291M
    c = CUR_SCHAR(cur, l);
3611
291M
    if (!xmlIsNameStartChar(ctxt, c)) {
3612
124k
  return(NULL);
3613
124k
    }
3614
3615
291M
    COPY_BUF(l,buf,len,c);
3616
291M
    cur += l;
3617
291M
    c = CUR_SCHAR(cur, l);
3618
3.21G
    while (xmlIsNameChar(ctxt, c)) {
3619
2.93G
  COPY_BUF(l,buf,len,c);
3620
2.93G
  cur += l;
3621
2.93G
  c = CUR_SCHAR(cur, l);
3622
2.93G
  if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3623
      /*
3624
       * Okay someone managed to make a huge name, so he's ready to pay
3625
       * for the processing speed.
3626
       */
3627
16.4M
      xmlChar *buffer;
3628
16.4M
      int max = len * 2;
3629
3630
16.4M
      buffer = (xmlChar *) xmlMallocAtomic(max);
3631
16.4M
      if (buffer == NULL) {
3632
0
          xmlErrMemory(ctxt, NULL);
3633
0
    return(NULL);
3634
0
      }
3635
16.4M
      memcpy(buffer, buf, len);
3636
3.81G
      while (xmlIsNameChar(ctxt, c)) {
3637
3.79G
    if (len + 10 > max) {
3638
16.4M
        xmlChar *tmp;
3639
3640
16.4M
        max *= 2;
3641
16.4M
        tmp = (xmlChar *) xmlRealloc(buffer, max);
3642
16.4M
        if (tmp == NULL) {
3643
0
      xmlErrMemory(ctxt, NULL);
3644
0
      xmlFree(buffer);
3645
0
      return(NULL);
3646
0
        }
3647
16.4M
        buffer = tmp;
3648
16.4M
    }
3649
3.79G
    COPY_BUF(l,buffer,len,c);
3650
3.79G
    cur += l;
3651
3.79G
    c = CUR_SCHAR(cur, l);
3652
3.79G
                if (len > maxLength) {
3653
83
                    xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3654
83
                    xmlFree(buffer);
3655
83
                    return(NULL);
3656
83
                }
3657
3.79G
      }
3658
16.4M
      buffer[len] = 0;
3659
16.4M
      *str = cur;
3660
16.4M
      return(buffer);
3661
16.4M
  }
3662
2.93G
    }
3663
275M
    if (len > maxLength) {
3664
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3665
0
        return(NULL);
3666
0
    }
3667
275M
    *str = cur;
3668
275M
    return(xmlStrndup(buf, len));
3669
275M
}
3670
3671
/**
3672
 * xmlParseNmtoken:
3673
 * @ctxt:  an XML parser context
3674
 *
3675
 * DEPRECATED: Internal function, don't use.
3676
 *
3677
 * parse an XML Nmtoken.
3678
 *
3679
 * [7] Nmtoken ::= (NameChar)+
3680
 *
3681
 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
3682
 *
3683
 * Returns the Nmtoken parsed or NULL
3684
 */
3685
3686
xmlChar *
3687
7.55M
xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3688
7.55M
    xmlChar buf[XML_MAX_NAMELEN + 5];
3689
7.55M
    int len = 0, l;
3690
7.55M
    int c;
3691
7.55M
    int count = 0;
3692
7.55M
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3693
1.45M
                    XML_MAX_TEXT_LENGTH :
3694
7.55M
                    XML_MAX_NAME_LENGTH;
3695
3696
#ifdef DEBUG
3697
    nbParseNmToken++;
3698
#endif
3699
3700
7.55M
    GROW;
3701
7.55M
    if (ctxt->instate == XML_PARSER_EOF)
3702
19
        return(NULL);
3703
7.55M
    c = CUR_CHAR(l);
3704
3705
45.2M
    while (xmlIsNameChar(ctxt, c)) {
3706
37.7M
  if (count++ > XML_PARSER_CHUNK_SIZE) {
3707
0
      count = 0;
3708
0
      GROW;
3709
0
  }
3710
37.7M
  COPY_BUF(l,buf,len,c);
3711
37.7M
  NEXTL(l);
3712
37.7M
  c = CUR_CHAR(l);
3713
37.7M
  if (c == 0) {
3714
2.59k
      count = 0;
3715
2.59k
      GROW;
3716
2.59k
      if (ctxt->instate == XML_PARSER_EOF)
3717
0
    return(NULL);
3718
2.59k
            c = CUR_CHAR(l);
3719
2.59k
  }
3720
37.7M
  if (len >= XML_MAX_NAMELEN) {
3721
      /*
3722
       * Okay someone managed to make a huge token, so he's ready to pay
3723
       * for the processing speed.
3724
       */
3725
6.15k
      xmlChar *buffer;
3726
6.15k
      int max = len * 2;
3727
3728
6.15k
      buffer = (xmlChar *) xmlMallocAtomic(max);
3729
6.15k
      if (buffer == NULL) {
3730
0
          xmlErrMemory(ctxt, NULL);
3731
0
    return(NULL);
3732
0
      }
3733
6.15k
      memcpy(buffer, buf, len);
3734
23.4M
      while (xmlIsNameChar(ctxt, c)) {
3735
23.4M
    if (count++ > XML_PARSER_CHUNK_SIZE) {
3736
233k
        count = 0;
3737
233k
        GROW;
3738
233k
                    if (ctxt->instate == XML_PARSER_EOF) {
3739
0
                        xmlFree(buffer);
3740
0
                        return(NULL);
3741
0
                    }
3742
233k
    }
3743
23.4M
    if (len + 10 > max) {
3744
13.4k
        xmlChar *tmp;
3745
3746
13.4k
        max *= 2;
3747
13.4k
        tmp = (xmlChar *) xmlRealloc(buffer, max);
3748
13.4k
        if (tmp == NULL) {
3749
0
      xmlErrMemory(ctxt, NULL);
3750
0
      xmlFree(buffer);
3751
0
      return(NULL);
3752
0
        }
3753
13.4k
        buffer = tmp;
3754
13.4k
    }
3755
23.4M
    COPY_BUF(l,buffer,len,c);
3756
23.4M
    NEXTL(l);
3757
23.4M
    c = CUR_CHAR(l);
3758
23.4M
                if (len > maxLength) {
3759
158
                    xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3760
158
                    xmlFree(buffer);
3761
158
                    return(NULL);
3762
158
                }
3763
23.4M
      }
3764
5.99k
      buffer[len] = 0;
3765
5.99k
      return(buffer);
3766
6.15k
  }
3767
37.7M
    }
3768
7.54M
    if (len == 0)
3769
72.1k
        return(NULL);
3770
7.47M
    if (len > maxLength) {
3771
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3772
0
        return(NULL);
3773
0
    }
3774
7.47M
    return(xmlStrndup(buf, len));
3775
7.47M
}
3776
3777
/**
3778
 * xmlParseEntityValue:
3779
 * @ctxt:  an XML parser context
3780
 * @orig:  if non-NULL store a copy of the original entity value
3781
 *
3782
 * DEPRECATED: Internal function, don't use.
3783
 *
3784
 * parse a value for ENTITY declarations
3785
 *
3786
 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3787
 *                 "'" ([^%&'] | PEReference | Reference)* "'"
3788
 *
3789
 * Returns the EntityValue parsed with reference substituted or NULL
3790
 */
3791
3792
xmlChar *
3793
9.30M
xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3794
9.30M
    xmlChar *buf = NULL;
3795
9.30M
    int len = 0;
3796
9.30M
    int size = XML_PARSER_BUFFER_SIZE;
3797
9.30M
    int c, l;
3798
9.30M
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3799
1.54M
                    XML_MAX_HUGE_LENGTH :
3800
9.30M
                    XML_MAX_TEXT_LENGTH;
3801
9.30M
    xmlChar stop;
3802
9.30M
    xmlChar *ret = NULL;
3803
9.30M
    const xmlChar *cur = NULL;
3804
9.30M
    xmlParserInputPtr input;
3805
3806
9.30M
    if (RAW == '"') stop = '"';
3807
1.12M
    else if (RAW == '\'') stop = '\'';
3808
0
    else {
3809
0
  xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
3810
0
  return(NULL);
3811
0
    }
3812
9.30M
    buf = (xmlChar *) xmlMallocAtomic(size);
3813
9.30M
    if (buf == NULL) {
3814
0
  xmlErrMemory(ctxt, NULL);
3815
0
  return(NULL);
3816
0
    }
3817
3818
    /*
3819
     * The content of the entity definition is copied in a buffer.
3820
     */
3821
3822
9.30M
    ctxt->instate = XML_PARSER_ENTITY_VALUE;
3823
9.30M
    input = ctxt->input;
3824
9.30M
    GROW;
3825
9.30M
    if (ctxt->instate == XML_PARSER_EOF)
3826
0
        goto error;
3827
9.30M
    NEXT;
3828
9.30M
    c = CUR_CHAR(l);
3829
    /*
3830
     * NOTE: 4.4.5 Included in Literal
3831
     * When a parameter entity reference appears in a literal entity
3832
     * value, ... a single or double quote character in the replacement
3833
     * text is always treated as a normal data character and will not
3834
     * terminate the literal.
3835
     * In practice it means we stop the loop only when back at parsing
3836
     * the initial entity and the quote is found
3837
     */
3838
446M
    while (((IS_CHAR(c)) && ((c != stop) || /* checked */
3839
446M
      (ctxt->input != input))) && (ctxt->instate != XML_PARSER_EOF)) {
3840
436M
  if (len + 5 >= size) {
3841
1.20M
      xmlChar *tmp;
3842
3843
1.20M
      size *= 2;
3844
1.20M
      tmp = (xmlChar *) xmlRealloc(buf, size);
3845
1.20M
      if (tmp == NULL) {
3846
0
    xmlErrMemory(ctxt, NULL);
3847
0
                goto error;
3848
0
      }
3849
1.20M
      buf = tmp;
3850
1.20M
  }
3851
436M
  COPY_BUF(l,buf,len,c);
3852
436M
  NEXTL(l);
3853
3854
436M
  GROW;
3855
436M
  c = CUR_CHAR(l);
3856
436M
  if (c == 0) {
3857
4.62k
      GROW;
3858
4.62k
      c = CUR_CHAR(l);
3859
4.62k
  }
3860
3861
436M
        if (len > maxLength) {
3862
0
            xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
3863
0
                           "entity value too long\n");
3864
0
            goto error;
3865
0
        }
3866
436M
    }
3867
9.30M
    buf[len] = 0;
3868
9.30M
    if (ctxt->instate == XML_PARSER_EOF)
3869
0
        goto error;
3870
9.30M
    if (c != stop) {
3871
6.73k
        xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
3872
6.73k
        goto error;
3873
6.73k
    }
3874
9.29M
    NEXT;
3875
3876
    /*
3877
     * Raise problem w.r.t. '&' and '%' being used in non-entities
3878
     * reference constructs. Note Charref will be handled in
3879
     * xmlStringDecodeEntities()
3880
     */
3881
9.29M
    cur = buf;
3882
293M
    while (*cur != 0) { /* non input consuming */
3883
283M
  if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3884
5.26M
      xmlChar *name;
3885
5.26M
      xmlChar tmp = *cur;
3886
5.26M
            int nameOk = 0;
3887
3888
5.26M
      cur++;
3889
5.26M
      name = xmlParseStringName(ctxt, &cur);
3890
5.26M
            if (name != NULL) {
3891
5.25M
                nameOk = 1;
3892
5.25M
                xmlFree(name);
3893
5.25M
            }
3894
5.26M
            if ((nameOk == 0) || (*cur != ';')) {
3895
19.2k
    xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
3896
19.2k
      "EntityValue: '%c' forbidden except for entities references\n",
3897
19.2k
                            tmp);
3898
19.2k
                goto error;
3899
19.2k
      }
3900
5.24M
      if ((tmp == '%') && (ctxt->inSubset == 1) &&
3901
5.24M
    (ctxt->inputNr == 1)) {
3902
4.87k
    xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
3903
4.87k
                goto error;
3904
4.87k
      }
3905
5.23M
      if (*cur == 0)
3906
0
          break;
3907
5.23M
  }
3908
283M
  cur++;
3909
283M
    }
3910
3911
    /*
3912
     * Then PEReference entities are substituted.
3913
     *
3914
     * NOTE: 4.4.7 Bypassed
3915
     * When a general entity reference appears in the EntityValue in
3916
     * an entity declaration, it is bypassed and left as is.
3917
     * so XML_SUBSTITUTE_REF is not set here.
3918
     */
3919
9.27M
    ++ctxt->depth;
3920
9.27M
    ret = xmlStringDecodeEntitiesInt(ctxt, buf, len, XML_SUBSTITUTE_PEREF,
3921
9.27M
                                     0, 0, 0, /* check */ 1);
3922
9.27M
    --ctxt->depth;
3923
3924
9.27M
    if (orig != NULL) {
3925
9.27M
        *orig = buf;
3926
9.27M
        buf = NULL;
3927
9.27M
    }
3928
3929
9.30M
error:
3930
9.30M
    if (buf != NULL)
3931
30.9k
        xmlFree(buf);
3932
9.30M
    return(ret);
3933
9.27M
}
3934
3935
/**
3936
 * xmlParseAttValueComplex:
3937
 * @ctxt:  an XML parser context
3938
 * @len:   the resulting attribute len
3939
 * @normalize:  whether to apply the inner normalization
3940
 *
3941
 * parse a value for an attribute, this is the fallback function
3942
 * of xmlParseAttValue() when the attribute parsing requires handling
3943
 * of non-ASCII characters, or normalization compaction.
3944
 *
3945
 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3946
 */
3947
static xmlChar *
3948
1.41M
xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
3949
1.41M
    xmlChar limit = 0;
3950
1.41M
    xmlChar *buf = NULL;
3951
1.41M
    xmlChar *rep = NULL;
3952
1.41M
    size_t len = 0;
3953
1.41M
    size_t buf_size = 0;
3954
1.41M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3955
424k
                       XML_MAX_HUGE_LENGTH :
3956
1.41M
                       XML_MAX_TEXT_LENGTH;
3957
1.41M
    int c, l, in_space = 0;
3958
1.41M
    xmlChar *current = NULL;
3959
1.41M
    xmlEntityPtr ent;
3960
3961
1.41M
    if (NXT(0) == '"') {
3962
872k
  ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3963
872k
  limit = '"';
3964
872k
        NEXT;
3965
872k
    } else if (NXT(0) == '\'') {
3966
542k
  limit = '\'';
3967
542k
  ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3968
542k
        NEXT;
3969
542k
    } else {
3970
0
  xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
3971
0
  return(NULL);
3972
0
    }
3973
3974
    /*
3975
     * allocate a translation buffer.
3976
     */
3977
1.41M
    buf_size = XML_PARSER_BUFFER_SIZE;
3978
1.41M
    buf = (xmlChar *) xmlMallocAtomic(buf_size);
3979
1.41M
    if (buf == NULL) goto mem_error;
3980
3981
    /*
3982
     * OK loop until we reach one of the ending char or a size limit.
3983
     */
3984
1.41M
    c = CUR_CHAR(l);
3985
61.8M
    while (((NXT(0) != limit) && /* checked */
3986
61.8M
            (IS_CHAR(c)) && (c != '<')) &&
3987
61.8M
            (ctxt->instate != XML_PARSER_EOF)) {
3988
60.4M
  if (c == '&') {
3989
10.2M
      in_space = 0;
3990
10.2M
      if (NXT(1) == '#') {
3991
933k
    int val = xmlParseCharRef(ctxt);
3992
3993
933k
    if (val == '&') {
3994
8.43k
        if (ctxt->replaceEntities) {
3995
4.16k
      if (len + 10 > buf_size) {
3996
342
          growBuffer(buf, 10);
3997
342
      }
3998
4.16k
      buf[len++] = '&';
3999
4.27k
        } else {
4000
      /*
4001
       * The reparsing will be done in xmlStringGetNodeList()
4002
       * called by the attribute() function in SAX.c
4003
       */
4004
4.27k
      if (len + 10 > buf_size) {
4005
392
          growBuffer(buf, 10);
4006
392
      }
4007
4.27k
      buf[len++] = '&';
4008
4.27k
      buf[len++] = '#';
4009
4.27k
      buf[len++] = '3';
4010
4.27k
      buf[len++] = '8';
4011
4.27k
      buf[len++] = ';';
4012
4.27k
        }
4013
924k
    } else if (val != 0) {
4014
789k
        if (len + 10 > buf_size) {
4015
5.65k
      growBuffer(buf, 10);
4016
5.65k
        }
4017
789k
        len += xmlCopyChar(0, &buf[len], val);
4018
789k
    }
4019
9.30M
      } else {
4020
9.30M
    ent = xmlParseEntityRef(ctxt);
4021
9.30M
    if ((ent != NULL) &&
4022
9.30M
        (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
4023
260k
        if (len + 10 > buf_size) {
4024
358
      growBuffer(buf, 10);
4025
358
        }
4026
260k
        if ((ctxt->replaceEntities == 0) &&
4027
260k
            (ent->content[0] == '&')) {
4028
84.0k
      buf[len++] = '&';
4029
84.0k
      buf[len++] = '#';
4030
84.0k
      buf[len++] = '3';
4031
84.0k
      buf[len++] = '8';
4032
84.0k
      buf[len++] = ';';
4033
176k
        } else {
4034
176k
      buf[len++] = ent->content[0];
4035
176k
        }
4036
9.04M
    } else if ((ent != NULL) &&
4037
9.04M
               (ctxt->replaceEntities != 0)) {
4038
5.01M
        if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
4039
5.01M
                        if (xmlParserEntityCheck(ctxt, ent->length))
4040
8
                            goto error;
4041
4042
5.01M
      ++ctxt->depth;
4043
5.01M
      rep = xmlStringDecodeEntitiesInt(ctxt, ent->content,
4044
5.01M
                                ent->length, XML_SUBSTITUTE_REF, 0, 0, 0,
4045
5.01M
                                /* check */ 1);
4046
5.01M
      --ctxt->depth;
4047
5.01M
      if (rep != NULL) {
4048
4.97M
          current = rep;
4049
1.18G
          while (*current != 0) { /* non input consuming */
4050
1.18G
                                if ((*current == 0xD) || (*current == 0xA) ||
4051
1.18G
                                    (*current == 0x9)) {
4052
4.42M
                                    buf[len++] = 0x20;
4053
4.42M
                                    current++;
4054
4.42M
                                } else
4055
1.18G
                                    buf[len++] = *current++;
4056
1.18G
        if (len + 10 > buf_size) {
4057
60.9k
            growBuffer(buf, 10);
4058
60.9k
        }
4059
1.18G
          }
4060
4.97M
          xmlFree(rep);
4061
4.97M
          rep = NULL;
4062
4.97M
      }
4063
5.01M
        } else {
4064
0
      if (len + 10 > buf_size) {
4065
0
          growBuffer(buf, 10);
4066
0
      }
4067
0
      if (ent->content != NULL)
4068
0
          buf[len++] = ent->content[0];
4069
0
        }
4070
5.01M
    } else if (ent != NULL) {
4071
3.67M
        int i = xmlStrlen(ent->name);
4072
3.67M
        const xmlChar *cur = ent->name;
4073
4074
        /*
4075
                     * We also check for recursion and amplification
4076
                     * when entities are not substituted. They're
4077
                     * often expanded later.
4078
         */
4079
3.67M
        if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
4080
3.67M
      (ent->content != NULL)) {
4081
3.61M
                        if ((ent->flags & XML_ENT_CHECKED) == 0) {
4082
35.5k
                            unsigned long oldCopy = ctxt->sizeentcopy;
4083
4084
35.5k
                            ctxt->sizeentcopy = ent->length;
4085
4086
35.5k
                            ++ctxt->depth;
4087
35.5k
                            rep = xmlStringDecodeEntitiesInt(ctxt,
4088
35.5k
                                    ent->content, ent->length,
4089
35.5k
                                    XML_SUBSTITUTE_REF, 0, 0, 0,
4090
35.5k
                                    /* check */ 1);
4091
35.5k
                            --ctxt->depth;
4092
4093
                            /*
4094
                             * If we're parsing DTD content, the entity
4095
                             * might reference other entities which
4096
                             * weren't defined yet, so the check isn't
4097
                             * reliable.
4098
                             */
4099
35.5k
                            if (ctxt->inSubset == 0) {
4100
31.3k
                                ent->flags |= XML_ENT_CHECKED;
4101
31.3k
                                ent->expandedSize = ctxt->sizeentcopy;
4102
31.3k
                            }
4103
4104
35.5k
                            if (rep != NULL) {
4105
34.3k
                                xmlFree(rep);
4106
34.3k
                                rep = NULL;
4107
34.3k
                            } else {
4108
1.26k
                                ent->content[0] = 0;
4109
1.26k
                            }
4110
4111
35.5k
                            if (xmlParserEntityCheck(ctxt, oldCopy))
4112
703
                                goto error;
4113
3.57M
                        } else {
4114
3.57M
                            if (xmlParserEntityCheck(ctxt, ent->expandedSize))
4115
155
                                goto error;
4116
3.57M
                        }
4117
3.61M
        }
4118
4119
        /*
4120
         * Just output the reference
4121
         */
4122
3.67M
        buf[len++] = '&';
4123
3.68M
        while (len + i + 10 > buf_size) {
4124
24.1k
      growBuffer(buf, i + 10);
4125
24.1k
        }
4126
10.1M
        for (;i > 0;i--)
4127
6.50M
      buf[len++] = *cur++;
4128
3.67M
        buf[len++] = ';';
4129
3.67M
    }
4130
9.30M
      }
4131
50.2M
  } else {
4132
50.2M
      if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
4133
8.03M
          if ((len != 0) || (!normalize)) {
4134
7.67M
        if ((!normalize) || (!in_space)) {
4135
7.05M
      COPY_BUF(l,buf,len,0x20);
4136
7.06M
      while (len + 10 > buf_size) {
4137
26.1k
          growBuffer(buf, 10);
4138
26.1k
      }
4139
7.05M
        }
4140
7.67M
        in_space = 1;
4141
7.67M
    }
4142
42.2M
      } else {
4143
42.2M
          in_space = 0;
4144
42.2M
    COPY_BUF(l,buf,len,c);
4145
42.2M
    if (len + 10 > buf_size) {
4146
127k
        growBuffer(buf, 10);
4147
127k
    }
4148
42.2M
      }
4149
50.2M
      NEXTL(l);
4150
50.2M
  }
4151
60.4M
  GROW;
4152
60.4M
  c = CUR_CHAR(l);
4153
60.4M
        if (len > maxLength) {
4154
0
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4155
0
                           "AttValue length too long\n");
4156
0
            goto mem_error;
4157
0
        }
4158
60.4M
    }
4159
1.41M
    if (ctxt->instate == XML_PARSER_EOF)
4160
2.02k
        goto error;
4161
4162
1.41M
    if ((in_space) && (normalize)) {
4163
132k
        while ((len > 0) && (buf[len - 1] == 0x20)) len--;
4164
63.4k
    }
4165
1.41M
    buf[len] = 0;
4166
1.41M
    if (RAW == '<') {
4167
277k
  xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
4168
1.13M
    } else if (RAW != limit) {
4169
245k
  if ((c != 0) && (!IS_CHAR(c))) {
4170
68.9k
      xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
4171
68.9k
         "invalid character in attribute value\n");
4172
176k
  } else {
4173
176k
      xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4174
176k
         "AttValue: ' expected\n");
4175
176k
        }
4176
245k
    } else
4177
888k
  NEXT;
4178
4179
1.41M
    if (attlen != NULL) *attlen = len;
4180
1.41M
    return(buf);
4181
4182
0
mem_error:
4183
0
    xmlErrMemory(ctxt, NULL);
4184
2.88k
error:
4185
2.88k
    if (buf != NULL)
4186
2.88k
        xmlFree(buf);
4187
2.88k
    if (rep != NULL)
4188
0
        xmlFree(rep);
4189
2.88k
    return(NULL);
4190
0
}
4191
4192
/**
4193
 * xmlParseAttValue:
4194
 * @ctxt:  an XML parser context
4195
 *
4196
 * DEPRECATED: Internal function, don't use.
4197
 *
4198
 * parse a value for an attribute
4199
 * Note: the parser won't do substitution of entities here, this
4200
 * will be handled later in xmlStringGetNodeList
4201
 *
4202
 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
4203
 *                   "'" ([^<&'] | Reference)* "'"
4204
 *
4205
 * 3.3.3 Attribute-Value Normalization:
4206
 * Before the value of an attribute is passed to the application or
4207
 * checked for validity, the XML processor must normalize it as follows:
4208
 * - a character reference is processed by appending the referenced
4209
 *   character to the attribute value
4210
 * - an entity reference is processed by recursively processing the
4211
 *   replacement text of the entity
4212
 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4213
 *   appending #x20 to the normalized value, except that only a single
4214
 *   #x20 is appended for a "#xD#xA" sequence that is part of an external
4215
 *   parsed entity or the literal entity value of an internal parsed entity
4216
 * - other characters are processed by appending them to the normalized value
4217
 * If the declared value is not CDATA, then the XML processor must further
4218
 * process the normalized attribute value by discarding any leading and
4219
 * trailing space (#x20) characters, and by replacing sequences of space
4220
 * (#x20) characters by a single space (#x20) character.
4221
 * All attributes for which no declaration has been read should be treated
4222
 * by a non-validating parser as if declared CDATA.
4223
 *
4224
 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
4225
 */
4226
4227
4228
xmlChar *
4229
15.3M
xmlParseAttValue(xmlParserCtxtPtr ctxt) {
4230
15.3M
    if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
4231
15.3M
    return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
4232
15.3M
}
4233
4234
/**
4235
 * xmlParseSystemLiteral:
4236
 * @ctxt:  an XML parser context
4237
 *
4238
 * DEPRECATED: Internal function, don't use.
4239
 *
4240
 * parse an XML Literal
4241
 *
4242
 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4243
 *
4244
 * Returns the SystemLiteral parsed or NULL
4245
 */
4246
4247
xmlChar *
4248
1.29M
xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
4249
1.29M
    xmlChar *buf = NULL;
4250
1.29M
    int len = 0;
4251
1.29M
    int size = XML_PARSER_BUFFER_SIZE;
4252
1.29M
    int cur, l;
4253
1.29M
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4254
381k
                    XML_MAX_TEXT_LENGTH :
4255
1.29M
                    XML_MAX_NAME_LENGTH;
4256
1.29M
    xmlChar stop;
4257
1.29M
    int state = ctxt->instate;
4258
1.29M
    int count = 0;
4259
4260
1.29M
    SHRINK;
4261
1.29M
    if (RAW == '"') {
4262
517k
        NEXT;
4263
517k
  stop = '"';
4264
772k
    } else if (RAW == '\'') {
4265
738k
        NEXT;
4266
738k
  stop = '\'';
4267
738k
    } else {
4268
33.7k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4269
33.7k
  return(NULL);
4270
33.7k
    }
4271
4272
1.25M
    buf = (xmlChar *) xmlMallocAtomic(size);
4273
1.25M
    if (buf == NULL) {
4274
0
        xmlErrMemory(ctxt, NULL);
4275
0
  return(NULL);
4276
0
    }
4277
1.25M
    ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
4278
1.25M
    cur = CUR_CHAR(l);
4279
27.1M
    while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
4280
25.8M
  if (len + 5 >= size) {
4281
20.5k
      xmlChar *tmp;
4282
4283
20.5k
      size *= 2;
4284
20.5k
      tmp = (xmlChar *) xmlRealloc(buf, size);
4285
20.5k
      if (tmp == NULL) {
4286
0
          xmlFree(buf);
4287
0
    xmlErrMemory(ctxt, NULL);
4288
0
    ctxt->instate = (xmlParserInputState) state;
4289
0
    return(NULL);
4290
0
      }
4291
20.5k
      buf = tmp;
4292
20.5k
  }
4293
25.8M
  count++;
4294
25.8M
  if (count > 50) {
4295
324k
      SHRINK;
4296
324k
      GROW;
4297
324k
      count = 0;
4298
324k
            if (ctxt->instate == XML_PARSER_EOF) {
4299
0
          xmlFree(buf);
4300
0
    return(NULL);
4301
0
            }
4302
324k
  }
4303
25.8M
  COPY_BUF(l,buf,len,cur);
4304
25.8M
  NEXTL(l);
4305
25.8M
  cur = CUR_CHAR(l);
4306
25.8M
  if (cur == 0) {
4307
19.1k
      GROW;
4308
19.1k
      SHRINK;
4309
19.1k
      cur = CUR_CHAR(l);
4310
19.1k
  }
4311
25.8M
        if (len > maxLength) {
4312
160
            xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
4313
160
            xmlFree(buf);
4314
160
            ctxt->instate = (xmlParserInputState) state;
4315
160
            return(NULL);
4316
160
        }
4317
25.8M
    }
4318
1.25M
    buf[len] = 0;
4319
1.25M
    ctxt->instate = (xmlParserInputState) state;
4320
1.25M
    if (!IS_CHAR(cur)) {
4321
22.0k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4322
1.23M
    } else {
4323
1.23M
  NEXT;
4324
1.23M
    }
4325
1.25M
    return(buf);
4326
1.25M
}
4327
4328
/**
4329
 * xmlParsePubidLiteral:
4330
 * @ctxt:  an XML parser context
4331
 *
4332
 * DEPRECATED: Internal function, don't use.
4333
 *
4334
 * parse an XML public literal
4335
 *
4336
 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4337
 *
4338
 * Returns the PubidLiteral parsed or NULL.
4339
 */
4340
4341
xmlChar *
4342
732k
xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
4343
732k
    xmlChar *buf = NULL;
4344
732k
    int len = 0;
4345
732k
    int size = XML_PARSER_BUFFER_SIZE;
4346
732k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4347
213k
                    XML_MAX_TEXT_LENGTH :
4348
732k
                    XML_MAX_NAME_LENGTH;
4349
732k
    xmlChar cur;
4350
732k
    xmlChar stop;
4351
732k
    int count = 0;
4352
732k
    xmlParserInputState oldstate = ctxt->instate;
4353
4354
732k
    SHRINK;
4355
732k
    if (RAW == '"') {
4356
154k
        NEXT;
4357
154k
  stop = '"';
4358
578k
    } else if (RAW == '\'') {
4359
575k
        NEXT;
4360
575k
  stop = '\'';
4361
575k
    } else {
4362
2.66k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4363
2.66k
  return(NULL);
4364
2.66k
    }
4365
730k
    buf = (xmlChar *) xmlMallocAtomic(size);
4366
730k
    if (buf == NULL) {
4367
0
  xmlErrMemory(ctxt, NULL);
4368
0
  return(NULL);
4369
0
    }
4370
730k
    ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
4371
730k
    cur = CUR;
4372
12.6M
    while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
4373
11.9M
  if (len + 1 >= size) {
4374
9.90k
      xmlChar *tmp;
4375
4376
9.90k
      size *= 2;
4377
9.90k
      tmp = (xmlChar *) xmlRealloc(buf, size);
4378
9.90k
      if (tmp == NULL) {
4379
0
    xmlErrMemory(ctxt, NULL);
4380
0
    xmlFree(buf);
4381
0
    return(NULL);
4382
0
      }
4383
9.90k
      buf = tmp;
4384
9.90k
  }
4385
11.9M
  buf[len++] = cur;
4386
11.9M
  count++;
4387
11.9M
  if (count > 50) {
4388
118k
      SHRINK;
4389
118k
      GROW;
4390
118k
      count = 0;
4391
118k
            if (ctxt->instate == XML_PARSER_EOF) {
4392
0
    xmlFree(buf);
4393
0
    return(NULL);
4394
0
            }
4395
118k
  }
4396
11.9M
  NEXT;
4397
11.9M
  cur = CUR;
4398
11.9M
  if (cur == 0) {
4399
2.77k
      GROW;
4400
2.77k
      SHRINK;
4401
2.77k
      cur = CUR;
4402
2.77k
  }
4403
11.9M
        if (len > maxLength) {
4404
22
            xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
4405
22
            xmlFree(buf);
4406
22
            return(NULL);
4407
22
        }
4408
11.9M
    }
4409
730k
    buf[len] = 0;
4410
730k
    if (cur != stop) {
4411
39.6k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4412
690k
    } else {
4413
690k
  NEXT;
4414
690k
    }
4415
730k
    ctxt->instate = oldstate;
4416
730k
    return(buf);
4417
730k
}
4418
4419
/*
 * Forward declaration: the definition follows xmlParseCharData(), which
 * calls it as the fallback once its ASCII-only fast path gives up.
 */
static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt);
4420
4421
/*
 * Lookup table driving the inner loop of the character data fast path.
 *
 * An entry is non-zero (equal to its own index) when the byte can be
 * skipped over without further inspection: TAB (0x09) and the printable
 * ASCII range 0x20-0x7F, minus '&', '<' and ']'.  Everything else is zero,
 * including LF and CR (handled separately for line accounting) and all
 * bytes >= 0x80 (non-ASCII).
 */
static const unsigned char test_char_data[256] = {
    /* 0x00 */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x10 */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x20: '&' (0x26) stops the scan */
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27,
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    /* 0x30: '<' (0x3C) stops the scan */
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F,
    /* 0x40 */
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    /* 0x50: ']' (0x5D) stops the scan */
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F,
    /* 0x60 */
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    /* 0x70 */
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F
    /* 0x80-0xFF: non-ascii, left to the implicit zero initialization */
};
4458
4459
/**
4460
 * xmlParseCharData:
4461
 * @ctxt:  an XML parser context
4462
 * @cdata:  unused
4463
 *
4464
 * DEPRECATED: Internal function, don't use.
4465
 *
4466
 * Parse character data. Always makes progress if the first char isn't
4467
 * '<' or '&'.
4468
 *
4469
 * if we are within a CDATA section ']]>' marks an end of section.
4470
 *
4471
 * The right angle bracket (>) may be represented using the string "&gt;",
4472
 * and must, for compatibility, be escaped using "&gt;" or a character
4473
 * reference when it appears in the string "]]>" in content, when that
4474
 * string is not marking the end of a CDATA section.
4475
 *
4476
 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4477
 */
4478
4479
void
4480
69.0M
xmlParseCharData(xmlParserCtxtPtr ctxt, ATTRIBUTE_UNUSED int cdata) {
4481
69.0M
    const xmlChar *in;
4482
69.0M
    int nbchar = 0;
4483
69.0M
    int line = ctxt->input->line;
4484
69.0M
    int col = ctxt->input->col;
4485
69.0M
    int ccol;
4486
4487
69.0M
    SHRINK;
4488
69.0M
    GROW;
4489
    /*
4490
     * Accelerated common case where input don't need to be
4491
     * modified before passing it to the handler.
4492
     */
4493
69.0M
    in = ctxt->input->cur;
4494
80.6M
    do {
4495
105M
get_more_space:
4496
144M
        while (*in == 0x20) { in++; ctxt->input->col++; }
4497
105M
        if (*in == 0xA) {
4498
26.4M
            do {
4499
26.4M
                ctxt->input->line++; ctxt->input->col = 1;
4500
26.4M
                in++;
4501
26.4M
            } while (*in == 0xA);
4502
25.0M
            goto get_more_space;
4503
25.0M
        }
4504
80.6M
        if (*in == '<') {
4505
14.1M
            nbchar = in - ctxt->input->cur;
4506
14.1M
            if (nbchar > 0) {
4507
14.1M
                const xmlChar *tmp = ctxt->input->cur;
4508
14.1M
                ctxt->input->cur = in;
4509
4510
14.1M
                if ((ctxt->sax != NULL) &&
4511
14.1M
                    (ctxt->sax->ignorableWhitespace !=
4512
14.1M
                     ctxt->sax->characters)) {
4513
5.55M
                    if (areBlanks(ctxt, tmp, nbchar, 1)) {
4514
4.49M
                        if (ctxt->sax->ignorableWhitespace != NULL)
4515
4.49M
                            ctxt->sax->ignorableWhitespace(ctxt->userData,
4516
4.49M
                                                   tmp, nbchar);
4517
4.49M
                    } else {
4518
1.06M
                        if (ctxt->sax->characters != NULL)
4519
1.06M
                            ctxt->sax->characters(ctxt->userData,
4520
1.06M
                                                  tmp, nbchar);
4521
1.06M
                        if (*ctxt->space == -1)
4522
295k
                            *ctxt->space = -2;
4523
1.06M
                    }
4524
8.63M
                } else if ((ctxt->sax != NULL) &&
4525
8.63M
                           (ctxt->sax->characters != NULL)) {
4526
8.63M
                    ctxt->sax->characters(ctxt->userData,
4527
8.63M
                                          tmp, nbchar);
4528
8.63M
                }
4529
14.1M
            }
4530
14.1M
            return;
4531
14.1M
        }
4532
4533
80.9M
get_more:
4534
80.9M
        ccol = ctxt->input->col;
4535
1.40G
        while (test_char_data[*in]) {
4536
1.32G
            in++;
4537
1.32G
            ccol++;
4538
1.32G
        }
4539
80.9M
        ctxt->input->col = ccol;
4540
80.9M
        if (*in == 0xA) {
4541
13.5M
            do {
4542
13.5M
                ctxt->input->line++; ctxt->input->col = 1;
4543
13.5M
                in++;
4544
13.5M
            } while (*in == 0xA);
4545
13.2M
            goto get_more;
4546
13.2M
        }
4547
67.6M
        if (*in == ']') {
4548
1.26M
            if ((in[1] == ']') && (in[2] == '>')) {
4549
42.1k
                xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4550
42.1k
                ctxt->input->cur = in + 1;
4551
42.1k
                return;
4552
42.1k
            }
4553
1.22M
            in++;
4554
1.22M
            ctxt->input->col++;
4555
1.22M
            goto get_more;
4556
1.26M
        }
4557
66.4M
        nbchar = in - ctxt->input->cur;
4558
66.4M
        if (nbchar > 0) {
4559
51.1M
            if ((ctxt->sax != NULL) &&
4560
51.1M
                (ctxt->sax->ignorableWhitespace !=
4561
51.1M
                 ctxt->sax->characters) &&
4562
51.1M
                (IS_BLANK_CH(*ctxt->input->cur))) {
4563
10.2M
                const xmlChar *tmp = ctxt->input->cur;
4564
10.2M
                ctxt->input->cur = in;
4565
4566
10.2M
                if (areBlanks(ctxt, tmp, nbchar, 0)) {
4567
6.67M
                    if (ctxt->sax->ignorableWhitespace != NULL)
4568
6.67M
                        ctxt->sax->ignorableWhitespace(ctxt->userData,
4569
6.67M
                                                       tmp, nbchar);
4570
6.67M
                } else {
4571
3.53M
                    if (ctxt->sax->characters != NULL)
4572
3.53M
                        ctxt->sax->characters(ctxt->userData,
4573
3.53M
                                              tmp, nbchar);
4574
3.53M
                    if (*ctxt->space == -1)
4575
1.46M
                        *ctxt->space = -2;
4576
3.53M
                }
4577
10.2M
                line = ctxt->input->line;
4578
10.2M
                col = ctxt->input->col;
4579
40.9M
            } else if (ctxt->sax != NULL) {
4580
40.9M
                if (ctxt->sax->characters != NULL)
4581
40.9M
                    ctxt->sax->characters(ctxt->userData,
4582
40.9M
                                          ctxt->input->cur, nbchar);
4583
40.9M
                line = ctxt->input->line;
4584
40.9M
                col = ctxt->input->col;
4585
40.9M
            }
4586
51.1M
        }
4587
66.4M
        ctxt->input->cur = in;
4588
66.4M
        if (*in == 0xD) {
4589
11.7M
            in++;
4590
11.7M
            if (*in == 0xA) {
4591
11.6M
                ctxt->input->cur = in;
4592
11.6M
                in++;
4593
11.6M
                ctxt->input->line++; ctxt->input->col = 1;
4594
11.6M
                continue; /* while */
4595
11.6M
            }
4596
80.1k
            in--;
4597
80.1k
        }
4598
54.7M
        if (*in == '<') {
4599
46.1M
            return;
4600
46.1M
        }
4601
8.61M
        if (*in == '&') {
4602
3.34M
            return;
4603
3.34M
        }
4604
5.26M
        SHRINK;
4605
5.26M
        GROW;
4606
5.26M
        if (ctxt->instate == XML_PARSER_EOF)
4607
0
            return;
4608
5.26M
        in = ctxt->input->cur;
4609
16.9M
    } while (((*in >= 0x20) && (*in <= 0x7F)) ||
4610
16.9M
             (*in == 0x09) || (*in == 0x0a));
4611
5.28M
    ctxt->input->line = line;
4612
5.28M
    ctxt->input->col = col;
4613
5.28M
    xmlParseCharDataComplex(ctxt);
4614
5.28M
}
4615
4616
/**
4617
 * xmlParseCharDataComplex:
4618
 * @ctxt:  an XML parser context
4619
 * @cdata:  int indicating whether we are within a CDATA section
4620
 *
4621
 * Always makes progress if the first char isn't '<' or '&'.
4622
 *
4623
 * parse a CharData section.this is the fallback function
4624
 * of xmlParseCharData() when the parsing requires handling
4625
 * of non-ASCII characters.
4626
 */
4627
static void
4628
5.28M
xmlParseCharDataComplex(xmlParserCtxtPtr ctxt) {
4629
5.28M
    xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4630
5.28M
    int nbchar = 0;
4631
5.28M
    int cur, l;
4632
5.28M
    int count = 0;
4633
4634
5.28M
    SHRINK;
4635
5.28M
    GROW;
4636
5.28M
    cur = CUR_CHAR(l);
4637
77.3M
    while ((cur != '<') && /* checked */
4638
77.3M
           (cur != '&') &&
4639
77.3M
     (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
4640
72.0M
  if ((cur == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
4641
14.5k
      xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4642
14.5k
  }
4643
72.0M
  COPY_BUF(l,buf,nbchar,cur);
4644
  /* move current position before possible calling of ctxt->sax->characters */
4645
72.0M
  NEXTL(l);
4646
72.0M
  cur = CUR_CHAR(l);
4647
72.0M
  if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
4648
203k
      buf[nbchar] = 0;
4649
4650
      /*
4651
       * OK the segment is to be consumed as chars.
4652
       */
4653
203k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4654
179k
    if (areBlanks(ctxt, buf, nbchar, 0)) {
4655
3.93k
        if (ctxt->sax->ignorableWhitespace != NULL)
4656
3.93k
      ctxt->sax->ignorableWhitespace(ctxt->userData,
4657
3.93k
                                     buf, nbchar);
4658
175k
    } else {
4659
175k
        if (ctxt->sax->characters != NULL)
4660
175k
      ctxt->sax->characters(ctxt->userData, buf, nbchar);
4661
175k
        if ((ctxt->sax->characters !=
4662
175k
             ctxt->sax->ignorableWhitespace) &&
4663
175k
      (*ctxt->space == -1))
4664
4.35k
      *ctxt->space = -2;
4665
175k
    }
4666
179k
      }
4667
203k
      nbchar = 0;
4668
            /* something really bad happened in the SAX callback */
4669
203k
            if (ctxt->instate != XML_PARSER_CONTENT)
4670
0
                return;
4671
203k
  }
4672
72.0M
  count++;
4673
72.0M
  if (count > 50) {
4674
1.16M
      SHRINK;
4675
1.16M
      GROW;
4676
1.16M
      count = 0;
4677
1.16M
            if (ctxt->instate == XML_PARSER_EOF)
4678
0
    return;
4679
1.16M
  }
4680
72.0M
    }
4681
5.28M
    if (nbchar != 0) {
4682
1.06M
        buf[nbchar] = 0;
4683
  /*
4684
   * OK the segment is to be consumed as chars.
4685
   */
4686
1.06M
  if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4687
924k
      if (areBlanks(ctxt, buf, nbchar, 0)) {
4688
2.94k
    if (ctxt->sax->ignorableWhitespace != NULL)
4689
2.94k
        ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4690
921k
      } else {
4691
921k
    if (ctxt->sax->characters != NULL)
4692
921k
        ctxt->sax->characters(ctxt->userData, buf, nbchar);
4693
921k
    if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4694
921k
        (*ctxt->space == -1))
4695
179k
        *ctxt->space = -2;
4696
921k
      }
4697
924k
  }
4698
1.06M
    }
4699
5.28M
    if ((ctxt->input->cur < ctxt->input->end) && (!IS_CHAR(cur))) {
4700
  /* Generate the error and skip the offending character */
4701
4.18M
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4702
4.18M
                          "PCDATA invalid Char value %d\n",
4703
4.18M
                    cur ? cur : CUR);
4704
4.18M
  NEXT;
4705
4.18M
    }
4706
5.28M
}
4707
4708
/**
4709
 * xmlParseExternalID:
4710
 * @ctxt:  an XML parser context
4711
 * @publicID:  a xmlChar** receiving PubidLiteral
4712
 * @strict: indicate whether we should restrict parsing to only
4713
 *          production [75], see NOTE below
4714
 *
4715
 * DEPRECATED: Internal function, don't use.
4716
 *
4717
 * Parse an External ID or a Public ID
4718
 *
4719
 * NOTE: Productions [75] and [83] interact badly since [75] can generate
4720
 *       'PUBLIC' S PubidLiteral S SystemLiteral
4721
 *
4722
 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4723
 *                   | 'PUBLIC' S PubidLiteral S SystemLiteral
4724
 *
4725
 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4726
 *
4727
 * Returns the function returns SystemLiteral and in the second
4728
 *                case publicID receives PubidLiteral, is strict is off
4729
 *                it is possible to return NULL and have publicID set.
4730
 */
4731
4732
xmlChar *
4733
1.79M
xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4734
1.79M
    xmlChar *URI = NULL;
4735
4736
1.79M
    SHRINK;
4737
4738
1.79M
    *publicID = NULL;
4739
1.79M
    if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
4740
694k
        SKIP(6);
4741
694k
  if (SKIP_BLANKS == 0) {
4742
10.7k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4743
10.7k
                     "Space required after 'SYSTEM'\n");
4744
10.7k
  }
4745
694k
  URI = xmlParseSystemLiteral(ctxt);
4746
694k
  if (URI == NULL) {
4747
11.7k
      xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4748
11.7k
        }
4749
1.09M
    } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
4750
732k
        SKIP(6);
4751
732k
  if (SKIP_BLANKS == 0) {
4752
212k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4753
212k
        "Space required after 'PUBLIC'\n");
4754
212k
  }
4755
732k
  *publicID = xmlParsePubidLiteral(ctxt);
4756
732k
  if (*publicID == NULL) {
4757
2.69k
      xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
4758
2.69k
  }
4759
732k
  if (strict) {
4760
      /*
4761
       * We don't handle [83] so "S SystemLiteral" is required.
4762
       */
4763
594k
      if (SKIP_BLANKS == 0) {
4764
21.3k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4765
21.3k
      "Space required after the Public Identifier\n");
4766
21.3k
      }
4767
594k
  } else {
4768
      /*
4769
       * We handle [83] so we return immediately, if
4770
       * "S SystemLiteral" is not detected. We skip blanks if no
4771
             * system literal was found, but this is harmless since we must
4772
             * be at the end of a NotationDecl.
4773
       */
4774
138k
      if (SKIP_BLANKS == 0) return(NULL);
4775
3.20k
      if ((CUR != '\'') && (CUR != '"')) return(NULL);
4776
3.20k
  }
4777
595k
  URI = xmlParseSystemLiteral(ctxt);
4778
595k
  if (URI == NULL) {
4779
22.0k
      xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4780
22.0k
        }
4781
595k
    }
4782
1.65M
    return(URI);
4783
1.79M
}
4784
4785
/**
4786
 * xmlParseCommentComplex:
4787
 * @ctxt:  an XML parser context
4788
 * @buf:  the already parsed part of the buffer
4789
 * @len:  number of bytes in the buffer
4790
 * @size:  allocated size of the buffer
4791
 *
4792
 * Skip an XML (SGML) comment <!-- .... -->
4793
 *  The spec says that "For compatibility, the string "--" (double-hyphen)
4794
 *  must not occur within comments. "
4795
 * This is the slow routine in case the accelerator for ascii didn't work
4796
 *
4797
 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4798
 */
4799
static void
4800
xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf,
4801
17.5M
                       size_t len, size_t size) {
4802
17.5M
    int q, ql;
4803
17.5M
    int r, rl;
4804
17.5M
    int cur, l;
4805
17.5M
    size_t count = 0;
4806
17.5M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4807
5.35M
                       XML_MAX_HUGE_LENGTH :
4808
17.5M
                       XML_MAX_TEXT_LENGTH;
4809
17.5M
    int inputid;
4810
4811
17.5M
    inputid = ctxt->input->id;
4812
4813
17.5M
    if (buf == NULL) {
4814
3.04M
        len = 0;
4815
3.04M
  size = XML_PARSER_BUFFER_SIZE;
4816
3.04M
  buf = (xmlChar *) xmlMallocAtomic(size);
4817
3.04M
  if (buf == NULL) {
4818
0
      xmlErrMemory(ctxt, NULL);
4819
0
      return;
4820
0
  }
4821
3.04M
    }
4822
17.5M
    GROW; /* Assure there's enough input data */
4823
17.5M
    q = CUR_CHAR(ql);
4824
17.5M
    if (q == 0)
4825
7.35M
        goto not_terminated;
4826
10.1M
    if (!IS_CHAR(q)) {
4827
24.7k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4828
24.7k
                          "xmlParseComment: invalid xmlChar value %d\n",
4829
24.7k
                    q);
4830
24.7k
  xmlFree (buf);
4831
24.7k
  return;
4832
24.7k
    }
4833
10.1M
    NEXTL(ql);
4834
10.1M
    r = CUR_CHAR(rl);
4835
10.1M
    if (r == 0)
4836
1.97M
        goto not_terminated;
4837
8.19M
    if (!IS_CHAR(r)) {
4838
1.48k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4839
1.48k
                          "xmlParseComment: invalid xmlChar value %d\n",
4840
1.48k
                    r);
4841
1.48k
  xmlFree (buf);
4842
1.48k
  return;
4843
1.48k
    }
4844
8.19M
    NEXTL(rl);
4845
8.19M
    cur = CUR_CHAR(l);
4846
8.19M
    if (cur == 0)
4847
1.13M
        goto not_terminated;
4848
470M
    while (IS_CHAR(cur) && /* checked */
4849
470M
           ((cur != '>') ||
4850
465M
      (r != '-') || (q != '-'))) {
4851
463M
  if ((r == '-') && (q == '-')) {
4852
15.1M
      xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
4853
15.1M
  }
4854
463M
  if (len + 5 >= size) {
4855
905k
      xmlChar *new_buf;
4856
905k
            size_t new_size;
4857
4858
905k
      new_size = size * 2;
4859
905k
      new_buf = (xmlChar *) xmlRealloc(buf, new_size);
4860
905k
      if (new_buf == NULL) {
4861
0
    xmlFree (buf);
4862
0
    xmlErrMemory(ctxt, NULL);
4863
0
    return;
4864
0
      }
4865
905k
      buf = new_buf;
4866
905k
            size = new_size;
4867
905k
  }
4868
463M
  COPY_BUF(ql,buf,len,q);
4869
463M
  q = r;
4870
463M
  ql = rl;
4871
463M
  r = cur;
4872
463M
  rl = l;
4873
4874
463M
  count++;
4875
463M
  if (count > 50) {
4876
7.76M
      SHRINK;
4877
7.76M
      GROW;
4878
7.76M
      count = 0;
4879
7.76M
            if (ctxt->instate == XML_PARSER_EOF) {
4880
0
    xmlFree(buf);
4881
0
    return;
4882
0
            }
4883
7.76M
  }
4884
463M
  NEXTL(l);
4885
463M
  cur = CUR_CHAR(l);
4886
463M
  if (cur == 0) {
4887
5.14M
      SHRINK;
4888
5.14M
      GROW;
4889
5.14M
      cur = CUR_CHAR(l);
4890
5.14M
  }
4891
4892
463M
        if (len > maxLength) {
4893
0
            xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4894
0
                         "Comment too big found", NULL);
4895
0
            xmlFree (buf);
4896
0
            return;
4897
0
        }
4898
463M
    }
4899
7.06M
    buf[len] = 0;
4900
7.06M
    if (cur == 0) {
4901
5.14M
  xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4902
5.14M
                       "Comment not terminated \n<!--%.50s\n", buf);
4903
5.14M
    } else if (!IS_CHAR(cur)) {
4904
7.42k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4905
7.42k
                          "xmlParseComment: invalid xmlChar value %d\n",
4906
7.42k
                    cur);
4907
1.90M
    } else {
4908
1.90M
  if (inputid != ctxt->input->id) {
4909
0
      xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4910
0
               "Comment doesn't start and stop in the same"
4911
0
                           " entity\n");
4912
0
  }
4913
1.90M
        NEXT;
4914
1.90M
  if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4915
1.90M
      (!ctxt->disableSAX))
4916
1.88M
      ctxt->sax->comment(ctxt->userData, buf);
4917
1.90M
    }
4918
7.06M
    xmlFree(buf);
4919
7.06M
    return;
4920
10.4M
not_terminated:
4921
10.4M
    xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4922
10.4M
       "Comment not terminated\n", NULL);
4923
10.4M
    xmlFree(buf);
4924
10.4M
    return;
4925
7.06M
}
4926
4927
/**
4928
 * xmlParseComment:
4929
 * @ctxt:  an XML parser context
4930
 *
4931
 * DEPRECATED: Internal function, don't use.
4932
 *
4933
 * Parse an XML (SGML) comment. Always consumes '<!'.
4934
 *
4935
 *  The spec says that "For compatibility, the string "--" (double-hyphen)
4936
 *  must not occur within comments. "
4937
 *
4938
 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4939
 */
4940
void
4941
457M
xmlParseComment(xmlParserCtxtPtr ctxt) {
4942
457M
    xmlChar *buf = NULL;
4943
457M
    size_t size = XML_PARSER_BUFFER_SIZE;
4944
457M
    size_t len = 0;
4945
457M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4946
122M
                       XML_MAX_HUGE_LENGTH :
4947
457M
                       XML_MAX_TEXT_LENGTH;
4948
457M
    xmlParserInputState state;
4949
457M
    const xmlChar *in;
4950
457M
    size_t nbchar = 0;
4951
457M
    int ccol;
4952
457M
    int inputid;
4953
4954
    /*
4955
     * Check that there is a comment right here.
4956
     */
4957
457M
    if ((RAW != '<') || (NXT(1) != '!'))
4958
0
        return;
4959
457M
    SKIP(2);
4960
457M
    if ((RAW != '-') || (NXT(1) != '-'))
4961
1.10k
        return;
4962
457M
    state = ctxt->instate;
4963
457M
    ctxt->instate = XML_PARSER_COMMENT;
4964
457M
    inputid = ctxt->input->id;
4965
457M
    SKIP(2);
4966
457M
    SHRINK;
4967
457M
    GROW;
4968
4969
    /*
4970
     * Accelerated common case where input don't need to be
4971
     * modified before passing it to the handler.
4972
     */
4973
457M
    in = ctxt->input->cur;
4974
457M
    do {
4975
457M
  if (*in == 0xA) {
4976
96.0M
      do {
4977
96.0M
    ctxt->input->line++; ctxt->input->col = 1;
4978
96.0M
    in++;
4979
96.0M
      } while (*in == 0xA);
4980
3.34M
  }
4981
577M
get_more:
4982
577M
        ccol = ctxt->input->col;
4983
2.87G
  while (((*in > '-') && (*in <= 0x7F)) ||
4984
2.87G
         ((*in >= 0x20) && (*in < '-')) ||
4985
2.87G
         (*in == 0x09)) {
4986
2.30G
        in++;
4987
2.30G
        ccol++;
4988
2.30G
  }
4989
577M
  ctxt->input->col = ccol;
4990
577M
  if (*in == 0xA) {
4991
23.1M
      do {
4992
23.1M
    ctxt->input->line++; ctxt->input->col = 1;
4993
23.1M
    in++;
4994
23.1M
      } while (*in == 0xA);
4995
17.5M
      goto get_more;
4996
17.5M
  }
4997
559M
  nbchar = in - ctxt->input->cur;
4998
  /*
4999
   * save current set of data
5000
   */
5001
559M
  if (nbchar > 0) {
5002
137M
      if ((ctxt->sax != NULL) &&
5003
137M
    (ctxt->sax->comment != NULL)) {
5004
137M
    if (buf == NULL) {
5005
46.7M
        if ((*in == '-') && (in[1] == '-'))
5006
28.4M
            size = nbchar + 1;
5007
18.3M
        else
5008
18.3M
            size = XML_PARSER_BUFFER_SIZE + nbchar;
5009
46.7M
        buf = (xmlChar *) xmlMallocAtomic(size);
5010
46.7M
        if (buf == NULL) {
5011
0
            xmlErrMemory(ctxt, NULL);
5012
0
      ctxt->instate = state;
5013
0
      return;
5014
0
        }
5015
46.7M
        len = 0;
5016
90.2M
    } else if (len + nbchar + 1 >= size) {
5017
4.13M
        xmlChar *new_buf;
5018
4.13M
        size  += len + nbchar + XML_PARSER_BUFFER_SIZE;
5019
4.13M
        new_buf = (xmlChar *) xmlRealloc(buf, size);
5020
4.13M
        if (new_buf == NULL) {
5021
0
            xmlFree (buf);
5022
0
      xmlErrMemory(ctxt, NULL);
5023
0
      ctxt->instate = state;
5024
0
      return;
5025
0
        }
5026
4.13M
        buf = new_buf;
5027
4.13M
    }
5028
137M
    memcpy(&buf[len], ctxt->input->cur, nbchar);
5029
137M
    len += nbchar;
5030
137M
    buf[len] = 0;
5031
137M
      }
5032
137M
  }
5033
559M
        if (len > maxLength) {
5034
0
            xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
5035
0
                         "Comment too big found", NULL);
5036
0
            xmlFree (buf);
5037
0
            return;
5038
0
        }
5039
559M
  ctxt->input->cur = in;
5040
559M
  if (*in == 0xA) {
5041
0
      in++;
5042
0
      ctxt->input->line++; ctxt->input->col = 1;
5043
0
  }
5044
559M
  if (*in == 0xD) {
5045
10.0M
      in++;
5046
10.0M
      if (*in == 0xA) {
5047
10.0M
    ctxt->input->cur = in;
5048
10.0M
    in++;
5049
10.0M
    ctxt->input->line++; ctxt->input->col = 1;
5050
10.0M
    goto get_more;
5051
10.0M
      }
5052
3.83k
      in--;
5053
3.83k
  }
5054
549M
  SHRINK;
5055
549M
  GROW;
5056
549M
        if (ctxt->instate == XML_PARSER_EOF) {
5057
0
            xmlFree(buf);
5058
0
            return;
5059
0
        }
5060
549M
  in = ctxt->input->cur;
5061
549M
  if (*in == '-') {
5062
532M
      if (in[1] == '-') {
5063
491M
          if (in[2] == '>') {
5064
439M
        if (ctxt->input->id != inputid) {
5065
0
      xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5066
0
                     "comment doesn't start and stop in the"
5067
0
                                       " same entity\n");
5068
0
        }
5069
439M
        SKIP(3);
5070
439M
        if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
5071
439M
            (!ctxt->disableSAX)) {
5072
311M
      if (buf != NULL)
5073
24.7M
          ctxt->sax->comment(ctxt->userData, buf);
5074
287M
      else
5075
287M
          ctxt->sax->comment(ctxt->userData, BAD_CAST "");
5076
311M
        }
5077
439M
        if (buf != NULL)
5078
32.2M
            xmlFree(buf);
5079
439M
        if (ctxt->instate != XML_PARSER_EOF)
5080
439M
      ctxt->instate = state;
5081
439M
        return;
5082
439M
    }
5083
52.0M
    if (buf != NULL) {
5084
44.5M
        xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5085
44.5M
                          "Double hyphen within comment: "
5086
44.5M
                                      "<!--%.50s\n",
5087
44.5M
              buf);
5088
44.5M
    } else
5089
7.48M
        xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5090
7.48M
                          "Double hyphen within comment\n", NULL);
5091
52.0M
                if (ctxt->instate == XML_PARSER_EOF) {
5092
0
                    xmlFree(buf);
5093
0
                    return;
5094
0
                }
5095
52.0M
    in++;
5096
52.0M
    ctxt->input->col++;
5097
52.0M
      }
5098
92.6M
      in++;
5099
92.6M
      ctxt->input->col++;
5100
92.6M
      goto get_more;
5101
532M
  }
5102
549M
    } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09) || (*in == 0x0a));
5103
17.5M
    xmlParseCommentComplex(ctxt, buf, len, size);
5104
17.5M
    ctxt->instate = state;
5105
17.5M
    return;
5106
457M
}
5107
5108
5109
/**
5110
 * xmlParsePITarget:
5111
 * @ctxt:  an XML parser context
5112
 *
5113
 * DEPRECATED: Internal function, don't use.
5114
 *
5115
 * parse the name of a PI
5116
 *
5117
 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
5118
 *
5119
 * Returns the PITarget name or NULL
5120
 */
5121
5122
const xmlChar *
5123
2.49M
xmlParsePITarget(xmlParserCtxtPtr ctxt) {
5124
2.49M
    const xmlChar *name;
5125
5126
2.49M
    name = xmlParseName(ctxt);
5127
2.49M
    if ((name != NULL) &&
5128
2.49M
        ((name[0] == 'x') || (name[0] == 'X')) &&
5129
2.49M
        ((name[1] == 'm') || (name[1] == 'M')) &&
5130
2.49M
        ((name[2] == 'l') || (name[2] == 'L'))) {
5131
456k
  int i;
5132
456k
  if ((name[0] == 'x') && (name[1] == 'm') &&
5133
456k
      (name[2] == 'l') && (name[3] == 0)) {
5134
352k
      xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5135
352k
     "XML declaration allowed only at the start of the document\n");
5136
352k
      return(name);
5137
352k
  } else if (name[3] == 0) {
5138
6.81k
      xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
5139
6.81k
      return(name);
5140
6.81k
  }
5141
173k
  for (i = 0;;i++) {
5142
173k
      if (xmlW3CPIs[i] == NULL) break;
5143
135k
      if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
5144
59.4k
          return(name);
5145
135k
  }
5146
37.6k
  xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5147
37.6k
          "xmlParsePITarget: invalid name prefix 'xml'\n",
5148
37.6k
          NULL, NULL);
5149
37.6k
    }
5150
2.07M
    if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
5151
9.98k
  xmlNsErr(ctxt, XML_NS_ERR_COLON,
5152
9.98k
     "colons are forbidden from PI names '%s'\n", name, NULL, NULL);
5153
9.98k
    }
5154
2.07M
    return(name);
5155
2.49M
}
5156
5157
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * Parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype information.
 * This will add the given catalog to the parsing context in order
 * to be used if there is a resolution need further down in the document.
 *
 * The value must read: optional blanks, "catalog", optional blanks, '=',
 * optional blanks, a quoted URL and optional trailing blanks. A syntax
 * error yields a XML_WAR_CATALOG_PI warning, except when the '=' is
 * missing, which is ignored silently.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    xmlChar *URL = NULL;
    const xmlChar *cur = catalog;
    const xmlChar *start;
    xmlChar quote;

    for (; IS_BLANK_CH(*cur); cur++)
        ;
    if (xmlStrncmp(cur, BAD_CAST"catalog", 7))
        goto error;

    for (cur += 7; IS_BLANK_CH(*cur); cur++)
        ;
    if (*cur != '=') {
        return;
    }

    for (cur++; IS_BLANK_CH(*cur); cur++)
        ;
    quote = *cur;
    if ((quote != '\'') && (quote != '"'))
        goto error;

    /* scan up to the matching closing quote */
    start = ++cur;
    for (; (*cur != 0) && (*cur != quote); cur++)
        ;
    if (*cur == 0)
        goto error;
    URL = xmlStrndup(start, cur - start);

    /* only blanks may follow the closing quote */
    for (cur++; IS_BLANK_CH(*cur); cur++)
        ;
    if (*cur != 0)
        goto error;

    if (URL != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
        xmlFree(URL);
    }
    return;

error:
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
                  "Catalog PI syntax error: %s\n",
                  catalog, NULL);
    if (URL != NULL)
        xmlFree(URL);
}
#endif
5218
5219
/**
5220
 * xmlParsePI:
5221
 * @ctxt:  an XML parser context
5222
 *
5223
 * DEPRECATED: Internal function, don't use.
5224
 *
5225
 * parse an XML Processing Instruction.
5226
 *
5227
 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
5228
 *
5229
 * The processing is transferred to SAX once parsed.
5230
 */
5231
5232
void
5233
2.49M
xmlParsePI(xmlParserCtxtPtr ctxt) {
5234
2.49M
    xmlChar *buf = NULL;
5235
2.49M
    size_t len = 0;
5236
2.49M
    size_t size = XML_PARSER_BUFFER_SIZE;
5237
2.49M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
5238
371k
                       XML_MAX_HUGE_LENGTH :
5239
2.49M
                       XML_MAX_TEXT_LENGTH;
5240
2.49M
    int cur, l;
5241
2.49M
    const xmlChar *target;
5242
2.49M
    xmlParserInputState state;
5243
2.49M
    int count = 0;
5244
5245
2.49M
    if ((RAW == '<') && (NXT(1) == '?')) {
5246
2.49M
  int inputid = ctxt->input->id;
5247
2.49M
  state = ctxt->instate;
5248
2.49M
        ctxt->instate = XML_PARSER_PI;
5249
  /*
5250
   * this is a Processing Instruction.
5251
   */
5252
2.49M
  SKIP(2);
5253
2.49M
  SHRINK;
5254
5255
  /*
5256
   * Parse the target name and check for special support like
5257
   * namespace.
5258
   */
5259
2.49M
        target = xmlParsePITarget(ctxt);
5260
2.49M
  if (target != NULL) {
5261
2.41M
      if ((RAW == '?') && (NXT(1) == '>')) {
5262
84.8k
    if (inputid != ctxt->input->id) {
5263
0
        xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5264
0
                             "PI declaration doesn't start and stop in"
5265
0
                                   " the same entity\n");
5266
0
    }
5267
84.8k
    SKIP(2);
5268
5269
    /*
5270
     * SAX: PI detected.
5271
     */
5272
84.8k
    if ((ctxt->sax) && (!ctxt->disableSAX) &&
5273
84.8k
        (ctxt->sax->processingInstruction != NULL))
5274
74.2k
        ctxt->sax->processingInstruction(ctxt->userData,
5275
74.2k
                                         target, NULL);
5276
84.8k
    if (ctxt->instate != XML_PARSER_EOF)
5277
84.8k
        ctxt->instate = state;
5278
84.8k
    return;
5279
84.8k
      }
5280
2.32M
      buf = (xmlChar *) xmlMallocAtomic(size);
5281
2.32M
      if (buf == NULL) {
5282
0
    xmlErrMemory(ctxt, NULL);
5283
0
    ctxt->instate = state;
5284
0
    return;
5285
0
      }
5286
2.32M
      if (SKIP_BLANKS == 0) {
5287
1.57M
    xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
5288
1.57M
        "ParsePI: PI %s space expected\n", target);
5289
1.57M
      }
5290
2.32M
      cur = CUR_CHAR(l);
5291
98.3M
      while (IS_CHAR(cur) && /* checked */
5292
98.3M
       ((cur != '?') || (NXT(1) != '>'))) {
5293
95.9M
    if (len + 5 >= size) {
5294
296k
        xmlChar *tmp;
5295
296k
                    size_t new_size = size * 2;
5296
296k
        tmp = (xmlChar *) xmlRealloc(buf, new_size);
5297
296k
        if (tmp == NULL) {
5298
0
      xmlErrMemory(ctxt, NULL);
5299
0
      xmlFree(buf);
5300
0
      ctxt->instate = state;
5301
0
      return;
5302
0
        }
5303
296k
        buf = tmp;
5304
296k
                    size = new_size;
5305
296k
    }
5306
95.9M
    count++;
5307
95.9M
    if (count > 50) {
5308
1.40M
        SHRINK;
5309
1.40M
        GROW;
5310
1.40M
                    if (ctxt->instate == XML_PARSER_EOF) {
5311
0
                        xmlFree(buf);
5312
0
                        return;
5313
0
                    }
5314
1.40M
        count = 0;
5315
1.40M
    }
5316
95.9M
    COPY_BUF(l,buf,len,cur);
5317
95.9M
    NEXTL(l);
5318
95.9M
    cur = CUR_CHAR(l);
5319
95.9M
    if (cur == 0) {
5320
1.81M
        SHRINK;
5321
1.81M
        GROW;
5322
1.81M
        cur = CUR_CHAR(l);
5323
1.81M
    }
5324
95.9M
                if (len > maxLength) {
5325
0
                    xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5326
0
                                      "PI %s too big found", target);
5327
0
                    xmlFree(buf);
5328
0
                    ctxt->instate = state;
5329
0
                    return;
5330
0
                }
5331
95.9M
      }
5332
2.32M
      buf[len] = 0;
5333
2.32M
      if (cur != '?') {
5334
1.84M
    xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5335
1.84M
          "ParsePI: PI %s never end ...\n", target);
5336
1.84M
      } else {
5337
484k
    if (inputid != ctxt->input->id) {
5338
135
        xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5339
135
                             "PI declaration doesn't start and stop in"
5340
135
                                   " the same entity\n");
5341
135
    }
5342
484k
    SKIP(2);
5343
5344
484k
#ifdef LIBXML_CATALOG_ENABLED
5345
484k
    if (((state == XML_PARSER_MISC) ||
5346
484k
               (state == XML_PARSER_START)) &&
5347
484k
        (xmlStrEqual(target, XML_CATALOG_PI))) {
5348
4.64k
        xmlCatalogAllow allow = xmlCatalogGetDefaults();
5349
4.64k
        if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5350
4.64k
      (allow == XML_CATA_ALLOW_ALL))
5351
4.64k
      xmlParseCatalogPI(ctxt, buf);
5352
4.64k
    }
5353
484k
#endif
5354
5355
5356
    /*
5357
     * SAX: PI detected.
5358
     */
5359
484k
    if ((ctxt->sax) && (!ctxt->disableSAX) &&
5360
484k
        (ctxt->sax->processingInstruction != NULL))
5361
415k
        ctxt->sax->processingInstruction(ctxt->userData,
5362
415k
                                         target, buf);
5363
484k
      }
5364
2.32M
      xmlFree(buf);
5365
2.32M
  } else {
5366
82.1k
      xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
5367
82.1k
  }
5368
2.40M
  if (ctxt->instate != XML_PARSER_EOF)
5369
2.40M
      ctxt->instate = state;
5370
2.40M
    }
5371
2.49M
}
5372
5373
/**
5374
 * xmlParseNotationDecl:
5375
 * @ctxt:  an XML parser context
5376
 *
5377
 * DEPRECATED: Internal function, don't use.
5378
 *
5379
 * Parse a notation declaration. Always consumes '<!'.
5380
 *
5381
 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID |  PublicID) S? '>'
5382
 *
5383
 * Hence there is actually 3 choices:
5384
 *     'PUBLIC' S PubidLiteral
5385
 *     'PUBLIC' S PubidLiteral S SystemLiteral
5386
 * and 'SYSTEM' S SystemLiteral
5387
 *
5388
 * See the NOTE on xmlParseExternalID().
5389
 */
5390
5391
void
5392
203k
xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
5393
203k
    const xmlChar *name;
5394
203k
    xmlChar *Pubid;
5395
203k
    xmlChar *Systemid;
5396
5397
203k
    if ((CUR != '<') || (NXT(1) != '!'))
5398
0
        return;
5399
203k
    SKIP(2);
5400
5401
203k
    if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5402
202k
  int inputid = ctxt->input->id;
5403
202k
  SHRINK;
5404
202k
  SKIP(8);
5405
202k
  if (SKIP_BLANKS == 0) {
5406
540
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5407
540
         "Space required after '<!NOTATION'\n");
5408
540
      return;
5409
540
  }
5410
5411
201k
        name = xmlParseName(ctxt);
5412
201k
  if (name == NULL) {
5413
824
      xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5414
824
      return;
5415
824
  }
5416
200k
  if (xmlStrchr(name, ':') != NULL) {
5417
567
      xmlNsErr(ctxt, XML_NS_ERR_COLON,
5418
567
         "colons are forbidden from notation names '%s'\n",
5419
567
         name, NULL, NULL);
5420
567
  }
5421
200k
  if (SKIP_BLANKS == 0) {
5422
917
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5423
917
         "Space required after the NOTATION name'\n");
5424
917
      return;
5425
917
  }
5426
5427
  /*
5428
   * Parse the IDs.
5429
   */
5430
199k
  Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
5431
199k
  SKIP_BLANKS;
5432
5433
199k
  if (RAW == '>') {
5434
162k
      if (inputid != ctxt->input->id) {
5435
146
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5436
146
                         "Notation declaration doesn't start and stop"
5437
146
                               " in the same entity\n");
5438
146
      }
5439
162k
      NEXT;
5440
162k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5441
162k
    (ctxt->sax->notationDecl != NULL))
5442
139k
    ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5443
162k
  } else {
5444
37.2k
      xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5445
37.2k
  }
5446
199k
  if (Systemid != NULL) xmlFree(Systemid);
5447
199k
  if (Pubid != NULL) xmlFree(Pubid);
5448
199k
    }
5449
203k
}
5450
5451
/**
5452
 * xmlParseEntityDecl:
5453
 * @ctxt:  an XML parser context
5454
 *
5455
 * DEPRECATED: Internal function, don't use.
5456
 *
5457
 * Parse an entity declaration. Always consumes '<!'.
5458
 *
5459
 * [70] EntityDecl ::= GEDecl | PEDecl
5460
 *
5461
 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5462
 *
5463
 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5464
 *
5465
 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5466
 *
5467
 * [74] PEDef ::= EntityValue | ExternalID
5468
 *
5469
 * [76] NDataDecl ::= S 'NDATA' S Name
5470
 *
5471
 * [ VC: Notation Declared ]
5472
 * The Name must match the declared name of a notation.
5473
 */
5474
5475
void
5476
11.0M
xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
5477
11.0M
    const xmlChar *name = NULL;
5478
11.0M
    xmlChar *value = NULL;
5479
11.0M
    xmlChar *URI = NULL, *literal = NULL;
5480
11.0M
    const xmlChar *ndata = NULL;
5481
11.0M
    int isParameter = 0;
5482
11.0M
    xmlChar *orig = NULL;
5483
5484
11.0M
    if ((CUR != '<') || (NXT(1) != '!'))
5485
0
        return;
5486
11.0M
    SKIP(2);
5487
5488
    /* GROW; done in the caller */
5489
11.0M
    if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
5490
11.0M
  int inputid = ctxt->input->id;
5491
11.0M
  SHRINK;
5492
11.0M
  SKIP(6);
5493
11.0M
  if (SKIP_BLANKS == 0) {
5494
10.0k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5495
10.0k
         "Space required after '<!ENTITY'\n");
5496
10.0k
  }
5497
5498
11.0M
  if (RAW == '%') {
5499
3.90M
      NEXT;
5500
3.90M
      if (SKIP_BLANKS == 0) {
5501
871
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5502
871
             "Space required after '%%'\n");
5503
871
      }
5504
3.90M
      isParameter = 1;
5505
3.90M
  }
5506
5507
11.0M
        name = xmlParseName(ctxt);
5508
11.0M
  if (name == NULL) {
5509
779k
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5510
779k
                     "xmlParseEntityDecl: no name\n");
5511
779k
            return;
5512
779k
  }
5513
10.2M
  if (xmlStrchr(name, ':') != NULL) {
5514
2.26k
      xmlNsErr(ctxt, XML_NS_ERR_COLON,
5515
2.26k
         "colons are forbidden from entities names '%s'\n",
5516
2.26k
         name, NULL, NULL);
5517
2.26k
  }
5518
10.2M
  if (SKIP_BLANKS == 0) {
5519
13.2k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5520
13.2k
         "Space required after the entity name\n");
5521
13.2k
  }
5522
5523
10.2M
  ctxt->instate = XML_PARSER_ENTITY_DECL;
5524
  /*
5525
   * handle the various case of definitions...
5526
   */
5527
10.2M
  if (isParameter) {
5528
3.90M
      if ((RAW == '"') || (RAW == '\'')) {
5529
3.84M
          value = xmlParseEntityValue(ctxt, &orig);
5530
3.84M
    if (value) {
5531
3.83M
        if ((ctxt->sax != NULL) &&
5532
3.83M
      (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5533
3.73M
      ctxt->sax->entityDecl(ctxt->userData, name,
5534
3.73M
                        XML_INTERNAL_PARAMETER_ENTITY,
5535
3.73M
            NULL, NULL, value);
5536
3.83M
    }
5537
3.84M
      } else {
5538
55.2k
          URI = xmlParseExternalID(ctxt, &literal, 1);
5539
55.2k
    if ((URI == NULL) && (literal == NULL)) {
5540
2.20k
        xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5541
2.20k
    }
5542
55.2k
    if (URI) {
5543
52.7k
        xmlURIPtr uri;
5544
5545
52.7k
        uri = xmlParseURI((const char *) URI);
5546
52.7k
        if (uri == NULL) {
5547
1.69k
            xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5548
1.69k
             "Invalid URI: %s\n", URI);
5549
      /*
5550
       * This really ought to be a well formedness error
5551
       * but the XML Core WG decided otherwise c.f. issue
5552
       * E26 of the XML erratas.
5553
       */
5554
51.0k
        } else {
5555
51.0k
      if (uri->fragment != NULL) {
5556
          /*
5557
           * Okay this is foolish to block those but not
5558
           * invalid URIs.
5559
           */
5560
163
          xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5561
50.8k
      } else {
5562
50.8k
          if ((ctxt->sax != NULL) &&
5563
50.8k
        (!ctxt->disableSAX) &&
5564
50.8k
        (ctxt->sax->entityDecl != NULL))
5565
50.0k
        ctxt->sax->entityDecl(ctxt->userData, name,
5566
50.0k
              XML_EXTERNAL_PARAMETER_ENTITY,
5567
50.0k
              literal, URI, NULL);
5568
50.8k
      }
5569
51.0k
      xmlFreeURI(uri);
5570
51.0k
        }
5571
52.7k
    }
5572
55.2k
      }
5573
6.35M
  } else {
5574
6.35M
      if ((RAW == '"') || (RAW == '\'')) {
5575
5.45M
          value = xmlParseEntityValue(ctxt, &orig);
5576
5.45M
    if ((ctxt->sax != NULL) &&
5577
5.45M
        (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5578
4.67M
        ctxt->sax->entityDecl(ctxt->userData, name,
5579
4.67M
        XML_INTERNAL_GENERAL_ENTITY,
5580
4.67M
        NULL, NULL, value);
5581
    /*
5582
     * For expat compatibility in SAX mode.
5583
     */
5584
5.45M
    if ((ctxt->myDoc == NULL) ||
5585
5.45M
        (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5586
222k
        if (ctxt->myDoc == NULL) {
5587
4.73k
      ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5588
4.73k
      if (ctxt->myDoc == NULL) {
5589
0
          xmlErrMemory(ctxt, "New Doc failed");
5590
0
          return;
5591
0
      }
5592
4.73k
      ctxt->myDoc->properties = XML_DOC_INTERNAL;
5593
4.73k
        }
5594
222k
        if (ctxt->myDoc->intSubset == NULL)
5595
4.73k
      ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5596
4.73k
              BAD_CAST "fake", NULL, NULL);
5597
5598
222k
        xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5599
222k
                    NULL, NULL, value);
5600
222k
    }
5601
5.45M
      } else {
5602
895k
          URI = xmlParseExternalID(ctxt, &literal, 1);
5603
895k
    if ((URI == NULL) && (literal == NULL)) {
5604
27.1k
        xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5605
27.1k
    }
5606
895k
    if (URI) {
5607
850k
        xmlURIPtr uri;
5608
5609
850k
        uri = xmlParseURI((const char *)URI);
5610
850k
        if (uri == NULL) {
5611
30.0k
            xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5612
30.0k
             "Invalid URI: %s\n", URI);
5613
      /*
5614
       * This really ought to be a well formedness error
5615
       * but the XML Core WG decided otherwise c.f. issue
5616
       * E26 of the XML erratas.
5617
       */
5618
820k
        } else {
5619
820k
      if (uri->fragment != NULL) {
5620
          /*
5621
           * Okay this is foolish to block those but not
5622
           * invalid URIs.
5623
           */
5624
4.35k
          xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5625
4.35k
      }
5626
820k
      xmlFreeURI(uri);
5627
820k
        }
5628
850k
    }
5629
895k
    if ((RAW != '>') && (SKIP_BLANKS == 0)) {
5630
19.3k
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5631
19.3k
           "Space required before 'NDATA'\n");
5632
19.3k
    }
5633
895k
    if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
5634
384k
        SKIP(5);
5635
384k
        if (SKIP_BLANKS == 0) {
5636
14.5k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5637
14.5k
               "Space required after 'NDATA'\n");
5638
14.5k
        }
5639
384k
        ndata = xmlParseName(ctxt);
5640
384k
        if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5641
384k
            (ctxt->sax->unparsedEntityDecl != NULL))
5642
310k
      ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5643
310k
            literal, URI, ndata);
5644
510k
    } else {
5645
510k
        if ((ctxt->sax != NULL) &&
5646
510k
            (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5647
471k
      ctxt->sax->entityDecl(ctxt->userData, name,
5648
471k
            XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5649
471k
            literal, URI, NULL);
5650
        /*
5651
         * For expat compatibility in SAX mode.
5652
         * assuming the entity replacement was asked for
5653
         */
5654
510k
        if ((ctxt->replaceEntities != 0) &&
5655
510k
      ((ctxt->myDoc == NULL) ||
5656
273k
      (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5657
3.45k
      if (ctxt->myDoc == NULL) {
5658
1.80k
          ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5659
1.80k
          if (ctxt->myDoc == NULL) {
5660
0
              xmlErrMemory(ctxt, "New Doc failed");
5661
0
        return;
5662
0
          }
5663
1.80k
          ctxt->myDoc->properties = XML_DOC_INTERNAL;
5664
1.80k
      }
5665
5666
3.45k
      if (ctxt->myDoc->intSubset == NULL)
5667
1.80k
          ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5668
1.80k
            BAD_CAST "fake", NULL, NULL);
5669
3.45k
      xmlSAX2EntityDecl(ctxt, name,
5670
3.45k
                  XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5671
3.45k
                  literal, URI, NULL);
5672
3.45k
        }
5673
510k
    }
5674
895k
      }
5675
6.35M
  }
5676
10.2M
  if (ctxt->instate == XML_PARSER_EOF)
5677
881
      goto done;
5678
10.2M
  SKIP_BLANKS;
5679
10.2M
  if (RAW != '>') {
5680
35.0k
      xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
5681
35.0k
              "xmlParseEntityDecl: entity %s not terminated\n", name);
5682
35.0k
      xmlHaltParser(ctxt);
5683
10.2M
  } else {
5684
10.2M
      if (inputid != ctxt->input->id) {
5685
3.12k
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5686
3.12k
                         "Entity declaration doesn't start and stop in"
5687
3.12k
                               " the same entity\n");
5688
3.12k
      }
5689
10.2M
      NEXT;
5690
10.2M
  }
5691
10.2M
  if (orig != NULL) {
5692
      /*
5693
       * Ugly mechanism to save the raw entity value.
5694
       */
5695
9.26M
      xmlEntityPtr cur = NULL;
5696
5697
9.26M
      if (isParameter) {
5698
3.83M
          if ((ctxt->sax != NULL) &&
5699
3.83M
        (ctxt->sax->getParameterEntity != NULL))
5700
3.83M
        cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5701
5.43M
      } else {
5702
5.43M
          if ((ctxt->sax != NULL) &&
5703
5.43M
        (ctxt->sax->getEntity != NULL))
5704
5.43M
        cur = ctxt->sax->getEntity(ctxt->userData, name);
5705
5.43M
    if ((cur == NULL) && (ctxt->userData==ctxt)) {
5706
548k
        cur = xmlSAX2GetEntity(ctxt, name);
5707
548k
    }
5708
5.43M
      }
5709
9.26M
            if ((cur != NULL) && (cur->orig == NULL)) {
5710
8.50M
    cur->orig = orig;
5711
8.50M
                orig = NULL;
5712
8.50M
      }
5713
9.26M
  }
5714
5715
10.2M
done:
5716
10.2M
  if (value != NULL) xmlFree(value);
5717
10.2M
  if (URI != NULL) xmlFree(URI);
5718
10.2M
  if (literal != NULL) xmlFree(literal);
5719
10.2M
        if (orig != NULL) xmlFree(orig);
5720
10.2M
    }
5721
11.0M
}
5722
5723
/**
5724
 * xmlParseDefaultDecl:
5725
 * @ctxt:  an XML parser context
5726
 * @value:  Receive a possible fixed default value for the attribute
5727
 *
5728
 * DEPRECATED: Internal function, don't use.
5729
 *
5730
 * Parse an attribute default declaration
5731
 *
5732
 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5733
 *
5734
 * [ VC: Required Attribute ]
5735
 * if the default declaration is the keyword #REQUIRED, then the
5736
 * attribute must be specified for all elements of the type in the
5737
 * attribute-list declaration.
5738
 *
5739
 * [ VC: Attribute Default Legal ]
5740
 * The declared default value must meet the lexical constraints of
5741
 * the declared attribute type c.f. xmlValidateAttributeDecl()
5742
 *
5743
 * [ VC: Fixed Attribute Default ]
5744
 * if an attribute has a default value declared with the #FIXED
5745
 * keyword, instances of that attribute must match the default value.
5746
 *
5747
 * [ WFC: No < in Attribute Values ]
5748
 * handled in xmlParseAttValue()
5749
 *
5750
 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5751
 *          or XML_ATTRIBUTE_FIXED.
5752
 */
5753
5754
int
5755
24.8M
xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5756
24.8M
    int val;
5757
24.8M
    xmlChar *ret;
5758
5759
24.8M
    *value = NULL;
5760
24.8M
    if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
5761
1.82M
  SKIP(9);
5762
1.82M
  return(XML_ATTRIBUTE_REQUIRED);
5763
1.82M
    }
5764
23.0M
    if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
5765
21.1M
  SKIP(8);
5766
21.1M
  return(XML_ATTRIBUTE_IMPLIED);
5767
21.1M
    }
5768
1.86M
    val = XML_ATTRIBUTE_NONE;
5769
1.86M
    if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
5770
1.15M
  SKIP(6);
5771
1.15M
  val = XML_ATTRIBUTE_FIXED;
5772
1.15M
  if (SKIP_BLANKS == 0) {
5773
492
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5774
492
         "Space required after '#FIXED'\n");
5775
492
  }
5776
1.15M
    }
5777
1.86M
    ret = xmlParseAttValue(ctxt);
5778
1.86M
    ctxt->instate = XML_PARSER_DTD;
5779
1.86M
    if (ret == NULL) {
5780
11.1k
  xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
5781
11.1k
           "Attribute default value declaration error\n");
5782
11.1k
    } else
5783
1.84M
        *value = ret;
5784
1.86M
    return(val);
5785
23.0M
}
5786
5787
/**
5788
 * xmlParseNotationType:
5789
 * @ctxt:  an XML parser context
5790
 *
5791
 * DEPRECATED: Internal function, don't use.
5792
 *
5793
 * parse an Notation attribute type.
5794
 *
5795
 * Note: the leading 'NOTATION' S part has already being parsed...
5796
 *
5797
 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5798
 *
5799
 * [ VC: Notation Attributes ]
5800
 * Values of this type must match one of the notation names included
5801
 * in the declaration; all notation names in the declaration must be declared.
5802
 *
5803
 * Returns: the notation attribute tree built while parsing
5804
 */
5805
5806
xmlEnumerationPtr
5807
21.7k
xmlParseNotationType(xmlParserCtxtPtr ctxt) {
5808
21.7k
    const xmlChar *name;
5809
21.7k
    xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5810
5811
21.7k
    if (RAW != '(') {
5812
969
  xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5813
969
  return(NULL);
5814
969
    }
5815
20.7k
    SHRINK;
5816
29.0k
    do {
5817
29.0k
        NEXT;
5818
29.0k
  SKIP_BLANKS;
5819
29.0k
        name = xmlParseName(ctxt);
5820
29.0k
  if (name == NULL) {
5821
848
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5822
848
         "Name expected in NOTATION declaration\n");
5823
848
            xmlFreeEnumeration(ret);
5824
848
      return(NULL);
5825
848
  }
5826
28.2k
  tmp = ret;
5827
50.8k
  while (tmp != NULL) {
5828
24.1k
      if (xmlStrEqual(name, tmp->name)) {
5829
1.51k
    xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5830
1.51k
    "standalone: attribute notation value token %s duplicated\n",
5831
1.51k
         name, NULL);
5832
1.51k
    if (!xmlDictOwns(ctxt->dict, name))
5833
0
        xmlFree((xmlChar *) name);
5834
1.51k
    break;
5835
1.51k
      }
5836
22.5k
      tmp = tmp->next;
5837
22.5k
  }
5838
28.2k
  if (tmp == NULL) {
5839
26.7k
      cur = xmlCreateEnumeration(name);
5840
26.7k
      if (cur == NULL) {
5841
0
                xmlFreeEnumeration(ret);
5842
0
                return(NULL);
5843
0
            }
5844
26.7k
      if (last == NULL) ret = last = cur;
5845
6.61k
      else {
5846
6.61k
    last->next = cur;
5847
6.61k
    last = cur;
5848
6.61k
      }
5849
26.7k
  }
5850
28.2k
  SKIP_BLANKS;
5851
28.2k
    } while (RAW == '|');
5852
19.9k
    if (RAW != ')') {
5853
5.55k
  xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5854
5.55k
        xmlFreeEnumeration(ret);
5855
5.55k
  return(NULL);
5856
5.55k
    }
5857
14.3k
    NEXT;
5858
14.3k
    return(ret);
5859
19.9k
}
5860
5861
/**
5862
 * xmlParseEnumerationType:
5863
 * @ctxt:  an XML parser context
5864
 *
5865
 * DEPRECATED: Internal function, don't use.
5866
 *
5867
 * parse an Enumeration attribute type.
5868
 *
5869
 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5870
 *
5871
 * [ VC: Enumeration ]
5872
 * Values of this type must match one of the Nmtoken tokens in
5873
 * the declaration
5874
 *
5875
 * Returns: the enumeration attribute tree built while parsing
5876
 */
5877
5878
xmlEnumerationPtr
5879
2.38M
xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
5880
2.38M
    xmlChar *name;
5881
2.38M
    xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5882
5883
2.38M
    if (RAW != '(') {
5884
20.8k
  xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
5885
20.8k
  return(NULL);
5886
20.8k
    }
5887
2.36M
    SHRINK;
5888
7.46M
    do {
5889
7.46M
        NEXT;
5890
7.46M
  SKIP_BLANKS;
5891
7.46M
        name = xmlParseNmtoken(ctxt);
5892
7.46M
  if (name == NULL) {
5893
1.74k
      xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
5894
1.74k
      return(ret);
5895
1.74k
  }
5896
7.46M
  tmp = ret;
5897
19.9M
  while (tmp != NULL) {
5898
12.4M
      if (xmlStrEqual(name, tmp->name)) {
5899
4.91k
    xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5900
4.91k
    "standalone: attribute enumeration value token %s duplicated\n",
5901
4.91k
         name, NULL);
5902
4.91k
    if (!xmlDictOwns(ctxt->dict, name))
5903
4.91k
        xmlFree(name);
5904
4.91k
    break;
5905
4.91k
      }
5906
12.4M
      tmp = tmp->next;
5907
12.4M
  }
5908
7.46M
  if (tmp == NULL) {
5909
7.45M
      cur = xmlCreateEnumeration(name);
5910
7.45M
      if (!xmlDictOwns(ctxt->dict, name))
5911
7.45M
    xmlFree(name);
5912
7.45M
      if (cur == NULL) {
5913
0
                xmlFreeEnumeration(ret);
5914
0
                return(NULL);
5915
0
            }
5916
7.45M
      if (last == NULL) ret = last = cur;
5917
5.09M
      else {
5918
5.09M
    last->next = cur;
5919
5.09M
    last = cur;
5920
5.09M
      }
5921
7.45M
  }
5922
7.46M
  SKIP_BLANKS;
5923
7.46M
    } while (RAW == '|');
5924
2.36M
    if (RAW != ')') {
5925
6.54k
  xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
5926
6.54k
  return(ret);
5927
6.54k
    }
5928
2.35M
    NEXT;
5929
2.35M
    return(ret);
5930
2.36M
}
5931
5932
/**
5933
 * xmlParseEnumeratedType:
5934
 * @ctxt:  an XML parser context
5935
 * @tree:  the enumeration tree built while parsing
5936
 *
5937
 * DEPRECATED: Internal function, don't use.
5938
 *
5939
 * parse an Enumerated attribute type.
5940
 *
5941
 * [57] EnumeratedType ::= NotationType | Enumeration
5942
 *
5943
 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5944
 *
5945
 *
5946
 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5947
 */
5948
5949
int
5950
2.40M
xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5951
2.40M
    if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5952
22.0k
  SKIP(8);
5953
22.0k
  if (SKIP_BLANKS == 0) {
5954
276
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5955
276
         "Space required after 'NOTATION'\n");
5956
276
      return(0);
5957
276
  }
5958
21.7k
  *tree = xmlParseNotationType(ctxt);
5959
21.7k
  if (*tree == NULL) return(0);
5960
14.3k
  return(XML_ATTRIBUTE_NOTATION);
5961
21.7k
    }
5962
2.38M
    *tree = xmlParseEnumerationType(ctxt);
5963
2.38M
    if (*tree == NULL) return(0);
5964
2.36M
    return(XML_ATTRIBUTE_ENUMERATION);
5965
2.38M
}
5966
5967
/**
5968
 * xmlParseAttributeType:
5969
 * @ctxt:  an XML parser context
5970
 * @tree:  the enumeration tree built while parsing
5971
 *
5972
 * DEPRECATED: Internal function, don't use.
5973
 *
5974
 * parse the Attribute list def for an element
5975
 *
5976
 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5977
 *
5978
 * [55] StringType ::= 'CDATA'
5979
 *
5980
 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5981
 *                        'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5982
 *
5983
 * Validity constraints for attribute values syntax are checked in
5984
 * xmlValidateAttributeValue()
5985
 *
5986
 * [ VC: ID ]
5987
 * Values of type ID must match the Name production. A name must not
5988
 * appear more than once in an XML document as a value of this type;
5989
 * i.e., ID values must uniquely identify the elements which bear them.
5990
 *
5991
 * [ VC: One ID per Element Type ]
5992
 * No element type may have more than one ID attribute specified.
5993
 *
5994
 * [ VC: ID Attribute Default ]
5995
 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
5996
 *
5997
 * [ VC: IDREF ]
5998
 * Values of type IDREF must match the Name production, and values
5999
 * of type IDREFS must match Names; each IDREF Name must match the value
6000
 * of an ID attribute on some element in the XML document; i.e. IDREF
6001
 * values must match the value of some ID attribute.
6002
 *
6003
 * [ VC: Entity Name ]
6004
 * Values of type ENTITY must match the Name production, values
6005
 * of type ENTITIES must match Names; each Entity Name must match the
6006
 * name of an unparsed entity declared in the DTD.
6007
 *
6008
 * [ VC: Name Token ]
6009
 * Values of type NMTOKEN must match the Nmtoken production; values
6010
 * of type NMTOKENS must match Nmtokens.
6011
 *
6012
 * Returns the attribute type
6013
 */
6014
int
6015
24.9M
xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
6016
24.9M
    SHRINK;
6017
24.9M
    if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
6018
8.59M
  SKIP(5);
6019
8.59M
  return(XML_ATTRIBUTE_CDATA);
6020
16.3M
     } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
6021
82.3k
  SKIP(6);
6022
82.3k
  return(XML_ATTRIBUTE_IDREFS);
6023
16.2M
     } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
6024
366k
  SKIP(5);
6025
366k
  return(XML_ATTRIBUTE_IDREF);
6026
15.8M
     } else if ((RAW == 'I') && (NXT(1) == 'D')) {
6027
6.90M
        SKIP(2);
6028
6.90M
  return(XML_ATTRIBUTE_ID);
6029
8.95M
     } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
6030
186k
  SKIP(6);
6031
186k
  return(XML_ATTRIBUTE_ENTITY);
6032
8.76M
     } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
6033
86.9k
  SKIP(8);
6034
86.9k
  return(XML_ATTRIBUTE_ENTITIES);
6035
8.68M
     } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
6036
887k
  SKIP(8);
6037
887k
  return(XML_ATTRIBUTE_NMTOKENS);
6038
7.79M
     } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
6039
5.38M
  SKIP(7);
6040
5.38M
  return(XML_ATTRIBUTE_NMTOKEN);
6041
5.38M
     }
6042
2.40M
     return(xmlParseEnumeratedType(ctxt, tree));
6043
24.9M
}
6044
6045
/**
6046
 * xmlParseAttributeListDecl:
6047
 * @ctxt:  an XML parser context
6048
 *
6049
 * DEPRECATED: Internal function, don't use.
6050
 *
6051
 * Parse an attribute list declaration for an element. Always consumes '<!'.
6052
 *
6053
 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
6054
 *
6055
 * [53] AttDef ::= S Name S AttType S DefaultDecl
6056
 *
6057
 */
6058
void
6059
8.77M
xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
6060
8.77M
    const xmlChar *elemName;
6061
8.77M
    const xmlChar *attrName;
6062
8.77M
    xmlEnumerationPtr tree;
6063
6064
8.77M
    if ((CUR != '<') || (NXT(1) != '!'))
6065
0
        return;
6066
8.77M
    SKIP(2);
6067
6068
8.77M
    if (CMP7(CUR_PTR, 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
6069
8.77M
  int inputid = ctxt->input->id;
6070
6071
8.77M
  SKIP(7);
6072
8.77M
  if (SKIP_BLANKS == 0) {
6073
10.8k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6074
10.8k
                     "Space required after '<!ATTLIST'\n");
6075
10.8k
  }
6076
8.77M
        elemName = xmlParseName(ctxt);
6077
8.77M
  if (elemName == NULL) {
6078
9.13k
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6079
9.13k
         "ATTLIST: no name for Element\n");
6080
9.13k
      return;
6081
9.13k
  }
6082
8.76M
  SKIP_BLANKS;
6083
8.76M
  GROW;
6084
33.6M
  while ((RAW != '>') && (ctxt->instate != XML_PARSER_EOF)) {
6085
24.9M
      int type;
6086
24.9M
      int def;
6087
24.9M
      xmlChar *defaultValue = NULL;
6088
6089
24.9M
      GROW;
6090
24.9M
            tree = NULL;
6091
24.9M
      attrName = xmlParseName(ctxt);
6092
24.9M
      if (attrName == NULL) {
6093
18.1k
    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6094
18.1k
             "ATTLIST: no name for Attribute\n");
6095
18.1k
    break;
6096
18.1k
      }
6097
24.9M
      GROW;
6098
24.9M
      if (SKIP_BLANKS == 0) {
6099
9.60k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6100
9.60k
            "Space required after the attribute name\n");
6101
9.60k
    break;
6102
9.60k
      }
6103
6104
24.9M
      type = xmlParseAttributeType(ctxt, &tree);
6105
24.9M
      if (type <= 0) {
6106
29.4k
          break;
6107
29.4k
      }
6108
6109
24.8M
      GROW;
6110
24.8M
      if (SKIP_BLANKS == 0) {
6111
10.3k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6112
10.3k
             "Space required after the attribute type\n");
6113
10.3k
          if (tree != NULL)
6114
7.94k
        xmlFreeEnumeration(tree);
6115
10.3k
    break;
6116
10.3k
      }
6117
6118
24.8M
      def = xmlParseDefaultDecl(ctxt, &defaultValue);
6119
24.8M
      if (def <= 0) {
6120
0
                if (defaultValue != NULL)
6121
0
        xmlFree(defaultValue);
6122
0
          if (tree != NULL)
6123
0
        xmlFreeEnumeration(tree);
6124
0
          break;
6125
0
      }
6126
24.8M
      if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
6127
727k
          xmlAttrNormalizeSpace(defaultValue, defaultValue);
6128
6129
24.8M
      GROW;
6130
24.8M
            if (RAW != '>') {
6131
23.1M
    if (SKIP_BLANKS == 0) {
6132
24.5k
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6133
24.5k
      "Space required after the attribute default value\n");
6134
24.5k
        if (defaultValue != NULL)
6135
14.1k
      xmlFree(defaultValue);
6136
24.5k
        if (tree != NULL)
6137
2.93k
      xmlFreeEnumeration(tree);
6138
24.5k
        break;
6139
24.5k
    }
6140
23.1M
      }
6141
24.8M
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6142
24.8M
    (ctxt->sax->attributeDecl != NULL))
6143
23.5M
    ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
6144
23.5M
                          type, def, defaultValue, tree);
6145
1.28M
      else if (tree != NULL)
6146
102k
    xmlFreeEnumeration(tree);
6147
6148
24.8M
      if ((ctxt->sax2) && (defaultValue != NULL) &&
6149
24.8M
          (def != XML_ATTRIBUTE_IMPLIED) &&
6150
24.8M
    (def != XML_ATTRIBUTE_REQUIRED)) {
6151
1.21M
    xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
6152
1.21M
      }
6153
24.8M
      if (ctxt->sax2) {
6154
16.6M
    xmlAddSpecialAttr(ctxt, elemName, attrName, type);
6155
16.6M
      }
6156
24.8M
      if (defaultValue != NULL)
6157
1.83M
          xmlFree(defaultValue);
6158
24.8M
      GROW;
6159
24.8M
  }
6160
8.76M
  if (RAW == '>') {
6161
8.69M
      if (inputid != ctxt->input->id) {
6162
39.7k
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6163
39.7k
                               "Attribute list declaration doesn't start and"
6164
39.7k
                               " stop in the same entity\n");
6165
39.7k
      }
6166
8.69M
      NEXT;
6167
8.69M
  }
6168
8.76M
    }
6169
8.77M
}
6170
6171
/**
6172
 * xmlParseElementMixedContentDecl:
6173
 * @ctxt:  an XML parser context
6174
 * @inputchk:  the input used for the current entity, needed for boundary checks
6175
 *
6176
 * DEPRECATED: Internal function, don't use.
6177
 *
6178
 * parse the declaration for a Mixed Element content
6179
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6180
 *
6181
 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
6182
 *                '(' S? '#PCDATA' S? ')'
6183
 *
6184
 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
6185
 *
6186
 * [ VC: No Duplicate Types ]
6187
 * The same name must not appear more than once in a single
6188
 * mixed-content declaration.
6189
 *
6190
 * returns: the list of the xmlElementContentPtr describing the element choices
6191
 */
6192
xmlElementContentPtr
6193
4.68M
xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6194
4.68M
    xmlElementContentPtr ret = NULL, cur = NULL, n;
6195
4.68M
    const xmlChar *elem = NULL;
6196
6197
4.68M
    GROW;
6198
4.68M
    if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6199
4.68M
  SKIP(7);
6200
4.68M
  SKIP_BLANKS;
6201
4.68M
  SHRINK;
6202
4.68M
  if (RAW == ')') {
6203
2.92M
      if (ctxt->input->id != inputchk) {
6204
78
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6205
78
                               "Element content declaration doesn't start and"
6206
78
                               " stop in the same entity\n");
6207
78
      }
6208
2.92M
      NEXT;
6209
2.92M
      ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6210
2.92M
      if (ret == NULL)
6211
0
          return(NULL);
6212
2.92M
      if (RAW == '*') {
6213
356
    ret->ocur = XML_ELEMENT_CONTENT_MULT;
6214
356
    NEXT;
6215
356
      }
6216
2.92M
      return(ret);
6217
2.92M
  }
6218
1.76M
  if ((RAW == '(') || (RAW == '|')) {
6219
1.76M
      ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6220
1.76M
      if (ret == NULL) return(NULL);
6221
1.76M
  }
6222
20.3M
  while ((RAW == '|') && (ctxt->instate != XML_PARSER_EOF)) {
6223
18.5M
      NEXT;
6224
18.5M
      if (elem == NULL) {
6225
1.76M
          ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6226
1.76M
    if (ret == NULL) {
6227
0
        xmlFreeDocElementContent(ctxt->myDoc, cur);
6228
0
                    return(NULL);
6229
0
                }
6230
1.76M
    ret->c1 = cur;
6231
1.76M
    if (cur != NULL)
6232
1.76M
        cur->parent = ret;
6233
1.76M
    cur = ret;
6234
16.7M
      } else {
6235
16.7M
          n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6236
16.7M
    if (n == NULL) {
6237
0
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6238
0
                    return(NULL);
6239
0
                }
6240
16.7M
    n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6241
16.7M
    if (n->c1 != NULL)
6242
16.7M
        n->c1->parent = n;
6243
16.7M
          cur->c2 = n;
6244
16.7M
    if (n != NULL)
6245
16.7M
        n->parent = cur;
6246
16.7M
    cur = n;
6247
16.7M
      }
6248
18.5M
      SKIP_BLANKS;
6249
18.5M
      elem = xmlParseName(ctxt);
6250
18.5M
      if (elem == NULL) {
6251
941
    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6252
941
      "xmlParseElementMixedContentDecl : Name expected\n");
6253
941
    xmlFreeDocElementContent(ctxt->myDoc, ret);
6254
941
    return(NULL);
6255
941
      }
6256
18.5M
      SKIP_BLANKS;
6257
18.5M
      GROW;
6258
18.5M
  }
6259
1.76M
  if ((RAW == ')') && (NXT(1) == '*')) {
6260
1.65M
      if (elem != NULL) {
6261
1.65M
    cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
6262
1.65M
                                   XML_ELEMENT_CONTENT_ELEMENT);
6263
1.65M
    if (cur->c2 != NULL)
6264
1.65M
        cur->c2->parent = cur;
6265
1.65M
            }
6266
1.65M
            if (ret != NULL)
6267
1.65M
                ret->ocur = XML_ELEMENT_CONTENT_MULT;
6268
1.65M
      if (ctxt->input->id != inputchk) {
6269
864
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6270
864
                               "Element content declaration doesn't start and"
6271
864
                               " stop in the same entity\n");
6272
864
      }
6273
1.65M
      SKIP(2);
6274
1.65M
  } else {
6275
116k
      xmlFreeDocElementContent(ctxt->myDoc, ret);
6276
116k
      xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
6277
116k
      return(NULL);
6278
116k
  }
6279
6280
1.76M
    } else {
6281
0
  xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
6282
0
    }
6283
1.65M
    return(ret);
6284
4.68M
}
6285
6286
/**
6287
 * xmlParseElementChildrenContentDeclPriv:
6288
 * @ctxt:  an XML parser context
6289
 * @inputchk:  the input used for the current entity, needed for boundary checks
6290
 * @depth: the level of recursion
6291
 *
6292
 * parse the declaration for a Mixed Element content
6293
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6294
 *
6295
 *
6296
 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6297
 *
6298
 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6299
 *
6300
 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6301
 *
6302
 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6303
 *
6304
 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6305
 * TODO Parameter-entity replacement text must be properly nested
6306
 *  with parenthesized groups. That is to say, if either of the
6307
 *  opening or closing parentheses in a choice, seq, or Mixed
6308
 *  construct is contained in the replacement text for a parameter
6309
 *  entity, both must be contained in the same replacement text. For
6310
 *  interoperability, if a parameter-entity reference appears in a
6311
 *  choice, seq, or Mixed construct, its replacement text should not
6312
 *  be empty, and neither the first nor last non-blank character of
6313
 *  the replacement text should be a connector (| or ,).
6314
 *
6315
 * Returns the tree of xmlElementContentPtr describing the element
6316
 *          hierarchy.
6317
 */
6318
static xmlElementContentPtr
6319
xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
6320
5.75M
                                       int depth) {
6321
5.75M
    xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
6322
5.75M
    const xmlChar *elem;
6323
5.75M
    xmlChar type = 0;
6324
6325
5.75M
    if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
6326
5.75M
        (depth >  2048)) {
6327
121
        xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED,
6328
121
"xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n",
6329
121
                          depth);
6330
121
  return(NULL);
6331
121
    }
6332
5.75M
    SKIP_BLANKS;
6333
5.75M
    GROW;
6334
5.75M
    if (RAW == '(') {
6335
330k
  int inputid = ctxt->input->id;
6336
6337
        /* Recurse on first child */
6338
330k
  NEXT;
6339
330k
  SKIP_BLANKS;
6340
330k
        cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6341
330k
                                                           depth + 1);
6342
330k
        if (cur == NULL)
6343
146k
            return(NULL);
6344
184k
  SKIP_BLANKS;
6345
184k
  GROW;
6346
5.42M
    } else {
6347
5.42M
  elem = xmlParseName(ctxt);
6348
5.42M
  if (elem == NULL) {
6349
369k
      xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6350
369k
      return(NULL);
6351
369k
  }
6352
5.05M
        cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6353
5.05M
  if (cur == NULL) {
6354
0
      xmlErrMemory(ctxt, NULL);
6355
0
      return(NULL);
6356
0
  }
6357
5.05M
  GROW;
6358
5.05M
  if (RAW == '?') {
6359
380k
      cur->ocur = XML_ELEMENT_CONTENT_OPT;
6360
380k
      NEXT;
6361
4.67M
  } else if (RAW == '*') {
6362
301k
      cur->ocur = XML_ELEMENT_CONTENT_MULT;
6363
301k
      NEXT;
6364
4.36M
  } else if (RAW == '+') {
6365
913k
      cur->ocur = XML_ELEMENT_CONTENT_PLUS;
6366
913k
      NEXT;
6367
3.45M
  } else {
6368
3.45M
      cur->ocur = XML_ELEMENT_CONTENT_ONCE;
6369
3.45M
  }
6370
5.05M
  GROW;
6371
5.05M
    }
6372
5.23M
    SKIP_BLANKS;
6373
5.23M
    SHRINK;
6374
26.1M
    while ((RAW != ')') && (ctxt->instate != XML_PARSER_EOF)) {
6375
        /*
6376
   * Each loop we parse one separator and one element.
6377
   */
6378
21.0M
        if (RAW == ',') {
6379
4.15M
      if (type == 0) type = CUR;
6380
6381
      /*
6382
       * Detect "Name | Name , Name" error
6383
       */
6384
2.43M
      else if (type != CUR) {
6385
223
    xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6386
223
        "xmlParseElementChildrenContentDecl : '%c' expected\n",
6387
223
                      type);
6388
223
    if ((last != NULL) && (last != ret))
6389
223
        xmlFreeDocElementContent(ctxt->myDoc, last);
6390
223
    if (ret != NULL)
6391
223
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6392
223
    return(NULL);
6393
223
      }
6394
4.15M
      NEXT;
6395
6396
4.15M
      op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
6397
4.15M
      if (op == NULL) {
6398
0
    if ((last != NULL) && (last != ret))
6399
0
        xmlFreeDocElementContent(ctxt->myDoc, last);
6400
0
          xmlFreeDocElementContent(ctxt->myDoc, ret);
6401
0
    return(NULL);
6402
0
      }
6403
4.15M
      if (last == NULL) {
6404
1.71M
    op->c1 = ret;
6405
1.71M
    if (ret != NULL)
6406
1.71M
        ret->parent = op;
6407
1.71M
    ret = cur = op;
6408
2.43M
      } else {
6409
2.43M
          cur->c2 = op;
6410
2.43M
    if (op != NULL)
6411
2.43M
        op->parent = cur;
6412
2.43M
    op->c1 = last;
6413
2.43M
    if (last != NULL)
6414
2.43M
        last->parent = op;
6415
2.43M
    cur =op;
6416
2.43M
    last = NULL;
6417
2.43M
      }
6418
16.8M
  } else if (RAW == '|') {
6419
16.8M
      if (type == 0) type = CUR;
6420
6421
      /*
6422
       * Detect "Name , Name | Name" error
6423
       */
6424
14.9M
      else if (type != CUR) {
6425
169
    xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6426
169
        "xmlParseElementChildrenContentDecl : '%c' expected\n",
6427
169
          type);
6428
169
    if ((last != NULL) && (last != ret))
6429
169
        xmlFreeDocElementContent(ctxt->myDoc, last);
6430
169
    if (ret != NULL)
6431
169
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6432
169
    return(NULL);
6433
169
      }
6434
16.8M
      NEXT;
6435
6436
16.8M
      op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6437
16.8M
      if (op == NULL) {
6438
0
    if ((last != NULL) && (last != ret))
6439
0
        xmlFreeDocElementContent(ctxt->myDoc, last);
6440
0
    if (ret != NULL)
6441
0
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6442
0
    return(NULL);
6443
0
      }
6444
16.8M
      if (last == NULL) {
6445
1.89M
    op->c1 = ret;
6446
1.89M
    if (ret != NULL)
6447
1.89M
        ret->parent = op;
6448
1.89M
    ret = cur = op;
6449
14.9M
      } else {
6450
14.9M
          cur->c2 = op;
6451
14.9M
    if (op != NULL)
6452
14.9M
        op->parent = cur;
6453
14.9M
    op->c1 = last;
6454
14.9M
    if (last != NULL)
6455
14.9M
        last->parent = op;
6456
14.9M
    cur =op;
6457
14.9M
    last = NULL;
6458
14.9M
      }
6459
16.8M
  } else {
6460
68.6k
      xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
6461
68.6k
      if ((last != NULL) && (last != ret))
6462
17.0k
          xmlFreeDocElementContent(ctxt->myDoc, last);
6463
68.6k
      if (ret != NULL)
6464
68.6k
    xmlFreeDocElementContent(ctxt->myDoc, ret);
6465
68.6k
      return(NULL);
6466
68.6k
  }
6467
20.9M
  GROW;
6468
20.9M
  SKIP_BLANKS;
6469
20.9M
  GROW;
6470
20.9M
  if (RAW == '(') {
6471
892k
      int inputid = ctxt->input->id;
6472
      /* Recurse on second child */
6473
892k
      NEXT;
6474
892k
      SKIP_BLANKS;
6475
892k
      last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6476
892k
                                                          depth + 1);
6477
892k
            if (last == NULL) {
6478
5.97k
    if (ret != NULL)
6479
5.97k
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6480
5.97k
    return(NULL);
6481
5.97k
            }
6482
886k
      SKIP_BLANKS;
6483
20.0M
  } else {
6484
20.0M
      elem = xmlParseName(ctxt);
6485
20.0M
      if (elem == NULL) {
6486
5.13k
    xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6487
5.13k
    if (ret != NULL)
6488
5.13k
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6489
5.13k
    return(NULL);
6490
5.13k
      }
6491
20.0M
      last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6492
20.0M
      if (last == NULL) {
6493
0
    if (ret != NULL)
6494
0
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6495
0
    return(NULL);
6496
0
      }
6497
20.0M
      if (RAW == '?') {
6498
2.26M
    last->ocur = XML_ELEMENT_CONTENT_OPT;
6499
2.26M
    NEXT;
6500
17.8M
      } else if (RAW == '*') {
6501
902k
    last->ocur = XML_ELEMENT_CONTENT_MULT;
6502
902k
    NEXT;
6503
16.9M
      } else if (RAW == '+') {
6504
327k
    last->ocur = XML_ELEMENT_CONTENT_PLUS;
6505
327k
    NEXT;
6506
16.5M
      } else {
6507
16.5M
    last->ocur = XML_ELEMENT_CONTENT_ONCE;
6508
16.5M
      }
6509
20.0M
  }
6510
20.9M
  SKIP_BLANKS;
6511
20.9M
  GROW;
6512
20.9M
    }
6513
5.15M
    if ((cur != NULL) && (last != NULL)) {
6514
3.58M
        cur->c2 = last;
6515
3.58M
  if (last != NULL)
6516
3.58M
      last->parent = cur;
6517
3.58M
    }
6518
5.15M
    if (ctxt->input->id != inputchk) {
6519
5.40k
  xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6520
5.40k
                       "Element content declaration doesn't start and stop in"
6521
5.40k
                       " the same entity\n");
6522
5.40k
    }
6523
5.15M
    NEXT;
6524
5.15M
    if (RAW == '?') {
6525
125k
  if (ret != NULL) {
6526
125k
      if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6527
125k
          (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6528
524
          ret->ocur = XML_ELEMENT_CONTENT_MULT;
6529
124k
      else
6530
124k
          ret->ocur = XML_ELEMENT_CONTENT_OPT;
6531
125k
  }
6532
125k
  NEXT;
6533
5.03M
    } else if (RAW == '*') {
6534
1.17M
  if (ret != NULL) {
6535
1.17M
      ret->ocur = XML_ELEMENT_CONTENT_MULT;
6536
1.17M
      cur = ret;
6537
      /*
6538
       * Some normalization:
6539
       * (a | b* | c?)* == (a | b | c)*
6540
       */
6541
10.4M
      while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6542
9.25M
    if ((cur->c1 != NULL) &&
6543
9.25M
              ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6544
9.25M
         (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6545
764k
        cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6546
9.25M
    if ((cur->c2 != NULL) &&
6547
9.25M
              ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6548
9.25M
         (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6549
125k
        cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6550
9.25M
    cur = cur->c2;
6551
9.25M
      }
6552
1.17M
  }
6553
1.17M
  NEXT;
6554
3.85M
    } else if (RAW == '+') {
6555
1.02M
  if (ret != NULL) {
6556
1.02M
      int found = 0;
6557
6558
1.02M
      if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6559
1.02M
          (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6560
344
          ret->ocur = XML_ELEMENT_CONTENT_MULT;
6561
1.02M
      else
6562
1.02M
          ret->ocur = XML_ELEMENT_CONTENT_PLUS;
6563
      /*
6564
       * Some normalization:
6565
       * (a | b*)+ == (a | b)*
6566
       * (a | b?)+ == (a | b)*
6567
       */
6568
1.69M
      while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6569
672k
    if ((cur->c1 != NULL) &&
6570
672k
              ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6571
672k
         (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6572
1.00k
        cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6573
1.00k
        found = 1;
6574
1.00k
    }
6575
672k
    if ((cur->c2 != NULL) &&
6576
672k
              ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6577
672k
         (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6578
421
        cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6579
421
        found = 1;
6580
421
    }
6581
672k
    cur = cur->c2;
6582
672k
      }
6583
1.02M
      if (found)
6584
1.08k
    ret->ocur = XML_ELEMENT_CONTENT_MULT;
6585
1.02M
  }
6586
1.02M
  NEXT;
6587
1.02M
    }
6588
5.15M
    return(ret);
6589
5.23M
}
6590
6591
/**
6592
 * xmlParseElementChildrenContentDecl:
6593
 * @ctxt:  an XML parser context
6594
 * @inputchk:  the input used for the current entity, needed for boundary checks
6595
 *
6596
 * DEPRECATED: Internal function, don't use.
6597
 *
6598
 * parse the declaration for a Mixed Element content
6599
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6600
 *
6601
 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6602
 *
6603
 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6604
 *
6605
 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6606
 *
6607
 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6608
 *
6609
 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6610
 * TODO Parameter-entity replacement text must be properly nested
6611
 *  with parenthesized groups. That is to say, if either of the
6612
 *  opening or closing parentheses in a choice, seq, or Mixed
6613
 *  construct is contained in the replacement text for a parameter
6614
 *  entity, both must be contained in the same replacement text. For
6615
 *  interoperability, if a parameter-entity reference appears in a
6616
 *  choice, seq, or Mixed construct, its replacement text should not
6617
 *  be empty, and neither the first nor last non-blank character of
6618
 *  the replacement text should be a connector (| or ,).
6619
 *
6620
 * Returns the tree of xmlElementContentPtr describing the element
6621
 *          hierarchy.
6622
 */
6623
xmlElementContentPtr
6624
0
xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6625
    /* stub left for API/ABI compat */
6626
0
    return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6627
0
}
6628
6629
/**
6630
 * xmlParseElementContentDecl:
6631
 * @ctxt:  an XML parser context
6632
 * @name:  the name of the element being defined.
6633
 * @result:  the Element Content pointer will be stored here if any
6634
 *
6635
 * DEPRECATED: Internal function, don't use.
6636
 *
6637
 * parse the declaration for an Element content either Mixed or Children,
6638
 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
6639
 *
6640
 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6641
 *
6642
 * returns: the type of element content XML_ELEMENT_TYPE_xxx
6643
 */
6644
6645
int
6646
xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
6647
9.21M
                           xmlElementContentPtr *result) {
6648
6649
9.21M
    xmlElementContentPtr tree = NULL;
6650
9.21M
    int inputid = ctxt->input->id;
6651
9.21M
    int res;
6652
6653
9.21M
    *result = NULL;
6654
6655
9.21M
    if (RAW != '(') {
6656
0
  xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6657
0
    "xmlParseElementContentDecl : %s '(' expected\n", name);
6658
0
  return(-1);
6659
0
    }
6660
9.21M
    NEXT;
6661
9.21M
    GROW;
6662
9.21M
    if (ctxt->instate == XML_PARSER_EOF)
6663
0
        return(-1);
6664
9.21M
    SKIP_BLANKS;
6665
9.21M
    if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6666
4.68M
        tree = xmlParseElementMixedContentDecl(ctxt, inputid);
6667
4.68M
  res = XML_ELEMENT_TYPE_MIXED;
6668
4.68M
    } else {
6669
4.52M
        tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
6670
4.52M
  res = XML_ELEMENT_TYPE_ELEMENT;
6671
4.52M
    }
6672
9.21M
    SKIP_BLANKS;
6673
9.21M
    *result = tree;
6674
9.21M
    return(res);
6675
9.21M
}
6676
6677
/**
6678
 * xmlParseElementDecl:
6679
 * @ctxt:  an XML parser context
6680
 *
6681
 * DEPRECATED: Internal function, don't use.
6682
 *
6683
 * Parse an element declaration. Always consumes '<!'.
6684
 *
6685
 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6686
 *
6687
 * [ VC: Unique Element Type Declaration ]
6688
 * No element type may be declared more than once
6689
 *
6690
 * Returns the type of the element, or -1 in case of error
6691
 */
6692
int
6693
10.6M
xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
6694
10.6M
    const xmlChar *name;
6695
10.6M
    int ret = -1;
6696
10.6M
    xmlElementContentPtr content  = NULL;
6697
6698
10.6M
    if ((CUR != '<') || (NXT(1) != '!'))
6699
0
        return(ret);
6700
10.6M
    SKIP(2);
6701
6702
    /* GROW; done in the caller */
6703
10.6M
    if (CMP7(CUR_PTR, 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
6704
10.6M
  int inputid = ctxt->input->id;
6705
6706
10.6M
  SKIP(7);
6707
10.6M
  if (SKIP_BLANKS == 0) {
6708
28.9k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6709
28.9k
               "Space required after 'ELEMENT'\n");
6710
28.9k
      return(-1);
6711
28.9k
  }
6712
10.6M
        name = xmlParseName(ctxt);
6713
10.6M
  if (name == NULL) {
6714
1.80k
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6715
1.80k
         "xmlParseElementDecl: no name for Element\n");
6716
1.80k
      return(-1);
6717
1.80k
  }
6718
10.6M
  if (SKIP_BLANKS == 0) {
6719
12.9k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6720
12.9k
         "Space required after the element name\n");
6721
12.9k
  }
6722
10.6M
  if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
6723
1.29M
      SKIP(5);
6724
      /*
6725
       * Element must always be empty.
6726
       */
6727
1.29M
      ret = XML_ELEMENT_TYPE_EMPTY;
6728
9.31M
  } else if ((RAW == 'A') && (NXT(1) == 'N') &&
6729
9.31M
             (NXT(2) == 'Y')) {
6730
79.1k
      SKIP(3);
6731
      /*
6732
       * Element is a generic container.
6733
       */
6734
79.1k
      ret = XML_ELEMENT_TYPE_ANY;
6735
9.23M
  } else if (RAW == '(') {
6736
9.21M
      ret = xmlParseElementContentDecl(ctxt, name, &content);
6737
9.21M
  } else {
6738
      /*
6739
       * [ WFC: PEs in Internal Subset ] error handling.
6740
       */
6741
16.7k
      if ((RAW == '%') && (ctxt->external == 0) &&
6742
16.7k
          (ctxt->inputNr == 1)) {
6743
388
    xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
6744
388
    "PEReference: forbidden within markup decl in internal subset\n");
6745
16.3k
      } else {
6746
16.3k
    xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6747
16.3k
          "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6748
16.3k
            }
6749
16.7k
      return(-1);
6750
16.7k
  }
6751
6752
10.5M
  SKIP_BLANKS;
6753
6754
10.5M
  if (RAW != '>') {
6755
278k
      xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
6756
278k
      if (content != NULL) {
6757
10.3k
    xmlFreeDocElementContent(ctxt->myDoc, content);
6758
10.3k
      }
6759
10.3M
  } else {
6760
10.3M
      if (inputid != ctxt->input->id) {
6761
4.28k
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6762
4.28k
                               "Element declaration doesn't start and stop in"
6763
4.28k
                               " the same entity\n");
6764
4.28k
      }
6765
6766
10.3M
      NEXT;
6767
10.3M
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6768
10.3M
    (ctxt->sax->elementDecl != NULL)) {
6769
9.44M
    if (content != NULL)
6770
7.91M
        content->parent = NULL;
6771
9.44M
          ctxt->sax->elementDecl(ctxt->userData, name, ret,
6772
9.44M
                           content);
6773
9.44M
    if ((content != NULL) && (content->parent == NULL)) {
6774
        /*
6775
         * this is a trick: if xmlAddElementDecl is called,
6776
         * instead of copying the full tree it is plugged directly
6777
         * if called from the parser. Avoid duplicating the
6778
         * interfaces or change the API/ABI
6779
         */
6780
293k
        xmlFreeDocElementContent(ctxt->myDoc, content);
6781
293k
    }
6782
9.44M
      } else if (content != NULL) {
6783
728k
    xmlFreeDocElementContent(ctxt->myDoc, content);
6784
728k
      }
6785
10.3M
  }
6786
10.5M
    }
6787
10.5M
    return(ret);
6788
10.6M
}
6789
6790
/**
6791
 * xmlParseConditionalSections
6792
 * @ctxt:  an XML parser context
6793
 *
6794
 * Parse a conditional section. Always consumes '<!['.
6795
 *
6796
 * [61] conditionalSect ::= includeSect | ignoreSect
6797
 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6798
 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6799
 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6800
 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6801
 */
6802
6803
static void
6804
18.1k
xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
6805
18.1k
    int *inputIds = NULL;
6806
18.1k
    size_t inputIdsSize = 0;
6807
18.1k
    size_t depth = 0;
6808
6809
86.3k
    while (ctxt->instate != XML_PARSER_EOF) {
6810
86.0k
        if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6811
42.6k
            int id = ctxt->input->id;
6812
6813
42.6k
            SKIP(3);
6814
42.6k
            SKIP_BLANKS;
6815
6816
42.6k
            if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
6817
33.0k
                SKIP(7);
6818
33.0k
                SKIP_BLANKS;
6819
33.0k
                if (RAW != '[') {
6820
215
                    xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6821
215
                    xmlHaltParser(ctxt);
6822
215
                    goto error;
6823
215
                }
6824
32.8k
                if (ctxt->input->id != id) {
6825
57
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6826
57
                                   "All markup of the conditional section is"
6827
57
                                   " not in the same entity\n");
6828
57
                }
6829
32.8k
                NEXT;
6830
6831
32.8k
                if (inputIdsSize <= depth) {
6832
11.7k
                    int *tmp;
6833
6834
11.7k
                    inputIdsSize = (inputIdsSize == 0 ? 4 : inputIdsSize * 2);
6835
11.7k
                    tmp = (int *) xmlRealloc(inputIds,
6836
11.7k
                            inputIdsSize * sizeof(int));
6837
11.7k
                    if (tmp == NULL) {
6838
0
                        xmlErrMemory(ctxt, NULL);
6839
0
                        goto error;
6840
0
                    }
6841
11.7k
                    inputIds = tmp;
6842
11.7k
                }
6843
32.8k
                inputIds[depth] = id;
6844
32.8k
                depth++;
6845
32.8k
            } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
6846
7.50k
                size_t ignoreDepth = 0;
6847
6848
7.50k
                SKIP(6);
6849
7.50k
                SKIP_BLANKS;
6850
7.50k
                if (RAW != '[') {
6851
166
                    xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6852
166
                    xmlHaltParser(ctxt);
6853
166
                    goto error;
6854
166
                }
6855
7.33k
                if (ctxt->input->id != id) {
6856
31
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6857
31
                                   "All markup of the conditional section is"
6858
31
                                   " not in the same entity\n");
6859
31
                }
6860
7.33k
                NEXT;
6861
6862
2.98M
                while (RAW != 0) {
6863
2.97M
                    if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6864
28.2k
                        SKIP(3);
6865
28.2k
                        ignoreDepth++;
6866
                        /* Check for integer overflow */
6867
28.2k
                        if (ignoreDepth == 0) {
6868
0
                            xmlErrMemory(ctxt, NULL);
6869
0
                            goto error;
6870
0
                        }
6871
2.95M
                    } else if ((RAW == ']') && (NXT(1) == ']') &&
6872
2.95M
                               (NXT(2) == '>')) {
6873
11.2k
                        if (ignoreDepth == 0)
6874
4.75k
                            break;
6875
6.48k
                        SKIP(3);
6876
6.48k
                        ignoreDepth--;
6877
2.93M
                    } else {
6878
2.93M
                        NEXT;
6879
2.93M
                    }
6880
2.97M
                }
6881
6882
7.33k
    if (RAW == 0) {
6883
2.58k
        xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
6884
2.58k
                    goto error;
6885
2.58k
    }
6886
4.75k
                if (ctxt->input->id != id) {
6887
6
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6888
6
                                   "All markup of the conditional section is"
6889
6
                                   " not in the same entity\n");
6890
6
                }
6891
4.75k
                SKIP(3);
6892
4.75k
            } else {
6893
2.06k
                xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
6894
2.06k
                xmlHaltParser(ctxt);
6895
2.06k
                goto error;
6896
2.06k
            }
6897
43.4k
        } else if ((depth > 0) &&
6898
43.4k
                   (RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6899
18.6k
            depth--;
6900
18.6k
            if (ctxt->input->id != inputIds[depth]) {
6901
437
                xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6902
437
                               "All markup of the conditional section is not"
6903
437
                               " in the same entity\n");
6904
437
            }
6905
18.6k
            SKIP(3);
6906
24.7k
        } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
6907
21.6k
            xmlParseMarkupDecl(ctxt);
6908
21.6k
        } else {
6909
3.16k
            xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6910
3.16k
            xmlHaltParser(ctxt);
6911
3.16k
            goto error;
6912
3.16k
        }
6913
6914
77.8k
        if (depth == 0)
6915
9.66k
            break;
6916
6917
68.2k
        SKIP_BLANKS;
6918
68.2k
        GROW;
6919
68.2k
    }
6920
6921
18.1k
error:
6922
18.1k
    xmlFree(inputIds);
6923
18.1k
}
6924
6925
/**
6926
 * xmlParseMarkupDecl:
6927
 * @ctxt:  an XML parser context
6928
 *
6929
 * DEPRECATED: Internal function, don't use.
6930
 *
6931
 * Parse markup declarations. Always consumes '<!' or '<?'.
6932
 *
6933
 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6934
 *                     NotationDecl | PI | Comment
6935
 *
6936
 * [ VC: Proper Declaration/PE Nesting ]
6937
 * Parameter-entity replacement text must be properly nested with
6938
 * markup declarations. That is to say, if either the first character
6939
 * or the last character of a markup declaration (markupdecl above) is
6940
 * contained in the replacement text for a parameter-entity reference,
6941
 * both must be contained in the same replacement text.
6942
 *
6943
 * [ WFC: PEs in Internal Subset ]
6944
 * In the internal DTD subset, parameter-entity references can occur
6945
 * only where markup declarations can occur, not within markup declarations.
6946
 * (This does not apply to references that occur in external parameter
6947
 * entities or to the external subset.)
6948
 */
6949
void
6950
489M
xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
6951
489M
    GROW;
6952
489M
    if (CUR == '<') {
6953
489M
        if (NXT(1) == '!') {
6954
487M
      switch (NXT(2)) {
6955
21.6M
          case 'E':
6956
21.6M
        if (NXT(3) == 'L')
6957
10.6M
      xmlParseElementDecl(ctxt);
6958
11.0M
        else if (NXT(3) == 'N')
6959
11.0M
      xmlParseEntityDecl(ctxt);
6960
1.70k
                    else
6961
1.70k
                        SKIP(2);
6962
21.6M
        break;
6963
8.77M
          case 'A':
6964
8.77M
        xmlParseAttributeListDecl(ctxt);
6965
8.77M
        break;
6966
203k
          case 'N':
6967
203k
        xmlParseNotationDecl(ctxt);
6968
203k
        break;
6969
455M
          case '-':
6970
455M
        xmlParseComment(ctxt);
6971
455M
        break;
6972
1.07M
    default:
6973
        /* there is an error but it will be detected later */
6974
1.07M
                    SKIP(2);
6975
1.07M
        break;
6976
487M
      }
6977
487M
  } else if (NXT(1) == '?') {
6978
1.91M
      xmlParsePI(ctxt);
6979
1.91M
  }
6980
489M
    }
6981
6982
    /*
6983
     * detect requirement to exit there and act accordingly
6984
     * and avoid having instate overridden later on
6985
     */
6986
489M
    if (ctxt->instate == XML_PARSER_EOF)
6987
36.4k
        return;
6988
6989
488M
    ctxt->instate = XML_PARSER_DTD;
6990
488M
}
6991
6992
/**
6993
 * xmlParseTextDecl:
6994
 * @ctxt:  an XML parser context
6995
 *
6996
 * DEPRECATED: Internal function, don't use.
6997
 *
6998
 * parse an XML declaration header for external entities
6999
 *
7000
 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
7001
 */
7002
7003
void
7004
29.0k
xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
7005
29.0k
    xmlChar *version;
7006
29.0k
    const xmlChar *encoding;
7007
29.0k
    int oldstate;
7008
7009
    /*
7010
     * We know that '<?xml' is here.
7011
     */
7012
29.0k
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
7013
28.6k
  SKIP(5);
7014
28.6k
    } else {
7015
413
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
7016
413
  return;
7017
413
    }
7018
7019
    /* Avoid expansion of parameter entities when skipping blanks. */
7020
28.6k
    oldstate = ctxt->instate;
7021
28.6k
    ctxt->instate = XML_PARSER_START;
7022
7023
28.6k
    if (SKIP_BLANKS == 0) {
7024
0
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7025
0
           "Space needed after '<?xml'\n");
7026
0
    }
7027
7028
    /*
7029
     * We may have the VersionInfo here.
7030
     */
7031
28.6k
    version = xmlParseVersionInfo(ctxt);
7032
28.6k
    if (version == NULL)
7033
3.09k
  version = xmlCharStrdup(XML_DEFAULT_VERSION);
7034
25.5k
    else {
7035
25.5k
  if (SKIP_BLANKS == 0) {
7036
1.86k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7037
1.86k
               "Space needed here\n");
7038
1.86k
  }
7039
25.5k
    }
7040
28.6k
    ctxt->input->version = version;
7041
7042
    /*
7043
     * We must have the encoding declaration
7044
     */
7045
28.6k
    encoding = xmlParseEncodingDecl(ctxt);
7046
28.6k
    if (ctxt->instate == XML_PARSER_EOF)
7047
0
        return;
7048
28.6k
    if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7049
  /*
7050
   * The XML REC instructs us to stop parsing right here
7051
   */
7052
511
        ctxt->instate = oldstate;
7053
511
        return;
7054
511
    }
7055
28.1k
    if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
7056
6.86k
  xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
7057
6.86k
           "Missing encoding in text declaration\n");
7058
6.86k
    }
7059
7060
28.1k
    SKIP_BLANKS;
7061
28.1k
    if ((RAW == '?') && (NXT(1) == '>')) {
7062
20.8k
        SKIP(2);
7063
20.8k
    } else if (RAW == '>') {
7064
        /* Deprecated old WD ... */
7065
258
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
7066
258
  NEXT;
7067
6.98k
    } else {
7068
6.98k
        int c;
7069
7070
6.98k
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
7071
1.34M
        while ((c = CUR) != 0) {
7072
1.34M
            NEXT;
7073
1.34M
            if (c == '>')
7074
4.40k
                break;
7075
1.34M
        }
7076
6.98k
    }
7077
7078
28.1k
    ctxt->instate = oldstate;
7079
28.1k
}
7080
7081
/**
7082
 * xmlParseExternalSubset:
7083
 * @ctxt:  an XML parser context
7084
 * @ExternalID: the external identifier
7085
 * @SystemID: the system identifier (or URL)
7086
 *
7087
 * parse Markup declarations from an external subset
7088
 *
7089
 * [30] extSubset ::= textDecl? extSubsetDecl
7090
 *
7091
 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
7092
 */
7093
void
7094
xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
7095
133k
                       const xmlChar *SystemID) {
7096
133k
    xmlDetectSAX2(ctxt);
7097
133k
    GROW;
7098
7099
133k
    if ((ctxt->encoding == NULL) &&
7100
133k
        (ctxt->input->end - ctxt->input->cur >= 4)) {
7101
133k
        xmlChar start[4];
7102
133k
  xmlCharEncoding enc;
7103
7104
133k
  start[0] = RAW;
7105
133k
  start[1] = NXT(1);
7106
133k
  start[2] = NXT(2);
7107
133k
  start[3] = NXT(3);
7108
133k
  enc = xmlDetectCharEncoding(start, 4);
7109
133k
  if (enc != XML_CHAR_ENCODING_NONE)
7110
21.0k
      xmlSwitchEncoding(ctxt, enc);
7111
133k
    }
7112
7113
133k
    if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
7114
19.0k
  xmlParseTextDecl(ctxt);
7115
19.0k
  if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7116
      /*
7117
       * The XML REC instructs us to stop parsing right here
7118
       */
7119
390
      xmlHaltParser(ctxt);
7120
390
      return;
7121
390
  }
7122
19.0k
    }
7123
133k
    if (ctxt->myDoc == NULL) {
7124
0
        ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
7125
0
  if (ctxt->myDoc == NULL) {
7126
0
      xmlErrMemory(ctxt, "New Doc failed");
7127
0
      return;
7128
0
  }
7129
0
  ctxt->myDoc->properties = XML_DOC_INTERNAL;
7130
0
    }
7131
133k
    if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
7132
0
        xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
7133
7134
133k
    ctxt->instate = XML_PARSER_DTD;
7135
133k
    ctxt->external = 1;
7136
133k
    SKIP_BLANKS;
7137
66.7M
    while ((ctxt->instate != XML_PARSER_EOF) && (RAW != 0)) {
7138
66.6M
  GROW;
7139
66.6M
        if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7140
18.1k
            xmlParseConditionalSections(ctxt);
7141
66.6M
        } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
7142
66.6M
            xmlParseMarkupDecl(ctxt);
7143
66.6M
        } else {
7144
37.7k
            xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7145
37.7k
            xmlHaltParser(ctxt);
7146
37.7k
            return;
7147
37.7k
        }
7148
66.6M
        SKIP_BLANKS;
7149
66.6M
    }
7150
7151
95.2k
    if (RAW != 0) {
7152
0
  xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7153
0
    }
7154
7155
95.2k
}
7156
7157
/**
7158
 * xmlParseReference:
7159
 * @ctxt:  an XML parser context
7160
 *
7161
 * DEPRECATED: Internal function, don't use.
7162
 *
7163
 * parse and handle entity references in content, depending on the SAX
7164
 * interface, this may end-up in a call to character() if this is a
7165
 * CharRef, a predefined entity, if there is no reference() callback.
7166
 * or if the parser was asked to switch to that mode.
7167
 *
7168
 * Always consumes '&'.
7169
 *
7170
 * [67] Reference ::= EntityRef | CharRef
7171
 */
7172
void
7173
16.9M
xmlParseReference(xmlParserCtxtPtr ctxt) {
7174
16.9M
    xmlEntityPtr ent;
7175
16.9M
    xmlChar *val;
7176
16.9M
    int was_checked;
7177
16.9M
    xmlNodePtr list = NULL;
7178
16.9M
    xmlParserErrors ret = XML_ERR_OK;
7179
7180
7181
16.9M
    if (RAW != '&')
7182
0
        return;
7183
7184
    /*
7185
     * Simple case of a CharRef
7186
     */
7187
16.9M
    if (NXT(1) == '#') {
7188
868k
  int i = 0;
7189
868k
  xmlChar out[16];
7190
868k
  int hex = NXT(2);
7191
868k
  int value = xmlParseCharRef(ctxt);
7192
7193
868k
  if (value == 0)
7194
105k
      return;
7195
762k
  if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
7196
      /*
7197
       * So we are using non-UTF-8 buffers
7198
       * Check that the char fit on 8bits, if not
7199
       * generate a CharRef.
7200
       */
7201
474k
      if (value <= 0xFF) {
7202
459k
    out[0] = value;
7203
459k
    out[1] = 0;
7204
459k
    if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7205
459k
        (!ctxt->disableSAX))
7206
386k
        ctxt->sax->characters(ctxt->userData, out, 1);
7207
459k
      } else {
7208
14.9k
    if ((hex == 'x') || (hex == 'X'))
7209
1.93k
        snprintf((char *)out, sizeof(out), "#x%X", value);
7210
13.0k
    else
7211
13.0k
        snprintf((char *)out, sizeof(out), "#%d", value);
7212
14.9k
    if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7213
14.9k
        (!ctxt->disableSAX))
7214
12.3k
        ctxt->sax->reference(ctxt->userData, out);
7215
14.9k
      }
7216
474k
  } else {
7217
      /*
7218
       * Just encode the value in UTF-8
7219
       */
7220
288k
      COPY_BUF(0 ,out, i, value);
7221
288k
      out[i] = 0;
7222
288k
      if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7223
288k
    (!ctxt->disableSAX))
7224
232k
    ctxt->sax->characters(ctxt->userData, out, i);
7225
288k
  }
7226
762k
  return;
7227
868k
    }
7228
7229
    /*
7230
     * We are seeing an entity reference
7231
     */
7232
16.0M
    ent = xmlParseEntityRef(ctxt);
7233
16.0M
    if (ent == NULL) return;
7234
15.0M
    if (!ctxt->wellFormed)
7235
9.40M
  return;
7236
5.59M
    was_checked = ent->flags & XML_ENT_PARSED;
7237
7238
    /* special case of predefined entities */
7239
5.59M
    if ((ent->name == NULL) ||
7240
5.59M
        (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
7241
623k
  val = ent->content;
7242
623k
  if (val == NULL) return;
7243
  /*
7244
   * inline the entity.
7245
   */
7246
623k
  if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7247
623k
      (!ctxt->disableSAX))
7248
623k
      ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
7249
623k
  return;
7250
623k
    }
7251
7252
    /*
7253
     * The first reference to the entity trigger a parsing phase
7254
     * where the ent->children is filled with the result from
7255
     * the parsing.
7256
     * Note: external parsed entities will not be loaded, it is not
7257
     * required for a non-validating parser, unless the parsing option
7258
     * of validating, or substituting entities were given. Doing so is
7259
     * far more secure as the parser will only process data coming from
7260
     * the document entity by default.
7261
     */
7262
4.96M
    if (((ent->flags & XML_ENT_PARSED) == 0) &&
7263
4.96M
        ((ent->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY) ||
7264
228k
         (ctxt->options & (XML_PARSE_NOENT | XML_PARSE_DTDVALID)))) {
7265
221k
  unsigned long oldsizeentcopy = ctxt->sizeentcopy;
7266
7267
  /*
7268
   * This is a bit hackish but this seems the best
7269
   * way to make sure both SAX and DOM entity support
7270
   * behaves okay.
7271
   */
7272
221k
  void *user_data;
7273
221k
  if (ctxt->userData == ctxt)
7274
221k
      user_data = NULL;
7275
0
  else
7276
0
      user_data = ctxt->userData;
7277
7278
        /* Avoid overflow as much as possible */
7279
221k
        ctxt->sizeentcopy = 0;
7280
7281
221k
        if (ent->flags & XML_ENT_EXPANDING) {
7282
1.50k
            xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7283
1.50k
            xmlHaltParser(ctxt);
7284
1.50k
            return;
7285
1.50k
        }
7286
7287
220k
        ent->flags |= XML_ENT_EXPANDING;
7288
7289
  /*
7290
   * Check that this entity is well formed
7291
   * 4.3.2: An internal general parsed entity is well-formed
7292
   * if its replacement text matches the production labeled
7293
   * content.
7294
   */
7295
220k
  if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7296
125k
      ctxt->depth++;
7297
125k
      ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content,
7298
125k
                                                user_data, &list);
7299
125k
      ctxt->depth--;
7300
7301
125k
  } else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7302
95.2k
      ctxt->depth++;
7303
95.2k
      ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax,
7304
95.2k
                                     user_data, ctxt->depth, ent->URI,
7305
95.2k
             ent->ExternalID, &list);
7306
95.2k
      ctxt->depth--;
7307
95.2k
  } else {
7308
0
      ret = XML_ERR_ENTITY_PE_INTERNAL;
7309
0
      xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7310
0
       "invalid entity type found\n", NULL);
7311
0
  }
7312
7313
220k
        ent->flags &= ~XML_ENT_EXPANDING;
7314
220k
        ent->flags |= XML_ENT_PARSED | XML_ENT_CHECKED;
7315
220k
        ent->expandedSize = ctxt->sizeentcopy;
7316
220k
  if (ret == XML_ERR_ENTITY_LOOP) {
7317
16.0k
            xmlHaltParser(ctxt);
7318
16.0k
      xmlFreeNodeList(list);
7319
16.0k
      return;
7320
16.0k
  }
7321
204k
  if (xmlParserEntityCheck(ctxt, oldsizeentcopy)) {
7322
6
      xmlFreeNodeList(list);
7323
6
      return;
7324
6
  }
7325
7326
204k
  if ((ret == XML_ERR_OK) && (list != NULL)) {
7327
105k
            ent->children = list;
7328
            /*
7329
             * Prune it directly in the generated document
7330
             * except for single text nodes.
7331
             */
7332
105k
            if ((ctxt->replaceEntities == 0) ||
7333
105k
                (ctxt->parseMode == XML_PARSE_READER) ||
7334
105k
                ((list->type == XML_TEXT_NODE) &&
7335
92.7k
                 (list->next == NULL))) {
7336
92.7k
                ent->owner = 1;
7337
1.97M
                while (list != NULL) {
7338
1.88M
                    list->parent = (xmlNodePtr) ent;
7339
1.88M
                    if (list->doc != ent->doc)
7340
0
                        xmlSetTreeDoc(list, ent->doc);
7341
1.88M
                    if (list->next == NULL)
7342
92.7k
                        ent->last = list;
7343
1.88M
                    list = list->next;
7344
1.88M
                }
7345
92.7k
                list = NULL;
7346
92.7k
            } else {
7347
12.9k
                ent->owner = 0;
7348
5.22M
                while (list != NULL) {
7349
5.21M
                    list->parent = (xmlNodePtr) ctxt->node;
7350
5.21M
                    list->doc = ctxt->myDoc;
7351
5.21M
                    if (list->next == NULL)
7352
12.9k
                        ent->last = list;
7353
5.21M
                    list = list->next;
7354
5.21M
                }
7355
12.9k
                list = ent->children;
7356
#ifdef LIBXML_LEGACY_ENABLED
7357
                if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7358
                    xmlAddEntityReference(ent, list, NULL);
7359
#endif /* LIBXML_LEGACY_ENABLED */
7360
12.9k
            }
7361
105k
  } else if ((ret != XML_ERR_OK) &&
7362
98.5k
       (ret != XML_WAR_UNDECLARED_ENTITY)) {
7363
57.8k
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7364
57.8k
         "Entity '%s' failed to parse\n", ent->name);
7365
57.8k
            if (ent->content != NULL)
7366
21.7k
                ent->content[0] = 0;
7367
57.8k
  } else if (list != NULL) {
7368
0
      xmlFreeNodeList(list);
7369
0
      list = NULL;
7370
0
  }
7371
7372
        /* Prevent entity from being parsed and expanded twice (Bug 760367). */
7373
204k
        was_checked = 0;
7374
204k
    }
7375
7376
    /*
7377
     * Now that the entity content has been gathered
7378
     * provide it to the application, this can take different forms based
7379
     * on the parsing modes.
7380
     */
7381
4.95M
    if (ent->children == NULL) {
7382
  /*
7383
   * Probably running in SAX mode and the callbacks don't
7384
   * build the entity content. So unless we already went
7385
   * though parsing for first checking go though the entity
7386
   * content to generate callbacks associated to the entity
7387
   */
7388
1.03M
  if (was_checked != 0) {
7389
933k
      void *user_data;
7390
      /*
7391
       * This is a bit hackish but this seems the best
7392
       * way to make sure both SAX and DOM entity support
7393
       * behaves okay.
7394
       */
7395
933k
      if (ctxt->userData == ctxt)
7396
933k
    user_data = NULL;
7397
0
      else
7398
0
    user_data = ctxt->userData;
7399
7400
933k
      if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7401
2.84k
    ctxt->depth++;
7402
2.84k
    ret = xmlParseBalancedChunkMemoryInternal(ctxt,
7403
2.84k
           ent->content, user_data, NULL);
7404
2.84k
    ctxt->depth--;
7405
930k
      } else if (ent->etype ==
7406
930k
           XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7407
930k
          unsigned long oldsizeentities = ctxt->sizeentities;
7408
7409
930k
    ctxt->depth++;
7410
930k
    ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
7411
930k
         ctxt->sax, user_data, ctxt->depth,
7412
930k
         ent->URI, ent->ExternalID, NULL);
7413
930k
    ctxt->depth--;
7414
7415
                /* Undo the change to sizeentities */
7416
930k
                ctxt->sizeentities = oldsizeentities;
7417
930k
      } else {
7418
0
    ret = XML_ERR_ENTITY_PE_INTERNAL;
7419
0
    xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7420
0
           "invalid entity type found\n", NULL);
7421
0
      }
7422
933k
      if (ret == XML_ERR_ENTITY_LOOP) {
7423
0
    xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7424
0
    return;
7425
0
      }
7426
933k
            if (xmlParserEntityCheck(ctxt, 0))
7427
0
                return;
7428
933k
  }
7429
1.03M
  if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7430
1.03M
      (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7431
      /*
7432
       * Entity reference callback comes second, it's somewhat
7433
       * superfluous but a compatibility to historical behaviour
7434
       */
7435
183k
      ctxt->sax->reference(ctxt->userData, ent->name);
7436
183k
  }
7437
1.03M
  return;
7438
1.03M
    }
7439
7440
    /*
7441
     * We also check for amplification if entities aren't substituted.
7442
     * They might be expanded later.
7443
     */
7444
3.91M
    if ((was_checked != 0) &&
7445
3.91M
        (xmlParserEntityCheck(ctxt, ent->expandedSize)))
7446
846
        return;
7447
7448
    /*
7449
     * If we didn't get any children for the entity being built
7450
     */
7451
3.91M
    if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7452
3.91M
  (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7453
  /*
7454
   * Create a node.
7455
   */
7456
1.82M
  ctxt->sax->reference(ctxt->userData, ent->name);
7457
1.82M
  return;
7458
1.82M
    }
7459
7460
2.08M
    if (ctxt->replaceEntities)  {
7461
  /*
7462
   * There is a problem on the handling of _private for entities
7463
   * (bug 155816): Should we copy the content of the field from
7464
   * the entity (possibly overwriting some value set by the user
7465
   * when a copy is created), should we leave it alone, or should
7466
   * we try to take care of different situations?  The problem
7467
   * is exacerbated by the usage of this field by the xmlReader.
7468
   * To fix this bug, we look at _private on the created node
7469
   * and, if it's NULL, we copy in whatever was in the entity.
7470
   * If it's not NULL we leave it alone.  This is somewhat of a
7471
   * hack - maybe we should have further tests to determine
7472
   * what to do.
7473
   */
7474
2.08M
  if (ctxt->node != NULL) {
7475
      /*
7476
       * Seems we are generating the DOM content, do
7477
       * a simple tree copy for all references except the first
7478
       * In the first occurrence list contains the replacement.
7479
       */
7480
2.08M
      if (((list == NULL) && (ent->owner == 0)) ||
7481
2.08M
    (ctxt->parseMode == XML_PARSE_READER)) {
7482
621k
    xmlNodePtr nw = NULL, cur, firstChild = NULL;
7483
7484
    /*
7485
     * when operating on a reader, the entities definitions
7486
     * are always owning the entities subtree.
7487
    if (ctxt->parseMode == XML_PARSE_READER)
7488
        ent->owner = 1;
7489
     */
7490
7491
621k
    cur = ent->children;
7492
2.39M
    while (cur != NULL) {
7493
2.39M
        nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7494
2.39M
        if (nw != NULL) {
7495
2.39M
      if (nw->_private == NULL)
7496
2.39M
          nw->_private = cur->_private;
7497
2.39M
      if (firstChild == NULL){
7498
621k
          firstChild = nw;
7499
621k
      }
7500
2.39M
      nw = xmlAddChild(ctxt->node, nw);
7501
2.39M
        }
7502
2.39M
        if (cur == ent->last) {
7503
      /*
7504
       * needed to detect some strange empty
7505
       * node cases in the reader tests
7506
       */
7507
621k
      if ((ctxt->parseMode == XML_PARSE_READER) &&
7508
621k
          (nw != NULL) &&
7509
621k
          (nw->type == XML_ELEMENT_NODE) &&
7510
621k
          (nw->children == NULL))
7511
9.77k
          nw->extra = 1;
7512
7513
621k
      break;
7514
621k
        }
7515
1.77M
        cur = cur->next;
7516
1.77M
    }
7517
#ifdef LIBXML_LEGACY_ENABLED
7518
    if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7519
      xmlAddEntityReference(ent, firstChild, nw);
7520
#endif /* LIBXML_LEGACY_ENABLED */
7521
1.46M
      } else if ((list == NULL) || (ctxt->inputNr > 0)) {
7522
1.46M
    xmlNodePtr nw = NULL, cur, next, last,
7523
1.46M
         firstChild = NULL;
7524
7525
    /*
7526
     * Copy the entity child list and make it the new
7527
     * entity child list. The goal is to make sure any
7528
     * ID or REF referenced will be the one from the
7529
     * document content and not the entity copy.
7530
     */
7531
1.46M
    cur = ent->children;
7532
1.46M
    ent->children = NULL;
7533
1.46M
    last = ent->last;
7534
1.46M
    ent->last = NULL;
7535
20.0M
    while (cur != NULL) {
7536
20.0M
        next = cur->next;
7537
20.0M
        cur->next = NULL;
7538
20.0M
        cur->parent = NULL;
7539
20.0M
        nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7540
20.0M
        if (nw != NULL) {
7541
20.0M
      if (nw->_private == NULL)
7542
20.0M
          nw->_private = cur->_private;
7543
20.0M
      if (firstChild == NULL){
7544
1.46M
          firstChild = cur;
7545
1.46M
      }
7546
20.0M
      xmlAddChild((xmlNodePtr) ent, nw);
7547
20.0M
        }
7548
20.0M
        xmlAddChild(ctxt->node, cur);
7549
20.0M
        if (cur == last)
7550
1.46M
      break;
7551
18.6M
        cur = next;
7552
18.6M
    }
7553
1.46M
    if (ent->owner == 0)
7554
12.9k
        ent->owner = 1;
7555
#ifdef LIBXML_LEGACY_ENABLED
7556
    if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7557
      xmlAddEntityReference(ent, firstChild, nw);
7558
#endif /* LIBXML_LEGACY_ENABLED */
7559
1.46M
      } else {
7560
0
    const xmlChar *nbktext;
7561
7562
    /*
7563
     * the name change is to avoid coalescing of the
7564
     * node with a possible previous text one which
7565
     * would make ent->children a dangling pointer
7566
     */
7567
0
    nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
7568
0
          -1);
7569
0
    if (ent->children->type == XML_TEXT_NODE)
7570
0
        ent->children->name = nbktext;
7571
0
    if ((ent->last != ent->children) &&
7572
0
        (ent->last->type == XML_TEXT_NODE))
7573
0
        ent->last->name = nbktext;
7574
0
    xmlAddChildList(ctxt->node, ent->children);
7575
0
      }
7576
7577
      /*
7578
       * This is to avoid a nasty side effect, see
7579
       * characters() in SAX.c
7580
       */
7581
2.08M
      ctxt->nodemem = 0;
7582
2.08M
      ctxt->nodelen = 0;
7583
2.08M
      return;
7584
2.08M
  }
7585
2.08M
    }
7586
2.08M
}
7587
7588
/**
7589
 * xmlParseEntityRef:
7590
 * @ctxt:  an XML parser context
7591
 *
7592
 * DEPRECATED: Internal function, don't use.
7593
 *
7594
 * Parse an entitiy reference. Always consumes '&'.
7595
 *
7596
 * [68] EntityRef ::= '&' Name ';'
7597
 *
7598
 * [ WFC: Entity Declared ]
7599
 * In a document without any DTD, a document with only an internal DTD
7600
 * subset which contains no parameter entity references, or a document
7601
 * with "standalone='yes'", the Name given in the entity reference
7602
 * must match that in an entity declaration, except that well-formed
7603
 * documents need not declare any of the following entities: amp, lt,
7604
 * gt, apos, quot.  The declaration of a parameter entity must precede
7605
 * any reference to it.  Similarly, the declaration of a general entity
7606
 * must precede any reference to it which appears in a default value in an
7607
 * attribute-list declaration. Note that if entities are declared in the
7608
 * external subset or in external parameter entities, a non-validating
7609
 * processor is not obligated to read and process their declarations;
7610
 * for such documents, the rule that an entity must be declared is a
7611
 * well-formedness constraint only if standalone='yes'.
7612
 *
7613
 * [ WFC: Parsed Entity ]
7614
 * An entity reference must not contain the name of an unparsed entity
7615
 *
7616
 * Returns the xmlEntityPtr if found, or NULL otherwise.
7617
 */
7618
xmlEntityPtr
7619
25.3M
xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
7620
25.3M
    const xmlChar *name;
7621
25.3M
    xmlEntityPtr ent = NULL;
7622
7623
25.3M
    GROW;
7624
25.3M
    if (ctxt->instate == XML_PARSER_EOF)
7625
0
        return(NULL);
7626
7627
25.3M
    if (RAW != '&')
7628
0
        return(NULL);
7629
25.3M
    NEXT;
7630
25.3M
    name = xmlParseName(ctxt);
7631
25.3M
    if (name == NULL) {
7632
449k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7633
449k
           "xmlParseEntityRef: no name\n");
7634
449k
        return(NULL);
7635
449k
    }
7636
24.9M
    if (RAW != ';') {
7637
116k
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7638
116k
  return(NULL);
7639
116k
    }
7640
24.8M
    NEXT;
7641
7642
    /*
7643
     * Predefined entities override any extra definition
7644
     */
7645
24.8M
    if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7646
18.0M
        ent = xmlGetPredefinedEntity(name);
7647
18.0M
        if (ent != NULL)
7648
1.44M
            return(ent);
7649
18.0M
    }
7650
7651
    /*
7652
     * Ask first SAX for entity resolution, otherwise try the
7653
     * entities which may have stored in the parser context.
7654
     */
7655
23.3M
    if (ctxt->sax != NULL) {
7656
23.3M
  if (ctxt->sax->getEntity != NULL)
7657
23.3M
      ent = ctxt->sax->getEntity(ctxt->userData, name);
7658
23.3M
  if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7659
23.3M
      (ctxt->options & XML_PARSE_OLDSAX))
7660
30.8k
      ent = xmlGetPredefinedEntity(name);
7661
23.3M
  if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7662
23.3M
      (ctxt->userData==ctxt)) {
7663
113k
      ent = xmlSAX2GetEntity(ctxt, name);
7664
113k
  }
7665
23.3M
    }
7666
23.3M
    if (ctxt->instate == XML_PARSER_EOF)
7667
0
  return(NULL);
7668
    /*
7669
     * [ WFC: Entity Declared ]
7670
     * In a document without any DTD, a document with only an
7671
     * internal DTD subset which contains no parameter entity
7672
     * references, or a document with "standalone='yes'", the
7673
     * Name given in the entity reference must match that in an
7674
     * entity declaration, except that well-formed documents
7675
     * need not declare any of the following entities: amp, lt,
7676
     * gt, apos, quot.
7677
     * The declaration of a parameter entity must precede any
7678
     * reference to it.
7679
     * Similarly, the declaration of a general entity must
7680
     * precede any reference to it which appears in a default
7681
     * value in an attribute-list declaration. Note that if
7682
     * entities are declared in the external subset or in
7683
     * external parameter entities, a non-validating processor
7684
     * is not obligated to read and process their declarations;
7685
     * for such documents, the rule that an entity must be
7686
     * declared is a well-formedness constraint only if
7687
     * standalone='yes'.
7688
     */
7689
23.3M
    if (ent == NULL) {
7690
852k
  if ((ctxt->standalone == 1) ||
7691
852k
      ((ctxt->hasExternalSubset == 0) &&
7692
830k
       (ctxt->hasPErefs == 0))) {
7693
459k
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7694
459k
         "Entity '%s' not defined\n", name);
7695
459k
  } else {
7696
393k
      xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7697
393k
         "Entity '%s' not defined\n", name);
7698
393k
      if ((ctxt->inSubset == 0) &&
7699
393k
    (ctxt->sax != NULL) &&
7700
393k
    (ctxt->sax->reference != NULL)) {
7701
373k
    ctxt->sax->reference(ctxt->userData, name);
7702
373k
      }
7703
393k
  }
7704
852k
  ctxt->valid = 0;
7705
852k
    }
7706
7707
    /*
7708
     * [ WFC: Parsed Entity ]
7709
     * An entity reference must not contain the name of an
7710
     * unparsed entity
7711
     */
7712
22.5M
    else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7713
16.4k
  xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7714
16.4k
     "Entity reference to unparsed entity %s\n", name);
7715
16.4k
    }
7716
7717
    /*
7718
     * [ WFC: No External Entity References ]
7719
     * Attribute values cannot contain direct or indirect
7720
     * entity references to external entities.
7721
     */
7722
22.4M
    else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7723
22.4M
       (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7724
36.3k
  xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7725
36.3k
       "Attribute references external entity '%s'\n", name);
7726
36.3k
    }
7727
    /*
7728
     * [ WFC: No < in Attribute Values ]
7729
     * The replacement text of any entity referred to directly or
7730
     * indirectly in an attribute value (other than "&lt;") must
7731
     * not contain a <.
7732
     */
7733
22.4M
    else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7734
22.4M
       (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
7735
8.64M
  if ((ent->flags & XML_ENT_CHECKED_LT) == 0) {
7736
69.4k
            if ((ent->content != NULL) && (xmlStrchr(ent->content, '<')))
7737
2.61k
                ent->flags |= XML_ENT_CONTAINS_LT;
7738
69.4k
            ent->flags |= XML_ENT_CHECKED_LT;
7739
69.4k
        }
7740
8.64M
        if (ent->flags & XML_ENT_CONTAINS_LT)
7741
132k
            xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7742
132k
                    "'<' in entity '%s' is not allowed in attributes "
7743
132k
                    "values\n", name);
7744
8.64M
    }
7745
7746
    /*
7747
     * Internal check, no parameter entities here ...
7748
     */
7749
13.8M
    else {
7750
13.8M
  switch (ent->etype) {
7751
0
      case XML_INTERNAL_PARAMETER_ENTITY:
7752
0
      case XML_EXTERNAL_PARAMETER_ENTITY:
7753
0
      xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7754
0
       "Attempt to reference the parameter entity '%s'\n",
7755
0
            name);
7756
0
      break;
7757
13.8M
      default:
7758
13.8M
      break;
7759
13.8M
  }
7760
13.8M
    }
7761
7762
    /*
7763
     * [ WFC: No Recursion ]
7764
     * A parsed entity must not contain a recursive reference
7765
     * to itself, either directly or indirectly.
7766
     * Done somewhere else
7767
     */
7768
23.3M
    return(ent);
7769
23.3M
}
7770
7771
/**
7772
 * xmlParseStringEntityRef:
7773
 * @ctxt:  an XML parser context
7774
 * @str:  a pointer to an index in the string
7775
 *
7776
 * parse ENTITY references declarations, but this version parses it from
7777
 * a string value.
7778
 *
7779
 * [68] EntityRef ::= '&' Name ';'
7780
 *
7781
 * [ WFC: Entity Declared ]
7782
 * In a document without any DTD, a document with only an internal DTD
7783
 * subset which contains no parameter entity references, or a document
7784
 * with "standalone='yes'", the Name given in the entity reference
7785
 * must match that in an entity declaration, except that well-formed
7786
 * documents need not declare any of the following entities: amp, lt,
7787
 * gt, apos, quot.  The declaration of a parameter entity must precede
7788
 * any reference to it.  Similarly, the declaration of a general entity
7789
 * must precede any reference to it which appears in a default value in an
7790
 * attribute-list declaration. Note that if entities are declared in the
7791
 * external subset or in external parameter entities, a non-validating
7792
 * processor is not obligated to read and process their declarations;
7793
 * for such documents, the rule that an entity must be declared is a
7794
 * well-formedness constraint only if standalone='yes'.
7795
 *
7796
 * [ WFC: Parsed Entity ]
7797
 * An entity reference must not contain the name of an unparsed entity
7798
 *
7799
 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7800
 * is updated to the current location in the string.
7801
 */
7802
static xmlEntityPtr
7803
5.60G
xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7804
5.60G
    xmlChar *name;
7805
5.60G
    const xmlChar *ptr;
7806
5.60G
    xmlChar cur;
7807
5.60G
    xmlEntityPtr ent = NULL;
7808
7809
5.60G
    if ((str == NULL) || (*str == NULL))
7810
0
        return(NULL);
7811
5.60G
    ptr = *str;
7812
5.60G
    cur = *ptr;
7813
5.60G
    if (cur != '&')
7814
5.32G
  return(NULL);
7815
7816
282M
    ptr++;
7817
282M
    name = xmlParseStringName(ctxt, &ptr);
7818
282M
    if (name == NULL) {
7819
5.03k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7820
5.03k
           "xmlParseStringEntityRef: no name\n");
7821
5.03k
  *str = ptr;
7822
5.03k
  return(NULL);
7823
5.03k
    }
7824
282M
    if (*ptr != ';') {
7825
175k
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7826
175k
        xmlFree(name);
7827
175k
  *str = ptr;
7828
175k
  return(NULL);
7829
175k
    }
7830
282M
    ptr++;
7831
7832
7833
    /*
7834
     * Predefined entities override any extra definition
7835
     */
7836
282M
    if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7837
197M
        ent = xmlGetPredefinedEntity(name);
7838
197M
        if (ent != NULL) {
7839
777k
            xmlFree(name);
7840
777k
            *str = ptr;
7841
777k
            return(ent);
7842
777k
        }
7843
197M
    }
7844
7845
    /*
7846
     * Ask first SAX for entity resolution, otherwise try the
7847
     * entities which may have stored in the parser context.
7848
     */
7849
281M
    if (ctxt->sax != NULL) {
7850
281M
  if (ctxt->sax->getEntity != NULL)
7851
281M
      ent = ctxt->sax->getEntity(ctxt->userData, name);
7852
281M
  if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX))
7853
16.2M
      ent = xmlGetPredefinedEntity(name);
7854
281M
  if ((ent == NULL) && (ctxt->userData==ctxt)) {
7855
49.2M
      ent = xmlSAX2GetEntity(ctxt, name);
7856
49.2M
  }
7857
281M
    }
7858
281M
    if (ctxt->instate == XML_PARSER_EOF) {
7859
0
  xmlFree(name);
7860
0
  return(NULL);
7861
0
    }
7862
7863
    /*
7864
     * [ WFC: Entity Declared ]
7865
     * In a document without any DTD, a document with only an
7866
     * internal DTD subset which contains no parameter entity
7867
     * references, or a document with "standalone='yes'", the
7868
     * Name given in the entity reference must match that in an
7869
     * entity declaration, except that well-formed documents
7870
     * need not declare any of the following entities: amp, lt,
7871
     * gt, apos, quot.
7872
     * The declaration of a parameter entity must precede any
7873
     * reference to it.
7874
     * Similarly, the declaration of a general entity must
7875
     * precede any reference to it which appears in a default
7876
     * value in an attribute-list declaration. Note that if
7877
     * entities are declared in the external subset or in
7878
     * external parameter entities, a non-validating processor
7879
     * is not obligated to read and process their declarations;
7880
     * for such documents, the rule that an entity must be
7881
     * declared is a well-formedness constraint only if
7882
     * standalone='yes'.
7883
     */
7884
281M
    if (ent == NULL) {
7885
49.2M
  if ((ctxt->standalone == 1) ||
7886
49.2M
      ((ctxt->hasExternalSubset == 0) &&
7887
49.2M
       (ctxt->hasPErefs == 0))) {
7888
46.7M
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7889
46.7M
         "Entity '%s' not defined\n", name);
7890
46.7M
  } else {
7891
2.46M
      xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7892
2.46M
        "Entity '%s' not defined\n",
7893
2.46M
        name);
7894
2.46M
  }
7895
  /* TODO ? check regressions ctxt->valid = 0; */
7896
49.2M
    }
7897
7898
    /*
7899
     * [ WFC: Parsed Entity ]
7900
     * An entity reference must not contain the name of an
7901
     * unparsed entity
7902
     */
7903
232M
    else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7904
1.52k
  xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7905
1.52k
     "Entity reference to unparsed entity %s\n", name);
7906
1.52k
    }
7907
7908
    /*
7909
     * [ WFC: No External Entity References ]
7910
     * Attribute values cannot contain direct or indirect
7911
     * entity references to external entities.
7912
     */
7913
232M
    else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7914
232M
       (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7915
1.22M
  xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7916
1.22M
   "Attribute references external entity '%s'\n", name);
7917
1.22M
    }
7918
    /*
7919
     * [ WFC: No < in Attribute Values ]
7920
     * The replacement text of any entity referred to directly or
7921
     * indirectly in an attribute value (other than "&lt;") must
7922
     * not contain a <.
7923
     */
7924
230M
    else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7925
230M
       (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
7926
227M
  if ((ent->flags & XML_ENT_CHECKED_LT) == 0) {
7927
48.6k
            if ((ent->content != NULL) && (xmlStrchr(ent->content, '<')))
7928
1.61k
                ent->flags |= XML_ENT_CONTAINS_LT;
7929
48.6k
            ent->flags |= XML_ENT_CHECKED_LT;
7930
48.6k
        }
7931
227M
        if (ent->flags & XML_ENT_CONTAINS_LT)
7932
6.10M
            xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7933
6.10M
                    "'<' in entity '%s' is not allowed in attributes "
7934
6.10M
                    "values\n", name);
7935
227M
    }
7936
7937
    /*
7938
     * Internal check, no parameter entities here ...
7939
     */
7940
3.39M
    else {
7941
3.39M
  switch (ent->etype) {
7942
0
      case XML_INTERNAL_PARAMETER_ENTITY:
7943
0
      case XML_EXTERNAL_PARAMETER_ENTITY:
7944
0
    xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7945
0
       "Attempt to reference the parameter entity '%s'\n",
7946
0
          name);
7947
0
      break;
7948
3.39M
      default:
7949
3.39M
      break;
7950
3.39M
  }
7951
3.39M
    }
7952
7953
    /*
7954
     * [ WFC: No Recursion ]
7955
     * A parsed entity must not contain a recursive reference
7956
     * to itself, either directly or indirectly.
7957
     * Done somewhere else
7958
     */
7959
7960
281M
    xmlFree(name);
7961
281M
    *str = ptr;
7962
281M
    return(ent);
7963
281M
}
7964
7965
/**
7966
 * xmlParsePEReference:
7967
 * @ctxt:  an XML parser context
7968
 *
7969
 * DEPRECATED: Internal function, don't use.
7970
 *
7971
 * Parse a parameter entity reference. Always consumes '%'.
7972
 *
7973
 * The entity content is handled directly by pushing it's content as
7974
 * a new input stream.
7975
 *
7976
 * [69] PEReference ::= '%' Name ';'
7977
 *
7978
 * [ WFC: No Recursion ]
7979
 * A parsed entity must not contain a recursive
7980
 * reference to itself, either directly or indirectly.
7981
 *
7982
 * [ WFC: Entity Declared ]
7983
 * In a document without any DTD, a document with only an internal DTD
7984
 * subset which contains no parameter entity references, or a document
7985
 * with "standalone='yes'", ...  ... The declaration of a parameter
7986
 * entity must precede any reference to it...
7987
 *
7988
 * [ VC: Entity Declared ]
7989
 * In a document with an external subset or external parameter entities
7990
 * with "standalone='no'", ...  ... The declaration of a parameter entity
7991
 * must precede any reference to it...
7992
 *
7993
 * [ WFC: In DTD ]
7994
 * Parameter-entity references may only appear in the DTD.
7995
 * NOTE: misleading but this is handled.
7996
 */
7997
void
7998
xmlParsePEReference(xmlParserCtxtPtr ctxt)
7999
481M
{
8000
481M
    const xmlChar *name;
8001
481M
    xmlEntityPtr entity = NULL;
8002
481M
    xmlParserInputPtr input;
8003
8004
481M
    if (RAW != '%')
8005
0
        return;
8006
481M
    NEXT;
8007
481M
    name = xmlParseName(ctxt);
8008
481M
    if (name == NULL) {
8009
717k
  xmlFatalErrMsg(ctxt, XML_ERR_PEREF_NO_NAME, "PEReference: no name\n");
8010
717k
  return;
8011
717k
    }
8012
480M
    if (xmlParserDebugEntities)
8013
0
  xmlGenericError(xmlGenericErrorContext,
8014
0
    "PEReference: %s\n", name);
8015
480M
    if (RAW != ';') {
8016
7.83M
  xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
8017
7.83M
        return;
8018
7.83M
    }
8019
8020
472M
    NEXT;
8021
8022
    /*
8023
     * Request the entity from SAX
8024
     */
8025
472M
    if ((ctxt->sax != NULL) &&
8026
472M
  (ctxt->sax->getParameterEntity != NULL))
8027
472M
  entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8028
472M
    if (ctxt->instate == XML_PARSER_EOF)
8029
0
  return;
8030
472M
    if (entity == NULL) {
8031
  /*
8032
   * [ WFC: Entity Declared ]
8033
   * In a document without any DTD, a document with only an
8034
   * internal DTD subset which contains no parameter entity
8035
   * references, or a document with "standalone='yes'", ...
8036
   * ... The declaration of a parameter entity must precede
8037
   * any reference to it...
8038
   */
8039
58.0M
  if ((ctxt->standalone == 1) ||
8040
58.0M
      ((ctxt->hasExternalSubset == 0) &&
8041
58.0M
       (ctxt->hasPErefs == 0))) {
8042
9.37k
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8043
9.37k
            "PEReference: %%%s; not found\n",
8044
9.37k
            name);
8045
58.0M
  } else {
8046
      /*
8047
       * [ VC: Entity Declared ]
8048
       * In a document with an external subset or external
8049
       * parameter entities with "standalone='no'", ...
8050
       * ... The declaration of a parameter entity must
8051
       * precede any reference to it...
8052
       */
8053
58.0M
            if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
8054
11.6M
                xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
8055
11.6M
                                 "PEReference: %%%s; not found\n",
8056
11.6M
                                 name, NULL);
8057
11.6M
            } else
8058
46.3M
                xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8059
46.3M
                              "PEReference: %%%s; not found\n",
8060
46.3M
                              name, NULL);
8061
58.0M
            ctxt->valid = 0;
8062
58.0M
  }
8063
414M
    } else {
8064
  /*
8065
   * Internal checking in case the entity quest barfed
8066
   */
8067
414M
  if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8068
414M
      (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8069
0
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8070
0
      "Internal: %%%s; is not a parameter entity\n",
8071
0
        name, NULL);
8072
414M
  } else {
8073
414M
            xmlChar start[4];
8074
414M
            xmlCharEncoding enc;
8075
414M
            unsigned long parentConsumed;
8076
414M
            xmlEntityPtr oldEnt;
8077
8078
414M
      if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
8079
414M
          ((ctxt->options & XML_PARSE_NOENT) == 0) &&
8080
414M
    ((ctxt->options & XML_PARSE_DTDVALID) == 0) &&
8081
414M
    ((ctxt->options & XML_PARSE_DTDLOAD) == 0) &&
8082
414M
    ((ctxt->options & XML_PARSE_DTDATTR) == 0) &&
8083
414M
    (ctxt->replaceEntities == 0) &&
8084
414M
    (ctxt->validate == 0))
8085
269
    return;
8086
8087
414M
            if (entity->flags & XML_ENT_EXPANDING) {
8088
1.08k
                xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
8089
1.08k
                xmlHaltParser(ctxt);
8090
1.08k
                return;
8091
1.08k
            }
8092
8093
            /* Must be computed from old input before pushing new input. */
8094
414M
            parentConsumed = ctxt->input->parentConsumed;
8095
414M
            oldEnt = ctxt->input->entity;
8096
414M
            if ((oldEnt == NULL) ||
8097
414M
                ((oldEnt->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
8098
406M
                 ((oldEnt->flags & XML_ENT_PARSED) == 0))) {
8099
11.3M
                xmlSaturatedAdd(&parentConsumed, ctxt->input->consumed);
8100
11.3M
                xmlSaturatedAddSizeT(&parentConsumed,
8101
11.3M
                                     ctxt->input->cur - ctxt->input->base);
8102
11.3M
            }
8103
8104
414M
      input = xmlNewEntityInputStream(ctxt, entity);
8105
414M
      if (xmlPushInput(ctxt, input) < 0) {
8106
8.54k
                xmlFreeInputStream(input);
8107
8.54k
    return;
8108
8.54k
            }
8109
8110
414M
            entity->flags |= XML_ENT_EXPANDING;
8111
8112
414M
            input->parentConsumed = parentConsumed;
8113
8114
414M
      if (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) {
8115
                /*
8116
                 * Get the 4 first bytes and decode the charset
8117
                 * if enc != XML_CHAR_ENCODING_NONE
8118
                 * plug some encoding conversion routines.
8119
                 * Note that, since we may have some non-UTF8
8120
                 * encoding (like UTF16, bug 135229), the 'length'
8121
                 * is not known, but we can calculate based upon
8122
                 * the amount of data in the buffer.
8123
                 */
8124
30.6k
                GROW
8125
30.6k
                if (ctxt->instate == XML_PARSER_EOF)
8126
0
                    return;
8127
30.6k
                if ((ctxt->input->end - ctxt->input->cur)>=4) {
8128
30.4k
                    start[0] = RAW;
8129
30.4k
                    start[1] = NXT(1);
8130
30.4k
                    start[2] = NXT(2);
8131
30.4k
                    start[3] = NXT(3);
8132
30.4k
                    enc = xmlDetectCharEncoding(start, 4);
8133
30.4k
                    if (enc != XML_CHAR_ENCODING_NONE) {
8134
2.43k
                        xmlSwitchEncoding(ctxt, enc);
8135
2.43k
                    }
8136
30.4k
                }
8137
8138
30.6k
                if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
8139
30.6k
                    (IS_BLANK_CH(NXT(5)))) {
8140
1.86k
                    xmlParseTextDecl(ctxt);
8141
1.86k
                }
8142
30.6k
            }
8143
414M
  }
8144
414M
    }
8145
472M
    ctxt->hasPErefs = 1;
8146
472M
}
8147
8148
/**
8149
 * xmlLoadEntityContent:
8150
 * @ctxt:  an XML parser context
8151
 * @entity: an unloaded system entity
8152
 *
8153
 * Load the original content of the given system entity from the
8154
 * ExternalID/SystemID given. This is to be used for Included in Literal
8155
 * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
8156
 *
8157
 * Returns 0 in case of success and -1 in case of failure
8158
 */
8159
static int
8160
8.43k
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
8161
8.43k
    xmlParserInputPtr input;
8162
8.43k
    xmlBufferPtr buf;
8163
8.43k
    int l, c;
8164
8.43k
    int count = 0;
8165
8166
8.43k
    if ((ctxt == NULL) || (entity == NULL) ||
8167
8.43k
        ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
8168
8.43k
   (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
8169
8.43k
  (entity->content != NULL)) {
8170
0
  xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8171
0
              "xmlLoadEntityContent parameter error");
8172
0
        return(-1);
8173
0
    }
8174
8175
8.43k
    if (xmlParserDebugEntities)
8176
0
  xmlGenericError(xmlGenericErrorContext,
8177
0
    "Reading %s entity content input\n", entity->name);
8178
8179
8.43k
    buf = xmlBufferCreate();
8180
8.43k
    if (buf == NULL) {
8181
0
  xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8182
0
              "xmlLoadEntityContent parameter error");
8183
0
        return(-1);
8184
0
    }
8185
8.43k
    xmlBufferSetAllocationScheme(buf, XML_BUFFER_ALLOC_DOUBLEIT);
8186
8187
8.43k
    input = xmlNewEntityInputStream(ctxt, entity);
8188
8.43k
    if (input == NULL) {
8189
1.58k
  xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8190
1.58k
              "xmlLoadEntityContent input error");
8191
1.58k
  xmlBufferFree(buf);
8192
1.58k
        return(-1);
8193
1.58k
    }
8194
8195
    /*
8196
     * Push the entity as the current input, read char by char
8197
     * saving to the buffer until the end of the entity or an error
8198
     */
8199
6.84k
    if (xmlPushInput(ctxt, input) < 0) {
8200
0
        xmlBufferFree(buf);
8201
0
  xmlFreeInputStream(input);
8202
0
  return(-1);
8203
0
    }
8204
8205
6.84k
    GROW;
8206
6.84k
    c = CUR_CHAR(l);
8207
17.6M
    while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
8208
17.6M
           (IS_CHAR(c))) {
8209
17.6M
        xmlBufferAdd(buf, ctxt->input->cur, l);
8210
17.6M
  if (count++ > XML_PARSER_CHUNK_SIZE) {
8211
169k
      count = 0;
8212
169k
      GROW;
8213
169k
            if (ctxt->instate == XML_PARSER_EOF) {
8214
0
                xmlBufferFree(buf);
8215
0
                return(-1);
8216
0
            }
8217
169k
  }
8218
17.6M
  NEXTL(l);
8219
17.6M
  c = CUR_CHAR(l);
8220
17.6M
  if (c == 0) {
8221
5.83k
      count = 0;
8222
5.83k
      GROW;
8223
5.83k
            if (ctxt->instate == XML_PARSER_EOF) {
8224
0
                xmlBufferFree(buf);
8225
0
                return(-1);
8226
0
            }
8227
5.83k
      c = CUR_CHAR(l);
8228
5.83k
  }
8229
17.6M
    }
8230
8231
6.84k
    if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
8232
3.93k
        xmlSaturatedAdd(&ctxt->sizeentities, ctxt->input->consumed);
8233
3.93k
        xmlPopInput(ctxt);
8234
3.93k
    } else if (!IS_CHAR(c)) {
8235
2.91k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
8236
2.91k
                          "xmlLoadEntityContent: invalid char value %d\n",
8237
2.91k
                    c);
8238
2.91k
  xmlBufferFree(buf);
8239
2.91k
  return(-1);
8240
2.91k
    }
8241
3.93k
    entity->content = buf->content;
8242
3.93k
    entity->length = buf->use;
8243
3.93k
    buf->content = NULL;
8244
3.93k
    xmlBufferFree(buf);
8245
8246
3.93k
    return(0);
8247
6.84k
}
8248
8249
/**
8250
 * xmlParseStringPEReference:
8251
 * @ctxt:  an XML parser context
8252
 * @str:  a pointer to an index in the string
8253
 *
8254
 * parse PEReference declarations
8255
 *
8256
 * [69] PEReference ::= '%' Name ';'
8257
 *
8258
 * [ WFC: No Recursion ]
8259
 * A parsed entity must not contain a recursive
8260
 * reference to itself, either directly or indirectly.
8261
 *
8262
 * [ WFC: Entity Declared ]
8263
 * In a document without any DTD, a document with only an internal DTD
8264
 * subset which contains no parameter entity references, or a document
8265
 * with "standalone='yes'", ...  ... The declaration of a parameter
8266
 * entity must precede any reference to it...
8267
 *
8268
 * [ VC: Entity Declared ]
8269
 * In a document with an external subset or external parameter entities
8270
 * with "standalone='no'", ...  ... The declaration of a parameter entity
8271
 * must precede any reference to it...
8272
 *
8273
 * [ WFC: In DTD ]
8274
 * Parameter-entity references may only appear in the DTD.
8275
 * NOTE: misleading but this is handled.
8276
 *
8277
 * Returns the string of the entity content.
8278
 *         str is updated to the current value of the index
8279
 */
8280
static xmlEntityPtr
8281
4.29M
xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
8282
4.29M
    const xmlChar *ptr;
8283
4.29M
    xmlChar cur;
8284
4.29M
    xmlChar *name;
8285
4.29M
    xmlEntityPtr entity = NULL;
8286
8287
4.29M
    if ((str == NULL) || (*str == NULL)) return(NULL);
8288
4.29M
    ptr = *str;
8289
4.29M
    cur = *ptr;
8290
4.29M
    if (cur != '%')
8291
0
        return(NULL);
8292
4.29M
    ptr++;
8293
4.29M
    name = xmlParseStringName(ctxt, &ptr);
8294
4.29M
    if (name == NULL) {
8295
113k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8296
113k
           "xmlParseStringPEReference: no name\n");
8297
113k
  *str = ptr;
8298
113k
  return(NULL);
8299
113k
    }
8300
4.18M
    cur = *ptr;
8301
4.18M
    if (cur != ';') {
8302
4.13k
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
8303
4.13k
  xmlFree(name);
8304
4.13k
  *str = ptr;
8305
4.13k
  return(NULL);
8306
4.13k
    }
8307
4.18M
    ptr++;
8308
8309
    /*
8310
     * Request the entity from SAX
8311
     */
8312
4.18M
    if ((ctxt->sax != NULL) &&
8313
4.18M
  (ctxt->sax->getParameterEntity != NULL))
8314
4.18M
  entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8315
4.18M
    if (ctxt->instate == XML_PARSER_EOF) {
8316
0
  xmlFree(name);
8317
0
  *str = ptr;
8318
0
  return(NULL);
8319
0
    }
8320
4.18M
    if (entity == NULL) {
8321
  /*
8322
   * [ WFC: Entity Declared ]
8323
   * In a document without any DTD, a document with only an
8324
   * internal DTD subset which contains no parameter entity
8325
   * references, or a document with "standalone='yes'", ...
8326
   * ... The declaration of a parameter entity must precede
8327
   * any reference to it...
8328
   */
8329
154k
  if ((ctxt->standalone == 1) ||
8330
154k
      ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) {
8331
3.46k
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8332
3.46k
     "PEReference: %%%s; not found\n", name);
8333
150k
  } else {
8334
      /*
8335
       * [ VC: Entity Declared ]
8336
       * In a document with an external subset or external
8337
       * parameter entities with "standalone='no'", ...
8338
       * ... The declaration of a parameter entity must
8339
       * precede any reference to it...
8340
       */
8341
150k
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8342
150k
        "PEReference: %%%s; not found\n",
8343
150k
        name, NULL);
8344
150k
      ctxt->valid = 0;
8345
150k
  }
8346
4.02M
    } else {
8347
  /*
8348
   * Internal checking in case the entity quest barfed
8349
   */
8350
4.02M
  if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8351
4.02M
      (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8352
0
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8353
0
        "%%%s; is not a parameter entity\n",
8354
0
        name, NULL);
8355
0
  }
8356
4.02M
    }
8357
4.18M
    ctxt->hasPErefs = 1;
8358
4.18M
    xmlFree(name);
8359
4.18M
    *str = ptr;
8360
4.18M
    return(entity);
8361
4.18M
}
8362
8363
/**
8364
 * xmlParseDocTypeDecl:
8365
 * @ctxt:  an XML parser context
8366
 *
8367
 * DEPRECATED: Internal function, don't use.
8368
 *
8369
 * parse a DOCTYPE declaration
8370
 *
8371
 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
8372
 *                      ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8373
 *
8374
 * [ VC: Root Element Type ]
8375
 * The Name in the document type declaration must match the element
8376
 * type of the root element.
8377
 */
8378
8379
void
8380
640k
xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
8381
640k
    const xmlChar *name = NULL;
8382
640k
    xmlChar *ExternalID = NULL;
8383
640k
    xmlChar *URI = NULL;
8384
8385
    /*
8386
     * We know that '<!DOCTYPE' has been detected.
8387
     */
8388
640k
    SKIP(9);
8389
8390
640k
    SKIP_BLANKS;
8391
8392
    /*
8393
     * Parse the DOCTYPE name.
8394
     */
8395
640k
    name = xmlParseName(ctxt);
8396
640k
    if (name == NULL) {
8397
1.72k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8398
1.72k
           "xmlParseDocTypeDecl : no DOCTYPE name !\n");
8399
1.72k
    }
8400
640k
    ctxt->intSubName = name;
8401
8402
640k
    SKIP_BLANKS;
8403
8404
    /*
8405
     * Check for SystemID and ExternalID
8406
     */
8407
640k
    URI = xmlParseExternalID(ctxt, &ExternalID, 1);
8408
8409
640k
    if ((URI != NULL) || (ExternalID != NULL)) {
8410
295k
        ctxt->hasExternalSubset = 1;
8411
295k
    }
8412
640k
    ctxt->extSubURI = URI;
8413
640k
    ctxt->extSubSystem = ExternalID;
8414
8415
640k
    SKIP_BLANKS;
8416
8417
    /*
8418
     * Create and update the internal subset.
8419
     */
8420
640k
    if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
8421
640k
  (!ctxt->disableSAX))
8422
619k
  ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
8423
640k
    if (ctxt->instate == XML_PARSER_EOF)
8424
0
  return;
8425
8426
    /*
8427
     * Is there any internal subset declarations ?
8428
     * they are handled separately in xmlParseInternalSubset()
8429
     */
8430
640k
    if (RAW == '[')
8431
481k
  return;
8432
8433
    /*
8434
     * We should be at the end of the DOCTYPE declaration.
8435
     */
8436
158k
    if (RAW != '>') {
8437
23.6k
  xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8438
23.6k
    }
8439
158k
    NEXT;
8440
158k
}
8441
8442
/**
8443
 * xmlParseInternalSubset:
8444
 * @ctxt:  an XML parser context
8445
 *
8446
 * parse the internal subset declaration
8447
 *
8448
 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8449
 */
8450
8451
static void
8452
482k
xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
8453
    /*
8454
     * Is there any DTD definition ?
8455
     */
8456
482k
    if (RAW == '[') {
8457
482k
        int baseInputNr = ctxt->inputNr;
8458
482k
        ctxt->instate = XML_PARSER_DTD;
8459
482k
        NEXT;
8460
  /*
8461
   * Parse the succession of Markup declarations and
8462
   * PEReferences.
8463
   * Subsequence (markupdecl | PEReference | S)*
8464
   */
8465
482k
  SKIP_BLANKS;
8466
423M
  while (((RAW != ']') || (ctxt->inputNr > baseInputNr)) &&
8467
423M
               (ctxt->instate != XML_PARSER_EOF)) {
8468
8469
            /*
8470
             * Conditional sections are allowed from external entities included
8471
             * by PE References in the internal subset.
8472
             */
8473
423M
            if ((ctxt->inputNr > 1) && (ctxt->input->filename != NULL) &&
8474
423M
                (RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
8475
0
                xmlParseConditionalSections(ctxt);
8476
423M
            } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
8477
422M
          xmlParseMarkupDecl(ctxt);
8478
422M
            } else if (RAW == '%') {
8479
745k
          xmlParsePEReference(ctxt);
8480
745k
            } else {
8481
85.1k
    xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8482
85.1k
                        "xmlParseInternalSubset: error detected in"
8483
85.1k
                        " Markup declaration\n");
8484
85.1k
                xmlHaltParser(ctxt);
8485
85.1k
                return;
8486
85.1k
            }
8487
423M
      SKIP_BLANKS;
8488
423M
  }
8489
397k
  if (RAW == ']') {
8490
368k
      NEXT;
8491
368k
      SKIP_BLANKS;
8492
368k
  }
8493
397k
    }
8494
8495
    /*
8496
     * We should be at the end of the DOCTYPE declaration.
8497
     */
8498
397k
    if (RAW != '>') {
8499
31.4k
  xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8500
31.4k
  return;
8501
31.4k
    }
8502
365k
    NEXT;
8503
365k
}
8504
8505
#ifdef LIBXML_SAX1_ENABLED
8506
/**
8507
 * xmlParseAttribute:
8508
 * @ctxt:  an XML parser context
8509
 * @value:  a xmlChar ** used to store the value of the attribute
8510
 *
8511
 * DEPRECATED: Internal function, don't use.
8512
 *
8513
 * parse an attribute
8514
 *
8515
 * [41] Attribute ::= Name Eq AttValue
8516
 *
8517
 * [ WFC: No External Entity References ]
8518
 * Attribute values cannot contain direct or indirect entity references
8519
 * to external entities.
8520
 *
8521
 * [ WFC: No < in Attribute Values ]
8522
 * The replacement text of any entity referred to directly or indirectly in
8523
 * an attribute value (other than "&lt;") must not contain a <.
8524
 *
8525
 * [ VC: Attribute Value Type ]
8526
 * The attribute must have been declared; the value must be of the type
8527
 * declared for it.
8528
 *
8529
 * [25] Eq ::= S? '=' S?
8530
 *
8531
 * With namespace:
8532
 *
8533
 * [NS 11] Attribute ::= QName Eq AttValue
8534
 *
8535
 * Also the case QName == xmlns:??? is handled independently as a namespace
8536
 * definition.
8537
 *
8538
 * Returns the attribute name, and the value in *value.
8539
 */
8540
8541
const xmlChar *
8542
14.3M
xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
8543
14.3M
    const xmlChar *name;
8544
14.3M
    xmlChar *val;
8545
8546
14.3M
    *value = NULL;
8547
14.3M
    GROW;
8548
14.3M
    name = xmlParseName(ctxt);
8549
14.3M
    if (name == NULL) {
8550
614k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8551
614k
                 "error parsing attribute name\n");
8552
614k
        return(NULL);
8553
614k
    }
8554
8555
    /*
8556
     * read the value
8557
     */
8558
13.7M
    SKIP_BLANKS;
8559
13.7M
    if (RAW == '=') {
8560
13.4M
        NEXT;
8561
13.4M
  SKIP_BLANKS;
8562
13.4M
  val = xmlParseAttValue(ctxt);
8563
13.4M
  ctxt->instate = XML_PARSER_CONTENT;
8564
13.4M
    } else {
8565
311k
  xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8566
311k
         "Specification mandates value for attribute %s\n", name);
8567
311k
  return(name);
8568
311k
    }
8569
8570
    /*
8571
     * Check that xml:lang conforms to the specification
8572
     * No more registered as an error, just generate a warning now
8573
     * since this was deprecated in XML second edition
8574
     */
8575
13.4M
    if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8576
47.5k
  if (!xmlCheckLanguageID(val)) {
8577
18.6k
      xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8578
18.6k
              "Malformed value for xml:lang : %s\n",
8579
18.6k
        val, NULL);
8580
18.6k
  }
8581
47.5k
    }
8582
8583
    /*
8584
     * Check that xml:space conforms to the specification
8585
     */
8586
13.4M
    if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8587
2.88k
  if (xmlStrEqual(val, BAD_CAST "default"))
8588
142
      *(ctxt->space) = 0;
8589
2.74k
  else if (xmlStrEqual(val, BAD_CAST "preserve"))
8590
983
      *(ctxt->space) = 1;
8591
1.76k
  else {
8592
1.76k
    xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8593
1.76k
"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8594
1.76k
                                 val, NULL);
8595
1.76k
  }
8596
2.88k
    }
8597
8598
13.4M
    *value = val;
8599
13.4M
    return(name);
8600
13.7M
}
8601
8602
/**
8603
 * xmlParseStartTag:
8604
 * @ctxt:  an XML parser context
8605
 *
8606
 * DEPRECATED: Internal function, don't use.
8607
 *
8608
 * Parse a start tag. Always consumes '<'.
8609
 *
8610
 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8611
 *
8612
 * [ WFC: Unique Att Spec ]
8613
 * No attribute name may appear more than once in the same start-tag or
8614
 * empty-element tag.
8615
 *
8616
 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8617
 *
8618
 * [ WFC: Unique Att Spec ]
8619
 * No attribute name may appear more than once in the same start-tag or
8620
 * empty-element tag.
8621
 *
8622
 * With namespace:
8623
 *
8624
 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8625
 *
8626
 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8627
 *
8628
 * Returns the element name parsed
8629
 */
8630
8631
const xmlChar *
8632
16.4M
xmlParseStartTag(xmlParserCtxtPtr ctxt) {
8633
16.4M
    const xmlChar *name;
8634
16.4M
    const xmlChar *attname;
8635
16.4M
    xmlChar *attvalue;
8636
16.4M
    const xmlChar **atts = ctxt->atts;
8637
16.4M
    int nbatts = 0;
8638
16.4M
    int maxatts = ctxt->maxatts;
8639
16.4M
    int i;
8640
8641
16.4M
    if (RAW != '<') return(NULL);
8642
16.4M
    NEXT1;
8643
8644
16.4M
    name = xmlParseName(ctxt);
8645
16.4M
    if (name == NULL) {
8646
318k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8647
318k
       "xmlParseStartTag: invalid element name\n");
8648
318k
        return(NULL);
8649
318k
    }
8650
8651
    /*
8652
     * Now parse the attributes, it ends up with the ending
8653
     *
8654
     * (S Attribute)* S?
8655
     */
8656
16.1M
    SKIP_BLANKS;
8657
16.1M
    GROW;
8658
8659
21.6M
    while (((RAW != '>') &&
8660
21.6M
     ((RAW != '/') || (NXT(1) != '>')) &&
8661
21.6M
     (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
8662
14.3M
  attname = xmlParseAttribute(ctxt, &attvalue);
8663
14.3M
        if (attname == NULL) {
8664
614k
      xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
8665
614k
         "xmlParseStartTag: problem parsing attributes\n");
8666
614k
      break;
8667
614k
  }
8668
13.7M
        if (attvalue != NULL) {
8669
      /*
8670
       * [ WFC: Unique Att Spec ]
8671
       * No attribute name may appear more than once in the same
8672
       * start-tag or empty-element tag.
8673
       */
8674
18.4M
      for (i = 0; i < nbatts;i += 2) {
8675
5.06M
          if (xmlStrEqual(atts[i], attname)) {
8676
18.6k
        xmlErrAttributeDup(ctxt, NULL, attname);
8677
18.6k
        xmlFree(attvalue);
8678
18.6k
        goto failed;
8679
18.6k
    }
8680
5.06M
      }
8681
      /*
8682
       * Add the pair to atts
8683
       */
8684
13.3M
      if (atts == NULL) {
8685
152k
          maxatts = 22; /* allow for 10 attrs by default */
8686
152k
          atts = (const xmlChar **)
8687
152k
           xmlMalloc(maxatts * sizeof(xmlChar *));
8688
152k
    if (atts == NULL) {
8689
0
        xmlErrMemory(ctxt, NULL);
8690
0
        if (attvalue != NULL)
8691
0
      xmlFree(attvalue);
8692
0
        goto failed;
8693
0
    }
8694
152k
    ctxt->atts = atts;
8695
152k
    ctxt->maxatts = maxatts;
8696
13.2M
      } else if (nbatts + 4 > maxatts) {
8697
499
          const xmlChar **n;
8698
8699
499
          maxatts *= 2;
8700
499
          n = (const xmlChar **) xmlRealloc((void *) atts,
8701
499
               maxatts * sizeof(const xmlChar *));
8702
499
    if (n == NULL) {
8703
0
        xmlErrMemory(ctxt, NULL);
8704
0
        if (attvalue != NULL)
8705
0
      xmlFree(attvalue);
8706
0
        goto failed;
8707
0
    }
8708
499
    atts = n;
8709
499
    ctxt->atts = atts;
8710
499
    ctxt->maxatts = maxatts;
8711
499
      }
8712
13.3M
      atts[nbatts++] = attname;
8713
13.3M
      atts[nbatts++] = attvalue;
8714
13.3M
      atts[nbatts] = NULL;
8715
13.3M
      atts[nbatts + 1] = NULL;
8716
13.3M
  } else {
8717
347k
      if (attvalue != NULL)
8718
0
    xmlFree(attvalue);
8719
347k
  }
8720
8721
13.7M
failed:
8722
8723
13.7M
  GROW
8724
13.7M
  if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8725
8.28M
      break;
8726
5.46M
  if (SKIP_BLANKS == 0) {
8727
599k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8728
599k
         "attributes construct error\n");
8729
599k
  }
8730
5.46M
  SHRINK;
8731
5.46M
        GROW;
8732
5.46M
    }
8733
8734
    /*
8735
     * SAX: Start of Element !
8736
     */
8737
16.1M
    if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
8738
16.1M
  (!ctxt->disableSAX)) {
8739
15.2M
  if (nbatts > 0)
8740
7.97M
      ctxt->sax->startElement(ctxt->userData, name, atts);
8741
7.23M
  else
8742
7.23M
      ctxt->sax->startElement(ctxt->userData, name, NULL);
8743
15.2M
    }
8744
8745
16.1M
    if (atts != NULL) {
8746
        /* Free only the content strings */
8747
28.8M
        for (i = 1;i < nbatts;i+=2)
8748
13.3M
      if (atts[i] != NULL)
8749
13.3M
         xmlFree((xmlChar *) atts[i]);
8750
15.5M
    }
8751
16.1M
    return(name);
8752
16.1M
}
8753
8754
/**
8755
 * xmlParseEndTag1:
8756
 * @ctxt:  an XML parser context
8757
 * @line:  line of the start tag
8758
 * @nsNr:  number of namespaces on the start tag
8759
 *
8760
 * Parse an end tag. Always consumes '</'.
8761
 *
8762
 * [42] ETag ::= '</' Name S? '>'
8763
 *
8764
 * With namespace
8765
 *
8766
 * [NS 9] ETag ::= '</' QName S? '>'
8767
 */
8768
8769
static void
8770
9.95M
xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
8771
9.95M
    const xmlChar *name;
8772
8773
9.95M
    GROW;
8774
9.95M
    if ((RAW != '<') || (NXT(1) != '/')) {
8775
0
  xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
8776
0
           "xmlParseEndTag: '</' not found\n");
8777
0
  return;
8778
0
    }
8779
9.95M
    SKIP(2);
8780
8781
9.95M
    name = xmlParseNameAndCompare(ctxt,ctxt->name);
8782
8783
    /*
8784
     * We should definitely be at the ending "S? '>'" part
8785
     */
8786
9.95M
    GROW;
8787
9.95M
    SKIP_BLANKS;
8788
9.95M
    if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
8789
106k
  xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
8790
106k
    } else
8791
9.85M
  NEXT1;
8792
8793
    /*
8794
     * [ WFC: Element Type Match ]
8795
     * The Name in an element's end-tag must match the element type in the
8796
     * start-tag.
8797
     *
8798
     */
8799
9.95M
    if (name != (xmlChar*)1) {
8800
271k
        if (name == NULL) name = BAD_CAST "unparsable";
8801
271k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
8802
271k
         "Opening and ending tag mismatch: %s line %d and %s\n",
8803
271k
                    ctxt->name, line, name);
8804
271k
    }
8805
8806
    /*
8807
     * SAX: End of Tag
8808
     */
8809
9.95M
    if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8810
9.95M
  (!ctxt->disableSAX))
8811
9.54M
        ctxt->sax->endElement(ctxt->userData, ctxt->name);
8812
8813
9.95M
    namePop(ctxt);
8814
9.95M
    spacePop(ctxt);
8815
9.95M
    return;
8816
9.95M
}
8817
8818
/**
8819
 * xmlParseEndTag:
8820
 * @ctxt:  an XML parser context
8821
 *
8822
 * DEPRECATED: Internal function, don't use.
8823
 *
8824
 * parse an end of tag
8825
 *
8826
 * [42] ETag ::= '</' Name S? '>'
8827
 *
8828
 * With namespace
8829
 *
8830
 * [NS 9] ETag ::= '</' QName S? '>'
8831
 */
8832
8833
void
8834
0
xmlParseEndTag(xmlParserCtxtPtr ctxt) {
8835
0
    xmlParseEndTag1(ctxt, 0);
8836
0
}
8837
#endif /* LIBXML_SAX1_ENABLED */
8838
8839
/************************************************************************
8840
 *                  *
8841
 *          SAX 2 specific operations       *
8842
 *                  *
8843
 ************************************************************************/
8844
8845
/*
8846
 * xmlGetNamespace:
8847
 * @ctxt:  an XML parser context
8848
 * @prefix:  the prefix to lookup
8849
 *
8850
 * Lookup the namespace name for the @prefix (which ca be NULL)
8851
 * The prefix must come from the @ctxt->dict dictionary
8852
 *
8853
 * Returns the namespace name or NULL if not bound
8854
 */
8855
static const xmlChar *
8856
27.6M
xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
8857
27.6M
    int i;
8858
8859
27.6M
    if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
8860
39.2M
    for (i = ctxt->nsNr - 2;i >= 0;i-=2)
8861
15.0M
        if (ctxt->nsTab[i] == prefix) {
8862
1.87M
      if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
8863
42.1k
          return(NULL);
8864
1.83M
      return(ctxt->nsTab[i + 1]);
8865
1.87M
  }
8866
24.1M
    return(NULL);
8867
26.0M
}
8868
8869
/**
8870
 * xmlParseQName:
8871
 * @ctxt:  an XML parser context
8872
 * @prefix:  pointer to store the prefix part
8873
 *
8874
 * parse an XML Namespace QName
8875
 *
8876
 * [6]  QName  ::= (Prefix ':')? LocalPart
8877
 * [7]  Prefix  ::= NCName
8878
 * [8]  LocalPart  ::= NCName
8879
 *
8880
 * Returns the Name parsed or NULL
8881
 */
8882
8883
static const xmlChar *
8884
48.8M
xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8885
48.8M
    const xmlChar *l, *p;
8886
8887
48.8M
    GROW;
8888
8889
48.8M
    l = xmlParseNCName(ctxt);
8890
48.8M
    if (l == NULL) {
8891
1.02M
        if (CUR == ':') {
8892
10.7k
      l = xmlParseName(ctxt);
8893
10.7k
      if (l != NULL) {
8894
10.7k
          xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8895
10.7k
             "Failed to parse QName '%s'\n", l, NULL, NULL);
8896
10.7k
    *prefix = NULL;
8897
10.7k
    return(l);
8898
10.7k
      }
8899
10.7k
  }
8900
1.01M
        return(NULL);
8901
1.02M
    }
8902
47.8M
    if (CUR == ':') {
8903
2.63M
        NEXT;
8904
2.63M
  p = l;
8905
2.63M
  l = xmlParseNCName(ctxt);
8906
2.63M
  if (l == NULL) {
8907
88.2k
      xmlChar *tmp;
8908
8909
88.2k
            if (ctxt->instate == XML_PARSER_EOF)
8910
0
                return(NULL);
8911
88.2k
            xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8912
88.2k
               "Failed to parse QName '%s:'\n", p, NULL, NULL);
8913
88.2k
      l = xmlParseNmtoken(ctxt);
8914
88.2k
      if (l == NULL) {
8915
70.5k
                if (ctxt->instate == XML_PARSER_EOF)
8916
0
                    return(NULL);
8917
70.5k
    tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
8918
70.5k
            } else {
8919
17.6k
    tmp = xmlBuildQName(l, p, NULL, 0);
8920
17.6k
    xmlFree((char *)l);
8921
17.6k
      }
8922
88.2k
      p = xmlDictLookup(ctxt->dict, tmp, -1);
8923
88.2k
      if (tmp != NULL) xmlFree(tmp);
8924
88.2k
      *prefix = NULL;
8925
88.2k
      return(p);
8926
88.2k
  }
8927
2.54M
  if (CUR == ':') {
8928
38.5k
      xmlChar *tmp;
8929
8930
38.5k
            xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8931
38.5k
               "Failed to parse QName '%s:%s:'\n", p, l, NULL);
8932
38.5k
      NEXT;
8933
38.5k
      tmp = (xmlChar *) xmlParseName(ctxt);
8934
38.5k
      if (tmp != NULL) {
8935
23.7k
          tmp = xmlBuildQName(tmp, l, NULL, 0);
8936
23.7k
    l = xmlDictLookup(ctxt->dict, tmp, -1);
8937
23.7k
    if (tmp != NULL) xmlFree(tmp);
8938
23.7k
    *prefix = p;
8939
23.7k
    return(l);
8940
23.7k
      }
8941
14.8k
            if (ctxt->instate == XML_PARSER_EOF)
8942
0
                return(NULL);
8943
14.8k
      tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
8944
14.8k
      l = xmlDictLookup(ctxt->dict, tmp, -1);
8945
14.8k
      if (tmp != NULL) xmlFree(tmp);
8946
14.8k
      *prefix = p;
8947
14.8k
      return(l);
8948
14.8k
  }
8949
2.50M
  *prefix = p;
8950
2.50M
    } else
8951
45.2M
        *prefix = NULL;
8952
47.7M
    return(l);
8953
47.8M
}
8954
8955
/**
8956
 * xmlParseQNameAndCompare:
8957
 * @ctxt:  an XML parser context
8958
 * @name:  the localname
8959
 * @prefix:  the prefix, if any.
8960
 *
8961
 * parse an XML name and compares for match
8962
 * (specialized for endtag parsing)
8963
 *
8964
 * Returns NULL for an illegal name, (xmlChar*) 1 for success
8965
 * and the name for mismatch
8966
 */
8967
8968
static const xmlChar *
8969
xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8970
562k
                        xmlChar const *prefix) {
8971
562k
    const xmlChar *cmp;
8972
562k
    const xmlChar *in;
8973
562k
    const xmlChar *ret;
8974
562k
    const xmlChar *prefix2;
8975
8976
562k
    if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8977
8978
562k
    GROW;
8979
562k
    in = ctxt->input->cur;
8980
8981
562k
    cmp = prefix;
8982
2.03M
    while (*in != 0 && *in == *cmp) {
8983
1.46M
  ++in;
8984
1.46M
  ++cmp;
8985
1.46M
    }
8986
562k
    if ((*cmp == 0) && (*in == ':')) {
8987
521k
        in++;
8988
521k
  cmp = name;
8989
4.40M
  while (*in != 0 && *in == *cmp) {
8990
3.88M
      ++in;
8991
3.88M
      ++cmp;
8992
3.88M
  }
8993
521k
  if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
8994
      /* success */
8995
454k
            ctxt->input->col += in - ctxt->input->cur;
8996
454k
      ctxt->input->cur = in;
8997
454k
      return((const xmlChar*) 1);
8998
454k
  }
8999
521k
    }
9000
    /*
9001
     * all strings coms from the dictionary, equality can be done directly
9002
     */
9003
108k
    ret = xmlParseQName (ctxt, &prefix2);
9004
108k
    if ((ret == name) && (prefix == prefix2))
9005
1.36k
  return((const xmlChar*) 1);
9006
106k
    return ret;
9007
108k
}
9008
9009
/**
9010
 * xmlParseAttValueInternal:
9011
 * @ctxt:  an XML parser context
9012
 * @len:  attribute len result
9013
 * @alloc:  whether the attribute was reallocated as a new string
9014
 * @normalize:  if 1 then further non-CDATA normalization must be done
9015
 *
9016
 * parse a value for an attribute.
9017
 * NOTE: if no normalization is needed, the routine will return pointers
9018
 *       directly from the data buffer.
9019
 *
9020
 * 3.3.3 Attribute-Value Normalization:
9021
 * Before the value of an attribute is passed to the application or
9022
 * checked for validity, the XML processor must normalize it as follows:
9023
 * - a character reference is processed by appending the referenced
9024
 *   character to the attribute value
9025
 * - an entity reference is processed by recursively processing the
9026
 *   replacement text of the entity
9027
 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
9028
 *   appending #x20 to the normalized value, except that only a single
9029
 *   #x20 is appended for a "#xD#xA" sequence that is part of an external
9030
 *   parsed entity or the literal entity value of an internal parsed entity
9031
 * - other characters are processed by appending them to the normalized value
9032
 * If the declared value is not CDATA, then the XML processor must further
9033
 * process the normalized attribute value by discarding any leading and
9034
 * trailing space (#x20) characters, and by replacing sequences of space
9035
 * (#x20) characters by a single space (#x20) character.
9036
 * All attributes for which no declaration has been read should be treated
9037
 * by a non-validating parser as if declared CDATA.
9038
 *
9039
 * Returns the AttValue parsed or NULL. The value has to be freed by the
9040
 *     caller if it was copied, this can be detected by val[*len] == 0.
9041
 */
9042
9043
#define GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end) \
9044
60.1k
    const xmlChar *oldbase = ctxt->input->base;\
9045
60.1k
    GROW;\
9046
60.1k
    if (ctxt->instate == XML_PARSER_EOF)\
9047
60.1k
        return(NULL);\
9048
60.1k
    if (oldbase != ctxt->input->base) {\
9049
0
        ptrdiff_t delta = ctxt->input->base - oldbase;\
9050
0
        start = start + delta;\
9051
0
        in = in + delta;\
9052
0
    }\
9053
60.1k
    end = ctxt->input->end;
9054
9055
static xmlChar *
9056
xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
9057
                         int normalize)
9058
37.6M
{
9059
37.6M
    xmlChar limit = 0;
9060
37.6M
    const xmlChar *in = NULL, *start, *end, *last;
9061
37.6M
    xmlChar *ret = NULL;
9062
37.6M
    int line, col;
9063
37.6M
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
9064
8.52M
                    XML_MAX_HUGE_LENGTH :
9065
37.6M
                    XML_MAX_TEXT_LENGTH;
9066
9067
37.6M
    GROW;
9068
37.6M
    in = (xmlChar *) CUR_PTR;
9069
37.6M
    line = ctxt->input->line;
9070
37.6M
    col = ctxt->input->col;
9071
37.6M
    if (*in != '"' && *in != '\'') {
9072
85.4k
        xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
9073
85.4k
        return (NULL);
9074
85.4k
    }
9075
37.5M
    ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
9076
9077
    /*
9078
     * try to handle in this routine the most common case where no
9079
     * allocation of a new string is required and where content is
9080
     * pure ASCII.
9081
     */
9082
37.5M
    limit = *in++;
9083
37.5M
    col++;
9084
37.5M
    end = ctxt->input->end;
9085
37.5M
    start = in;
9086
37.5M
    if (in >= end) {
9087
2.13k
        GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9088
2.13k
    }
9089
37.5M
    if (normalize) {
9090
        /*
9091
   * Skip any leading spaces
9092
   */
9093
2.72M
  while ((in < end) && (*in != limit) &&
9094
2.72M
         ((*in == 0x20) || (*in == 0x9) ||
9095
2.69M
          (*in == 0xA) || (*in == 0xD))) {
9096
566k
      if (*in == 0xA) {
9097
206k
          line++; col = 1;
9098
360k
      } else {
9099
360k
          col++;
9100
360k
      }
9101
566k
      in++;
9102
566k
      start = in;
9103
566k
      if (in >= end) {
9104
441
                GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9105
441
                if ((in - start) > maxLength) {
9106
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9107
0
                                   "AttValue length too long\n");
9108
0
                    return(NULL);
9109
0
                }
9110
441
      }
9111
566k
  }
9112
21.2M
  while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9113
21.2M
         (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
9114
19.0M
      col++;
9115
19.0M
      if ((*in++ == 0x20) && (*in == 0x20)) break;
9116
19.0M
      if (in >= end) {
9117
597
                GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9118
597
                if ((in - start) > maxLength) {
9119
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9120
0
                                   "AttValue length too long\n");
9121
0
                    return(NULL);
9122
0
                }
9123
597
      }
9124
19.0M
  }
9125
2.15M
  last = in;
9126
  /*
9127
   * skip the trailing blanks
9128
   */
9129
2.17M
  while ((last[-1] == 0x20) && (last > start)) last--;
9130
2.67M
  while ((in < end) && (*in != limit) &&
9131
2.67M
         ((*in == 0x20) || (*in == 0x9) ||
9132
661k
          (*in == 0xA) || (*in == 0xD))) {
9133
514k
      if (*in == 0xA) {
9134
51.9k
          line++, col = 1;
9135
462k
      } else {
9136
462k
          col++;
9137
462k
      }
9138
514k
      in++;
9139
514k
      if (in >= end) {
9140
422
    const xmlChar *oldbase = ctxt->input->base;
9141
422
    GROW;
9142
422
                if (ctxt->instate == XML_PARSER_EOF)
9143
0
                    return(NULL);
9144
422
    if (oldbase != ctxt->input->base) {
9145
0
        ptrdiff_t delta = ctxt->input->base - oldbase;
9146
0
        start = start + delta;
9147
0
        in = in + delta;
9148
0
        last = last + delta;
9149
0
    }
9150
422
    end = ctxt->input->end;
9151
422
                if ((in - start) > maxLength) {
9152
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9153
0
                                   "AttValue length too long\n");
9154
0
                    return(NULL);
9155
0
                }
9156
422
      }
9157
514k
  }
9158
2.15M
        if ((in - start) > maxLength) {
9159
0
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9160
0
                           "AttValue length too long\n");
9161
0
            return(NULL);
9162
0
        }
9163
2.15M
  if (*in != limit) goto need_complex;
9164
35.4M
    } else {
9165
489M
  while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9166
489M
         (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
9167
454M
      in++;
9168
454M
      col++;
9169
454M
      if (in >= end) {
9170
56.9k
                GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9171
56.9k
                if ((in - start) > maxLength) {
9172
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9173
0
                                   "AttValue length too long\n");
9174
0
                    return(NULL);
9175
0
                }
9176
56.9k
      }
9177
454M
  }
9178
35.4M
  last = in;
9179
35.4M
        if ((in - start) > maxLength) {
9180
0
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9181
0
                           "AttValue length too long\n");
9182
0
            return(NULL);
9183
0
        }
9184
35.4M
  if (*in != limit) goto need_complex;
9185
35.4M
    }
9186
36.1M
    in++;
9187
36.1M
    col++;
9188
36.1M
    if (len != NULL) {
9189
21.6M
        if (alloc) *alloc = 0;
9190
21.6M
        *len = last - start;
9191
21.6M
        ret = (xmlChar *) start;
9192
21.6M
    } else {
9193
14.5M
        if (alloc) *alloc = 1;
9194
14.5M
        ret = xmlStrndup(start, last - start);
9195
14.5M
    }
9196
36.1M
    CUR_PTR = in;
9197
36.1M
    ctxt->input->line = line;
9198
36.1M
    ctxt->input->col = col;
9199
36.1M
    return ret;
9200
1.41M
need_complex:
9201
1.41M
    if (alloc) *alloc = 1;
9202
1.41M
    return xmlParseAttValueComplex(ctxt, len, normalize);
9203
37.5M
}
9204
9205
/**
9206
 * xmlParseAttribute2:
9207
 * @ctxt:  an XML parser context
9208
 * @pref:  the element prefix
9209
 * @elem:  the element name
9210
 * @prefix:  a xmlChar ** used to store the value of the attribute prefix
9211
 * @value:  a xmlChar ** used to store the value of the attribute
9212
 * @len:  an int * to save the length of the attribute
9213
 * @alloc:  an int * to indicate if the attribute was allocated
9214
 *
9215
 * parse an attribute in the new SAX2 framework.
9216
 *
9217
 * Returns the attribute name, and the value in *value, .
9218
 */
9219
9220
static const xmlChar *
9221
xmlParseAttribute2(xmlParserCtxtPtr ctxt,
9222
                   const xmlChar * pref, const xmlChar * elem,
9223
                   const xmlChar ** prefix, xmlChar ** value,
9224
                   int *len, int *alloc)
9225
22.9M
{
9226
22.9M
    const xmlChar *name;
9227
22.9M
    xmlChar *val, *internal_val = NULL;
9228
22.9M
    int normalize = 0;
9229
9230
22.9M
    *value = NULL;
9231
22.9M
    GROW;
9232
22.9M
    name = xmlParseQName(ctxt, prefix);
9233
22.9M
    if (name == NULL) {
9234
332k
        xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9235
332k
                       "error parsing attribute name\n");
9236
332k
        return (NULL);
9237
332k
    }
9238
9239
    /*
9240
     * get the type if needed
9241
     */
9242
22.5M
    if (ctxt->attsSpecial != NULL) {
9243
4.49M
        int type;
9244
9245
4.49M
        type = (int) (ptrdiff_t) xmlHashQLookup2(ctxt->attsSpecial,
9246
4.49M
                                                 pref, elem, *prefix, name);
9247
4.49M
        if (type != 0)
9248
2.16M
            normalize = 1;
9249
4.49M
    }
9250
9251
    /*
9252
     * read the value
9253
     */
9254
22.5M
    SKIP_BLANKS;
9255
22.5M
    if (RAW == '=') {
9256
22.3M
        NEXT;
9257
22.3M
        SKIP_BLANKS;
9258
22.3M
        val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
9259
22.3M
        if (val == NULL)
9260
41.2k
            return (NULL);
9261
22.3M
  if (normalize) {
9262
      /*
9263
       * Sometimes a second normalisation pass for spaces is needed
9264
       * but that only happens if charrefs or entities references
9265
       * have been used in the attribute value, i.e. the attribute
9266
       * value have been extracted in an allocated string already.
9267
       */
9268
2.15M
      if (*alloc) {
9269
149k
          const xmlChar *val2;
9270
9271
149k
          val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
9272
149k
    if ((val2 != NULL) && (val2 != val)) {
9273
21.2k
        xmlFree(val);
9274
21.2k
        val = (xmlChar *) val2;
9275
21.2k
    }
9276
149k
      }
9277
2.15M
  }
9278
22.3M
        ctxt->instate = XML_PARSER_CONTENT;
9279
22.3M
    } else {
9280
215k
        xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
9281
215k
                          "Specification mandates value for attribute %s\n",
9282
215k
                          name);
9283
215k
        return (name);
9284
215k
    }
9285
9286
22.3M
    if (*prefix == ctxt->str_xml) {
9287
        /*
9288
         * Check that xml:lang conforms to the specification
9289
         * No more registered as an error, just generate a warning now
9290
         * since this was deprecated in XML second edition
9291
         */
9292
156k
        if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
9293
25.2k
            internal_val = xmlStrndup(val, *len);
9294
25.2k
            if (!xmlCheckLanguageID(internal_val)) {
9295
9.45k
                xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
9296
9.45k
                              "Malformed value for xml:lang : %s\n",
9297
9.45k
                              internal_val, NULL);
9298
9.45k
            }
9299
25.2k
        }
9300
9301
        /*
9302
         * Check that xml:space conforms to the specification
9303
         */
9304
156k
        if (xmlStrEqual(name, BAD_CAST "space")) {
9305
3.11k
            internal_val = xmlStrndup(val, *len);
9306
3.11k
            if (xmlStrEqual(internal_val, BAD_CAST "default"))
9307
148
                *(ctxt->space) = 0;
9308
2.97k
            else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
9309
969
                *(ctxt->space) = 1;
9310
2.00k
            else {
9311
2.00k
                xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
9312
2.00k
                              "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
9313
2.00k
                              internal_val, NULL);
9314
2.00k
            }
9315
3.11k
        }
9316
156k
        if (internal_val) {
9317
28.3k
            xmlFree(internal_val);
9318
28.3k
        }
9319
156k
    }
9320
9321
22.3M
    *value = val;
9322
22.3M
    return (name);
9323
22.5M
}
9324
/**
9325
 * xmlParseStartTag2:
9326
 * @ctxt:  an XML parser context
9327
 *
9328
 * Parse a start tag. Always consumes '<'.
9329
 *
9330
 * This routine is called when running SAX2 parsing
9331
 *
9332
 * [40] STag ::= '<' Name (S Attribute)* S? '>'
9333
 *
9334
 * [ WFC: Unique Att Spec ]
9335
 * No attribute name may appear more than once in the same start-tag or
9336
 * empty-element tag.
9337
 *
9338
 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
9339
 *
9340
 * [ WFC: Unique Att Spec ]
9341
 * No attribute name may appear more than once in the same start-tag or
9342
 * empty-element tag.
9343
 *
9344
 * With namespace:
9345
 *
9346
 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
9347
 *
9348
 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
9349
 *
9350
 * Returns the element name parsed
9351
 */
9352
9353
static const xmlChar *
9354
xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
9355
25.8M
                  const xmlChar **URI, int *tlen) {
9356
25.8M
    const xmlChar *localname;
9357
25.8M
    const xmlChar *prefix;
9358
25.8M
    const xmlChar *attname;
9359
25.8M
    const xmlChar *aprefix;
9360
25.8M
    const xmlChar *nsname;
9361
25.8M
    xmlChar *attvalue;
9362
25.8M
    const xmlChar **atts = ctxt->atts;
9363
25.8M
    int maxatts = ctxt->maxatts;
9364
25.8M
    int nratts, nbatts, nbdef, inputid;
9365
25.8M
    int i, j, nbNs, attval;
9366
25.8M
    unsigned long cur;
9367
25.8M
    int nsNr = ctxt->nsNr;
9368
9369
25.8M
    if (RAW != '<') return(NULL);
9370
25.8M
    NEXT1;
9371
9372
    /*
9373
     * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
9374
     *       point since the attribute values may be stored as pointers to
9375
     *       the buffer and calling SHRINK would destroy them !
9376
     *       The Shrinking is only possible once the full set of attribute
9377
     *       callbacks have been done.
9378
     */
9379
25.8M
    SHRINK;
9380
25.8M
    cur = ctxt->input->cur - ctxt->input->base;
9381
25.8M
    inputid = ctxt->input->id;
9382
25.8M
    nbatts = 0;
9383
25.8M
    nratts = 0;
9384
25.8M
    nbdef = 0;
9385
25.8M
    nbNs = 0;
9386
25.8M
    attval = 0;
9387
    /* Forget any namespaces added during an earlier parse of this element. */
9388
25.8M
    ctxt->nsNr = nsNr;
9389
9390
25.8M
    localname = xmlParseQName(ctxt, &prefix);
9391
25.8M
    if (localname == NULL) {
9392
675k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9393
675k
           "StartTag: invalid element name\n");
9394
675k
        return(NULL);
9395
675k
    }
9396
25.1M
    *tlen = ctxt->input->cur - ctxt->input->base - cur;
9397
9398
    /*
9399
     * Now parse the attributes, it ends up with the ending
9400
     *
9401
     * (S Attribute)* S?
9402
     */
9403
25.1M
    SKIP_BLANKS;
9404
25.1M
    GROW;
9405
9406
33.7M
    while (((RAW != '>') &&
9407
33.7M
     ((RAW != '/') || (NXT(1) != '>')) &&
9408
33.7M
     (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
9409
22.9M
  int len = -1, alloc = 0;
9410
9411
22.9M
  attname = xmlParseAttribute2(ctxt, prefix, localname,
9412
22.9M
                               &aprefix, &attvalue, &len, &alloc);
9413
22.9M
        if (attname == NULL) {
9414
373k
      xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9415
373k
           "xmlParseStartTag: problem parsing attributes\n");
9416
373k
      break;
9417
373k
  }
9418
22.5M
        if (attvalue == NULL)
9419
215k
            goto next_attr;
9420
22.3M
  if (len < 0) len = xmlStrlen(attvalue);
9421
9422
22.3M
        if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9423
174k
            const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9424
174k
            xmlURIPtr uri;
9425
9426
174k
            if (URL == NULL) {
9427
0
                xmlErrMemory(ctxt, "dictionary allocation failure");
9428
0
                if ((attvalue != NULL) && (alloc != 0))
9429
0
                    xmlFree(attvalue);
9430
0
                localname = NULL;
9431
0
                goto done;
9432
0
            }
9433
174k
            if (*URL != 0) {
9434
170k
                uri = xmlParseURI((const char *) URL);
9435
170k
                if (uri == NULL) {
9436
49.5k
                    xmlNsErr(ctxt, XML_WAR_NS_URI,
9437
49.5k
                             "xmlns: '%s' is not a valid URI\n",
9438
49.5k
                                       URL, NULL, NULL);
9439
121k
                } else {
9440
121k
                    if (uri->scheme == NULL) {
9441
50.4k
                        xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9442
50.4k
                                  "xmlns: URI %s is not absolute\n",
9443
50.4k
                                  URL, NULL, NULL);
9444
50.4k
                    }
9445
121k
                    xmlFreeURI(uri);
9446
121k
                }
9447
170k
                if (URL == ctxt->str_xml_ns) {
9448
0
                    if (attname != ctxt->str_xml) {
9449
0
                        xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9450
0
                     "xml namespace URI cannot be the default namespace\n",
9451
0
                                 NULL, NULL, NULL);
9452
0
                    }
9453
0
                    goto next_attr;
9454
0
                }
9455
170k
                if ((len == 29) &&
9456
170k
                    (xmlStrEqual(URL,
9457
3.06k
                             BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9458
219
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9459
219
                         "reuse of the xmlns namespace name is forbidden\n",
9460
219
                             NULL, NULL, NULL);
9461
219
                    goto next_attr;
9462
219
                }
9463
170k
            }
9464
            /*
9465
             * check that it's not a defined namespace
9466
             */
9467
221k
            for (j = 1;j <= nbNs;j++)
9468
56.3k
                if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9469
10.0k
                    break;
9470
174k
            if (j <= nbNs)
9471
10.0k
                xmlErrAttributeDup(ctxt, NULL, attname);
9472
164k
            else
9473
164k
                if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
9474
9475
22.1M
        } else if (aprefix == ctxt->str_xmlns) {
9476
292k
            const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9477
292k
            xmlURIPtr uri;
9478
9479
292k
            if (attname == ctxt->str_xml) {
9480
2.69k
                if (URL != ctxt->str_xml_ns) {
9481
2.69k
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9482
2.69k
                             "xml namespace prefix mapped to wrong URI\n",
9483
2.69k
                             NULL, NULL, NULL);
9484
2.69k
                }
9485
                /*
9486
                 * Do not keep a namespace definition node
9487
                 */
9488
2.69k
                goto next_attr;
9489
2.69k
            }
9490
290k
            if (URL == ctxt->str_xml_ns) {
9491
0
                if (attname != ctxt->str_xml) {
9492
0
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9493
0
                             "xml namespace URI mapped to wrong prefix\n",
9494
0
                             NULL, NULL, NULL);
9495
0
                }
9496
0
                goto next_attr;
9497
0
            }
9498
290k
            if (attname == ctxt->str_xmlns) {
9499
3.24k
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9500
3.24k
                         "redefinition of the xmlns prefix is forbidden\n",
9501
3.24k
                         NULL, NULL, NULL);
9502
3.24k
                goto next_attr;
9503
3.24k
            }
9504
287k
            if ((len == 29) &&
9505
287k
                (xmlStrEqual(URL,
9506
3.36k
                             BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9507
118
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9508
118
                         "reuse of the xmlns namespace name is forbidden\n",
9509
118
                         NULL, NULL, NULL);
9510
118
                goto next_attr;
9511
118
            }
9512
286k
            if ((URL == NULL) || (URL[0] == 0)) {
9513
1.29k
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9514
1.29k
                         "xmlns:%s: Empty XML namespace is not allowed\n",
9515
1.29k
                              attname, NULL, NULL);
9516
1.29k
                goto next_attr;
9517
285k
            } else {
9518
285k
                uri = xmlParseURI((const char *) URL);
9519
285k
                if (uri == NULL) {
9520
58.7k
                    xmlNsErr(ctxt, XML_WAR_NS_URI,
9521
58.7k
                         "xmlns:%s: '%s' is not a valid URI\n",
9522
58.7k
                                       attname, URL, NULL);
9523
226k
                } else {
9524
226k
                    if ((ctxt->pedantic) && (uri->scheme == NULL)) {
9525
11.7k
                        xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9526
11.7k
                                  "xmlns:%s: URI %s is not absolute\n",
9527
11.7k
                                  attname, URL, NULL);
9528
11.7k
                    }
9529
226k
                    xmlFreeURI(uri);
9530
226k
                }
9531
285k
            }
9532
9533
            /*
9534
             * check that it's not a defined namespace
9535
             */
9536
463k
            for (j = 1;j <= nbNs;j++)
9537
194k
                if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9538
16.4k
                    break;
9539
285k
            if (j <= nbNs)
9540
16.4k
                xmlErrAttributeDup(ctxt, aprefix, attname);
9541
269k
            else
9542
269k
                if (nsPush(ctxt, attname, URL) > 0) nbNs++;
9543
9544
21.8M
        } else {
9545
            /*
9546
             * Add the pair to atts
9547
             */
9548
21.8M
            if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9549
201k
                if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9550
0
                    goto next_attr;
9551
0
                }
9552
201k
                maxatts = ctxt->maxatts;
9553
201k
                atts = ctxt->atts;
9554
201k
            }
9555
21.8M
            ctxt->attallocs[nratts++] = alloc;
9556
21.8M
            atts[nbatts++] = attname;
9557
21.8M
            atts[nbatts++] = aprefix;
9558
            /*
9559
             * The namespace URI field is used temporarily to point at the
9560
             * base of the current input buffer for non-alloced attributes.
9561
             * When the input buffer is reallocated, all the pointers become
9562
             * invalid, but they can be reconstructed later.
9563
             */
9564
21.8M
            if (alloc)
9565
633k
                atts[nbatts++] = NULL;
9566
21.2M
            else
9567
21.2M
                atts[nbatts++] = ctxt->input->base;
9568
21.8M
            atts[nbatts++] = attvalue;
9569
21.8M
            attvalue += len;
9570
21.8M
            atts[nbatts++] = attvalue;
9571
            /*
9572
             * tag if some deallocation is needed
9573
             */
9574
21.8M
            if (alloc != 0) attval = 1;
9575
21.8M
            attvalue = NULL; /* moved into atts */
9576
21.8M
        }
9577
9578
22.5M
next_attr:
9579
22.5M
        if ((attvalue != NULL) && (alloc != 0)) {
9580
96.9k
            xmlFree(attvalue);
9581
96.9k
            attvalue = NULL;
9582
96.9k
        }
9583
9584
22.5M
  GROW
9585
22.5M
        if (ctxt->instate == XML_PARSER_EOF)
9586
0
            break;
9587
22.5M
  if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9588
13.4M
      break;
9589
9.13M
  if (SKIP_BLANKS == 0) {
9590
529k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9591
529k
         "attributes construct error\n");
9592
529k
      break;
9593
529k
  }
9594
8.60M
        GROW;
9595
8.60M
    }
9596
9597
25.1M
    if (ctxt->input->id != inputid) {
9598
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9599
0
                    "Unexpected change of input\n");
9600
0
        localname = NULL;
9601
0
        goto done;
9602
0
    }
9603
9604
    /* Reconstruct attribute value pointers. */
9605
47.0M
    for (i = 0, j = 0; j < nratts; i += 5, j++) {
9606
21.8M
        if (atts[i+2] != NULL) {
9607
            /*
9608
             * Arithmetic on dangling pointers is technically undefined
9609
             * behavior, but well...
9610
             */
9611
21.2M
            const xmlChar *old = atts[i+2];
9612
21.2M
            atts[i+2]  = NULL;    /* Reset repurposed namespace URI */
9613
21.2M
            atts[i+3] = ctxt->input->base + (atts[i+3] - old);  /* value */
9614
21.2M
            atts[i+4] = ctxt->input->base + (atts[i+4] - old);  /* valuend */
9615
21.2M
        }
9616
21.8M
    }
9617
9618
    /*
9619
     * The attributes defaulting
9620
     */
9621
25.1M
    if (ctxt->attsDefault != NULL) {
9622
9.75M
        xmlDefAttrsPtr defaults;
9623
9624
9.75M
  defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9625
9.75M
  if (defaults != NULL) {
9626
3.38M
      for (i = 0;i < defaults->nbAttrs;i++) {
9627
2.39M
          attname = defaults->values[5 * i];
9628
2.39M
    aprefix = defaults->values[5 * i + 1];
9629
9630
                /*
9631
     * special work for namespaces defaulted defs
9632
     */
9633
2.39M
    if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9634
        /*
9635
         * check that it's not a defined namespace
9636
         */
9637
34.3k
        for (j = 1;j <= nbNs;j++)
9638
8.79k
            if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9639
3.91k
          break;
9640
29.4k
              if (j <= nbNs) continue;
9641
9642
25.5k
        nsname = xmlGetNamespace(ctxt, NULL);
9643
25.5k
        if (nsname != defaults->values[5 * i + 2]) {
9644
14.0k
      if (nsPush(ctxt, NULL,
9645
14.0k
                 defaults->values[5 * i + 2]) > 0)
9646
12.4k
          nbNs++;
9647
14.0k
        }
9648
2.36M
    } else if (aprefix == ctxt->str_xmlns) {
9649
        /*
9650
         * check that it's not a defined namespace
9651
         */
9652
65.0k
        for (j = 1;j <= nbNs;j++)
9653
27.2k
            if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9654
11.1k
          break;
9655
48.8k
              if (j <= nbNs) continue;
9656
9657
37.7k
        nsname = xmlGetNamespace(ctxt, attname);
9658
37.7k
        if (nsname != defaults->values[5 * i + 2]) {
9659
17.5k
      if (nsPush(ctxt, attname,
9660
17.5k
                 defaults->values[5 * i + 2]) > 0)
9661
16.2k
          nbNs++;
9662
17.5k
        }
9663
2.31M
    } else {
9664
        /*
9665
         * check that it's not a defined attribute
9666
         */
9667
6.31M
        for (j = 0;j < nbatts;j+=5) {
9668
4.00M
      if ((attname == atts[j]) && (aprefix == atts[j+1]))
9669
13.0k
          break;
9670
4.00M
        }
9671
2.31M
        if (j < nbatts) continue;
9672
9673
2.30M
        if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9674
7.00k
      if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9675
0
                            localname = NULL;
9676
0
                            goto done;
9677
0
      }
9678
7.00k
      maxatts = ctxt->maxatts;
9679
7.00k
      atts = ctxt->atts;
9680
7.00k
        }
9681
2.30M
        atts[nbatts++] = attname;
9682
2.30M
        atts[nbatts++] = aprefix;
9683
2.30M
        if (aprefix == NULL)
9684
1.55M
      atts[nbatts++] = NULL;
9685
747k
        else
9686
747k
            atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
9687
2.30M
        atts[nbatts++] = defaults->values[5 * i + 2];
9688
2.30M
        atts[nbatts++] = defaults->values[5 * i + 3];
9689
2.30M
        if ((ctxt->standalone == 1) &&
9690
2.30M
            (defaults->values[5 * i + 4] != NULL)) {
9691
147
      xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
9692
147
    "standalone: attribute %s on %s defaulted from external subset\n",
9693
147
                                   attname, localname);
9694
147
        }
9695
2.30M
        nbdef++;
9696
2.30M
    }
9697
2.39M
      }
9698
987k
  }
9699
9.75M
    }
9700
9701
    /*
9702
     * The attributes checkings
9703
     */
9704
49.3M
    for (i = 0; i < nbatts;i += 5) {
9705
        /*
9706
  * The default namespace does not apply to attribute names.
9707
  */
9708
24.1M
  if (atts[i + 1] != NULL) {
9709
1.60M
      nsname = xmlGetNamespace(ctxt, atts[i + 1]);
9710
1.60M
      if (nsname == NULL) {
9711
229k
    xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9712
229k
        "Namespace prefix %s for %s on %s is not defined\n",
9713
229k
        atts[i + 1], atts[i], localname);
9714
229k
      }
9715
1.60M
      atts[i + 2] = nsname;
9716
1.60M
  } else
9717
22.5M
      nsname = NULL;
9718
  /*
9719
   * [ WFC: Unique Att Spec ]
9720
   * No attribute name may appear more than once in the same
9721
   * start-tag or empty-element tag.
9722
   * As extended by the Namespace in XML REC.
9723
   */
9724
36.7M
        for (j = 0; j < i;j += 5) {
9725
12.5M
      if (atts[i] == atts[j]) {
9726
79.3k
          if (atts[i+1] == atts[j+1]) {
9727
26.2k
        xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
9728
26.2k
        break;
9729
26.2k
    }
9730
53.0k
    if ((nsname != NULL) && (atts[j + 2] == nsname)) {
9731
2.83k
        xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9732
2.83k
           "Namespaced Attribute %s in '%s' redefined\n",
9733
2.83k
           atts[i], nsname, NULL);
9734
2.83k
        break;
9735
2.83k
    }
9736
53.0k
      }
9737
12.5M
  }
9738
24.1M
    }
9739
9740
25.1M
    nsname = xmlGetNamespace(ctxt, prefix);
9741
25.1M
    if ((prefix != NULL) && (nsname == NULL)) {
9742
459k
  xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9743
459k
           "Namespace prefix %s on %s is not defined\n",
9744
459k
     prefix, localname, NULL);
9745
459k
    }
9746
25.1M
    *pref = prefix;
9747
25.1M
    *URI = nsname;
9748
9749
    /*
9750
     * SAX: Start of Element !
9751
     */
9752
25.1M
    if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9753
25.1M
  (!ctxt->disableSAX)) {
9754
21.5M
  if (nbNs > 0)
9755
235k
      ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9756
235k
        nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
9757
235k
        nbatts / 5, nbdef, atts);
9758
21.3M
  else
9759
21.3M
      ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9760
21.3M
                    nsname, 0, NULL, nbatts / 5, nbdef, atts);
9761
21.5M
    }
9762
9763
25.1M
done:
9764
    /*
9765
     * Free up attribute allocated strings if needed
9766
     */
9767
25.1M
    if (attval != 0) {
9768
1.30M
  for (i = 3,j = 0; j < nratts;i += 5,j++)
9769
734k
      if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9770
633k
          xmlFree((xmlChar *) atts[i]);
9771
574k
    }
9772
9773
25.1M
    return(localname);
9774
25.1M
}
9775
9776
/**
9777
 * xmlParseEndTag2:
9778
 * @ctxt:  an XML parser context
9779
 * @line:  line of the start tag
9780
 * @nsNr:  number of namespaces on the start tag
9781
 *
9782
 * Parse an end tag. Always consumes '</'.
9783
 *
9784
 * [42] ETag ::= '</' Name S? '>'
9785
 *
9786
 * With namespace
9787
 *
9788
 * [NS 9] ETag ::= '</' QName S? '>'
9789
 */
9790
9791
static void
9792
14.8M
xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlStartTag *tag) {
9793
14.8M
    const xmlChar *name;
9794
9795
14.8M
    GROW;
9796
14.8M
    if ((RAW != '<') || (NXT(1) != '/')) {
9797
0
  xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
9798
0
  return;
9799
0
    }
9800
14.8M
    SKIP(2);
9801
9802
14.8M
    if (tag->prefix == NULL)
9803
14.2M
        name = xmlParseNameAndCompare(ctxt, ctxt->name);
9804
562k
    else
9805
562k
        name = xmlParseQNameAndCompare(ctxt, ctxt->name, tag->prefix);
9806
9807
    /*
9808
     * We should definitely be at the ending "S? '>'" part
9809
     */
9810
14.8M
    GROW;
9811
14.8M
    if (ctxt->instate == XML_PARSER_EOF)
9812
0
        return;
9813
14.8M
    SKIP_BLANKS;
9814
14.8M
    if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
9815
127k
  xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
9816
127k
    } else
9817
14.7M
  NEXT1;
9818
9819
    /*
9820
     * [ WFC: Element Type Match ]
9821
     * The Name in an element's end-tag must match the element type in the
9822
     * start-tag.
9823
     *
9824
     */
9825
14.8M
    if (name != (xmlChar*)1) {
9826
328k
        if (name == NULL) name = BAD_CAST "unparsable";
9827
328k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
9828
328k
         "Opening and ending tag mismatch: %s line %d and %s\n",
9829
328k
                    ctxt->name, tag->line, name);
9830
328k
    }
9831
9832
    /*
9833
     * SAX: End of Tag
9834
     */
9835
14.8M
    if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9836
14.8M
  (!ctxt->disableSAX))
9837
12.6M
  ctxt->sax->endElementNs(ctxt->userData, ctxt->name, tag->prefix,
9838
12.6M
                                tag->URI);
9839
9840
14.8M
    spacePop(ctxt);
9841
14.8M
    if (tag->nsNr != 0)
9842
29.3k
  nsPop(ctxt, tag->nsNr);
9843
14.8M
}
9844
9845
/**
9846
 * xmlParseCDSect:
9847
 * @ctxt:  an XML parser context
9848
 *
9849
 * DEPRECATED: Internal function, don't use.
9850
 *
9851
 * Parse escaped pure raw content. Always consumes '<!['.
9852
 *
9853
 * [18] CDSect ::= CDStart CData CDEnd
9854
 *
9855
 * [19] CDStart ::= '<![CDATA['
9856
 *
9857
 * [20] Data ::= (Char* - (Char* ']]>' Char*))
9858
 *
9859
 * [21] CDEnd ::= ']]>'
9860
 */
9861
void
9862
115k
xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9863
115k
    xmlChar *buf = NULL;
9864
115k
    int len = 0;
9865
115k
    int size = XML_PARSER_BUFFER_SIZE;
9866
115k
    int r, rl;
9867
115k
    int s, sl;
9868
115k
    int cur, l;
9869
115k
    int count = 0;
9870
115k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
9871
42.5k
                    XML_MAX_HUGE_LENGTH :
9872
115k
                    XML_MAX_TEXT_LENGTH;
9873
9874
115k
    if ((CUR != '<') || (NXT(1) != '!') || (NXT(2) != '['))
9875
0
        return;
9876
115k
    SKIP(3);
9877
9878
115k
    if (!CMP6(CUR_PTR, 'C', 'D', 'A', 'T', 'A', '['))
9879
0
        return;
9880
115k
    SKIP(6);
9881
9882
115k
    ctxt->instate = XML_PARSER_CDATA_SECTION;
9883
115k
    r = CUR_CHAR(rl);
9884
115k
    if (!IS_CHAR(r)) {
9885
8.49k
  xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9886
8.49k
        goto out;
9887
8.49k
    }
9888
107k
    NEXTL(rl);
9889
107k
    s = CUR_CHAR(sl);
9890
107k
    if (!IS_CHAR(s)) {
9891
11.9k
  xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9892
11.9k
        goto out;
9893
11.9k
    }
9894
95.0k
    NEXTL(sl);
9895
95.0k
    cur = CUR_CHAR(l);
9896
95.0k
    buf = (xmlChar *) xmlMallocAtomic(size);
9897
95.0k
    if (buf == NULL) {
9898
0
  xmlErrMemory(ctxt, NULL);
9899
0
        goto out;
9900
0
    }
9901
42.4M
    while (IS_CHAR(cur) &&
9902
42.4M
           ((r != ']') || (s != ']') || (cur != '>'))) {
9903
42.3M
  if (len + 5 >= size) {
9904
80.0k
      xmlChar *tmp;
9905
9906
80.0k
      tmp = (xmlChar *) xmlRealloc(buf, size * 2);
9907
80.0k
      if (tmp == NULL) {
9908
0
    xmlErrMemory(ctxt, NULL);
9909
0
                goto out;
9910
0
      }
9911
80.0k
      buf = tmp;
9912
80.0k
      size *= 2;
9913
80.0k
  }
9914
42.3M
  COPY_BUF(rl,buf,len,r);
9915
42.3M
  r = s;
9916
42.3M
  rl = sl;
9917
42.3M
  s = cur;
9918
42.3M
  sl = l;
9919
42.3M
  count++;
9920
42.3M
  if (count > 50) {
9921
799k
      SHRINK;
9922
799k
      GROW;
9923
799k
            if (ctxt->instate == XML_PARSER_EOF) {
9924
0
                goto out;
9925
0
            }
9926
799k
      count = 0;
9927
799k
  }
9928
42.3M
  NEXTL(l);
9929
42.3M
  cur = CUR_CHAR(l);
9930
42.3M
        if (len > maxLength) {
9931
0
            xmlFatalErrMsg(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9932
0
                           "CData section too big found\n");
9933
0
            goto out;
9934
0
        }
9935
42.3M
    }
9936
95.0k
    buf[len] = 0;
9937
95.0k
    if (cur != '>') {
9938
14.5k
  xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9939
14.5k
                       "CData section not finished\n%.50s\n", buf);
9940
14.5k
        goto out;
9941
14.5k
    }
9942
80.5k
    NEXTL(l);
9943
9944
    /*
9945
     * OK the buffer is to be consumed as cdata.
9946
     */
9947
80.5k
    if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9948
53.1k
  if (ctxt->sax->cdataBlock != NULL)
9949
30.1k
      ctxt->sax->cdataBlock(ctxt->userData, buf, len);
9950
22.9k
  else if (ctxt->sax->characters != NULL)
9951
22.9k
      ctxt->sax->characters(ctxt->userData, buf, len);
9952
53.1k
    }
9953
9954
115k
out:
9955
115k
    if (ctxt->instate != XML_PARSER_EOF)
9956
115k
        ctxt->instate = XML_PARSER_CONTENT;
9957
115k
    xmlFree(buf);
9958
115k
}
9959
9960
/**
9961
 * xmlParseContentInternal:
9962
 * @ctxt:  an XML parser context
9963
 *
9964
 * Parse a content sequence. Stops at EOF or '</'. Leaves checking of
9965
 * unexpected EOF to the caller.
9966
 */
9967
9968
static void
9969
479k
xmlParseContentInternal(xmlParserCtxtPtr ctxt) {
9970
479k
    int nameNr = ctxt->nameNr;
9971
9972
479k
    GROW;
9973
66.5M
    while ((RAW != 0) &&
9974
66.5M
     (ctxt->instate != XML_PARSER_EOF)) {
9975
66.1M
  const xmlChar *cur = ctxt->input->cur;
9976
9977
  /*
9978
   * First case : a Processing Instruction.
9979
   */
9980
66.1M
  if ((*cur == '<') && (cur[1] == '?')) {
9981
143k
      xmlParsePI(ctxt);
9982
143k
  }
9983
9984
  /*
9985
   * Second case : a CDSection
9986
   */
9987
  /* 2.6.0 test was *cur not RAW */
9988
66.0M
  else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9989
115k
      xmlParseCDSect(ctxt);
9990
115k
  }
9991
9992
  /*
9993
   * Third case :  a comment
9994
   */
9995
65.9M
  else if ((*cur == '<') && (NXT(1) == '!') &&
9996
65.9M
     (NXT(2) == '-') && (NXT(3) == '-')) {
9997
765k
      xmlParseComment(ctxt);
9998
765k
      ctxt->instate = XML_PARSER_CONTENT;
9999
765k
  }
10000
10001
  /*
10002
   * Fourth case :  a sub-element.
10003
   */
10004
65.1M
  else if (*cur == '<') {
10005
29.4M
            if (NXT(1) == '/') {
10006
10.8M
                if (ctxt->nameNr <= nameNr)
10007
81.8k
                    break;
10008
10.7M
          xmlParseElementEnd(ctxt);
10009
18.6M
            } else {
10010
18.6M
          xmlParseElementStart(ctxt);
10011
18.6M
            }
10012
29.4M
  }
10013
10014
  /*
10015
   * Fifth case : a reference. If if has not been resolved,
10016
   *    parsing returns it's Name, create the node
10017
   */
10018
10019
35.7M
  else if (*cur == '&') {
10020
6.71M
      xmlParseReference(ctxt);
10021
6.71M
  }
10022
10023
  /*
10024
   * Last case, text. Note that References are handled directly.
10025
   */
10026
29.0M
  else {
10027
29.0M
      xmlParseCharData(ctxt, 0);
10028
29.0M
  }
10029
10030
66.1M
  GROW;
10031
66.1M
  SHRINK;
10032
66.1M
    }
10033
479k
}
10034
10035
/**
10036
 * xmlParseContent:
10037
 * @ctxt:  an XML parser context
10038
 *
10039
 * Parse a content sequence. Stops at EOF or '</'.
10040
 *
10041
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10042
 */
10043
10044
void
10045
312k
xmlParseContent(xmlParserCtxtPtr ctxt) {
10046
312k
    int nameNr = ctxt->nameNr;
10047
10048
312k
    xmlParseContentInternal(ctxt);
10049
10050
312k
    if ((ctxt->instate != XML_PARSER_EOF) && (ctxt->nameNr > nameNr)) {
10051
6.48k
        const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
10052
6.48k
        int line = ctxt->pushTab[ctxt->nameNr - 1].line;
10053
6.48k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
10054
6.48k
                "Premature end of data in tag %s line %d\n",
10055
6.48k
    name, line, NULL);
10056
6.48k
    }
10057
312k
}
10058
10059
/**
10060
 * xmlParseElement:
10061
 * @ctxt:  an XML parser context
10062
 *
10063
 * DEPRECATED: Internal function, don't use.
10064
 *
10065
 * parse an XML element
10066
 *
10067
 * [39] element ::= EmptyElemTag | STag content ETag
10068
 *
10069
 * [ WFC: Element Type Match ]
10070
 * The Name in an element's end-tag must match the element type in the
10071
 * start-tag.
10072
 *
10073
 */
10074
10075
void
10076
246k
xmlParseElement(xmlParserCtxtPtr ctxt) {
10077
246k
    if (xmlParseElementStart(ctxt) != 0)
10078
79.8k
        return;
10079
10080
166k
    xmlParseContentInternal(ctxt);
10081
166k
    if (ctxt->instate == XML_PARSER_EOF)
10082
1.05k
  return;
10083
10084
165k
    if (CUR == 0) {
10085
86.1k
        const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
10086
86.1k
        int line = ctxt->pushTab[ctxt->nameNr - 1].line;
10087
86.1k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
10088
86.1k
                "Premature end of data in tag %s line %d\n",
10089
86.1k
    name, line, NULL);
10090
86.1k
        return;
10091
86.1k
    }
10092
10093
79.7k
    xmlParseElementEnd(ctxt);
10094
79.7k
}
10095
10096
/**
10097
 * xmlParseElementStart:
10098
 * @ctxt:  an XML parser context
10099
 *
10100
 * Parse the start of an XML element. Returns -1 in case of error, 0 if an
10101
 * opening tag was parsed, 1 if an empty element was parsed.
10102
 *
10103
 * Always consumes '<'.
10104
 */
10105
static int
10106
18.8M
xmlParseElementStart(xmlParserCtxtPtr ctxt) {
10107
18.8M
    const xmlChar *name;
10108
18.8M
    const xmlChar *prefix = NULL;
10109
18.8M
    const xmlChar *URI = NULL;
10110
18.8M
    xmlParserNodeInfo node_info;
10111
18.8M
    int line, tlen = 0;
10112
18.8M
    xmlNodePtr ret;
10113
18.8M
    int nsNr = ctxt->nsNr;
10114
10115
18.8M
    if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) &&
10116
18.8M
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
10117
183
  xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
10118
183
     "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
10119
183
        xmlParserMaxDepth);
10120
183
  xmlHaltParser(ctxt);
10121
183
  return(-1);
10122
183
    }
10123
10124
    /* Capture start position */
10125
18.8M
    if (ctxt->record_info) {
10126
0
        node_info.begin_pos = ctxt->input->consumed +
10127
0
                          (CUR_PTR - ctxt->input->base);
10128
0
  node_info.begin_line = ctxt->input->line;
10129
0
    }
10130
10131
18.8M
    if (ctxt->spaceNr == 0)
10132
0
  spacePush(ctxt, -1);
10133
18.8M
    else if (*ctxt->space == -2)
10134
1.92M
  spacePush(ctxt, -1);
10135
16.9M
    else
10136
16.9M
  spacePush(ctxt, *ctxt->space);
10137
10138
18.8M
    line = ctxt->input->line;
10139
18.8M
#ifdef LIBXML_SAX1_ENABLED
10140
18.8M
    if (ctxt->sax2)
10141
11.9M
#endif /* LIBXML_SAX1_ENABLED */
10142
11.9M
        name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
10143
6.94M
#ifdef LIBXML_SAX1_ENABLED
10144
6.94M
    else
10145
6.94M
  name = xmlParseStartTag(ctxt);
10146
18.8M
#endif /* LIBXML_SAX1_ENABLED */
10147
18.8M
    if (ctxt->instate == XML_PARSER_EOF)
10148
743
  return(-1);
10149
18.8M
    if (name == NULL) {
10150
965k
  spacePop(ctxt);
10151
965k
        return(-1);
10152
965k
    }
10153
17.8M
    nameNsPush(ctxt, name, prefix, URI, line, ctxt->nsNr - nsNr);
10154
17.8M
    ret = ctxt->node;
10155
10156
17.8M
#ifdef LIBXML_VALID_ENABLED
10157
    /*
10158
     * [ VC: Root Element Type ]
10159
     * The Name in the document type declaration must match the element
10160
     * type of the root element.
10161
     */
10162
17.8M
    if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
10163
17.8M
        ctxt->node && (ctxt->node == ctxt->myDoc->children))
10164
0
        ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
10165
17.8M
#endif /* LIBXML_VALID_ENABLED */
10166
10167
    /*
10168
     * Check for an Empty Element.
10169
     */
10170
17.8M
    if ((RAW == '/') && (NXT(1) == '>')) {
10171
5.73M
        SKIP(2);
10172
5.73M
  if (ctxt->sax2) {
10173
3.73M
      if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
10174
3.73M
    (!ctxt->disableSAX))
10175
2.56M
    ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
10176
3.73M
#ifdef LIBXML_SAX1_ENABLED
10177
3.73M
  } else {
10178
1.99M
      if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
10179
1.99M
    (!ctxt->disableSAX))
10180
1.54M
    ctxt->sax->endElement(ctxt->userData, name);
10181
1.99M
#endif /* LIBXML_SAX1_ENABLED */
10182
1.99M
  }
10183
5.73M
  namePop(ctxt);
10184
5.73M
  spacePop(ctxt);
10185
5.73M
  if (nsNr != ctxt->nsNr)
10186
11.5k
      nsPop(ctxt, ctxt->nsNr - nsNr);
10187
5.73M
  if ( ret != NULL && ctxt->record_info ) {
10188
0
     node_info.end_pos = ctxt->input->consumed +
10189
0
            (CUR_PTR - ctxt->input->base);
10190
0
     node_info.end_line = ctxt->input->line;
10191
0
     node_info.node = ret;
10192
0
     xmlParserAddNodeInfo(ctxt, &node_info);
10193
0
  }
10194
5.73M
  return(1);
10195
5.73M
    }
10196
12.1M
    if (RAW == '>') {
10197
11.4M
        NEXT1;
10198
11.4M
    } else {
10199
715k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
10200
715k
         "Couldn't find end of Start Tag %s line %d\n",
10201
715k
                    name, line, NULL);
10202
10203
  /*
10204
   * end of parsing of this node.
10205
   */
10206
715k
  nodePop(ctxt);
10207
715k
  namePop(ctxt);
10208
715k
  spacePop(ctxt);
10209
715k
  if (nsNr != ctxt->nsNr)
10210
69.4k
      nsPop(ctxt, ctxt->nsNr - nsNr);
10211
10212
  /*
10213
   * Capture end position and add node
10214
   */
10215
715k
  if ( ret != NULL && ctxt->record_info ) {
10216
0
     node_info.end_pos = ctxt->input->consumed +
10217
0
            (CUR_PTR - ctxt->input->base);
10218
0
     node_info.end_line = ctxt->input->line;
10219
0
     node_info.node = ret;
10220
0
     xmlParserAddNodeInfo(ctxt, &node_info);
10221
0
  }
10222
715k
  return(-1);
10223
715k
    }
10224
10225
11.4M
    return(0);
10226
12.1M
}
10227
10228
/**
10229
 * xmlParseElementEnd:
10230
 * @ctxt:  an XML parser context
10231
 *
10232
 * Parse the end of an XML element. Always consumes '</'.
10233
 */
10234
static void
10235
10.8M
xmlParseElementEnd(xmlParserCtxtPtr ctxt) {
10236
10.8M
    xmlParserNodeInfo node_info;
10237
10.8M
    xmlNodePtr ret = ctxt->node;
10238
10239
10.8M
    if (ctxt->nameNr <= 0) {
10240
0
        if ((RAW == '<') && (NXT(1) == '/'))
10241
0
            SKIP(2);
10242
0
        return;
10243
0
    }
10244
10245
    /*
10246
     * parse the end of tag: '</' should be here.
10247
     */
10248
10.8M
    if (ctxt->sax2) {
10249
6.75M
  xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
10250
6.75M
  namePop(ctxt);
10251
6.75M
    }
10252
4.07M
#ifdef LIBXML_SAX1_ENABLED
10253
4.07M
    else
10254
4.07M
  xmlParseEndTag1(ctxt, 0);
10255
10.8M
#endif /* LIBXML_SAX1_ENABLED */
10256
10257
    /*
10258
     * Capture end position and add node
10259
     */
10260
10.8M
    if ( ret != NULL && ctxt->record_info ) {
10261
0
       node_info.end_pos = ctxt->input->consumed +
10262
0
                          (CUR_PTR - ctxt->input->base);
10263
0
       node_info.end_line = ctxt->input->line;
10264
0
       node_info.node = ret;
10265
0
       xmlParserAddNodeInfo(ctxt, &node_info);
10266
0
    }
10267
10.8M
}
10268
10269
/**
10270
 * xmlParseVersionNum:
10271
 * @ctxt:  an XML parser context
10272
 *
10273
 * DEPRECATED: Internal function, don't use.
10274
 *
10275
 * parse the XML version value.
10276
 *
10277
 * [26] VersionNum ::= '1.' [0-9]+
10278
 *
10279
 * In practice allow [0-9].[0-9]+ at that level
10280
 *
10281
 * Returns the string giving the XML version number, or NULL
10282
 */
10283
xmlChar *
10284
554k
xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
10285
554k
    xmlChar *buf = NULL;
10286
554k
    int len = 0;
10287
554k
    int size = 10;
10288
554k
    xmlChar cur;
10289
10290
554k
    buf = (xmlChar *) xmlMallocAtomic(size);
10291
554k
    if (buf == NULL) {
10292
0
  xmlErrMemory(ctxt, NULL);
10293
0
  return(NULL);
10294
0
    }
10295
554k
    cur = CUR;
10296
554k
    if (!((cur >= '0') && (cur <= '9'))) {
10297
5.84k
  xmlFree(buf);
10298
5.84k
  return(NULL);
10299
5.84k
    }
10300
548k
    buf[len++] = cur;
10301
548k
    NEXT;
10302
548k
    cur=CUR;
10303
548k
    if (cur != '.') {
10304
5.92k
  xmlFree(buf);
10305
5.92k
  return(NULL);
10306
5.92k
    }
10307
542k
    buf[len++] = cur;
10308
542k
    NEXT;
10309
542k
    cur=CUR;
10310
2.52M
    while ((cur >= '0') && (cur <= '9')) {
10311
1.98M
  if (len + 1 >= size) {
10312
4.19k
      xmlChar *tmp;
10313
10314
4.19k
      size *= 2;
10315
4.19k
      tmp = (xmlChar *) xmlRealloc(buf, size);
10316
4.19k
      if (tmp == NULL) {
10317
0
          xmlFree(buf);
10318
0
    xmlErrMemory(ctxt, NULL);
10319
0
    return(NULL);
10320
0
      }
10321
4.19k
      buf = tmp;
10322
4.19k
  }
10323
1.98M
  buf[len++] = cur;
10324
1.98M
  NEXT;
10325
1.98M
  cur=CUR;
10326
1.98M
    }
10327
542k
    buf[len] = 0;
10328
542k
    return(buf);
10329
542k
}
10330
10331
/**
10332
 * xmlParseVersionInfo:
10333
 * @ctxt:  an XML parser context
10334
 *
10335
 * DEPRECATED: Internal function, don't use.
10336
 *
10337
 * parse the XML version.
10338
 *
10339
 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
10340
 *
10341
 * [25] Eq ::= S? '=' S?
10342
 *
10343
 * Returns the version string, e.g. "1.0"
10344
 */
10345
10346
xmlChar *
10347
630k
xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
10348
630k
    xmlChar *version = NULL;
10349
10350
630k
    if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
10351
568k
  SKIP(7);
10352
568k
  SKIP_BLANKS;
10353
568k
  if (RAW != '=') {
10354
6.00k
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10355
6.00k
      return(NULL);
10356
6.00k
        }
10357
562k
  NEXT;
10358
562k
  SKIP_BLANKS;
10359
562k
  if (RAW == '"') {
10360
520k
      NEXT;
10361
520k
      version = xmlParseVersionNum(ctxt);
10362
520k
      if (RAW != '"') {
10363
25.0k
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10364
25.0k
      } else
10365
495k
          NEXT;
10366
520k
  } else if (RAW == '\''){
10367
34.2k
      NEXT;
10368
34.2k
      version = xmlParseVersionNum(ctxt);
10369
34.2k
      if (RAW != '\'') {
10370
3.50k
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10371
3.50k
      } else
10372
30.7k
          NEXT;
10373
34.2k
  } else {
10374
8.45k
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10375
8.45k
  }
10376
562k
    }
10377
624k
    return(version);
10378
630k
}
10379
10380
/**
10381
 * xmlParseEncName:
10382
 * @ctxt:  an XML parser context
10383
 *
10384
 * DEPRECATED: Internal function, don't use.
10385
 *
10386
 * parse the XML encoding name
10387
 *
10388
 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
10389
 *
10390
 * Returns the encoding name value or NULL
10391
 */
10392
xmlChar *
10393
306k
xmlParseEncName(xmlParserCtxtPtr ctxt) {
10394
306k
    xmlChar *buf = NULL;
10395
306k
    int len = 0;
10396
306k
    int size = 10;
10397
306k
    xmlChar cur;
10398
10399
306k
    cur = CUR;
10400
306k
    if (((cur >= 'a') && (cur <= 'z')) ||
10401
306k
        ((cur >= 'A') && (cur <= 'Z'))) {
10402
305k
  buf = (xmlChar *) xmlMallocAtomic(size);
10403
305k
  if (buf == NULL) {
10404
0
      xmlErrMemory(ctxt, NULL);
10405
0
      return(NULL);
10406
0
  }
10407
10408
305k
  buf[len++] = cur;
10409
305k
  NEXT;
10410
305k
  cur = CUR;
10411
3.93M
  while (((cur >= 'a') && (cur <= 'z')) ||
10412
3.93M
         ((cur >= 'A') && (cur <= 'Z')) ||
10413
3.93M
         ((cur >= '0') && (cur <= '9')) ||
10414
3.93M
         (cur == '.') || (cur == '_') ||
10415
3.93M
         (cur == '-')) {
10416
3.63M
      if (len + 1 >= size) {
10417
141k
          xmlChar *tmp;
10418
10419
141k
    size *= 2;
10420
141k
    tmp = (xmlChar *) xmlRealloc(buf, size);
10421
141k
    if (tmp == NULL) {
10422
0
        xmlErrMemory(ctxt, NULL);
10423
0
        xmlFree(buf);
10424
0
        return(NULL);
10425
0
    }
10426
141k
    buf = tmp;
10427
141k
      }
10428
3.63M
      buf[len++] = cur;
10429
3.63M
      NEXT;
10430
3.63M
      cur = CUR;
10431
3.63M
      if (cur == 0) {
10432
964
          SHRINK;
10433
964
    GROW;
10434
964
    cur = CUR;
10435
964
      }
10436
3.63M
        }
10437
305k
  buf[len] = 0;
10438
305k
    } else {
10439
1.05k
  xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
10440
1.05k
    }
10441
306k
    return(buf);
10442
306k
}
10443
10444
/**
10445
 * xmlParseEncodingDecl:
10446
 * @ctxt:  an XML parser context
10447
 *
10448
 * DEPRECATED: Internal function, don't use.
10449
 *
10450
 * parse the XML encoding declaration
10451
 *
10452
 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' |  "'" EncName "'")
10453
 *
10454
 * this setups the conversion filters.
10455
 *
10456
 * Returns the encoding value or NULL
10457
 */
10458
10459
const xmlChar *
10460
468k
xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
10461
468k
    xmlChar *encoding = NULL;
10462
10463
468k
    SKIP_BLANKS;
10464
468k
    if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
10465
309k
  SKIP(8);
10466
309k
  SKIP_BLANKS;
10467
309k
  if (RAW != '=') {
10468
1.68k
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10469
1.68k
      return(NULL);
10470
1.68k
        }
10471
308k
  NEXT;
10472
308k
  SKIP_BLANKS;
10473
308k
  if (RAW == '"') {
10474
293k
      NEXT;
10475
293k
      encoding = xmlParseEncName(ctxt);
10476
293k
      if (RAW != '"') {
10477
9.45k
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10478
9.45k
    xmlFree((xmlChar *) encoding);
10479
9.45k
    return(NULL);
10480
9.45k
      } else
10481
283k
          NEXT;
10482
293k
  } else if (RAW == '\''){
10483
13.6k
      NEXT;
10484
13.6k
      encoding = xmlParseEncName(ctxt);
10485
13.6k
      if (RAW != '\'') {
10486
461
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10487
461
    xmlFree((xmlChar *) encoding);
10488
461
    return(NULL);
10489
461
      } else
10490
13.1k
          NEXT;
10491
13.6k
  } else {
10492
1.41k
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10493
1.41k
  }
10494
10495
        /*
10496
         * Non standard parsing, allowing the user to ignore encoding
10497
         */
10498
298k
        if (ctxt->options & XML_PARSE_IGNORE_ENC) {
10499
81.0k
      xmlFree((xmlChar *) encoding);
10500
81.0k
            return(NULL);
10501
81.0k
  }
10502
10503
  /*
10504
   * UTF-16 encoding switch has already taken place at this stage,
10505
   * more over the little-endian/big-endian selection is already done
10506
   */
10507
217k
        if ((encoding != NULL) &&
10508
217k
      ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
10509
216k
       (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
10510
      /*
10511
       * If no encoding was passed to the parser, that we are
10512
       * using UTF-16 and no decoder is present i.e. the
10513
       * document is apparently UTF-8 compatible, then raise an
10514
       * encoding mismatch fatal error
10515
       */
10516
3.16k
      if ((ctxt->encoding == NULL) &&
10517
3.16k
          (ctxt->input->buf != NULL) &&
10518
3.16k
          (ctxt->input->buf->encoder == NULL)) {
10519
3.15k
    xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING,
10520
3.15k
      "Document labelled UTF-16 but has UTF-8 content\n");
10521
3.15k
      }
10522
3.16k
      if (ctxt->encoding != NULL)
10523
10
    xmlFree((xmlChar *) ctxt->encoding);
10524
3.16k
      ctxt->encoding = encoding;
10525
3.16k
  }
10526
  /*
10527
   * UTF-8 encoding is handled natively
10528
   */
10529
214k
        else if ((encoding != NULL) &&
10530
214k
      ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
10531
213k
       (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
10532
94.2k
      if (ctxt->encoding != NULL)
10533
56
    xmlFree((xmlChar *) ctxt->encoding);
10534
94.2k
      ctxt->encoding = encoding;
10535
94.2k
  }
10536
119k
  else if (encoding != NULL) {
10537
118k
      xmlCharEncodingHandlerPtr handler;
10538
10539
118k
      if (ctxt->input->encoding != NULL)
10540
0
    xmlFree((xmlChar *) ctxt->input->encoding);
10541
118k
      ctxt->input->encoding = encoding;
10542
10543
118k
            handler = xmlFindCharEncodingHandler((const char *) encoding);
10544
118k
      if (handler != NULL) {
10545
117k
    if (xmlSwitchToEncoding(ctxt, handler) < 0) {
10546
        /* failed to convert */
10547
223
        ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
10548
223
        return(NULL);
10549
223
    }
10550
117k
      } else {
10551
1.44k
    xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
10552
1.44k
      "Unsupported encoding %s\n", encoding);
10553
1.44k
    return(NULL);
10554
1.44k
      }
10555
118k
  }
10556
217k
    }
10557
374k
    return(encoding);
10558
468k
}
10559
10560
/**
10561
 * xmlParseSDDecl:
10562
 * @ctxt:  an XML parser context
10563
 *
10564
 * DEPRECATED: Internal function, don't use.
10565
 *
10566
 * parse the XML standalone declaration
10567
 *
10568
 * [32] SDDecl ::= S 'standalone' Eq
10569
 *                 (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
10570
 *
10571
 * [ VC: Standalone Document Declaration ]
10572
 * TODO The standalone document declaration must have the value "no"
10573
 * if any external markup declarations contain declarations of:
10574
 *  - attributes with default values, if elements to which these
10575
 *    attributes apply appear in the document without specifications
10576
 *    of values for these attributes, or
10577
 *  - entities (other than amp, lt, gt, apos, quot), if references
10578
 *    to those entities appear in the document, or
10579
 *  - attributes with values subject to normalization, where the
10580
 *    attribute appears in the document with a value which will change
10581
 *    as a result of normalization, or
10582
 *  - element types with element content, if white space occurs directly
10583
 *    within any instance of those types.
10584
 *
10585
 * Returns:
10586
 *   1 if standalone="yes"
10587
 *   0 if standalone="no"
10588
 *  -2 if standalone attribute is missing or invalid
10589
 *    (A standalone value of -2 means that the XML declaration was found,
10590
 *     but no value was specified for the standalone attribute).
10591
 */
10592
10593
int
10594
340k
xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
10595
340k
    int standalone = -2;
10596
10597
340k
    SKIP_BLANKS;
10598
340k
    if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
10599
37.9k
  SKIP(10);
10600
37.9k
        SKIP_BLANKS;
10601
37.9k
  if (RAW != '=') {
10602
261
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10603
261
      return(standalone);
10604
261
        }
10605
37.6k
  NEXT;
10606
37.6k
  SKIP_BLANKS;
10607
37.6k
        if (RAW == '\''){
10608
15.6k
      NEXT;
10609
15.6k
      if ((RAW == 'n') && (NXT(1) == 'o')) {
10610
10.2k
          standalone = 0;
10611
10.2k
                SKIP(2);
10612
10.2k
      } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10613
5.41k
                 (NXT(2) == 's')) {
10614
4.88k
          standalone = 1;
10615
4.88k
    SKIP(3);
10616
4.88k
            } else {
10617
531
    xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10618
531
      }
10619
15.6k
      if (RAW != '\'') {
10620
825
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10621
825
      } else
10622
14.8k
          NEXT;
10623
22.0k
  } else if (RAW == '"'){
10624
21.7k
      NEXT;
10625
21.7k
      if ((RAW == 'n') && (NXT(1) == 'o')) {
10626
10.9k
          standalone = 0;
10627
10.9k
    SKIP(2);
10628
10.9k
      } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10629
10.8k
                 (NXT(2) == 's')) {
10630
9.81k
          standalone = 1;
10631
9.81k
                SKIP(3);
10632
9.81k
            } else {
10633
1.02k
    xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10634
1.02k
      }
10635
21.7k
      if (RAW != '"') {
10636
1.38k
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10637
1.38k
      } else
10638
20.4k
          NEXT;
10639
21.7k
  } else {
10640
240
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10641
240
        }
10642
37.6k
    }
10643
339k
    return(standalone);
10644
340k
}
10645
10646
/**
10647
 * xmlParseXMLDecl:
10648
 * @ctxt:  an XML parser context
10649
 *
10650
 * DEPRECATED: Internal function, don't use.
10651
 *
10652
 * parse an XML declaration header
10653
 *
10654
 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10655
 */
10656
10657
void
10658
601k
xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
10659
601k
    xmlChar *version;
10660
10661
    /*
10662
     * This value for standalone indicates that the document has an
10663
     * XML declaration but it does not have a standalone attribute.
10664
     * It will be overwritten later if a standalone attribute is found.
10665
     */
10666
601k
    ctxt->input->standalone = -2;
10667
10668
    /*
10669
     * We know that '<?xml' is here.
10670
     */
10671
601k
    SKIP(5);
10672
10673
601k
    if (!IS_BLANK_CH(RAW)) {
10674
0
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10675
0
                 "Blank needed after '<?xml'\n");
10676
0
    }
10677
601k
    SKIP_BLANKS;
10678
10679
    /*
10680
     * We must have the VersionInfo here.
10681
     */
10682
601k
    version = xmlParseVersionInfo(ctxt);
10683
601k
    if (version == NULL) {
10684
84.6k
  xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
10685
517k
    } else {
10686
517k
  if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10687
      /*
10688
       * Changed here for XML-1.0 5th edition
10689
       */
10690
10.3k
      if (ctxt->options & XML_PARSE_OLD10) {
10691
2.30k
    xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10692
2.30k
                "Unsupported version '%s'\n",
10693
2.30k
                version);
10694
8.08k
      } else {
10695
8.08k
          if ((version[0] == '1') && ((version[1] == '.'))) {
10696
6.84k
        xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10697
6.84k
                      "Unsupported version '%s'\n",
10698
6.84k
          version, NULL);
10699
6.84k
    } else {
10700
1.24k
        xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10701
1.24k
              "Unsupported version '%s'\n",
10702
1.24k
              version);
10703
1.24k
    }
10704
8.08k
      }
10705
10.3k
  }
10706
517k
  if (ctxt->version != NULL)
10707
0
      xmlFree((void *) ctxt->version);
10708
517k
  ctxt->version = version;
10709
517k
    }
10710
10711
    /*
10712
     * We may have the encoding declaration
10713
     */
10714
601k
    if (!IS_BLANK_CH(RAW)) {
10715
262k
        if ((RAW == '?') && (NXT(1) == '>')) {
10716
161k
      SKIP(2);
10717
161k
      return;
10718
161k
  }
10719
100k
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10720
100k
    }
10721
440k
    xmlParseEncodingDecl(ctxt);
10722
440k
    if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10723
440k
         (ctxt->instate == XML_PARSER_EOF)) {
10724
  /*
10725
   * The XML REC instructs us to stop parsing right here
10726
   */
10727
1.16k
        return;
10728
1.16k
    }
10729
10730
    /*
10731
     * We may have the standalone status.
10732
     */
10733
438k
    if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
10734
103k
        if ((RAW == '?') && (NXT(1) == '>')) {
10735
98.9k
      SKIP(2);
10736
98.9k
      return;
10737
98.9k
  }
10738
4.10k
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10739
4.10k
    }
10740
10741
    /*
10742
     * We can grow the input buffer freely at that point
10743
     */
10744
340k
    GROW;
10745
10746
340k
    SKIP_BLANKS;
10747
340k
    ctxt->input->standalone = xmlParseSDDecl(ctxt);
10748
10749
340k
    SKIP_BLANKS;
10750
340k
    if ((RAW == '?') && (NXT(1) == '>')) {
10751
195k
        SKIP(2);
10752
195k
    } else if (RAW == '>') {
10753
        /* Deprecated old WD ... */
10754
1.60k
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10755
1.60k
  NEXT;
10756
143k
    } else {
10757
143k
        int c;
10758
10759
143k
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10760
6.21M
        while ((c = CUR) != 0) {
10761
6.19M
            NEXT;
10762
6.19M
            if (c == '>')
10763
129k
                break;
10764
6.19M
        }
10765
143k
    }
10766
340k
}
10767
10768
/**
10769
 * xmlParseMisc:
10770
 * @ctxt:  an XML parser context
10771
 *
10772
 * DEPRECATED: Internal function, don't use.
10773
 *
10774
 * parse an XML Misc* optional field.
10775
 *
10776
 * [27] Misc ::= Comment | PI |  S
10777
 */
10778
10779
void
10780
747k
xmlParseMisc(xmlParserCtxtPtr ctxt) {
10781
895k
    while (ctxt->instate != XML_PARSER_EOF) {
10782
895k
        SKIP_BLANKS;
10783
895k
        GROW;
10784
895k
        if ((RAW == '<') && (NXT(1) == '?')) {
10785
102k
      xmlParsePI(ctxt);
10786
793k
        } else if (CMP4(CUR_PTR, '<', '!', '-', '-')) {
10787
45.5k
      xmlParseComment(ctxt);
10788
747k
        } else {
10789
747k
            break;
10790
747k
        }
10791
895k
    }
10792
747k
}
10793
10794
/**
10795
 * xmlParseDocument:
10796
 * @ctxt:  an XML parser context
10797
 *
10798
 * parse an XML document (and build a tree if using the standard SAX
10799
 * interface).
10800
 *
10801
 * [1] document ::= prolog element Misc*
10802
 *
10803
 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
10804
 *
10805
 * Returns 0, -1 in case of error. the parser context is augmented
10806
 *                as a result of the parsing.
10807
 */
10808
10809
int
10810
344k
xmlParseDocument(xmlParserCtxtPtr ctxt) {
10811
344k
    xmlChar start[4];
10812
344k
    xmlCharEncoding enc;
10813
10814
344k
    xmlInitParser();
10815
10816
344k
    if ((ctxt == NULL) || (ctxt->input == NULL))
10817
0
        return(-1);
10818
10819
344k
    GROW;
10820
10821
    /*
10822
     * SAX: detecting the level.
10823
     */
10824
344k
    xmlDetectSAX2(ctxt);
10825
10826
    /*
10827
     * SAX: beginning of the document processing.
10828
     */
10829
344k
    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10830
344k
        ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10831
344k
    if (ctxt->instate == XML_PARSER_EOF)
10832
0
  return(-1);
10833
10834
344k
    if ((ctxt->encoding == NULL) &&
10835
344k
        ((ctxt->input->end - ctxt->input->cur) >= 4)) {
10836
  /*
10837
   * Get the 4 first bytes and decode the charset
10838
   * if enc != XML_CHAR_ENCODING_NONE
10839
   * plug some encoding conversion routines.
10840
   */
10841
339k
  start[0] = RAW;
10842
339k
  start[1] = NXT(1);
10843
339k
  start[2] = NXT(2);
10844
339k
  start[3] = NXT(3);
10845
339k
  enc = xmlDetectCharEncoding(&start[0], 4);
10846
339k
  if (enc != XML_CHAR_ENCODING_NONE) {
10847
213k
      xmlSwitchEncoding(ctxt, enc);
10848
213k
  }
10849
339k
    }
10850
10851
10852
344k
    if (CUR == 0) {
10853
1.92k
  xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10854
1.92k
  return(-1);
10855
1.92k
    }
10856
10857
    /*
10858
     * Check for the XMLDecl in the Prolog.
10859
     * do not GROW here to avoid the detected encoder to decode more
10860
     * than just the first line, unless the amount of data is really
10861
     * too small to hold "<?xml version="1.0" encoding="foo"
10862
     */
10863
342k
    if ((ctxt->input->end - ctxt->input->cur) < 35) {
10864
19.6k
       GROW;
10865
19.6k
    }
10866
342k
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10867
10868
  /*
10869
   * Note that we will switch encoding on the fly.
10870
   */
10871
201k
  xmlParseXMLDecl(ctxt);
10872
201k
  if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10873
201k
      (ctxt->instate == XML_PARSER_EOF)) {
10874
      /*
10875
       * The XML REC instructs us to stop parsing right here
10876
       */
10877
412
      return(-1);
10878
412
  }
10879
201k
  ctxt->standalone = ctxt->input->standalone;
10880
201k
  SKIP_BLANKS;
10881
201k
    } else {
10882
140k
  ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10883
140k
    }
10884
342k
    if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10885
318k
        ctxt->sax->startDocument(ctxt->userData);
10886
342k
    if (ctxt->instate == XML_PARSER_EOF)
10887
0
  return(-1);
10888
342k
    if ((ctxt->myDoc != NULL) && (ctxt->input != NULL) &&
10889
342k
        (ctxt->input->buf != NULL) && (ctxt->input->buf->compressed >= 0)) {
10890
0
  ctxt->myDoc->compression = ctxt->input->buf->compressed;
10891
0
    }
10892
10893
    /*
10894
     * The Misc part of the Prolog
10895
     */
10896
342k
    xmlParseMisc(ctxt);
10897
10898
    /*
10899
     * Then possibly doc type declaration(s) and more Misc
10900
     * (doctypedecl Misc*)?
10901
     */
10902
342k
    GROW;
10903
342k
    if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
10904
10905
225k
  ctxt->inSubset = 1;
10906
225k
  xmlParseDocTypeDecl(ctxt);
10907
225k
  if (RAW == '[') {
10908
174k
      ctxt->instate = XML_PARSER_DTD;
10909
174k
      xmlParseInternalSubset(ctxt);
10910
174k
      if (ctxt->instate == XML_PARSER_EOF)
10911
48.4k
    return(-1);
10912
174k
  }
10913
10914
  /*
10915
   * Create and update the external subset.
10916
   */
10917
177k
  ctxt->inSubset = 2;
10918
177k
  if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10919
177k
      (!ctxt->disableSAX))
10920
163k
      ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10921
163k
                                ctxt->extSubSystem, ctxt->extSubURI);
10922
177k
  if (ctxt->instate == XML_PARSER_EOF)
10923
18.0k
      return(-1);
10924
158k
  ctxt->inSubset = 0;
10925
10926
158k
        xmlCleanSpecialAttr(ctxt);
10927
10928
158k
  ctxt->instate = XML_PARSER_PROLOG;
10929
158k
  xmlParseMisc(ctxt);
10930
158k
    }
10931
10932
    /*
10933
     * Time to start parsing the tree itself
10934
     */
10935
275k
    GROW;
10936
275k
    if (RAW != '<') {
10937
28.7k
  xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10938
28.7k
           "Start tag expected, '<' not found\n");
10939
246k
    } else {
10940
246k
  ctxt->instate = XML_PARSER_CONTENT;
10941
246k
  xmlParseElement(ctxt);
10942
246k
  ctxt->instate = XML_PARSER_EPILOG;
10943
10944
10945
  /*
10946
   * The Misc part at the end
10947
   */
10948
246k
  xmlParseMisc(ctxt);
10949
10950
246k
  if (RAW != 0) {
10951
69.0k
      xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
10952
69.0k
  }
10953
246k
  ctxt->instate = XML_PARSER_EOF;
10954
246k
    }
10955
10956
    /*
10957
     * SAX: end of the document processing.
10958
     */
10959
275k
    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10960
275k
        ctxt->sax->endDocument(ctxt->userData);
10961
10962
    /*
10963
     * Remove locally kept entity definitions if the tree was not built
10964
     */
10965
275k
    if ((ctxt->myDoc != NULL) &&
10966
275k
  (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
10967
2.93k
  xmlFreeDoc(ctxt->myDoc);
10968
2.93k
  ctxt->myDoc = NULL;
10969
2.93k
    }
10970
10971
275k
    if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) {
10972
36.2k
        ctxt->myDoc->properties |= XML_DOC_WELLFORMED;
10973
36.2k
  if (ctxt->valid)
10974
21.9k
      ctxt->myDoc->properties |= XML_DOC_DTDVALID;
10975
36.2k
  if (ctxt->nsWellFormed)
10976
34.7k
      ctxt->myDoc->properties |= XML_DOC_NSVALID;
10977
36.2k
  if (ctxt->options & XML_PARSE_OLD10)
10978
6.00k
      ctxt->myDoc->properties |= XML_DOC_OLD10;
10979
36.2k
    }
10980
275k
    if (! ctxt->wellFormed) {
10981
239k
  ctxt->valid = 0;
10982
239k
  return(-1);
10983
239k
    }
10984
36.2k
    return(0);
10985
275k
}
10986
10987
/**
10988
 * xmlParseExtParsedEnt:
10989
 * @ctxt:  an XML parser context
10990
 *
10991
 * parse a general parsed entity
10992
 * An external general parsed entity is well-formed if it matches the
10993
 * production labeled extParsedEnt.
10994
 *
10995
 * [78] extParsedEnt ::= TextDecl? content
10996
 *
10997
 * Returns 0, -1 in case of error. the parser context is augmented
10998
 *                as a result of the parsing.
10999
 */
11000
11001
int
11002
0
xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
11003
0
    xmlChar start[4];
11004
0
    xmlCharEncoding enc;
11005
11006
0
    if ((ctxt == NULL) || (ctxt->input == NULL))
11007
0
        return(-1);
11008
11009
0
    xmlDetectSAX2(ctxt);
11010
11011
0
    GROW;
11012
11013
    /*
11014
     * SAX: beginning of the document processing.
11015
     */
11016
0
    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11017
0
        ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
11018
11019
    /*
11020
     * Get the 4 first bytes and decode the charset
11021
     * if enc != XML_CHAR_ENCODING_NONE
11022
     * plug some encoding conversion routines.
11023
     */
11024
0
    if ((ctxt->input->end - ctxt->input->cur) >= 4) {
11025
0
  start[0] = RAW;
11026
0
  start[1] = NXT(1);
11027
0
  start[2] = NXT(2);
11028
0
  start[3] = NXT(3);
11029
0
  enc = xmlDetectCharEncoding(start, 4);
11030
0
  if (enc != XML_CHAR_ENCODING_NONE) {
11031
0
      xmlSwitchEncoding(ctxt, enc);
11032
0
  }
11033
0
    }
11034
11035
11036
0
    if (CUR == 0) {
11037
0
  xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11038
0
    }
11039
11040
    /*
11041
     * Check for the XMLDecl in the Prolog.
11042
     */
11043
0
    GROW;
11044
0
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
11045
11046
  /*
11047
   * Note that we will switch encoding on the fly.
11048
   */
11049
0
  xmlParseXMLDecl(ctxt);
11050
0
  if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
11051
      /*
11052
       * The XML REC instructs us to stop parsing right here
11053
       */
11054
0
      return(-1);
11055
0
  }
11056
0
  SKIP_BLANKS;
11057
0
    } else {
11058
0
  ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11059
0
    }
11060
0
    if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
11061
0
        ctxt->sax->startDocument(ctxt->userData);
11062
0
    if (ctxt->instate == XML_PARSER_EOF)
11063
0
  return(-1);
11064
11065
    /*
11066
     * Doing validity checking on chunk doesn't make sense
11067
     */
11068
0
    ctxt->instate = XML_PARSER_CONTENT;
11069
0
    ctxt->validate = 0;
11070
0
    ctxt->loadsubset = 0;
11071
0
    ctxt->depth = 0;
11072
11073
0
    xmlParseContent(ctxt);
11074
0
    if (ctxt->instate == XML_PARSER_EOF)
11075
0
  return(-1);
11076
11077
0
    if ((RAW == '<') && (NXT(1) == '/')) {
11078
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11079
0
    } else if (RAW != 0) {
11080
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
11081
0
    }
11082
11083
    /*
11084
     * SAX: end of the document processing.
11085
     */
11086
0
    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11087
0
        ctxt->sax->endDocument(ctxt->userData);
11088
11089
0
    if (! ctxt->wellFormed) return(-1);
11090
0
    return(0);
11091
0
}
11092
11093
#ifdef LIBXML_PUSH_ENABLED
11094
/************************************************************************
11095
 *                  *
11096
 *    Progressive parsing interfaces        *
11097
 *                  *
11098
 ************************************************************************/
11099
11100
/**
11101
 * xmlParseLookupChar:
11102
 * @ctxt:  an XML parser context
11103
 * @c:  character
11104
 *
11105
 * Check whether the input buffer contains a character.
11106
 */
11107
static int
11108
20.2M
xmlParseLookupChar(xmlParserCtxtPtr ctxt, int c) {
11109
20.2M
    const xmlChar *cur;
11110
11111
20.2M
    if (ctxt->checkIndex == 0) {
11112
19.6M
        cur = ctxt->input->cur + 1;
11113
19.6M
    } else {
11114
586k
        cur = ctxt->input->cur + ctxt->checkIndex;
11115
586k
    }
11116
11117
20.2M
    if (memchr(cur, c, ctxt->input->end - cur) == NULL) {
11118
610k
        ctxt->checkIndex = ctxt->input->end - ctxt->input->cur;
11119
610k
        return(0);
11120
19.6M
    } else {
11121
19.6M
        ctxt->checkIndex = 0;
11122
19.6M
        return(1);
11123
19.6M
    }
11124
20.2M
}
11125
11126
/**
11127
 * xmlParseLookupString:
11128
 * @ctxt:  an XML parser context
11129
 * @startDelta: delta to apply at the start
11130
 * @str:  string
11131
 * @strLen:  length of string
11132
 *
11133
 * Check whether the input buffer contains a string.
11134
 */
11135
static const xmlChar *
11136
xmlParseLookupString(xmlParserCtxtPtr ctxt, size_t startDelta,
11137
3.70M
                     const char *str, size_t strLen) {
11138
3.70M
    const xmlChar *cur, *term;
11139
11140
3.70M
    if (ctxt->checkIndex == 0) {
11141
2.17M
        cur = ctxt->input->cur + startDelta;
11142
2.17M
    } else {
11143
1.53M
        cur = ctxt->input->cur + ctxt->checkIndex;
11144
1.53M
    }
11145
11146
3.70M
    term = BAD_CAST strstr((const char *) cur, str);
11147
3.70M
    if (term == NULL) {
11148
1.85M
        const xmlChar *end = ctxt->input->end;
11149
11150
        /* Rescan (strLen - 1) characters. */
11151
1.85M
        if ((size_t) (end - cur) < strLen)
11152
50.0k
            end = cur;
11153
1.80M
        else
11154
1.80M
            end -= strLen - 1;
11155
1.85M
        ctxt->checkIndex = end - ctxt->input->cur;
11156
1.85M
    } else {
11157
1.84M
        ctxt->checkIndex = 0;
11158
1.84M
    }
11159
11160
3.70M
    return(term);
11161
3.70M
}
11162
11163
/**
11164
 * xmlParseLookupCharData:
11165
 * @ctxt:  an XML parser context
11166
 *
11167
 * Check whether the input buffer contains terminated char data.
11168
 */
11169
static int
11170
31.9M
xmlParseLookupCharData(xmlParserCtxtPtr ctxt) {
11171
31.9M
    const xmlChar *cur = ctxt->input->cur + ctxt->checkIndex;
11172
31.9M
    const xmlChar *end = ctxt->input->end;
11173
11174
650M
    while (cur < end) {
11175
646M
        if ((*cur == '<') || (*cur == '&')) {
11176
27.7M
            ctxt->checkIndex = 0;
11177
27.7M
            return(1);
11178
27.7M
        }
11179
618M
        cur++;
11180
618M
    }
11181
11182
4.13M
    ctxt->checkIndex = cur - ctxt->input->cur;
11183
4.13M
    return(0);
11184
31.9M
}
11185
11186
/**
11187
 * xmlParseLookupGt:
11188
 * @ctxt:  an XML parser context
11189
 *
11190
 * Check whether there's enough data in the input buffer to finish parsing
11191
 * a start tag. This has to take quotes into account.
11192
 */
11193
static int
11194
26.5M
xmlParseLookupGt(xmlParserCtxtPtr ctxt) {
11195
26.5M
    const xmlChar *cur;
11196
26.5M
    const xmlChar *end = ctxt->input->end;
11197
26.5M
    int state = ctxt->endCheckState;
11198
11199
26.5M
    if (ctxt->checkIndex == 0)
11200
21.6M
        cur = ctxt->input->cur + 1;
11201
4.93M
    else
11202
4.93M
        cur = ctxt->input->cur + ctxt->checkIndex;
11203
11204
996M
    while (cur < end) {
11205
991M
        if (state) {
11206
518M
            if (*cur == state)
11207
23.9M
                state = 0;
11208
518M
        } else if (*cur == '\'' || *cur == '"') {
11209
24.0M
            state = *cur;
11210
449M
        } else if (*cur == '>') {
11211
21.4M
            ctxt->checkIndex = 0;
11212
21.4M
            ctxt->endCheckState = 0;
11213
21.4M
            return(1);
11214
21.4M
        }
11215
970M
        cur++;
11216
970M
    }
11217
11218
5.08M
    ctxt->checkIndex = cur - ctxt->input->cur;
11219
5.08M
    ctxt->endCheckState = state;
11220
5.08M
    return(0);
11221
26.5M
}
11222
11223
/**
11224
 * xmlParseLookupInternalSubset:
11225
 * @ctxt:  an XML parser context
11226
 *
11227
 * Check whether there's enough data in the input buffer to finish parsing
11228
 * the internal subset.
11229
 */
11230
static int
11231
3.06M
xmlParseLookupInternalSubset(xmlParserCtxtPtr ctxt) {
11232
    /*
11233
     * Sorry, but progressive parsing of the internal subset is not
11234
     * supported. We first check that the full content of the internal
11235
     * subset is available and parsing is launched only at that point.
11236
     * Internal subset ends with "']' S? '>'" in an unescaped section and
11237
     * not in a ']]>' sequence which are conditional sections.
11238
     */
11239
3.06M
    const xmlChar *cur, *start;
11240
3.06M
    const xmlChar *end = ctxt->input->end;
11241
3.06M
    int state = ctxt->endCheckState;
11242
11243
3.06M
    if (ctxt->checkIndex == 0) {
11244
309k
        cur = ctxt->input->cur + 1;
11245
2.75M
    } else {
11246
2.75M
        cur = ctxt->input->cur + ctxt->checkIndex;
11247
2.75M
    }
11248
3.06M
    start = cur;
11249
11250
590M
    while (cur < end) {
11251
587M
        if (state == '-') {
11252
166M
            if ((*cur == '-') &&
11253
166M
                (cur[1] == '-') &&
11254
166M
                (cur[2] == '>')) {
11255
4.16M
                state = 0;
11256
4.16M
                cur += 3;
11257
4.16M
                start = cur;
11258
4.16M
                continue;
11259
4.16M
            }
11260
166M
        }
11261
421M
        else if (state == ']') {
11262
349k
            if (*cur == '>') {
11263
270k
                ctxt->checkIndex = 0;
11264
270k
                ctxt->endCheckState = 0;
11265
270k
                return(1);
11266
270k
            }
11267
79.1k
            if (IS_BLANK_CH(*cur)) {
11268
21.9k
                state = ' ';
11269
57.2k
            } else if (*cur != ']') {
11270
7.14k
                state = 0;
11271
7.14k
                start = cur;
11272
7.14k
                continue;
11273
7.14k
            }
11274
79.1k
        }
11275
421M
        else if (state == ' ') {
11276
61.9k
            if (*cur == '>') {
11277
365
                ctxt->checkIndex = 0;
11278
365
                ctxt->endCheckState = 0;
11279
365
                return(1);
11280
365
            }
11281
61.5k
            if (!IS_BLANK_CH(*cur)) {
11282
21.4k
                state = 0;
11283
21.4k
                start = cur;
11284
21.4k
                continue;
11285
21.4k
            }
11286
61.5k
        }
11287
421M
        else if (state != 0) {
11288
199M
            if (*cur == state) {
11289
5.78M
                state = 0;
11290
5.78M
                start = cur + 1;
11291
5.78M
            }
11292
199M
        }
11293
222M
        else if (*cur == '<') {
11294
11.3M
            if ((cur[1] == '!') &&
11295
11.3M
                (cur[2] == '-') &&
11296
11.3M
                (cur[3] == '-')) {
11297
4.16M
                state = '-';
11298
4.16M
                cur += 4;
11299
                /* Don't treat <!--> as comment */
11300
4.16M
                start = cur;
11301
4.16M
                continue;
11302
4.16M
            }
11303
11.3M
        }
11304
210M
        else if ((*cur == '"') || (*cur == '\'') || (*cur == ']')) {
11305
6.10M
            state = *cur;
11306
6.10M
        }
11307
11308
579M
        cur++;
11309
579M
    }
11310
11311
    /*
11312
     * Rescan the three last characters to detect "<!--" and "-->"
11313
     * split across chunks.
11314
     */
11315
2.79M
    if ((state == 0) || (state == '-')) {
11316
1.83M
        if (cur - start < 3)
11317
276k
            cur = start;
11318
1.55M
        else
11319
1.55M
            cur -= 3;
11320
1.83M
    }
11321
2.79M
    ctxt->checkIndex = cur - ctxt->input->cur;
11322
2.79M
    ctxt->endCheckState = state;
11323
2.79M
    return(0);
11324
3.06M
}
11325
11326
/**
11327
 * xmlCheckCdataPush:
11328
 * @cur: pointer to the block of characters
11329
 * @len: length of the block in bytes
11330
 * @complete: 1 if complete CDATA block is passed in, 0 if partial block
11331
 *
11332
 * Check that the block of characters is okay as SCdata content [20]
11333
 *
11334
 * Returns the number of bytes to pass if okay, a negative index where an
11335
 *         UTF-8 error occurred otherwise
11336
 */
11337
static int
11338
573k
xmlCheckCdataPush(const xmlChar *utf, int len, int complete) {
11339
573k
    int ix;
11340
573k
    unsigned char c;
11341
573k
    int codepoint;
11342
11343
573k
    if ((utf == NULL) || (len <= 0))
11344
14.7k
        return(0);
11345
11346
39.9M
    for (ix = 0; ix < len;) {      /* string is 0-terminated */
11347
39.7M
        c = utf[ix];
11348
39.7M
        if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
11349
35.2M
      if (c >= 0x20)
11350
32.2M
    ix++;
11351
2.95M
      else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
11352
2.90M
          ix++;
11353
56.3k
      else
11354
56.3k
          return(-ix);
11355
35.2M
  } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
11356
1.29M
      if (ix + 2 > len) return(complete ? -ix : ix);
11357
1.29M
      if ((utf[ix+1] & 0xc0 ) != 0x80)
11358
167k
          return(-ix);
11359
1.12M
      codepoint = (utf[ix] & 0x1f) << 6;
11360
1.12M
      codepoint |= utf[ix+1] & 0x3f;
11361
1.12M
      if (!xmlIsCharQ(codepoint))
11362
10.4k
          return(-ix);
11363
1.11M
      ix += 2;
11364
3.19M
  } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
11365
504k
      if (ix + 3 > len) return(complete ? -ix : ix);
11366
495k
      if (((utf[ix+1] & 0xc0) != 0x80) ||
11367
495k
          ((utf[ix+2] & 0xc0) != 0x80))
11368
21.5k
        return(-ix);
11369
473k
      codepoint = (utf[ix] & 0xf) << 12;
11370
473k
      codepoint |= (utf[ix+1] & 0x3f) << 6;
11371
473k
      codepoint |= utf[ix+2] & 0x3f;
11372
473k
      if (!xmlIsCharQ(codepoint))
11373
10.1k
          return(-ix);
11374
463k
      ix += 3;
11375
2.69M
  } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
11376
2.65M
      if (ix + 4 > len) return(complete ? -ix : ix);
11377
2.63M
      if (((utf[ix+1] & 0xc0) != 0x80) ||
11378
2.63M
          ((utf[ix+2] & 0xc0) != 0x80) ||
11379
2.63M
    ((utf[ix+3] & 0xc0) != 0x80))
11380
37.8k
        return(-ix);
11381
2.59M
      codepoint = (utf[ix] & 0x7) << 18;
11382
2.59M
      codepoint |= (utf[ix+1] & 0x3f) << 12;
11383
2.59M
      codepoint |= (utf[ix+2] & 0x3f) << 6;
11384
2.59M
      codepoint |= utf[ix+3] & 0x3f;
11385
2.59M
      if (!xmlIsCharQ(codepoint))
11386
10.8k
          return(-ix);
11387
2.58M
      ix += 4;
11388
2.58M
  } else       /* unknown encoding */
11389
42.4k
      return(-ix);
11390
39.7M
      }
11391
171k
      return(ix);
11392
559k
}
11393
11394
/**
11395
 * xmlParseTryOrFinish:
11396
 * @ctxt:  an XML parser context
11397
 * @terminate:  last chunk indicator
11398
 *
11399
 * Try to progress on parsing
11400
 *
11401
 * Returns zero if no parsing was possible
11402
 */
11403
static int
11404
16.3M
xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
11405
16.3M
    int ret = 0;
11406
16.3M
    int avail, tlen;
11407
16.3M
    xmlChar cur, next;
11408
11409
16.3M
    if (ctxt->input == NULL)
11410
0
        return(0);
11411
11412
#ifdef DEBUG_PUSH
11413
    switch (ctxt->instate) {
11414
  case XML_PARSER_EOF:
11415
      xmlGenericError(xmlGenericErrorContext,
11416
        "PP: try EOF\n"); break;
11417
  case XML_PARSER_START:
11418
      xmlGenericError(xmlGenericErrorContext,
11419
        "PP: try START\n"); break;
11420
  case XML_PARSER_MISC:
11421
      xmlGenericError(xmlGenericErrorContext,
11422
        "PP: try MISC\n");break;
11423
  case XML_PARSER_COMMENT:
11424
      xmlGenericError(xmlGenericErrorContext,
11425
        "PP: try COMMENT\n");break;
11426
  case XML_PARSER_PROLOG:
11427
      xmlGenericError(xmlGenericErrorContext,
11428
        "PP: try PROLOG\n");break;
11429
  case XML_PARSER_START_TAG:
11430
      xmlGenericError(xmlGenericErrorContext,
11431
        "PP: try START_TAG\n");break;
11432
  case XML_PARSER_CONTENT:
11433
      xmlGenericError(xmlGenericErrorContext,
11434
        "PP: try CONTENT\n");break;
11435
  case XML_PARSER_CDATA_SECTION:
11436
      xmlGenericError(xmlGenericErrorContext,
11437
        "PP: try CDATA_SECTION\n");break;
11438
  case XML_PARSER_END_TAG:
11439
      xmlGenericError(xmlGenericErrorContext,
11440
        "PP: try END_TAG\n");break;
11441
  case XML_PARSER_ENTITY_DECL:
11442
      xmlGenericError(xmlGenericErrorContext,
11443
        "PP: try ENTITY_DECL\n");break;
11444
  case XML_PARSER_ENTITY_VALUE:
11445
      xmlGenericError(xmlGenericErrorContext,
11446
        "PP: try ENTITY_VALUE\n");break;
11447
  case XML_PARSER_ATTRIBUTE_VALUE:
11448
      xmlGenericError(xmlGenericErrorContext,
11449
        "PP: try ATTRIBUTE_VALUE\n");break;
11450
  case XML_PARSER_DTD:
11451
      xmlGenericError(xmlGenericErrorContext,
11452
        "PP: try DTD\n");break;
11453
  case XML_PARSER_EPILOG:
11454
      xmlGenericError(xmlGenericErrorContext,
11455
        "PP: try EPILOG\n");break;
11456
  case XML_PARSER_PI:
11457
      xmlGenericError(xmlGenericErrorContext,
11458
        "PP: try PI\n");break;
11459
        case XML_PARSER_IGNORE:
11460
            xmlGenericError(xmlGenericErrorContext,
11461
        "PP: try IGNORE\n");break;
11462
    }
11463
#endif
11464
11465
16.3M
    if ((ctxt->input != NULL) &&
11466
16.3M
        (ctxt->input->cur - ctxt->input->base > 4096)) {
11467
318k
        xmlParserInputShrink(ctxt->input);
11468
318k
    }
11469
11470
145M
    while (ctxt->instate != XML_PARSER_EOF) {
11471
145M
  if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
11472
202k
      return(0);
11473
11474
145M
  if (ctxt->input == NULL) break;
11475
145M
  if (ctxt->input->buf == NULL)
11476
0
      avail = ctxt->input->length -
11477
0
              (ctxt->input->cur - ctxt->input->base);
11478
145M
  else {
11479
      /*
11480
       * If we are operating on converted input, try to flush
11481
       * remaining chars to avoid them stalling in the non-converted
11482
       * buffer. But do not do this in document start where
11483
       * encoding="..." may not have been read and we work on a
11484
       * guessed encoding.
11485
       */
11486
145M
      if ((ctxt->instate != XML_PARSER_START) &&
11487
145M
          (ctxt->input->buf->raw != NULL) &&
11488
145M
    (xmlBufIsEmpty(ctxt->input->buf->raw) == 0)) {
11489
329k
                size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
11490
329k
                                                 ctxt->input);
11491
329k
    size_t current = ctxt->input->cur - ctxt->input->base;
11492
11493
329k
    xmlParserInputBufferPush(ctxt->input->buf, 0, "");
11494
329k
                xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
11495
329k
                                      base, current);
11496
329k
      }
11497
145M
      avail = xmlBufUse(ctxt->input->buf->buffer) -
11498
145M
        (ctxt->input->cur - ctxt->input->base);
11499
145M
  }
11500
145M
        if (avail < 1)
11501
736k
      goto done;
11502
144M
        switch (ctxt->instate) {
11503
0
            case XML_PARSER_EOF:
11504
          /*
11505
     * Document parsing is done !
11506
     */
11507
0
          goto done;
11508
1.62M
            case XML_PARSER_START:
11509
1.62M
    if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
11510
489k
        xmlChar start[4];
11511
489k
        xmlCharEncoding enc;
11512
11513
        /*
11514
         * Very first chars read from the document flow.
11515
         */
11516
489k
        if (avail < 4)
11517
24.3k
      goto done;
11518
11519
        /*
11520
         * Get the 4 first bytes and decode the charset
11521
         * if enc != XML_CHAR_ENCODING_NONE
11522
         * plug some encoding conversion routines,
11523
         * else xmlSwitchEncoding will set to (default)
11524
         * UTF8.
11525
         */
11526
465k
        start[0] = RAW;
11527
465k
        start[1] = NXT(1);
11528
465k
        start[2] = NXT(2);
11529
465k
        start[3] = NXT(3);
11530
465k
        enc = xmlDetectCharEncoding(start, 4);
11531
465k
        xmlSwitchEncoding(ctxt, enc);
11532
465k
        break;
11533
489k
    }
11534
11535
1.13M
    if (avail < 2)
11536
265
        goto done;
11537
1.13M
    cur = ctxt->input->cur[0];
11538
1.13M
    next = ctxt->input->cur[1];
11539
1.13M
    if (cur == 0) {
11540
3.09k
        if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11541
3.09k
      ctxt->sax->setDocumentLocator(ctxt->userData,
11542
3.09k
                  &xmlDefaultSAXLocator);
11543
3.09k
        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11544
3.09k
        xmlHaltParser(ctxt);
11545
#ifdef DEBUG_PUSH
11546
        xmlGenericError(xmlGenericErrorContext,
11547
          "PP: entering EOF\n");
11548
#endif
11549
3.09k
        if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11550
3.09k
      ctxt->sax->endDocument(ctxt->userData);
11551
3.09k
        goto done;
11552
3.09k
    }
11553
1.13M
          if ((cur == '<') && (next == '?')) {
11554
        /* PI or XML decl */
11555
896k
        if (avail < 5) goto done;
11556
895k
        if ((!terminate) &&
11557
895k
                        (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11558
464k
      goto done;
11559
431k
        if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11560
431k
      ctxt->sax->setDocumentLocator(ctxt->userData,
11561
431k
                  &xmlDefaultSAXLocator);
11562
431k
        if ((ctxt->input->cur[2] == 'x') &&
11563
431k
      (ctxt->input->cur[3] == 'm') &&
11564
431k
      (ctxt->input->cur[4] == 'l') &&
11565
431k
      (IS_BLANK_CH(ctxt->input->cur[5]))) {
11566
399k
      ret += 5;
11567
#ifdef DEBUG_PUSH
11568
      xmlGenericError(xmlGenericErrorContext,
11569
        "PP: Parsing XML Decl\n");
11570
#endif
11571
399k
      xmlParseXMLDecl(ctxt);
11572
399k
      if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
11573
          /*
11574
           * The XML REC instructs us to stop parsing right
11575
           * here
11576
           */
11577
748
          xmlHaltParser(ctxt);
11578
748
          return(0);
11579
748
      }
11580
399k
      ctxt->standalone = ctxt->input->standalone;
11581
399k
      if ((ctxt->encoding == NULL) &&
11582
399k
          (ctxt->input->encoding != NULL))
11583
75.9k
          ctxt->encoding = xmlStrdup(ctxt->input->encoding);
11584
399k
      if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11585
399k
          (!ctxt->disableSAX))
11586
354k
          ctxt->sax->startDocument(ctxt->userData);
11587
399k
      ctxt->instate = XML_PARSER_MISC;
11588
#ifdef DEBUG_PUSH
11589
      xmlGenericError(xmlGenericErrorContext,
11590
        "PP: entering MISC\n");
11591
#endif
11592
399k
        } else {
11593
31.7k
      ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11594
31.7k
      if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11595
31.7k
          (!ctxt->disableSAX))
11596
31.7k
          ctxt->sax->startDocument(ctxt->userData);
11597
31.7k
      ctxt->instate = XML_PARSER_MISC;
11598
#ifdef DEBUG_PUSH
11599
      xmlGenericError(xmlGenericErrorContext,
11600
        "PP: entering MISC\n");
11601
#endif
11602
31.7k
        }
11603
431k
    } else {
11604
239k
        if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11605
239k
      ctxt->sax->setDocumentLocator(ctxt->userData,
11606
239k
                  &xmlDefaultSAXLocator);
11607
239k
        ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11608
239k
        if (ctxt->version == NULL) {
11609
0
            xmlErrMemory(ctxt, NULL);
11610
0
      break;
11611
0
        }
11612
239k
        if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11613
239k
            (!ctxt->disableSAX))
11614
239k
      ctxt->sax->startDocument(ctxt->userData);
11615
239k
        ctxt->instate = XML_PARSER_MISC;
11616
#ifdef DEBUG_PUSH
11617
        xmlGenericError(xmlGenericErrorContext,
11618
          "PP: entering MISC\n");
11619
#endif
11620
239k
    }
11621
669k
    break;
11622
28.2M
            case XML_PARSER_START_TAG: {
11623
28.2M
          const xmlChar *name;
11624
28.2M
    const xmlChar *prefix = NULL;
11625
28.2M
    const xmlChar *URI = NULL;
11626
28.2M
                int line = ctxt->input->line;
11627
28.2M
    int nsNr = ctxt->nsNr;
11628
11629
28.2M
    if ((avail < 2) && (ctxt->inputNr == 1))
11630
0
        goto done;
11631
28.2M
    cur = ctxt->input->cur[0];
11632
28.2M
          if (cur != '<') {
11633
21.9k
        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11634
21.9k
        xmlHaltParser(ctxt);
11635
21.9k
        if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11636
21.9k
      ctxt->sax->endDocument(ctxt->userData);
11637
21.9k
        goto done;
11638
21.9k
    }
11639
28.2M
    if ((!terminate) && (!xmlParseLookupGt(ctxt)))
11640
4.72M
                    goto done;
11641
23.5M
    if (ctxt->spaceNr == 0)
11642
104k
        spacePush(ctxt, -1);
11643
23.3M
    else if (*ctxt->space == -2)
11644
1.93M
        spacePush(ctxt, -1);
11645
21.4M
    else
11646
21.4M
        spacePush(ctxt, *ctxt->space);
11647
23.5M
#ifdef LIBXML_SAX1_ENABLED
11648
23.5M
    if (ctxt->sax2)
11649
13.9M
#endif /* LIBXML_SAX1_ENABLED */
11650
13.9M
        name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
11651
9.55M
#ifdef LIBXML_SAX1_ENABLED
11652
9.55M
    else
11653
9.55M
        name = xmlParseStartTag(ctxt);
11654
23.5M
#endif /* LIBXML_SAX1_ENABLED */
11655
23.5M
    if (ctxt->instate == XML_PARSER_EOF)
11656
1.38k
        goto done;
11657
23.4M
    if (name == NULL) {
11658
28.2k
        spacePop(ctxt);
11659
28.2k
        xmlHaltParser(ctxt);
11660
28.2k
        if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11661
28.2k
      ctxt->sax->endDocument(ctxt->userData);
11662
28.2k
        goto done;
11663
28.2k
    }
11664
23.4M
#ifdef LIBXML_VALID_ENABLED
11665
    /*
11666
     * [ VC: Root Element Type ]
11667
     * The Name in the document type declaration must match
11668
     * the element type of the root element.
11669
     */
11670
23.4M
    if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
11671
23.4M
        ctxt->node && (ctxt->node == ctxt->myDoc->children))
11672
0
        ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
11673
23.4M
#endif /* LIBXML_VALID_ENABLED */
11674
11675
    /*
11676
     * Check for an Empty Element.
11677
     */
11678
23.4M
    if ((RAW == '/') && (NXT(1) == '>')) {
11679
7.72M
        SKIP(2);
11680
11681
7.72M
        if (ctxt->sax2) {
11682
4.84M
      if ((ctxt->sax != NULL) &&
11683
4.84M
          (ctxt->sax->endElementNs != NULL) &&
11684
4.84M
          (!ctxt->disableSAX))
11685
4.84M
          ctxt->sax->endElementNs(ctxt->userData, name,
11686
4.84M
                                  prefix, URI);
11687
4.84M
      if (ctxt->nsNr - nsNr > 0)
11688
14.5k
          nsPop(ctxt, ctxt->nsNr - nsNr);
11689
4.84M
#ifdef LIBXML_SAX1_ENABLED
11690
4.84M
        } else {
11691
2.87M
      if ((ctxt->sax != NULL) &&
11692
2.87M
          (ctxt->sax->endElement != NULL) &&
11693
2.87M
          (!ctxt->disableSAX))
11694
2.87M
          ctxt->sax->endElement(ctxt->userData, name);
11695
2.87M
#endif /* LIBXML_SAX1_ENABLED */
11696
2.87M
        }
11697
7.72M
        if (ctxt->instate == XML_PARSER_EOF)
11698
0
      goto done;
11699
7.72M
        spacePop(ctxt);
11700
7.72M
        if (ctxt->nameNr == 0) {
11701
18.7k
      ctxt->instate = XML_PARSER_EPILOG;
11702
7.70M
        } else {
11703
7.70M
      ctxt->instate = XML_PARSER_CONTENT;
11704
7.70M
        }
11705
7.72M
        break;
11706
7.72M
    }
11707
15.7M
    if (RAW == '>') {
11708
14.7M
        NEXT;
11709
14.7M
    } else {
11710
975k
        xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
11711
975k
           "Couldn't find end of Start Tag %s\n",
11712
975k
           name);
11713
975k
        nodePop(ctxt);
11714
975k
        spacePop(ctxt);
11715
975k
    }
11716
15.7M
                nameNsPush(ctxt, name, prefix, URI, line, ctxt->nsNr - nsNr);
11717
11718
15.7M
    ctxt->instate = XML_PARSER_CONTENT;
11719
15.7M
                break;
11720
23.4M
      }
11721
94.5M
            case XML_PARSER_CONTENT: {
11722
94.5M
    if ((avail < 2) && (ctxt->inputNr == 1))
11723
422k
        goto done;
11724
94.1M
    cur = ctxt->input->cur[0];
11725
94.1M
    next = ctxt->input->cur[1];
11726
11727
94.1M
    if ((cur == '<') && (next == '/')) {
11728
13.9M
        ctxt->instate = XML_PARSER_END_TAG;
11729
13.9M
        break;
11730
80.1M
          } else if ((cur == '<') && (next == '?')) {
11731
282k
        if ((!terminate) &&
11732
282k
            (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11733
116k
      goto done;
11734
165k
        xmlParsePI(ctxt);
11735
165k
        ctxt->instate = XML_PARSER_CONTENT;
11736
79.8M
    } else if ((cur == '<') && (next != '!')) {
11737
23.0M
        ctxt->instate = XML_PARSER_START_TAG;
11738
23.0M
        break;
11739
56.7M
    } else if ((cur == '<') && (next == '!') &&
11740
56.7M
               (ctxt->input->cur[2] == '-') &&
11741
56.7M
         (ctxt->input->cur[3] == '-')) {
11742
1.51M
        if ((!terminate) &&
11743
1.51M
            (!xmlParseLookupString(ctxt, 4, "-->", 3)))
11744
608k
      goto done;
11745
911k
        xmlParseComment(ctxt);
11746
911k
        ctxt->instate = XML_PARSER_CONTENT;
11747
55.2M
    } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
11748
55.2M
        (ctxt->input->cur[2] == '[') &&
11749
55.2M
        (ctxt->input->cur[3] == 'C') &&
11750
55.2M
        (ctxt->input->cur[4] == 'D') &&
11751
55.2M
        (ctxt->input->cur[5] == 'A') &&
11752
55.2M
        (ctxt->input->cur[6] == 'T') &&
11753
55.2M
        (ctxt->input->cur[7] == 'A') &&
11754
55.2M
        (ctxt->input->cur[8] == '[')) {
11755
76.9k
        SKIP(9);
11756
76.9k
        ctxt->instate = XML_PARSER_CDATA_SECTION;
11757
76.9k
        break;
11758
55.1M
    } else if ((cur == '<') && (next == '!') &&
11759
55.1M
               (avail < 9)) {
11760
36.4k
        goto done;
11761
55.1M
    } else if (cur == '<') {
11762
561k
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
11763
561k
                    "detected an error in element content\n");
11764
561k
                    SKIP(1);
11765
54.5M
    } else if (cur == '&') {
11766
10.4M
        if ((!terminate) && (!xmlParseLookupChar(ctxt, ';')))
11767
210k
      goto done;
11768
10.2M
        xmlParseReference(ctxt);
11769
44.1M
    } else {
11770
        /* TODO Avoid the extra copy, handle directly !!! */
11771
        /*
11772
         * Goal of the following test is:
11773
         *  - minimize calls to the SAX 'character' callback
11774
         *    when they are mergeable
11775
         *  - handle an problem for isBlank when we only parse
11776
         *    a sequence of blank chars and the next one is
11777
         *    not available to check against '<' presence.
11778
         *  - tries to homogenize the differences in SAX
11779
         *    callbacks between the push and pull versions
11780
         *    of the parser.
11781
         */
11782
44.1M
        if ((ctxt->inputNr == 1) &&
11783
44.1M
            (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
11784
32.9M
      if ((!terminate) && (!xmlParseLookupCharData(ctxt)))
11785
4.13M
          goto done;
11786
32.9M
                    }
11787
40.0M
                    ctxt->checkIndex = 0;
11788
40.0M
        xmlParseCharData(ctxt, 0);
11789
40.0M
    }
11790
51.8M
    break;
11791
94.1M
      }
11792
51.8M
            case XML_PARSER_END_TAG:
11793
14.3M
    if (avail < 2)
11794
0
        goto done;
11795
14.3M
    if ((!terminate) && (!xmlParseLookupChar(ctxt, '>')))
11796
399k
        goto done;
11797
13.9M
    if (ctxt->sax2) {
11798
8.10M
              xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
11799
8.10M
        nameNsPop(ctxt);
11800
8.10M
    }
11801
5.88M
#ifdef LIBXML_SAX1_ENABLED
11802
5.88M
      else
11803
5.88M
        xmlParseEndTag1(ctxt, 0);
11804
13.9M
#endif /* LIBXML_SAX1_ENABLED */
11805
13.9M
    if (ctxt->instate == XML_PARSER_EOF) {
11806
        /* Nothing */
11807
13.9M
    } else if (ctxt->nameNr == 0) {
11808
93.5k
        ctxt->instate = XML_PARSER_EPILOG;
11809
13.8M
    } else {
11810
13.8M
        ctxt->instate = XML_PARSER_CONTENT;
11811
13.8M
    }
11812
13.9M
    break;
11813
854k
            case XML_PARSER_CDATA_SECTION: {
11814
          /*
11815
     * The Push mode need to have the SAX callback for
11816
     * cdataBlock merge back contiguous callbacks.
11817
     */
11818
854k
    const xmlChar *term;
11819
11820
854k
                if (terminate) {
11821
                    /*
11822
                     * Don't call xmlParseLookupString. If 'terminate'
11823
                     * is set, checkIndex is invalid.
11824
                     */
11825
16.0k
                    term = BAD_CAST strstr((const char *) ctxt->input->cur,
11826
16.0k
                                           "]]>");
11827
838k
                } else {
11828
838k
        term = xmlParseLookupString(ctxt, 0, "]]>", 3);
11829
838k
                }
11830
11831
854k
    if (term == NULL) {
11832
554k
        int tmp, size;
11833
11834
554k
                    if (terminate) {
11835
                        /* Unfinished CDATA section */
11836
5.63k
                        size = ctxt->input->end - ctxt->input->cur;
11837
548k
                    } else {
11838
548k
                        if (avail < XML_PARSER_BIG_BUFFER_SIZE + 2)
11839
280k
                            goto done;
11840
267k
                        ctxt->checkIndex = 0;
11841
                        /* XXX: Why don't we pass the full buffer? */
11842
267k
                        size = XML_PARSER_BIG_BUFFER_SIZE;
11843
267k
                    }
11844
273k
                    tmp = xmlCheckCdataPush(ctxt->input->cur, size, 0);
11845
273k
                    if (tmp <= 0) {
11846
140k
                        tmp = -tmp;
11847
140k
                        ctxt->input->cur += tmp;
11848
140k
                        goto encoding_error;
11849
140k
                    }
11850
132k
                    if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
11851
132k
                        if (ctxt->sax->cdataBlock != NULL)
11852
68.9k
                            ctxt->sax->cdataBlock(ctxt->userData,
11853
68.9k
                                                  ctxt->input->cur, tmp);
11854
63.6k
                        else if (ctxt->sax->characters != NULL)
11855
63.6k
                            ctxt->sax->characters(ctxt->userData,
11856
63.6k
                                                  ctxt->input->cur, tmp);
11857
132k
                    }
11858
132k
                    if (ctxt->instate == XML_PARSER_EOF)
11859
0
                        goto done;
11860
132k
                    SKIPL(tmp);
11861
300k
    } else {
11862
300k
                    int base = term - CUR_PTR;
11863
300k
        int tmp;
11864
11865
300k
        tmp = xmlCheckCdataPush(ctxt->input->cur, base, 1);
11866
300k
        if ((tmp < 0) || (tmp != base)) {
11867
239k
      tmp = -tmp;
11868
239k
      ctxt->input->cur += tmp;
11869
239k
      goto encoding_error;
11870
239k
        }
11871
61.5k
        if ((ctxt->sax != NULL) && (base == 0) &&
11872
61.5k
            (ctxt->sax->cdataBlock != NULL) &&
11873
61.5k
            (!ctxt->disableSAX)) {
11874
      /*
11875
       * Special case to provide identical behaviour
11876
       * between pull and push parsers on enpty CDATA
11877
       * sections
11878
       */
11879
7.30k
       if ((ctxt->input->cur - ctxt->input->base >= 9) &&
11880
7.30k
           (!strncmp((const char *)&ctxt->input->cur[-9],
11881
7.30k
                     "<![CDATA[", 9)))
11882
7.27k
           ctxt->sax->cdataBlock(ctxt->userData,
11883
7.27k
                                 BAD_CAST "", 0);
11884
54.2k
        } else if ((ctxt->sax != NULL) && (base > 0) &&
11885
54.2k
      (!ctxt->disableSAX)) {
11886
46.7k
      if (ctxt->sax->cdataBlock != NULL)
11887
29.9k
          ctxt->sax->cdataBlock(ctxt->userData,
11888
29.9k
              ctxt->input->cur, base);
11889
16.8k
      else if (ctxt->sax->characters != NULL)
11890
16.8k
          ctxt->sax->characters(ctxt->userData,
11891
16.8k
              ctxt->input->cur, base);
11892
46.7k
        }
11893
61.5k
        if (ctxt->instate == XML_PARSER_EOF)
11894
0
      goto done;
11895
61.5k
        SKIPL(base + 3);
11896
61.5k
        ctxt->instate = XML_PARSER_CONTENT;
11897
#ifdef DEBUG_PUSH
11898
        xmlGenericError(xmlGenericErrorContext,
11899
          "PP: entering CONTENT\n");
11900
#endif
11901
61.5k
    }
11902
194k
    break;
11903
854k
      }
11904
1.17M
            case XML_PARSER_MISC:
11905
1.64M
            case XML_PARSER_PROLOG:
11906
1.76M
            case XML_PARSER_EPILOG:
11907
1.76M
    SKIP_BLANKS;
11908
1.76M
    if (ctxt->input->buf == NULL)
11909
0
        avail = ctxt->input->length -
11910
0
                (ctxt->input->cur - ctxt->input->base);
11911
1.76M
    else
11912
1.76M
        avail = xmlBufUse(ctxt->input->buf->buffer) -
11913
1.76M
                (ctxt->input->cur - ctxt->input->base);
11914
1.76M
    if (avail < 2)
11915
99.2k
        goto done;
11916
1.67M
    cur = ctxt->input->cur[0];
11917
1.67M
    next = ctxt->input->cur[1];
11918
1.67M
          if ((cur == '<') && (next == '?')) {
11919
221k
        if ((!terminate) &&
11920
221k
                        (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11921
55.6k
      goto done;
11922
#ifdef DEBUG_PUSH
11923
        xmlGenericError(xmlGenericErrorContext,
11924
          "PP: Parsing PI\n");
11925
#endif
11926
165k
        xmlParsePI(ctxt);
11927
165k
        if (ctxt->instate == XML_PARSER_EOF)
11928
0
      goto done;
11929
1.44M
    } else if ((cur == '<') && (next == '!') &&
11930
1.44M
        (ctxt->input->cur[2] == '-') &&
11931
1.44M
        (ctxt->input->cur[3] == '-')) {
11932
147k
        if ((!terminate) &&
11933
147k
                        (!xmlParseLookupString(ctxt, 4, "-->", 3)))
11934
66.5k
      goto done;
11935
#ifdef DEBUG_PUSH
11936
        xmlGenericError(xmlGenericErrorContext,
11937
          "PP: Parsing Comment\n");
11938
#endif
11939
81.0k
        xmlParseComment(ctxt);
11940
81.0k
        if (ctxt->instate == XML_PARSER_EOF)
11941
0
      goto done;
11942
1.30M
    } else if ((ctxt->instate == XML_PARSER_MISC) &&
11943
1.30M
                    (cur == '<') && (next == '!') &&
11944
1.30M
        (ctxt->input->cur[2] == 'D') &&
11945
1.30M
        (ctxt->input->cur[3] == 'O') &&
11946
1.30M
        (ctxt->input->cur[4] == 'C') &&
11947
1.30M
        (ctxt->input->cur[5] == 'T') &&
11948
1.30M
        (ctxt->input->cur[6] == 'Y') &&
11949
1.30M
        (ctxt->input->cur[7] == 'P') &&
11950
1.30M
        (ctxt->input->cur[8] == 'E')) {
11951
775k
        if ((!terminate) && (!xmlParseLookupGt(ctxt)))
11952
360k
                        goto done;
11953
#ifdef DEBUG_PUSH
11954
        xmlGenericError(xmlGenericErrorContext,
11955
          "PP: Parsing internal subset\n");
11956
#endif
11957
415k
        ctxt->inSubset = 1;
11958
415k
        xmlParseDocTypeDecl(ctxt);
11959
415k
        if (ctxt->instate == XML_PARSER_EOF)
11960
0
      goto done;
11961
415k
        if (RAW == '[') {
11962
318k
      ctxt->instate = XML_PARSER_DTD;
11963
#ifdef DEBUG_PUSH
11964
      xmlGenericError(xmlGenericErrorContext,
11965
        "PP: entering DTD\n");
11966
#endif
11967
318k
        } else {
11968
      /*
11969
       * Create and update the external subset.
11970
       */
11971
96.3k
      ctxt->inSubset = 2;
11972
96.3k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11973
96.3k
          (ctxt->sax->externalSubset != NULL))
11974
92.2k
          ctxt->sax->externalSubset(ctxt->userData,
11975
92.2k
            ctxt->intSubName, ctxt->extSubSystem,
11976
92.2k
            ctxt->extSubURI);
11977
96.3k
      ctxt->inSubset = 0;
11978
96.3k
      xmlCleanSpecialAttr(ctxt);
11979
96.3k
      ctxt->instate = XML_PARSER_PROLOG;
11980
#ifdef DEBUG_PUSH
11981
      xmlGenericError(xmlGenericErrorContext,
11982
        "PP: entering PROLOG\n");
11983
#endif
11984
96.3k
        }
11985
525k
    } else if ((cur == '<') && (next == '!') &&
11986
525k
               (avail <
11987
52.7k
                            (ctxt->instate == XML_PARSER_MISC ? 9 : 4))) {
11988
48.4k
        goto done;
11989
477k
    } else if (ctxt->instate == XML_PARSER_EPILOG) {
11990
13.5k
        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
11991
13.5k
        xmlHaltParser(ctxt);
11992
#ifdef DEBUG_PUSH
11993
        xmlGenericError(xmlGenericErrorContext,
11994
          "PP: entering EOF\n");
11995
#endif
11996
13.5k
        if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11997
13.5k
      ctxt->sax->endDocument(ctxt->userData);
11998
13.5k
        goto done;
11999
463k
                } else {
12000
463k
        ctxt->instate = XML_PARSER_START_TAG;
12001
#ifdef DEBUG_PUSH
12002
        xmlGenericError(xmlGenericErrorContext,
12003
          "PP: entering START_TAG\n");
12004
#endif
12005
463k
    }
12006
1.12M
    break;
12007
3.10M
            case XML_PARSER_DTD: {
12008
3.10M
                if ((!terminate) && (!xmlParseLookupInternalSubset(ctxt)))
12009
2.79M
                    goto done;
12010
307k
    xmlParseInternalSubset(ctxt);
12011
307k
    if (ctxt->instate == XML_PARSER_EOF)
12012
65.8k
        goto done;
12013
241k
    ctxt->inSubset = 2;
12014
241k
    if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
12015
241k
        (ctxt->sax->externalSubset != NULL))
12016
232k
        ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
12017
232k
          ctxt->extSubSystem, ctxt->extSubURI);
12018
241k
    ctxt->inSubset = 0;
12019
241k
    xmlCleanSpecialAttr(ctxt);
12020
241k
    if (ctxt->instate == XML_PARSER_EOF)
12021
17.9k
        goto done;
12022
223k
    ctxt->instate = XML_PARSER_PROLOG;
12023
#ifdef DEBUG_PUSH
12024
    xmlGenericError(xmlGenericErrorContext,
12025
      "PP: entering PROLOG\n");
12026
#endif
12027
223k
                break;
12028
241k
      }
12029
0
            case XML_PARSER_COMMENT:
12030
0
    xmlGenericError(xmlGenericErrorContext,
12031
0
      "PP: internal error, state == COMMENT\n");
12032
0
    ctxt->instate = XML_PARSER_CONTENT;
12033
#ifdef DEBUG_PUSH
12034
    xmlGenericError(xmlGenericErrorContext,
12035
      "PP: entering CONTENT\n");
12036
#endif
12037
0
    break;
12038
0
            case XML_PARSER_IGNORE:
12039
0
    xmlGenericError(xmlGenericErrorContext,
12040
0
      "PP: internal error, state == IGNORE");
12041
0
          ctxt->instate = XML_PARSER_DTD;
12042
#ifdef DEBUG_PUSH
12043
    xmlGenericError(xmlGenericErrorContext,
12044
      "PP: entering DTD\n");
12045
#endif
12046
0
          break;
12047
0
            case XML_PARSER_PI:
12048
0
    xmlGenericError(xmlGenericErrorContext,
12049
0
      "PP: internal error, state == PI\n");
12050
0
    ctxt->instate = XML_PARSER_CONTENT;
12051
#ifdef DEBUG_PUSH
12052
    xmlGenericError(xmlGenericErrorContext,
12053
      "PP: entering CONTENT\n");
12054
#endif
12055
0
    break;
12056
0
            case XML_PARSER_ENTITY_DECL:
12057
0
    xmlGenericError(xmlGenericErrorContext,
12058
0
      "PP: internal error, state == ENTITY_DECL\n");
12059
0
    ctxt->instate = XML_PARSER_DTD;
12060
#ifdef DEBUG_PUSH
12061
    xmlGenericError(xmlGenericErrorContext,
12062
      "PP: entering DTD\n");
12063
#endif
12064
0
    break;
12065
0
            case XML_PARSER_ENTITY_VALUE:
12066
0
    xmlGenericError(xmlGenericErrorContext,
12067
0
      "PP: internal error, state == ENTITY_VALUE\n");
12068
0
    ctxt->instate = XML_PARSER_CONTENT;
12069
#ifdef DEBUG_PUSH
12070
    xmlGenericError(xmlGenericErrorContext,
12071
      "PP: entering DTD\n");
12072
#endif
12073
0
    break;
12074
0
            case XML_PARSER_ATTRIBUTE_VALUE:
12075
0
    xmlGenericError(xmlGenericErrorContext,
12076
0
      "PP: internal error, state == ATTRIBUTE_VALUE\n");
12077
0
    ctxt->instate = XML_PARSER_START_TAG;
12078
#ifdef DEBUG_PUSH
12079
    xmlGenericError(xmlGenericErrorContext,
12080
      "PP: entering START_TAG\n");
12081
#endif
12082
0
    break;
12083
0
            case XML_PARSER_SYSTEM_LITERAL:
12084
0
    xmlGenericError(xmlGenericErrorContext,
12085
0
      "PP: internal error, state == SYSTEM_LITERAL\n");
12086
0
    ctxt->instate = XML_PARSER_START_TAG;
12087
#ifdef DEBUG_PUSH
12088
    xmlGenericError(xmlGenericErrorContext,
12089
      "PP: entering START_TAG\n");
12090
#endif
12091
0
    break;
12092
0
            case XML_PARSER_PUBLIC_LITERAL:
12093
0
    xmlGenericError(xmlGenericErrorContext,
12094
0
      "PP: internal error, state == PUBLIC_LITERAL\n");
12095
0
    ctxt->instate = XML_PARSER_START_TAG;
12096
#ifdef DEBUG_PUSH
12097
    xmlGenericError(xmlGenericErrorContext,
12098
      "PP: entering START_TAG\n");
12099
#endif
12100
0
    break;
12101
144M
  }
12102
144M
    }
12103
15.7M
done:
12104
#ifdef DEBUG_PUSH
12105
    xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
12106
#endif
12107
15.7M
    return(ret);
12108
379k
encoding_error:
12109
379k
    {
12110
379k
        char buffer[150];
12111
12112
379k
  snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
12113
379k
      ctxt->input->cur[0], ctxt->input->cur[1],
12114
379k
      ctxt->input->cur[2], ctxt->input->cur[3]);
12115
379k
  __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
12116
379k
         "Input is not proper UTF-8, indicate encoding !\n%s",
12117
379k
         BAD_CAST buffer, NULL);
12118
379k
    }
12119
379k
    return(0);
12120
16.3M
}
12121
12122
/**
12123
 * xmlParseChunk:
12124
 * @ctxt:  an XML parser context
12125
 * @chunk:  an char array
12126
 * @size:  the size in byte of the chunk
12127
 * @terminate:  last chunk indicator
12128
 *
12129
 * Parse a Chunk of memory
12130
 *
12131
 * Returns zero if no error, the xmlParserErrors otherwise.
12132
 */
12133
int
12134
xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
12135
22.7M
              int terminate) {
12136
22.7M
    int end_in_lf = 0;
12137
22.7M
    int remain = 0;
12138
12139
22.7M
    if (ctxt == NULL)
12140
0
        return(XML_ERR_INTERNAL_ERROR);
12141
22.7M
    if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12142
6.43M
        return(ctxt->errNo);
12143
16.2M
    if (ctxt->instate == XML_PARSER_EOF)
12144
1.49k
        return(-1);
12145
16.2M
    if (ctxt->input == NULL)
12146
0
        return(-1);
12147
12148
16.2M
    ctxt->progressive = 1;
12149
16.2M
    if (ctxt->instate == XML_PARSER_START)
12150
1.11M
        xmlDetectSAX2(ctxt);
12151
16.2M
    if ((size > 0) && (chunk != NULL) && (!terminate) &&
12152
16.2M
        (chunk[size - 1] == '\r')) {
12153
77.8k
  end_in_lf = 1;
12154
77.8k
  size--;
12155
77.8k
    }
12156
12157
16.3M
xmldecl_done:
12158
12159
16.3M
    if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
12160
16.3M
        (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF))  {
12161
15.8M
  size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12162
15.8M
  size_t cur = ctxt->input->cur - ctxt->input->base;
12163
15.8M
  int res;
12164
12165
        /*
12166
         * Specific handling if we autodetected an encoding, we should not
12167
         * push more than the first line ... which depend on the encoding
12168
         * And only push the rest once the final encoding was detected
12169
         */
12170
15.8M
        if ((ctxt->instate == XML_PARSER_START) && (ctxt->input != NULL) &&
12171
15.8M
            (ctxt->input->buf != NULL) && (ctxt->input->buf->encoder != NULL)) {
12172
76.9k
            unsigned int len = 45;
12173
12174
76.9k
            if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12175
76.9k
                               BAD_CAST "UTF-16")) ||
12176
76.9k
                (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12177
42.1k
                               BAD_CAST "UTF16")))
12178
34.8k
                len = 90;
12179
42.1k
            else if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12180
42.1k
                                    BAD_CAST "UCS-4")) ||
12181
42.1k
                     (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12182
41.7k
                                    BAD_CAST "UCS4")))
12183
395
                len = 180;
12184
12185
76.9k
            if (ctxt->input->buf->rawconsumed < len)
12186
8.13k
                len -= ctxt->input->buf->rawconsumed;
12187
12188
            /*
12189
             * Change size for reading the initial declaration only
12190
             * if size is greater than len. Otherwise, memmove in xmlBufferAdd
12191
             * will blindly copy extra bytes from memory.
12192
             */
12193
76.9k
            if ((unsigned int) size > len) {
12194
54.5k
                remain = size - len;
12195
54.5k
                size = len;
12196
54.5k
            } else {
12197
22.4k
                remain = 0;
12198
22.4k
            }
12199
76.9k
        }
12200
15.8M
  res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12201
15.8M
        xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
12202
15.8M
  if (res < 0) {
12203
3.20k
      ctxt->errNo = XML_PARSER_EOF;
12204
3.20k
      xmlHaltParser(ctxt);
12205
3.20k
      return (XML_PARSER_EOF);
12206
3.20k
  }
12207
#ifdef DEBUG_PUSH
12208
  xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12209
#endif
12210
12211
15.8M
    } else if (ctxt->instate != XML_PARSER_EOF) {
12212
456k
  if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
12213
456k
      xmlParserInputBufferPtr in = ctxt->input->buf;
12214
456k
      if ((in->encoder != NULL) && (in->buffer != NULL) &&
12215
456k
        (in->raw != NULL)) {
12216
54.7k
    int nbchars;
12217
54.7k
    size_t base = xmlBufGetInputBase(in->buffer, ctxt->input);
12218
54.7k
    size_t current = ctxt->input->cur - ctxt->input->base;
12219
12220
54.7k
    nbchars = xmlCharEncInput(in, terminate);
12221
54.7k
    xmlBufSetInputBaseCur(in->buffer, ctxt->input, base, current);
12222
54.7k
    if (nbchars < 0) {
12223
        /* TODO 2.6.0 */
12224
3.10k
        xmlGenericError(xmlGenericErrorContext,
12225
3.10k
            "xmlParseChunk: encoder error\n");
12226
3.10k
                    xmlHaltParser(ctxt);
12227
3.10k
        return(XML_ERR_INVALID_ENCODING);
12228
3.10k
    }
12229
54.7k
      }
12230
456k
  }
12231
456k
    }
12232
12233
16.3M
    if (remain != 0) {
12234
53.4k
        xmlParseTryOrFinish(ctxt, 0);
12235
16.2M
    } else {
12236
16.2M
        xmlParseTryOrFinish(ctxt, terminate);
12237
16.2M
    }
12238
16.3M
    if (ctxt->instate == XML_PARSER_EOF)
12239
154k
        return(ctxt->errNo);
12240
12241
16.1M
    if ((ctxt->input != NULL) &&
12242
16.1M
         (((ctxt->input->end - ctxt->input->cur) > XML_MAX_LOOKUP_LIMIT) ||
12243
16.1M
         ((ctxt->input->cur - ctxt->input->base) > XML_MAX_LOOKUP_LIMIT)) &&
12244
16.1M
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
12245
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
12246
0
        xmlHaltParser(ctxt);
12247
0
    }
12248
16.1M
    if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12249
206k
        return(ctxt->errNo);
12250
12251
15.9M
    if (remain != 0) {
12252
52.2k
        chunk += size;
12253
52.2k
        size = remain;
12254
52.2k
        remain = 0;
12255
52.2k
        goto xmldecl_done;
12256
52.2k
    }
12257
15.9M
    if ((end_in_lf == 1) && (ctxt->input != NULL) &&
12258
15.9M
        (ctxt->input->buf != NULL)) {
12259
76.9k
  size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
12260
76.9k
           ctxt->input);
12261
76.9k
  size_t current = ctxt->input->cur - ctxt->input->base;
12262
12263
76.9k
  xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
12264
12265
76.9k
  xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
12266
76.9k
            base, current);
12267
76.9k
    }
12268
15.9M
    if (terminate) {
12269
  /*
12270
   * Check for termination
12271
   */
12272
229k
  int cur_avail = 0;
12273
12274
229k
  if (ctxt->input != NULL) {
12275
229k
      if (ctxt->input->buf == NULL)
12276
0
    cur_avail = ctxt->input->length -
12277
0
          (ctxt->input->cur - ctxt->input->base);
12278
229k
      else
12279
229k
    cur_avail = xmlBufUse(ctxt->input->buf->buffer) -
12280
229k
                    (ctxt->input->cur - ctxt->input->base);
12281
229k
  }
12282
12283
229k
  if ((ctxt->instate != XML_PARSER_EOF) &&
12284
229k
      (ctxt->instate != XML_PARSER_EPILOG)) {
12285
142k
      xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
12286
142k
  }
12287
229k
  if ((ctxt->instate == XML_PARSER_EPILOG) && (cur_avail > 0)) {
12288
1.31k
      xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
12289
1.31k
  }
12290
229k
  if (ctxt->instate != XML_PARSER_EOF) {
12291
229k
      if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
12292
229k
    ctxt->sax->endDocument(ctxt->userData);
12293
229k
  }
12294
229k
  ctxt->instate = XML_PARSER_EOF;
12295
229k
    }
12296
15.9M
    if (ctxt->wellFormed == 0)
12297
6.17M
  return((xmlParserErrors) ctxt->errNo);
12298
9.73M
    else
12299
9.73M
        return(0);
12300
15.9M
}
12301
12302
/************************************************************************
12303
 *                  *
12304
 *    I/O front end functions to the parser     *
12305
 *                  *
12306
 ************************************************************************/
12307
12308
/**
12309
 * xmlCreatePushParserCtxt:
12310
 * @sax:  a SAX handler
12311
 * @user_data:  The user data returned on SAX callbacks
12312
 * @chunk:  a pointer to an array of chars
12313
 * @size:  number of chars in the array
12314
 * @filename:  an optional file name or URI
12315
 *
12316
 * Create a parser context for using the XML parser in push mode.
12317
 * If @buffer and @size are non-NULL, the data is used to detect
12318
 * the encoding.  The remaining characters will be parsed so they
12319
 * don't need to be fed in again through xmlParseChunk.
12320
 * To allow content encoding detection, @size should be >= 4
12321
 * The value of @filename is used for fetching external entities
12322
 * and error/warning reports.
12323
 *
12324
 * Returns the new parser context or NULL
12325
 */
12326
12327
xmlParserCtxtPtr
12328
xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12329
690k
                        const char *chunk, int size, const char *filename) {
12330
690k
    xmlParserCtxtPtr ctxt;
12331
690k
    xmlParserInputPtr inputStream;
12332
690k
    xmlParserInputBufferPtr buf;
12333
690k
    xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
12334
12335
    /*
12336
     * plug some encoding conversion routines
12337
     */
12338
690k
    if ((chunk != NULL) && (size >= 4))
12339
339k
  enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
12340
12341
690k
    buf = xmlAllocParserInputBuffer(enc);
12342
690k
    if (buf == NULL) return(NULL);
12343
12344
690k
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
12345
690k
    if (ctxt == NULL) {
12346
0
        xmlErrMemory(NULL, "creating parser: out of memory\n");
12347
0
  xmlFreeParserInputBuffer(buf);
12348
0
  return(NULL);
12349
0
    }
12350
690k
    ctxt->dictNames = 1;
12351
690k
    if (filename == NULL) {
12352
345k
  ctxt->directory = NULL;
12353
345k
    } else {
12354
345k
        ctxt->directory = xmlParserGetDirectory(filename);
12355
345k
    }
12356
12357
690k
    inputStream = xmlNewInputStream(ctxt);
12358
690k
    if (inputStream == NULL) {
12359
0
  xmlFreeParserCtxt(ctxt);
12360
0
  xmlFreeParserInputBuffer(buf);
12361
0
  return(NULL);
12362
0
    }
12363
12364
690k
    if (filename == NULL)
12365
345k
  inputStream->filename = NULL;
12366
345k
    else {
12367
345k
  inputStream->filename = (char *)
12368
345k
      xmlCanonicPath((const xmlChar *) filename);
12369
345k
  if (inputStream->filename == NULL) {
12370
0
            xmlFreeInputStream(inputStream);
12371
0
      xmlFreeParserCtxt(ctxt);
12372
0
      xmlFreeParserInputBuffer(buf);
12373
0
      return(NULL);
12374
0
  }
12375
345k
    }
12376
690k
    inputStream->buf = buf;
12377
690k
    xmlBufResetInput(inputStream->buf->buffer, inputStream);
12378
690k
    inputPush(ctxt, inputStream);
12379
12380
    /*
12381
     * If the caller didn't provide an initial 'chunk' for determining
12382
     * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
12383
     * that it can be automatically determined later
12384
     */
12385
690k
    ctxt->charset = XML_CHAR_ENCODING_NONE;
12386
12387
690k
    if ((size != 0) && (chunk != NULL) &&
12388
690k
        (ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
12389
339k
  size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12390
339k
  size_t cur = ctxt->input->cur - ctxt->input->base;
12391
12392
339k
  xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12393
12394
339k
        xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
12395
#ifdef DEBUG_PUSH
12396
  xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12397
#endif
12398
339k
    }
12399
12400
690k
    if (enc != XML_CHAR_ENCODING_NONE) {
12401
213k
        xmlSwitchEncoding(ctxt, enc);
12402
213k
    }
12403
12404
690k
    return(ctxt);
12405
690k
}
12406
#endif /* LIBXML_PUSH_ENABLED */
12407
12408
/**
12409
 * xmlHaltParser:
12410
 * @ctxt:  an XML parser context
12411
 *
12412
 * Blocks further parser processing don't override error
12413
 * for internal use
12414
 */
12415
static void
12416
610k
xmlHaltParser(xmlParserCtxtPtr ctxt) {
12417
610k
    if (ctxt == NULL)
12418
0
        return;
12419
610k
    ctxt->instate = XML_PARSER_EOF;
12420
610k
    ctxt->disableSAX = 1;
12421
705k
    while (ctxt->inputNr > 1)
12422
94.6k
        xmlFreeInputStream(inputPop(ctxt));
12423
610k
    if (ctxt->input != NULL) {
12424
        /*
12425
   * in case there was a specific allocation deallocate before
12426
   * overriding base
12427
   */
12428
610k
        if (ctxt->input->free != NULL) {
12429
0
      ctxt->input->free((xmlChar *) ctxt->input->base);
12430
0
      ctxt->input->free = NULL;
12431
0
  }
12432
610k
        if (ctxt->input->buf != NULL) {
12433
548k
            xmlFreeParserInputBuffer(ctxt->input->buf);
12434
548k
            ctxt->input->buf = NULL;
12435
548k
        }
12436
610k
  ctxt->input->cur = BAD_CAST"";
12437
610k
        ctxt->input->length = 0;
12438
610k
  ctxt->input->base = ctxt->input->cur;
12439
610k
        ctxt->input->end = ctxt->input->cur;
12440
610k
    }
12441
610k
}
12442
12443
/**
12444
 * xmlStopParser:
12445
 * @ctxt:  an XML parser context
12446
 *
12447
 * Blocks further parser processing
12448
 */
12449
void
12450
345k
xmlStopParser(xmlParserCtxtPtr ctxt) {
12451
345k
    if (ctxt == NULL)
12452
0
        return;
12453
345k
    xmlHaltParser(ctxt);
12454
345k
    ctxt->errNo = XML_ERR_USER_STOP;
12455
345k
}
12456
12457
/**
12458
 * xmlCreateIOParserCtxt:
12459
 * @sax:  a SAX handler
12460
 * @user_data:  The user data returned on SAX callbacks
12461
 * @ioread:  an I/O read function
12462
 * @ioclose:  an I/O close function
12463
 * @ioctx:  an I/O handler
12464
 * @enc:  the charset encoding if known
12465
 *
12466
 * Create a parser context for using the XML parser with an existing
12467
 * I/O stream
12468
 *
12469
 * Returns the new parser context or NULL
12470
 */
12471
xmlParserCtxtPtr
12472
xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12473
  xmlInputReadCallback   ioread, xmlInputCloseCallback  ioclose,
12474
0
  void *ioctx, xmlCharEncoding enc) {
12475
0
    xmlParserCtxtPtr ctxt;
12476
0
    xmlParserInputPtr inputStream;
12477
0
    xmlParserInputBufferPtr buf;
12478
12479
0
    if (ioread == NULL) return(NULL);
12480
12481
0
    buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
12482
0
    if (buf == NULL) {
12483
0
        if (ioclose != NULL)
12484
0
            ioclose(ioctx);
12485
0
        return (NULL);
12486
0
    }
12487
12488
0
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
12489
0
    if (ctxt == NULL) {
12490
0
  xmlFreeParserInputBuffer(buf);
12491
0
  return(NULL);
12492
0
    }
12493
12494
0
    inputStream = xmlNewIOInputStream(ctxt, buf, enc);
12495
0
    if (inputStream == NULL) {
12496
0
  xmlFreeParserCtxt(ctxt);
12497
0
  return(NULL);
12498
0
    }
12499
0
    inputPush(ctxt, inputStream);
12500
12501
0
    return(ctxt);
12502
0
}
12503
12504
#ifdef LIBXML_VALID_ENABLED
12505
/************************************************************************
12506
 *                  *
12507
 *    Front ends when parsing a DTD       *
12508
 *                  *
12509
 ************************************************************************/
12510
12511
/**
12512
 * xmlIOParseDTD:
12513
 * @sax:  the SAX handler block or NULL
12514
 * @input:  an Input Buffer
12515
 * @enc:  the charset encoding if known
12516
 *
12517
 * Load and parse a DTD
12518
 *
12519
 * Returns the resulting xmlDtdPtr or NULL in case of error.
12520
 * @input will be freed by the function in any case.
12521
 */
12522
12523
xmlDtdPtr
12524
xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
12525
0
        xmlCharEncoding enc) {
12526
0
    xmlDtdPtr ret = NULL;
12527
0
    xmlParserCtxtPtr ctxt;
12528
0
    xmlParserInputPtr pinput = NULL;
12529
0
    xmlChar start[4];
12530
12531
0
    if (input == NULL)
12532
0
  return(NULL);
12533
12534
0
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
12535
0
    if (ctxt == NULL) {
12536
0
        xmlFreeParserInputBuffer(input);
12537
0
  return(NULL);
12538
0
    }
12539
12540
    /* We are loading a DTD */
12541
0
    ctxt->options |= XML_PARSE_DTDLOAD;
12542
12543
0
    xmlDetectSAX2(ctxt);
12544
12545
    /*
12546
     * generate a parser input from the I/O handler
12547
     */
12548
12549
0
    pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12550
0
    if (pinput == NULL) {
12551
0
        xmlFreeParserInputBuffer(input);
12552
0
  xmlFreeParserCtxt(ctxt);
12553
0
  return(NULL);
12554
0
    }
12555
12556
    /*
12557
     * plug some encoding conversion routines here.
12558
     */
12559
0
    if (xmlPushInput(ctxt, pinput) < 0) {
12560
0
  xmlFreeParserCtxt(ctxt);
12561
0
  return(NULL);
12562
0
    }
12563
0
    if (enc != XML_CHAR_ENCODING_NONE) {
12564
0
        xmlSwitchEncoding(ctxt, enc);
12565
0
    }
12566
12567
0
    pinput->filename = NULL;
12568
0
    pinput->line = 1;
12569
0
    pinput->col = 1;
12570
0
    pinput->base = ctxt->input->cur;
12571
0
    pinput->cur = ctxt->input->cur;
12572
0
    pinput->free = NULL;
12573
12574
    /*
12575
     * let's parse that entity knowing it's an external subset.
12576
     */
12577
0
    ctxt->inSubset = 2;
12578
0
    ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12579
0
    if (ctxt->myDoc == NULL) {
12580
0
  xmlErrMemory(ctxt, "New Doc failed");
12581
0
  return(NULL);
12582
0
    }
12583
0
    ctxt->myDoc->properties = XML_DOC_INTERNAL;
12584
0
    ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12585
0
                                 BAD_CAST "none", BAD_CAST "none");
12586
12587
0
    if ((enc == XML_CHAR_ENCODING_NONE) &&
12588
0
        ((ctxt->input->end - ctxt->input->cur) >= 4)) {
12589
  /*
12590
   * Get the 4 first bytes and decode the charset
12591
   * if enc != XML_CHAR_ENCODING_NONE
12592
   * plug some encoding conversion routines.
12593
   */
12594
0
  start[0] = RAW;
12595
0
  start[1] = NXT(1);
12596
0
  start[2] = NXT(2);
12597
0
  start[3] = NXT(3);
12598
0
  enc = xmlDetectCharEncoding(start, 4);
12599
0
  if (enc != XML_CHAR_ENCODING_NONE) {
12600
0
      xmlSwitchEncoding(ctxt, enc);
12601
0
  }
12602
0
    }
12603
12604
0
    xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
12605
12606
0
    if (ctxt->myDoc != NULL) {
12607
0
  if (ctxt->wellFormed) {
12608
0
      ret = ctxt->myDoc->extSubset;
12609
0
      ctxt->myDoc->extSubset = NULL;
12610
0
      if (ret != NULL) {
12611
0
    xmlNodePtr tmp;
12612
12613
0
    ret->doc = NULL;
12614
0
    tmp = ret->children;
12615
0
    while (tmp != NULL) {
12616
0
        tmp->doc = NULL;
12617
0
        tmp = tmp->next;
12618
0
    }
12619
0
      }
12620
0
  } else {
12621
0
      ret = NULL;
12622
0
  }
12623
0
        xmlFreeDoc(ctxt->myDoc);
12624
0
        ctxt->myDoc = NULL;
12625
0
    }
12626
0
    xmlFreeParserCtxt(ctxt);
12627
12628
0
    return(ret);
12629
0
}
12630
12631
/**
12632
 * xmlSAXParseDTD:
12633
 * @sax:  the SAX handler block
12634
 * @ExternalID:  a NAME* containing the External ID of the DTD
12635
 * @SystemID:  a NAME* containing the URL to the DTD
12636
 *
12637
 * DEPRECATED: Don't use.
12638
 *
12639
 * Load and parse an external subset.
12640
 *
12641
 * Returns the resulting xmlDtdPtr or NULL in case of error.
12642
 */
12643
12644
xmlDtdPtr
12645
xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
12646
0
                          const xmlChar *SystemID) {
12647
0
    xmlDtdPtr ret = NULL;
12648
0
    xmlParserCtxtPtr ctxt;
12649
0
    xmlParserInputPtr input = NULL;
12650
0
    xmlCharEncoding enc;
12651
0
    xmlChar* systemIdCanonic;
12652
12653
0
    if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
12654
12655
0
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
12656
0
    if (ctxt == NULL) {
12657
0
  return(NULL);
12658
0
    }
12659
12660
    /* We are loading a DTD */
12661
0
    ctxt->options |= XML_PARSE_DTDLOAD;
12662
12663
    /*
12664
     * Canonicalise the system ID
12665
     */
12666
0
    systemIdCanonic = xmlCanonicPath(SystemID);
12667
0
    if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
12668
0
  xmlFreeParserCtxt(ctxt);
12669
0
  return(NULL);
12670
0
    }
12671
12672
    /*
12673
     * Ask the Entity resolver to load the damn thing
12674
     */
12675
12676
0
    if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
12677
0
  input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
12678
0
                                   systemIdCanonic);
12679
0
    if (input == NULL) {
12680
0
  xmlFreeParserCtxt(ctxt);
12681
0
  if (systemIdCanonic != NULL)
12682
0
      xmlFree(systemIdCanonic);
12683
0
  return(NULL);
12684
0
    }
12685
12686
    /*
12687
     * plug some encoding conversion routines here.
12688
     */
12689
0
    if (xmlPushInput(ctxt, input) < 0) {
12690
0
  xmlFreeParserCtxt(ctxt);
12691
0
  if (systemIdCanonic != NULL)
12692
0
      xmlFree(systemIdCanonic);
12693
0
  return(NULL);
12694
0
    }
12695
0
    if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12696
0
  enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
12697
0
  xmlSwitchEncoding(ctxt, enc);
12698
0
    }
12699
12700
0
    if (input->filename == NULL)
12701
0
  input->filename = (char *) systemIdCanonic;
12702
0
    else
12703
0
  xmlFree(systemIdCanonic);
12704
0
    input->line = 1;
12705
0
    input->col = 1;
12706
0
    input->base = ctxt->input->cur;
12707
0
    input->cur = ctxt->input->cur;
12708
0
    input->free = NULL;
12709
12710
    /*
12711
     * let's parse that entity knowing it's an external subset.
12712
     */
12713
0
    ctxt->inSubset = 2;
12714
0
    ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12715
0
    if (ctxt->myDoc == NULL) {
12716
0
  xmlErrMemory(ctxt, "New Doc failed");
12717
0
  xmlFreeParserCtxt(ctxt);
12718
0
  return(NULL);
12719
0
    }
12720
0
    ctxt->myDoc->properties = XML_DOC_INTERNAL;
12721
0
    ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12722
0
                                 ExternalID, SystemID);
12723
0
    xmlParseExternalSubset(ctxt, ExternalID, SystemID);
12724
12725
0
    if (ctxt->myDoc != NULL) {
12726
0
  if (ctxt->wellFormed) {
12727
0
      ret = ctxt->myDoc->extSubset;
12728
0
      ctxt->myDoc->extSubset = NULL;
12729
0
      if (ret != NULL) {
12730
0
    xmlNodePtr tmp;
12731
12732
0
    ret->doc = NULL;
12733
0
    tmp = ret->children;
12734
0
    while (tmp != NULL) {
12735
0
        tmp->doc = NULL;
12736
0
        tmp = tmp->next;
12737
0
    }
12738
0
      }
12739
0
  } else {
12740
0
      ret = NULL;
12741
0
  }
12742
0
        xmlFreeDoc(ctxt->myDoc);
12743
0
        ctxt->myDoc = NULL;
12744
0
    }
12745
0
    xmlFreeParserCtxt(ctxt);
12746
12747
0
    return(ret);
12748
0
}
12749
12750
12751
/**
12752
 * xmlParseDTD:
12753
 * @ExternalID:  a NAME* containing the External ID of the DTD
12754
 * @SystemID:  a NAME* containing the URL to the DTD
12755
 *
12756
 * Load and parse an external subset.
12757
 *
12758
 * Returns the resulting xmlDtdPtr or NULL in case of error.
12759
 */
12760
12761
xmlDtdPtr
12762
0
xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
12763
0
    return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
12764
0
}
12765
#endif /* LIBXML_VALID_ENABLED */
12766
12767
/************************************************************************
12768
 *                  *
12769
 *    Front ends when parsing an Entity     *
12770
 *                  *
12771
 ************************************************************************/
12772
12773
/**
12774
 * xmlParseCtxtExternalEntity:
12775
 * @ctx:  the existing parsing context
12776
 * @URL:  the URL for the entity to load
12777
 * @ID:  the System ID for the entity to load
12778
 * @lst:  the return value for the set of parsed nodes
12779
 *
12780
 * Parse an external general entity within an existing parsing context
12781
 * An external general parsed entity is well-formed if it matches the
12782
 * production labeled extParsedEnt.
12783
 *
12784
 * [78] extParsedEnt ::= TextDecl? content
12785
 *
12786
 * Returns 0 if the entity is well formed, -1 in case of args problem and
12787
 *    the parser error code otherwise
12788
 */
12789
12790
int
12791
xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
12792
0
                 const xmlChar *ID, xmlNodePtr *lst) {
12793
0
    void *userData;
12794
12795
0
    if (ctx == NULL) return(-1);
12796
    /*
12797
     * If the user provided their own SAX callbacks, then reuse the
12798
     * userData callback field, otherwise the expected setup in a
12799
     * DOM builder is to have userData == ctxt
12800
     */
12801
0
    if (ctx->userData == ctx)
12802
0
        userData = NULL;
12803
0
    else
12804
0
        userData = ctx->userData;
12805
0
    return xmlParseExternalEntityPrivate(ctx->myDoc, ctx, ctx->sax,
12806
0
                                         userData, ctx->depth + 1,
12807
0
                                         URL, ID, lst);
12808
0
}
12809
12810
/**
12811
 * xmlParseExternalEntityPrivate:
12812
 * @doc:  the document the chunk pertains to
12813
 * @oldctxt:  the previous parser context if available
12814
 * @sax:  the SAX handler block (possibly NULL)
12815
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
12816
 * @depth:  Used for loop detection, use 0
12817
 * @URL:  the URL for the entity to load
12818
 * @ID:  the System ID for the entity to load
12819
 * @list:  the return value for the set of parsed nodes
12820
 *
12821
 * Private version of xmlParseExternalEntity()
12822
 *
12823
 * Returns 0 if the entity is well formed, -1 in case of args problem and
12824
 *    the parser error code otherwise
12825
 */
12826
12827
static xmlParserErrors
12828
xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
12829
                xmlSAXHandlerPtr sax,
12830
          void *user_data, int depth, const xmlChar *URL,
12831
1.02M
          const xmlChar *ID, xmlNodePtr *list) {
12832
1.02M
    xmlParserCtxtPtr ctxt;
12833
1.02M
    xmlDocPtr newDoc;
12834
1.02M
    xmlNodePtr newRoot;
12835
1.02M
    xmlParserErrors ret = XML_ERR_OK;
12836
1.02M
    xmlChar start[4];
12837
1.02M
    xmlCharEncoding enc;
12838
12839
1.02M
    if (((depth > 40) &&
12840
1.02M
  ((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) ||
12841
1.02M
  (depth > 100)) {
12842
0
  xmlFatalErrMsg(oldctxt, XML_ERR_ENTITY_LOOP,
12843
0
                       "Maximum entity nesting depth exceeded");
12844
0
        return(XML_ERR_ENTITY_LOOP);
12845
0
    }
12846
12847
1.02M
    if (list != NULL)
12848
95.2k
        *list = NULL;
12849
1.02M
    if ((URL == NULL) && (ID == NULL))
12850
896
  return(XML_ERR_INTERNAL_ERROR);
12851
1.02M
    if (doc == NULL)
12852
0
  return(XML_ERR_INTERNAL_ERROR);
12853
12854
1.02M
    ctxt = xmlCreateEntityParserCtxtInternal(sax, user_data, URL, ID, NULL,
12855
1.02M
                                             oldctxt);
12856
1.02M
    if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
12857
188k
    if (oldctxt != NULL) {
12858
188k
        ctxt->nbErrors = oldctxt->nbErrors;
12859
188k
        ctxt->nbWarnings = oldctxt->nbWarnings;
12860
188k
    }
12861
188k
    xmlDetectSAX2(ctxt);
12862
12863
188k
    newDoc = xmlNewDoc(BAD_CAST "1.0");
12864
188k
    if (newDoc == NULL) {
12865
0
  xmlFreeParserCtxt(ctxt);
12866
0
  return(XML_ERR_INTERNAL_ERROR);
12867
0
    }
12868
188k
    newDoc->properties = XML_DOC_INTERNAL;
12869
188k
    if (doc) {
12870
188k
        newDoc->intSubset = doc->intSubset;
12871
188k
        newDoc->extSubset = doc->extSubset;
12872
188k
        if (doc->dict) {
12873
116k
            newDoc->dict = doc->dict;
12874
116k
            xmlDictReference(newDoc->dict);
12875
116k
        }
12876
188k
        if (doc->URL != NULL) {
12877
114k
            newDoc->URL = xmlStrdup(doc->URL);
12878
114k
        }
12879
188k
    }
12880
188k
    newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12881
188k
    if (newRoot == NULL) {
12882
0
  if (sax != NULL)
12883
0
  xmlFreeParserCtxt(ctxt);
12884
0
  newDoc->intSubset = NULL;
12885
0
  newDoc->extSubset = NULL;
12886
0
        xmlFreeDoc(newDoc);
12887
0
  return(XML_ERR_INTERNAL_ERROR);
12888
0
    }
12889
188k
    xmlAddChild((xmlNodePtr) newDoc, newRoot);
12890
188k
    nodePush(ctxt, newDoc->children);
12891
188k
    if (doc == NULL) {
12892
0
        ctxt->myDoc = newDoc;
12893
188k
    } else {
12894
188k
        ctxt->myDoc = doc;
12895
188k
        newRoot->doc = doc;
12896
188k
    }
12897
12898
    /*
12899
     * Get the 4 first bytes and decode the charset
12900
     * if enc != XML_CHAR_ENCODING_NONE
12901
     * plug some encoding conversion routines.
12902
     */
12903
188k
    GROW;
12904
188k
    if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12905
181k
  start[0] = RAW;
12906
181k
  start[1] = NXT(1);
12907
181k
  start[2] = NXT(2);
12908
181k
  start[3] = NXT(3);
12909
181k
  enc = xmlDetectCharEncoding(start, 4);
12910
181k
  if (enc != XML_CHAR_ENCODING_NONE) {
12911
11.5k
      xmlSwitchEncoding(ctxt, enc);
12912
11.5k
  }
12913
181k
    }
12914
12915
    /*
12916
     * Parse a possible text declaration first
12917
     */
12918
188k
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
12919
8.15k
  xmlParseTextDecl(ctxt);
12920
        /*
12921
         * An XML-1.0 document can't reference an entity not XML-1.0
12922
         */
12923
8.15k
        if ((xmlStrEqual(oldctxt->version, BAD_CAST "1.0")) &&
12924
8.15k
            (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
12925
498
            xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
12926
498
                           "Version mismatch between document and entity\n");
12927
498
        }
12928
8.15k
    }
12929
12930
188k
    ctxt->instate = XML_PARSER_CONTENT;
12931
188k
    ctxt->depth = depth;
12932
188k
    if (oldctxt != NULL) {
12933
188k
  ctxt->_private = oldctxt->_private;
12934
188k
  ctxt->loadsubset = oldctxt->loadsubset;
12935
188k
  ctxt->validate = oldctxt->validate;
12936
188k
  ctxt->valid = oldctxt->valid;
12937
188k
  ctxt->replaceEntities = oldctxt->replaceEntities;
12938
188k
        if (oldctxt->validate) {
12939
83.1k
            ctxt->vctxt.error = oldctxt->vctxt.error;
12940
83.1k
            ctxt->vctxt.warning = oldctxt->vctxt.warning;
12941
83.1k
            ctxt->vctxt.userData = oldctxt->vctxt.userData;
12942
83.1k
            ctxt->vctxt.flags = oldctxt->vctxt.flags;
12943
83.1k
        }
12944
188k
  ctxt->external = oldctxt->external;
12945
188k
        if (ctxt->dict) xmlDictFree(ctxt->dict);
12946
188k
        ctxt->dict = oldctxt->dict;
12947
188k
        ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12948
188k
        ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12949
188k
        ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
12950
188k
        ctxt->dictNames = oldctxt->dictNames;
12951
188k
        ctxt->attsDefault = oldctxt->attsDefault;
12952
188k
        ctxt->attsSpecial = oldctxt->attsSpecial;
12953
188k
        ctxt->linenumbers = oldctxt->linenumbers;
12954
188k
  ctxt->record_info = oldctxt->record_info;
12955
188k
  ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
12956
188k
  ctxt->node_seq.length = oldctxt->node_seq.length;
12957
188k
  ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
12958
188k
    } else {
12959
  /*
12960
   * Doing validity checking on chunk without context
12961
   * doesn't make sense
12962
   */
12963
0
  ctxt->_private = NULL;
12964
0
  ctxt->validate = 0;
12965
0
  ctxt->external = 2;
12966
0
  ctxt->loadsubset = 0;
12967
0
    }
12968
12969
188k
    xmlParseContent(ctxt);
12970
12971
188k
    if ((RAW == '<') && (NXT(1) == '/')) {
12972
1.57k
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12973
186k
    } else if (RAW != 0) {
12974
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
12975
0
    }
12976
188k
    if (ctxt->node != newDoc->children) {
12977
9.38k
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12978
9.38k
    }
12979
12980
188k
    if (!ctxt->wellFormed) {
12981
37.3k
  ret = (xmlParserErrors)ctxt->errNo;
12982
37.3k
        if (oldctxt != NULL) {
12983
37.3k
            oldctxt->errNo = ctxt->errNo;
12984
37.3k
            oldctxt->wellFormed = 0;
12985
37.3k
            xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
12986
37.3k
        }
12987
151k
    } else {
12988
151k
  if (list != NULL) {
12989
21.2k
      xmlNodePtr cur;
12990
12991
      /*
12992
       * Return the newly created nodeset after unlinking it from
12993
       * they pseudo parent.
12994
       */
12995
21.2k
      cur = newDoc->children->children;
12996
21.2k
      *list = cur;
12997
6.11M
      while (cur != NULL) {
12998
6.09M
    cur->parent = NULL;
12999
6.09M
    cur = cur->next;
13000
6.09M
      }
13001
21.2k
            newDoc->children->children = NULL;
13002
21.2k
  }
13003
151k
  ret = XML_ERR_OK;
13004
151k
    }
13005
13006
    /*
13007
     * Also record the size of the entity parsed
13008
     */
13009
188k
    if (ctxt->input != NULL && oldctxt != NULL) {
13010
188k
        unsigned long consumed = ctxt->input->consumed;
13011
13012
188k
        xmlSaturatedAddSizeT(&consumed, ctxt->input->cur - ctxt->input->base);
13013
13014
188k
        xmlSaturatedAdd(&oldctxt->sizeentities, consumed);
13015
188k
        xmlSaturatedAdd(&oldctxt->sizeentities, ctxt->sizeentities);
13016
13017
188k
        xmlSaturatedAdd(&oldctxt->sizeentcopy, consumed);
13018
188k
        xmlSaturatedAdd(&oldctxt->sizeentcopy, ctxt->sizeentcopy);
13019
188k
    }
13020
13021
188k
    if (oldctxt != NULL) {
13022
188k
        ctxt->dict = NULL;
13023
188k
        ctxt->attsDefault = NULL;
13024
188k
        ctxt->attsSpecial = NULL;
13025
188k
        oldctxt->nbErrors = ctxt->nbErrors;
13026
188k
        oldctxt->nbWarnings = ctxt->nbWarnings;
13027
188k
        oldctxt->validate = ctxt->validate;
13028
188k
        oldctxt->valid = ctxt->valid;
13029
188k
        oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
13030
188k
        oldctxt->node_seq.length = ctxt->node_seq.length;
13031
188k
        oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
13032
188k
    }
13033
188k
    ctxt->node_seq.maximum = 0;
13034
188k
    ctxt->node_seq.length = 0;
13035
188k
    ctxt->node_seq.buffer = NULL;
13036
188k
    xmlFreeParserCtxt(ctxt);
13037
188k
    newDoc->intSubset = NULL;
13038
188k
    newDoc->extSubset = NULL;
13039
188k
    xmlFreeDoc(newDoc);
13040
13041
188k
    return(ret);
13042
188k
}
13043
13044
#ifdef LIBXML_SAX1_ENABLED
13045
/**
13046
 * xmlParseExternalEntity:
13047
 * @doc:  the document the chunk pertains to
13048
 * @sax:  the SAX handler block (possibly NULL)
13049
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
13050
 * @depth:  Used for loop detection, use 0
13051
 * @URL:  the URL for the entity to load
13052
 * @ID:  the System ID for the entity to load
13053
 * @lst:  the return value for the set of parsed nodes
13054
 *
13055
 * Parse an external general entity
13056
 * An external general parsed entity is well-formed if it matches the
13057
 * production labeled extParsedEnt.
13058
 *
13059
 * [78] extParsedEnt ::= TextDecl? content
13060
 *
13061
 * Returns 0 if the entity is well formed, -1 in case of args problem and
13062
 *    the parser error code otherwise
13063
 */
13064
13065
int
13066
xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
13067
0
    int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
13068
0
    return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
13069
0
                           ID, lst));
13070
0
}
13071
13072
/**
13073
 * xmlParseBalancedChunkMemory:
13074
 * @doc:  the document the chunk pertains to (must not be NULL)
13075
 * @sax:  the SAX handler block (possibly NULL)
13076
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
13077
 * @depth:  Used for loop detection, use 0
13078
 * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
13079
 * @lst:  the return value for the set of parsed nodes
13080
 *
13081
 * Parse a well-balanced chunk of an XML document
13082
 * called by the parser
13083
 * The allowed sequence for the Well Balanced Chunk is the one defined by
13084
 * the content production in the XML grammar:
13085
 *
13086
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13087
 *
13088
 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13089
 *    the parser error code otherwise
13090
 */
13091
13092
int
13093
xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13094
0
     void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
13095
0
    return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
13096
0
                                                depth, string, lst, 0 );
13097
0
}
13098
#endif /* LIBXML_SAX1_ENABLED */
13099
13100
/**
13101
 * xmlParseBalancedChunkMemoryInternal:
13102
 * @oldctxt:  the existing parsing context
13103
 * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
13104
 * @user_data:  the user data field for the parser context
13105
 * @lst:  the return value for the set of parsed nodes
13106
 *
13107
 *
13108
 * Parse a well-balanced chunk of an XML document
13109
 * called by the parser
13110
 * The allowed sequence for the Well Balanced Chunk is the one defined by
13111
 * the content production in the XML grammar:
13112
 *
13113
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13114
 *
13115
 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13116
 * error code otherwise
13117
 *
13118
 * In case recover is set to 1, the nodelist will not be empty even if
13119
 * the parsed chunk is not well balanced.
13120
 */
13121
static xmlParserErrors
13122
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
13123
127k
  const xmlChar *string, void *user_data, xmlNodePtr *lst) {
13124
127k
    xmlParserCtxtPtr ctxt;
13125
127k
    xmlDocPtr newDoc = NULL;
13126
127k
    xmlNodePtr newRoot;
13127
127k
    xmlSAXHandlerPtr oldsax = NULL;
13128
127k
    xmlNodePtr content = NULL;
13129
127k
    xmlNodePtr last = NULL;
13130
127k
    int size;
13131
127k
    xmlParserErrors ret = XML_ERR_OK;
13132
127k
#ifdef SAX2
13133
127k
    int i;
13134
127k
#endif
13135
13136
127k
    if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13137
127k
        (oldctxt->depth >  100)) {
13138
127
  xmlFatalErrMsg(oldctxt, XML_ERR_ENTITY_LOOP,
13139
127
                       "Maximum entity nesting depth exceeded");
13140
127
  return(XML_ERR_ENTITY_LOOP);
13141
127
    }
13142
13143
13144
127k
    if (lst != NULL)
13145
124k
        *lst = NULL;
13146
127k
    if (string == NULL)
13147
189
        return(XML_ERR_INTERNAL_ERROR);
13148
13149
127k
    size = xmlStrlen(string);
13150
13151
127k
    ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13152
127k
    if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
13153
124k
    ctxt->nbErrors = oldctxt->nbErrors;
13154
124k
    ctxt->nbWarnings = oldctxt->nbWarnings;
13155
124k
    if (user_data != NULL)
13156
0
  ctxt->userData = user_data;
13157
124k
    else
13158
124k
  ctxt->userData = ctxt;
13159
124k
    if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
13160
124k
    ctxt->dict = oldctxt->dict;
13161
124k
    ctxt->input_id = oldctxt->input_id;
13162
124k
    ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13163
124k
    ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13164
124k
    ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13165
13166
124k
#ifdef SAX2
13167
    /* propagate namespaces down the entity */
13168
126k
    for (i = 0;i < oldctxt->nsNr;i += 2) {
13169
1.70k
        nsPush(ctxt, oldctxt->nsTab[i], oldctxt->nsTab[i+1]);
13170
1.70k
    }
13171
124k
#endif
13172
13173
124k
    oldsax = ctxt->sax;
13174
124k
    ctxt->sax = oldctxt->sax;
13175
124k
    xmlDetectSAX2(ctxt);
13176
124k
    ctxt->replaceEntities = oldctxt->replaceEntities;
13177
124k
    ctxt->options = oldctxt->options;
13178
13179
124k
    ctxt->_private = oldctxt->_private;
13180
124k
    if (oldctxt->myDoc == NULL) {
13181
0
  newDoc = xmlNewDoc(BAD_CAST "1.0");
13182
0
  if (newDoc == NULL) {
13183
0
      ctxt->sax = oldsax;
13184
0
      ctxt->dict = NULL;
13185
0
      xmlFreeParserCtxt(ctxt);
13186
0
      return(XML_ERR_INTERNAL_ERROR);
13187
0
  }
13188
0
  newDoc->properties = XML_DOC_INTERNAL;
13189
0
  newDoc->dict = ctxt->dict;
13190
0
  xmlDictReference(newDoc->dict);
13191
0
  ctxt->myDoc = newDoc;
13192
124k
    } else {
13193
124k
  ctxt->myDoc = oldctxt->myDoc;
13194
124k
        content = ctxt->myDoc->children;
13195
124k
  last = ctxt->myDoc->last;
13196
124k
    }
13197
124k
    newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
13198
124k
    if (newRoot == NULL) {
13199
0
  ctxt->sax = oldsax;
13200
0
  ctxt->dict = NULL;
13201
0
  xmlFreeParserCtxt(ctxt);
13202
0
  if (newDoc != NULL) {
13203
0
      xmlFreeDoc(newDoc);
13204
0
  }
13205
0
  return(XML_ERR_INTERNAL_ERROR);
13206
0
    }
13207
124k
    ctxt->myDoc->children = NULL;
13208
124k
    ctxt->myDoc->last = NULL;
13209
124k
    xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
13210
124k
    nodePush(ctxt, ctxt->myDoc->children);
13211
124k
    ctxt->instate = XML_PARSER_CONTENT;
13212
124k
    ctxt->depth = oldctxt->depth;
13213
13214
124k
    ctxt->validate = 0;
13215
124k
    ctxt->loadsubset = oldctxt->loadsubset;
13216
124k
    if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
13217
  /*
13218
   * ID/IDREF registration will be done in xmlValidateElement below
13219
   */
13220
95.2k
  ctxt->loadsubset |= XML_SKIP_IDS;
13221
95.2k
    }
13222
124k
    ctxt->dictNames = oldctxt->dictNames;
13223
124k
    ctxt->attsDefault = oldctxt->attsDefault;
13224
124k
    ctxt->attsSpecial = oldctxt->attsSpecial;
13225
13226
124k
    xmlParseContent(ctxt);
13227
124k
    if ((RAW == '<') && (NXT(1) == '/')) {
13228
529
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13229
123k
    } else if (RAW != 0) {
13230
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13231
0
    }
13232
124k
    if (ctxt->node != ctxt->myDoc->children) {
13233
2.26k
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13234
2.26k
    }
13235
13236
124k
    if (!ctxt->wellFormed) {
13237
35.3k
  ret = (xmlParserErrors)ctxt->errNo;
13238
35.3k
        oldctxt->errNo = ctxt->errNo;
13239
35.3k
        oldctxt->wellFormed = 0;
13240
35.3k
        xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13241
88.9k
    } else {
13242
88.9k
        ret = XML_ERR_OK;
13243
88.9k
    }
13244
13245
124k
    if ((lst != NULL) && (ret == XML_ERR_OK)) {
13246
88.9k
  xmlNodePtr cur;
13247
13248
  /*
13249
   * Return the newly created nodeset after unlinking it from
13250
   * they pseudo parent.
13251
   */
13252
88.9k
  cur = ctxt->myDoc->children->children;
13253
88.9k
  *lst = cur;
13254
1.09M
  while (cur != NULL) {
13255
1.01M
#ifdef LIBXML_VALID_ENABLED
13256
1.01M
      if ((oldctxt->validate) && (oldctxt->wellFormed) &&
13257
1.01M
    (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
13258
1.01M
    (cur->type == XML_ELEMENT_NODE)) {
13259
20.9k
    oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
13260
20.9k
      oldctxt->myDoc, cur);
13261
20.9k
      }
13262
1.01M
#endif /* LIBXML_VALID_ENABLED */
13263
1.01M
      cur->parent = NULL;
13264
1.01M
      cur = cur->next;
13265
1.01M
  }
13266
88.9k
  ctxt->myDoc->children->children = NULL;
13267
88.9k
    }
13268
124k
    if (ctxt->myDoc != NULL) {
13269
124k
  xmlFreeNode(ctxt->myDoc->children);
13270
124k
        ctxt->myDoc->children = content;
13271
124k
        ctxt->myDoc->last = last;
13272
124k
    }
13273
13274
    /*
13275
     * Also record the size of the entity parsed
13276
     */
13277
124k
    if (ctxt->input != NULL && oldctxt != NULL) {
13278
124k
        unsigned long consumed = ctxt->input->consumed;
13279
13280
124k
        xmlSaturatedAddSizeT(&consumed, ctxt->input->cur - ctxt->input->base);
13281
13282
124k
        xmlSaturatedAdd(&oldctxt->sizeentcopy, consumed);
13283
124k
        xmlSaturatedAdd(&oldctxt->sizeentcopy, ctxt->sizeentcopy);
13284
124k
    }
13285
13286
124k
    oldctxt->nbErrors = ctxt->nbErrors;
13287
124k
    oldctxt->nbWarnings = ctxt->nbWarnings;
13288
124k
    ctxt->sax = oldsax;
13289
124k
    ctxt->dict = NULL;
13290
124k
    ctxt->attsDefault = NULL;
13291
124k
    ctxt->attsSpecial = NULL;
13292
124k
    xmlFreeParserCtxt(ctxt);
13293
124k
    if (newDoc != NULL) {
13294
0
  xmlFreeDoc(newDoc);
13295
0
    }
13296
13297
124k
    return(ret);
13298
124k
}
13299
13300
/**
13301
 * xmlParseInNodeContext:
13302
 * @node:  the context node
13303
 * @data:  the input string
13304
 * @datalen:  the input string length in bytes
13305
 * @options:  a combination of xmlParserOption
13306
 * @lst:  the return value for the set of parsed nodes
13307
 *
13308
 * Parse a well-balanced chunk of an XML document
13309
 * within the context (DTD, namespaces, etc ...) of the given node.
13310
 *
13311
 * The allowed sequence for the data is a Well Balanced Chunk defined by
13312
 * the content production in the XML grammar:
13313
 *
13314
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13315
 *
13316
 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13317
 * error code otherwise
13318
 */
13319
xmlParserErrors
13320
xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
13321
0
                      int options, xmlNodePtr *lst) {
13322
0
#ifdef SAX2
13323
0
    xmlParserCtxtPtr ctxt;
13324
0
    xmlDocPtr doc = NULL;
13325
0
    xmlNodePtr fake, cur;
13326
0
    int nsnr = 0;
13327
13328
0
    xmlParserErrors ret = XML_ERR_OK;
13329
13330
    /*
13331
     * check all input parameters, grab the document
13332
     */
13333
0
    if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
13334
0
        return(XML_ERR_INTERNAL_ERROR);
13335
0
    switch (node->type) {
13336
0
        case XML_ELEMENT_NODE:
13337
0
        case XML_ATTRIBUTE_NODE:
13338
0
        case XML_TEXT_NODE:
13339
0
        case XML_CDATA_SECTION_NODE:
13340
0
        case XML_ENTITY_REF_NODE:
13341
0
        case XML_PI_NODE:
13342
0
        case XML_COMMENT_NODE:
13343
0
        case XML_DOCUMENT_NODE:
13344
0
        case XML_HTML_DOCUMENT_NODE:
13345
0
      break;
13346
0
  default:
13347
0
      return(XML_ERR_INTERNAL_ERROR);
13348
13349
0
    }
13350
0
    while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
13351
0
           (node->type != XML_DOCUMENT_NODE) &&
13352
0
     (node->type != XML_HTML_DOCUMENT_NODE))
13353
0
  node = node->parent;
13354
0
    if (node == NULL)
13355
0
  return(XML_ERR_INTERNAL_ERROR);
13356
0
    if (node->type == XML_ELEMENT_NODE)
13357
0
  doc = node->doc;
13358
0
    else
13359
0
        doc = (xmlDocPtr) node;
13360
0
    if (doc == NULL)
13361
0
  return(XML_ERR_INTERNAL_ERROR);
13362
13363
    /*
13364
     * allocate a context and set-up everything not related to the
13365
     * node position in the tree
13366
     */
13367
0
    if (doc->type == XML_DOCUMENT_NODE)
13368
0
  ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
13369
0
#ifdef LIBXML_HTML_ENABLED
13370
0
    else if (doc->type == XML_HTML_DOCUMENT_NODE) {
13371
0
  ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
13372
        /*
13373
         * When parsing in context, it makes no sense to add implied
13374
         * elements like html/body/etc...
13375
         */
13376
0
        options |= HTML_PARSE_NOIMPLIED;
13377
0
    }
13378
0
#endif
13379
0
    else
13380
0
        return(XML_ERR_INTERNAL_ERROR);
13381
13382
0
    if (ctxt == NULL)
13383
0
        return(XML_ERR_NO_MEMORY);
13384
13385
    /*
13386
     * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
13387
     * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
13388
     * we must wait until the last moment to free the original one.
13389
     */
13390
0
    if (doc->dict != NULL) {
13391
0
        if (ctxt->dict != NULL)
13392
0
      xmlDictFree(ctxt->dict);
13393
0
  ctxt->dict = doc->dict;
13394
0
    } else
13395
0
        options |= XML_PARSE_NODICT;
13396
13397
0
    if (doc->encoding != NULL) {
13398
0
        xmlCharEncodingHandlerPtr hdlr;
13399
13400
0
        if (ctxt->encoding != NULL)
13401
0
      xmlFree((xmlChar *) ctxt->encoding);
13402
0
        ctxt->encoding = xmlStrdup((const xmlChar *) doc->encoding);
13403
13404
0
        hdlr = xmlFindCharEncodingHandler((const char *) doc->encoding);
13405
0
        if (hdlr != NULL) {
13406
0
            xmlSwitchToEncoding(ctxt, hdlr);
13407
0
  } else {
13408
0
            return(XML_ERR_UNSUPPORTED_ENCODING);
13409
0
        }
13410
0
    }
13411
13412
0
    xmlCtxtUseOptionsInternal(ctxt, options, NULL);
13413
0
    xmlDetectSAX2(ctxt);
13414
0
    ctxt->myDoc = doc;
13415
    /* parsing in context, i.e. as within existing content */
13416
0
    ctxt->input_id = 2;
13417
0
    ctxt->instate = XML_PARSER_CONTENT;
13418
13419
0
    fake = xmlNewDocComment(node->doc, NULL);
13420
0
    if (fake == NULL) {
13421
0
        xmlFreeParserCtxt(ctxt);
13422
0
  return(XML_ERR_NO_MEMORY);
13423
0
    }
13424
0
    xmlAddChild(node, fake);
13425
13426
0
    if (node->type == XML_ELEMENT_NODE) {
13427
0
  nodePush(ctxt, node);
13428
  /*
13429
   * initialize the SAX2 namespaces stack
13430
   */
13431
0
  cur = node;
13432
0
  while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
13433
0
      xmlNsPtr ns = cur->nsDef;
13434
0
      const xmlChar *iprefix, *ihref;
13435
13436
0
      while (ns != NULL) {
13437
0
    if (ctxt->dict) {
13438
0
        iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
13439
0
        ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
13440
0
    } else {
13441
0
        iprefix = ns->prefix;
13442
0
        ihref = ns->href;
13443
0
    }
13444
13445
0
          if (xmlGetNamespace(ctxt, iprefix) == NULL) {
13446
0
        nsPush(ctxt, iprefix, ihref);
13447
0
        nsnr++;
13448
0
    }
13449
0
    ns = ns->next;
13450
0
      }
13451
0
      cur = cur->parent;
13452
0
  }
13453
0
    }
13454
13455
0
    if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
13456
  /*
13457
   * ID/IDREF registration will be done in xmlValidateElement below
13458
   */
13459
0
  ctxt->loadsubset |= XML_SKIP_IDS;
13460
0
    }
13461
13462
0
#ifdef LIBXML_HTML_ENABLED
13463
0
    if (doc->type == XML_HTML_DOCUMENT_NODE)
13464
0
        __htmlParseContent(ctxt);
13465
0
    else
13466
0
#endif
13467
0
  xmlParseContent(ctxt);
13468
13469
0
    nsPop(ctxt, nsnr);
13470
0
    if ((RAW == '<') && (NXT(1) == '/')) {
13471
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13472
0
    } else if (RAW != 0) {
13473
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13474
0
    }
13475
0
    if ((ctxt->node != NULL) && (ctxt->node != node)) {
13476
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13477
0
  ctxt->wellFormed = 0;
13478
0
    }
13479
13480
0
    if (!ctxt->wellFormed) {
13481
0
        if (ctxt->errNo == 0)
13482
0
      ret = XML_ERR_INTERNAL_ERROR;
13483
0
  else
13484
0
      ret = (xmlParserErrors)ctxt->errNo;
13485
0
    } else {
13486
0
        ret = XML_ERR_OK;
13487
0
    }
13488
13489
    /*
13490
     * Return the newly created nodeset after unlinking it from
13491
     * the pseudo sibling.
13492
     */
13493
13494
0
    cur = fake->next;
13495
0
    fake->next = NULL;
13496
0
    node->last = fake;
13497
13498
0
    if (cur != NULL) {
13499
0
  cur->prev = NULL;
13500
0
    }
13501
13502
0
    *lst = cur;
13503
13504
0
    while (cur != NULL) {
13505
0
  cur->parent = NULL;
13506
0
  cur = cur->next;
13507
0
    }
13508
13509
0
    xmlUnlinkNode(fake);
13510
0
    xmlFreeNode(fake);
13511
13512
13513
0
    if (ret != XML_ERR_OK) {
13514
0
        xmlFreeNodeList(*lst);
13515
0
  *lst = NULL;
13516
0
    }
13517
13518
0
    if (doc->dict != NULL)
13519
0
        ctxt->dict = NULL;
13520
0
    xmlFreeParserCtxt(ctxt);
13521
13522
0
    return(ret);
13523
#else /* !SAX2 */
13524
    return(XML_ERR_INTERNAL_ERROR);
13525
#endif
13526
0
}
13527
13528
#ifdef LIBXML_SAX1_ENABLED
13529
/**
13530
 * xmlParseBalancedChunkMemoryRecover:
13531
 * @doc:  the document the chunk pertains to (must not be NULL)
13532
 * @sax:  the SAX handler block (possibly NULL)
13533
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
13534
 * @depth:  Used for loop detection, use 0
13535
 * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
13536
 * @lst:  the return value for the set of parsed nodes
13537
 * @recover: return nodes even if the data is broken (use 0)
13538
 *
13539
 *
13540
 * Parse a well-balanced chunk of an XML document
13541
 * called by the parser
13542
 * The allowed sequence for the Well Balanced Chunk is the one defined by
13543
 * the content production in the XML grammar:
13544
 *
13545
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13546
 *
13547
 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13548
 *    the parser error code otherwise
13549
 *
13550
 * In case recover is set to 1, the nodelist will not be empty even if
13551
 * the parsed chunk is not well balanced, assuming the parsing succeeded to
13552
 * some extent.
13553
 */
13554
int
13555
xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13556
     void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
13557
0
     int recover) {
13558
0
    xmlParserCtxtPtr ctxt;
13559
0
    xmlDocPtr newDoc;
13560
0
    xmlSAXHandlerPtr oldsax = NULL;
13561
0
    xmlNodePtr content, newRoot;
13562
0
    int size;
13563
0
    int ret = 0;
13564
13565
0
    if (depth > 40) {
13566
0
  return(XML_ERR_ENTITY_LOOP);
13567
0
    }
13568
13569
13570
0
    if (lst != NULL)
13571
0
        *lst = NULL;
13572
0
    if (string == NULL)
13573
0
        return(-1);
13574
13575
0
    size = xmlStrlen(string);
13576
13577
0
    ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13578
0
    if (ctxt == NULL) return(-1);
13579
0
    ctxt->userData = ctxt;
13580
0
    if (sax != NULL) {
13581
0
  oldsax = ctxt->sax;
13582
0
        ctxt->sax = sax;
13583
0
  if (user_data != NULL)
13584
0
      ctxt->userData = user_data;
13585
0
    }
13586
0
    newDoc = xmlNewDoc(BAD_CAST "1.0");
13587
0
    if (newDoc == NULL) {
13588
0
  xmlFreeParserCtxt(ctxt);
13589
0
  return(-1);
13590
0
    }
13591
0
    newDoc->properties = XML_DOC_INTERNAL;
13592
0
    if ((doc != NULL) && (doc->dict != NULL)) {
13593
0
        xmlDictFree(ctxt->dict);
13594
0
  ctxt->dict = doc->dict;
13595
0
  xmlDictReference(ctxt->dict);
13596
0
  ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13597
0
  ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13598
0
  ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13599
0
  ctxt->dictNames = 1;
13600
0
    } else {
13601
0
  xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL);
13602
0
    }
13603
    /* doc == NULL is only supported for historic reasons */
13604
0
    if (doc != NULL) {
13605
0
  newDoc->intSubset = doc->intSubset;
13606
0
  newDoc->extSubset = doc->extSubset;
13607
0
    }
13608
0
    newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13609
0
    if (newRoot == NULL) {
13610
0
  if (sax != NULL)
13611
0
      ctxt->sax = oldsax;
13612
0
  xmlFreeParserCtxt(ctxt);
13613
0
  newDoc->intSubset = NULL;
13614
0
  newDoc->extSubset = NULL;
13615
0
        xmlFreeDoc(newDoc);
13616
0
  return(-1);
13617
0
    }
13618
0
    xmlAddChild((xmlNodePtr) newDoc, newRoot);
13619
0
    nodePush(ctxt, newRoot);
13620
    /* doc == NULL is only supported for historic reasons */
13621
0
    if (doc == NULL) {
13622
0
  ctxt->myDoc = newDoc;
13623
0
    } else {
13624
0
  ctxt->myDoc = newDoc;
13625
0
  newDoc->children->doc = doc;
13626
  /* Ensure that doc has XML spec namespace */
13627
0
  xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
13628
0
  newDoc->oldNs = doc->oldNs;
13629
0
    }
13630
0
    ctxt->instate = XML_PARSER_CONTENT;
13631
0
    ctxt->input_id = 2;
13632
0
    ctxt->depth = depth;
13633
13634
    /*
13635
     * Doing validity checking on chunk doesn't make sense
13636
     */
13637
0
    ctxt->validate = 0;
13638
0
    ctxt->loadsubset = 0;
13639
0
    xmlDetectSAX2(ctxt);
13640
13641
0
    if ( doc != NULL ){
13642
0
        content = doc->children;
13643
0
        doc->children = NULL;
13644
0
        xmlParseContent(ctxt);
13645
0
        doc->children = content;
13646
0
    }
13647
0
    else {
13648
0
        xmlParseContent(ctxt);
13649
0
    }
13650
0
    if ((RAW == '<') && (NXT(1) == '/')) {
13651
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13652
0
    } else if (RAW != 0) {
13653
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13654
0
    }
13655
0
    if (ctxt->node != newDoc->children) {
13656
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13657
0
    }
13658
13659
0
    if (!ctxt->wellFormed) {
13660
0
        if (ctxt->errNo == 0)
13661
0
      ret = 1;
13662
0
  else
13663
0
      ret = ctxt->errNo;
13664
0
    } else {
13665
0
      ret = 0;
13666
0
    }
13667
13668
0
    if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
13669
0
  xmlNodePtr cur;
13670
13671
  /*
13672
   * Return the newly created nodeset after unlinking it from
13673
   * they pseudo parent.
13674
   */
13675
0
  cur = newDoc->children->children;
13676
0
  *lst = cur;
13677
0
  while (cur != NULL) {
13678
0
      xmlSetTreeDoc(cur, doc);
13679
0
      cur->parent = NULL;
13680
0
      cur = cur->next;
13681
0
  }
13682
0
  newDoc->children->children = NULL;
13683
0
    }
13684
13685
0
    if (sax != NULL)
13686
0
  ctxt->sax = oldsax;
13687
0
    xmlFreeParserCtxt(ctxt);
13688
0
    newDoc->intSubset = NULL;
13689
0
    newDoc->extSubset = NULL;
13690
    /* This leaks the namespace list if doc == NULL */
13691
0
    newDoc->oldNs = NULL;
13692
0
    xmlFreeDoc(newDoc);
13693
13694
0
    return(ret);
13695
0
}
13696
13697
/**
13698
 * xmlSAXParseEntity:
13699
 * @sax:  the SAX handler block
13700
 * @filename:  the filename
13701
 *
13702
 * DEPRECATED: Don't use.
13703
 *
13704
 * parse an XML external entity out of context and build a tree.
13705
 * It use the given SAX function block to handle the parsing callback.
13706
 * If sax is NULL, fallback to the default DOM tree building routines.
13707
 *
13708
 * [78] extParsedEnt ::= TextDecl? content
13709
 *
13710
 * This correspond to a "Well Balanced" chunk
13711
 *
13712
 * Returns the resulting document tree
13713
 */
13714
13715
xmlDocPtr
13716
0
xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
13717
0
    xmlDocPtr ret;
13718
0
    xmlParserCtxtPtr ctxt;
13719
13720
0
    ctxt = xmlCreateFileParserCtxt(filename);
13721
0
    if (ctxt == NULL) {
13722
0
  return(NULL);
13723
0
    }
13724
0
    if (sax != NULL) {
13725
0
  if (ctxt->sax != NULL)
13726
0
      xmlFree(ctxt->sax);
13727
0
        ctxt->sax = sax;
13728
0
        ctxt->userData = NULL;
13729
0
    }
13730
13731
0
    xmlParseExtParsedEnt(ctxt);
13732
13733
0
    if (ctxt->wellFormed)
13734
0
  ret = ctxt->myDoc;
13735
0
    else {
13736
0
        ret = NULL;
13737
0
        xmlFreeDoc(ctxt->myDoc);
13738
0
        ctxt->myDoc = NULL;
13739
0
    }
13740
0
    if (sax != NULL)
13741
0
        ctxt->sax = NULL;
13742
0
    xmlFreeParserCtxt(ctxt);
13743
13744
0
    return(ret);
13745
0
}
13746
13747
/**
13748
 * xmlParseEntity:
13749
 * @filename:  the filename
13750
 *
13751
 * parse an XML external entity out of context and build a tree.
13752
 *
13753
 * [78] extParsedEnt ::= TextDecl? content
13754
 *
13755
 * This correspond to a "Well Balanced" chunk
13756
 *
13757
 * Returns the resulting document tree
13758
 */
13759
13760
xmlDocPtr
13761
0
xmlParseEntity(const char *filename) {
13762
0
    return(xmlSAXParseEntity(NULL, filename));
13763
0
}
13764
#endif /* LIBXML_SAX1_ENABLED */
13765
13766
/**
13767
 * xmlCreateEntityParserCtxtInternal:
13768
 * @URL:  the entity URL
13769
 * @ID:  the entity PUBLIC ID
13770
 * @base:  a possible base for the target URI
13771
 * @pctx:  parser context used to set options on new context
13772
 *
13773
 * Create a parser context for an external entity
13774
 * Automatic support for ZLIB/Compress compressed document is provided
13775
 * by default if found at compile-time.
13776
 *
13777
 * Returns the new parser context or NULL
13778
 */
13779
static xmlParserCtxtPtr
13780
xmlCreateEntityParserCtxtInternal(xmlSAXHandlerPtr sax, void *userData,
13781
        const xmlChar *URL, const xmlChar *ID, const xmlChar *base,
13782
1.02M
        xmlParserCtxtPtr pctx) {
13783
1.02M
    xmlParserCtxtPtr ctxt;
13784
1.02M
    xmlParserInputPtr inputStream;
13785
1.02M
    char *directory = NULL;
13786
1.02M
    xmlChar *uri;
13787
13788
1.02M
    ctxt = xmlNewSAXParserCtxt(sax, userData);
13789
1.02M
    if (ctxt == NULL) {
13790
0
  return(NULL);
13791
0
    }
13792
13793
1.02M
    if (pctx != NULL) {
13794
1.02M
        ctxt->options = pctx->options;
13795
1.02M
        ctxt->_private = pctx->_private;
13796
1.02M
  ctxt->input_id = pctx->input_id;
13797
1.02M
    }
13798
13799
    /* Don't read from stdin. */
13800
1.02M
    if (xmlStrcmp(URL, BAD_CAST "-") == 0)
13801
404
        URL = BAD_CAST "./-";
13802
13803
1.02M
    uri = xmlBuildURI(URL, base);
13804
13805
1.02M
    if (uri == NULL) {
13806
21.4k
  inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
13807
21.4k
  if (inputStream == NULL) {
13808
20.4k
      xmlFreeParserCtxt(ctxt);
13809
20.4k
      return(NULL);
13810
20.4k
  }
13811
13812
990
  inputPush(ctxt, inputStream);
13813
13814
990
  if ((ctxt->directory == NULL) && (directory == NULL))
13815
990
      directory = xmlParserGetDirectory((char *)URL);
13816
990
  if ((ctxt->directory == NULL) && (directory != NULL))
13817
990
      ctxt->directory = directory;
13818
1.00M
    } else {
13819
1.00M
  inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
13820
1.00M
  if (inputStream == NULL) {
13821
816k
      xmlFree(uri);
13822
816k
      xmlFreeParserCtxt(ctxt);
13823
816k
      return(NULL);
13824
816k
  }
13825
13826
187k
  inputPush(ctxt, inputStream);
13827
13828
187k
  if ((ctxt->directory == NULL) && (directory == NULL))
13829
187k
      directory = xmlParserGetDirectory((char *)uri);
13830
187k
  if ((ctxt->directory == NULL) && (directory != NULL))
13831
187k
      ctxt->directory = directory;
13832
187k
  xmlFree(uri);
13833
187k
    }
13834
188k
    return(ctxt);
13835
1.02M
}
13836
13837
/**
13838
 * xmlCreateEntityParserCtxt:
13839
 * @URL:  the entity URL
13840
 * @ID:  the entity PUBLIC ID
13841
 * @base:  a possible base for the target URI
13842
 *
13843
 * Create a parser context for an external entity
13844
 * Automatic support for ZLIB/Compress compressed document is provided
13845
 * by default if found at compile-time.
13846
 *
13847
 * Returns the new parser context or NULL
13848
 */
13849
xmlParserCtxtPtr
13850
xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
13851
0
                    const xmlChar *base) {
13852
0
    return xmlCreateEntityParserCtxtInternal(NULL, NULL, URL, ID, base, NULL);
13853
13854
0
}
13855
13856
/************************************************************************
13857
 *                  *
13858
 *    Front ends when parsing from a file     *
13859
 *                  *
13860
 ************************************************************************/
13861
13862
/**
13863
 * xmlCreateURLParserCtxt:
13864
 * @filename:  the filename or URL
13865
 * @options:  a combination of xmlParserOption
13866
 *
13867
 * Create a parser context for a file or URL content.
13868
 * Automatic support for ZLIB/Compress compressed document is provided
13869
 * by default if found at compile-time and for file accesses
13870
 *
13871
 * Returns the new parser context or NULL
13872
 */
13873
xmlParserCtxtPtr
13874
xmlCreateURLParserCtxt(const char *filename, int options)
13875
0
{
13876
0
    xmlParserCtxtPtr ctxt;
13877
0
    xmlParserInputPtr inputStream;
13878
0
    char *directory = NULL;
13879
13880
0
    ctxt = xmlNewParserCtxt();
13881
0
    if (ctxt == NULL) {
13882
0
  xmlErrMemory(NULL, "cannot allocate parser context");
13883
0
  return(NULL);
13884
0
    }
13885
13886
0
    if (options)
13887
0
  xmlCtxtUseOptionsInternal(ctxt, options, NULL);
13888
0
    ctxt->linenumbers = 1;
13889
13890
0
    inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
13891
0
    if (inputStream == NULL) {
13892
0
  xmlFreeParserCtxt(ctxt);
13893
0
  return(NULL);
13894
0
    }
13895
13896
0
    inputPush(ctxt, inputStream);
13897
0
    if ((ctxt->directory == NULL) && (directory == NULL))
13898
0
        directory = xmlParserGetDirectory(filename);
13899
0
    if ((ctxt->directory == NULL) && (directory != NULL))
13900
0
        ctxt->directory = directory;
13901
13902
0
    return(ctxt);
13903
0
}
13904
13905
/**
13906
 * xmlCreateFileParserCtxt:
13907
 * @filename:  the filename
13908
 *
13909
 * Create a parser context for a file content.
13910
 * Automatic support for ZLIB/Compress compressed document is provided
13911
 * by default if found at compile-time.
13912
 *
13913
 * Returns the new parser context or NULL
13914
 */
13915
xmlParserCtxtPtr
13916
xmlCreateFileParserCtxt(const char *filename)
13917
0
{
13918
0
    return(xmlCreateURLParserCtxt(filename, 0));
13919
0
}
13920
13921
#ifdef LIBXML_SAX1_ENABLED
13922
/**
13923
 * xmlSAXParseFileWithData:
13924
 * @sax:  the SAX handler block
13925
 * @filename:  the filename
13926
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
13927
 *             documents
13928
 * @data:  the userdata
13929
 *
13930
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
13931
 *
13932
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13933
 * compressed document is provided by default if found at compile-time.
13934
 * It use the given SAX function block to handle the parsing callback.
13935
 * If sax is NULL, fallback to the default DOM tree building routines.
13936
 *
13937
 * User data (void *) is stored within the parser context in the
13938
 * context's _private member, so it is available nearly everywhere in libxml
13939
 *
13940
 * Returns the resulting document tree
13941
 */
13942
13943
xmlDocPtr
13944
xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
13945
0
                        int recovery, void *data) {
13946
0
    xmlDocPtr ret;
13947
0
    xmlParserCtxtPtr ctxt;
13948
13949
0
    xmlInitParser();
13950
13951
0
    ctxt = xmlCreateFileParserCtxt(filename);
13952
0
    if (ctxt == NULL) {
13953
0
  return(NULL);
13954
0
    }
13955
0
    if (sax != NULL) {
13956
0
  if (ctxt->sax != NULL)
13957
0
      xmlFree(ctxt->sax);
13958
0
        ctxt->sax = sax;
13959
0
    }
13960
0
    xmlDetectSAX2(ctxt);
13961
0
    if (data!=NULL) {
13962
0
  ctxt->_private = data;
13963
0
    }
13964
13965
0
    if (ctxt->directory == NULL)
13966
0
        ctxt->directory = xmlParserGetDirectory(filename);
13967
13968
0
    ctxt->recovery = recovery;
13969
13970
0
    xmlParseDocument(ctxt);
13971
13972
0
    if ((ctxt->wellFormed) || recovery) {
13973
0
        ret = ctxt->myDoc;
13974
0
  if ((ret != NULL) && (ctxt->input->buf != NULL)) {
13975
0
      if (ctxt->input->buf->compressed > 0)
13976
0
    ret->compression = 9;
13977
0
      else
13978
0
    ret->compression = ctxt->input->buf->compressed;
13979
0
  }
13980
0
    }
13981
0
    else {
13982
0
       ret = NULL;
13983
0
       xmlFreeDoc(ctxt->myDoc);
13984
0
       ctxt->myDoc = NULL;
13985
0
    }
13986
0
    if (sax != NULL)
13987
0
        ctxt->sax = NULL;
13988
0
    xmlFreeParserCtxt(ctxt);
13989
13990
0
    return(ret);
13991
0
}
13992
13993
/**
13994
 * xmlSAXParseFile:
13995
 * @sax:  the SAX handler block
13996
 * @filename:  the filename
13997
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
13998
 *             documents
13999
 *
14000
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
14001
 *
14002
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14003
 * compressed document is provided by default if found at compile-time.
14004
 * It use the given SAX function block to handle the parsing callback.
14005
 * If sax is NULL, fallback to the default DOM tree building routines.
14006
 *
14007
 * Returns the resulting document tree
14008
 */
14009
14010
xmlDocPtr
14011
xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
14012
0
                          int recovery) {
14013
0
    return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
14014
0
}
14015
14016
/**
14017
 * xmlRecoverDoc:
14018
 * @cur:  a pointer to an array of xmlChar
14019
 *
14020
 * DEPRECATED: Use xmlReadDoc with XML_PARSE_RECOVER.
14021
 *
14022
 * parse an XML in-memory document and build a tree.
14023
 * In the case the document is not Well Formed, a attempt to build a
14024
 * tree is tried anyway
14025
 *
14026
 * Returns the resulting document tree or NULL in case of failure
14027
 */
14028
14029
xmlDocPtr
14030
0
xmlRecoverDoc(const xmlChar *cur) {
14031
0
    return(xmlSAXParseDoc(NULL, cur, 1));
14032
0
}
14033
14034
/**
14035
 * xmlParseFile:
14036
 * @filename:  the filename
14037
 *
14038
 * DEPRECATED: Use xmlReadFile.
14039
 *
14040
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14041
 * compressed document is provided by default if found at compile-time.
14042
 *
14043
 * Returns the resulting document tree if the file was wellformed,
14044
 * NULL otherwise.
14045
 */
14046
14047
xmlDocPtr
14048
0
xmlParseFile(const char *filename) {
14049
0
    return(xmlSAXParseFile(NULL, filename, 0));
14050
0
}
14051
14052
/**
14053
 * xmlRecoverFile:
14054
 * @filename:  the filename
14055
 *
14056
 * DEPRECATED: Use xmlReadFile with XML_PARSE_RECOVER.
14057
 *
14058
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14059
 * compressed document is provided by default if found at compile-time.
14060
 * In the case the document is not Well Formed, it attempts to build
14061
 * a tree anyway
14062
 *
14063
 * Returns the resulting document tree or NULL in case of failure
14064
 */
14065
14066
xmlDocPtr
14067
0
xmlRecoverFile(const char *filename) {
14068
0
    return(xmlSAXParseFile(NULL, filename, 1));
14069
0
}
14070
14071
14072
/**
14073
 * xmlSetupParserForBuffer:
14074
 * @ctxt:  an XML parser context
14075
 * @buffer:  a xmlChar * buffer
14076
 * @filename:  a file name
14077
 *
14078
 * DEPRECATED: Don't use.
14079
 *
14080
 * Setup the parser context to parse a new buffer; Clears any prior
14081
 * contents from the parser context. The buffer parameter must not be
14082
 * NULL, but the filename parameter can be
14083
 */
14084
void
14085
xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
14086
                             const char* filename)
14087
0
{
14088
0
    xmlParserInputPtr input;
14089
14090
0
    if ((ctxt == NULL) || (buffer == NULL))
14091
0
        return;
14092
14093
0
    input = xmlNewInputStream(ctxt);
14094
0
    if (input == NULL) {
14095
0
        xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
14096
0
        xmlClearParserCtxt(ctxt);
14097
0
        return;
14098
0
    }
14099
14100
0
    xmlClearParserCtxt(ctxt);
14101
0
    if (filename != NULL)
14102
0
        input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
14103
0
    input->base = buffer;
14104
0
    input->cur = buffer;
14105
0
    input->end = &buffer[xmlStrlen(buffer)];
14106
0
    inputPush(ctxt, input);
14107
0
}
14108
14109
/**
14110
 * xmlSAXUserParseFile:
14111
 * @sax:  a SAX handler
14112
 * @user_data:  The user data returned on SAX callbacks
14113
 * @filename:  a file name
14114
 *
14115
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
14116
 *
14117
 * parse an XML file and call the given SAX handler routines.
14118
 * Automatic support for ZLIB/Compress compressed document is provided
14119
 *
14120
 * Returns 0 in case of success or a error number otherwise
14121
 */
14122
int
14123
xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
14124
0
                    const char *filename) {
14125
0
    int ret = 0;
14126
0
    xmlParserCtxtPtr ctxt;
14127
14128
0
    ctxt = xmlCreateFileParserCtxt(filename);
14129
0
    if (ctxt == NULL) return -1;
14130
0
    if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14131
0
  xmlFree(ctxt->sax);
14132
0
    ctxt->sax = sax;
14133
0
    xmlDetectSAX2(ctxt);
14134
14135
0
    if (user_data != NULL)
14136
0
  ctxt->userData = user_data;
14137
14138
0
    xmlParseDocument(ctxt);
14139
14140
0
    if (ctxt->wellFormed)
14141
0
  ret = 0;
14142
0
    else {
14143
0
        if (ctxt->errNo != 0)
14144
0
      ret = ctxt->errNo;
14145
0
  else
14146
0
      ret = -1;
14147
0
    }
14148
0
    if (sax != NULL)
14149
0
  ctxt->sax = NULL;
14150
0
    if (ctxt->myDoc != NULL) {
14151
0
        xmlFreeDoc(ctxt->myDoc);
14152
0
  ctxt->myDoc = NULL;
14153
0
    }
14154
0
    xmlFreeParserCtxt(ctxt);
14155
14156
0
    return ret;
14157
0
}
14158
#endif /* LIBXML_SAX1_ENABLED */
14159
14160
/************************************************************************
14161
 *                  *
14162
 *    Front ends when parsing from memory     *
14163
 *                  *
14164
 ************************************************************************/
14165
14166
/**
14167
 * xmlCreateMemoryParserCtxt:
14168
 * @buffer:  a pointer to a char array
14169
 * @size:  the size of the array
14170
 *
14171
 * Create a parser context for an XML in-memory document.
14172
 *
14173
 * Returns the new parser context or NULL
14174
 */
14175
xmlParserCtxtPtr
14176
472k
xmlCreateMemoryParserCtxt(const char *buffer, int size) {
14177
472k
    xmlParserCtxtPtr ctxt;
14178
472k
    xmlParserInputPtr input;
14179
472k
    xmlParserInputBufferPtr buf;
14180
14181
472k
    if (buffer == NULL)
14182
0
  return(NULL);
14183
472k
    if (size <= 0)
14184
4.11k
  return(NULL);
14185
14186
468k
    ctxt = xmlNewParserCtxt();
14187
468k
    if (ctxt == NULL)
14188
0
  return(NULL);
14189
14190
468k
    buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
14191
468k
    if (buf == NULL) {
14192
0
  xmlFreeParserCtxt(ctxt);
14193
0
  return(NULL);
14194
0
    }
14195
14196
468k
    input = xmlNewInputStream(ctxt);
14197
468k
    if (input == NULL) {
14198
0
  xmlFreeParserInputBuffer(buf);
14199
0
  xmlFreeParserCtxt(ctxt);
14200
0
  return(NULL);
14201
0
    }
14202
14203
468k
    input->filename = NULL;
14204
468k
    input->buf = buf;
14205
468k
    xmlBufResetInput(input->buf->buffer, input);
14206
14207
468k
    inputPush(ctxt, input);
14208
468k
    return(ctxt);
14209
468k
}
14210
14211
#ifdef LIBXML_SAX1_ENABLED
14212
/**
14213
 * xmlSAXParseMemoryWithData:
14214
 * @sax:  the SAX handler block
14215
 * @buffer:  an pointer to a char array
14216
 * @size:  the size of the array
14217
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
14218
 *             documents
14219
 * @data:  the userdata
14220
 *
14221
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
14222
 *
14223
 * parse an XML in-memory block and use the given SAX function block
14224
 * to handle the parsing callback. If sax is NULL, fallback to the default
14225
 * DOM tree building routines.
14226
 *
14227
 * User data (void *) is stored within the parser context in the
14228
 * context's _private member, so it is available nearly everywhere in libxml
14229
 *
14230
 * Returns the resulting document tree
14231
 */
14232
14233
xmlDocPtr
14234
xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
14235
0
            int size, int recovery, void *data) {
14236
0
    xmlDocPtr ret;
14237
0
    xmlParserCtxtPtr ctxt;
14238
14239
0
    xmlInitParser();
14240
14241
0
    ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14242
0
    if (ctxt == NULL) return(NULL);
14243
0
    if (sax != NULL) {
14244
0
  if (ctxt->sax != NULL)
14245
0
      xmlFree(ctxt->sax);
14246
0
        ctxt->sax = sax;
14247
0
    }
14248
0
    xmlDetectSAX2(ctxt);
14249
0
    if (data!=NULL) {
14250
0
  ctxt->_private=data;
14251
0
    }
14252
14253
0
    ctxt->recovery = recovery;
14254
14255
0
    xmlParseDocument(ctxt);
14256
14257
0
    if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14258
0
    else {
14259
0
       ret = NULL;
14260
0
       xmlFreeDoc(ctxt->myDoc);
14261
0
       ctxt->myDoc = NULL;
14262
0
    }
14263
0
    if (sax != NULL)
14264
0
  ctxt->sax = NULL;
14265
0
    xmlFreeParserCtxt(ctxt);
14266
14267
0
    return(ret);
14268
0
}
14269
14270
/**
14271
 * xmlSAXParseMemory:
14272
 * @sax:  the SAX handler block
14273
 * @buffer:  an pointer to a char array
14274
 * @size:  the size of the array
14275
 * @recovery:  work in recovery mode, i.e. tries to read not Well Formed
14276
 *             documents
14277
 *
14278
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
14279
 *
14280
 * parse an XML in-memory block and use the given SAX function block
14281
 * to handle the parsing callback. If sax is NULL, fallback to the default
14282
 * DOM tree building routines.
14283
 *
14284
 * Returns the resulting document tree
14285
 */
14286
xmlDocPtr
14287
xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
14288
0
            int size, int recovery) {
14289
0
    return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
14290
0
}
14291
14292
/**
14293
 * xmlParseMemory:
14294
 * @buffer:  an pointer to a char array
14295
 * @size:  the size of the array
14296
 *
14297
 * DEPRECATED: Use xmlReadMemory.
14298
 *
14299
 * parse an XML in-memory block and build a tree.
14300
 *
14301
 * Returns the resulting document tree
14302
 */
14303
14304
0
xmlDocPtr xmlParseMemory(const char *buffer, int size) {
14305
0
   return(xmlSAXParseMemory(NULL, buffer, size, 0));
14306
0
}
14307
14308
/**
14309
 * xmlRecoverMemory:
14310
 * @buffer:  an pointer to a char array
14311
 * @size:  the size of the array
14312
 *
14313
 * DEPRECATED: Use xmlReadMemory with XML_PARSE_RECOVER.
14314
 *
14315
 * parse an XML in-memory block and build a tree.
14316
 * In the case the document is not Well Formed, an attempt to
14317
 * build a tree is tried anyway
14318
 *
14319
 * Returns the resulting document tree or NULL in case of error
14320
 */
14321
14322
0
xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
14323
0
   return(xmlSAXParseMemory(NULL, buffer, size, 1));
14324
0
}
14325
14326
/**
14327
 * xmlSAXUserParseMemory:
14328
 * @sax:  a SAX handler
14329
 * @user_data:  The user data returned on SAX callbacks
14330
 * @buffer:  an in-memory XML document input
14331
 * @size:  the length of the XML document in bytes
14332
 *
14333
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
14334
 *
14335
 * parse an XML in-memory buffer and call the given SAX handler routines.
14336
 *
14337
 * Returns 0 in case of success or a error number otherwise
14338
 */
14339
int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
14340
0
        const char *buffer, int size) {
14341
0
    int ret = 0;
14342
0
    xmlParserCtxtPtr ctxt;
14343
14344
0
    xmlInitParser();
14345
14346
0
    ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14347
0
    if (ctxt == NULL) return -1;
14348
0
    if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14349
0
        xmlFree(ctxt->sax);
14350
0
    ctxt->sax = sax;
14351
0
    xmlDetectSAX2(ctxt);
14352
14353
0
    if (user_data != NULL)
14354
0
  ctxt->userData = user_data;
14355
14356
0
    xmlParseDocument(ctxt);
14357
14358
0
    if (ctxt->wellFormed)
14359
0
  ret = 0;
14360
0
    else {
14361
0
        if (ctxt->errNo != 0)
14362
0
      ret = ctxt->errNo;
14363
0
  else
14364
0
      ret = -1;
14365
0
    }
14366
0
    if (sax != NULL)
14367
0
        ctxt->sax = NULL;
14368
0
    if (ctxt->myDoc != NULL) {
14369
0
        xmlFreeDoc(ctxt->myDoc);
14370
0
  ctxt->myDoc = NULL;
14371
0
    }
14372
0
    xmlFreeParserCtxt(ctxt);
14373
14374
0
    return ret;
14375
0
}
14376
#endif /* LIBXML_SAX1_ENABLED */
14377
14378
/**
14379
 * xmlCreateDocParserCtxt:
14380
 * @cur:  a pointer to an array of xmlChar
14381
 *
14382
 * Creates a parser context for an XML in-memory document.
14383
 *
14384
 * Returns the new parser context or NULL
14385
 */
14386
xmlParserCtxtPtr
14387
0
xmlCreateDocParserCtxt(const xmlChar *cur) {
14388
0
    int len;
14389
14390
0
    if (cur == NULL)
14391
0
  return(NULL);
14392
0
    len = xmlStrlen(cur);
14393
0
    return(xmlCreateMemoryParserCtxt((const char *)cur, len));
14394
0
}
14395
14396
#ifdef LIBXML_SAX1_ENABLED
14397
/**
14398
 * xmlSAXParseDoc:
14399
 * @sax:  the SAX handler block
14400
 * @cur:  a pointer to an array of xmlChar
14401
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
14402
 *             documents
14403
 *
14404
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadDoc.
14405
 *
14406
 * parse an XML in-memory document and build a tree.
14407
 * It use the given SAX function block to handle the parsing callback.
14408
 * If sax is NULL, fallback to the default DOM tree building routines.
14409
 *
14410
 * Returns the resulting document tree
14411
 */
14412
14413
xmlDocPtr
14414
0
xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
14415
0
    xmlDocPtr ret;
14416
0
    xmlParserCtxtPtr ctxt;
14417
0
    xmlSAXHandlerPtr oldsax = NULL;
14418
14419
0
    if (cur == NULL) return(NULL);
14420
14421
14422
0
    ctxt = xmlCreateDocParserCtxt(cur);
14423
0
    if (ctxt == NULL) return(NULL);
14424
0
    if (sax != NULL) {
14425
0
        oldsax = ctxt->sax;
14426
0
        ctxt->sax = sax;
14427
0
        ctxt->userData = NULL;
14428
0
    }
14429
0
    xmlDetectSAX2(ctxt);
14430
14431
0
    xmlParseDocument(ctxt);
14432
0
    if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14433
0
    else {
14434
0
       ret = NULL;
14435
0
       xmlFreeDoc(ctxt->myDoc);
14436
0
       ctxt->myDoc = NULL;
14437
0
    }
14438
0
    if (sax != NULL)
14439
0
  ctxt->sax = oldsax;
14440
0
    xmlFreeParserCtxt(ctxt);
14441
14442
0
    return(ret);
14443
0
}
14444
14445
/**
14446
 * xmlParseDoc:
14447
 * @cur:  a pointer to an array of xmlChar
14448
 *
14449
 * DEPRECATED: Use xmlReadDoc.
14450
 *
14451
 * parse an XML in-memory document and build a tree.
14452
 *
14453
 * Returns the resulting document tree
14454
 */
14455
14456
xmlDocPtr
14457
0
xmlParseDoc(const xmlChar *cur) {
14458
0
    return(xmlSAXParseDoc(NULL, cur, 0));
14459
0
}
14460
#endif /* LIBXML_SAX1_ENABLED */
14461
14462
#ifdef LIBXML_LEGACY_ENABLED
14463
/************************************************************************
14464
 *                  *
14465
 *  Specific function to keep track of entities references    *
14466
 *  and used by the XSLT debugger         *
14467
 *                  *
14468
 ************************************************************************/
14469
14470
/* Callback invoked by xmlAddEntityReference(); NULL means no callback is
 * installed. It is set through xmlSetEntityReferenceFunc(). */
static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
14471
14472
/**
14473
 * xmlAddEntityReference:
14474
 * @ent : A valid entity
14475
 * @firstNode : A valid first node for children of entity
14476
 * @lastNode : A valid last node of children entity
14477
 *
14478
 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
14479
 */
14480
static void
14481
xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
14482
                      xmlNodePtr lastNode)
14483
{
14484
    if (xmlEntityRefFunc != NULL) {
14485
        (*xmlEntityRefFunc) (ent, firstNode, lastNode);
14486
    }
14487
}
14488
14489
14490
/**
14491
 * xmlSetEntityReferenceFunc:
14492
 * @func: A valid function
14493
 *
14494
 * Set the function to call call back when a xml reference has been made
14495
 */
14496
void
14497
xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
14498
{
14499
    xmlEntityRefFunc = func;
14500
}
14501
#endif /* LIBXML_LEGACY_ENABLED */
14502
14503
/************************************************************************
14504
 *                  *
14505
 *        Miscellaneous       *
14506
 *                  *
14507
 ************************************************************************/
14508
14509
/*
 * Library initialization flag: set to 1 at the end of xmlInitParser()
 * and back to 0 at the end of xmlCleanupParser().
 */
static int xmlParserInitialized = 0;
14510
14511
/**
14512
 * xmlInitParser:
14513
 *
14514
 * Initialization function for the XML parser.
14515
 * This is not reentrant. Call once before processing in case of
14516
 * use in multithreaded programs.
14517
 */
14518
14519
void
14520
7.94G
xmlInitParser(void) {
14521
    /*
14522
     * Note that the initialization code must not make memory allocations.
14523
     */
14524
7.94G
    if (xmlParserInitialized != 0)
14525
7.94G
  return;
14526
14527
3.72k
#ifdef LIBXML_THREAD_ENABLED
14528
3.72k
    __xmlGlobalInitMutexLock();
14529
3.72k
    if (xmlParserInitialized == 0) {
14530
3.72k
#endif
14531
#if defined(_WIN32) && (!defined(LIBXML_STATIC) || defined(LIBXML_STATIC_FOR_DLL))
14532
        if (xmlFree == free)
14533
            atexit(xmlCleanupParser);
14534
#endif
14535
14536
3.72k
  xmlInitThreadsInternal();
14537
3.72k
  xmlInitGlobalsInternal();
14538
3.72k
  xmlInitMemoryInternal();
14539
3.72k
        __xmlInitializeDict();
14540
3.72k
  xmlInitEncodingInternal();
14541
3.72k
  xmlRegisterDefaultInputCallbacks();
14542
3.72k
#ifdef LIBXML_OUTPUT_ENABLED
14543
3.72k
  xmlRegisterDefaultOutputCallbacks();
14544
3.72k
#endif /* LIBXML_OUTPUT_ENABLED */
14545
3.72k
#if defined(LIBXML_XPATH_ENABLED) || defined(LIBXML_SCHEMAS_ENABLED)
14546
3.72k
  xmlInitXPathInternal();
14547
3.72k
#endif
14548
3.72k
  xmlParserInitialized = 1;
14549
3.72k
#ifdef LIBXML_THREAD_ENABLED
14550
3.72k
    }
14551
3.72k
    __xmlGlobalInitMutexUnlock();
14552
3.72k
#endif
14553
3.72k
}
14554
14555
/**
14556
 * xmlCleanupParser:
14557
 *
14558
 * This function name is somewhat misleading. It does not clean up
14559
 * parser state, it cleans up memory allocated by the library itself.
14560
 * It is a cleanup function for the XML library. It tries to reclaim all
14561
 * related global memory allocated for the library processing.
14562
 * It doesn't deallocate any document related memory. One should
14563
 * call xmlCleanupParser() only when the process has finished using
14564
 * the library and all XML/HTML documents built with it.
14565
 * See also xmlInitParser() which has the opposite function of preparing
14566
 * the library for operations.
14567
 *
14568
 * WARNING: if your application is multithreaded or has plugin support
14569
 *          calling this may crash the application if another thread or
14570
 *          a plugin is still using libxml2. It's sometimes very hard to
14571
 *          guess if libxml2 is in use in the application, some libraries
14572
 *          or plugins may use it without notice. In case of doubt abstain
14573
 *          from calling this function or do it just before calling exit()
14574
 *          to avoid leak reports from valgrind !
14575
 */
14576
14577
void
14578
0
xmlCleanupParser(void) {
14579
0
    if (!xmlParserInitialized)
14580
0
  return;
14581
14582
0
    xmlCleanupCharEncodingHandlers();
14583
0
#ifdef LIBXML_CATALOG_ENABLED
14584
0
    xmlCatalogCleanup();
14585
0
#endif
14586
0
    xmlCleanupDictInternal();
14587
0
    xmlCleanupInputCallbacks();
14588
0
#ifdef LIBXML_OUTPUT_ENABLED
14589
0
    xmlCleanupOutputCallbacks();
14590
0
#endif
14591
0
#ifdef LIBXML_SCHEMAS_ENABLED
14592
0
    xmlSchemaCleanupTypes();
14593
0
    xmlRelaxNGCleanupTypes();
14594
0
#endif
14595
0
    xmlCleanupGlobalsInternal();
14596
0
    xmlCleanupThreadsInternal();
14597
0
    xmlCleanupMemoryInternal();
14598
0
    xmlParserInitialized = 0;
14599
0
}
14600
14601
#if defined(HAVE_ATTRIBUTE_DESTRUCTOR) && !defined(LIBXML_STATIC) && \
    !defined(_WIN32)
/*
 * xmlDestructor:
 *
 * Destructor-attributed hook which calls xmlCleanupParser(), but only
 * while xmlFree still points to the C library free().
 */
static void
ATTRIBUTE_DESTRUCTOR
xmlDestructor(void) {
    /*
     * Calling custom deallocation functions in a destructor can cause
     * problems, for example with Nokogiri.
     */
    if (xmlFree != free)
        return;

    xmlCleanupParser();
}
#endif
14614
14615
/************************************************************************
14616
 *                  *
14617
 *  New set (2.6.0) of simpler and more flexible APIs   *
14618
 *                  *
14619
 ************************************************************************/
14620
14621
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A NULL @str is ignored. When "dict" is NULL the string
 * is always freed.
 *
 * The expansion is wrapped in do { } while (0) so that the macro behaves
 * as a single statement (safe inside unbraced if/else); callers must
 * terminate the invocation with a semicolon.
 */
#define DICT_FREE(str)                                              \
    do {                                                            \
        if ((str) && ((!dict) ||                                    \
            (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))      \
            xmlFree((char *)(str));                                 \
    } while (0)
14632
14633
/**
14634
 * xmlCtxtReset:
14635
 * @ctxt: an XML parser context
14636
 *
14637
 * Reset a parser context
14638
 */
14639
void
14640
xmlCtxtReset(xmlParserCtxtPtr ctxt)
14641
0
{
14642
0
    xmlParserInputPtr input;
14643
0
    xmlDictPtr dict;
14644
14645
0
    if (ctxt == NULL)
14646
0
        return;
14647
14648
0
    dict = ctxt->dict;
14649
14650
0
    while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
14651
0
        xmlFreeInputStream(input);
14652
0
    }
14653
0
    ctxt->inputNr = 0;
14654
0
    ctxt->input = NULL;
14655
14656
0
    ctxt->spaceNr = 0;
14657
0
    if (ctxt->spaceTab != NULL) {
14658
0
  ctxt->spaceTab[0] = -1;
14659
0
  ctxt->space = &ctxt->spaceTab[0];
14660
0
    } else {
14661
0
        ctxt->space = NULL;
14662
0
    }
14663
14664
14665
0
    ctxt->nodeNr = 0;
14666
0
    ctxt->node = NULL;
14667
14668
0
    ctxt->nameNr = 0;
14669
0
    ctxt->name = NULL;
14670
14671
0
    ctxt->nsNr = 0;
14672
14673
0
    DICT_FREE(ctxt->version);
14674
0
    ctxt->version = NULL;
14675
0
    DICT_FREE(ctxt->encoding);
14676
0
    ctxt->encoding = NULL;
14677
0
    DICT_FREE(ctxt->directory);
14678
0
    ctxt->directory = NULL;
14679
0
    DICT_FREE(ctxt->extSubURI);
14680
0
    ctxt->extSubURI = NULL;
14681
0
    DICT_FREE(ctxt->extSubSystem);
14682
0
    ctxt->extSubSystem = NULL;
14683
0
    if (ctxt->myDoc != NULL)
14684
0
        xmlFreeDoc(ctxt->myDoc);
14685
0
    ctxt->myDoc = NULL;
14686
14687
0
    ctxt->standalone = -1;
14688
0
    ctxt->hasExternalSubset = 0;
14689
0
    ctxt->hasPErefs = 0;
14690
0
    ctxt->html = 0;
14691
0
    ctxt->external = 0;
14692
0
    ctxt->instate = XML_PARSER_START;
14693
0
    ctxt->token = 0;
14694
14695
0
    ctxt->wellFormed = 1;
14696
0
    ctxt->nsWellFormed = 1;
14697
0
    ctxt->disableSAX = 0;
14698
0
    ctxt->valid = 1;
14699
#if 0
14700
    ctxt->vctxt.userData = ctxt;
14701
    ctxt->vctxt.error = xmlParserValidityError;
14702
    ctxt->vctxt.warning = xmlParserValidityWarning;
14703
#endif
14704
0
    ctxt->record_info = 0;
14705
0
    ctxt->checkIndex = 0;
14706
0
    ctxt->endCheckState = 0;
14707
0
    ctxt->inSubset = 0;
14708
0
    ctxt->errNo = XML_ERR_OK;
14709
0
    ctxt->depth = 0;
14710
0
    ctxt->charset = XML_CHAR_ENCODING_UTF8;
14711
0
    ctxt->catalogs = NULL;
14712
0
    ctxt->sizeentities = 0;
14713
0
    ctxt->sizeentcopy = 0;
14714
0
    xmlInitNodeInfoSeq(&ctxt->node_seq);
14715
14716
0
    if (ctxt->attsDefault != NULL) {
14717
0
        xmlHashFree(ctxt->attsDefault, xmlHashDefaultDeallocator);
14718
0
        ctxt->attsDefault = NULL;
14719
0
    }
14720
0
    if (ctxt->attsSpecial != NULL) {
14721
0
        xmlHashFree(ctxt->attsSpecial, NULL);
14722
0
        ctxt->attsSpecial = NULL;
14723
0
    }
14724
14725
0
#ifdef LIBXML_CATALOG_ENABLED
14726
0
    if (ctxt->catalogs != NULL)
14727
0
  xmlCatalogFreeLocal(ctxt->catalogs);
14728
0
#endif
14729
0
    ctxt->nbErrors = 0;
14730
0
    ctxt->nbWarnings = 0;
14731
0
    if (ctxt->lastError.code != XML_ERR_OK)
14732
0
        xmlResetError(&ctxt->lastError);
14733
0
}
14734
14735
/**
14736
 * xmlCtxtResetPush:
14737
 * @ctxt: an XML parser context
14738
 * @chunk:  a pointer to an array of chars
14739
 * @size:  number of chars in the array
14740
 * @filename:  an optional file name or URI
14741
 * @encoding:  the document encoding, or NULL
14742
 *
14743
 * Reset a push parser context
14744
 *
14745
 * Returns 0 in case of success and 1 in case of error
14746
 */
14747
int
14748
xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
14749
                 int size, const char *filename, const char *encoding)
14750
0
{
14751
0
    xmlParserInputPtr inputStream;
14752
0
    xmlParserInputBufferPtr buf;
14753
0
    xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
14754
14755
0
    if (ctxt == NULL)
14756
0
        return(1);
14757
14758
0
    if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
14759
0
        enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
14760
14761
0
    buf = xmlAllocParserInputBuffer(enc);
14762
0
    if (buf == NULL)
14763
0
        return(1);
14764
14765
0
    if (ctxt == NULL) {
14766
0
        xmlFreeParserInputBuffer(buf);
14767
0
        return(1);
14768
0
    }
14769
14770
0
    xmlCtxtReset(ctxt);
14771
14772
0
    if (filename == NULL) {
14773
0
        ctxt->directory = NULL;
14774
0
    } else {
14775
0
        ctxt->directory = xmlParserGetDirectory(filename);
14776
0
    }
14777
14778
0
    inputStream = xmlNewInputStream(ctxt);
14779
0
    if (inputStream == NULL) {
14780
0
        xmlFreeParserInputBuffer(buf);
14781
0
        return(1);
14782
0
    }
14783
14784
0
    if (filename == NULL)
14785
0
        inputStream->filename = NULL;
14786
0
    else
14787
0
        inputStream->filename = (char *)
14788
0
            xmlCanonicPath((const xmlChar *) filename);
14789
0
    inputStream->buf = buf;
14790
0
    xmlBufResetInput(buf->buffer, inputStream);
14791
14792
0
    inputPush(ctxt, inputStream);
14793
14794
0
    if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
14795
0
        (ctxt->input->buf != NULL)) {
14796
0
  size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
14797
0
        size_t cur = ctxt->input->cur - ctxt->input->base;
14798
14799
0
        xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
14800
14801
0
        xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
14802
#ifdef DEBUG_PUSH
14803
        xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
14804
#endif
14805
0
    }
14806
14807
0
    if (encoding != NULL) {
14808
0
        xmlCharEncodingHandlerPtr hdlr;
14809
14810
0
        if (ctxt->encoding != NULL)
14811
0
      xmlFree((xmlChar *) ctxt->encoding);
14812
0
        ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14813
14814
0
        hdlr = xmlFindCharEncodingHandler(encoding);
14815
0
        if (hdlr != NULL) {
14816
0
            xmlSwitchToEncoding(ctxt, hdlr);
14817
0
  } else {
14818
0
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
14819
0
            "Unsupported encoding %s\n", BAD_CAST encoding);
14820
0
        }
14821
0
    } else if (enc != XML_CHAR_ENCODING_NONE) {
14822
0
        xmlSwitchEncoding(ctxt, enc);
14823
0
    }
14824
14825
0
    return(0);
14826
0
}
14827
14828
14829
/**
14830
 * xmlCtxtUseOptionsInternal:
14831
 * @ctxt: an XML parser context
14832
 * @options:  a combination of xmlParserOption
14833
 * @encoding:  the user provided encoding to use
14834
 *
14835
 * Applies the options to the parser context
14836
 *
14837
 * Returns 0 in case of success, the set of unknown or unimplemented options
14838
 *         in case of error.
14839
 */
14840
static int
14841
xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding)
14842
1.03M
{
14843
1.03M
    if (ctxt == NULL)
14844
0
        return(-1);
14845
1.03M
    if (encoding != NULL) {
14846
0
        if (ctxt->encoding != NULL)
14847
0
      xmlFree((xmlChar *) ctxt->encoding);
14848
0
        ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14849
0
    }
14850
1.03M
    if (options & XML_PARSE_RECOVER) {
14851
516k
        ctxt->recovery = 1;
14852
516k
        options -= XML_PARSE_RECOVER;
14853
516k
  ctxt->options |= XML_PARSE_RECOVER;
14854
516k
    } else
14855
518k
        ctxt->recovery = 0;
14856
1.03M
    if (options & XML_PARSE_DTDLOAD) {
14857
713k
        ctxt->loadsubset = XML_DETECT_IDS;
14858
713k
        options -= XML_PARSE_DTDLOAD;
14859
713k
  ctxt->options |= XML_PARSE_DTDLOAD;
14860
713k
    } else
14861
321k
        ctxt->loadsubset = 0;
14862
1.03M
    if (options & XML_PARSE_DTDATTR) {
14863
382k
        ctxt->loadsubset |= XML_COMPLETE_ATTRS;
14864
382k
        options -= XML_PARSE_DTDATTR;
14865
382k
  ctxt->options |= XML_PARSE_DTDATTR;
14866
382k
    }
14867
1.03M
    if (options & XML_PARSE_NOENT) {
14868
645k
        ctxt->replaceEntities = 1;
14869
        /* ctxt->loadsubset |= XML_DETECT_IDS; */
14870
645k
        options -= XML_PARSE_NOENT;
14871
645k
  ctxt->options |= XML_PARSE_NOENT;
14872
645k
    } else
14873
389k
        ctxt->replaceEntities = 0;
14874
1.03M
    if (options & XML_PARSE_PEDANTIC) {
14875
217k
        ctxt->pedantic = 1;
14876
217k
        options -= XML_PARSE_PEDANTIC;
14877
217k
  ctxt->options |= XML_PARSE_PEDANTIC;
14878
217k
    } else
14879
817k
        ctxt->pedantic = 0;
14880
1.03M
    if (options & XML_PARSE_NOBLANKS) {
14881
348k
        ctxt->keepBlanks = 0;
14882
348k
        ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
14883
348k
        options -= XML_PARSE_NOBLANKS;
14884
348k
  ctxt->options |= XML_PARSE_NOBLANKS;
14885
348k
    } else
14886
686k
        ctxt->keepBlanks = 1;
14887
1.03M
    if (options & XML_PARSE_DTDVALID) {
14888
444k
        ctxt->validate = 1;
14889
444k
        if (options & XML_PARSE_NOWARNING)
14890
249k
            ctxt->vctxt.warning = NULL;
14891
444k
        if (options & XML_PARSE_NOERROR)
14892
285k
            ctxt->vctxt.error = NULL;
14893
444k
        options -= XML_PARSE_DTDVALID;
14894
444k
  ctxt->options |= XML_PARSE_DTDVALID;
14895
444k
    } else
14896
590k
        ctxt->validate = 0;
14897
1.03M
    if (options & XML_PARSE_NOWARNING) {
14898
333k
        ctxt->sax->warning = NULL;
14899
333k
        options -= XML_PARSE_NOWARNING;
14900
333k
    }
14901
1.03M
    if (options & XML_PARSE_NOERROR) {
14902
393k
        ctxt->sax->error = NULL;
14903
393k
        ctxt->sax->fatalError = NULL;
14904
393k
        options -= XML_PARSE_NOERROR;
14905
393k
    }
14906
1.03M
#ifdef LIBXML_SAX1_ENABLED
14907
1.03M
    if (options & XML_PARSE_SAX1) {
14908
339k
        ctxt->sax->startElement = xmlSAX2StartElement;
14909
339k
        ctxt->sax->endElement = xmlSAX2EndElement;
14910
339k
        ctxt->sax->startElementNs = NULL;
14911
339k
        ctxt->sax->endElementNs = NULL;
14912
339k
        ctxt->sax->initialized = 1;
14913
339k
        options -= XML_PARSE_SAX1;
14914
339k
  ctxt->options |= XML_PARSE_SAX1;
14915
339k
    }
14916
1.03M
#endif /* LIBXML_SAX1_ENABLED */
14917
1.03M
    if (options & XML_PARSE_NODICT) {
14918
286k
        ctxt->dictNames = 0;
14919
286k
        options -= XML_PARSE_NODICT;
14920
286k
  ctxt->options |= XML_PARSE_NODICT;
14921
748k
    } else {
14922
748k
        ctxt->dictNames = 1;
14923
748k
    }
14924
1.03M
    if (options & XML_PARSE_NOCDATA) {
14925
378k
        ctxt->sax->cdataBlock = NULL;
14926
378k
        options -= XML_PARSE_NOCDATA;
14927
378k
  ctxt->options |= XML_PARSE_NOCDATA;
14928
378k
    }
14929
1.03M
    if (options & XML_PARSE_NSCLEAN) {
14930
430k
  ctxt->options |= XML_PARSE_NSCLEAN;
14931
430k
        options -= XML_PARSE_NSCLEAN;
14932
430k
    }
14933
1.03M
    if (options & XML_PARSE_NONET) {
14934
293k
  ctxt->options |= XML_PARSE_NONET;
14935
293k
        options -= XML_PARSE_NONET;
14936
293k
    }
14937
1.03M
    if (options & XML_PARSE_COMPACT) {
14938
544k
  ctxt->options |= XML_PARSE_COMPACT;
14939
544k
        options -= XML_PARSE_COMPACT;
14940
544k
    }
14941
1.03M
    if (options & XML_PARSE_OLD10) {
14942
275k
  ctxt->options |= XML_PARSE_OLD10;
14943
275k
        options -= XML_PARSE_OLD10;
14944
275k
    }
14945
1.03M
    if (options & XML_PARSE_NOBASEFIX) {
14946
296k
  ctxt->options |= XML_PARSE_NOBASEFIX;
14947
296k
        options -= XML_PARSE_NOBASEFIX;
14948
296k
    }
14949
1.03M
    if (options & XML_PARSE_HUGE) {
14950
260k
  ctxt->options |= XML_PARSE_HUGE;
14951
260k
        options -= XML_PARSE_HUGE;
14952
260k
        if (ctxt->dict != NULL)
14953
260k
            xmlDictSetLimit(ctxt->dict, 0);
14954
260k
    }
14955
1.03M
    if (options & XML_PARSE_OLDSAX) {
14956
257k
  ctxt->options |= XML_PARSE_OLDSAX;
14957
257k
        options -= XML_PARSE_OLDSAX;
14958
257k
    }
14959
1.03M
    if (options & XML_PARSE_IGNORE_ENC) {
14960
395k
  ctxt->options |= XML_PARSE_IGNORE_ENC;
14961
395k
        options -= XML_PARSE_IGNORE_ENC;
14962
395k
    }
14963
1.03M
    if (options & XML_PARSE_BIG_LINES) {
14964
352k
  ctxt->options |= XML_PARSE_BIG_LINES;
14965
352k
        options -= XML_PARSE_BIG_LINES;
14966
352k
    }
14967
1.03M
    ctxt->linenumbers = 1;
14968
1.03M
    return (options);
14969
1.03M
}
14970
14971
/**
14972
 * xmlCtxtUseOptions:
14973
 * @ctxt: an XML parser context
14974
 * @options:  a combination of xmlParserOption
14975
 *
14976
 * Applies the options to the parser context
14977
 *
14978
 * Returns 0 in case of success, the set of unknown or unimplemented options
14979
 *         in case of error.
14980
 */
14981
int
14982
xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
14983
690k
{
14984
690k
   return(xmlCtxtUseOptionsInternal(ctxt, options, NULL));
14985
690k
}
14986
14987
/**
14988
 * xmlDoRead:
14989
 * @ctxt:  an XML parser context
14990
 * @URL:  the base URL to use for the document
14991
 * @encoding:  the document encoding, or NULL
14992
 * @options:  a combination of xmlParserOption
14993
 * @reuse:  keep the context for reuse
14994
 *
14995
 * Common front-end for the xmlRead functions
14996
 *
14997
 * Returns the resulting document tree or NULL
14998
 */
14999
static xmlDocPtr
15000
xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
15001
          int options, int reuse)
15002
344k
{
15003
344k
    xmlDocPtr ret;
15004
15005
344k
    xmlCtxtUseOptionsInternal(ctxt, options, encoding);
15006
344k
    if (encoding != NULL) {
15007
0
        xmlCharEncodingHandlerPtr hdlr;
15008
15009
0
  hdlr = xmlFindCharEncodingHandler(encoding);
15010
0
  if (hdlr != NULL)
15011
0
      xmlSwitchToEncoding(ctxt, hdlr);
15012
0
    }
15013
344k
    if ((URL != NULL) && (ctxt->input != NULL) &&
15014
344k
        (ctxt->input->filename == NULL))
15015
344k
        ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
15016
344k
    xmlParseDocument(ctxt);
15017
344k
    if ((ctxt->wellFormed) || ctxt->recovery)
15018
198k
        ret = ctxt->myDoc;
15019
145k
    else {
15020
145k
        ret = NULL;
15021
145k
  if (ctxt->myDoc != NULL) {
15022
124k
      xmlFreeDoc(ctxt->myDoc);
15023
124k
  }
15024
145k
    }
15025
344k
    ctxt->myDoc = NULL;
15026
344k
    if (!reuse) {
15027
344k
  xmlFreeParserCtxt(ctxt);
15028
344k
    }
15029
15030
344k
    return (ret);
15031
344k
}
15032
15033
/**
15034
 * xmlReadDoc:
15035
 * @cur:  a pointer to a zero terminated string
15036
 * @URL:  the base URL to use for the document
15037
 * @encoding:  the document encoding, or NULL
15038
 * @options:  a combination of xmlParserOption
15039
 *
15040
 * parse an XML in-memory document and build a tree.
15041
 *
15042
 * Returns the resulting document tree
15043
 */
15044
xmlDocPtr
15045
xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
15046
0
{
15047
0
    xmlParserCtxtPtr ctxt;
15048
15049
0
    if (cur == NULL)
15050
0
        return (NULL);
15051
0
    xmlInitParser();
15052
15053
0
    ctxt = xmlCreateDocParserCtxt(cur);
15054
0
    if (ctxt == NULL)
15055
0
        return (NULL);
15056
0
    return (xmlDoRead(ctxt, URL, encoding, options, 0));
15057
0
}
15058
15059
/**
15060
 * xmlReadFile:
15061
 * @filename:  a file or URL
15062
 * @encoding:  the document encoding, or NULL
15063
 * @options:  a combination of xmlParserOption
15064
 *
15065
 * parse an XML file from the filesystem or the network.
15066
 *
15067
 * Returns the resulting document tree
15068
 */
15069
xmlDocPtr
15070
xmlReadFile(const char *filename, const char *encoding, int options)
15071
0
{
15072
0
    xmlParserCtxtPtr ctxt;
15073
15074
0
    xmlInitParser();
15075
0
    ctxt = xmlCreateURLParserCtxt(filename, options);
15076
0
    if (ctxt == NULL)
15077
0
        return (NULL);
15078
0
    return (xmlDoRead(ctxt, NULL, encoding, options, 0));
15079
0
}
15080
15081
/**
15082
 * xmlReadMemory:
15083
 * @buffer:  a pointer to a char array
15084
 * @size:  the size of the array
15085
 * @URL:  the base URL to use for the document
15086
 * @encoding:  the document encoding, or NULL
15087
 * @options:  a combination of xmlParserOption
15088
 *
15089
 * parse an XML in-memory document and build a tree.
15090
 *
15091
 * Returns the resulting document tree
15092
 */
15093
xmlDocPtr
15094
xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
15095
345k
{
15096
345k
    xmlParserCtxtPtr ctxt;
15097
15098
345k
    xmlInitParser();
15099
345k
    ctxt = xmlCreateMemoryParserCtxt(buffer, size);
15100
345k
    if (ctxt == NULL)
15101
844
        return (NULL);
15102
344k
    return (xmlDoRead(ctxt, URL, encoding, options, 0));
15103
345k
}
15104
15105
/**
15106
 * xmlReadFd:
15107
 * @fd:  an open file descriptor
15108
 * @URL:  the base URL to use for the document
15109
 * @encoding:  the document encoding, or NULL
15110
 * @options:  a combination of xmlParserOption
15111
 *
15112
 * parse an XML from a file descriptor and build a tree.
15113
 * NOTE that the file descriptor will not be closed when the
15114
 *      reader is closed or reset.
15115
 *
15116
 * Returns the resulting document tree
15117
 */
15118
xmlDocPtr
15119
xmlReadFd(int fd, const char *URL, const char *encoding, int options)
15120
0
{
15121
0
    xmlParserCtxtPtr ctxt;
15122
0
    xmlParserInputBufferPtr input;
15123
0
    xmlParserInputPtr stream;
15124
15125
0
    if (fd < 0)
15126
0
        return (NULL);
15127
0
    xmlInitParser();
15128
15129
0
    input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15130
0
    if (input == NULL)
15131
0
        return (NULL);
15132
0
    input->closecallback = NULL;
15133
0
    ctxt = xmlNewParserCtxt();
15134
0
    if (ctxt == NULL) {
15135
0
        xmlFreeParserInputBuffer(input);
15136
0
        return (NULL);
15137
0
    }
15138
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15139
0
    if (stream == NULL) {
15140
0
        xmlFreeParserInputBuffer(input);
15141
0
  xmlFreeParserCtxt(ctxt);
15142
0
        return (NULL);
15143
0
    }
15144
0
    inputPush(ctxt, stream);
15145
0
    return (xmlDoRead(ctxt, URL, encoding, options, 0));
15146
0
}
15147
15148
/**
15149
 * xmlReadIO:
15150
 * @ioread:  an I/O read function
15151
 * @ioclose:  an I/O close function
15152
 * @ioctx:  an I/O handler
15153
 * @URL:  the base URL to use for the document
15154
 * @encoding:  the document encoding, or NULL
15155
 * @options:  a combination of xmlParserOption
15156
 *
15157
 * parse an XML document from I/O functions and source and build a tree.
15158
 *
15159
 * Returns the resulting document tree
15160
 */
15161
xmlDocPtr
15162
xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
15163
          void *ioctx, const char *URL, const char *encoding, int options)
15164
0
{
15165
0
    xmlParserCtxtPtr ctxt;
15166
0
    xmlParserInputBufferPtr input;
15167
0
    xmlParserInputPtr stream;
15168
15169
0
    if (ioread == NULL)
15170
0
        return (NULL);
15171
0
    xmlInitParser();
15172
15173
0
    input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15174
0
                                         XML_CHAR_ENCODING_NONE);
15175
0
    if (input == NULL) {
15176
0
        if (ioclose != NULL)
15177
0
            ioclose(ioctx);
15178
0
        return (NULL);
15179
0
    }
15180
0
    ctxt = xmlNewParserCtxt();
15181
0
    if (ctxt == NULL) {
15182
0
        xmlFreeParserInputBuffer(input);
15183
0
        return (NULL);
15184
0
    }
15185
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15186
0
    if (stream == NULL) {
15187
0
        xmlFreeParserInputBuffer(input);
15188
0
  xmlFreeParserCtxt(ctxt);
15189
0
        return (NULL);
15190
0
    }
15191
0
    inputPush(ctxt, stream);
15192
0
    return (xmlDoRead(ctxt, URL, encoding, options, 0));
15193
0
}
15194
15195
/**
15196
 * xmlCtxtReadDoc:
15197
 * @ctxt:  an XML parser context
15198
 * @cur:  a pointer to a zero terminated string
15199
 * @URL:  the base URL to use for the document
15200
 * @encoding:  the document encoding, or NULL
15201
 * @options:  a combination of xmlParserOption
15202
 *
15203
 * parse an XML in-memory document and build a tree.
15204
 * This reuses the existing @ctxt parser context
15205
 *
15206
 * Returns the resulting document tree
15207
 */
15208
xmlDocPtr
15209
xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
15210
               const char *URL, const char *encoding, int options)
15211
0
{
15212
0
    if (cur == NULL)
15213
0
        return (NULL);
15214
0
    return (xmlCtxtReadMemory(ctxt, (const char *) cur, xmlStrlen(cur), URL,
15215
0
                              encoding, options));
15216
0
}
15217
15218
/**
15219
 * xmlCtxtReadFile:
15220
 * @ctxt:  an XML parser context
15221
 * @filename:  a file or URL
15222
 * @encoding:  the document encoding, or NULL
15223
 * @options:  a combination of xmlParserOption
15224
 *
15225
 * parse an XML file from the filesystem or the network.
15226
 * This reuses the existing @ctxt parser context
15227
 *
15228
 * Returns the resulting document tree
15229
 */
15230
xmlDocPtr
15231
xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
15232
                const char *encoding, int options)
15233
0
{
15234
0
    xmlParserInputPtr stream;
15235
15236
0
    if (filename == NULL)
15237
0
        return (NULL);
15238
0
    if (ctxt == NULL)
15239
0
        return (NULL);
15240
0
    xmlInitParser();
15241
15242
0
    xmlCtxtReset(ctxt);
15243
15244
0
    stream = xmlLoadExternalEntity(filename, NULL, ctxt);
15245
0
    if (stream == NULL) {
15246
0
        return (NULL);
15247
0
    }
15248
0
    inputPush(ctxt, stream);
15249
0
    return (xmlDoRead(ctxt, NULL, encoding, options, 1));
15250
0
}
15251
15252
/**
15253
 * xmlCtxtReadMemory:
15254
 * @ctxt:  an XML parser context
15255
 * @buffer:  a pointer to a char array
15256
 * @size:  the size of the array
15257
 * @URL:  the base URL to use for the document
15258
 * @encoding:  the document encoding, or NULL
15259
 * @options:  a combination of xmlParserOption
15260
 *
15261
 * parse an XML in-memory document and build a tree.
15262
 * This reuses the existing @ctxt parser context
15263
 *
15264
 * Returns the resulting document tree
15265
 */
15266
xmlDocPtr
15267
xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
15268
                  const char *URL, const char *encoding, int options)
15269
0
{
15270
0
    xmlParserInputBufferPtr input;
15271
0
    xmlParserInputPtr stream;
15272
15273
0
    if (ctxt == NULL)
15274
0
        return (NULL);
15275
0
    if (buffer == NULL)
15276
0
        return (NULL);
15277
0
    xmlInitParser();
15278
15279
0
    xmlCtxtReset(ctxt);
15280
15281
0
    input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
15282
0
    if (input == NULL) {
15283
0
  return(NULL);
15284
0
    }
15285
15286
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15287
0
    if (stream == NULL) {
15288
0
  xmlFreeParserInputBuffer(input);
15289
0
  return(NULL);
15290
0
    }
15291
15292
0
    inputPush(ctxt, stream);
15293
0
    return (xmlDoRead(ctxt, URL, encoding, options, 1));
15294
0
}
15295
15296
/**
15297
 * xmlCtxtReadFd:
15298
 * @ctxt:  an XML parser context
15299
 * @fd:  an open file descriptor
15300
 * @URL:  the base URL to use for the document
15301
 * @encoding:  the document encoding, or NULL
15302
 * @options:  a combination of xmlParserOption
15303
 *
15304
 * parse an XML from a file descriptor and build a tree.
15305
 * This reuses the existing @ctxt parser context
15306
 * NOTE that the file descriptor will not be closed when the
15307
 *      reader is closed or reset.
15308
 *
15309
 * Returns the resulting document tree
15310
 */
15311
xmlDocPtr
15312
xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
15313
              const char *URL, const char *encoding, int options)
15314
0
{
15315
0
    xmlParserInputBufferPtr input;
15316
0
    xmlParserInputPtr stream;
15317
15318
0
    if (fd < 0)
15319
0
        return (NULL);
15320
0
    if (ctxt == NULL)
15321
0
        return (NULL);
15322
0
    xmlInitParser();
15323
15324
0
    xmlCtxtReset(ctxt);
15325
15326
15327
0
    input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15328
0
    if (input == NULL)
15329
0
        return (NULL);
15330
0
    input->closecallback = NULL;
15331
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15332
0
    if (stream == NULL) {
15333
0
        xmlFreeParserInputBuffer(input);
15334
0
        return (NULL);
15335
0
    }
15336
0
    inputPush(ctxt, stream);
15337
0
    return (xmlDoRead(ctxt, URL, encoding, options, 1));
15338
0
}
15339
15340
/**
15341
 * xmlCtxtReadIO:
15342
 * @ctxt:  an XML parser context
15343
 * @ioread:  an I/O read function
15344
 * @ioclose:  an I/O close function
15345
 * @ioctx:  an I/O handler
15346
 * @URL:  the base URL to use for the document
15347
 * @encoding:  the document encoding, or NULL
15348
 * @options:  a combination of xmlParserOption
15349
 *
15350
 * parse an XML document from I/O functions and source and build a tree.
15351
 * This reuses the existing @ctxt parser context
15352
 *
15353
 * Returns the resulting document tree
15354
 */
15355
xmlDocPtr
15356
xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
15357
              xmlInputCloseCallback ioclose, void *ioctx,
15358
        const char *URL,
15359
              const char *encoding, int options)
15360
0
{
15361
0
    xmlParserInputBufferPtr input;
15362
0
    xmlParserInputPtr stream;
15363
15364
0
    if (ioread == NULL)
15365
0
        return (NULL);
15366
0
    if (ctxt == NULL)
15367
0
        return (NULL);
15368
0
    xmlInitParser();
15369
15370
0
    xmlCtxtReset(ctxt);
15371
15372
0
    input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15373
0
                                         XML_CHAR_ENCODING_NONE);
15374
0
    if (input == NULL) {
15375
0
        if (ioclose != NULL)
15376
0
            ioclose(ioctx);
15377
0
        return (NULL);
15378
0
    }
15379
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15380
0
    if (stream == NULL) {
15381
0
        xmlFreeParserInputBuffer(input);
15382
0
        return (NULL);
15383
0
    }
15384
0
    inputPush(ctxt, stream);
15385
0
    return (xmlDoRead(ctxt, URL, encoding, options, 1));
15386
0
}
15387